# check input BPF for impossible short chunks
# see maus.trn for details
#
# in case that an impossible short chunk is found, the script 
# prints the start sample of that chunk to stdout and exits with 
# error code 1; otherwise no output is produced and exit code is 0
#
# call this script with options -v RATE=<samplerate> -v DIVISOR=<divisor>
# <divisor> = 1/MinDurSec, e.g. 25 -> 40msec, 34 -> ~30msec, 100 -> 10msec
# this script works only when the TRN tier appears after the KAN tier
# otherwise no impossible short chunks are found

/^KAN:/ {
        ps = $3
        i=4
        while($i != "")
        {
          ps = ps $i
          i ++
        }
#print ps
        # ignore 'words' that are transcribed as '<...>'
        if(match(ps,/^<.*>$/) != 0) next
        # strip the label string from chars that do not count for duration
        # - compress <nib> and <usb> to one char
        gsub(/<nib>/,"n",ps)
        gsub(/<usb>/,"n",ps)
        # - diacritics
        gsub(/_[^ ] /," ",ps)
        # - 'P' masker, length ':', '<>' of silence/noise symbols, blank (if there)
	gsub(/[: P'"<>]/,"",ps)
        # now estimate number of phones; symbols with two char count double
	kanlen[$2] = length(ps)
#print ps
#print "kanlen[" $2 "] : " kanlen[$2]
}
/^TRN:/ {
	trnsamdur = $3
	nl = split($4,kanlnks,",")
#print "nl = " nl
        kanl = 0
        for(idx in kanlnks){
		kanl += kanlen[kanlnks[idx]] 
#print "kanl = " kanl " kanlen[" kanlnks[idx] "] = " kanlen[kanlnks[idx]] " idx = " idx
	}
        # assume that each phon has minimum of MinS sec, then the sample length is RATE * MinS
        # instead of MinS(which is a float) we give the script 'divisor' = 1/MinS
        # e.g. divisor = 25 -> 40msec, 34 -> 33msec, 100 -> 10msec 
        # => the total estimated length of the chunk in samples is: 
	kanlsamdurmin = int(kanl*RATE/DIVISOR)
#print $0
#print "trnsamdur = " trnsamdur ", kanlsamdurmin = " kanlsamdurmin
	if(kanlsamdurmin > trnsamdur) {
          print $2
          exit 1
        }
}
