#
# transforms the KAN tier of a Partitur file into a list of Phonemes
# defined in the file INVENTAR
# INVENTAR must contain the valid phones, one per line, in reverse length
# order, i.e. longest first ('aI' before 'a').
# Numeric phoneme labels are translated into 'P' + label to match
# HTK requirements (e.g. '6' -> 'P6').
# At the beginning the script prints a '<' and at the end a '>' for
# beginning and ending silence resp.; a '#' line is printed between
# consecutive KAN entries.
#
# This script can be used to create a MLF without timing information;
# the magic number and file names are not written here (the terminating
# '.' line is printed by the END rule).

# Load the phoneme inventory from the file named by the variable INVENTAR
# into inv[0 .. invcount-1] (one entry per input line, in file order), then
# print the opening '<' line and reset the first-entry flag used by the
# KAN rule.
BEGIN {
        invcount = 0
        # The getline expression is parenthesised so that '> 0' is
        # unambiguously applied to getline's return value and not to the
        # file name expression.
        while ( (invstatus = (getline invline < INVENTAR)) > 0 )
        {
          inv[invcount] = invline
          invcount ++
        }
        # getline returns -1 when the file cannot be read; report it instead
        # of silently continuing with an empty inventory.
        if ( invstatus < 0 )
          printf("ERROR: cannot read inventory file '%s'\n",INVENTAR) > "/dev/stderr"
        close(INVENTAR)
        print "<"
        # 0 until the first KAN line has been seen (see the KAN rule).
        firstpause = 0
      }

# For every KAN tier line: split the third field into inventory phonemes
# and print one phoneme per line. A '#' line is printed before every KAN
# entry except the first one. Entries whose first character is a digit are
# printed with a leading 'P'. If no inventory entry is a prefix of the
# remaining string, an error is written to /dev/stderr and the rest of the
# line is skipped (phonemes already printed for it stay in the output).
/^KAN:/ {
          kanstr = $3
          if ( firstpause == 0 )
            firstpause = 1
          else
            print "#"
          # Drop the characters # ' " + before matching against the inventory.
          gsub(/[#'"+]/,"",kanstr)
          while ( kanstr != "" )
          {
            # Find the first inventory entry that is a literal prefix of
            # kanstr; the inventory order decides which entry wins.
            for (i=0; i<invcount; i++)
              if ( index(kanstr,inv[i]) == 1 ) break
            if ( i == invcount )
            {
              printf("ERROR: unknown phoneme in %s\n",$3) > "/dev/stderr"
              next
            }
            if ( inv[i] ~ /^[0-9]/ )
              printf("P%s\n",inv[i])
            else
              print inv[i]
            # Remove the matched prefix literally. Using sub() here would
            # interpret the inventory entry as an unanchored regular
            # expression, which misbehaves for entries containing regex
            # metacharacters.
            kanstr = substr(kanstr,length(inv[i]) + 1)
          }
        }
# After all input has been processed, print the closing '>' line followed
# by a line containing a single '.'.
END {
        print ">"       # counterpart of the '<' printed in BEGIN
        print "."       # final '.' line
}
