#
# transforms the MAU tier of a Partitur file into a list of HMMs as 
# defined in the file MAPPING (DICT).
# The accent markers ''' and '"' are deleted from the MAU label and
# '\' is replaced by '-' before processing.
# Phoneme labels that begin with a digit are prefixed with 'P' to match
# HTK requirements (e.g. '6' -> 'P6').
# '<p:>' is mapped to '<', since '<p:>' points to Tee model HMM, and 
# these cannot be chained in HERest training.
# Then the parsed input phones are mapped to HMM names using MAPPING
# (first column: phone name, second column: hmm name).
# Output is one hmm name per line.

# If an unknown input phone is encountered or no mapping is found, 
# the script prints an error
# message to stderr and exits with code 1.

# Load the phone -> HMM name table from the file named by the variable
# MAPPING into the array 'map' (first column: phone name, second column:
# HMM name).  MAPPING has to be set before BEGIN runs (e.g. with
# 'awk -v MAPPING=file'); a plain 'MAPPING=file' operand is assigned too late.
# 'mapcount' holds the number of lines read from the mapping file.
# If MAPPING is empty or the file cannot be read, an error is printed to
# stderr and the script exits with code 1.
BEGIN {
        mapcount = 0
        if ( MAPPING == "" )
        {
          printf("ERROR: no mapping file given (variable MAPPING is empty)\n") > "/dev/stderr"
          exit 1
        }
        # The getline call is parenthesised: the unparenthesised form
        # 'getline < MAPPING > 0' is ambiguous and not portable.
        # Reading into 'mapline' leaves $0 and NF untouched.
        while ( ( mapstatus = ( getline mapline < MAPPING ) ) > 0 )
        {
          split(mapline,splitarr)
          map[splitarr[1]] = splitarr[2]
          mapcount ++
        }
        # getline returns -1 on a read/open error and 0 on end of file.
        if ( mapstatus < 0 )
        {
          printf("ERROR: cannot read mapping file (%s)\n",MAPPING) > "/dev/stderr"
          exit 1
        }
        close(MAPPING)
      }

# Rule for every MAU tier line: take the label from field 5, normalise it,
# look it up in 'map' and print the mapped HMM name on its own line.
# A label without a (non-empty) mapping is reported on stderr and the
# script exits with code 1.
/^MAU:/ {
    kanstr = $5

    # Normalise the label: drop the accent markers ' and ", turn a
    # backslash into '-', and replace the pause symbol '<p:>' by '<'.
    gsub(/'/, "", kanstr)
    gsub(/"/, "", kanstr)
    gsub(/\\/, "-", kanstr)
    gsub(/<p:>/, "<", kanstr)

    # Labels that start with a digit get a leading 'P'.
    phonename = ( kanstr ~ /^[0-9]/ ) ? "P" kanstr : kanstr

    hmmname = map[phonename]
    if ( hmmname == "" ) {
        printf "ERROR: no mapping for phoneme (%s) found\n", phonename > "/dev/stderr"
        exit 1
    }

    printf "%s\n", hmmname
}
