#!/bin/tcsh

# General purpose tool to insert a KAN tier into an existing 
# BPF file which must have an ORT tier.

# This file is not part of the free-ware MAUS distribution!

# F. Schiel 2004-12-23 / 2011-12-15

# The script reads the ORT tier from $1, looks up the best pronunciation
# for each word and appends the result as a KAN tier to $1.
# Originally this script used 'mk_pron', a highly specialized tool that
# generates German canonical pronunciations based on the orthography.

# It has since been changed to use txt2lex.pl exclusively instead of mk_pron.
# Reasons: - speeds up processing
#          - no dependency on /homes/schiel (txt2lex.pl is in /usr/local/bin!)

# set MK_PRON = /usr/local/bin/mk_pron

# ----------------------------------------------------------------------
# create_kan <BPF file>
#
# Appends a KAN tier to the BPF file given as $1.  For every 'ORT:' line
# the third whitespace-separated field is passed to txt2lex.pl, column 2
# of its output is used as the KAN entry, and the second field of the
# ORT line (its link number) is reused as the link number of the KAN
# line.
#
# Exit status: 0 on success, 1 on any error.  All messages go to stdout.
# ----------------------------------------------------------------------

# Test the argument count first: an unquoted, empty $1 would leave the
# malformed expression '( == "" )' and tcsh would abort before the usage
# text is printed.  The "x" prefix also catches an explicitly empty
# argument.
if ( $#argv < 1 ) then
  echo "usage: create_kan <BPF file>"
  exit 1
endif
if ( "x$1" == "x" ) then
  echo "usage: create_kan <BPF file>"
  exit 1
endif

# Keep the path as one word so that names containing blanks survive.
set par = "$1"
if ( ! -w "$par" ) then
  echo "ERROR: cannot write to BPF file $par - exiting"
  exit 1
endif

# Only files that file(1) reports as 'ASCII text' are accepted.
# NOTE(review): the txt2lex.pl remark further down says UTF-8 ORT tiers
# are acceptable, yet this check rejects them - confirm which is intended.
file "$par" | grep -q 'ASCII text'
if ( $status != 0 ) then
  echo "$0 : ERROR : Orthographic input contains either ISO or UTF8 or other coding - exiting"
  exit 1
endif

grep '^ORT:' "$par" >& /dev/null
if ( $status != 0 ) then
  echo "ERROR: BPF file $par does not contain an ORT tier - exiting"
  exit 1
endif
set ort_count = `grep -c '^ORT:' "$par"`

# Private scratch directory instead of predictable /tmp/<pid>_* names;
# this also guarantees that no stale file from an earlier run is reused.
# The result is tested for emptiness rather than via $status, so the
# check does not depend on how the shell reports backquote failures.
set tmpdir = `mktemp -d /tmp/create_kan.XXXXXX`
if ( "x$tmpdir" == "x" ) then
  echo "ERROR: cannot create temporary directory in /tmp - exiting"
  exit 1
endif
set ort_file  = "$tmpdir/ORT"
set pron_file = "$tmpdir/PRON"
set hdr_file  = "$tmpdir/HEADER"

# Historical mk_pron pipeline, kept for reference:
#grep '^ORT:' $par | awk '{print $3}' | $MK_PRON tts=balloon | cut -f 2 >! /tmp/${PID}PRON
#if ( $status != 0 ) then 
#  echo "ERROR in mk_pron pipeline - exiting"
#  rm -f /tmp/${PID}PRON 
#  exit 1
#endif

# One orthographic word per line, in ORT order.
grep '^ORT:' "$par" | awk '{print $3}' >! "$ort_file"
if ( $status != 0 ) then
  echo "ERROR: cannot write to $ort_file - exiting"
  rm -rf "$tmpdir"
  exit 1
endif

# ORT tiers can be LaTeX or UTF-8 encoded; txt2lex with option U accepts both
# All output of the pipeline is discarded; only the exit status is used.
( txt2lex.pl -fpgu -i "$ort_file" | cut -f 2 >! "$pron_file" ) >& /dev/null
if ( $status != 0 ) then
  echo "ERROR calling txt2lex.pl - exiting"
  rm -rf "$tmpdir"
  exit 1
endif

# Every ORT word must have produced exactly one pronunciation line,
# otherwise paste(1) below would pair links with the wrong entries.
set pron_count = `wc -l < "$pron_file"`
if ( $ort_count != $pron_count ) then 
  echo "ERROR: mismatch between ORT and KAN tier - exiting"
  echo "ORT:"
  grep '^ORT:' "$par"
  echo "KAN:"
  cat "$pron_file"
  rm -rf "$tmpdir"
  exit 1
endif

# Build the 'KAN:<TAB><link>' column in a single awk pass.  The link is
# copied from the ORT line instead of being counted up from 0, so the
# KAN entries stay attached to the right words even when the ORT links
# are not the plain sequence 0..n-1.
grep '^ORT:' "$par" | awk '{printf "KAN:\t%s\n", $2}' >! "$hdr_file"
if ( $status != 0 ) then
  echo "ERROR: cannot write to $hdr_file - exiting"
  rm -rf "$tmpdir"
  exit 1
endif

# Append the finished tier; a failed append must not be reported as success.
paste "$hdr_file" "$pron_file" >> "$par"
if ( $status != 0 ) then
  echo "ERROR: cannot append KAN tier to BPF file $par - exiting"
  rm -rf "$tmpdir"
  exit 1
endif

rm -rf "$tmpdir"

exit 0

