#!/bin/tcsh 


# MAUS segmentation for one file only

# Author F. Schiel (schiel@bas.uni-muenchen.de)

# Version 
set VERSION = 5.92

# To get a help message, simply type in 'maus'
# To get the version number type in 'maus --version'

##########################################################################
# PARAMETERS THAT NEED TO BE ADAPTED #####################################
##########################################################################
# Set the path SOURCE to the dir where you unpacked the maus package
# (by default it is derived from the location of this script, see below).
# Set TEMP to a temporary dir where maus can store intermediate files.
# If you are using a locale that causes scripts to format floating point
# numbers with a comma instead of a dot, you need to set the locale here.
##########################################################################
#set SOURCE = /homes/schiel/MAUS/TOOL
set SCRIPT = `readlink -f "$0"`   # absolute path of this script, symbolic links resolved
set SOURCE = `dirname "$SCRIPT"`  # location where the script is stored 
                           # (even if we start via a symbolic link)
set TEMP = /tmp
setenv LANG en_US.UTF-8  # defines the behavior of text processing, sorting etc.
setenv SOX_OPTS "-D" # this prevents sox version 14.3 and higher from using
                     # automatic dithering in rate conversions, which causes
                     # MAUS results to fluctuate randomly
##########################################################################

# In the following <LANGUAGE> denotes a RFC5646 language code e.g. 'gsw-CH'

# Parameter file names that must reside in the PARAM dir
# (plain file names; only PRECONFIG is given relative to SOURCE)
set MMF = ""
set KANINVENTAR = KANINVENTAR
set GRAPHINVENTAR = GRAPHINVENTAR
set HMMINVENTAR = HMMINVENTAR
set RULESET = "default"   # default: statistical rule set rml-0.95.rul or the
                          # value set in DEFRULESET in file PARAM.<LANGUAGE>/DEFAULTS;
                          # use RULESET=*.nrul for phonological rules (no statistics)
set HVITECONF = HVITECONF
set HVITEOPTIONS = '-o N'
set DICT = DICT
set PRECONFIG = $SOURCE/PRECONFIGWAV

#
# Commandline Options (style 'OPTION=<value>')
#
# Note: the default values given here are also coded into the webservices CMDI; if you change any default,
# also change the default there!

# required parameters: SIGNAL plus either BPF or KANSTR
# (BPF/KANSTR are not required for MODUS=bigram; nothing is required for PRINTINV=yes)
set SIGNAL = ""     
set BPF = ""          
# or:
set KANSTR = ""
set OUT = ""

# general options
set USEAUDIOENHANCE = true 
                     # boolean; presumably switches the use of the helper AUDIOENHANCE
                     # (see section 'helpers' below) - confirm against the code that uses it
set MODUS = "standard" # macro option
                     # Operation modus. Default is 'standard' which denotes the normal
                     # MAUS technique as published in Schiel ICPhS 1999.
                     # If set to 'align', MAUS performs as in 'standard', but doesn't
                     # model pronunciation, i.e. it aligns only to the given pronunciation
                     # using the 'forced alignment' technique. This option corresponds to
                     # the deprecated former option CANONLY=true.
                     # If set to 'bigram', MAUS performs a phonotactic bigram driven 
                     # speech recognition; the following options
                     # are set overriding command line options:
                     # INSORTTEXTGRID = false
                     # INSKANTEXTGRID = false
                     #   and the following options are being ignored: BPF, KANSTR, 
                     # INFORMAT,CANONLY,RULESET,INSPROB,STARTWORD,ENDWORD,USETRN=true
                     #   and the option OUTFORMAT=emu|EMU|legacyEMU
                     #   will cause an error, because in the bigram modus no hierarchy is created.
                     # In 'bigram' modus MAUS runs a phone recognition on the 
                     # phone classes as defined in the first column of the dictionary 
                     # file defined in DICTBIGRAM, constrained only by a phone bigram 
                     # (HTK lattice) as defined in LATBIGRAM.
set CANONLY = no      # setting this to 'yes' causes maus just to align the string 
                      # of symbols without the MAUS technique.
                      # From version 2.87 this option is deprecated and can be
                      # overruled by option MODUS=align. For backward-compatibility
                      # CANONLY=yes is effectively MODUS=align, if MODUS is not set
                      # (MODUS=standard), since this is probably what the user wants.
set PARAM = $SOURCE/PARAM
# or:
set LANGUAGE = ""     # language of speech as RFC5646 code: 
                      # iso639-3 '-' iso-3166-1 [ '-' iso-3166-2 ] 
                      # e.g. LANGUAGE='eng-AU' -> PARAM.eng-AU
                      # Exception is 'sampa' which denotes language independent mode
set MULTITHREADING = "TRUE"
                      # if set to false, no multi-threading 

# modeling options
set MINPAUSLEN = 5    # setting to 1 causes no inter-word pauses to be deleted,
                      # 2 : pauses of 1 frame are deleted, ...
                      # 40msec seems to be the value that will not yet
                      # be perceived as a pause...
                      # note that only optional pauses '#', '&' and '<p:>'
                      # will be deleted here; '<' and '>' will always stay.
set NOINITIALFINALSILENCE = "false"
                      # if set to 'true', maus will not automatically model an optional initial 
                      # and final silence model, but force into the first/last segment.
set WEIGHT = "default"       
                      # the WEIGHT option weights the influence of the statistical
                      # MAUS model against the acoustical scores. More
                      # precisely, WEIGHT is multiplied with the MAUS model
                      # score (log likelihood) before adding the score to 
                      # the acoustical score in the search;
                      # WEIGHT=0.0 will switch off the MAUS model statistic
                      # (= all paths through the MAUS model are equally probable);
                      # this option is defaulted language specific; see 
                      # PARAM.<language>/DEFAULTS for details;
                      # if set to 'default' the language specific default 
                      # value is used, otherwise the commandline value
set INSPROB = 0.0     # Insertion probability of segments (see the help text on INSPROB)
set RELAXMINDUR = "FALSE"
                     # by default maus has a minimum duration of phone segments of 3 frames (= 30msec)
                     # or 40msec for long/tense vowels or diphthongs or 60msecs for affricates and 
                     # silence intervals. This makes sense for achieving robust results
                     # with a minimum of falsely inserted short segments. In some cases (duration of very short
                     # consonants) this causes a 'ceiling effect' at 30msec in the analysed data. If this option 
                     # is set to 'TRUE', this lower minimum duration bound is relaxed to 10msec (1 frame) and 
                     # 20msec respectively. Accuracy of segmentation is degraded when using this option.
set RELAXMINDURTHREE = "FALSE"
                     # alternatively to RELAXMINDUR this can be set to convert all acoustic models 
                     # into models with a minimal duration of 3 states (= 30msec with standard frame rate);
                     # setting both options causes an ERROR.
# input options
set USETRN = "no"     # if set to "yes" and the input BPF contains a single TRN entry, 
                      # a pre-segmentation (cutting of leading and trailing silence) of the whole
                      # utterance will be read from this TRN tier entry and the search 
                      # is constrained to the segment given in that TRN tier.
                      # if the input BPF contains more than one TRN entry, the script
                      # assumes that a chunk segmentation is provided for the recording
                      # and calls the helper maus.trn (part of distribution) to segment 
                      # each chunk separately.
                      # ! option value 'force' is deprecated with 4.11; use PRESEG=true instead:
                      # if set to 'force', an online pre-segmentation is carried out by the 
                      # helper WAV2TRN; if WAV2TRN is not installed on the system,
                      # a warning is issued and the option is ignored; a TRN tier in the BPF 
                      # input is being ignored.
                      # if no BPF *.par|PAR is given on the command line, this option is ignored.
set PRESEG = "false"  # if set to "true", maus will use the helper WAV2TRN to perform a 
                      # pre-segmentation; if WAV2TRN is not installed on the system,
                      # a warning is issued and the option is ignored; a TRN tier in the BPF
                      # input is being ignored. If this option is set in combination with USETRN=true
                      # and the input BPF contains a chunk segmentation (tier TRN), then the 
                      # presegmentation is carried out for every single chunk.
set STARTWORD = 0
set ENDWORD = 999999
set allowresamp = yes # setting this to 'yes' will cause maus to resample signals
                      # with the wrong sampling rate using sox; if set to 'no', input with 
                      # wrong sample rate will cause an error. Note: videos are always re-sampled
                      # to 16kHz, 16bit, mono
set INFORMAT = "bpf"  # deprecated; do not use this option; input format is detected by extension
                      # input format; default is BPF with (minimal) tier KAN
                      # if set to "bpf-sampa", SAM-PA processing mode is invoked and 
                      # PARAM and LANGUAGE options are ignored
set INSYMBOL = "sampa"
                      # defines the phonological symbol encoding in the input; default 
                      # is 'sampa', alternate is 'ipa' which requires blank-separated IPA symbols
                      # in the KAN tier, e.g. 'KAN: 0  h ɔʏ t ə'; 
                      # in both symbol encodings special characters '.#'"+' are deleted, if they are not part 
                      # of a symbol (e.g. /g_+/)
set BPFTHRESHOLD = 3000
                     # threshold for prevalidation of input: if the loaded BPF contains more than 
                     # BPFTHRESHOLD lines of the KAN tier, we expect that the Viterbi will take a very long time 
                     # to process this file and most likely will not come to a valid result; the script therefore 
                     # issues an ERROR message and exits with exit code 1; if you are sure about what you are 
                     # doing, you can set this threshold to other values via this option.
                     # NOTE(review): the help text of option BPFTHRESHOLD states exit code 2 - confirm
                     # which value the script really uses.
                     set GETBPFTHRESHOLD = "FALSE" 
                     # if set, maus returns a single number representing the option BPFTHRESHOLD and 
                     # exits with error code 0; this allows front ends to check where the pre-validation  
                     # threshold is.

# output options
set OUTFORMAT = "mau" # if set to 'mau' a BPF mau tier is created in OUT
                      # if set to 'csv' a CSV spread sheet is created in OUT
                      # if set to 'TextGrid' a praat TextGrid is created in OUT
                      # if set to 'emu', legacy Emu *.hlb and *.phonetic files are created
                      # if set to 'emuR|emuDB', an Emu *_annot.json file is created
                      # if set to 'EMU' or 'legacyEMU' a file *.EMU is created which contains the *.hlb
                      #  and *.phonetic legacy Emu files separated by a line '--- cut here ---'
                      # if set to 'mau-append' or 'par' or 'bpf' the mau tier is added to the BPF 
                      # input file '*.par', if writable, or written together with BPF input
                      # to OUT (if given).
                      # Further output formats are accepted, if ANNOTCONV can handle them.
set INSORTTEXTGRID = "yes" 
                      # if set to "yes" an additional word tier is inserted in OUTFORMAT=TextGrid output
set INSKANTEXTGRID = "yes" 
                      # if set to "yes", an additional canonical transcript tier is inserted in OUTFORMAT=TextGrid output;
                      # for all other output formats these two options have no effect
set OUTIPA = "false"  # deprecated option; use OUTSYMBOL=ipa instead
set OUTSYMBOL = "sampa"
                      # encoding of phonetic symbol output in all file formats;  
                      # requires a mapping table defined in IPATABLE.
                      # if set to 'ipa', maus replaces all SAMPA-Symbols in the output 
                      # files by the corresponding IPA coded in UTF-8. Note that BPF tiers
                      # MAU (*.mau, option OUTFORMAT=mau) do not conform to the BPF standard then;
                      # OUTFORMAT=TextGrid should work in most cases; the effect in legacy Emu 
                      # is unknown; silence intervals, human noise and other noise are coded
                      # as (...), (..) and (.) respectively; 
                      # if set to 'manner', maus replaces all SAMPA-Symbols in the output
                      # files by the corresponding IPA manner class (line of IPA consonant chart) + 
                      # 'vowel' + 'diphthong';
                      # if set to 'place', maus replaces all SAMPA-Symbols in the output
                      # files by the corresponding IPA place of articulation for consonants, and 
                      # by locations/movements in the vowel space for vowels/diphthongs.
set TARGETRATE = 100000 # frame rate of the HTK front-end measured in 100nsec units (10000 = 1msec)
                      # the segmental results of MAUS are quantized by this amount, i.e. a 
                      # TARGETRATE of 100000 results in segment boundaries that are placed on multiples
                      # of 10msecs. The allowed values of TARGETRATE are 10000,20000,100000 (1, 2, 10msec);
                      # decreasing TARGETRATE *does not* increase accuracy, merely the quantization 
                      # is more fine grained, but not more accurate; TARGETRATE does not influence
                      # MAUSSHIFT (see details below), but it does influence the segmentation accuracy as 
                      # does the use of options RELAXMINDUR/RELAXMINDURTHREE (see details above); 
                      # when MAUS is used to obtain quantitative
                      # duration measurements, it might make sense to decrease TARGETRATE; however, a decrease
                      # of TARGETRATE increases computation time (because more frames have to be
                      # processed in the same amount of signal length) and degrades the accuracy.
set CSVLIMITER = ";"  # column delimiter for CSV input tables
set MAUSSHIFT = "default"   # constant shift of MAUS output boundaries in msecs
                            # positive value = shift into future
                            # this option is defaulted language specific; see 
                            # PARAM.<language>/DEFAULTS for details
                            # if set to 'default' the language specific default 
                            # value is used, otherwise the commandline value
set ADDSEGPROB = false      # if true the frame-normalized natural log Viterbi 
                            # likelihood is appended to the segment label

# helpers
# (all helpers are looked up in SOURCE, except WAV2TRN which is given as a bare
# command name)
set RELAXMINDURHED = "${SOURCE}/relaxMinDur.hed"
set RELAXMINDURTHREEAWK = "${SOURCE}/relaxMinDur30.awk"
                     # HHEd/AWK helper scripts to do on-the-fly conversion of HMMs 
                     # (see options RELAXMINDUR and RELAXMINDURTHREE)
set PAR2EMU = $SOURCE/par2emu
                      # helper to create a legacy Emu compatible output
                      # (option OUTFORMAT=emu|EMU|legacyEMU)
set PAR2EMUR = $SOURCE/mausbpf2emuR
                      # helper to create Emu compatible output
                      # (option OUTFORMAT=emuR|emuDB)
set PAR2TEXTGRID = $SOURCE/par2TextGrid
                      # helper to create TextGrid output
set WAV2TRN = "wav2trn"	
                      # helper to pre-segment signal
set MAUSTRN = "$SOURCE/maus.trn"
                      # helper to perform batch process based on a chunk segmentation
set MAUSBPF2CSV = "$SOURCE/mausbpf2csv"
                      # helper to transform output BPF to a CSV table
set ANNOTCONV = "$SOURCE/AnnotConv/annotConv"
                      # helper to transform BPF output to any format; this helper replaces
                      # the usage of PAR2EMUR,PAR2TEXTGRID,MAUSBPF2CSV; however, they are
                      # still in the code as fallbacks, in case ANNOTCONV does not work
set AUDIOENHANCE = $SOURCE/AudioEnhance/audioEnhance
                      # helper to transform audio input formats and do some optimization on the signal

# debug options
set v = 0             # verbose level; v=1 or higher prints more and more information to stdout
set TRACE = 0         # presumably a trace level for the called tools - confirm against its use
set CLEAN = 1         # 1: cached intermediate files are erased from TEMP at the end of the
                      # script; set to 0 for debugging purposes
set OUTSLF = ""       # if set, maus copies its internal SLF into it
set PRINTINV = no     # if set to yes, maus copies the set of allowed SAM-PA
                      # symbols in the input for the chosen language to the 
                      # output file OUT or to SAMPA.tab, if OUT is not given. The list 
                      # is taken from PARAM.<language>/KANINVENTAR.inv.
set DICTBIGRAM = ""
set LATBIGRAM = ""   # Defaults: DICTBIGRAM = $PARAM/DICT.bigram, LATBIGRAM = $PARAM/LAT.bigram
set MODELRATE = 0     # sampling rate of HMM; will be read from PRECONFIG
                      # this can be overwritten by option from commandline,
                      # but then the rate IS NOT checked against the HMM!

# end options

# Shortcut --version : print the version number and stop.
# The argument is quoted so that a first argument containing blanks
# (e.g. a KANSTR value) is compared as one word instead of breaking
# the expression.
if ( "$1" == '--version' ) then 
  echo $VERSION
  exit 0
endif

# Keep the complete original command line; the parser below shifts it away.
set mausoptions = "$*"

# Actually do the argument parsing here
#
# Every leading argument of the form KEY=VALUE overwrites the script variable
# KEY with VALUE. Parsing stops at the first argument that is empty or has
# no equal sign. KEY must be the name of a variable that is already set
# (normally one of the defaults defined above); anything else is rejected
# as unknown option with exit code 1.
# NOTE: KEY is pasted into an eval command, therefore it is restricted to a
# plain identifier before it is used. VALUE is never re-parsed by the shell.

#echo parsing commandline
while ( "$1" != "" )
	switch ("$1")
	case *=*:
		# KEY = text before the first equal sign; the argument is quoted
		# inside the command substitution to protect blanks and glob
		# characters (e.g. the SAMPA glottal stop)
		set key = `echo "$1" | awk -F= '{ print $1 }'`
		# only plain identifiers may reach the eval commands below
		if ( "$key" == "" || "$key" =~ *[^A-Za-z0-9_]* || "$key" =~ [0-9]* ) then
		  echo "ERROR: ${0:t} : unknown option $key - exiting" >> /dev/stderr
		  exit 1
		endif
		#check if option is known (set)
		eval set checkoption = '$?'$key
		if ( $checkoption == 0 ) then
		  echo "ERROR: ${0:t} : unknown option $key - exiting" >> /dev/stderr
		  exit 1
		endif
		# VALUE = everything after the FIRST equal sign, so that values
		# may contain further equal signs (SAMPA syllabicity marker,
		# file names)
		set val = `echo "$1" | awk '{ print substr($0, index($0, "=") + 1) }'`
		# join the words again into a single word (blank separated)
		set val = "$val"
		# the :q modifier hands the value over as one quoted word, hence
		# quote characters inside the value cannot break or extend the
		# evaluated command
		eval "set $key = "'$val:q'
		unset key val checkoption
		shift
		breaksw
	default:
		break
	endsw
end

# Report the command line in verbose mode. This has to happen after the
# parsing, because only now the option v can be different from its default.
if ( $v > 0 ) echo "$0 $mausoptions"

# end option parser

# boolean variable check; define all boolean input parameters here
#
# Each option listed in bool may be given in several spellings. They are
# mapped onto the canonical values TRUE and FALSE (the value force is passed
# through unchanged); any other value terminates the script with exit code 1.
# The case labels of one group fall through to a single shared assignment.

set bool = ( PRINTINV allowresamp CANONLY CLEAN USETRN PRESEG INSORTTEXTGRID INSKANTEXTGRID OUTIPA NOINITIALFINALSILENCE RELAXMINDUR RELAXMINDURTHREE MULTITHREADING GETBPFTHRESHOLD ADDSEGPROB USEAUDIOENHANCE )
foreach booleanvariable ( $bool )
  eval set val = '$'$booleanvariable
  switch ( $val ) 
  case true:
  case True:
  case TRUE:
  case 1:
  case yes:
  case Yes:
  case YES:
    eval set $booleanvariable = TRUE
    breaksw
  case false:
  case False:
  case FALSE:
  case 0:
  case no:
  case No:
  case NO:
    eval set $booleanvariable = FALSE
    breaksw
  case force:
    eval set $booleanvariable = force
    breaksw
  default:
    echo "ERROR: ${0:t} : Boolean $booleanvariable=$val is not a boolean value. Use either '0,1,true,false,yes,no,(force)' - exiting" >> /dev/stderr
    exit 1
  endsw    
end

# Query mode for front ends: print the pre-validation threshold and stop.
if ( $GETBPFTHRESHOLD == "TRUE" ) then
  echo $BPFTHRESHOLD
  exit 0
endif

# MODUS=bigram must be excempt from this case, since this modus does not require a BPF or KANSTR option 
if ( "$PRINTINV" == "FALSE" && ( "$SIGNAL" == "" || ( "$BPF" == "" && "$KANSTR" == "" && $MODUS != "bigram" ) ) ) then 
  echo "${0:t} : version $VERSION"
  cat <<ENDE | sed 's/^# //' 

# With BPF input:
# usage: maus SIGNAL=signal.nis|wav|dea|al|mpg|mpeg|mp4|avi|fvl BPF=signal.par|csv [MODUS=standard][INSYMBOL=sampa|ipa][OUT=maustier.mau][OUTFORMAT=mau|mau-append|TextGrid|emu|EMU|legacyEMU|csv|par|bpf|emuR|emuDB...][CLEAN=1][PARAM=parameter-dir][LANGUAGE=iso639|rfc5646][CANONLY=no][allowresamp=yes][WEIGHT=default][INSPROB=insprob][MINPAUSLEN=5][STARTWORD=0][ENDWORD=999999][MAUSSHIFT=default][INSORTTEXTGRID=no][INSKANTEXTGRID=no][USETRN=no][PRESEG=no][RULESET=rml-0.95.rul][NOINITIALFINALSILENCE=no][OUTSYMBOL=sampa|ipa|place|manner][RELAXMINDUR=false][RELAXMINDURTHREE=false][MULTITHREADING=true][BPFTHRESHOLD=3000][TARGETRATE=100000][USEAUDIOENHANCE=true]
# With KANSTR input (just for debugging):
# usage: maus SIGNAL=signal.nis|wav|dea|al|mpg|mpeg|mp4|avi|fvl KANSTR="a: b e: # t s e:" [MODUS=standard][INSYMBOL=sampa|ipa][OUT=maustier.mau][OUTFORMAT=mau|mau-append|TextGrid|emu|EMU|legacyEMU|csv|par|bpf|emuR|emuDB...][CLEAN=1][PARAM=parameter-dir][LANGUAGE=iso639|rfc5646][CANONLY=no][allowresamp=yes][WEIGHT=default][INSPROB=insprob][MINPAUSLEN=5][MAUSSHIFT=default][INSORTTEXTGRID=no][INSKANTEXTGRID=no][USETRN=no][PRESEG=no][RULESET=rml-0.95.rul][NOINITIALFINALSILENCE=no][OUTSYMBOL=sampa|ipa|place|manner][RELAXMINDUR=false][RELAXMINDURTHREE=false][MULTITHREADING=true][TARGETRATE=100000][USEAUDIOENHANCE=true]
# MINNI usage:
# usage: maus SIGNAL=signal.nis|wav|dea|al|mpg|mpeg|mp4|avi|fvl MODUS=bigram [OUT=maustier.mau][OUTFORMAT=mau|TextGrid|csv...][CLEAN=1][PARAM=parameter-dir][LANGUAGE=iso639|rfc5646][allowresamp=yes][WEIGHT=default][USETRN=no][PRESEG=no][OUTSYMBOL=sampa|ipa|place|manner][RELAXMINDUR=false][RELAXMINDURTHREE=false][MULTITHREADING=true][TARGETRATE=100000][USEAUDIOENHANCE=true]
# usage: maus PRINTINV=yes [LANGUAGE=iso639|rfc5646][OUT=InventarTable.txt]
# usage: maus GETBPFTHRESHOLD=yes
# usage: maus --version

# Reporting: ERRORs/WARNINGs are printed to stderr; DEBUG infos (option v > 0) 
# and output of parameter retrieval is printed to stdout 

# General remarks:
# The script reads a string of phonemic symbols as defined in the param
# file KANINVENTAR, reads a signal from the file signal.nis and 
# performs a MAUS segmentation according to these inputs.
# The resulting segmentation is either written into a BPF MAU tier file *.mau
# or into a Praat compatible *.TextGrid file or in Emu files or appended to the BPF
# input file..
# Phonemic symbols are read either from commandline or from a KAN tier of 
# a BPF file (input file *.par|PAR) or from a CSV table with two ';'-separated columns
# (input file *.csv|CSV).

# Maus is also implemented as a webservice. If you are using maus as a web service,
# not all options are available as for the plain script. Some options have different 
# names and values to standardize certain concepts. Please look for remarks headed
# by 'WebService:' for such special options in the following.

# There are a number of general constraints on how to use maus:

# With the default set of parameter files PARAM (LANGUAGE='' or LANGUAGE='deu-DE') 
# only German language may be
# segmented. Furthermore the statistical rule set used here is optimized 
# to non-prompted dialogue speech used in a formal situation. However,
# we found that this set also works reasonable well for read speech.

# To adapt this script to another language, several parameter files and 
# programs in PARAM must be adapted: The set of phonemic symbols used in the input,
# the MAUS internal symbol set, the mapping function between them, the 
# Hidden Markov Models used for the search, the mapping from MAUS internal
# symbols to HMM and of course the rule set.
# See file USAGE for further details, and many examples in subdirs PARAM.<LANGUAGE>.

# The string of phonemic input symbols must not contain any other symbol as
# defined in $PARAM/KANINVENTAR. 
# The symbol '#' may be used between words indicating possible pauses
# between the words. This is highly recommended.
# When reading from a BPF file (option BPF) these optional pauses are
# inserted automatically.

# WebService: you may download the content of KANINVENTAR (i.e. the allowed 
# phonetic symbols in the maus input) for the selected language by pressing
# the button 'Inventar' next to the LANGUAGE option in the web form or by 
# directly issuing the following curl command:
# curl -X POST -H 'content-type: application/x-www-form-urlencoded' 'http://clarin.phonetik.uni-muenchen.de/BASWebServices/services/runMAUSGetInventar?LANGUAGE=iso639code|rfc5646code'


# This script is intended to work for mono NIST and WAV sound files with 
# 16 kHz sampling rate and 16 bit linear (FIXRATE), because the HMM are 
# trained to this type of data. Multi-channel files are not supported.
# Maus will automatically resample the signal using sox (without dither!)
# if you set the option 'allowresamp' to 'yes' (default is 'yes'; the times 
# given in samples in the mau output are based on the original
# sampling rate of the signal; the times given in the TextGrid output 
# files are NOT affected by any re-sampling!) and also
# recognize ALAW coded / 8kHz telephone speech input by the extensions
# '.al', '.AL', '.dea' and '.DEA' and converts it into PCM / 16kHz using
# sox. Note that telephone speech is band limited and influenced by a 
# number of varying transfer functions and therefore not suited to be 
# recognized by HMM that are trained on more or less clean lab speech.
# But you can try...

# You may also either re-sample using sox beforehand or
# adjust the parameters in PRECONFIG and replace the HMM by HMM trained with 
# data of the respective sampling rate.
# The script will complain if you try to use other sampling rates or HMM
# trained with other sampling rates. Note that ALL kinds of re-sampling
# detoriate the signals!

# If you use WAV/ALAW signal files as input, the tool sox must be installed 
# on your computer.

# Other tools (partly residing in SOURCE) required by this script:
# awk(*), HVite(*), HCopy(*), HHEd(*), dos2unix(*), mac2unix(*), ffmpeg(*), SoX(*),
# kan2mlf.awk, word_var-2.0, rec2mau.awk,
# mau2TextGrid.awk, mau2TextGridORT.awk, correctusetrn.awk, finalusetrn.awk,
# par2emu, par2emu1.awk, par2emu2.awk, par2TextGrid, relaxMinDur.hed,
# mausbpf2csv, mausbpf2emuR, mausbpf2emuR_class1.awk, mausbpf2emuR_class1lnks.awk,
# mausbpf2emuR_class1mult.awk, mausbpf2emuR_class1multlnks.awk, 
# mausbpf2emuR_class4.awk, mausbpf2emuR_class4lnks.awk, annotConv(*)
# (tools marked with (*) are NOT part of the MAUS distribution package!)

# word_var-2.0 is a C++ based program compiled for Linux that computes the
# statistically weighted pronuciation model. If you are intending
# to use MAUS for a different language than German AND not do not intend to 
# use pronunciation rule sets (MODUS=align), i.e. make a simple alignment 
# to phonemic SAM-PA transcript, then you can use maus WITHOUT installing
# the binary word_var-2.0. The script will not call word_var-2.0, if you 
# set the option MODUS=align. This might help on platforms where the compilation
# of word_var-2.0 does not work or where no C++ compiler is available.

# Warning: the script will write intermediary files into 
# the cache $TEMP. To be sure that these are deleted after finishing use
# the option TEMP=/mytemp and the option CLEAN=1.
# You may set CLEAN=0 for debugging purposes

# The silence model '#' in the HMM set must be a tee-model.
# The HVite will always complain about the 'words' '#' or '&' that are 
# tee-words. It's safe to ignore these warning.

# History: See file DOCU/HISTORY in this dir

# Options:

# Option v=1 or higher causes maus to produce more and more information 
# at stdout. In general v=1 is good value to start (default: v=0).

# Option SIGNAL must point to a readable media file that helper AUDIOENHANCE can process.

# The option 'allowresamp=yes' will cause maus to automatically resample
# input signal files to 16kHz sampling rate using sox (polyphase).
# For this to work, sox must be installed on your system. Note that all kind 
# of re-sampling might detoriate your signal.

# WebService: in the webservice 'allowresamp=yes' is always set.

# If option BPF is given, maus will try to read the canonical pronunciation
# either from a BAS Partitur Format (BPF) file (tier KAN), file extension 
# *.par|PAR, or from a spead sheet CSV table (*.csv|CSV). The CSV table has 
# the format: two ';'-separated columns, column 1: orthography of a word in UTF-8,
# column 2: canonical pronunciation of the word in SAMPA with or without 
# separating blanks between phonetic symbols. Optional pauses will be inserted
# between words automatically. Note that the SAMPA symbols have to match the
# set of symbols in PARAM/KANINVENTAR. 
# If you want to model additional optional silence intervals within words, use
# the model name '<p:>', e.g.:
# KAN:  0  hOY<p:>t@
# KAN:  1  ?Ist
# If your BPF input files must contain 'silence words' (= words that consist only of a silence
# symbol) use the '<p>' (enforced silence interval) symbol. E.g.:
# KAN:  0  hOYt@
# KAN:  1  <p>
# KAN:  2  ?Ist
# If your BPF input file contains noise markers (e.g. 'door slam') use the 
# '<nib>' symbol (= non-speech interval), e.g.:
# KAN:  0  hOYt@
# KAN:  1  <nib>
# KAN:  2  ?Ist
# If your BPF input file contains human noise (e.g. 'cough') or garbage speech use the 
# '<usb>' symbol (= human noise interval), e.g.:
# KAN:  0  hOYt@
# KAN:  1  <usb>
# KAN:  2  ?Ist

# Option BPFTHRESHOLD=prevalThreshold
# Since the processing time of the Viterbi increases quadratically with input length, we do 
# a prevalidation of the input whether it makes sense to process the data. If the number of 
# words (KAN tier lines) in the BPF input file (which roughly corresponds to the length of input) exceeds 
# the number given in this option, the maus script issues an error and terminates with exit code 2
# Do not change this number unless you know exactly what you are doing.

# The option CANONLY=yes causes maus to skip the creation of the statistical
# pronunciation graph and simply align the given string of phonemic symbols.
# This maybe be for instance useful to align a manually created transcript or
# to avoid erroneous variations in noisy or out-of-domain speech or when using
# maus on a language where no pronunciation rule set is avaliable.
# The script maus will not call the binary word_var-2.0 when CANONLY=yes;
# this might help on platforms where the C++ program does not compile.
# This option is deprecated from version 2.87 upwards; use the new option 
# MODUS=align instead.

# If the option CLEAN=1 is given (default), the script will erases all cached 
# slf, rec, htk and mlf files from TEMP at the end of the script. This 
# is highly recommended unless you know exactly what you are doing and need
# to safe computing time.

# The option INSPROB influences the probability of deletion of segments.
# It is a constant factor (a constant value added to the log likelihood score)
# after each segment. Therefore, a higher value of INSPROB will cause
# the probability of segmentations with more segments go up, thus decreasing
# the probability of deletions (and increasing the probability of insertions,
# which are rarely modelled in the rule set). This parameter has been
# evaluated on parts of the German Verbmobil data set (27425 segments)
# which were segmented and labelled manually (MAUS DEV set) and found to have
# its optimum at 0.0 (which is nice). Therefore we set the default value
# of INSPROB to 0.0
# INSPROB was also tested against the MAUS TEST set to confirm the value of
# 0.0. It had an optimum at 0.0 as well.
# Note that this might NOT be the optimal value for other MAUS tasks.

# Option INSORTTEXTGRID
# If set to 'yes' and option OUTFORMAT is set to 'TextGrid' and input
# is read from a BPF|CSV, maus will try to identify an ORT tier and
# write an additional interval section into the TextGrid file
# containing the word segmentation based on the underlying MAUS
# segmentation. The tier is called either 'ORT';
# it contains non-labeled segments where MAUS labelled
# a silence interval and a segment labelled with the
# orthography for the words. 

# Option INSKANTEXTGRID
# Same as INSORTTEXTGRID with the KAN tier. If INSORTTEXTGRID is set as well
# both tiers are exported after another before the phonemic tier. If the 
# source BPF does not contain an ORT tier, only one KAN tier is being exported
# Both options INSORTTEXTGRID and INSKANTEXTGRID will have no effect if the 
# canonic transcript is read from the option KANSTR instead from a BPF|CSV file
# (option BPF).

# Option INSYMBOL defines the phonological encoding in the input file; default
# is 'sampa', alternate is 'ipa'; note that only IPA symbols are recognized that 
# correspond to the SAMPA symbols of the selected language (option LANGUAGE); if the
# input file contains other IPA symbols or combinations thereof, an error is issued.
# IPA input does only work, if the IPA symbols are blank separated in the KAN tier.
# Aside from the IPA symbols defined in $PARAM/KANINVENTAR.inv (3rd column) the 
# following symbols are allowed in the input (but ignored): ˈ ˌ . '

# If option KANSTR is given, maus will use the string of phonemic symbols
# given from command line. Each symbol is separated by the next by a 
# single blank. Note that these symbols have to match the 
# set of symbols in PARAM/KANINVENTAR. Also note that no optional pauses
# will be inserted between words. You have to do that explicitly by 
# inserting the symbol '#'(optional) or '<p>' (non-optional). If you want to force MAUS to model
# a silence interval at a certain point, insert the symbol '<p>'.
# Do not use the symbol '#' as the first or last symbol in KANSTR!
# If option KANSTR is used the option INSORTTEXTGRID and INSKANTEXTGRID will
# have no effect.

# WebService: KANSTR is not supported by the MAUS Webservices. Use the KAN
# tier in the input BPF files instead.

# The option LANGUAGE=rfc5646 can be used instead of PARAM to set the language.
# If set, LANGUAGE will override option PARAM. Currently supported are:
# afr-ZA
# aus-AU        (= Australian Aboriginal Languages)
# cat,cat-ES
# deu,deu-DE
# ekk,ekk-EE
# eng,eng-GB
# eng-AU
# eng-NZ
# eng-SC
# eng-US
# eus-ES,eus-FR
# far-IR
# fin,fin-FI
# fra,fra-FR
# gsw-CH
# hun,hun-HU
# isl-IS
# ita,ita-IT
# jpn,jpn-JP
# kat,kat-GE
# ltz-LU
# mlt,mlt-MT
# nld,nld-NL,nld-BE
# nor,nor-NO
# pol,pol-PL
# por,por-PT
# ron,ron-RO
# rus,rus-RU
# spa,spa-ES
# sqi,sqi-AL
# swe,swe-SE
# tha-TH
# and 'und' and the non-standard code 'sampa' to denote the language independent MAUS mode 
# (then basically all known SAM-PA symbols are allowed in the 
# input BPF KAN tier but they must be blank separated, e.g. instead of 
# 'hOYt@' -> 'h OY t @'). 
# SAMPA symbol 'P' and X-SAMPA diacriticum 'palatalized' e.g. /k'/ are not 
# supported; for palatalisation use /k_j/.

# Option MAUSSHIFT causes the calculated MAUS segment boundaries to be 
# shifted by MAUSSHIFT msec. (default : 0)
# As reported by different sources, segmentations produced by a HMM model
# show a consistent shift in the segment boundaries of about 8-12msec too early 
# (probably due to average systematic shifts in the training material, see the BA thesis 
# of Bernhard Jackl 2016).

# The option MINPAUSLEN controls the behaviour of optional inter-word silence.
# Inter-word silence is modelled by the symbols '#', '&' (only for 
# compatibility) and '<p:>' (which may denote a word-internal silence
# interval), not by '<p>' which models explicit silence intervals.
# If set to 1, maus will detect all inter-word silence intervals that can be
# found (minimum length for a silence interval is then 1 unit of TARGETRATE, default: 10msec). 
# If set to values > 1, 
# the minimum length for an inter-word silence interval to be detected 
# is set to n*TARGETRATE. For example MINPAUSLEN=5 will cause MAUS to 
# suppress inter-word silence intervals up to a length of 40msec (with TARGETRATE=100000).
# Since 40 msec seems to be the border of perceivable silence, we set 
# this option default to 5. With other words: inter-word silences smaller
# than 50msec are not segmented but rather distributed equally to the adjacent segments.
# (If you change the framerate via TARGETRATE option, you
# might consider increasing MINPAUSLEN but usually this is not necessary).
# If one of the adjacent segments happens to be a plosive (set of plosives
# defined in PARAM/PLOSIVES) then the deleted silence interval is added totally
# to the plosive; if both adjacent segments are plosives, the interval is
# equally spread as with non-plosive adjacent segments.
# (Here we assume that the frame length is always 10msec!)
# If you want to force MAUS to insert a silence interval at a certain point
# use the symbol '<p>'.

# The option MMF may be used to supersede the default HMM macro file in $PARAM/MMF.mmf.
# MMF must contain a HMM definition for each phonetic symbol defined in the second 
# column of the $PARAM/DICT file. HMM definitions must have a minimum number of 3 emitting states
# (NUMSTATES >= 5, counting the non-emitting entry and exit states); otherwise option RELAXMINDUR=TRUE will cause an error.

# Option MODUS - macro option
# Operation modus. Default is 'standard' which denotes the normal
# MAUS technique as published in Schiel ICPhS 1999.
# If set to 'align', MAUS works as in 'standard', but doesn't
# model pronunciation variation, i.e. it aligns only to the given pronunciation
# using 'forced alignment' technique. This option corresponds to
# the deprecated former option CANONLY=true.
# If set to 'bigram', MAUS will run a phone recognition on the
# phone classes as defined in the first column of the dictionary
# file defined in DICTBIGRAM constrained only by a phone bigram
# (HTK lattice) as defined in LATBIGRAM.
# If set to 'bigram', the following options
# are set default values overriding command line options:
# INSORTTEXTGRID = false
# INSKANTEXTGRID = false
#   and the following options are being ignored: BPF, KANSTR,
# INFORMAT,CANONLY,RULESET,INSPROB,STARTWORD,ENDWORD,USETRN=true
#   and the option OUTFORMAT=emu|EMU|legacyEMU|mau-append|emuR will cause an error.

# Option MULTITHREADING (default true)
# If set to false, multi-threading in helper maus.trn is switched off

# Option NOINITIALFINALSILENCE=no
# If set to 'yes', the maus script will suppress the automatic modelling 
# of optional initial/final silence intervals; this can be useful if the recording
# starts with a stop and you don't want MAUS to place a silence interval '<p:>' before
# the stop (instead of the pause).

# Deprecated Option OUTIPA, superseded by option 'OUTSYMBOL'.
# Use OUTSYMBOL=ipa in the future.
# If set to 'true', maus replaces all SAMPA-Symbols in the output 
# files by the corresponding IPA symbols coded in UTF-8. 
# OUTFORMAT=TextGrid should work in most cases; the effect in Emu 
# is unknown; silence intervals, human noise and other noise are coded
# as (...), (..) and (.) respectively. 
# Requires a mapping table defined in IPATABLE with SAMPA in the 1st
# and IPA in the 3rd column.

# If the option OUT for the output is not given, maus will create an
# output file named with the body of the signal file and 
# extension 'mau', 'csv', 'par' or 'TextGrid' in the same 
# location as the signal file (provided the user is allowed to write there).
# Setting OUT=file will write the results into 'file' regardless the 
# setting of OUTFORMAT, except if option OUTFORMAT=emu|EMU|legacyEMU, the resulting two legacy Emu
# files (*.hlb, *.phonetic) or *.EMU will be written to the path of OUT, but named
# as the signal file.
# The output *.mau is not a valid BPF file because it does not contain a 
# header. But it can be appended to the corresponding input BPF file and should match
# the other tiers exactly. To create a valid BPF containing the MAU tier
# use OUTFORMAT=mau-append|par|bpf (works only with BPF|CSV input!).

# Option OUTFORMAT (default: 'mau')
# Default output is a file with extension 'mau' that contains a three-column table
# (tab-separated): 'MAU:' (1st col), word index starting with 0 (2nd col) and the 
# phonetic symbol label (3rd col, coding dependent of option OUTSYMBOL). This is 
# a BPF compatible tier that can be appended to a BPF annotation file. 
# If set to 'csv' a CSV spreadsheet table is produced by the helper 'mausbpf2csv' and the 
# file extension is set to 'csv' to ease loading of the result table into spread 
# sheet processing software.
# If set to 'TextGrid', maus will produce a praat TextGrid
# file instead of the MAU tier file (if the input contains LaTeX-Umlauts in the 
# ORT tier they are converted to UTF-8, since praat cannot handle LaTeX label names). 
# Since TextGrid files contain explicit times, the sampling rate problem is 
# irrelevant in this case. See also options INSORTTEXTGRID, INSKANTEXTGRID.
# Options START/ENDWORD do not work with TextGrid output.
# If set to 'emu', maus will create legacy Emu compatible hierarchy and segmentation
# files, *.hlb and *.phonetic in the path of OUT and named as the input signal
# file (if OUT not given in the location of the signal file). This works only
# if input is read from a BPF file (option BPF=) and the input file contains
# an ORT tier and an KAN tier.
# The resulting Emu hierarchy consists of 3 levels (bundle,ORT,MAU)
# and two additional labels (bundle -> source, ORT -> KAN) and only
# one type of label file (*.phonetic) for level MAU.
# WARNING: for the creation of emu output maus will add a MAU tier to the 
# input BPF, if the input BPF is in the same location as the signal file (SIGNAL=).
# An already existing MAU tier is overwritten without warning; if you want to
# prevent this, place the input BPF in a different directory than the signal file.
# WARNING: Legacy Emu does not tolerate some SAM-PA labels such as curly brackets ({}).
# If you are using a language containing such symbols, you must replace these
# labels by Emu-conform symbols before using the maus-created Emu files.
# If the option OUTFORMAT is set to 'EMU|legacyEMU', maus will do the same as emu, but store
# both legacy Emu files into a single file *.EMU separated by a line '--- cut here ---'.
# Options START/ENDWORD do not work with Emu output.
# If the option OUTFORMAT is set to 'emuR|emuDB', an Emu compatible *_annot.json file is 
# created. The same remarks as to OUTFORMAT='emu' apply here, except that all SAM-PA 
# labels are tolerated here.
# If the option OUTFORMAT is set to 'mau-append' or 'par' or 'bpf', the MAU tier is added to the 
# BPF input file (an existing MAU is being replaced) or a BPF is created from a CSV input file.
# This works only with BPF|CSV input, not with KANSTR input.
# Further output formats are accepted if ANNOTCONV can handle them; to check which formats
# are supported by ANNOTCONV call 'annotConv --listOutFormats'

# Option OUTSYMBOL=sampa
# Encoding of phonetic symbol output in all file formats;
# requires a mapping table defined in IPATABLE.
# if set to 'ipa', maus replaces all SAMPA-Symbols in the output
# files by the corresponding IPA coded in UTF-8. Note that BPF tiers
# MAU (*.mau, option OUTFORMAT=mau) are not conform BPF standard then;
# OUTFORMAT=TextGrid should work in most cases; the effect in legacy Emu
# is unknown; silence intervals, human noise and other noise are coded
# as (...), (..) and (.) respectively;
# if set to 'manner', maus replaces all SAMPA-Symbols in the output
# files by the corresponding IPA manner class (line of IPA consonant chart) +
# 'vowel' + 'diphthong';
# if set to 'place', maus replaces all SAMPA-Symbols in the output
# files by the corresponding IPA place of articulation (col of IPA consonant
# chart) + 'front', 'mid' and 'back' for vowels/diphthongs.

# Option PRESEG
# If set to "true", maus will use the helper WAV2TRN to perform a
# pre-segmentation; if WAV2TRN is not installed on the system,
# a warning is issued and the option is ignored; a TRN tier in the BPF
# input is being ignored. If this option is set in combination with USETRN=true
# and the input BPF contains a chunk segmentation (tier TRN), then the
# presegmentation is carried out for every single chunk.

# Option PRINTINV
# If this option is set to 'yes', maus will not process any given input
# but merely print the set of allowed SAM-PA input symbols as a simple
# UTF-8 table to the output file OUT or to SAMPA.tab if OUT is not given. 
# This is useful, if you are not sure, which symbols are allowed in the 
# selected language, especially in SAMPA mode (see options PARAM or LANGUAGE).
# The UTF-8 table has the following columns:
# MAUS:           (X-)SAMPA symbols as supported by MAUS (KAN tier input)
# SAMPA:          Original (X-)SAMPA symbol
# IPA:            IPA symbol
# PHONETICS:      phonetic description (if applicable)
# EXAMPLES:       list of orthographic examples (if applicable), usually in the
#                 form 'Language1 word1, Language2 word2, ...' or just a 
#                 language name in parentheses '(Yongning Na)'
# ISO639-3:       Internal Iso 639-3 code of the SAMPA set if exclusively
#                 found in this set; 'xxx' = used in multiple languages

# The option PARAM determines another directory containing the parameter
# files of maus (default is $SOURCE/PARAM). By this another ruleset 
# together with all adapted inventory lists, HMM etc. can be chosen.
# Typically this is done to select another operating language for maus.
# Note that also a mapping script par2mlf.awk is
# part of PARAM, since the mappings to the internal phoneme
# set GRAPHINVENTAR can be dependent on the set.
# Option LANGUAGE overrides this option.

# Option RELAXMINDUR=false
# By default maus has a minimum duration of phone segments of 3 frames (= default 30msec)
# or 40msec for long/tense vowels or diphthongs or 60msec for affricates. This makes sense to achieve robust results
# with a minimum of falsely inserted short segments. In some cases (duration of very short
# consonants) this causes a 'ceiling effect' at 30msec in the analysed data. If this option
# is set to 'TRUE', this lower minimum duration bound is relaxed to 1 unit of TARGETRATE 
# (default: 10msec) and 2 units respectively.
# Using this option hurts MAUS accuracy; so please use it just as a last measure;
# (to decrease the TARGETRATE to achieve a similar effect is an alternative but also might hurt
# MAUS accuracy.)
# Technically RELAXMINDUR is done on-the-fly by converting the loaded HMM set in MMF 
# by adding a 0.05 transition from 
# (virtual) state 2 to 5 (= real states 1 to EXIT) allowing HMMs with 3 states to exit from the 
# first state and HMM with 4 states to jump from the first to the fourth state.
# The file defined in RELAXMINDURHED must be a HHEd script with the single command 
# AT 2 5 0.05 { *.transP }

# Option RELAXMINDURTHREE=false
# alternative to RELAXMINDUR: all models are set to a minimum duration of 3 states (= 30msec).
# setting both options causes an ERROR.

# Option RULESET
# Via this option the default statistical rule set 'rml-0.95.rul' for which maus is
# looking in PARAM can be overwritten. If the filename starts with
# an '/' maus does not look for that file in PARAM but rather loads 
# it directly (absolute path!). Otherwise maus will try to locate 
# the file in the given PARAM dir.
# Please note that the file name extension must be either of '.rul' 
# (denoting a statistical rule set) or '.nrul' (denoting a non-statistical
# rule set). Other extensions are not accepted.
# Please keep in mind that the phonetic symbols used in the rule set you 
# use must in any case match the inventar of symbols defined in 
# PARAM/GRAPHINVENTAR (numerical symbols preceded by 'P', e.g. 'P6') and that
# no non-ASCII characters are used (e.g. an UTF-8 hyphen). 
# For details on how to formulate new rule sets, see the docu file USAGE.
# If RULESET points to a file named 'dummy.rul', the script will assume that no 
# valid rule set exists for the selected language and will force the option 
# MODUS=align (thus performing a simple forced alignment in any case). 
# The latter will be indicated by a WARNING message.

# The options STARTWORD and ENDWORD control which words from the input
# BPF file are selected for the canonical input (only for mau format output). 
# Default STARTWORD is 0, ENDWORD is 999999 (all words). 
# These options are useful, if the BPF file
# contains not only one utterance but a whole dialogue, whereas the 
# input sound file contains only a part of it (e.g. from a chunk segmentation).
# The timing information in the resulting segmentation is of course in 
# relation to the input signal file.
# Note that the word numbering is passed through to the BPF output (mau,csv,par),
# if selected. Therefore *.mau files (OUTFORMAT=mau) or *.par files
# (OUTFORMAT=mau-append|par|bpf) are still conform to the source BPF file.
# These options have no effect if the option BPF is not used; if the 
# option OUTFORMAT is set to emu|EMU|legacyEMU|emuR|emuDB, the script terminates with an error;
# if set to TextGrid, options INSORTTEXTGRID/INSKANTEXTGRID are disabled.
# In combination with option USETRN and a TRN entry in the input BPF, these
# options may be used to perform partial segmentations in larger recordings.
# See the script maus.trn in the maus distribution package for an example.

# Option TARGETRATE=100000 
# frame rate of the HTK front-end measured in 100nsec units (10000 = 1msec)
# the segmental results of MAUS are quantized by this amount, i.e. a
# TARGETRATE of 100000 results in segment boundaries that are placed on multiples
# of 10msecs. The allowed values of TARGETRATE are 10000,20000,100000 (1, 2, 10msec);
# decreasing TARGETRATE *does not* increase accuracy, merely the quantization
# is more fine grained, but not more accurate; TARGETRATE does not influence
# MAUSSHIFT (see details above), but it does influence the segmentation accuracy as
# does the use of options RELAXMINDUR/RELAXMINDURTHREE (see details above);
# when MAUS is used to obtain quantitative
# duration measurements, it might make sense to decrease TARGETRATE; however, a decrease
# of TARGETRATE increases computation time (because more frames have to be
# processed in the same amount of signal length) and degrades the accuracy.

# Option USETRN (works only with BPF=*.par|PAR)
# If set to 'yes' maus will search the input BPF for a TRN tier
# that segments the utterance within the recording. If the input 
# BPF contains no TRN tier, a warning is issued and processing resumes
# as usual. If only one TRN entry is found, maus will cut
# out the segment defined there and run the MAUS segmentation only within the
# cut out segment. Afterwards the offset and final cut off are
# re-calculated into the final result file. Note that word links 
# of the TRN entry are ignored, that is the script will segment all words 
# given in the KAN tier within the time segment defined by the TRN entry. 
# (this functionality is kept for backwards compatibility, because that is the 
# way maus handled TRN input before version 2.50; if you want to constrain the 
# maus segmentation not only to a time segment but also to a certain span of
# words in the input BPF, use the options STARTWORD/ENDWORD.)
# If more than one TRN entry is found, maus assumes that a chunk segmentation
# is available for the input recording 
# (see http://www.bas.uni-muenchen.de/Bas/BasFormatseng.html#TRN for details).
# The script will then call the helper maus.trn (must be installed in the 
# same location as this script) to segment each chunk separately and aggregates the 
# results into the results file.
# Chunk segmentations may contain overlapping chunks which cause problems when
# aggregating the segmentation results of the individual chunks into one 
# TextGrid or Emu result file (since these formats do not support intervals
# with negative times, while BPF does). Therefore if the option OUTFORMAT
# is set to TextGrid, the script will check the input for overlaps. If there
# are overlaps, the script will issue an error message and terminate. If 
# OUTFORMAT is set to emu|EMU|legacyEMU|emuR|emuDB, the script will terminate 
# with an error (not supported yet).
# The following option value 'force' is deprecated in 4.11, use PRESEG=true instead : 
# If set to 'force' maus will search for a helper WAV2TRN to run an online
# pre-segmentation ignoring any TRN entries in the input BPF. If the helper
# is not found, maus will issue a warning and proceed without any pre-segmentation.
# Default is set to 'no'. If no BPF is given on the command line or for CSV input 
# this option is ignored.

# The option WEIGHT weights the influence of the statistical
# MAUS model against the acoustical scores. More
# precisely WEIGHT is multiplied to the MAUS model
# score (log likelihood) before adding the score to 
# the acoustical score within the search. Since the MAUS model in most cases
# will favour the canonical pronunciation, increasing WEIGHT will at some 
# point cause MAUS to choose the canonical pronunciation; lower values of 
# WEIGHT will cause less probable paths to be selected more often according to acoustic evidence.
# If the acoustic quality of the signal is very good and the HMMs of the language
# are well trained, it makes sense to lower WEIGHT.
# In an evaluation on parts of the German Verbmobil data set (27425 segments)
# which were segmented and labelled manually (MAUS DEV set) this
# heuristical parameter was optimized to 7.0 (which is rather similar to
# the optimal value used in ASR which is 6.5). Therefore the default value
# for WEIGHT is set to 7.0
# WEIGHT was also tested against the MAUS TEST set to confirm the value of
# 7.0. The symmetric inter-labeller agreement for 7.0 was 79.47, for 8.0
# 79.31, for 6.0 79.51 and for 5.0 79.47 and dropping.
# The differences between these
# three values are not significant; therefore we can say that the value of
# 7.0 holds for other data sets.
# Note that this might NOT be the optimal value for other MAUS tasks. For instance 
# for Italian we found that WEIGHT=1.0 yields better results (which is why 
# WEIGHT is set to 1.0 by default for LANGUAGE=ita)

# Exit codes

# 0 : everything seems ok
# 1 : serious error
# 2 : problem with the input signal file
# 3 : printing allowed SAM-PA set to output file
# 4 : main arguments missing, printing help message to stdout
# 5 : a helper program reports serious error

ENDE
  exit 4
endif  

# Translate the verbosity level v (1..4) into the corresponding TRACE
# value; any other level leaves TRACE untouched.
switch ( $v )
  case 1:
    set TRACE = 0
    breaksw
  case 2:
    set TRACE = 1
    breaksw
  case 3:
    set TRACE = 3
    breaksw
  case 4:
    set TRACE = 7
    breaksw
endsw

# Announce the start of processing when running verbosely.
if ( $v > 0 ) then
  echo "DEBUG: ${0:t} : Starting on `date`"
endif

# TEMP must already exist; the script does not create it.
if ( ! -d $TEMP ) then
  echo "ERROR: ${0:t} : cannot find temporary dir $TEMP - exiting" >> /dev/stderr
  echo "       please create such a dir and define it to maus" >> /dev/stderr
  echo "       using the option 'TEMP=...'" >> /dev/stderr
  exit 1
endif

# Name prefix for intermediate files in TEMP: shell process id plus the
# current time in seconds, each followed by an underscore.
set PID = $$_`date "+%s"`_

# check parameter set
# Normalize the accepted OUTFORMAT aliases to the canonical names that the
# rest of the script tests for (emuDB, par, csv, TextGrid).
if ( $OUTFORMAT == "emuR" ) set OUTFORMAT = "emuDB"
if ( $OUTFORMAT == "bpf" || $OUTFORMAT == "BPF" || $OUTFORMAT == "mau-append" || $OUTFORMAT == "PAR" ) set OUTFORMAT = "par"
if ( $OUTFORMAT == "csv" || $OUTFORMAT == "CSV" ) set OUTFORMAT = "csv"
if ( $OUTFORMAT == "textgrid" || $OUTFORMAT == "tg" || $OUTFORMAT == "TG" ) set OUTFORMAT = "TextGrid"
# check availability of output conversion:
# every format that is not handled by maus itself must be listed by ANNOTCONV
if ( $OUTFORMAT != "mau" && $OUTFORMAT != "par" && $OUTFORMAT != "emu" && $OUTFORMAT != "EMU" && $OUTFORMAT != "legacyEMU" && $OUTFORMAT != "emuDB" && $OUTFORMAT != "csv" && $OUTFORMAT != "TextGrid" ) then 
  if ( -x $ANNOTCONV ) then
    # check for ANNOTCONV formats
    # The requested format is matched as a literal string (-F) and as a
    # whole word (-w); a plain grep would interpret OUTFORMAT as a regular
    # expression and accept substrings, e.g. OUTFORMAT=e or OUTFORMAT=. ;
    # '--' protects against values starting with a dash.
    $ANNOTCONV --listOutFormats | grep -q -w -F -- "$OUTFORMAT"
    if ( $status != 0 ) then
      echo "ERROR: ${0:t} : unsupported output format OUTFORMAT = $OUTFORMAT - exiting" >> /dev/stderr
      exit 1
    endif
  else
    # without ANNOTCONV only the built-in formats are available
    echo "ERROR: ${0:t} : unsupported output format OUTFORMAT = $OUTFORMAT - exiting" >> /dev/stderr
    exit 1
  endif
endif
# Only 'bpf' and 'bpf-sampa' are valid INFORMAT values; 'bpf-sampa'
# additionally forces LANGUAGE to 'sampa'.
if ( $INFORMAT != "bpf" && $INFORMAT != "bpf-sampa" ) then
  echo "ERROR: ${0:t} : unknown option INFORMAT = $INFORMAT - exiting" >> /dev/stderr
  exit 1
endif
if ( $INFORMAT == "bpf-sampa" ) set LANGUAGE = "sampa"
# The language code 'und' is treated as an alias of 'sampa'.
if ( "$LANGUAGE" == "und" ) then
  set LANGUAGE = "sampa"
endif
# LANGUAGE: rfc5646 language codes, singular iso639-3 codes (just
# for backwards compatibility) and 'sampa';
# a non-empty LANGUAGE replaces whatever PARAM was set to before.
if ( "$LANGUAGE" != "" ) then
  set PARAM = "$SOURCE/PARAM.${LANGUAGE}"
endif
if ( $v > 0 ) then
  if ( "$LANGUAGE" == "sampa" || "$PARAM" == "$SOURCE/PARAM.SAMPA" ) then
    echo "DEBUG: ${0:t} : Using special SAMPA mode; expect input KAN strings to be blank-separated"
  endif
endif
# The parameter directory must exist, otherwise nothing else can be loaded.
if ( -d $PARAM ) then
  if ( $v > 0 ) echo "DEBUG: ${0:t} : Found parameter set $PARAM for LANGUAGE $LANGUAGE"
else
  echo "ERROR: ${0:t} : Unknown language code $LANGUAGE or PARAM dir $PARAM not found - exiting" >> /dev/stderr
  echo "       In case you are using LANGUAGE codes 'aus','nze': these have been deprecated with maus version 2.141" >> /dev/stderr
  exit 1
endif

# check for language specific defaults
# (the file is sourced into this shell; the DEF* variables it may set are
# evaluated right below)
if ( -e $PARAM/DEFAULTS ) source $PARAM/DEFAULTS

# check and set language specific parameters
# For each option still set to "default": start from the global fallback
# and replace it by the DEF* value if one is defined (e.g. by the sourced
# DEFAULTS file). The inner ifs use the block form on purpose: tcsh
# substitutes variables in a one-line 'if (expr) command' before testing
# expr, which would fail on an undefined DEF* variable.
# NOTE(review): the help text of this script states a default WEIGHT of
# 7.0, while the global fallback here is 1.0 - confirm which is intended.
if ( $WEIGHT == "default" ) then
  set WEIGHT = 1.0              # global default
  if ( $?DEFWEIGHT ) then
    set WEIGHT = $DEFWEIGHT
  endif
endif
if ( $MAUSSHIFT == "default" ) then
  set MAUSSHIFT = 0              # global default
  if ( $?DEFMAUSSHIFT ) then
    set MAUSSHIFT = $DEFMAUSSHIFT
  endif
endif
# An empty RULESET is silently treated like "default".
if ( "$RULESET" == "" || "$RULESET" == "default" ) then
  set RULESET = "rml-0.95.rul"   # global default
  if ( $?DEFRULESET ) then
    set RULESET = $DEFRULESET
  endif
endif

# setting macro options
# MODUS must be one of 'standard', 'align' or 'bigram'.
if ( $MODUS != 'standard' && $MODUS != 'align' && $MODUS != 'bigram' ) then
  echo "ERROR: ${0:t} : unknown modus selected MODUS = $MODUS - exiting" >> /dev/stderr
  exit 1
endif

if ( $MODUS == 'align' ) then
  if ( $v > 0 ) echo "DEBUG: ${0:t} : Switching to forced alignment modus - rule set is being ignored"
else if ( $MODUS == 'standard' ) then
  if ( $CANONLY != "TRUE" ) then
    if ( $v > 0 ) echo "DEBUG: ${0:t} : Using standard modus"
  else
    # deprecated CANONLY wins over MODUS=standard: fall back to forced alignment
    echo "WARNING: ${0:t} : conflicting options MODUS = $MODUS and CANONLY (deprecated) = $CANONLY" >> /dev/stderr
    echo "         Option CANONLY is deprecated - use option MODUS=align in the future" >> /dev/stderr
    echo "         Switching to forced alignment modus - rule set is being ignored" >> /dev/stderr
    set MODUS = "align"
  endif
else
  # MODUS == 'bigram'
  if ( $v > 0 ) echo "DEBUG: ${0:t} : Switching to bigram modus"
  if ( $CANONLY == "TRUE" ) then
    # in bigram modus CANONLY is only reported, not applied
    echo "WARNING: ${0:t} : conflicting options MODUS = $MODUS and CANONLY (deprecated) = $CANONLY" >> /dev/stderr
    echo "         Option CANONLY is deprecated - use option MODUS=align in the future" >> /dev/stderr
    echo "         Ignoring option CANONLY" >> /dev/stderr
  endif
  if ( $USETRN == 'TRUE' ) then
    echo "WARNING: ${0:t} : option USETRN set to 'false' in modus 'bigram'" >> /dev/stderr
    set USETRN = 'FALSE'
  endif
  # The word tiers and a canonical string are switched off without any message.
  if ( $INSORTTEXTGRID == 'TRUE' ) set INSORTTEXTGRID = 'FALSE'
  if ( $INSKANTEXTGRID == 'TRUE' ) set INSKANTEXTGRID = 'FALSE'
  set KANSTR = ''
  if ( $OUTFORMAT == "emu" || $OUTFORMAT == "EMU" || $OUTFORMAT == "legacyEMU" ) then
    echo "ERROR: ${0:t} : cannot run in bigram modus with emu|EMU|legacyEMU output format - exiting" >> /dev/stderr
    exit 1
  endif
endif

set IPATABLE = $PARAM/KANINVENTAR.inv
                      # table that maps all known MAUS SAMPA symbols (1st column) to the corresponding
		      # IPA symbol (sequence) coded in UTF-8 (3rd column), the manner class (7th col),
		      # the place of articulation (8th col).

# Resolve the HMM inventory relative to PARAM; it is mandatory.
# The error messages are quoted and sent to stderr like all other errors
# of this script (they were previously printed to stdout).
set HMMINVENTAR = $PARAM/$HMMINVENTAR
if ( ! -e  $HMMINVENTAR ) then 
  echo "ERROR: ${0:t} : cannot find HMMINVENTAR $HMMINVENTAR" >> /dev/stderr
  exit 1
endif  
# check for HMM, transform HMM to a temporary HMM with additional transitions if RELAXMINDUR* is set
# An empty MMF selects the macro file MMF.mmf of the parameter set.
if ( $MMF == "" ) set MMF = $PARAM/MMF.mmf
if ( ! -e  $MMF ) then 
  echo "ERROR: ${0:t} : cannot find MMF $MMF" >> /dev/stderr
  exit 1
endif  
# RELAXMINDUR and RELAXMINDURTHREE exclude each other.
if ( $RELAXMINDUR == "TRUE" && $RELAXMINDURTHREE == "TRUE" ) then
  echo "ERROR: ${0:t} : you can set either 'Relax Min Duration' (RELAXMINDUR) or 'Relax Min Duration Three' (RELAXMINDURTHREE) but not both - exiting" >> /dev/stderr
  exit 1
endif

# RELAXMINDUR: let HHEd apply the edit script RELAXMINDURHED to the HMM set
# and write the result to a temporary macro file. MMF is only switched to
# that file if HHEd succeeds; every failure degrades to a warning and the
# unchanged HMM set.
if ( $RELAXMINDUR == "TRUE" ) then
  if ( -e $RELAXMINDURHED ) then
    if ( $v > 0 ) echo "DEBUG: ${0:t} : Option RELAXMINDUR: convert all HMM to allow minimum duration segments (1 state and 2 state)"
    HHEd -H $MMF -w $TEMP/${PID}.MMF.mmf $RELAXMINDURHED $HMMINVENTAR
    set hhedstatus = $status
    if ( $hhedstatus == 0 ) then
      set MMF = $TEMP/${PID}.MMF.mmf
    else
      echo "WARNING: ${0:t} : error $hhedstatus occurred in helper HHEd when adding transitions to HMM - using unchanged HMM - option RELAXMINDUR ignored" >> /dev/stderr
    endif
  else
    echo "WARNING: ${0:t} : cannot find helper script $RELAXMINDURHED for option RELAXMINDUR - ignoring RELAXMINDUR" >> /dev/stderr
  endif
endif

# RELAXMINDURTHREE: the awk script RELAXMINDURTHREEAWK rewrites the macro
# file into the same temporary file; again MMF is only switched on success.
if ( $RELAXMINDURTHREE == "TRUE" ) then
  if ( $v > 0 ) echo "DEBUG: ${0:t} : Option RELAXMINDURTHREE: converting all HMM to minimum duration of 3 states (= 30msec with standard frame rate)"
  awk -f $RELAXMINDURTHREEAWK "$MMF" >! $TEMP/${PID}.MMF.mmf
  if ( $status == 0 ) then
    set MMF = $TEMP/${PID}.MMF.mmf
  else
      echo "WARNING: ${0:t} : conversion of HMM into minimum duration of 3 states (RELAXMINDURTHREE=true) failed - using unchanged HMM - option RELAXMINDURTHREE ignored" >> /dev/stderr
  endif
endif

# Turn the bare parameter file names into paths below PARAM first
# (PRECONFIG already holds a full path) ...
set DICT = $PARAM/$DICT
set HVITECONF = $PARAM/$HVITECONF
set KANINVENTAR = $PARAM/$KANINVENTAR

# ... then make sure each required file exists; the first missing one
# terminates the script.
if ( ! -e $DICT ) then
  echo "ERROR: ${0:t} : cannot find DICT $DICT" >> /dev/stderr
  exit 1
endif
if ( ! -e $HVITECONF ) then
  echo "ERROR: ${0:t} : cannot find HVITECONF $HVITECONF" >> /dev/stderr
  exit 1
endif
if ( ! -e $PRECONFIG ) then
  echo "ERROR: ${0:t} : cannot find PRECONFIG file $PRECONFIG" >> /dev/stderr
  exit 1
endif
if ( ! -e $KANINVENTAR ) then
  echo "ERROR: ${0:t} : cannot find KANINVENTAR $KANINVENTAR" >> /dev/stderr
  exit 1
endif
# output list of allowed input SAM-PA symbols
# With PRINTINV set no input is processed: the symbol table
# ${KANINVENTAR}.inv is copied to OUT (SAMPA.tab if OUT is empty) and the
# script ends with exit code 3, also when the table is missing.
if ( $PRINTINV == 'TRUE' ) then
  if ( $OUT == "" ) then
    set OUT = SAMPA.tab
  endif
  if ( $v > 0 ) echo "DEBUG: ${0:t} : Printing list of allowed SAM-PA symbols to $OUT"
  # touch serves as a probe whether OUT can be written at all
  touch $OUT
  if ( $status != 0 ) then
    echo "ERROR: ${0:t} : cannot write to output file $OUT - exiting" >> /dev/stderr
    exit 1
  endif
  if ( -e ${KANINVENTAR}.inv ) then
    cat ${KANINVENTAR}.inv >! $OUT
  else
    echo "WARNING: ${0:t} : cannot find SAMPA table ${KANINVENTAR}.inv" >> /dev/stderr
  endif
  exit 3
endif
# Resolve GRAPHINVENTAR relative to PARAM and make sure it exists.
set GRAPHINVENTAR = $PARAM/$GRAPHINVENTAR
if ( ! -e  $GRAPHINVENTAR ) then 
  echo "ERROR: ${0:t} : cannot find GRAPHINVENTAR $GRAPHINVENTAR" >> /dev/stderr
  exit 1
endif  
# The awk scripts kan2mlf.awk and rec2mau.awk must be present in SOURCE,
# i.e. in the directory of this script.
if ( ! -e  $SOURCE/kan2mlf.awk ) then 
  echo "ERROR: ${0:t} : cannot find script kan2mlf.awk in $SOURCE" >> /dev/stderr
  exit 1
endif  
if ( ! -e  $SOURCE/rec2mau.awk ) then 
  echo "ERROR: ${0:t} : cannot find script rec2mau.awk in $SOURCE" >> /dev/stderr
  exit 1
endif  
# BPF output (OUTFORMAT normalized to 'par') needs a BPF input file,
# except in bigram modus.
if ( $OUTFORMAT == "par" && $BPF == "" && $MODUS != "bigram" ) then
  echo "ERROR: ${0:t} : option OUTFORMAT=mau-append|par|bpf works only with BPF|CSV input not with KANSTR input - exiting" >> /dev/stderr
  exit 1
endif
# check input conversion helper
# (only required if audio enhancement has been requested)
if ( $USEAUDIOENHANCE == TRUE && ( ! -x $AUDIOENHANCE ) ) then
  echo "ERROR: $0:t : cannot find executable service $AUDIOENHANCE - exiting" >> /dev/stderr
  exit 1
endif
# check output conversion helpers; single helpers are kept for fall back, but usually
# output conversion should now (5.33) be handled by ANNOTCONV
# Without an executable ANNOTCONV the older converter that belongs to the
# selected OUTFORMAT must be executable; all other formats pass unchecked.
if ( ! -x $ANNOTCONV ) then
  echo "WARNING: ${0:t} : cannot find output coverter $ANNOTCONV - try to fall back to older converters" >> /dev/stderr
  if ( $OUTFORMAT == "TextGrid" && ( ! -x $PAR2TEXTGRID ) ) then
    echo "ERROR: ${0:t} : cannot find script $PAR2TEXTGRID required for praat compatible output" >> /dev/stderr
    exit 1
  else if ( ( $OUTFORMAT == "csv" || $OUTFORMAT == "CSV" ) && ( ! -x $MAUSBPF2CSV ) ) then
    echo "ERROR: ${0:t} : cannot find script $MAUSBPF2CSV required for csv (spreadsheet) output" >> /dev/stderr
    exit 1
  else if ( ( $OUTFORMAT == "emu" || $OUTFORMAT == "EMU" || $OUTFORMAT == "legacyEMU" ) && ( ! -x $PAR2EMU ) ) then
    echo "ERROR: ${0:t} : cannot find script $PAR2EMU required for legacy Emu output (*.hlb, *.phonetic)" >> /dev/stderr
    exit 1
  else if ( $OUTFORMAT == "emuDB" && ( ! -x $PAR2EMUR ) ) then
    echo "ERROR: ${0:t} : cannot find script $PAR2EMUR required for Emu output (*_annot.json)" >> /dev/stderr
    exit 1
  endif
endif
if ( $OUTFORMAT == "emu" || $OUTFORMAT == "EMU" || $OUTFORMAT == "legacyEMU" || $OUTFORMAT == "emuDB" ) then 
  if ( $BPF == "" && $MODUS != "bigram" ) then
    echo "ERROR: ${0:t} : cannot create Emu output without BPF input - use option BPF" >> /dev/stderr
    echo "       BPF file input must contain the tiers ORT and KAN" >> /dev/stderr
    exit 1
  endif
endif  
# find rule sets/bigram data etc. for the different operation modi
# MODUS=align|ALIGN : no rule set needed, MODUS is normalized to "align"
# MODUS=standard    : locate and validate RULESET (may fall back to MODUS=align)
# MODUS=bigram      : locate DICTBIGRAM and LATBIGRAM
# any other value   : error, exit 1
if ( $MODUS == "align" || $MODUS == "ALIGN" ) then 
  if ( $v > 0 ) then
    echo "DEBUG: ${0:t} : Doing forced alignment to the phonological input"
    echo "      helper word_var-2.0 is not called; rule set ignored."
  endif  
  set MODUS = "align"
else if ( $MODUS == "standard" ) then  
  # RULESET is first tried as given; if it does not exist and is not an
  # absolute path, it is looked up in $PARAM
  if ( ! -e "$RULESET" ) then 
    # the sed call extracts the first character of RULESET to test for a leading '/'
    if ( `echo "$RULESET" | sed 's/^\(.\).*$/\1/'` != '/' ) then 
      # no absolute path: try to find RULESET in PARAM
      #set RULESET = `/bin/ls "$PARAM/${RULESET}"`
      if ( ! -e "$PARAM/$RULESET" ) then 
        echo "ERROR: ${0:t} : cannot find RULESET $RULESET (not even in ${PARAM})" >> /dev/stderr
	echo "       maybe the language selected does not have a rule set yet" >> /dev/stderr
	echo "       try using the option MODUS=align to work without rule set" >> /dev/stderr
        exit 1
      else
        # replace RULESET by the path found in PARAM (as printed by ls)
        set RULESET = `/bin/ls "$PARAM/${RULESET}"`
      endif
    else
      echo "ERROR: ${0:t} : cannot find RULESET $RULESET" >> /dev/stderr
      echo "       maybe the language selected does not have a rule set yet" >> /dev/stderr
      echo "       try using the option MODUS=align to work without rule set" >> /dev/stderr
      exit 1
    endif
  endif    
  # only the file extensions 'rul' and 'nrul' are accepted
  if ( "${RULESET:e}" != 'rul' && "${RULESET:e}" != 'nrul' ) then
    echo "ERROR: ${0:t} : extension of RULESET $RULESET is not 'rul' or 'nrul'" >> /dev/stderr
    exit 1
  endif
  # check for dummy rule sets, which should not be used with MODUS=standard
  # (the long listing is searched, so for a symbolic link the link target shown by ls -l is matched as well)
  ls -l "${RULESET}" | grep -q 'dummy.rul'
  if ( $status == 0 ) then 
    echo "WARNING: ${0:t} : dummy rule set found, cannot be used with MODUS=standard" >> /dev/stderr
    echo "         Forcing option MODUS=align" >> /dev/stderr
    echo "         Doing forced alignment to the phonological input" >> /dev/stderr
    echo "         Results are valid, but no pronunciation modelling was applied" >> /dev/stderr
    set MODUS = align
  else
    # no dummy.rul set : check for ASCII content and type matching extension (word_var will crash on mismatches!)
    file -L "$RULESET" | grep 'ASCII text' >& /dev/null
    if ( $status != 0 ) then
      echo "ERROR: ${0:t} : RULESET $RULESET is not a pure ASCII file; make sure that you are using only ASCII characters" >> /dev/stderr
      exit 1
    endif
    if ( "${RULESET:e}" == "rul" ) then
      # extension indicates statistical rule set: each line must end with '000'
      # (grep -v succeeds if at least one line does NOT end with '000')
      grep -v -q '000$' "${RULESET}"
      if ( $status == 0 ) then
        echo "ERROR: ${0:t} : RULESET $RULESET is not a statistical rule set (found a line without trailing number)" >> /dev/stderr 
        exit 1
      endif
    else
      # extension indicates phonological rule set: each line must not end with '000' 
      grep -q '000$' "${RULESET}"
      if ( $status == 0 ) then
        echo "ERROR: ${0:t} : RULESET $RULESET is not a phonological rule set (found a line with trailing number)" >> /dev/stderr 
        exit 1
      endif
    endif
  endif  
else if ( $MODUS == "bigram" ) then 
  # bigram modus: default both bigram resources to files in $PARAM if not set by option
  if ( $DICTBIGRAM == "" ) set DICTBIGRAM = $PARAM/DICT.bigram
  if ( $LATBIGRAM == "" ) set LATBIGRAM = $PARAM/LAT.bigram
  if ( ! -e $DICTBIGRAM ) then 
    echo "ERROR: ${0:t} : cannot find bigram dictionary DICTBIGRAM = $DICTBIGRAM - exiting" >> /dev/stderr
    echo "       most likely you have selected a LANGUAGE for that no bigram modus is (yet) available" >> /dev/stderr
    exit 1
  endif
  if ( ! -e $LATBIGRAM ) then 
    echo "ERROR: ${0:t} : cannot find phone bigram LATBIGRAM = $LATBIGRAM - exiting" >> /dev/stderr
    exit 1
  endif
else
  echo "ERROR: ${0:t} : unknown option MODUS = $MODUS - exiting" >> /dev/stderr
  exit 1
endif  
# sox must be available in the PATH
which sox >& /dev/null
if ( $status != 0 ) then
  echo "ERROR: ${0:t} : sox is not installed on your system or not in your PATH - exiting" >> /dev/stderr
  exit 1
endif
# INSORTTEXTGRID/INSKANTEXTGRID only make sense with BPF input and TextGrid output;
# they are also switched off (with a warning) when only a word range
# (STARTWORD/ENDWORD) of the BPF is segmented.
if ( $BPF == "" || $OUTFORMAT != "TextGrid" ) then
  set INSORTTEXTGRID = "FALSE"
  set INSKANTEXTGRID = "FALSE"
else if ( $STARTWORD != 0 || $ENDWORD != 999999 ) then
  if ( $INSORTTEXTGRID == "TRUE" || $INSKANTEXTGRID == "TRUE" ) then
    set INSORTTEXTGRID = "FALSE"
    set INSKANTEXTGRID = "FALSE"
    echo "WARNING: ${0:t} : options INSORTTEXTGRID and INSKANTEXTGRID disabled" >> /dev/stderr
    echo "         since we are segmenting a subsection of the BPF file (options STARTWORD and ENDWORD)" >> /dev/stderr
  endif
endif
# A word range cannot be combined with any of the Emu output formats
if ( $STARTWORD != 0 || $ENDWORD != 999999 ) then
  if ( $OUTFORMAT == "emu" || $OUTFORMAT == "EMU" || $OUTFORMAT == "legacyEMU" || $OUTFORMAT == "emuDB" ) then
    echo "ERROR: ${0:t} : options STARTWORD/ENDWORD cannot be used with Emu output (emu|EMU|legacyEMU|emuR|emuDB) - exiting" >> /dev/stderr
    exit 1
  endif
endif
# The word range must not be inverted
if ( $STARTWORD > $ENDWORD ) then
  echo "ERROR: ${0:t} : option STARTWORD ($STARTWORD) must smaller/equal to ENDWORD ($ENDWORD) - exiting" >> /dev/stderr
  exit 1
endif
# Deprecated option OUTIPA: it is translated into OUTSYMBOL=ipa, but only
# if OUTSYMBOL still has the value "sampa"; otherwise it is ignored.
if ( $OUTIPA == "TRUE" ) then
  echo -n "WARNING: ${0:t} : usage of deprecated option OUTIPA" >> /dev/stderr
  if ( $OUTSYMBOL != "sampa" ) then
    echo " - ignored" >> /dev/stderr
  else
    set OUTSYMBOL = "ipa"
    echo " - setting option OUTSYMBOL=ipa" >> /dev/stderr
  endif
endif
# INSYMBOL accepts sampa|ipa
if ( ! ( $INSYMBOL == "ipa" || $INSYMBOL == "sampa" ) ) then
  echo "ERROR: ${0:t} : unknown value $INSYMBOL for option INSYMBOL, valids are sampa|ipa" >> /dev/stderr
  if ( $CLEAN == "TRUE" ) rm -rf $TEMP/${PID}* >& /dev/null
  exit 1
endif
# OUTSYMBOL accepts sampa|ipa|manner|place
if ( ! ( $OUTSYMBOL == "sampa" || $OUTSYMBOL == "ipa" || $OUTSYMBOL == "manner" || $OUTSYMBOL == "place" ) ) then
  echo "ERROR: ${0:t} : unknown value $OUTSYMBOL for option OUTSYMBOL, valids are sampa|ipa|manner|place" >> /dev/stderr
  if ( $CLEAN == "TRUE" ) rm -rf $TEMP/${PID}* >& /dev/null
  exit 1
endif
# check for IPA mapping tables: any non-SAMPA encoding in input or output needs
# an existing table $IPATABLE with a header line matching MAUS...IPA...MANNER...PLACE;
# otherwise we fall back to SAMPA for both input and output
if ( $OUTSYMBOL == "ipa" || $OUTSYMBOL == "manner" || $OUTSYMBOL == "place" || $INSYMBOL == "ipa" ) then
  if ( -e $IPATABLE ) then
    grep -q '^MAUS.*IPA.*MANNER.*PLACE' $IPATABLE
    if ( $status != 0 ) then
      echo "WARNING: ${0:t} : you requested IPA symbols in input/output (options INSYMBOL/OUTSYMBOL) but the mapping table $IPATABLE is probably corrupt - switching to (default) SAMPA symbol encoding for input/output" >> /dev/stderr
      set OUTSYMBOL = "sampa"
      set INSYMBOL = "sampa"
      set IPATABLE = ""
    endif
  else
    echo "WARNING: ${0:t} : you requested symbol mapping in input or output other than SAMPA but no mapping table ${IPATABLE} can be found - switching to (default) SAMPA symbols" >> /dev/stderr
    set OUTSYMBOL = "sampa"
    set INSYMBOL = "sampa"
  endif
endif
# report the effective symbol encodings in verbose mode
if ( $v > 0 ) then
  if ( $MODUS != "bigram" ) echo "DEBUG: ${0:t} : Expecting phonological input encoded in $INSYMBOL"
  echo "DEBUG: ${0:t} : Phonetic output encoded in $OUTSYMBOL"
endif


# check sampling rate of preprocessing
# first change the HTK front-end config file if TARGETRATE is not the default of 10msec
# (TARGETRATE is given in units of 100nsec; accepted range is 10000...100000).
# A modified copy of $PRECONFIG is written to $TEMP and used from here on.
if ( "$TARGETRATE" != 100000 ) then
  if ( "$TARGETRATE" < 10000 || "$TARGETRATE" > 100000 ) then
    # error messages belong on stderr like everywhere else in this script
    echo "ERROR: ${0:t} : TARGETRATE = $TARGETRATE is out of range 10000...100000 - exiting" >> /dev/stderr
    if ( $CLEAN == "TRUE" ) rm -rf $TEMP/${PID}* >& /dev/null
    exit 1
  endif
  if ( $v > 0 ) echo "DEBUG: ${0:t} : changing output frame rate to $TARGETRATE units of 100nsec (HTK: TARGETRATE)"
  # NOTE(review): the pattern 'TARGETRATE = 10000' is also a prefix of
  # 'TARGETRATE = 100000'; whether the substitution yields the intended value
  # depends on the exact line in $PRECONFIG - confirm against that file.
  sed "s/TARGETRATE = 10000/TARGETRATE = ${TARGETRATE}/" $PRECONFIG >! $TEMP/${PID}PRECONFIG
  set PRECONFIG = $TEMP/${PID}PRECONFIG
endif
# check: get the HTK sampling rate of the preprocessing (in Hz)
# (10000000 divided by the 3rd field of the SOURCERATE line)
set MODELRATE = `grep -a 'SOURCERATE' $PRECONFIG | awk '{ print 10000000 / $3 }'`
# check: get the frame rate in Hz
# (10000000 divided by the 3rd field of the TARGETRATE line)
set FRAMERATE = `grep -a 'TARGETRATE' $PRECONFIG | awk '{ print 10000000 / $3 }'`

# save original input BPF in case we need e.g. the original KAN tier in output
set BPFORG = "$BPF"

# check temporary file: ${MAU}.mau collects the segmentation result.
# An existing file is truncated, a missing one is created (world read/writable);
# in both cases we abort if the file cannot be written.
set MAU = $TEMP/${PID}${SIGNAL:t:r}
if ( -e ${MAU}.mau ) then
  echo -n "" >! ${MAU}.mau
  if ( $status != 0 ) then
    echo "ERROR: ${0:t} : cannot write temporary file ${MAU}.mau - exiting" >> /dev/stderr
    if ( $CLEAN == "TRUE" ) rm -rf $TEMP/${PID}* >& /dev/null
    exit 1
  endif
else
  touch ${MAU}.mau
  if ( $status != 0 ) then
    echo "ERROR: ${0:t} : cannot write temporary file ${MAU}.mau - exiting" >> /dev/stderr
    if ( $CLEAN == "TRUE" ) rm -rf $TEMP/${PID}* >& /dev/null
    exit 1
  endif
  chmod 666 ${MAU}.mau
endif

# check signal input, make format conversions (if necessary),
# determine some important signal features for later

# the SIGNAL input file must exist
if ( ! -e $SIGNAL ) then 
  echo "ERROR: ${0:t} : cannot find SIGNAL input file $SIGNAL - exiting" >> /dev/stderr
  if ( $CLEAN == "TRUE" ) rm -rf $TEMP/${PID}* >& /dev/null
  exit 1
endif  
# check for zero files: they will cause HVite to loop for ever, so we reject them
# The output of 'sox ... stat' is stored in a temporary file; the signal is
# rejected (exit 5) only if both the 'Maximum amplitude' and the
# 'Minimum amplitude' line match the pattern for 0.000000.
sox $SIGNAL -n stat >& $TEMP/${PID}_zeroTest.txt
grep -q 'Maximum amplitude:.*0.000000' $TEMP/${PID}_zeroTest.txt
set sox_err1 = $status
grep -q 'Minimum amplitude:.*0.000000' $TEMP/${PID}_zeroTest.txt
set sox_err2 = $status
if ( $sox_err1 == 0 && $sox_err2 == 0 ) then
  echo "ERROR: ${0:t} : cannot process input signal containing zero signal - exiting" >> /dev/stderr
  if ( $CLEAN == "TRUE" ) rm -rf $TEMP/${PID}* >& /dev/null
  exit 5
endif  
# NOTE(review): no file ${PID}dummy.wav is created in this section (the test
# output goes to ${PID}_zeroTest.txt) - this looks like a leftover; confirm
rm -f $TEMP/${PID}dummy.wav

# remember the signal file as given by the caller; SIGNAL itself may be
# redirected to a working copy in $TEMP below
set SIGNALORG = $SIGNAL
# check/convert input, possible trim
set HTK = $TEMP/${PID}${SIGNAL:t:r}.htk
set signaltype = ${SIGNAL:e}
# starting with 5.67 we use audioEnhance to process all input signals (except *.wav) as in maus.pipe
if ( $USEAUDIOENHANCE == TRUE ) then
  if ( $signaltype == "wav" || $signaltype == "WAV" ) then
    # *.wav input: work on a copy in $TEMP
    cp $SIGNAL $TEMP/${PID}${SIGNAL:t}
    if ( $status != 0 ) then
      # fail here with a precise message instead of a misleading
      # 'cannot determine sample rate' error further down
      echo "ERROR: ${0:t} : cannot copy SIGNAL input file $SIGNAL to $TEMP - exiting" >> /dev/stderr
      if ( $CLEAN == "TRUE" ) rm -rf $TEMP/${PID}* >& /dev/null
      exit 1
    endif
    set SIGNAL = $TEMP/${PID}${SIGNAL:t}
    chmod 666 ${SIGNAL}
  else
    # any other extension: let audioEnhance write a *.wav into $TEMP (RESAMPLE=0)
    $AUDIOENHANCE v=$v SIGNAL="$SIGNAL" OUT="$TEMP/${PID}${SIGNAL:t:r}.wav" RESAMPLE=0
    set exitCode = $status
    if ( $exitCode != 0 ) then
      echo "ERROR: ${0:t} : audioEnhance returns error $exitCode - exiting" >> /dev/stderr
      if ( $CLEAN == "TRUE" ) rm -rf $TEMP/${PID}* >& /dev/null
      exit $exitCode
    endif
    set SIGNAL = "$TEMP/${PID}${SIGNAL:t:r}.wav"
    chmod 666 "$SIGNAL"
  endif
endif

# sample rate of the (possibly converted) signal; queried once for all cases
set SIGNALRATE = `soxi -r "$SIGNAL"`
if ( $status != 0 || "$SIGNALRATE" == "" ) then
  echo "ERROR: ${0:t} : cannot determine sample rate from input SIGNAL, something is seriously wrong - exiting" >> /dev/stderr
  echo "       Possibly the format of the SIGNAL input file is not recognized" >> /dev/stderr
  if ( $CLEAN == "TRUE" ) rm -rf $TEMP/${PID}* >& /dev/null
  exit 1
endif

### switch ( $signaltype )
### case "nis":
### case "NIS":
### case "nist":
### case "NIST":
### case "sph":
### case "SPH":
###   if ( $v > 0 ) echo "DEBUG: ${0:t} : Converting NIST SPHERE to RIFF WAVE, mono, 16 bit ${SIGNAL} -> $TEMP/${PID}${SIGNAL:r:t}.wav"
###   sox -t sph ${SIGNAL} -t wav -e signed-integer -b 16 -c 1 $TEMP/${PID}${SIGNAL:r:t}.wav
###   if ( $status != 0 ) then 
###     echo "ERROR: ${0:t} : cannot convert NIST SPHERE input signal to RIFF WAVE 16bit PCM using sox - exiting" >> /dev/stderr
###     if ( $CLEAN == "TRUE" ) rm -rf $TEMP/${PID}* >& /dev/null
###     exit 5
###   endif  
###   chmod 666 $TEMP/${PID}${SIGNAL:r:t}.wav
###   set SIGNAL = $TEMP/${PID}${SIGNAL:r:t}.wav
###   set SIGNALRATE = `soxi -r $SIGNAL`
###   breaksw
### case "al":
### case "AL":
### case "dea":
### case "DEA":
###   set SIGNALRATE = 8000
###   if ( $v > 0 ) echo "DEBUG: ${0:t} : Converting ALAW/8kHz raw into RIFF WAVE ${SIGNAL} -> $TEMP/${PID}${SIGNAL:r:t}.wav"
###   sox -t raw -e a-law -c 1 -r 8000 ${SIGNAL} -t wav -e signed-integer -b 16 -c 1 $TEMP/${PID}${SIGNAL:r:t}.wav
###   if ( $status != 0 ) then 
###     echo "ERROR: ${0:t} : cannot convert ALAW input signal to RIFF WAVE 16kHz 16bit PCM using sox - exiting" >> /dev/stderr
###     if ( $CLEAN == "TRUE" ) rm -rf $TEMP/${PID}* >& /dev/null
###     exit 5
###   endif  
###   chmod 666 $TEMP/${PID}${SIGNAL:r:t}.wav
###   set SIGNAL = $TEMP/${PID}${SIGNAL:r:t}.wav
###   breaksw
### case "wav":
### case "WAV":
###   cp $SIGNAL $TEMP/${PID}${SIGNAL:t}
###   set SIGNAL = $TEMP/${PID}${SIGNAL:t}
###   set SIGNALRATE = `soxi -r $SIGNAL`
###   chmod 666 ${SIGNAL}
###   breaksw
### case "mp4":
### case "mpeg":
### case "mpg":
### case "avi":
### case "flv":
### case "MP4":
### case "MPEG":
### case "MPG":
### case "AVI":
### case "FLV":
###   which ffmpeg >& /dev/null
###   if ( $status != 0 ) then 
###     echo "ERROR: ${0:t} : ffmpeg is not installed; cannot convert signal file with extension $signaltype - exiting" >> /dev/stderr
###     if ( $CLEAN ==1 ) rm -rf $TEMP/${PID}* >& /dev/null
###     exit 5
###   endif
###   if ( $v > 0 ) echo "DEBUG: ${0:t} : Converting into RIFF WAVE ${SIGNAL} -> $TEMP/${PID}${SIGNAL:r:t}.wav"
###   # try to determine the original sample rate of the audiotrack in the video 
###   # why do we do this, if we later re-sample to 16000Hz anyway? Because then the resultig 
###   # BPF or emuDB files are based on the original sampling rate of the video and not 16000Hz.
###   # Video may contain more than one soundtrack or even none; therefore look how many channels 
###   # are there and the select the default
###   set ffprobeNchannels = `ffprobe $SIGNAL |& grep 'Stream .* Audio' | wc -l`
###   if ( "$ffprobeNchannels" == 0 ) then 
###     echo "ERROR: ${0:t} : ffprobe detects no soundtrack found in $SIGNAL - exiting"
###     if ( $CLEAN ==1 ) rm -rf $TEMP/${PID}* >& /dev/null
###     exit 5
###   else if ( "$ffprobeNchannels" == 1 ) then
###     set VIDEOSAMPLERATE = `ffprobe $SIGNAL |& grep 'Stream .* Audio' | head -n 1 | sed 's/^.* \([0-9][0-9]*\) Hz.*$/\1/'`
###   else
###     # multiple sound tracks: select default
###     set VIDEOSAMPLERATE = `ffprobe $SIGNAL |& grep 'Stream .* Audio.*(default)' | sed 's/^.* \([0-9][0-9]*\) Hz.*$/\1/'`
###     # check language code in mp4 stream
###     set ffprobeLng = `ffprobe $SIGNAL |& grep 'Stream .* Audio.*(default)' | sed 's/^.*(\(...\)): Audio.*$/\1/'`
###     if ( "$ffprobeLng" != "" ) then
###       set LANGUAGE_3code = `echo "$LANGUAGE" | sed 's/^\(...\).*$/\1/'`
###       if ( "$LANGUAGE_3code" != "$ffprobeLng" ) echo "WARNING: ${0:t} : LANGUAGE code (${LANGUAGE}) does not match the language code in the MP4 input soundtrack (${ffprobeLng})" >> /dev/stderr
###     endif
###   endif
###   if ( $VIDEOSAMPLERATE == "" ) then 
###     echo "WARNING: ${0:t} : cannot determine audio sample rate from input video; using 16000Hz" >> /dev/stderr
###     echo "                  this means that e.g. BPF results are based on 16000Hz sampling rate" >> /dev/stderr
###     set VIDEOSAMPLERATE = 16000
###   else
###     if ( $v > 0 ) echo "DEBUG: ${0:t} : extracting audiotrack with ${VIDEOSAMPLERATE}Hz sampling rate from video input"
###   endif
###   # we assume that '-ac 1' equals the default soundtrack (until now this always has been true):
###   ffmpeg -loglevel quiet -y -i ${SIGNAL} -acodec pcm_s16le -ac 1 -ar $VIDEOSAMPLERATE $TEMP/${PID}${SIGNAL:r:t}.wav
###   if ( $status != 0 ) then 
###     echo "ERROR: ${0:t} : cannot convert input signal to RIFF WAVE ${VIDEOSAMPLERATE}Hz 16bit PCM - exiting" >> /dev/stderr
###     if ( $CLEAN == "TRUE" ) rm -rf $TEMP/${PID}* >& /dev/null
###     exit 5
###   endif  
###   chmod 666 $TEMP/${PID}${SIGNAL:r:t}.wav
###   set SIGNAL = $TEMP/${PID}${SIGNAL:r:t}.wav
###   set SIGNALRATE = `soxi -r $SIGNAL`
###   breaksw
### default:
###   echo "ERROR: ${0:t} : unknown signal type extension ${signaltype} - exiting" >> /dev/stderr
###   if ( $CLEAN == "TRUE" ) rm -rf $TEMP/${PID}* >& /dev/null
###   exit 5
###   breaksw
### endsw
### 
# check other parameters (all queried with soxi; any violation ends with exit 2)
# encoding: must be 'Signed Integer PCM'; if soxi reports nothing we only warn
set NISTCODING = "`soxi -e $SIGNAL`"
if ( "$NISTCODING" == "" ) then
  echo "WARNING: ${0:t} : cannot determine coding - assuming 'pcm'" >> /dev/stderr
  set NISTCODING = "pcm"
else if ( "$NISTCODING" != "Signed Integer PCM" ) then
  echo "ERROR: ${0:t} : coding is $NISTCODING - only 'Signed Integer PCM' is allowed - exiting" >> /dev/stderr
  if ( $CLEAN == "TRUE" ) rm -rf $TEMP/${PID}* >& /dev/null
  exit 2
endif
# sample width: 16 bit only
set NISTBITS = `soxi -b $SIGNAL`
if ( $NISTBITS != 16 ) then
  echo "ERROR: ${0:t} : wrong bit number $NISTBITS per sample; only 16bits allowed - exiting" >> /dev/stderr
  if ( $CLEAN == "TRUE" ) rm -rf $TEMP/${PID}* >& /dev/null
  exit 2
endif
# channels: mono only
set NISTCHANNELS = `soxi -c $SIGNAL`
if ( $NISTCHANNELS != 1 ) then
  echo "ERROR: ${0:t} : number of channels is $NISTCHANNELS - only 1 channel is allowed - exiting" >> /dev/stderr
  if ( $CLEAN == "TRUE" ) rm -rf $TEMP/${PID}* >& /dev/null
  exit 2
endif
# we need the number of samples in the original signal later for end segment correction!
set SAMPLELENGTH = `soxi -s $SIGNAL`
if ( $SAMPLELENGTH == 0 ) then
  echo "ERROR: ${0:t} : something is wrong: I detect a zero length signal; maybe a conversion of SIGNAL went wrong - exiting" >> /dev/stderr
  if ( $CLEAN == "TRUE" ) rm -rf $TEMP/${PID}* >& /dev/null
  exit 2
endif

# fork operation modus: if MODUS=standard/align, check/preprocess the BPF input (KANSTR input)
# [note that, depending on the BPF content, the helper MAUSTRN might be called
# in this block; in that case the remainder of the maus processing is skipped
# up to the output format conversion]
if ( $MODUS == "standard" || $MODUS == "align" ) then 
  
  # option BPF supersedes option KANSTR: as soon as a BPF file is given,
  # KANSTR is cleared so that the BPF branch below is taken
  if ( $BPF != "" ) set KANSTR = ""
  
  if ( "$KANSTR" == "" ) then 
    # the BPF input file must exist ...
    if ( ! -e $BPF ) then
      echo "ERROR: ${0:t} : cannot find BPF input file $BPF - exiting" >> /dev/stderr
      if ( $CLEAN == "TRUE" ) rm -rf $TEMP/${PID}* >& /dev/null
      exit 1
    endif
    # ... and carry one of the standard extensions par|PAR|csv|CSV
    if ( ! ( ${BPF:e} == "par" || ${BPF:e} == "PAR" || ${BPF:e} == "csv" || ${BPF:e} == "CSV" ) ) then
      echo "ERROR: ${0:t} : BPF input file $BPF has no standard extensions par|PAR|csv|CSV - exiting" >> /dev/stderr
      if ( $CLEAN == "TRUE" ) rm -rf $TEMP/${PID}* >& /dev/null
      exit 1
    endif
    # if the input BPF has extension csv|CSV, we assume that it is a two-column, ';'-separated spreadsheet
    # table with one word per line, orthography in the 1st and canonical pronunciation in the 2nd column.
    # We check the table, transform it into a BPF, in case OUTFORMAT is set to par|maus-append we write
    # the BPF using the basename and location of the CSV file (write permissions must be set!), and then 
    # proceed for the rest of the script with this BPF file as input. 
    # NOTE(review): as written, the generated BPF is moved next to the CSV file
    # unconditionally, not only for OUTFORMAT=par - confirm which is intended.
    if ( ${BPF:e} == "csv" || ${BPF:e} == "CSV" ) then 
      # check for limiter symbol CSVLIMITER and proper table form
      # (every line must match <non-';' chars>CSVLIMITER<non-';' chars>)
      # NOTE(review): the grep character classes and the awk field separator
      # below hard-code ';' while the limiter itself comes from $CSVLIMITER;
      # a CSVLIMITER other than ';' therefore looks unsupported - confirm.
      set numcsvlines = `cat $BPF | wc -l`
      set numcsvlimiters = `grep -E "^[^;]+${CSVLIMITER}"'[^;]+$' $BPF | wc -l`
      if ( $numcsvlines != $numcsvlimiters ) then 
        echo "ERROR: ${0:t} : CSV input file $BPF does not contain well-formed table - exiting" >> /dev/stderr
        echo "       One line per word: <orthography>${CSVLIMITER}<pronunciation>" >> /dev/stderr
        echo "       <orthography> = UTF-8 encoded orthography (no ${CSVLIMITER} !)" >> /dev/stderr
        echo "       <pronunciation> = SAMPA/IPA encoded canonical pronunciation" >> /dev/stderr
        if ( $CLEAN == "TRUE" ) rm -rf $TEMP/${PID}* >& /dev/null
        exit 1
      endif
      # create output or temporary BPF (we replace the sample rate later)
      set CSVBPF = $TEMP/${PID}_CSVBPF.par
      if ( $v > 0 ) echo "DEBUG: ${0:t} : Detected CSV input - transforming into temporary BPF $CSVBPF"
      # write the BPF header; '##SAMPLERATE##' is a placeholder
      cat << END >! ${CSVBPF}
LHD: Partitur 1.3
SAM: ##SAMPLERATE##
NCH: 1
LBD:
END
      if ( $status != 0 ) then 
        echo "ERROR: ${0:t} : cannot create temporary output BPF file ${CSVBPF} - exiting" >> /dev/stderr
        if ( $CLEAN == "TRUE" ) rm -rf $TEMP/${PID}* >& /dev/null
        exit 1
      endif
      # transform CSV table into ORT and KAN tier 
      # (column 1 -> ORT, column 2 -> KAN, word index counted from 0; CRs are removed)
      awk 'BEGIN{FS=";";idxcnt=0}{printf("ORT:\t%d\t%s\n",idxcnt,$1);idxcnt++}' $BPF | tr -d '\r' >> ${CSVBPF}
      awk 'BEGIN{FS=";";idxcnt=0}{printf("KAN:\t%d\t%s\n",idxcnt,$2);idxcnt++}' $BPF | tr -d '\r' >> ${CSVBPF}
      # place the BPF next to the CSV file (same basename, extension par)
      mv ${CSVBPF} ${BPF:r}.par
      if ( $status != 0 ) then 
        echo "ERROR: ${0:t} : cannot create output BPF file ${BPF:r}.par - exiting" >> /dev/stderr
        if ( $CLEAN == "TRUE" ) rm -rf $TEMP/${PID}* >& /dev/null
        exit 1
      endif
      # from here on the generated BPF replaces the CSV file as input
      set BPF = ${BPF:r}.par
      chmod 666 ${BPF:r}.par
      set BPFORG = ${BPF}
    endif
    # OUTFORMAT=par writes into the BPF input file, so that file must be writable
    if ( $OUTFORMAT == "par" && ( ! -w $BPF ) ) then
      echo "ERROR: ${0:t} : option OUTFORMAT=mau-append|par|bpf but cannot write to BPF input file - exiting" >> /dev/stderr
      if ( $CLEAN == "TRUE" ) rm -rf $TEMP/${PID}* >& /dev/null
      exit 1
    endif
    # checking for KAN tier: the BPF must contain at least one 'KAN:' line
    grep -a '^KAN:' $BPF >& /dev/null
    if ( $status != 0 ) then
      echo "ERROR: ${0:t} : input BPF file $BPF contains no KAN tier - exiting" >> /dev/stderr
      if ( $CLEAN == "TRUE" ) rm -rf $TEMP/${PID}* >& /dev/null
      exit 1
    endif
    # checking for singular silence models in KAN tier, which are not allowed because
    # if a word consists only of a silence model we get problem with word indices
    # (from version 3.33 all languages model '<p>' as non-optional silence model;
    # only '<p:>', '#' and '&' are optional silence models).
    # Here only a 3rd column that is exactly '#' is rejected.
    grep -a '^KAN:' $BPF | awk '{print $3}' | grep -q '^#$'
    if ( $status == 0 ) then
      echo "ERROR: ${0:t} : partitur file $BPF contains 'optional silence word' '#' in the KAN tier - exiting" >> /dev/stderr
      echo "       Optional silence model is a model that can be skipped; if you need to model a word as silence, use the '<p>' model, e.g. 'KAN: 8 <p>'" >> /dev/stderr
      if ( $CLEAN == "TRUE" ) rm -rf $TEMP/${PID}* >& /dev/null
      exit 1
    endif
    # roughly checking if KAN tier is of the right format: every KAN line needs
    # at least 3 columns. grep -a (as in the checks above) keeps grep from
    # replacing the matching lines by a 'Binary file ... matches' notice.
    if ( `grep -a '^KAN:' $BPF | awk 'BEGIN{err="ok"}{if(NF<3)err="error"}END{print err}'` == "error" ) then
      echo "ERROR: ${0:t} : partitur file $BPF contains non-valid KAN lines with less than 3 columns - exiting" >> /dev/stderr
      if ( $CLEAN == "TRUE" ) rm -rf $TEMP/${PID}* >& /dev/null
      exit 1
    endif
    # checking for optional TRN tier(s), call maus.trn or determine offset and duration of pre-segmentation
    # The number of TRN lines decides what happens:
    #   0   : warn and switch USETRN off
    #   1   : either a plain start/length entry (-> TRNOFFSET/TRNDUR are set and
    #         normal processing continues) or a proper TRN entry (-> falls through to 'default')
    #   >1  : chunk segmentation by $MAUSTRN, then jump to out_format_conversion
    if ( $USETRN == "TRUE" ) then 
      set trnanz = `grep -a '^TRN:' $BPF | wc -l`
      switch ( $trnanz )
      case 0:
        echo "WARNING: ${0:t} : partitur file $BPF contains no TRN tier although you set option USETRN=true" >> /dev/stderr
        echo "         will ignore option USETRN" >> /dev/stderr
        set USETRN = "FALSE"
        breaksw
      case 1:
        # For backward compatibility (and to be compatible to WAV2TRN output!) we check
        # whether this single TRN entry carries only the time information start and length as in WAV2TRN output, or a 
        # proper TRN tier entry consisting of start, length and list of word links, e.g. 'TRN: 0 1999 0,1,2,3,4 ...'
        # In the first case we do the normal MAUS segmentation but within the given segment,
        # in the latter case we pass the whole thing to maus.trn and hope for the best.
        # lnklist is 1 if the 4th column looks like a comma separated list of numbers, else 0
        set lnklist = `grep -a '^TRN:' $BPF | head -n 1 | awk '{if($4 ~ /^[0-9][0-9]*,*[0-9,]*$/){print 1}else{print 0} }'`
        if ( "$lnklist" == 0 ) then 
          # no proper TRN: determine offset and duration of utterance within the recording
          # in original samples (SIGNALRATE) and continue with normal MAUS procedure
          set TRNOFFSET = `grep -a '^TRN:' $BPF | head -n 1 | awk '{print $2}'`
          set TRNDUR = `grep -a '^TRN:' $BPF | head -n 1 | awk '{print $3}'`
          # the helper WAV2TRN is buggy and delivers the duration + 1 sample, and the following
          # 'proper' TRN tier contains duration without first sample and the sox
          # trim operation expects duration *exclusive* the first sample; therefore we substract
          # 2 sample from TRNDUR
          @ TRNDUR -= 2
          breaksw
        endif
        # seems to be a proper single TRN: continue with 'default'
        # (deliberate fall-through: there is no breaksw here)
      default:
        # at least one proper TRN entry -> recursive batch processing of chunk segmentation
        # the following WARNING became anoying - and it is not very helful anyway
        #echo "WARNING: ${0:t} : $trnanz TRN entries found in BPF - calling chunk segmentation" >> /dev/stderr
        if( $v > 0 ) echo "DEBUG: ${0:t} : $trnanz TRN entries found in BPF - calling chunk segmentation"
        if ( ! -x $MAUSTRN ) then 
          echo "ERROR: ${0:t} : cannot execute helper $MAUSTRN - exiting" >> /dev/stderr
  	if ( $CLEAN == "TRUE" ) rm -rf $TEMP/${PID}* >& /dev/null
  	exit 5
        endif
        if ( $v > 0 ) echo "DEBUG: ${0:t} : $MAUSTRN TARGETRATE=$TARGETRATE BPFTHRESHOLD=$BPFTHRESHOLD $mausoptions OUT=${MAU}.mau SIGNAL=$SIGNAL"
        $MAUSTRN TARGETRATE="$TARGETRATE" BPFTHRESHOLD=$BPFTHRESHOLD $mausoptions OUT="${MAU}.mau" SIGNAL="$SIGNAL"
        # keep the helper's exit status; an extra hint is printed for status 5
        set maus_trn_err = $status
        if ( $maus_trn_err != 0 ) then 
          echo "ERROR: ${0:t} : helper $MAUSTRN failed - exiting" >> /dev/stderr
          if ( $maus_trn_err == 5 ) then
            echo "       Chunk segmentation with EMU output requires synchronized TRN and KAN tier." >> /dev/stderr
            echo "       Check if the TRN tier in your input BPF covers the complete KAN tier." >> /dev/stderr
          endif
          if ( $CLEAN == "TRUE" ) rm -rf $TEMP/${PID}* >& /dev/null
          exit 5
        endif	
        # result of MAUSTRN should now be a MAU tier in ${MAU}.mau
        # skip the rest of the regular processing
        goto out_format_conversion
        breaksw
      endsw
    endif  
    # checking for BPF input for Emu output: the Emu output formats need an ORT
    # tier, and a KAN tier unless the parameter dir is the default $SOURCE/PARAM
    # NOTE(review): a BPF without KAN tier has already been rejected by the
    # KAN tier check above, so the KAN branch here looks unreachable - confirm.
    if ( $OUTFORMAT == "emu" || $OUTFORMAT == "EMU" || $OUTFORMAT == "legacyEMU" || $OUTFORMAT == "emuDB" ) then
      grep -a '^ORT:' $BPF >& /dev/null
      if ( $status != 0 ) then
        echo "ERROR: ${0:t} : input BPF does not contain ORT tier for emu output" >> /dev/stderr
        echo "       Either use another OUTFORMAT than emu|EMU|legacyEMU|emuR|emuDB or correct input BPF" >> /dev/stderr
        if ( $CLEAN == "TRUE" ) rm -rf $TEMP/${PID}* >& /dev/null
        exit 1
      endif
      grep -a '^KAN:' $BPF >& /dev/null
      if ( $status != 0 ) then
        if ( $PARAM != $SOURCE/PARAM ) then
          echo "ERROR: ${0:t} : input BPF does not contain KAN tier for emu output and language is not German" >> /dev/stderr
          # the option is spelled LANGUAGE; the message must name it correctly
          echo "       Either use another OUTFORMAT than emu|EMU|legacyEMU|emuR|emuDB or provide a KAN tier or select LANGUAGE=deu" >> /dev/stderr
          if ( $CLEAN == "TRUE" ) rm -rf $TEMP/${PID}* >& /dev/null
          exit 1
        endif
      endif
    endif
  endif
  # If the phonological form is given on the commandline in option KANSTR, create
  # a pseudo BPF to process. The pseudo BPF lives in $TEMP and contains only a
  # KAN tier: the blank separated symbols of KANSTR are concatenated per word,
  # and every '#' in KANSTR starts the next word (next KAN line, index + 1).
  if ( "$KANSTR" != "" ) then
    # switch USETRN off since we cannot read from a BPF
    if ( $USETRN == "TRUE" ) set USETRN = "FALSE"
    set BPF = $TEMP/${PID}${SIGNALORG:t:r}.par
    touch $BPF
    if ( $status != 0 ) then
      echo "ERROR: ${0:t} : cannot create intermediate BPF file $BPF - exiting" >> /dev/stderr
      # remove temporary files like every other error exit at this stage
      if ( $CLEAN == "TRUE" ) rm -rf $TEMP/${PID}* >& /dev/null
      exit 1
    endif
    set linkcount = 0
    printf "KAN:\t%d\t" $linkcount >> $BPF
    foreach phon ( $KANSTR )
      # $phon is quoted: symbols such as '{' or '*' must be treated as plain
      # text, neither as expression syntax nor as file name pattern
      if ( "${phon}" == "#" ) then
        @ linkcount ++
        printf "\nKAN:\t%d\t" $linkcount >> $BPF
      else
        printf "%s" "$phon" >> $BPF
      endif
    end
    printf "\n" >> $BPF
  endif
  # Pre-validation of input
  # check if a KAN tier is present in BPF (file input or pseudo BPF from KANSTR)
  grep -a '^KAN:' $BPF >& /dev/null
  if ( $status != 0 ) then
    echo "ERROR: ${0:t} : input BPF $BPF does not contain a KAN tier - exiting" >> /dev/stderr
    if ( $CLEAN == "TRUE" ) rm -rf $TEMP/${PID}* >& /dev/null
    exit 5
  endif
  # In the normal case simply count KAN lines in input BPF
  set numKANLines = `grep -a '^KAN:' $BPF | wc -l`
  # If USETRN==TRUE and a single TRN: line in input, count the number of words in the trn chunk
  # (the comma separated entries in the 4th column of the first TRN line)
  if ( $USETRN == "TRUE" ) then
    set numKANLines = `grep -a '^TRN:' $BPF | head -n 1 | awk '{print $4}' | tr ',' '\n' | wc -l`
  endif
  # refuse inputs with more words than BPFTHRESHOLD
  if ( $numKANLines > $BPFTHRESHOLD ) then
    echo "ERROR: ${0:t} : number of words $numKANLines in BPF (or TRN chunk) exceeds threshold BPFTHRESHOLD = $BPFTHRESHOLD"  >> /dev/stderr
    echo "       maus cannot process effectively such large files; consider breaking the file up into" >> /dev/stderr
    echo "       smaller parts or use an automatic chunking service (Chunker) before MAUS - exiting" >> /dev/stderr
    echo "       PS: or maybe you just forgot to set the option USETRN=true ?" >> /dev/stderr
    if ( $CLEAN == "TRUE" ) rm -rf $TEMP/${PID}* >& /dev/null
    exit 2
  endif
  # If option INSYMBOL was set to IPA, we map the IPA symbols in $BPF to 
  # corresponding MAUS SAMPA (1st column) symbols as defined in the mapping table 
  # $IPATABLE; remember that IPA input requires blank-separated symbols in the KAN tier
  # non-IPA symbols (UTF-8!) in the KAN tier of $BPF will cause an error, except special 
  # characters ˈˌ.#"'+ which will be deleted
  # The mapped BPF is written to a new file in $TEMP, which then replaces $BPF.
  if ( $INSYMBOL == "ipa" ) then
    touch $TEMP/${PID}${SIGNALORG:t:r}.ipa.par
    if ( $status != 0 ) then
      echo "ERROR: ${0:t} : cannot create intermediate BPF file $TEMP/${PID}${SIGNALORG:t:r}.ipa.par - exiting" >> /dev/stderr
      if ( $CLEAN == "TRUE" ) rm -rf $TEMP/${PID}* >& /dev/null
      exit 1
    endif
    chmod 666 $TEMP/${PID}${SIGNALORG:t:r}.ipa.par
    # step 1: copy everything except the KAN tier unchanged (CRs removed)
    grep -v '^KAN:' $BPF | tr -d '\r' >! $TEMP/${PID}${SIGNALORG:t:r}.ipa.par
    # step 2: append the mapped KAN tier. The awk program loads $IPATABLE into a
    # lookup (3rd table column -> 1st table column), replaces every symbol of each
    # KAN line by its lookup value, skips the special characters listed above and
    # remembers a symbol without table entry; in that case it prints an error
    # message in its END block and exits with status 1.
    # NOTE(review): the END block prints $0, i.e. whatever record awk holds at
    # that point, which need not be the KAN line containing the unknown symbol.
    awk -v IPATABLE=$IPATABLE 'BEGIN {err=""; while(getline < IPATABLE > 0){it[$3]=$1}} /^KAN:/ {i=3; printf("KAN:\t%d\t",$2); while($i!="") { if($i=="."||$i=="#"||$i=="'"'"'"||$i=="\""||$i=="+"||$i=="ˌ"||$i=="ˈ"){i++; continue} if(it[$i]==""){err=$i}else{printf("%s ",it[$i])} i++} printf("\n")}END{if(err!=""){printf("ERROR: maus : unknown ipa symbol %s in input KAN tier: %s - exiting\n",err,$0)>>"/dev/stderr";exit(1)}}' $BPF | tr -d '\r' >> $TEMP/${PID}${SIGNALORG:t:r}.ipa.par
    if ( $status != 0 ) then 
      # the awk program has already printed the error message
      #echo "ERROR: ${0:t} : at least one unknown ipa symbol found while mapping input KAN tier encoded in IPA to SAMPA - exiting" >> /dev/stderr
      if ( $CLEAN == "TRUE" ) rm -rf $TEMP/${PID}* >& /dev/null
      exit 1
    endif
    set BPF = $TEMP/${PID}${SIGNALORG:t:r}.ipa.par
  endif
  
  # Finished Processing of phonological Input
  # By now $BPF must contain a valid BPF with a valid KAN tier
  # encoded as SAMPA or blank separated SAMPA 
  
  # Create a MLF file containing the chain of canonical phonemes as listed
  # in the KAN tier of the input partitur file. If unknown phonemes
  # are found, the script terminates with an error message. 
  # Accent, composita and function word markers ("'#+) are deleted; 
  # Words are separated by the word boundary symbol '#'
  # The chain starts with a '<' denoting initial silence and ends with a '>' denoting
  # final silence
  # The phoneme inventory KANINVENTAR defines all possible German SAM-PA
  # phonemes that can be handled by this step including the silence 
  # symbols '<p>' '<p:>' '<', '>', '#' and '&'
  # < : initial silence
  # > : final silence
  # #,<p:> : optional silence, automatically modelled  between words (can be of zero length; then no silence is assumed)
  # & : arbitrary word separator (a word separator that is caused by a rule
  #     that 'glues' two words together. For example the words 'ab Frankfurt'
  #     /ap#fra:nkfu:6t/  ->  /a&fra:nkfu:6t/); treated as '#'.
  # <p> : explicitly modelled silence (cannot be of zero length)
  # This step also does any mappings from KANINVENTAR to GRAPHINVENTAR which 
  # defines the symbols used in the rule set, in the graph and on the left 
  # side of the dictionary.
  # Build the MLF (linear canonical pronunciation) from the KAN tier of $BPF.
  # The file consists of the MLF magic line, the label file pattern and the
  # symbol chain produced by kan2mlf.awk.
  set MLF = $TEMP/${PID}${SIGNALORG:t:r}.mlf
  # create MLF (linear pronunciation) as input to graph generator
  if ( $v > 0 ) echo "DEBUG: ${0:t} : Creating MLF file $MLF from BPF file $BPF"
  # DEBUG output must only appear in verbose mode (v > 0)
  if ( $v > 0 && $NOINITIALFINALSILENCE == "TRUE" ) echo "DEBUG: ${0:t} : suppressing initial/trailing optional silence modelling (NOINITIALFINALSILENCE=true)"
  echo '#\!MLF\!#' >! $MLF
  chmod 666 $MLF
  echo '"*/'${MLF:t:r}'.lab"' >> $MLF
  # kan2mlf.awk receives the phoneme inventory, the word range and the silence switch
  cat $BPF | tr -d '\r' | awk -v INVENTAR=$KANINVENTAR -v STARTWORD=$STARTWORD -v ENDWORD=$ENDWORD -v NOINITIALFINALSILENCE=$NOINITIALFINALSILENCE -f $SOURCE/kan2mlf.awk >> $MLF
  if ( $status != 0 ) then
    echo "ERROR: ${0:t} : something went wrong while reading the BPF input, probably" >> /dev/stderr
    echo "       it contains a symbol that is not defined for this language - exiting" >> /dev/stderr
    if ( $CLEAN == "TRUE" ) rm -rf $TEMP/${PID}* >& /dev/null
    exit 1
  endif
  
endif
# end fork operation modus standard/align


# do signal pre-processing
# (note that at this point the maus call is processing a single turn only;
# i.e. we either had just a single turn from the beginning, or we are now
# in one of several maus calls initiated by MAUSTRN in the previous section.
# Thus, TRNOFFSET etc. are either not set, or set to a single turn read from the input BPF 
# (USETRN=true), or to one of many turns set by MAUSTRN.)

# handle online pre-segmentation
# The input signal (total) or the cut-out signal of a turn (MAUSTRN) are
# pre-segmented.
# Deprecated option value USETRN=force is translated into PRESEG=true / USETRN=false
if ( $USETRN == "force" ) then
  set PRESEG = "TRUE"
  set USETRN = "FALSE"
  # message prefix is 'WARNING:' (with colon) like all other warnings of this script
  echo "WARNING: ${0:t} : option value USETRN=force has been deprecated with version 4.11; use option PRESEG=true instead" >> /dev/stderr
endif
# Online pre-segmentation: the helper $WAV2TRN is expected to print two values
# (read below as offset and duration); they end up in TRNOFFSET / TRNDUR and
# USETRN is switched to TRUE so that the signal is cut later on.
if ( $PRESEG == "TRUE" ) then
  # check that the helper can be found at all
  which $WAV2TRN >& /dev/null   
  if ( $status != 0 ) then 
    echo "WARNING: ${0:t} : option PRESEG=true but no helper $WAV2TRN found to perform the pre-segmentation" >> /dev/stderr
    echo "         ignoring option PRESEG=true" >> /dev/stderr
    set PRESEG = "FALSE"
  else
    # There are two cases possible here:
    # 1. This is a maus call with USETRN=false => we perform a WAV2TRN to the entire input signal,
    #    set TRNOFFSET and TRNDUR and USETRN=true, so that the signal is later cut accordingly.
    # 2. Original maus call was with USETRN=true and the input BPF contained one or multiple TRN entries
    #    => TRNOFFSET and TRNDUR are already set here (to a single TRN entry or one of the multiple 
    #    TRN entries (called by MAUSTRN!); in both cases we perform a WAV2TRN on the signal defined
    #    by TRNOFFSET and TRNDUR, and correct TRNOFFSET and TRNDUR accordingly; then we set 
    #    USETRN=true and let the signal processing cut out the signal later.
    #    Note that TRNDUR is the length of the turn (chunk) minus 1!
    if ( "$USETRN" == "FALSE" ) then 
      # case 1
      set TRNOUT = `$WAV2TRN wav=$SIGNAL`
      if ( $status != 0 ) then 
        # helper failed: USETRN stays FALSE, the whole signal is processed
        echo "WARNING: ${0:t} : helper call $WAV2TRN (1) failed - proceed without online pre-segmentation" >> /dev/stderr
      else	
        # first output value = offset, second output value = duration
        set TRNOFFSET = `echo "$TRNOUT" | awk '{ print $1 }'`       
        set TRNDUR = `echo "$TRNOUT" | awk '{ print $2 }'` 
        # the helper WAV2TRN delivers the duration + 1 sample, and the following 
        # trim operation expects duration *exclusive* the first sample; therefore we substract 
        # 2 sample from TRNDUR
        # NOTE(review): case 2 below subtracts only 1 sample for the same helper - confirm which is intended
        @ TRNDUR -= 2
        if ( $v > 0 ) echo "DEBUG: ${0:t} : Pre-segmentation (1: PRESEG=true) using $WAV2TRN : offset = $TRNOFFSET , duration = $TRNDUR"
        set USETRN = "TRUE"
      endif
    else
      # case 2
      # first trim the SIGNAL according to TRNOFFSET and TRNDUR into a temporary file as input for WAV2TRN
      # NOTE(review): the exit status of this sox call is not checked
      sox $SIGNAL ${SIGNAL:r}_trim_tmp.wav trim ${TRNOFFSET}"s" ${TRNDUR}"s"
      chmod 666 ${SIGNAL:r}_trim_tmp.wav
      # then make the pre-segmentation and adjust offset and duration
      set TRNOUTTMP = `$WAV2TRN wav=${SIGNAL:r}_trim_tmp.wav`
      if ( $status != 0 ) then 
        # helper failed: TRNOFFSET / TRNDUR keep the values they had before
        echo "WARNING: ${0:t} : helper call $WAV2TRN (2) failed - proceed without online pre-segmentation" >> /dev/stderr
      else	
        set TRNOFFSETTMP = `echo "$TRNOUTTMP" | awk '{ print $1 }'`       
        set TRNDURTMP = `echo "$TRNOUTTMP" | awk '{ print $2 }'`
        # the helper WAV2TRN delivers the duration *including* the start sample, but the following 
        # trim operation expects duration *exclusive* the first sample; therefore we substract 
        # one sample from TRNDURTMP
        @ TRNDURTMP --
        if ( $v > 0 ) echo "DEBUG: ${0:t} : Pre-segmentation (2: PRESEG=true) using $WAV2TRN : offset = $TRNOFFSETTMP , duration = $TRNDURTMP"
        # re-calculate TRNOFFSET and TRNDUR with the results of WAV2TRN
        # (the new offset is relative to the already trimmed turn, hence the addition)
        @ TRNOFFSET = $TRNOFFSET + $TRNOFFSETTMP
        set TRNDUR = $TRNDURTMP
        set USETRN = "TRUE"
      endif
    endif
  endif
endif  
# if USETRN is set, cut out the relevant signal 
# (signal is still sampled in SIGNALRATE to be synchronous to BPF!)
# save offset and length of final cut off; we assume here that all our 
# signals have 16 bits per sample (has been tested)
# Cut the turn [TRNOFFSET, TRNOFFSET+TRNDUR] out of $SIGNAL into <signal>_trim.wav
# and remember in TRNFINAL how many samples follow the turn.
if ( $USETRN == "TRUE" ) then 
  if ( $v > 0 ) echo "DEBUG: ${0:t} : Offset $TRNOFFSET, length $TRNDUR samples (option USETRN)"
  # total number of samples of the (uncut) signal
  # NOTE(review): soxi is called with an absolute path while sox is looked up via PATH
  set totalsam = `/usr/bin/soxi -s $SIGNAL`
  # TRNFINAL = totalsam - TRNOFFSET - TRNDUR - 1, computed step by step
  set TRNFINAL = $totalsam
  @ TRNFINAL -= $TRNOFFSET
  @ TRNFINAL -= $TRNDUR
  # since TRNDUR is length in samples minus 1, we have to substract 1
  @ TRNFINAL --
  if ( $v > 0 ) echo "DEBUG: ${0:t} :    Cut off final segment is $TRNFINAL samples long (option USETRN)"
  # a negative rest means that the turn reaches beyond the end of the signal
  if ( $TRNFINAL < 0 ) then 
    if ( $v > 0 ) echo "DEBUG: ${0:t} : TRN segment exceeds the signal length; check the last TRN tier entry in BPF input"
    # note: $TRNFINAL is negative in this message
    echo "ERROR: ${0:t} : faulty information in TRN tier: segment exceeds signal length by $TRNFINAL samples; check the last TRN tier entry in BPF input  - exiting" >> /dev/stderr
    if ( $CLEAN == "TRUE" ) then 
      rm -rf $TEMP/${PID}* >& /dev/null
    endif  
    exit 1
  endif  
  if ( $v > 0 ) echo "DEBUG: ${0:t} :    $SIGNAL -> ${SIGNAL:r}_trim.wav"
  # offset and duration are passed to sox in samples (suffix 's')
  # NOTE(review): the exit status of sox is not checked here
  sox $SIGNAL ${SIGNAL:r}_trim.wav trim ${TRNOFFSET}"s" ${TRNDUR}"s"
  chmod 666 ${SIGNAL:r}_trim.wav
  # all following steps work on the cut-out signal
  set SIGNAL = ${SIGNAL:r}_trim.wav
endif
# check if re-sampling is necessary
# Bring the signal to MODELRATE. If the rates differ and allowresamp is TRUE the
# signal is converted with sox into <signal>_resamp.wav (exit 5 if sox fails);
# otherwise the script terminates with exit code 2.
if ( $SIGNALRATE != $MODELRATE ) then
 # a signal rate below the model rate only triggers a warning; processing continues
 if ( $SIGNALRATE < $MODELRATE ) then
  echo "WARNING: ${0:t} : sample rate of signal $SIGNALRATE is less than $MODELRATE" >> /dev/stderr
  echo "         This will lead to very bad segmentation results! Use at least 16000Hz sampling rate!" >> /dev/stderr
 endif
 if ( $allowresamp == "TRUE" ) then
  if ( $v > 0 ) then
    # all three lines belong to one DEBUG message and therefore go to the same stream
    echo "DEBUG: ${0:t} : input sampling rate $SIGNALRATE does not match model rate $MODELRATE"
    echo "         re-sampling using sox standard resampling method"
    echo "         ${SIGNAL} -> ${SIGNAL:r}_resamp.wav"
  endif
  # the following sox syntax does a rate conversion without dithering.
  # this works also for sox version 14.3 and higher if SOX_OPTS is set to '-D'
  # (-D is being ignored by lower sox versions, but not on the commandline!)
  # The exit status is saved in 'err' immediately because the following
  # statements would overwrite $status.
  if ( $v > 0 ) then
    echo "DEBUG: ${0:t} : sox ${SIGNAL} ${SIGNAL:r}_resamp.wav rate -s -a $MODELRATE"
    sox ${SIGNAL} ${SIGNAL:r}_resamp.wav rate -s -a $MODELRATE
    set err = $status
  else
    sox ${SIGNAL} ${SIGNAL:r}_resamp.wav rate -s -a $MODELRATE >& /dev/null
    set err = $status
  endif
  if ( $err != 0 ) then
    echo "ERROR: ${0:t} : resampling using sox failed - exiting" >> /dev/stderr
    echo "       set debug option v > 0 to view error messages of sox" >> /dev/stderr
    if ( $CLEAN == "TRUE" ) rm -rf $TEMP/${PID}* >& /dev/null
    exit 5
  endif
  chmod 666 ${SIGNAL:r}_resamp.wav
  # all following steps work on the re-sampled signal
  set SIGNAL = ${SIGNAL:r}_resamp.wav
 else
  # the complete error message goes to stderr, including its last line
  echo "ERROR: ${0:t} : input sampling rate $SIGNALRATE does not match model rate $MODELRATE" >> /dev/stderr
  echo "       use option 'allowresamp=yes' for automatic re-sampling" >> /dev/stderr
  echo "       exiting" >> /dev/stderr
  if ( $CLEAN == "TRUE" ) rm -rf $TEMP/${PID}* >& /dev/null
  exit 2
 endif
else
 if ( $v > 0 ) echo "DEBUG: ${0:t} : Sample rate of input is equal model sample rate - ok"
endif
# from now on we assume that the signal has MODELRATE
# Run HCopy with the configuration $PRECONFIG to create the HTK file $HTK from
# $SIGNAL; any non-zero exit status of HCopy terminates the script (exit 5).
if ( $v > 1 ) echo "DEBUG: ${0:t} : Original sample rate: $SIGNALRATE, processing sampling rate: $MODELRATE"
if ( $v > 1 ) echo "DEBUG: ${0:t} :     coding: $NISTCODING, bits: $NISTBITS"
if ( $v > 0 ) echo "DEBUG: ${0:t} : Creating HTK file $HTK"
HCopy -T $TRACE -C $PRECONFIG $SIGNAL $HTK
if ( $status != 0 ) then 
  echo "ERROR: ${0:t} : HCopy crashed with error code <> 0 - exiting" >> /dev/stderr
  if ( $CLEAN == "TRUE" ) rm -rf $TEMP/${PID}* >& /dev/null
  exit 5
endif  
chmod 666 $HTK

# fork operation modus
if ( $MODUS == 'standard' || $MODUS == "align" ) then 
  
  # write original sampling rate of input signal into input BPF (only necessary 
  # for CSV input)
  # Replace the placeholder ##SAMPLERATE## in $BPF by the original signal rate
  # (first occurrence per line) and strip carriage returns; the result replaces $BPF.
  # NOTE(review): neither sed nor mv are checked for errors here
  sed "s/##SAMPLERATE##/${SIGNALRATE}/" $BPF | tr -d '\r' >! $TEMP/${PID}_CSVBPFTMP.par
  mv $TEMP/${PID}_CSVBPFTMP.par $BPF
  # Create the graph from the MLF into a SLF 
  # If you use a phonological based rule set (e.g. regeln9.nrul) without
  # statistics, you need the option 'wwt=man' to word_var-2.0
  # The rule set must contain the same symbols as in GRAPHINVENTAR
  # Sanity check: the MLF must still exist at this point
  if ( ! -e $MLF ) then 
    echo "ERROR: ${0:t} : something went terrible wrong: the MLF I just created vanished" >> /dev/stderr
    echo "       exiting" >> /dev/stderr
    if ( $CLEAN == "TRUE" ) rm -rf $TEMP/${PID}* >& /dev/null
    exit 1
  endif  
  # If we only align to the input string of phonetic SAM-PA symbols,
  # we don't need a SLF file and skip the following
  if ( $MODUS == "standard" ) then 
    # The SLF is named after the MLF (extension .slf)
    set SLF = ${MLF:r}.slf
    if ( $v > 0 ) echo "DEBUG: ${0:t} : Creating SLF file $SLF"
    if ( $v > 0 ) echo "DEBUG: ${0:t} :   using rule set $RULESET"
    # remove potential BOM and change line terminators to NL
    # 'touch' serves as a probe for write permission on the rule set
    touch "$RULESET" >& /dev/null
    if ( $status != 0 ) then 
      # RULESET is not writable; make copy and then dos2unix the copy
      # (the copy keeps the extension because the extension selects the rule type below)
      cp "$RULESET" $TEMP/${PID}_RULESET.${RULESET:e}
      set RULESET = $TEMP/${PID}_RULESET.${RULESET:e}
      chmod 666 "$RULESET"
    endif
    # both converters are applied to the rule set file itself (-q: quiet)
    dos2unix -q "$RULESET"
    mac2unix -q "$RULESET"
    # Create the SLF with word_var-2.0; the extension of the rule set selects the mode:
    #   *.rul  : statistical rule set
    #   *.nrul : phonological rule set (needs wwt=man and a reduced symbol inventory)
    # Any other extension is an error. The extension is quoted so that a rule
    # set name without extension does not break the expression.
    if ( "${RULESET:e}" == "rul" ) then
      # the exit status is saved in 'err' right after the call because the
      # following statements would overwrite $status
      if ( $v > 1 ) then
        echo "DEBUG: ${0:t} : $SOURCE/word_var-2.0 if=$MLF odir=$TEMP iv=$GRAPHINVENTAR rg=$RULESET jwk=-1"
        $SOURCE/word_var-2.0 if=$MLF odir=$TEMP iv=$GRAPHINVENTAR rg="$RULESET" jwk=-1
        set err = $status
      else
        $SOURCE/word_var-2.0 if=$MLF odir=$TEMP iv=$GRAPHINVENTAR \
          rg="$RULESET" jwk=-1 >& /dev/null
        set err = $status
      endif
      if ( $err != 0 ) then
        echo "ERROR: ${0:t} : word_var crashed with error code <> 0 - exiting" >> /dev/stderr
        echo "       try running with option v=2 to view error messages" >> /dev/stderr
        if ( $CLEAN == "TRUE" ) rm -rf $TEMP/${PID}* >& /dev/null
        exit 5
      endif
    else if ( "${RULESET:e}" == "nrul" ) then
      # due to the separator symbol '-' between contexts in 'nrul' sets, we have to remove
      # all X-SAMPA symbols from the (internal) symbol inventory that contain '-', e.g. /N-/.
      egrep -v -e '-' $GRAPHINVENTAR >! $TEMP/${PID}_GRAPHINVENTAR
      set GRAPHINVENTAR = $TEMP/${PID}_GRAPHINVENTAR
      # if the input sampa symbol sequence in the MLF contains any symbols ending on '\' (which
      # has been mapped to '-' in the reading script), we issue a warning because these
      # SAMPA symbols will be ignored by word_var
      egrep -q -e '-$' "$MLF"
      if ( $status == 0 ) then
        # the warning is assembled from three parts; all of them must go to stderr
        echo -n "WARNING: ${0:t} : using a phonological rule set (*.nrul) will affect SAMPA symbols ending on '\'; the symbols " >> /dev/stderr
        egrep -e '-$' $MLF | sort -u | tr '\n' ' ' >> /dev/stderr
        echo " will be deleted from the input to the MAUS alignment." >> /dev/stderr
      endif
      if ( $v > 1 ) then
        echo "DEBUG: ${0:t} : $SOURCE/word_var-2.0 if=$MLF odir=$TEMP iv=$GRAPHINVENTAR rg=$RULESET jwk=-1 wwt=man"
        $SOURCE/word_var-2.0 if=$MLF odir=$TEMP iv=$GRAPHINVENTAR rg="$RULESET" jwk=-1 wwt=man
        set err = $status
      else
        $SOURCE/word_var-2.0 if=$MLF odir=$TEMP iv=$GRAPHINVENTAR rg="$RULESET" jwk=-1 wwt=man >& /dev/null
        set err = $status
      endif
      if ( $err != 0 ) then
        echo "ERROR: ${0:t} : word_var crashed with error code <> 0 - exiting" >> /dev/stderr
        echo "       try running with option v=2 to view error messages" >> /dev/stderr
        if ( $CLEAN == "TRUE" ) rm -rf $TEMP/${PID}* >& /dev/null
        exit 5
      endif
    else
      echo "ERROR: ${0:t} : unknown extension in rule set name : ${RULESET:e}" >> /dev/stderr
      # remove the intermediate files like every other error exit does
      if ( $CLEAN == "TRUE" ) rm -rf $TEMP/${PID}* >& /dev/null
      exit 1
    endif
    # Post-process the SLF (permissions, optional display for v > 2, optional
    # copy to $OUTSLF) and run HVite on the HTK file $HTK.
    chmod 666 $SLF
    if ( $v > 2 ) then
      # started in the background
      $SOURCE/ShowLattice $SLF &
    endif
    if ( $OUTSLF != "" ) cp -f $SLF $OUTSLF
  
    # Align the signal file HTK to the SLF
    if ( $v > 0 ) then 
      # verbose: HVite with -A and -T $TRACE, output is not suppressed
      echo "DEBUG: ${0:t} : Aligning SLF file to HTK file"
      HVite -A -C $HVITECONF $HVITEOPTIONS -w -X slf -H $MMF -s $WEIGHT -p $INSPROB -T $TRACE \
      $DICT $HMMINVENTAR $HTK
    else  
      # quiet: all output of HVite is discarded
      HVite -C $HVITECONF $HVITEOPTIONS -w -X slf -H $MMF -s $WEIGHT -p $INSPROB  \
      $DICT $HMMINVENTAR $HTK >& /dev/null
    endif
    if ( $status != 0 ) then 
      # theoretically since we do not use any pruning, HVite should always produce a
      # result; however, if the signal and network are totally mis-matched HVite 
      # reports 'no tokens survived to final node' and exits with non-zero
      # NOTE(review): $status is read a second time in the message below - confirm
      # that it still holds the exit code of HVite at that point
      echo "ERROR: ${0:t} : HVite returns error code $status" >> /dev/stderr
      echo "       Try to run the command with 'v=1' to check the errors" >> /dev/stderr
      if ( $CLEAN == "TRUE" ) rm -rf $TEMP/${PID}* >& /dev/null
      exit 5
    endif  
  else  
    # If MODUS=align we simply make a forced alignment (no SLF available)
    # Align the signal file HTK to the MLF
    if ( $v > 0 ) then 
      # verbose: HVite with -A and -T $TRACE, output is not suppressed
      echo "DEBUG: ${0:t} : Aligning MLF file to HTK file"
      HVite -A -C $HVITECONF $HVITEOPTIONS -a -X lab -I $MLF -H $MMF -s $WEIGHT -p $INSPROB -T $TRACE \
      $DICT $HMMINVENTAR $HTK
    else  
      # quiet: all output of HVite is discarded
      HVite -C $HVITECONF $HVITEOPTIONS -a -X lab -I $MLF -H $MMF -s $WEIGHT -p $INSPROB  \
      $DICT $HMMINVENTAR $HTK >& /dev/null
    endif
    if ( $status != 0 ) then 
      # theoretically since we do not use any pruning, HVite should always produce a
      # result; however, if the signal and network are totally mis-matched HVite 
      # reports 'no tokens survived to final node' and exits with non-zero
      # NOTE(review): $status is read a second time in the message below - confirm
      # that it still holds the exit code of HVite at that point
      echo "ERROR: ${0:t} : HVite returns error code $status" >> /dev/stderr
      echo "       Try to run the command with 'v=1' to check the errors" >> /dev/stderr
      if ( $CLEAN == "TRUE" ) rm -rf $TEMP/${PID}* >& /dev/null
      exit 5
    endif  
  
  endif # operation modus = 'standard/align'

# fork operation modus 'bigram'
else
  
  # run a free HTK recognition cycle constrained by a phone bigram on SIGNAL
  # (network $LATBIGRAM, dictionary $DICTBIGRAM)
  if ( $v > 0 ) then 
    echo "DEBUG: ${0:t} : Run phone recognition on HTK file"
    HVite -A -C $HVITECONF $HVITEOPTIONS -w "$LATBIGRAM" -H $MMF -s $WEIGHT -p $INSPROB -T $TRACE \
    $DICTBIGRAM $HMMINVENTAR $HTK
  else  
    # NOTE(review): unlike the quiet HVite calls of the standard/align modes this
    # call keeps '-T $TRACE' and does not redirect the output to /dev/null -
    # confirm whether this is intended
    HVite -C $HVITECONF $HVITEOPTIONS -w "$LATBIGRAM" -H $MMF -s $WEIGHT -p $INSPROB -T $TRACE \
    $DICTBIGRAM $HMMINVENTAR $HTK
  endif
  if ( $status != 0 ) then 
    # theoretically since we do not use any pruning, HVite should always produce a
    # result; however, if the signal and network are totally mis-matched HVite 
    # reports 'no tokens survived to final node' and exits with non-zero
    # NOTE(review): $status is read a second time in the message below - confirm
    # that it still holds the exit code of HVite at that point
    echo "ERROR: ${0:t} : HVite returns error code $status" >> /dev/stderr
    echo "       Try to run the command with 'v=1' to check the errors" >> /dev/stderr
    if ( $CLEAN == "TRUE" ) rm -rf $TEMP/${PID}* >& /dev/null
    exit 5
  endif  

endif
# end fork operation modus

# Check on results
# The result file of HVite is named after the HTK file with the extension .rec;
# it must exist and must not be empty (-z), otherwise the script exits with 5.
if ( ! -e ${HTK:r}.rec || -z ${HTK:r}.rec ) then
  echo "ERROR: ${0:t} : HVite returns ok but no ${HTK:r}.rec found or is zero length" >> /dev/stderr
  echo "       Try to run the command with 'v=1' to check the errors" >> /dev/stderr
  if ( $CLEAN == "TRUE" ) rm -rf $TEMP/${PID}* >& /dev/null
  exit 5
endif  
chmod 666 ${HTK:r}.rec

# Producing output

# Debug output about the options that influence the MAU tier, and clamping of
# MINPAUSLEN to a minimum of 1.
if ( $v > 0 ) echo "DEBUG: ${0:t} : Producing temporary mau output based on sample rate $SIGNALRATE in ${MAU}.mau"
if ( $v > 0 && $MAUSSHIFT != 0 ) echo "DEBUG: ${0:t} : Boundaries are increased by $MAUSSHIFT msec (option MAUSSHIFT)"
# values below 1 are silently raised to 1
if ( $MINPAUSLEN < 1 ) set MINPAUSLEN = 1
if ( $v > 0 && $MINPAUSLEN > 1 ) then 
  echo "DEBUG: ${0:t} : Inter-word pauses smaller than $MINPAUSLEN frames are spread to"
  echo "DEBUG: ${0:t} :   adjacent segments; except if the preceeding or following segment is a plosive"
  echo "DEBUG: ${0:t} :   in which case the pause is completely added to that plosive (option MINPAUSLEN)"
endif  
# Produce a MAU tier in the TEMP area based on SIGNALRATE
# Extract the MAU tier from the HTK *.rec file
# The HMM '#' is designed in a way that it
# has a minimum duration of zero frames. Since the graph contains a '#'
# between each word, we can detect '#' segments that are less than 
# MINPAUSLEN 
# frames long and delete them, because they are not a 'real' pause between 
# words. The silence is equally spread to the adjacent phonemes.
# However, if the initial phoneme of the following words happens to be 
# a plosive, we distribute the deleted silence interval totally to the plosiv.
# The list of plosives is required by the conversion below
if ( ! -f $PARAM/PLOSIVES ) then 
  echo "ERROR: ${0:t} : cannot find list of plosives $PARAM/PLOSIVES - exiting" >> /dev/stderr
  if ( $CLEAN == "TRUE" ) rm -rf $TEMP/${PID}* >& /dev/null
  exit 1
endif  
# Convert the HVite result (*.rec) into the temporary MAU tier ${MAU}.mau with
# rec2mau.awk; all options that the awk script needs are handed over with -v.
# NOTE(review): the exit status of this pipeline is not checked
cat ${HTK:r}.rec | awk -v MINPAUSLEN=$MINPAUSLEN \
	-v MODELRATE=$MODELRATE -v FRAMERATE=$FRAMERATE \
	-v SIGNALRATE=$SIGNALRATE -v PLOSIVES=$PARAM/PLOSIVES \
  	-v STARTWORD=$STARTWORD -v MAUSSHIFT=$MAUSSHIFT \
	-v ADDSEGPROB=$ADDSEGPROB \
	-f $SOURCE/rec2mau.awk >! ${MAU}.mau
chmod 666 ${MAU}.mau	
 

# if we have used USETRN and have cut out the signal (chunk), the mau file contains 
# now segments that are shifted by TRNOFFSET and the total length of the 
# recording is by TRNFINAL too small. Here we correct this, but first we have 
# to make sure that the HTK result covers the total chunk (see below); 
# if not, we augment the last segment.
# USETRN post-processing of the temporary MAU tier:
#  1. stretch the last segment so that it ends exactly at the end of the chunk
#  2. shift all segments by TRNOFFSET (correctusetrn.awk) and account for the
#     TRNFINAL samples that follow the chunk (finalusetrn.awk)
if ( $USETRN == "TRUE" ) then
  if ( $v > 0 ) echo "DEBUG: ${0:t} : TRNDUR = $TRNDUR, TRNFINAL = $TRNFINAL (USETRN=true)"
  # begin (column 2) and duration (column 3) of the last MAU segment
  set lastmaubegin = `tail -n 1 ${MAU}.mau | awk '{print $2}'`
  set lastmaudur = `tail -n 1 ${MAU}.mau | awk '{print $3}'`
  @ lastmausample = $lastmaubegin + $lastmaudur
  @ lastmausample ++
  # chunk length is TRNDUR + 1 (TRNDUR was decreased by 1 because of sox trim operation!)
  set chunklength = $TRNDUR
  @ chunklength ++
  if ($lastmausample != $chunklength ) then
    if ( $v > 0 ) echo "DEBUG: ${0:t} : last segment (end = ${lastmausample}) does not match chunk length (${chunklength}) (USETRN=true) - correcting"
    # corrected duration = chunklength - begin - 1
    @ corlastmaudur = $chunklength - $lastmaubegin
    @ corlastmaudur --
    if ( $corlastmaudur < 1 ) then
      echo "WARNING: ${0:t} : correction of last segment is larger than last segment (USETRN=true) - doing nothing" >> /dev/stderr
    else
      # all lines but the last one are copied unchanged
      head --lines=-1 ${MAU}.mau >! ${MAU}.mau.tmp
      # The label is never assigned to a shell variable because labels starting
      # with '{' cause an error there; instead only the duration field (column 3
      # of the tab-separated line) is replaced. Addressing the field by position
      # is essential: a textual replacement of the old duration value would hit
      # the begin field (column 2) whenever begin and duration are equal.
      tail -n 1 ${MAU}.mau | awk -F'\t' -v OFS='\t' -v NEWDUR=$corlastmaudur '{$3=NEWDUR; print}' >> ${MAU}.mau.tmp
      mv -f ${MAU}.mau.tmp ${MAU}.mau
      chmod 666 ${MAU}.mau
    endif
  endif
  if ( $v > 0 ) echo "DEBUG: ${0:t} : Shifting and expanding mau results option (USETRN)"
  cat ${MAU}.mau | awk -v TRNOFFSET=$TRNOFFSET -f $SOURCE/correctusetrn.awk | awk -v TRNFINAL=$TRNFINAL -f $SOURCE/finalusetrn.awk >! ${MAU}.mau.tmp
  mv -f ${MAU}.mau.tmp ${MAU}.mau
  chmod 666 ${MAU}.mau
endif
 
# The HTK recognizer does not deliver a last segment matching the exact length of the 
# input signal (i.e. the last segment ends before the end of the signal). Since some 
# tools (e.g. praat scripts) are sensitive against this mismatch, we get the exact 
# sample size of the input signal file and match the last segment in the MAU tier 
# against this number.
# Get the exact length (in samples) of the signal file, and take care that
# the last segment in the MAUS tier matches this length.

# correct the last line in temporary MAU so that the segment covers until the last sample
# Stretch the last MAU segment so that it ends exactly at the last sample of
# the input signal ($SAMPLELENGTH).
# begin (column 2) and duration (column 3) of the last MAU segment
set lastmaubegin = `tail -n 1 ${MAU}.mau | awk '{print $2}'`
set lastmaudur = `tail -n 1 ${MAU}.mau | awk '{print $3}'`
#set lastmaulnk = `tail -n 1 ${MAU}.mau | awk '{print $4}'`
#set lastmaulabel = `tail -n 1 ${MAU}.mau | awk '{print $5}'`
@ lastmausample = $lastmaubegin + $lastmaudur
@ lastmausample ++
if ($lastmausample != $SAMPLELENGTH ) then
  if ( $v > 0 ) echo "DEBUG: ${0:t} : last segment (end = ${lastmausample}) does not match input signal length (${SAMPLELENGTH}) - correcting"
  # corrected duration = SAMPLELENGTH - begin - 1
  @ corlastmaudur = $SAMPLELENGTH - $lastmaubegin
  @ corlastmaudur --
  if ( $corlastmaudur < 1 ) then
    echo "WARNING: ${0:t} : correction of last segment is larger than last segment - doing nothing" >> /dev/stderr
  else
    # all lines but the last one are copied unchanged
    head --lines=-1 ${MAU}.mau >! ${MAU}.mau.tmp
    # The label is never assigned to a shell variable because labels starting
    # with '{' cause an error there; instead only the duration field (column 3
    # of the tab-separated line) is replaced. Addressing the field by position
    # is essential: a textual replacement of the old duration value would hit
    # the begin field (column 2) whenever begin and duration are equal.
    tail -n 1 ${MAU}.mau | awk -F'\t' -v OFS='\t' -v NEWDUR=$corlastmaudur '{$3=NEWDUR; print}' >> ${MAU}.mau.tmp
    mv -f ${MAU}.mau.tmp ${MAU}.mau
    chmod 666 ${MAU}.mau
  endif
endif
# if we requested symbols other than (default) SAMPA symbols we replace them here
# LANGUAGEs with tone markers "..._1" etc. map the sampa symbol with tone marker to IPA
# with tone marker without being listed in IPATABLE!
# if option ADDSEGPROB is set, the probability is printed after the replaced symbol ($6!)
# Replace the SAMPA labels (column 5) of the temporary MAU tier by the column of
# $IPATABLE that belongs to $OUTSYMBOL (tab-separated table, key = column 1,
# reading stops at the first line starting with '---'):
#   ipa    -> column 3
#   manner -> column 7
#   place  -> column 8
# Labels without a direct mapping are looked up again without a trailing tone
# marker (_1 ... _9); for 'ipa' the last character of the label is then
# re-attached as '_<char>'. An optional sixth column is passed through.
# OUTSYMBOL is handed to awk with -v: inside the single-quoted awk program the
# shell does not expand $OUTSYMBOL, so the warning would show it literally.
# NOTE(review): for a label without any mapping only the warning is issued and
# the line is reduced to a bare newline (or tab + sixth column) - confirm that
# this is what 'empty labels' is meant to be.
if ( $OUTSYMBOL != "sampa" ) then
  if ( $v > 0 ) echo "DEBUG: ${0:t} : Coding phonetic symbols in output into $OUTSYMBOL"
  switch ( "$OUTSYMBOL" )
    case "ipa":
      gawk -v IPATABLE=$IPATABLE -v OUTSYMBOL=$OUTSYMBOL 'BEGIN {FS="\t"; while(getline < IPATABLE > 0){if(substr($1,1,3)=="---")break; it[$1]=$3}} /^MAU:/ {s=$5;if(it[s]==""){gsub(/_[1-9]$/,"",s);if(it[s]==""){printf("WARNING: maus : no mapping to OUTSYMBOL=%s found for SAMPA /%s/ - output will contain empty labels!\n",OUTSYMBOL,$5)>>"/dev/stderr"}else{t=substr($5,length($5));printf("MAU:\t%d\t%d\t%d\t%s_%s",$2,$3,$4,it[s],t)}}else{printf("MAU:\t%d\t%d\t%d\t%s",$2,$3,$4,it[$5])}if($6==""){printf("\n")}else{printf("\t%s\n",$6)}}' ${MAU}.mau >! ${MAU}.mau.tmp
      breaksw
    case "manner":
      gawk -v IPATABLE=$IPATABLE -v OUTSYMBOL=$OUTSYMBOL 'BEGIN {FS="\t"; while(getline < IPATABLE > 0){if(substr($1,1,3)=="---")break; it[$1]=$7}} /^MAU:/ {s=$5;if(it[s]==""){gsub(/_[1-9]$/,"",s);if(it[s]==""){printf("WARNING: maus : no mapping to OUTSYMBOL=%s found for SAMPA /%s/ - output will contain empty labels!\n",OUTSYMBOL,$5)>>"/dev/stderr"}else{printf("MAU:\t%d\t%d\t%d\t%s",$2,$3,$4,it[s])}}else{printf("MAU:\t%d\t%d\t%d\t%s",$2,$3,$4,it[$5])}if($6==""){printf("\n")}else{printf("\t%s\n",$6)}}' ${MAU}.mau >! ${MAU}.mau.tmp
      breaksw
    case "place":
      gawk -v IPATABLE=$IPATABLE -v OUTSYMBOL=$OUTSYMBOL 'BEGIN {FS="\t"; while(getline < IPATABLE > 0){if(substr($1,1,3)=="---")break; it[$1]=$8}} /^MAU:/ {s=$5;if(it[s]==""){gsub(/_[1-9]$/,"",s);if(it[s]==""){printf("WARNING: maus : no mapping to OUTSYMBOL=%s found for SAMPA /%s/ - output will contain empty labels!\n",OUTSYMBOL,$5)>>"/dev/stderr"}else{printf("MAU:\t%d\t%d\t%d\t%s",$2,$3,$4,it[s])}}else{printf("MAU:\t%d\t%d\t%d\t%s",$2,$3,$4,it[$5])}if($6==""){printf("\n")}else{printf("\t%s\n",$6)}}' ${MAU}.mau >! ${MAU}.mau.tmp
      breaksw
  endsw
  mv -f ${MAU}.mau.tmp ${MAU}.mau
  chmod 666 ${MAU}.mau
endif

# In bigram mode the MAU tier has no links to words: every word link (column 4)
# that is not already '-1' is replaced by '-1' so that the following file format
# converters work. An optional sixth column (see option ADDSEGPROB) is kept, so
# that rewritten lines carry the same columns as the lines that are passed
# through unchanged.
if ( $MODUS == "bigram" ) then
  awk '/^MAU:/{if($4 != "-1"){if($6 == "") printf("MAU:\t%d\t%d\t-1\t%s\n",$2,$3,$5); else printf("MAU:\t%d\t%d\t-1\t%s\t%s\n",$2,$3,$5,$6)} else print $0}' ${MAU}.mau >! ${MAU}.mau.tmp
  mv -f ${MAU}.mau.tmp ${MAU}.mau
  chmod 666 ${MAU}.mau
endif

# Then decide what to do with the output:
out_format_conversion:

# MAO = version plus option string of this call in one comma-separated word:
# double quotes are removed, ':' and ',' become '_', blanks become ','
set MAO = `echo "VERSION=$VERSION $mausoptions" | tr -d '"' | sed -e 's%[:,]%_%g' -e 's% %,%g'`
if ( $v > 0 ) then
  echo "DEBUG: ${0:t} : Options string (MAO): $MAO"
endif

# First we have to handle legacy EMU and mau output, because ANNOTCONV cannot do this
if ( $OUTFORMAT == "emu" || $OUTFORMAT == "EMU" || $OUTFORMAT == "legacyEMU" ) then   

  # Determine the output directory OUTEMU of the legacy Emu files: the directory
  # part of $OUT if that option is given, otherwise the directory part of the
  # original signal file; "." if there is no directory part.
  if ( $v > 0 ) echo "DEBUG: ${0:t} : Extracting into legacy Emu files *.hlb and *.phonetic"
  # select location of output dir
  if ( $OUT != "" ) then 
    # take from OUT file
    echo "WARNING: ${0:t} : option OUT=$OUT provided together with Emu output (OUTFORMAT=emu|EMU|legacyEMU)" >> /dev/stderr
    echo "         Emu result files are named as the signal file and written into the location ${OUT:h}" >> /dev/stderr
    # head (:h) and tail (:t) are compared to detect a name without directory part
    if ( $OUT:h != $OUT:t ) then 
      set OUTEMU = $OUT:h
    else
      set OUTEMU = "."
    endif
  else
    # take from signal file
    if ( ${SIGNALORG:h} == ${SIGNALORG:t} ) then
      # SIGNALORG has no dir
      set OUTEMU = "."
    else
      set OUTEMU = ${SIGNALORG:h}
    endif 
  endif  
  # Since the tool PAR2EMU needs a fully equipped BPF as input and the 
  # Emu phonetic file needs the full path of the corresponding signal file,
  # we first create a temporary BPF file in the location of the signal file
  # (write permission needed!) and then call PAR2EMU
  # Prepare the BPF that is handed to PAR2EMU: it lives next to the signal file
  # (same name, extension .par), contains the input BPF without any old MAU
  # tier, the new MAU tier and a SAM header entry.
  # bpftmp_e = 1 : this file is the input BPF itself (it must be kept)
  # bpftmp_e = 0 : this file is a temporary copy (it is removed after the export)
  set BPFTMP = ${SIGNALORG:r}.par
  # scratch file for rewriting $BPFTMP; it is located in $TEMP and starts with
  # ${PID} so that the configured TEMP dir is honoured and the CLEAN wildcard
  # $TEMP/${PID}* covers it
  set EMUBPFSCRATCH = $TEMP/${PID}_EMUBPFSCRATCH.par
  if ( $BPFTMP == $BPF ) then
    # input BPF is already there: just add MAU tier
    set bpftmp_e = 1
    touch $BPFTMP
    if ( $status != 0 ) then
      echo "ERROR: ${0:t} : cannot write to BPF input file for emu export - exiting" >> /dev/stderr
      if ( $CLEAN == "TRUE" ) rm -rf $TEMP/${PID}* >& /dev/null
      exit 1
    endif
    chmod 666 $BPFTMP
    # delete a possible MAU tier in input BPF
    grep -v '^MAU:' $BPFTMP >! $EMUBPFSCRATCH
    mv $EMUBPFSCRATCH $BPFTMP
  else
    # input BPF is elsewhere: create a temporary one in the location
    # of the signal file; if this file already exists, try to overwrite
    set bpftmp_e = 0
    if ( -e $BPFTMP ) then
      echo "WARNING: ${0:t} : Emu output requires temporary BPF file in signal file location, which already exists - trying to overwrite" >> /dev/stderr
      echo -n "" >! $BPFTMP # empty temp BPF file; do not attempt to remove it because this might fail even if you can write to the file
      if ( $status != 0 ) then
        echo "ERROR: ${0:t} : cannot write to temporary BPF in $BPFTMP - exiting" >> /dev/stderr
        if ( $CLEAN == "TRUE" ) rm -rf $TEMP/${PID}* >& /dev/null
        exit 1
      endif
    endif
    grep -v '^MAU:' $BPFORG | tr -d '\r' >! $BPFTMP
  endif
  # add MAU tier
  cat ${MAU}.mau >> $BPFTMP
  # if not already there, add SAM entry in BPF header (needed by par2emu);
  # it is inserted as second line of the file
  grep '^SAM:' $BPFTMP >& /dev/null
  if ( $status != 0 ) then
    head -n 1 $BPFTMP | tr -d '\r' >! $EMUBPFSCRATCH
    echo "SAM:	$SIGNALRATE" >> $EMUBPFSCRATCH
    tail -n +2 $BPFTMP | tr -d '\r' >> $EMUBPFSCRATCH
    mv $EMUBPFSCRATCH $BPFTMP
  endif
  # create Emu files
  # the option string is passed in the 'hea=' argument with '=' replaced by ':'
  set PAR2EMUMAO = `echo "$MAO" | tr '=' ':'`
  if ( $v > 0 ) echo "DEBUG: ${0:t} : $PAR2EMU SOURCE=$SOURCE outdir=$OUTEMU force=yes hea=source:maus,${PAR2EMUMAO} $BPFTMP"
  $PAR2EMU SOURCE=$SOURCE outdir=$OUTEMU force=yes "hea=source:maus,${PAR2EMUMAO}" $BPFTMP
  # a failure of the converter is reported but does not terminate the script
  if ( $status != 0 ) echo "ERROR occured in $PAR2EMU - probably no Emu files created" >> /dev/stderr
  # remove the BPF only if it was a temporary copy (see bpftmp_e)
  if ( $bpftmp_e == 0 ) rm -f $BPFTMP
  # check for non-Emu conform symbols and issue a warning
  egrep -q '{' ${MAU}.mau
  if ( $status == 0 ) echo "WARNING: ${0:t} : Emu does not tolerate some SAM-PA labels such as curly brackets ({})." >> /dev/stderr
  # EMU / legacyEMU: join *.hlb and *.phonetic into one *.EMU file, separated
  # by the line '--- cut here ---'; the two single files do not remain
  if ( $OUTFORMAT == "EMU" || $OUTFORMAT == "legacyEMU" ) then 
    if ( $v > 0 ) echo "DEBUG: ${0:t} : Packing Emu files *.hlb and *.phonetic into *.EMU"
    mv -f $OUTEMU/${BPFTMP:r:t}.hlb $OUTEMU/${BPFTMP:r:t}.EMU
    echo '--- cut here ---' >> $OUTEMU/${BPFTMP:r:t}.EMU
    cat $OUTEMU/${BPFTMP:r:t}.phonetic >> $OUTEMU/${BPFTMP:r:t}.EMU
    rm -f $OUTEMU/${BPFTMP:r:t}.phonetic
  endif

else if ( "$OUTFORMAT" == "mau" ) then 

  # OUTFORMAT=mau: the output is the bare MAU tier. Without option OUT the
  # result is written next to the signal file (same name, extension .par).
  if ( $OUT == "" ) then
    set OUT = ${SIGNALORG:r}.par
  endif
  if ( $v > 0 ) then
    echo "DEBUG: ${0:t} : Extracting into BPF $OUT with single MAU tier"
  endif
  # make sure that the output file can be written before copying
  touch $OUT
  if ( $status != 0 ) then
    echo "ERROR: ${0:t} : cannot write output to $OUT - exiting" >> /dev/stderr
    if ( $CLEAN == "TRUE" ) then
      rm -rf $TEMP/${PID}* >& /dev/null
    endif
    exit 1
  endif
  cp -f ${MAU}.mau $OUT

else

  # now that we have legacy Emu/mau out of the way we can let ANNOTCONV do the modern formats;
  # if - however- ANNOTCONV is not available, we fall back to older scripts
  if ( -x $ANNOTCONV ) then
    # first make a BPF output (as with OUTFORMAT=par|bpf) and let ANNOTCONV convert it to the
    # OUTFORMAT (we assume that we earlier check that only OUTFORMAT are accepted by the 
    # script that can be handled by ANNOTCONV!)
    # we name this temporary BPF as the original input signal because the mausbpf2emuR 
    # converter will make an entry 'annotates : ...' and need the input bpf to be named
    # as the signal
    # Create the directory $TEMP/${PID}_BPFDIR and in it the temporary BPF that
    # is later converted by ANNOTCONV; the file starts with the LHD and MAO
    # header lines.
    mkdir $TEMP/${PID}_BPFDIR >& /dev/null
    if ( $status != 0 ) then 
      echo "ERROR: ${0:t} : cannot create temporary $TEMP/${PID}_BPFDIR - exiting" >> /dev/stderr
      if ( $CLEAN == "TRUE" ) rm -rf $TEMP/${PID}* >& /dev/null
      exit 1
    endif
    # Delete '$$_TIMESEC_<basename>_AUDIOENHANCE' from the basename of the temporary BPF (SIGNALORG) if there,
    # so that the bundlename of the created *_annot.json matches the SIGNAL basename of a pipeline input.
    # (pattern: <digits>_<10 digits>_<basename>_AUDIOENHANCE -> <basename>)
    set BPFTMP = `echo "${SIGNALORG:t:r}.par" | sed 's/^[0-9][0-9]*_[0-9][0-9][0-9][0-9][0-9][0-9][0-9][0-9][0-9][0-9]_\(.*\)_AUDIOENHANCE/\1/'`
    set BPFTMP = "$TEMP/${PID}_BPFDIR/$BPFTMP"
    # make sure that the file can be written
    touch $BPFTMP
    if ( $status != 0 ) then 
      echo "ERROR: ${0:t} : cannot write to temporary file $BPFTMP - exiting" >> /dev/stderr
      if ( $CLEAN == "TRUE" ) rm -rf $TEMP/${PID}* >& /dev/null
      exit 1
    endif
    chmod 666 $BPFTMP
    # header: format line first, then the option string of this maus call
    echo "LHD: Partitur 1.3" >! $BPFTMP
    echo "MAO: $MAO" >> $BPFTMP
    if ( $BPFORG != "" ) then
      # An input BPF exists: copy it into the temporary BPF without the entries listed in
      # bpf_drop (an extended regular expression, one alternative per filtered entry),
      # strip carriage returns, and append the new MAU tier.
      set bpf_drop = '^MAU:|LHD:|^MAO:'
      # check for a valid BPF header in the input BPF
      grep -q '^LHD:' $BPFORG
      if ( $status != 0 ) then
        # no header found (e.g. G2P output): complete the minimal header started above
        echo "SAM: $SIGNALRATE" >> $BPFTMP
        echo "LBD:" >> $BPFTMP
        # for TextGrid output of a header-less BPF the LHD filter is not applied
        if ( "$OUTFORMAT" == "TextGrid" ) set bpf_drop = '^MAU:|^MAO:'
      endif
      # The following hack keeps the deprecated options INSORTTEXTGRID and INSKANTEXTGRID
      # working since some users still use them: a tier switched off by "FALSE" is
      # removed from the BPF that is converted to TextGrid.
      if ( "$OUTFORMAT" == "TextGrid" ) then
        if ( $INSORTTEXTGRID == "FALSE" ) set bpf_drop = "$bpf_drop"'|^ORT:'
        if ( $INSKANTEXTGRID == "FALSE" ) set bpf_drop = "$bpf_drop"'|^KAN:'
      endif
      grep -E -v "$bpf_drop" $BPFORG | tr -d '\r' >> $BPFTMP
      cat ${MAU}.mau >> $BPFTMP
    else
      # There never was an input BPF (this can happen in MODUS=bigram or with option
      # KANSTR): write a minimal BPF header followed by the MAU tier.
      echo "SAM: $SIGNALRATE" >> $BPFTMP
      echo "LBD:" >> $BPFTMP
      cat ${MAU}.mau >> $BPFTMP
      # If OUT is not given, the output file name is derived from the signal file name.
      if ( $OUT == "" ) then
        set OUT = ${SIGNALORG:r}.par
        touch $OUT >& /dev/null
        if ( $status != 0 ) then
          echo "ERROR: ${0:t} : cannot write output to $OUT - exiting" >> /dev/stderr
          if ( $CLEAN == "TRUE" ) rm -rf $TEMP/${PID}* >& /dev/null
          exit 1
        endif
        # empty the output file; do not attempt to remove it because this might fail
        # even if you can write to the file
        echo -n "" >! $OUT
      endif
    endif
    # OUTFORMAT is BPF (par|bpf|mau-append) -> just output|append the result
    # Deliver the temporary BPF: either convert it with ANNOTCONV (all formats but par)
    # or write it as it is (OUTFORMAT par, i.e. par|bpf|mau-append).
    if ( "$OUTFORMAT" != "par" ) then
      # convert temporary BPF by ANNOTCONV
      if ( $OUT == "" ) then
        # no output name given: derive it from the original signal file name
        set OUT = ${SIGNALORG:r}."$OUTFORMAT"
        if ( "$OUTFORMAT" == "emuDB" ) set OUT = ${SIGNALORG:r}_annot.json
        touch $OUT >& /dev/null
        if ( $status != 0 ) then
          echo "ERROR: ${0:t} : cannot write output to $OUT - exiting" >> /dev/stderr
          if ( $CLEAN == "TRUE" ) rm -rf $TEMP/${PID}* >& /dev/null
          exit 1
        endif
        # empty the output file; do not attempt to remove it because this might fail
        # even if you can write to the file
        echo -n "" >! $OUT
      endif
      if ( $v > 0 ) echo "DEBUG: ${0:t} : calling $ANNOTCONV INP=$BPFTMP OUT=$OUT outFormat="$OUTFORMAT" v=$v"
      $ANNOTCONV INP=$BPFTMP OUT=$OUT outFormat="$OUTFORMAT" v=$v
      # keep the helper's exit code, $status is overwritten by the next command
      set err_code = $status
      if ( $err_code != 0 ) then
        echo "ERROR: ${0:t} : helper $ANNOTCONV returns exit code $err_code - exiting" >> /dev/stderr
        if ( $CLEAN == "TRUE" ) rm -rf $TEMP/${PID}* >& /dev/null
        exit 5
      endif
    else if ( $OUT == "" ) then
      # BPF output without output name: the input BPF is overwritten with the result
      if ( $v > 0 ) echo "DEBUG: ${0:t} : Appending results as MAU tier to input $BPF"
      cat $BPFTMP >! $BPF
    else
      # BPF output into the given file
      if ( $v > 0 ) echo "Extracting into BAS Partitur Format (BPF) $OUT"
      cat $BPFTMP >! $OUT
    endif
 
  else

    # ANNOTCONV not available -> fall back to older routines

    # TextGrid
    if ( $OUTFORMAT == "TextGrid" ) then 
      # If INSORTTEXTGRID is set and input is read from a BPF, we try to create
      # an additional word tier in the output TextGrid that is synchronized 
      # to the maus output. If input is read from command line the option is ignored
      # Extract a praat compatible TextGrid file from the MAU tier (and input BPF)
      # Note that there is no way to store the MAUS option string (MAO) in TextGrid.
      # Fallback TextGrid export: build a temporary BPF from the input BPF plus the new
      # MAU tier and let the helper PAR2TEXTGRID convert it into OUT.
      # If OUT is not given, the TextGrid is written next to the original signal file.
      if ( $OUT == "" ) set OUT = ${SIGNALORG:r}.TextGrid
      touch $OUT >& /dev/null
      if ( $status != 0 ) then 
        echo "ERROR: ${0:t} : cannot write output to $OUT - exiting" >> /dev/stderr
        if ( $CLEAN == "TRUE" ) rm -rf $TEMP/${PID}* >& /dev/null
        exit 1
      endif
      echo -n "" >! $OUT # empty output file; do not attempt to remove it because this might fail even if you can write to the file
      # the helper PAR2TEXTGRID expects a full BPF as input
      set TEMPORARYPAR = $TEMP/${PID}_${SIGNAL:t:r}.par
      touch $TEMPORARYPAR >& /dev/null 
      if ( $status != 0 ) then 
        echo "ERROR: ${0:t} : cannot create temporary file $TEMPORARYPAR - exiting" >> /dev/stderr
        if ( $CLEAN == "TRUE" ) rm -rf $TEMP/${PID}* >& /dev/null
        exit 1
      endif
      chmod 666 $TEMPORARYPAR
      if ( $v > 0 ) echo "DEBUG: ${0:t} : Creating temporary BPF for TextGrid conversion: $TEMPORARYPAR"
      # the temporary BPF starts with the sampling rate entry ...
      echo "SAM: $SIGNALRATE" >! $TEMPORARYPAR
      if ( $MODUS != 'bigram' ) then 
        # ... followed by the input BPF without old MAU/MAO entries and without its own
        # SAM entry (written above); carriage returns are removed
        grep -v '^MAU:' $BPF | grep -v '^MAO:' | grep -v '^SAM:' | tr -d '\r' >> $TEMPORARYPAR
      endif
      # ... and ends with the new MAU tier
      cat ${MAU}.mau >> $TEMPORARYPAR
      # call helper
      $PAR2TEXTGRID v=$v BPF=$TEMPORARYPAR OUT=$OUT INSORTTEXTGRID=$INSORTTEXTGRID INSKANTEXTGRID=$INSKANTEXTGRID
      if ( $status != 0 ) then
        # error messages go to stderr like all other ERROR messages of this script
        echo "ERROR: ${0:t} : helper PAR2TEXTGRID reports error - exiting" >> /dev/stderr
        if ( $CLEAN == "TRUE" ) rm -rf $TEMP/${PID}* >& /dev/null
        exit 5
      endif
    
    
    # EMU output
    else if ( $OUTFORMAT == "emuDB" ) then   
     
      # Fallback emuDB export: build a complete BPF (header with SAM and MAO, input
      # tiers, new MAU tier) in a scratch directory, let the helper PAR2EMUR create the
      # *_annot.json next to it and copy that file to its destination.
      if ( $v > 0 ) echo "DEBUG: ${0:t} : Extracting into Emu file (*_annot.json)"
      # use of helper requires a fully equipped BPF file with ORT,KAN and MAU
      # with the same base name as SIGNAL
      mkdir $TEMP/${PID}_BPFDIR >& /dev/null
      if ( $status != 0 ) then 
        echo "ERROR: ${0:t} : cannot create temporary $TEMP/${PID}_BPFDIR - exiting" >> /dev/stderr
        if ( $CLEAN == "TRUE" ) rm -rf $TEMP/${PID}* >& /dev/null
        exit 1
      endif
      # Delete '$$_TIMESEC_<basename>_AUDIOENHANCE' from the basename of the temporary BPF (SIGNALORG) if there,
      # so that the bundlename of the created *_annot.json matches the SIGNAL basename of a pipeline input.
      set BPFTMP = `echo "${SIGNALORG:t:r}.par" | sed 's/^[0-9][0-9]*_[0-9][0-9][0-9][0-9][0-9][0-9][0-9][0-9][0-9][0-9]_\(.*\)_AUDIOENHANCE/\1/'`
      set BPFTMP = "$TEMP/${PID}_BPFDIR/$BPFTMP"
      touch $BPFTMP
      if ( $status != 0 ) then 
        echo "ERROR: ${0:t} : cannot write to temporary file $BPFTMP - exiting" >> /dev/stderr
        if ( $CLEAN == "TRUE" ) rm -rf $TEMP/${PID}* >& /dev/null
        exit 1
      endif
      chmod 666 $BPFTMP
      echo -n "" >! $BPFTMP # empty temp BPF file; do not attempt to remove it because this might fail even if you can write to the file
      if ( $MODUS != "bigram" ) then 
        # take over the input BPF without old MAU/MAO entries
        grep -v '^MAU:' $BPFORG | grep -v '^MAO:' | tr -d '\r' >! $BPFTMP
      else
        # no input BPF in bigram mode: write a minimal header
        printf "LHD: Partitur 1.3\nLBD:\n" >! $BPFTMP
      endif
      # add MAU tier
      cat ${MAU}.mau >> $BPFTMP
      # if not already there, add SAM entry in BPF header (needed by helper);
      # it is inserted as second line, right after the first header line
      grep -q '^SAM:' $BPFTMP 
      if ( $status != 0 ) then
        head -n 1 $BPFTMP | tr -d '\r' >! $TEMP/${PID}BPF
        echo "SAM:	$SIGNALRATE" | tr -d '\r' >> $TEMP/${PID}BPF
        tail -n +2 $BPFTMP | tr -d '\r' >> $TEMP/${PID}BPF
        mv $TEMP/${PID}BPF $BPFTMP
      endif  
      # add MAO header entry (is later translated into an emuDB attribute to level 'bundle');
      # the scratch file lives in TEMP like all other temporary files of this script
      # (no hard-coded /tmp), so that it is also covered by the cleanup of $TEMP/${PID}*
      head -n 1 $BPFTMP | tr -d '\r' >! $TEMP/${PID}BPF
      echo "MAO:  $MAO" >> $TEMP/${PID}BPF
      tail -n +2 $BPFTMP | tr -d '\r' >> $TEMP/${PID}BPF
      mv $TEMP/${PID}BPF $BPFTMP
      # create Emu file
      touch ${BPFTMP:r}_annot.json 
      if ( $status != 0 ) then 
          echo "ERROR: ${0:t} : cannot create (temporary) ${BPFTMP:r}_annot.json - exiting" >> /dev/stderr
          if ( $CLEAN == "TRUE" ) rm -rf $TEMP/${PID}* >& /dev/null
          exit 1
      endif
      if ( $v > 0 ) echo "DEBUG: ${0:t} : $PAR2EMUR v=$v force=TRUE validate=true $BPFTMP"
      $PAR2EMUR v=$v force=TRUE validate=true $BPFTMP
      # keep the helper's exit code, $status is overwritten by the next command
      set err_code = $status
      if ( $err_code != 0 ) then 
        # a failed conversion is fatal (exit code 5 as for the other helpers); otherwise
        # the empty *_annot.json created above would be delivered as a valid result
        echo "ERROR: ${0:t} : helper $PAR2EMUR returns exit code $err_code - exiting" >> /dev/stderr
        if ( $CLEAN == "TRUE" ) rm -rf $TEMP/${PID}* >& /dev/null
        exit 5
      endif  
      # if OUT is not given, the Emu file is written next to the original signal file
      if ( $OUT == "" ) set OUT = ${SIGNALORG:r}_annot.json
      if ( $v > 0 ) echo "Extracting into $OUT"
      cp ${BPFTMP:r}_annot.json $OUT >& /dev/null
      if ( $status != 0 ) then 
        echo "ERROR: ${0:t} : cannot write output to $OUT - exiting" >> /dev/stderr
        if ( $CLEAN == "TRUE" ) rm -rf $TEMP/${PID}* >& /dev/null
        exit 1
      endif
    
    # mau|par|bpf|mau-append|csv or UNKNOWN (bpf,BPF,PAR and mau-append have been mapped to par and CSV to csv earlier!)
    else
      # Only mau, MAU, csv and par are handled below; anything else is rejected here.
      if ( ! ( $OUTFORMAT == "mau" || $OUTFORMAT == "MAU" || $OUTFORMAT == "csv" || $OUTFORMAT == "par" ) ) then
        echo "ERROR: ${0:t} : Unknown output format $OUTFORMAT - exiting" >> /dev/stderr
        if ( $CLEAN == "TRUE" ) rm -rf $TEMP/${PID}* >& /dev/null
        exit 1
      endif
      # OUTFORMAT = par|bpf|mau-append
      # Fallback output for the remaining formats:
      #   par      - complete BPF (header, input tiers, MAU tier) into OUT or, if OUT is
      #              not given and an input BPF exists, back into the input BPF
      #   mau|MAU  - the bare MAU tier
      #   csv      - CSV table created by helper MAUSBPF2CSV
      # OUTFORMAT = par|bpf|mau-append
      if ( $OUTFORMAT == "par" ) then
        # header and input tiers are collected in a scratch file first
        echo "LHD: Partitur 1.3" >! $TEMP/${PID}_BPF
        echo "MAO: $MAO" >> $TEMP/${PID}_BPF
        if ( $BPFORG != "" ) then
          # check for valid BPF header in input BPF: if there, use it and just insert MAO entry;
          # if not there (e.g. G2P output), create a minimal header; then filter the rest of the BPF
          # for MAU,MAO,LHD and append it
          grep -q '^LHD:' $BPFORG
          if ( $status != 0 ) then 
            echo "SAM: $SIGNALRATE" >> $TEMP/${PID}_BPF
            echo "LBD:" >> $TEMP/${PID}_BPF
            grep -v '^MAU:' $BPFORG | grep -v '^MAO:' | tr -d '\r' >> $TEMP/${PID}_BPF
          else
            grep -v '^MAU:' $BPFORG | grep -v '^MAO:' | grep -v 'LHD:' | tr -d '\r' >> $TEMP/${PID}_BPF
          endif
        else
          # there never was an input BPF: make a minimal BPF header,
          # and if OUT is not given, use the SIGNAL file name as output file name
          echo "SAM: $SIGNALRATE" >> $TEMP/${PID}_BPF
          echo "LBD:" >> $TEMP/${PID}_BPF
          if ( $OUT == "" ) then 
            set OUT = ${SIGNALORG:r}.par
            touch $OUT >& /dev/null
            if ( $status != 0 ) then
              echo "ERROR: ${0:t} : cannot write output to $OUT - exiting" >> /dev/stderr
              if ( $CLEAN == "TRUE" ) rm -rf $TEMP/${PID}* >& /dev/null
              exit 1
            endif
            echo -n "" >! $OUT # empty output file; do not attempt to remove it because this might fail even if you can write to the file
          endif
        endif
        # scratch file plus MAU tier form the result
        if ( $OUT == "" ) then 
          if ( $v > 0 ) echo "DEBUG: ${0:t} : Appending results as MAU tier to input $BPF"
          cat $TEMP/${PID}_BPF ${MAU}.mau | tr -d '\r' >! $BPF
        else
          if ( $v > 0 ) echo "Extracting into BAS Partitur Format (BPF) $OUT"
          cat $TEMP/${PID}_BPF ${MAU}.mau | tr -d '\r' >! $OUT
        endif
      else
      # OUTFORMAT = csv|mau|MAU
        if ( $OUT == "" ) then 
          # no output name given: derive it from the original signal file name
          if ( $OUTFORMAT == "csv" ) then 
            set OUT = ${SIGNALORG:r}.csv
            if ( $v > 0 ) echo "DEBUG: ${0:t} : Extracting into CSV style table $OUT"
          else
            set OUT = ${SIGNALORG:r}.par
            if ( $v > 0 ) echo "DEBUG: ${0:t} : Extracting into BPF $OUT with single MAU tier"
          endif
        else
          if ( $OUTFORMAT == "csv" ) then 
            if ( $v > 0 ) echo "DEBUG: ${0:t} : Extracting into CSV style table $OUT"
          else
            if ( $v > 0 ) echo "DEBUG: ${0:t} : Extracting into BPF $OUT with single MAU tier"
          endif
        endif
        touch $OUT
        if ( $status != 0 ) then 
          echo "ERROR: ${0:t} : cannot write output to $OUT - exiting" >> /dev/stderr
          if ( $CLEAN == "TRUE" ) rm -rf $TEMP/${PID}* >& /dev/null
          exit 1
        endif
        # everything that is not csv here is mau or MAU (see the format check above);
        # both spellings deliver the bare MAU tier
        if ( $OUTFORMAT != "csv" ) then 
          cp -f ${MAU}.mau $OUT
        else 
          # CSV output : helper MAUSBPF2CSV transforms a BPF
          if ( "$BPF" != "" ) then 
            # if we have a BPF input, we just add the MAU tier and call the helper
            set TEMPORARYPAR = $TEMP/${PID}_${SIGNAL:t:r}.par
            touch $TEMPORARYPAR >& /dev/null 
            if ( $status != 0 ) then 
              echo "ERROR: ${0:t} : cannot create temporary file $TEMPORARYPAR - exiting" >> /dev/stderr
              if ( $CLEAN == "TRUE" ) rm -rf $TEMP/${PID}* >& /dev/null
              exit 1
            endif
            chmod 666 $TEMPORARYPAR
            if ( $v > 0 ) echo "DEBUG: ${0:t} : Creating temporary BPF for CSV conversion: $TEMPORARYPAR"
            grep -v '^MAU:' "$BPFORG" >> $TEMPORARYPAR
            cat ${MAU}.mau >> $TEMPORARYPAR
            set csv_input = $TEMPORARYPAR
          else
            # if we have no input BPF (KANSTR input), we just transform the MAU tier
            set csv_input = ${MAU}.mau
          endif  
          # call the helper
          $MAUSBPF2CSV OUT=$OUT $csv_input
          if ( $status != 0 ) then
            # error messages go to stderr like all other ERROR messages of this script
            echo "ERROR: ${0:t} : helper MAUSBPF2CSV reports error - exiting" >> /dev/stderr
            if ( $CLEAN == "TRUE" ) rm -rf $TEMP/${PID}* >& /dev/null
            exit 5
          endif
        endif           
      endif
    endif	

  endif # fallback routines

endif # legacy Emu output

# clean up
# Remove all temporary files of this run (everything in TEMP whose name starts with
# PID) if CLEAN is TRUE; with v > 0 the steps are reported.
if ( ( $CLEAN == "TRUE" ) && ( $v > 0 ) ) echo "DEBUG: ${0:t} : Cleaning up temp area"
if ( $CLEAN == "TRUE" ) rm -rf $TEMP/${PID}* >& /dev/null

if ( $v > 0 ) then
  echo "DEBUG: ${0:t} : Finishing maus on `date`"
endif

# regular end of the script
exit 0

