#!/bin/tcsh 


# MAUS segmentation for one file only

# Author F. Schiel (schiel@bas.uni-muenchen.de)

# Version 
set VERSION = 5.14

# To get a help message, simply type in 'maus'
# To get the version number type in 'maus --version'

##########################################################################
# PARAMETERS THAT NEED TO BE ADAPTED #####################################
##########################################################################
# Set the path SOURCE to the dir where you unpacked the maus package.
# Set TEMP to a temporary dir where maus can store intermediate files.
# If you are using a locale that causes scripts to format floating point
# numbers with a comma instead of a dot, you need to set the locale here.
##########################################################################
#set SOURCE = /homes/schiel/MAUS/TOOL
# Resolve the real path of this script and use its directory as the root
# for the helpers and parameter files referenced below.
set SCRIPT = `readlink -f "$0"`
set SOURCE = `dirname "$SCRIPT"`  # location where the script is stored 
                           # (even if we start via a symbolic link)
set TEMP = /tmp
setenv LANG en_US.UTF-8  # defines the behavior of text processing, sorting etc.
setenv SOX_OPTS "-D" # this prevents sox version 14.3 and higher from using
                     # automatic dithering in rate conversions, which causes
                     # MAUS results to fluctuate randomly
##########################################################################

# In the following <LANGUAGE> denotes a RFC5646 language code e.g. 'gsw-CH'

# Parameter file names that must reside in PARAM dir
set MMF = ""              # optional replacement for the default HMM macro file $PARAM/MMF.mmf
set KANINVENTAR = KANINVENTAR
set GRAPHINVENTAR = GRAPHINVENTAR
set HMMINVENTAR = HMMINVENTAR
set RULESET = "default"   # default: statistical rule set rml-0.95.rul or
                          # value set in DEFRULESET in file PARAM.<LANGUAGE>/DEFAULTS
                          # use RULESET=*.nrul for phonological rules (no statistics)
set HVITECONF = HVITECONF
set DICT = DICT
set PRECONFIG = $SOURCE/PRECONFIGWAV

#
# Commandline Options (style 'OPTION=<value>')
#
# Note: the default values given here are also coded into the webservices CMDI; if you change any default,
# also change the default there!

set MINPAUSLEN = 5    # setting to 1 causes no inter-word pauses to be deleted,
                      # 2 : pauses of 1 frame are deleted, ...
                      # 40msec seems to be the value that will not yet
                      # be perceived as a pause...
                      # note that only optional pauses '#', '&' and '<p:>'
                      # will be deleted here; '<' and '>' will always stay.
set MODELRATE = 0     # sampling rate of HMM; will be read from PRECONFIG
                      # this can be overwritten by option from commandline,
                      # but then the rate IS NOT checked against the HMM!
set SIGNAL = ""       # input media file (must be readable)
set BPF = ""          # input file with the canonical pronunciation (*.par|PAR or *.csv|CSV)
set OUT = ""          # output file; if empty, the name is derived from the signal file
set OUTSLF = ""       # if set, maus copies its internal SLF into it
set OUTFORMAT = "mau" # if set to 'mau' a BPF mau tier is created in OUT
                      # if set to 'csv' a CSV spread sheet is created in OUT
                      # if set to 'TextGrid' a praat TextGrid is created in OUT
                      # if set to 'emu', legacy Emu *.hlb and *.phonetic files are created
                      # if set to 'emuR|emuDB', an Emu *_annot.json file is created
                      # if set to 'EMU' or 'legacyEMU' a file *.EMU is created which contains the *.hlb
                      #  and *.phonetic legacy Emu files separated by a line '--- cut here ---'
                      # if set to 'mau-append' or 'par' or 'bpf' the mau tier is added to the BPF 
                      #  input file '*.par', if writable.
set INSORTTEXTGRID = "no" 
                      # if set to "yes" an additional word tier is inserted 
set INSKANTEXTGRID = "no" 
                      # if set to "yes", an additional canonical transcript tier is inserted 
set USETRN = "no"     # if set to "yes" and the input BPF contains a single TRN entry, 
                      # a pre-segmentation (cutting of leading and trailing silence) of the whole
                      # utterance will be read from this TRN tier entry and the search 
                      # is constrained to the segment given in that TRN tier.
                      # if the input BPF contains more than one TRN entry, the script
                      # assumes that a chunk segmentation is provided for the recording
                      # and calls the helper maus.trn (part of distribution) to segment 
                      # each chunk separately.
                      # ! option value 'force' is deprecated with 4.11; use PRESEG=true instead:
                      # if set to 'force', an online pre-segmentation is carried out by the 
                      # helper WAV2TRN; if WAV2TRN is not installed on the system,
                      # a warning is issued and the option is ignored; a TRN tier in the BPF 
                      # input is ignored.
                      # if no BPF *.par|PAR is given on the command line, this option is ignored.
set PRESEG = "false"  # if set to "true", maus will use the helper WAV2TRN to perform a 
                      # pre-segmentation; if WAV2TRN is not installed on the system,
                      # a warning is issued and the option is ignored; a TRN tier in the BPF
                      # input is ignored. If this option is set in combination with USETRN=true
                      # and the input BPF contains a chunk segmentation (tier TRN), then the 
                      # pre-segmentation is carried out for every single chunk.
set OUTIPA = "false"  # deprecated option; use OUTSYMBOL=ipa instead
set OUTSYMBOL = "sampa"
                      # encoding of phonetic symbol output in all file formats.  
                      # requires a mapping table defined in IPATABLE.
                      # if set to 'ipa', maus replaces all SAMPA symbols in the output 
                      # files by the corresponding IPA coded in UTF-8. Note that BPF tiers
                      # MAU (*.mau, option OUTFORMAT=mau) do not conform to the BPF standard then;
                      # OUTFORMAT=TextGrid should work in most cases; the effect in legacy Emu 
                      # is unknown; silence intervals, human noise and other noise are coded
                      # as (...), (..) and (.) respectively; 
                      # if set to 'manner', maus replaces all SAMPA symbols in the output
                      # files by the corresponding IPA manner class (line of IPA consonant chart) + 
                      # 'vowel' + 'diphthong';
                      # if set to 'place', maus replaces all SAMPA symbols in the output
                      # files by the corresponding IPA place of articulation for consonants, and 
                      # by locations/movements in the vowel space for vowels/diphthongs.
set NOINITIALFINALSILENCE = "false"
                      # if set to 'true', maus will not automatically model an optional initial 
                      # and final silence model, but force into the first/last segment.

set v = 0             # verbosity; values > 0 print debug information to stdout
set TRACE = 0
set KANSTR = ""       # string of phonemic symbols given on the command line (instead of BPF)
set CLEAN = 1         # 1: cached intermediate files in TEMP are erased at the end
set PARAM = $SOURCE/PARAM
# or:
set LANGUAGE = ""     # language of speech: RFC5646 codes: 
                      # iso639-3 '-' iso-3166-1 [ '-' iso-3166-2 ] 
                      # e.g. LANGUAGE='eng-AU' -> PARAM.eng-AU
                      # Exception is 'sampa' which denotes language independent mode
set CANONLY = no      # set to 'yes' causes maus just to align the string 
                      # of symbols without the MAUS technique.
                      # From version 2.87 this option is deprecated and can be
                      # overruled by option MODUS=align. For backward-compatibility
                      # CANONLY=yes is effectively MODUS=align, if MODUS is not set
                      # (MODUS=standard), since this is probably what the user wants.
set WEIGHT = "default"       
                      # the WEIGHT option weights the influence of the statistical
                      # MAUS model against the acoustical scores. More
                      # precisely WEIGHT is multiplied to the MAUS model
                      # score (log likelihood) before adding the score to 
                      # the acoustical score in the search;
                      # WEIGHT=0.0 will switch off the MAUS model statistic
                      # (= all paths through the MAUS model are equally probable)
                      # this option is defaulted language specific; see 
                      # PARAM.<language>/DEFAULTS for details
                      # if set to 'default' the language specific default 
                      # value is used, otherwise the commandline value
set INSPROB = 0.0     # Insertion probability of segments (see above)		      
set allowresamp = yes # set to 'yes' will cause maus to resample signals
                      # with the wrong sampling rate using sox; if set to 'no' input with 
                      # wrong sample rate will cause an error. Note: videos are always re-sampled
                      # to 16kHz, 16bit, mono
#set CREATEKAN = $SOURCE/create_kan   # disabled in version 5.11 since this has been long out of date
                      # helper to insert a temporary KAN tier,
                      # if only the ORT tier is present in the BPF input file. This is not part 
                      # of the distributed MAUS package!
set PAR2EMU = $SOURCE/par2emu
                      # helper to create a legacy Emu compatible output
                      # (option OUTFORMAT=emu|EMU|legacyEMU)
set PAR2EMUR = $SOURCE/mausbpf2emuR
                      # helper to create Emu compatible output
                      # (option OUTFORMAT=emuR|emuDB)
set PAR2TEXTGRID = $SOURCE/par2TextGrid
                      # helper to create TextGrid output
set WAV2TRN = "wav2trn"	
                      # helper to pre-segment signal
set MAUSTRN = "$SOURCE/maus.trn"
                      # helper to perform batch process based on a chunk segmentation
set MAUSBPF2CSV = "$SOURCE/mausbpf2csv"
                      # helper to transform output BPF to a CSV table
set STARTWORD = 0
set ENDWORD = 999999
set MAUSSHIFT = "default"   # constant shift of MAUS boundaries in msecs
                            # positive value = shift into future
                            # this option is defaulted language specific; see 
                            # PARAM.<language>/DEFAULTS for details
                            # if set to 'default' the language specific default 
                            # value is used, otherwise the commandline value
set PRINTINV = no    # if set to yes, maus copies the set of allowed SAM-PA
                     # symbols in the input for the chosen language to the 
                     # output file OUT or to SAMPA.tab, if OUT is not given. The list 
                     # is taken from PARAM.<language>/KANINVENTAR.inv.
set INFORMAT = "bpf" # deprecated; do not use this option; input format is detected by extension
                     # input format; default is BPF with (minimal) tier KAN
                     # if set to "bpf-sampa", SAM-PA processing mode is invoked and 
                     # PARAM and LANGUAGE options are ignored
set INSYMBOL = "sampa"
                     # defines the phonological symbol encoding in the input; default 
                     # is 'sampa', alternate is 'ipa' which requires blank-separated IPA symbols
                     # in the KAN tier, e.g. 'KAN: 0  h ɔʏ t ə' 
set MODUS = "standard" # macro option
                     # Operation modus. Default is 'standard' which denotes the normal
                     # MAUS technique as published in Schiel ICPhS 1999.
                     # If set to 'align', MAUS performs as in 'standard', but doesn't
                     # model pronunciation, i.e. it aligns only to the given pronunciation
                     # using 'forced alignment' technique. This option corresponds to
                     # the deprecated former option CANONLY=true.
                     # If set to 'bigram', MAUS performs a phonotactic bigram driven 
                     # speech recognition; the following options
                     # are set overriding command line options:
                     # INSORTTEXTGRID = false
                     # INSKANTEXTGRID = false
                     #   and the following options are being ignored: BPF, KANSTR, 
                     # INFORMAT,CANONLY,RULESET,INSPROB,STARTWORD,ENDWORD,USETRN=true
                     #   and the option OUTFORMAT=emu|EMU|legacyEMU
                     #   will cause an error, because in the bigram modus no hierarchy is created.
                     # In 'bigram' modus, MAUS runs a phone recognition on the 
                     # phone classes as defined in the first column of the dictionary 
                     # file defined in DICTBIGRAM constrained only by a phone bigram 
                     # (HTK lattice) as defined in LATBIGRAM.
set DICTBIGRAM = ""
set LATBIGRAM = ""
                     # Defaults: DICTBIGRAM = $PARAM/DICT.bigram, LATBIGRAM = $PARAM/LAT.bigram
set RELAXMINDUR = "FALSE"
                     # by default maus has a minimum duration of phone segments of 3 frames (= 30msec)
                     # or 40msec for long/tense vowels or diphthongs. This makes sense for achieving robust results
                     # with a minimum of falsely inserted short segments. In some cases (duration of very short
                     # consonants) this causes a 'ceiling effect' at 30msec in the analysed data. If this option 
                     # is set to 'TRUE', this lower minimum duration bound is relaxed to 10msec (1 frame) and 
                     # 20msec respectively. 
set RELAXMINDURHED = "${SOURCE}/relaxMinDur.hed"
                     # HHEd helper script to do on-the-fly conversion of HMMs 
set BPFTHRESHOLD = 3000
                     # threshold for prevalidation of input: if the loaded BPF contains more than 
                     # BPFTHRESHOLD lines of the KAN tier, we expect that the Viterbi will take a very long time 
                     # to process this file and most likely will not come to a valid result; the script therefore 
                     # issues an ERROR message and exits with exit code 1; if you are sure about what you are 
                     # doing, you can set this threshold to other values via this option.
set GETBPFTHRESHOLD = "FALSE" 
                     # if set, maus returns a single number representing the option BPFTHRESHOLD and 
                     # exits with error code 0; this allows front ends to check where the pre-validation  
                     # threshold is.
set CSVLIMITER = ";" # column delimiter for CSV input tables

# Print the version number and exit when called as 'maus --version'.
# "$1" is quoted so that a first argument containing blanks or wildcard
# characters (e.g. KANSTR="a: b e:") is compared as one single string;
# unquoted it would be split into several words and make the expression
# of this 'if' invalid.
if ( "$1" == '--version' ) then 
  echo $VERSION
  exit 0
endif

# Keep a copy of the complete command line; the loop below consumes the
# positional parameters with 'shift'.
set mausoptions = "$*"

# Actually do the argument parsing here
#
# Every leading argument of the form KEY=VALUE is treated as an option:
# KEY must be the name of a shell variable that is already set at this point
# (this is how unknown options are detected) and VALUE replaces its current
# value. Parsing stops at the first argument that does not contain a '='.

#echo parsing commandline
# NOTE(review): v still holds its default here because v=... is only
# evaluated in the loop below, so this echo is not triggered by the
# command line option v.
if ( $v > 0 ) echo "$0 $*"
while ( "$1" != "" )
	switch ("$1")
	case *=*:
		# Option name: text in front of the first '='.
		# The argument is quoted so that wildcard characters in it
		# (for instance the SAM-PA glottal stop) are passed on to awk
		# literally and are not filename-expanded by echo.
		set key = `echo "$1" | awk -F= '{ print $1 }'`
		#check if option is known (set)
		eval set checkoption = '$?'$key
                if ( $checkoption == 0 ) then 
		  echo "ERROR: ${0:t} : unknown option $key - exiting" >> /dev/stderr
		  exit 1
		endif  
		# Option value: everything behind the FIRST '=', so that values
		# which contain further '=' characters (e.g. file names) are
		# not truncated.
		set val = `echo "$1" | awk '{ print substr($0, index($0, "=") + 1) }'`
		# NOTE(review): a value containing a single quote still breaks
		# the following eval; runs of blanks are reduced to one blank.
		eval "set $key "= \'"$val"\'
		unset key val
		shift
		breaksw
        default:
		break
        endsw
end

# end option parser

# boolean variable check; define all boolean input parameters here
#
# Every variable named in 'bool' is normalized in place:
#   true, True, TRUE, 1, yes, Yes, YES   -> TRUE
#   false, False, FALSE, 0, no, No, NO   -> FALSE
#   force                                -> force (kept as it is)
# Any other value prints an error message to stderr and exits with code 1.

set bool = ( PRINTINV allowresamp CANONLY CLEAN USETRN PRESEG INSORTTEXTGRID INSKANTEXTGRID OUTIPA NOINITIALFINALSILENCE RELAXMINDUR GETBPFTHRESHOLD )
foreach booleanvariable ( $bool )
  # Read the current value as ONE quoted word. Unquoted, a value with blanks
  # such as 'yes please' would be split by 'set' (val=yes plus a stray
  # variable 'please') and wildcard characters would be filename-expanded,
  # so that invalid values could slip through or abort with a shell error
  # instead of the message below.
  eval 'set val = "$'$booleanvariable'"'
  switch ( "$val" ) 
  case true:
  case True:
  case TRUE:
  case 1:
  case yes:
  case Yes:
  case YES:
    eval set $booleanvariable = TRUE
    breaksw
  case false:
  case False:
  case FALSE:
  case 0:
  case no:
  case No:
  case NO:
    eval set $booleanvariable = FALSE
    breaksw
  case force:
    eval set $booleanvariable = force
    breaksw
  default:
    echo "ERROR: ${0:t} : Boolean $booleanvariable=$val is not a boolean value. Use either '0,1,true,false,yes,no,(force)' - exiting" >> /dev/stderr
    exit 1
  endsw    
end

# Query mode for front ends: print the current value of BPFTHRESHOLD as a
# single number and leave with exit code 0 without doing anything else.
if ( "TRUE" == "$GETBPFTHRESHOLD" ) then
  echo $BPFTHRESHOLD
  exit 0
endif

# MODUS=bigram must be exempt from this case, since this modus does not require a BPF or KANSTR option 
if ( "$PRINTINV" == "FALSE" && ( "$SIGNAL" == "" || ( "$BPF" == "" && "$KANSTR" == "" && $MODUS != "bigram" ) ) ) then 
  echo "${0:t} : version $VERSION" >> /dev/stderr
  cat <<ENDE | sed 's/^# //' >> /dev/stderr

# usage: maus SIGNAL=signal.nis|wav|dea|al|mpg|mpeg|mp4|avi|fvl BPF=signal.par|csv [MODUS=standard][INSYMBOL=sampa|ipa][OUT=maustier.mau][OUTFORMAT=mau|mau-append|TextGrid|emu|EMU|legacyEMU|csv|par|bpf|emuR|emuDB][CLEAN=1][PARAM=parameter-dir][LANGUAGE=iso639|rfc5646][CANONLY=no][allowresamp=yes][WEIGHT=default][INSPROB=insprob][MINPAUSLEN=5][STARTWORD=0][ENDWORD=999999][MAUSSHIFT=default][INSORTTEXTGRID=no][INSKANTEXTGRID=no][USETRN=no][PRESEG=no][RULESET=rml-0.95.rul][NOINITIALFINALSILENCE=no][OUTSYMBOL=sampa|ipa|place|manner][RELAXMINDUR=no][BPFTHRESHOLD=3000]
# usage: maus SIGNAL=signal.nis|wav|dea|al|mpg|mpeg|mp4|avi|fvl KANSTR="a: b e: # t s e:" [MODUS=standard][INSYMBOL=sampa|ipa][OUT=maustier.mau][OUTFORMAT=mau|mau-append|TextGrid|emu|EMU|legacyEMU|csv|par|bpf|emuR|emuDB][CLEAN=1][PARAM=parameter-dir][LANGUAGE=iso639|rfc5646][CANONLY=no][allowresamp=yes][WEIGHT=default][INSPROB=insprob][MINPAUSLEN=5][MAUSSHIFT=default][INSORTTEXTGRID=no][INSKANTEXTGRID=no][USETRN=no][PRESEG=no][RULESET=rml-0.95.rul][NOINITIALFINALSILENCE=no][OUTSYMBOL=sampa|ipa|place|manner][RELAXMINDUR=no]
# usage: maus SIGNAL=signal.nis|wav|dea|al|mpg|mpeg|mp4|avi|fvl MODUS=bigram [OUT=maustier.mau][OUTFORMAT=mau|TextGrid|csv][CLEAN=1][PARAM=parameter-dir][LANGUAGE=iso639|rfc5646][allowresamp=yes][WEIGHT=default][USETRN=no][PRESEG=no][OUTSYMBOL=sampa|ipa|place|manner][RELAXMINDUR=no]
# usage: maus PRINTINV=yes [LANGUAGE=iso639|rfc5646][OUT=InventarTable.txt]
# usage: maus GETBPFTHRESHOLD=yes
# usage: maus --version

# Reporting: ERRORs/WARNINGs are printed to stderr; DEBUG infos (option v > 0) 
# and output of parameter retrieval is printed to stdout 

# General remarks:
# The script reads a string of phonemic symbols as defined in the param
# file KANINVENTAR, reads a signal from the file signal.nis and 
# performs a MAUS segmentation according to these inputs.
# The resulting segmentation is either written into a BPF MAU tier file *.mau
# or into a Praat compatible *.TextGrid file or in Emu files or appended to the BPF
# input file..
# Phonemic symbols are read either from commandline or from a KAN tier of 
# a BPF file (input file *.par|PAR) or from a CSV table with two ';'-separated columns
# (input file *.csv|CSV).

# Maus is also implemented as a webservice. If you are using maus as a web service,
# not all options are available as for the plain script. Some options have different 
# names and values to standardize certain concepts. Please look for remarks headed
# by 'WebService:' for such special options in the following.

# There are a number of general constraints on how to use maus:

# With the default set of parameter files PARAM (LANGUAGE='' or LANGUAGE='deu-DE') 
# only German language may be
# segmented. Furthermore the statistical rule set used here is optimized 
# to non-prompted dialogue speech used in a formal situation. However,
# we found that this set also works reasonable well for read speech.

# To adapt this script to another language, several parameter files and 
# programs in PARAM must be adapted: The set of phonemic symbols used in the input,
# the MAUS internal symbol set, the mapping function between them, the 
# Hidden Markov Models used for the search, the mapping from MAUS internal
# symbols to HMM and of course the rule set.
# See file USAGE for further details, and many examples in subdirs PARAM.<LANGUAGE>.

# The string of phonemic input symbols must not contain any other symbol as
# defined in $PARAM/KANINVENTAR. 
# The symbol '#' may be used between words indicating possible pauses
# between the words. This is highly recommended.
# When reading from a BPF file (option BPF) these optional pauses are
# inserted automatically.

# WebService: you may download the content of KANINVENTAR (i.e. the allowed 
# phonetic symbols in the maus input) for the selected language by pressing
# the button 'Inventar' next to the LANGUAGE option in the web form or by 
# directly issuing the following curl command:
# curl -X POST -H 'content-type: application/x-www-form-urlencoded' 'http://clarin.phonetik.uni-muenchen.de/BASWebServices/services/runMAUSGetInventar?LANGUAGE=iso639code|rfc5646code'


# This script is intended to work for mono NIST and WAV sound files with 
# 16 kHz sampling rate and 16 bit linear (FIXRATE), because the HMM are 
# trained to this type of data. Multi-channel files are not supported.
# Maus will automatically resample the signal using sox (without dither!)
# if you set the option 'allowresamp' to 'yes' (default is 'yes'; the times 
# given in samples in the mau output are based on the original
# sampling rate of the signal; the times given in the TextGrid output 
# files are NOT affected by any re-sampling!) and also
# recognize ALAW coded / 8kHz telephone speech input by the extensions
# '.al', '.AL', '.dea' and '.DEA' and converts it into PCM / 16kHz using
# sox. Note that telephone speech is band limited and influenced by a 
# number of varying transfer functions and therefore not suited to be 
# recognized by HMM that are trained on more or less clean lab speech.
# But you can try...

# You may also either re-sample using sox beforehand or
# adjust the parameters in PRECONFIG and replace the HMM by HMM trained with 
# data of the respective sampling rate.
# The script will complain if you try to use other sampling rates or HMM
# trained with other sampling rates. Note that ALL kinds of re-sampling
# detoriate the signals!

# If you use WAV/ALAW signal files as input, the tool sox must be installed 
# on your computer.

# Other tools (partly residing in SOURCE) required by this script:
# awk(*), HVite(*), HCopy(*), HHEd(*), dos2unix(*), ffmpeg(*), SoX(*),
# kan2mlf.awk, word_var-2.0, rec2mau.awk,
# mau2TextGrid.awk, mau2TextGridORT.awk, correctusetrn.awk, finalusetrn.awk,
# par2emu, par2emu1.awk, par2emu2.awk, par2TextGrid, relaxMinDur.hed,
# mausbpf2csv, mausbpf2emuR, mausbpf2emuR_class1.awk, mausbpf2emuR_class1lnks.awk,
# mausbpf2emuR_class1mult.awk, mausbpf2emuR_class1multlnks.awk, 
# mausbpf2emuR_class4.awk, mausbpf2emuR_class4lnks.awk
# (tools marked with (*) are NOT part of the MAUS distribution package!)

# word_var-2.0 is a C++ based program compiled for Linux that computes the
# statistically weighted pronuciation model. If you are intending
# to use MAUS for a different language than German AND not do not intend to 
# use pronunciation rule sets (MODUS=align), i.e. make a simple alignment 
# to phonemic SAM-PA transcript, then you can use maus WITHOUT installing
# the binary word_var-2.0. The script will not call word_var-2.0, if you 
# set the option MODUS=align. This might help on platforms where the compilation
# of word_var-2.0 does not work or where no C++ compiler is available.

# Warning: the script will write intermediary files into 
# the cache $TEMP. To be sure that these are deleted after finishing use
# the option TEMP=/mytemp and the option CLEAN=1.
# You may set CLEAN=0 for debugging purposes

# The silence model '#' in the HMM set must be a tee-model.
# The HVite will always complain about the 'words' '#' or '&' that are 
# tee-words. It's safe to ignore these warning.

# History: See file DOCU/HISTORY in this dir

# Options:

# Option v=1 or higher causes maus to produce more and more information 
# at stdout. In general v=1 is good value to start (default: v=0).

# Option SIGNAL must point to a readable media file.

# If option BPF is given, maus will try to read the canonical pronunciation
# either from a BAS Partitur Format (BPF) file (tier KAN), file extension 
# *.par|PAR, or from a spead sheet CSV table (*.csv|CSV). The CSV table has 
# the format: two ';'-separated columns, column 1: orthography of a word in UTF-8,
# column 2: canonical pronunciation of the word in SAMPA with or without 
# separating blanks between phonetic symbols. Optional pauses will be inserted
# between words automatically. Note that the SAMPA symbols have to match the
# set of symbols in PARAM/KANINVENTAR. 
# If you want to model additional optional silence intervals within words, use
# the model name '<p:>', e.g.:
# KAN:  0  hOY<p:>t@
# KAN:  1  ?Ist
# If your BPF input files must contain 'silence words' (= words that consist only of a silence
# symbol) use the '<p>' (enforced silence interval) symbol. E.g.:
# KAN:  0  hOYt@
# KAN:  1  <p>
# KAN:  2  ?Ist
# If your BPF input file contains noise markers (e.g. 'door slam') use the 
# '<nib>' symbol (= non-speech interval), e.g.:
# KAN:  0  hOYt@
# KAN:  1  <nib>
# KAN:  2  ?Ist
# If your BPF input file contains human noise (e.g. 'cough') or garbage speech use the 
# '<usb>' symbol (= human noise interval), e.g.:
# KAN:  0  hOYt@
# KAN:  1  <usb>
# KAN:  2  ?Ist

# If option KANSTR is given, maus will use the string of phonemic symbols
# given from command line. Each symbol is separated by the next by a 
# single blank. Note that these symbols have to match the 
# set of symbols in PARAM/KANINVENTAR. Also note that no optional pauses
# will be inserted between words. You have to do that explicitely by 
# inserting the symbol '#'(optional) or '<p>' (non-optional). If you want to force MAUS to model
# a silence interval at a certain point, insert the symbol '<p>'.
# Do not use the symbol '#' as the first or last symbol in KANSTR!
# If option KANSTR is used the option INSORTTEXTGRID and INSKANTEXTGRID will
# have no effect.

# WebService: KANSTR is not supported by the MAUS Webservices. Use the KAN
# tier in the input BPF files instead.

# Option INSYMBOL defines the phonological encoding in the input file; default
# is 'sampa', alternate is 'ipa'; note that only IPA symbols are recognized that 
# correspond to the SAMPA symbols of the selected language (option LANGUAGE); if the
# input file contains other IPA symbols or combinations thereof, an error is issued.
# IPA input does only work, if the IPA symbols are blank separated in the KAN tier.

# If the option OUT for the output is not given, maus will create an
# output file named with the body of the signal file and 
# extension 'mau', 'csv', 'par' or 'TextGrid' in the same 
# location as the signal file (provided the user is allowed to write there).
# Setting OUT=file will write the results into 'file' regardless the 
# setting of OUTFORMAT, except if option OUTFORMAT=emu|EMU|legacyEMU, the resulting two legacy Emu
# files (*.hlb, *.phonetic) or *.EMU will be written to the path of OUT, but named
# as the signal file.
# The output *.mau is not a valid BPF file because it does not contain a 
# header. But it can be appended to the corresponding input BPF file and should match
# the other tiers exactly. To create a valid BPF containing the MAU tier
# use OUTFORMAT=mau-append|par|bpf (works only with BPF|CSV input!).

# Option OUTFORMAT (default: 'mau')
# Default output is a file with extension 'mau' that contains a three-column table
# (tab-separated): 'MAU:' (1st col), word index staring with 0 (2nd col) and the 
# phonetic symbol label (3rd col, coding dependent of option OUTSYMBOL). This is 
# a BPF compatible tier that can be appended to a BPF annotation file. 
# If set to 'csv' a CSV spreadsheet table is produced by the helper 'mausbpf2csv' and the 
# file extension is set to 'csv' to ease loading of the result table into spread 
# sheet processing software.
# If set to 'TextGrid', maus will produce a praat TextGrid
# file instead of the MAU tier file (if the input contains LaTeX-Umlauts in the 
# ORT tier they are converted to UTF-8, since praat cannot handle LaTeX label names). 
# Since TextGrid files contain explicit times, the sampling rate problem is 
# irrelevant in this case. See also options INSORTTEXTGRID, INSKANTEXTGRID.
# Options START/ENDWORD do not work with TextGrid output.
# If set to 'emu', maus will create legacy Emu compatible hierarchy and segmentation
# files, *.hlb and *.phonetic in the path of OUT and named as the input signal
# file (if OUT not given in the location of the signal file). This works only
# if input is read from a BPF file (option BPF=) and the input file contains
# an ORT tier and an KAN tier.
# The resulting Emu hierarchy consists of 3 levels (bundle,ORT,MAU)
# and two additional labels (bundle -> source, ORT -> KAN) and only
# one type of label file (*.phonetic) for level MAU.
# WARNING: for the creation of emu output maus will add a MAU tier to the 
# input BPF, if the input BPF is in the same location as the signal file (SIGNAL=).
# A already existing MAU tier is overwritten without warning; if you want to
# prevent this, place the input BPF in a different directory than the signal file.
# WARNING: Legacy Emu does not tolerate some SAM-PA labels such as curly brackets ({}).
# If you are using a language containing such symbols, you must replace these
# labels by Emu-comform symbols before using the maus-created Emu files.
# If the option OUTFORMAT is set to 'EMU|legacyEMU', maus will do the same as emu, but store
# both legacy Emu files into a single file *.EMU separated by a line '--- cut here ---'.
# Options START/ENDWORD do not work with Emu output.
# If the option OUTFORMAT is set to 'emuR|emuDB', an Emu compatible *_annot.json file is 
# created. The same remarks as to OUTFORMAT='emu' apply here, except that all SAM-PA 
# labels are tolerated here.
# If the option OUTFORMAT is set to 'mau-append' or 'par' or 'bpf', the MAU tier is added to the 
# BPF input file (an existing MAU is being replaced) or a BPF is created from a CSV input file.
# This works only with BPF|CSV input, not with KANSTR input.

# If the option CLEAN=1 is given (default), the script will erases all cached 
# slf, rec, htk and mlf files from TEMP at the end of the script. This 
# is highly recommended unless you know exactly what you are doing and need
# to safe computing time.

# The option PARAM determines another directory containing the parameter
# files of maus (default is $SOURCE/PARAM). By this another ruleset 
# together with all adapted inventory lists, HMM etc. can be chosen.
# Typically this done to select another operating language for maus.
# Note that also a mapping script par2mlf.awk is
# part of PARAM, since the mappings to the internal phoneme
# set GRAPHINVENTAR can be dependend of the set.
# Option LANGUAGE overrides this option.

# The option MMF may be used to superseed the default HMM macro file in $PARAM/MMF.mmf.
# MMF must contain a HMM definition for each phonetic symbol defined in the second 
# column of the $PARAM/DICT file. HMM definitions must have a minimum number of 3 states
# (NUMSTATES >= 5); otherwise option RELAXMINDUR=TRUE will cause an error.

# The option LANGUAGE=rfc5646 can be used instead of PARAM to set the language.
# If set, LANGUAGE will override option PARAM. Currently supported are:
# aus-AU        (= Australian Aboriginal Languages)
# cat,cat-ES
# deu,deu-DE
# ekk,ekk-EE
# eng,eng-GB
# eng-AU
# eng-NZ
# eng-SC
# eng-US
# eus-ES,eus-FR
# fin,fin-FI
# fra,fra-FR
# gsw-CH
# hun,hun-HU
# ita,ita-IT
# jpn,jpn-JP
# kat,kat-GE
# mlt,mlt-MT
# nld,nld-NL,nld-BE
# nor,nor-NO
# pol,pol-PL
# por,por-PT
# ron,ron-RO
# rus,rus-RU
# spa,spa-ES
# sqi,sqi-AL
# swe,swe-SE
# tha-TH
# ltz-LU
# and the non-standard code 'sampa' to denote the language independent MAUS mode 
# (all SAM-PA symbols are allowed in the 
# input BPF KAN tier but they must be blank separated, e.g. instead of 
# 'hOYt@' -> 'h OY t @'). 
# SAMPA symbol 'P' and X-SAMPA diacriticum 'palatalized' e.g. /k'/ are not 
# supported; for palatalisation use /k_j/.

# The option CANONLY=yes causes maus to skip the creation of the statistical
# pronunciation graph and simply align the given string of phonemic symbols.
# This may for instance be useful to align a manually created transcript or
# to avoid erroneous variations in noisy or out-of-domain speech or when using
# maus on a language where no pronunciation rule set is available.
# The script maus will not call the binary word_var-2.0 when CANONLY=yes;
# this might help on platforms where the C++ program does not compile.
# This option is deprecated from version 2.87 upwards; use the new option 
# MODUS=align instead.

# The option WEIGHT weights the influence of the statistical
# MAUS model against the acoustical scores. More
# precisely WEIGHT is multiplied to the MAUS model
# score (log likelihood) before adding the score to 
# the acoustical score within the search. Since the MAUS model in most cases
# will favour the canonical pronunciation, increasing WEIGHT will at some 
# point cause MAUS to choose the canonical pronunciation; lower values of 
# WEIGHT will cause more of the less probable paths to be selected according to acoustic evidence.
# If the acoustic quality of the signal is very good and the HMMs of the language
# are well trained, it makes sense to lower WEIGHT.
# In an evaluation on parts of the German Verbmobil data set (27425 segments)
# which were segmented and labelled manually (MAUS DEV set) this
# heuristical parameter was optimized to 7.0 (which is rather similar to
# the optimal value used in ASR which is 6.5). Therefore the default value
# for WEIGHT is set to 7.0
# WEIGHT was also tested against the MAUS TEST set to confirm the value of
# 7.0. The symmetric inter-labeller agreement for 7.0 was 79.47, for 8.0
# 79.31, for 6.0 79.51 and for 5.0 79.47 and dropping.
# The differences between these
# three values are not significant; therefore we can say that the value of
# 7.0 holds for other data sets.
# Note that this might NOT be the optimal value for other MAUS tasks. For instance 
# for Italian we found that WEIGHT=1.0 yields better results (that is why the 
# default WEIGHT is set to 1.0 for LANGUAGE=ita).

# The option INSPROB influences the probability of deletion of segments.
# It is a constant factor (a constant value added to the log likelihood score)
# after each segment. Therefore, a higher value of INSPROB will cause
# the probability of segmentations with more segments go up, thus decreasing
# the probability of deletions (and increasing the probability of insertions,
# which are rarely modelled in the rule set). This parameter has been
# evaluated on parts of the German Verbmobil data set (27425 segments)
# which were segmented and labelled manually (MAUS DEV set) and found to have
# its optimum at 0.0 (which is nice). Therefore we set the default value
# of INSPROB to 0.0
# INSPROB was also tested against the MAUS TEST set to confirm the value of
# 0.0. It had an optimum at 0.0 as well.
# Note that this might NOT be the optimal value for other MAUS tasks.

# The option 'allowresamp=yes' will cause maus to automatically resample
# input signal files to 16kHz sampling rate using sox (polyphase).
# For this to work, sox must be installed on your system. Note that any kind 
# of re-sampling might deteriorate your signal.

# WebService: in the webservice 'allowresamp=yes' is always set.

# The option MINPAUSLEN controls the behaviour of optional inter-word silence.
# Inter-word silence is modelled by the symbols '#', '&' (only for 
# compatibility) and '<p:>' (which may denote a word-internal silence
# interval), not by '<p>' which models explicit silence intervals.
# If set to 1, maus will detect all inter-word silence intervals that can be
# found (minimum length for a silence interval is then 10 msec = 1 frame). 
# If set to values > 1, 
# the minimum length for an inter-word silence interval to be detected 
# is set to n*10 msec. For example MINPAUSLEN=5 will cause MAUS to 
# suppress inter-word silence intervals up to a length of 40msec.
# Since 40 msec seems to be the border of perceivable silence, we set 
# this option default to 5.
# With other words: inter-word silences smaller than 50msec are not segmented 
# but rather distributed equally to the adjacent segments.
# If one of the adjacent segments happens to be a plosive (set of plosives
# defined in PARAM/PLOSIVES) then the deleted silence interval is added totally
# to the plosive; if both adjacent segments are plosives, the interval is
# equally spread as with non-plosive adjacent segments.
# (Here we assume that the frame length is always 10msec!)
# If you want to force MAUS to insert a silence interval at a certain point
# use the symbol '<p>'.

# The options STARTWORD and ENDWORD control which words from the input
# BPF file are selected for the canonical input (only for mau format output). 
# Default STARTWORD is 0, ENDWORD is 999999 (all words). 
# These options are useful, if the BPF file
# contains not only one utterance but a whole dialogue, whereas the 
# input sound file contains only a part of it (e.g. from a chunk segmentation).
# The timing information in the resulting segmentation is of course in 
# relation to the input signal file.
# Note that the word numbering is passed through to the BPF output (mau,csv,par),
# if selected. Therefore *.mau files (OUTFORMAT=mau) or *.par files
# (OUTFORMAT=mau-append|par|bpf) are still conform to the source BPF file.
# These options have no effect if the option BPF is not used; if the 
# option OUTFORMAT is set to emu|EMU|legacyEMU|emuR|emuDB, the script terminates with an error;
# if set to TextGrid, options INSORTTEXTGRID/INSKANTEXTGRID are disabled.
# In combination with option USETRN and a TRN entry in the input BPF, these
# options may be used to perform partial segmentations in larger recordings.
# See the script maus.trn in the maus distribution package for an example.

# Option MAUSSHIFT causes the calculated MAUS segment boundaries to be 
# shifted by MAUSSHIFT msec. (default : 0)
# As reported by different sources, segmentations produced by a HMM model
# show a consistent shift in the segment boundaries of about 8-12msec too early 
# (Probably due to average systematic shifts in the training material, see the BA thesis 
# of Bernhard Jackl 2016).

# Option INSORTTEXTGRID
# If set to 'yes' and option OUTFORMAT is set to 'TextGrid' and input
# is read from a BPF|CSV, maus will try to identify either an ORT tier
# or - if that fails - a KAN tier (must be there as input!) and
# write an additional interval section into the TextGrid file
# containing the word segmentation based on the underlying MAUS
# segmentation. The tier is called either 'ORT' or 'KAN'
# respectively; it contains non-labeled segments where MAUS labelled
# a silence interval and a segment either labelled with the
# orthography or the canonical transcript for the words. If set to
# 'no' the regular Textgrid output with one interval section is
# produced.
# This option does obviously not work with CSV input.

# Option INSKANTEXTGRID
# Same as INSORTTEXTGRID with the KAN tier. If INSORTTEXTGRID is set as well
# both tiers are exported after another before the phonemic tier. If the 
# source BPF does not contain an ORT tier, only one KAN tier is being exported
# Both options INSORTTEXTGRID and INSKANTEXTGRID will have no effect if the 
# canonic transcript is read from the option KANSTR instead from a BPF|CSV file
# (option BPF).

# Option USETRN (works only with BPF=*.par|PAR)
# If set to 'yes' maus will search the input BPF for a TRN tier
# that segments the utterance within the recording. If the input 
# BPF contains no TRN tier, a warning is issued and processing resumes
# as usual. If only one TRN entry is found, maus will cut
# out the segment defined there and run the MAUS segmentation only within the
# cut out segment. Afterwards the offset and final cut off are
# re-calculated into the final result file. Note that word links 
# of the TRN entry are ignored, that is the script will segment all words 
# given in the KAN tier within the time segment defined by the TRN entry. 
# (this functionality is kept for backwards compatibility, because that is the 
# way maus handled TRN input before version 2.50; if you want to restrict the 
# maus segmentation not only to a time segment but also to a certain span of
# words in the input BPF, use the options STARTWORD/ENDWORD.)
# If more than one TRN entry is found, maus assumes that a chunk segmentation
# is available for the input recording 
# (see http://www.bas.uni-muenchen.de/Bas/BasFormatseng.html#TRN for details).
# The script will then call the helper maus.trn (must be installed in the 
# same location as this script) to segment each chunk separately and aggregates the 
# results into the results file.
# Chunk segmentations may contain overlapping chunks which cause problems when
# aggregating the segmentation results of the individual chunks into one 
# TextGrid or Emu result file (since these formats do not support intervals
# with negative times, while BPF does). Therefore if the option OUTFORMAT
# is set to TextGrid, the script will check the input for overlaps. If there
# are overlaps, the script will issue an error message and terminate. If 
# OUTFORMAT is set to emu|EMU|legacyEMU|emuR|emuDB, the script will terminate 
# with an error (not supported yet).
# The following option value 'force' is deprecated in 4.11, use PRESEG=true instead : 
# If set to 'force' maus will search for a helper WAV2TRN to run an online
# pre-segmentation ignoring any TRN entries in the input BPF. If the helper
# is not found, maus will issue a warning and proceed without any pre-segmentation.
# Default is set to 'no'. If no BPF is given on the command line or for CSV input 
# this option is ignored.

# Option PRESEG
# If set to "true", maus will use the helper WAV2TRN to perform a
# pre-segmentation; if WAV2TRN is not installed on the system,
# a warning is issued and the option is ignored; a TRN tier in the BPF
# input is being ignored. If this option is set in combination with USETRN=true
# and the input BPF contains a chunk segmentation (tier TRN), then the
# presegmentation is carried out for every single chunk.

# Option RULESET
# Via this option the default statistical rule set 'rml-0.95.rul' for which maus is
# looking in PARAM can be overwritten. If the filename starts with
# an '/' maus does not look for that file in PARAM but rather loads 
# it directly (absolute path!). Otherwise maus will try to locate 
# the file in the given PARAM dir.
# Please note that the file name extension must be either of '.rul' 
# (denoting a statistical rule set) or '.nrul' (denoting a non-statistical
# rule set). Other extensions are not accepted.
# Please keep in mind that the phonetic symbols used in the rule set you 
# use must in any case match the inventory of symbols defined in 
# PARAM/GRAPHINVENTAR (numerical symbols preceded by 'P', e.g. 'P6'). 
# For details on how to formulate new rule sets, see the docu file USAGE.
# If RULESET points to a file named 'dummy.rul', the script will assume that no 
# valid rule set exists for the selected language and will force the option 
# MODUS=align (thus performing a simple forced alignment in any case). 
# The latter will be indicated by a WARNING message.

# Option PRINTINV
# If this option is set to 'yes', maus will not process any given input
# but merely print the set of allowed SAM-PA input symbols as a simple
# UTF-8 table to the output file OUT or to SAMPA.tab if OUT is not given. 
# This is useful, if you are not sure, which symbols are allowed in the 
# selected language, especially in SAMPA mode (see options PARAM or LANGUAGE).
# The UTF-8 table has the following columns:
# MAUS:           SAMPA symbols as supported by MAUS (KAN tier input)
# SAMPA:          Original SAMPA symbol
# IPA:            IPA symbol (if applicable, coded UTF-8)
# PHONETICS:      phonetic description (if applicable)
# EXAMPLES:       orthographic examples (if applicable)
# ISO639-3:       Internal Iso 639-3 code of the SAMPA set
#                 'xxx' = used in multiple languages
#                 'nze' = New Zealand English
#                 'aus' = Australian English
#                 'use' = American English

# Deprecated Option OUTIPA, superseded by option 'OUTSYMBOL'.
# Use OUTSYMBOL=ipa in the future.
# If set to 'true', maus replaces all SAMPA-Symbols in the output 
# files by the corresponding IPA coded in UTF-8. 
# OUTFORMAT=TextGrid should work in most cases; the effect in Emu 
# is unknown; silence intervals, human noise and other noise are coded
# as (...), (..) and (.) respectively. 
# Requires a mapping table defined in IPATABLE1|2 with SAMPA in the 1st
# and IPA in the 3rd column.

# Option OUTSYMBOL=sampa
# Encoding of phonetic symbol output in all file formats;
# requires a mapping table defined in IPATABLE1|2.
# if set to 'ipa', maus replaces all SAMPA-Symbols in the output
# files by the corresponding IPA coded in UTF-8. Note that BPF tiers
# MAU (*.mau, option OUTFORMAT=mau) then do not conform to the BPF standard;
# OUTFORMAT=TextGrid should work in most cases; the effect in legacy Emu
# is unknown; silence intervals, human noise and other noise are coded
# as (...), (..) and (.) respectively;
# if set to 'manner', maus replaces all SAMPA-Symbols in the output
# files by the corresponding IPA manner class (line of IPA consonant chart) +
# 'vowel' + 'diphthong';
# if set to 'place', maus replaces all SAMPA-Symbols in the output
# files by the corresponding IPA place of articulation (col of IPA consonant
# chart) + 'front', 'mid' and 'back' for vowels/diphthongs.

# Option NOINITIALFINALSILENCE=no
# If set to 'yes', the maus script will suppress the automatic modelling 
# of optional initial/final silence intervals; this can be useful if the recording
# starts with a stop and you don't want MAUS to place a silence interval '<P:>' before
# the stop (instead of the pause).

# Option RELAXMINDUR=no
# By default maus has a minimum duration of phone segments of 3 frames (= 30msec)
# or 40msec for long/tense vowels or diphthongs. This makes sense to achieve robust results
# with a minimum of falsely inserted short segments. In some cases (duration of very short
# consonants) this causes a 'ceiling effect' at 30msec in the analysed data. If this option
# is set to 'TRUE', this lower minimum duration bound is relaxed to 10msec (1 frame) and
# 20msec respectively.
# Technically this is done by converting the loaded HMM set in MMF on-the-fly, adding a 0.05 transition from 
# (virtual) state 2 to 5 (= real states 1 to EXIT) allowing HMMs with 3 states to exit from the 
# first state and HMMs with 4 states to jump from the first to the fourth state.
# The file defined in RELAXMINDURHED must be a HHEd script with the single command 
# AT 2 5 0.05 { *.transP }

# Option BPFTHRESHOLD=prevalThreshold
# Since the processing time of the Viterbi increases quadratically with input length, we do 
# a prevalidation of the input whether it makes sense to process the data. If the number of 
# words (KAN tier lines) in the BPF input file (which roughly corresponds to the length of input) exceeds 
# the number given in this option, the maus script issues an error and terminates with exit code 2
# Do not change this number unless you know exactly what you are doing.

# Option MODUS - macro option
# Operation modus. Default is 'standard' which denotes the normal
# MAUS technique as published in Schiel ICPhS 1999.
# If set to 'align', MAUS proceeds as in 'standard', but doesn't
# model pronunciation, i.e. it aligns only to the given pronunciation
# using 'forced alignment' technique. This option corresponds to
# the deprecated former option CANONLY=true.
# If set to 'bigram', MAUS will run a phone recognition on the
# phone classes as defined in the first column of the dictionary
# file defined in DICTBIGRAM constrained only by a phone bigram
# (HTK lattice) as defined in LATBIGRAM.
# If set to 'bigram', the following options
# are set to default values overriding command line options:
# INSORTTEXTGRID = false
# INSKANTEXTGRID = false
#   and the following options are being ignored: BPF, KANSTR,
# INFORMAT,CANONLY,RULESET,INSPROB,STARTWORD,ENDWORD,USETRN=true
#   and the option OUTFORMAT=emu|EMU|legacyEMU|mau-append|emuR will cause an error.

# Exit codes

# 0 : everything seems ok
# 1 : serious error
# 2 : problem with the input signal file
# 3 : printing allowed SAM-PA set to output file
# 4 : main arguments missing, printing help message to stdout
# 5 : a helper program reports serious error

ENDE
  exit 4
endif  

# Translate the verbosity level v (1..4) into the TRACE value; any other
# value of v leaves TRACE untouched.
switch ( $v )
  case 1:
    set TRACE = 0
    breaksw
  case 2:
    set TRACE = 1
    breaksw
  case 3:
    set TRACE = 3
    breaksw
  case 4:
    set TRACE = 7
    breaksw
endsw

# announce the start of processing in verbose mode
if ( $v > 0 ) then
  echo "DEBUG: ${0:t} : Starting on `date`"
endif

# the temporary directory must exist before any intermediate file is written
if ( ! -d $TEMP ) then
  echo "ERROR: ${0:t} : cannot find temporary dir $TEMP - exiting" >> /dev/stderr
  echo "       please create such a dir and define it to maus" >> /dev/stderr
  echo "       using the option 'TEMP=...'" >> /dev/stderr
  exit 1
endif

# prefix for the names of temporary files: shell process id plus epoch seconds
set PID = $$_`date "+%s"`_

# check parameter set
# OUTFORMAT: fold the accepted aliases onto their canonical spelling and
# reject everything that is not a known output format.
switch ( $OUTFORMAT )
  case emuDB:
    set OUTFORMAT = "emuR"
    breaksw
  case bpf:
  case BPF:
  case mau-append:
  case PAR:
    set OUTFORMAT = "par"
    breaksw
  case csv:
  case CSV:
    # spreadsheet output needs the executable conversion helper
    if ( ! -x $MAUSBPF2CSV ) then
      echo "ERROR: ${0:t} : helper '$MAUSBPF2CSV' required for OUTFORMAT = $OUTFORMAT not found - exiting" >> /dev/stderr
      exit 5
    endif
    set OUTFORMAT = "csv"
    breaksw
  case mau:
  case par:
  case emuR:
  case emu:
  case EMU:
  case legacyEMU:
  case TextGrid:
    # already a canonical value, nothing to do
    breaksw
  default:
    echo "ERROR: ${0:t} : unknown output format OUTFORMAT = $OUTFORMAT - exiting" >> /dev/stderr
    exit 1
endsw

# INFORMAT: bpf-sampa is a shortcut that selects the language sampa,
# bpf is the plain default, anything else is an error.
switch ( $INFORMAT )
  case bpf-sampa:
    set LANGUAGE = "sampa"
    breaksw
  case bpf:
    breaksw
  default:
    echo "ERROR: ${0:t} : unknown option INFORMAT = $INFORMAT - exiting" >> /dev/stderr
    exit 1
endsw
# LANGUAGE: rfc5646 language codes, singular iso639-3 codes (just
# for backwards compatibility) and 'sampa' 
# A non-empty LANGUAGE overrides PARAM: the parameter dir then becomes
# SOURCE/PARAM.<LANGUAGE>.
if ( "$LANGUAGE" != "" ) set PARAM = "$SOURCE/PARAM.${LANGUAGE}"
# verbose hint for the language independent SAMPA mode
if ( $v > 0 && ( "$LANGUAGE" == "sampa" || "$PARAM" == "$SOURCE/PARAM.SAMPA" ) ) echo "DEBUG: ${0:t} : Using special SAMPA mode; expect input KAN strings to be blank-separated"
# the selected parameter dir must exist, otherwise nothing can be loaded
if ( ! -d $PARAM ) then 
  echo "ERROR: ${0:t} : Unknown language code $LANGUAGE or PARAM dir $PARAM not found - exiting" >> /dev/stderr
  echo "       In case you are using LANGUAGE codes 'aus','nze': these have been deprecated with maus version 2.141" >> /dev/stderr
  exit 1
endif
if ( $v > 0 ) echo "DEBUG: ${0:t} : Found parameter set $PARAM for LANGUAGE $LANGUAGE"

# check for language specific defaults
# The file is sourced into this shell, so every variable it sets is visible
# to the code below; the checks that follow look for DEFWEIGHT, DEFMAUSSHIFT
# and DEFRULESET.
if ( -e $PARAM/DEFAULTS ) source $PARAM/DEFAULTS

# check and set language specific parameters
# Each option that still holds the placeholder value "default" first gets the
# global fallback and is then overridden by the matching DEF* variable, if one
# is defined (presumably by the DEFAULTS file of the parameter set).
if ( $WEIGHT == "default" ) then
  set WEIGHT = 1.0                 # global default
  if ( $?DEFWEIGHT ) then
    set WEIGHT = $DEFWEIGHT
  endif
endif

if ( $MAUSSHIFT == "default" ) then
  set MAUSSHIFT = 0                # global default
  if ( $?DEFMAUSSHIFT ) then
    set MAUSSHIFT = $DEFMAUSSHIFT
  endif
endif

# an empty RULESET is silently treated like the placeholder "default"
if ( "$RULESET" == "" ) set RULESET = "default"

if ( "$RULESET" == "default" ) then
  set RULESET = "rml-0.95.rul"     # global default
  if ( $?DEFRULESET ) then
    set RULESET = $DEFRULESET
  endif
endif

# setting macro options
# MODUS is a macro option: depending on its value (standard, align, bigram)
# other options are overridden or rejected; any other value is an error.
if ( $MODUS == 'standard' ) then 
  # the deprecated CANONLY switch still works: it turns standard into align
  if ( $CANONLY == "TRUE" ) then 
    echo "WARNING: ${0:t} : conflicting options MODUS = $MODUS and CANONLY (deprecated) = $CANONLY" >> /dev/stderr
    echo "         Option CANONLY is deprecated - use option MODUS=align in the future" >> /dev/stderr
    echo "         Switching to forced alignment modus - rule set is being ignored" >> /dev/stderr
    set MODUS = "align"
  else
    if ( $v > 0 ) echo "DEBUG: ${0:t} : Using standard modus"
  endif
else if ( $MODUS == 'align' ) then
  if ( $v > 0 ) echo "DEBUG: ${0:t} : Switching to forced alignment modus - rule set is being ignored"
else if ( $MODUS == 'bigram' ) then
  if ( $v > 0 ) echo "DEBUG: ${0:t} : Switching to bigram modus" 
  # CANONLY has no meaning in bigram modus: warn and carry on
  if ( $CANONLY == "TRUE" ) then 
    echo "WARNING: ${0:t} : conflicting options MODUS = $MODUS and CANONLY (deprecated) = $CANONLY" >> /dev/stderr
    echo "         Option CANONLY is deprecated - use option MODUS=align in the future" >> /dev/stderr
    echo "         Ignoring option CANONLY" >> /dev/stderr
  endif
  # the following three options are switched off with a warning
  if ( $USETRN == 'TRUE' ) then
    set USETRN = 'FALSE'
    echo "WARNING: ${0:t} : option USETRN set to 'false' in modus 'bigram'" >> /dev/stderr
  endif
  if ( $INSORTTEXTGRID == 'TRUE' ) then
    set INSORTTEXTGRID = 'FALSE'
    echo "WARNING: ${0:t} : option INSORTTEXTGRID set to 'false' in modus 'bigram'" >> /dev/stderr
  endif
  if ( $INSKANTEXTGRID == 'TRUE' ) then
    set INSKANTEXTGRID = 'FALSE'
    echo "WARNING: ${0:t} : option INSKANTEXTGRID set to 'false' in modus 'bigram'" >> /dev/stderr
  endif
  #set BPF = 'BIGRAM'
  # a canonical string given on the command line is discarded
  set KANSTR = ''
  # legacy Emu output is rejected in bigram modus
  if ( $OUTFORMAT == "emu" || $OUTFORMAT == "EMU" || $OUTFORMAT == "legacyEMU" ) then 
    echo "ERROR: ${0:t} : cannot run in bigram modus with emu|EMU|legacyEMU output format - exiting" >> /dev/stderr
    exit 1
  endif
else
  echo "ERROR: ${0:t} : unknown modus selected MODUS = $MODUS - exiting" >> /dev/stderr
  exit 1
endif

set IPATABLE = $PARAM/KANINVENTAR.inv
                      # table that maps all known SAMPA symbols (1st column) to the corresponding
                      # IPA symbol (sequence) coded in UTF-8 (3rd column), the manner class (7th col),
                      # the place of articulation (8th col).

# locate the HMM inventory list inside the parameter set
set HMMINVENTAR = $PARAM/$HMMINVENTAR
if ( ! -e $HMMINVENTAR ) then
  # errors go to stderr like all other diagnostics of this script
  echo "ERROR: ${0:t} : cannot find HMMINVENTAR $HMMINVENTAR" >> /dev/stderr
  exit 1
endif
# check for HMM, transform HMM to a temporary HMM with additional transitions if RELAXMINDUR is set
# an empty MMF option selects the macro file of the parameter set
if ( "$MMF" == "" ) set MMF = $PARAM/MMF.mmf
if ( ! -e $MMF ) then
  echo "ERROR: ${0:t} : cannot find MMF $MMF" >> /dev/stderr
  exit 1
endif
if ( $RELAXMINDUR == "TRUE" ) then
  if ( ! -e $RELAXMINDURHED ) then
    # without the HHEd edit script the option cannot be honoured: warn and go on
    echo "WARNING: ${0:t} : cannot find helper script $RELAXMINDURHED for option RELAXMINDUR - ignoring RELAXMINDUR" >> /dev/stderr
  else
    if ( $v > 0 ) echo "DEBUG: ${0:t} : Option RELAXMINDUR: adding transitions to loaded HMM to allow minimum duration segments" 
    # write an edited copy of the HMM set into TEMP; the original MMF is not touched
    HHEd -H $MMF -w $TEMP/${PID}.MMF.mmf $RELAXMINDURHED $HMMINVENTAR
    set hhedstatus = $status
    if ( $hhedstatus != 0 ) then
      echo "WARNING: ${0:t} : error $hhedstatus occurred in helper HHEd when adding transitions to HMM - using unchanged HMM - option RELAXMINDUR ignored" >> /dev/stderr
    else
      # from here on the edited copy is used instead of the original
      set MMF = $TEMP/${PID}.MMF.mmf
    endif
  endif
endif

# Turn the bare file names of the parameter files into full paths inside
# PARAM, then make sure that each required file exists. PRECONFIG already
# holds a full path and is only checked.
set DICT = $PARAM/$DICT
set HVITECONF = $PARAM/$HVITECONF
set KANINVENTAR = $PARAM/$KANINVENTAR

if ( ! -e $DICT ) then
  echo "ERROR: ${0:t} : cannot find DICT $DICT" >> /dev/stderr
  exit 1
endif

if ( ! -e $HVITECONF ) then
  echo "ERROR: ${0:t} : cannot find HVITECONF $HVITECONF" >> /dev/stderr
  exit 1
endif

if ( ! -e $PRECONFIG ) then
  echo "ERROR: ${0:t} : cannot find PRECONFIG file $PRECONFIG" >> /dev/stderr
  exit 1
endif

if ( ! -e $KANINVENTAR ) then
  echo "ERROR: ${0:t} : cannot find KANINVENTAR $KANINVENTAR" >> /dev/stderr
  exit 1
endif
# output list of allowed input SAM-PA symbols
# With PRINTINV set no input is processed: the symbol table of the parameter
# set is copied to OUT (SAMPA.tab if OUT is empty) and the script ends with
# exit code 3.
if ( $PRINTINV == 'TRUE' ) then
  if ( $OUT == "" ) then
    set OUT = SAMPA.tab
  endif
  if ( $v > 0 ) then
    echo "DEBUG: ${0:t} : Printing list of allowed SAM-PA symbols to $OUT"
  endif
  # probe that the output file can be created or updated
  touch $OUT
  if ( $status != 0 ) then
    echo "ERROR: ${0:t} : cannot write to output file $OUT - exiting" >> /dev/stderr
    exit 1
  endif
  if ( -e ${KANINVENTAR}.inv ) then
    cat ${KANINVENTAR}.inv >! $OUT
  else
    # a missing table is only a warning; the exit code stays 3
    echo "WARNING: ${0:t} : cannot find SAMPA table ${KANINVENTAR}.inv" >> /dev/stderr
  endif
  exit 3
endif
# Check the remaining parameter file, the awk helper scripts and the
# converter that the selected OUTFORMAT needs; each failure ends the script
# with exit code 1.
set GRAPHINVENTAR = $PARAM/$GRAPHINVENTAR
if ( ! -e  $GRAPHINVENTAR ) then 
  echo "ERROR: ${0:t} : cannot find GRAPHINVENTAR $GRAPHINVENTAR" >> /dev/stderr
  exit 1
endif  
if ( ! -e  $SOURCE/kan2mlf.awk ) then 
  echo "ERROR: ${0:t} : cannot find script kan2mlf.awk in $SOURCE" >> /dev/stderr
  exit 1
endif  
if ( ! -e  $SOURCE/rec2mau.awk ) then 
  echo "ERROR: ${0:t} : cannot find script rec2mau.awk in $SOURCE" >> /dev/stderr
  exit 1
endif  
# BPF output needs a BPF or CSV input file, except in bigram modus
if ( ( $OUTFORMAT == "par" ) && ( $BPF == "" ) && ( $MODUS != "bigram" ) ) then
  echo "ERROR: ${0:t} : option OUTFORMAT=mau-append|par|bpf works only with BPF|CSV input not with KANSTR input - exiting" >> /dev/stderr
  exit 1
endif  
# every converter must be present and executable for its output format
if ( $OUTFORMAT == "TextGrid" && ( ! -x $PAR2TEXTGRID ) ) then 
  echo "ERROR: ${0:t} : cannot find script $PAR2TEXTGRID required for praat compatible output" >> /dev/stderr
  exit 1
endif  
# NOTE(review): OUTFORMAT was normalised to csv and the same helper was tested
# in the OUTFORMAT check further up, so this test looks redundant - confirm
if ( ( $OUTFORMAT == "csv" || $OUTFORMAT == "CSV" ) && ( ! -x $MAUSBPF2CSV ) ) then 
  echo "ERROR: ${0:t} : cannot find script $MAUSBPF2CSV required for csv (spreadsheet) output" >> /dev/stderr
  exit 1
endif  
if ( ( $OUTFORMAT == "emu" || $OUTFORMAT == "EMU" || $OUTFORMAT == "legacyEMU" ) && ( ! -x $PAR2EMU ) ) then 
  echo "ERROR: ${0:t} : cannot find script $PAR2EMU required for legacy Emu output (*.hlb, *.phonetic)" >> /dev/stderr
  exit 1
endif  
if ( ( $OUTFORMAT == "emuR" ) && ( ! -x $PAR2EMUR ) ) then 
  echo "ERROR: ${0:t} : cannot find script $PAR2EMUR required for Emu output (*_annot.json)" >> /dev/stderr
  exit 1
endif  
# all Emu flavours need a BPF input file, except in bigram modus
if ( $OUTFORMAT == "emu" || $OUTFORMAT == "EMU" || $OUTFORMAT == "legacyEMU" || $OUTFORMAT == "emuR" ) then 
  if ( $BPF == "" && $MODUS != "bigram" ) then
    echo "ERROR: ${0:t} : cannot create Emu output without BPF input - use option BPF" >> /dev/stderr
    echo "       BPF file input must contain the tiers ORT and KAN" >> /dev/stderr
    exit 1
  endif
endif  
# find rule sets/bigram data etc. for the different operation modi
#   align    : nothing to load, the rule set is ignored
#   standard : locate RULESET (as given, or inside PARAM), check its extension
#              and content; a dummy rule set forces MODUS=align
#   bigram   : locate the bigram dictionary and the phone bigram lattice
if ( $MODUS == "align" || $MODUS == "ALIGN" ) then
  if ( $v > 0 ) then
    echo "DEBUG: ${0:t} : Doing forced alignment to the phonological input"
    echo "      helper word_var-2.0 is not called; rule set ignored."
  endif
  set MODUS = "align"
else if ( $MODUS == "standard" ) then
  if ( ! -e "$RULESET" ) then
    # try to find RULESET in PARAM (only if it is not an absolute path)
    if ( `echo "$RULESET" | sed 's/^\(.\).*$/\1/'` != '/' ) then
      # Test the candidate path directly and keep RULESET unchanged until the
      # file is known to exist, so that the error message below can still
      # name the rule set that was asked for.
      if ( ! -e "$PARAM/${RULESET}" ) then
        echo "ERROR: ${0:t} : cannot find RULESET $RULESET (not even in ${PARAM})" >> /dev/stderr
        echo "       maybe the language selected does not have a rule set yet" >> /dev/stderr
        echo "       try using the option MODUS=align to work without rule set" >> /dev/stderr
        exit 1
      endif
      set RULESET = "$PARAM/${RULESET}"
    else
      echo "ERROR: ${0:t} : cannot find RULESET $RULESET" >> /dev/stderr
      echo "       maybe the language selected does not have a rule set yet" >> /dev/stderr
      echo "       try using the option MODUS=align to work without rule set" >> /dev/stderr
      exit 1
    endif
  endif
  if ( "${RULESET:e}" != 'rul' && "${RULESET:e}" != 'nrul' ) then
    echo "ERROR: ${0:t} : extension of RULESET $RULESET is not 'rul' or 'nrul'" >> /dev/stderr
    exit 1
  endif
  # check for dummy rule sets, which should not be used with MODUS=standard
  # (the long listing also shows the target of a symbolic link)
  ls -l "${RULESET}" | grep -q 'dummy.rul'
  if ( $status == 0 ) then
    echo "WARNING: ${0:t} : dummy rule set found, cannot be used with MODUS=standard" >> /dev/stderr
    echo "         Forcing option MODUS=align" >> /dev/stderr
    echo "         Doing forced alignment to the phonological input" >> /dev/stderr
    echo "         Results are valid, but no pronunciation modelling was applied" >> /dev/stderr
    set MODUS = align
  else
    # no dummy.rul set : check for type matching extension (word_var will crash on mismatches!)
    if ( "${RULESET:e}" == "rul" ) then
      # extension indicates statistical rule set: each line must end with '000'
      grep -v -q '000$' "${RULESET}"
      if ( $status == 0 ) then
        echo "ERROR: ${0:t} : RULESET $RULESET is not a statistical rule set (found a line without trailing number)" >> /dev/stderr 
        exit 1
      endif
    else
      # extension indicates phonological rule set: each line must not end with '000' 
      grep -q '000$' "${RULESET}"
      if ( $status == 0 ) then
        echo "ERROR: ${0:t} : RULESET $RULESET is not a phonological rule set (found a line with trailing number)" >> /dev/stderr 
        exit 1
      endif
    endif
  endif
else if ( $MODUS == "bigram" ) then
  # empty options select the bigram files of the parameter set
  if ( $DICTBIGRAM == "" ) set DICTBIGRAM = $PARAM/DICT.bigram
  if ( $LATBIGRAM == "" ) set LATBIGRAM = $PARAM/LAT.bigram
  if ( ! -e $DICTBIGRAM ) then
    echo "ERROR: ${0:t} : cannot find bigram dictionary DICTBIGRAM = $DICTBIGRAM - exiting" >> /dev/stderr
    echo "       most likely you have selected a LANGUAGE for that no bigram modus is (yet) available" >> /dev/stderr
    exit 1
  endif
  if ( ! -e $LATBIGRAM ) then
    echo "ERROR: ${0:t} : cannot find phone bigram LATBIGRAM = $LATBIGRAM - exiting" >> /dev/stderr
    exit 1
  endif
else
  echo "ERROR: ${0:t} : unknown option MODUS = $MODUS - exiting" >> /dev/stderr
  exit 1
endif
# sox is mandatory: stop if it cannot be found in the PATH
which sox >& /dev/null
if ( $status != 0 ) then 
    echo "ERROR: ${0:t} : sox is not installed on your system or not in your PATH - exiting" >> /dev/stderr
    exit 1
endif  
# the additional word tiers exist only in TextGrid output made from a BPF
# input file; in every other case both options are silently switched off
if ( ( $BPF == "" || $OUTFORMAT != "TextGrid" ) && ( $INSORTTEXTGRID == "TRUE" || $INSKANTEXTGRID == "TRUE" ) ) then 
  set INSORTTEXTGRID = "FALSE"
  set INSKANTEXTGRID = "FALSE"
  # this WARNING irritates users in the standard applications of maus.pipe, and, frankly the WARNING is not very helpful
  #echo "WARNING: ${0:t} : options INSORTTEXTGRID and INSKANTEXTGRID have no effect" >> /dev/stderr
endif  
# a word range other than the full range 0..999999 also disables the word
# tiers, this time with a warning
if ( $OUTFORMAT == "TextGrid" && ( $STARTWORD != 0 || $ENDWORD != 999999 ) && ( $INSORTTEXTGRID == "TRUE" || $INSKANTEXTGRID == "TRUE" ) ) then 
  set INSORTTEXTGRID = "FALSE"
  set INSKANTEXTGRID = "FALSE"
  echo "WARNING: ${0:t} : options INSORTTEXTGRID and INSKANTEXTGRID disabled" >> /dev/stderr
  echo "         since we are segmenting a subsection of the BPF file (options STARTWORD and ENDWORD)" >> /dev/stderr
endif  
# a word range cannot be combined with any Emu output format
if ( ( $STARTWORD != 0 || $ENDWORD != 999999 ) && ( $OUTFORMAT == "emu" || $OUTFORMAT == "EMU" || $OUTFORMAT == "legacyEMU" || $OUTFORMAT == "emuR" ) ) then 
  echo "ERROR: ${0:t} : options STARTWORD/ENDWORD cannot be used with Emu output (emu|EMU|legacyEMU|emuR|emuDB) - exiting" >> /dev/stderr
  exit 1
endif  
# the word range must not be empty
if ( $STARTWORD > $ENDWORD ) then 
  echo "ERROR: ${0:t} : option STARTWORD ($STARTWORD) must smaller/equal to ENDWORD ($ENDWORD) - exiting" >> /dev/stderr
  exit 1
endif  
# Symbol encoding of input (INSYMBOL) and output (OUTSYMBOL).
# The deprecated option OUTIPA is mapped onto OUTSYMBOL=ipa, but only if
# OUTSYMBOL still holds the value sampa; otherwise OUTIPA is ignored.
if ( $OUTIPA == "TRUE" ) then
  echo -n "WARNING: ${0:t} : usage of deprecated option OUTIPA" >> /dev/stderr
  if ( $OUTSYMBOL == "sampa" ) then
    set OUTSYMBOL = "ipa"
    echo " - setting option OUTSYMBOL=ipa" >> /dev/stderr
  else
    echo " - ignored" >> /dev/stderr
  endif
endif
# reject unknown encodings; temporary files are removed first if CLEAN is set
if ( $INSYMBOL != "ipa" && $INSYMBOL != "sampa" ) then
  echo "ERROR: ${0:t} : unknown value $INSYMBOL for option INSYMBOL, valids are sampa|ipa" >> /dev/stderr
  if ( $CLEAN == "TRUE" ) rm -rf $TEMP/${PID}* >& /dev/null
  exit 1
endif
if ( $OUTSYMBOL != "sampa" && $OUTSYMBOL != "ipa" && $OUTSYMBOL != "manner" && $OUTSYMBOL != "place" ) then
  echo "ERROR: ${0:t} : unknown value $OUTSYMBOL for option OUTSYMBOL, valids are sampa|ipa|manner|place" >> /dev/stderr
  if ( $CLEAN == "TRUE" ) rm -rf $TEMP/${PID}* >& /dev/null
  exit 1
endif
# check for IPA mapping tables
# Every encoding other than sampa needs the mapping table IPATABLE; if the
# table is missing or looks corrupt, fall back to sampa on both sides.
if ( $OUTSYMBOL == "ipa" || $OUTSYMBOL == "manner" || $OUTSYMBOL == "place" || $INSYMBOL == "ipa" ) then
  if ( ! -e $IPATABLE ) then
    # report the table that was actually tested
    echo "WARNING: ${0:t} : you requested symbol mapping in input or output other than SAMPA but no mapping table ${IPATABLE} can be found - switching to (default) SAMPA symbols" >> /dev/stderr
    set OUTSYMBOL = "sampa"
    set INSYMBOL = "sampa"
  else
    # sanity check: the table must contain the expected header line
    grep -q '^MAUS.*IPA.*MANNER.*PLACE' $IPATABLE
    if ( $status != 0 ) then
      echo "WARNING: ${0:t} : you requested IPA symbols in input/output (options INSYMBOL/OUTSYMBOL) but the mapping table $IPATABLE is probably corrupt - switching to (default) SAMPA symbol encoding for input/output" >> /dev/stderr
      set OUTSYMBOL = "sampa"
      set INSYMBOL = "sampa"
      set IPATABLE = ""
    endif
  endif
endif
if ( $v > 0 && $MODUS != "bigram" ) echo "DEBUG: ${0:t} : Expecting phonological input encoded in $INSYMBOL" 
if ( $v > 0 ) echo "DEBUG: ${0:t} : Phonetic output encoded in $OUTSYMBOL" 


# check sampling rate of preprocessing
# Both rates are computed as 10000000 divided by the third field of the
# matching line in $PRECONFIG (presumably the value is given in HTK's
# 100ns time units - TODO confirm).
# get the HTK sampling rate of the preprocessing (in Hz)
set MODELRATE = `grep -a 'SOURCERATE' $PRECONFIG | awk '{ print 10000000 / $3 }'`
# get the frame rate in Hz
set FRAMERATE = `grep -a 'TARGETRATE' $PRECONFIG | awk '{ print 10000000 / $3 }'`

# save original input BPF in case we need e.g. the original KAN tier in output 
# (BPF itself may be re-pointed to a temporary file further below)
set BPFORG = "$BPF"

# check temporary file
# ${MAU}.mau is the intermediate result file; here we only make sure that it
# can be written: an existing file is truncated, a missing one is created and
# made read/writable for everybody. Failures are reported on stderr like all
# other errors of this script.
set MAU = $TEMP/${PID}${SIGNAL:t:r}
if ( -e ${MAU}.mau ) then 
  # file exists already: empty it
  echo -n "" >! ${MAU}.mau
  if ( $status != 0 ) then 
    echo "ERROR: ${0:t} : cannot write temporary file ${MAU}.mau - exiting" >> /dev/stderr
    if ( $CLEAN == "TRUE" ) rm -rf $TEMP/${PID}* >& /dev/null
    exit 1
  endif
else 
  # file does not exist yet: create it
  touch ${MAU}.mau
  if ( $status != 0 ) then 
    echo "ERROR: ${0:t} : cannot write temporary file ${MAU}.mau - exiting" >> /dev/stderr
    if ( $CLEAN == "TRUE" ) rm -rf $TEMP/${PID}* >& /dev/null
    exit 1
  endif
  chmod 666 ${MAU}.mau
endif

# check signal input, make format conversions (if necessary),
# determine some important signal features for later

# the signal file must exist
if ( ! -e $SIGNAL ) then 
  echo "ERROR: ${0:t} : cannot find SIGNAL input file $SIGNAL - exiting" >> /dev/stderr
  if ( $CLEAN == "TRUE" ) rm -rf $TEMP/${PID}* >& /dev/null
  exit 1
endif  
# check for zero files: they will cause HVite to loop for ever, so we reject them
# The output of 'sox ... stat' is captured in a temporary file; the signal is
# rejected (exit code 5) only if both the 'Maximum amplitude:' and the
# 'Minimum amplitude:' line match the pattern for 0.000000.
sox $SIGNAL -n stat >& $TEMP/${PID}_zeroTest.txt
grep -q 'Maximum amplitude:.*0.000000' $TEMP/${PID}_zeroTest.txt
set sox_err1 = $status
grep -q 'Minimum amplitude:.*0.000000' $TEMP/${PID}_zeroTest.txt
set sox_err2 = $status
if ( $sox_err1 == 0 && $sox_err2 == 0 ) then
  echo "ERROR: ${0:t} : cannot process input signal containing zero signal - exiting" >> /dev/stderr
  if ( $CLEAN == "TRUE" ) rm -rf $TEMP/${PID}* >& /dev/null
  exit 5
endif  
# NOTE(review): this file is not created anywhere in this section - presumably a leftover; confirm
rm -f $TEMP/${PID}dummy.wav

set SIGNALORG = $SIGNAL
# check/convert input, possible trim
# Depending on the file extension the input is converted (or, for RIFF WAVE,
# copied) into a file in $TEMP. Afterwards SIGNAL points to that temporary
# file, SIGNALORG keeps the original path and SIGNALRATE holds the sampling
# rate. Unknown extensions and failed conversions terminate with exit code 5.
set HTK = $TEMP/${PID}${SIGNAL:t:r}.htk
set signaltype = ${SIGNAL:e}
switch ( $signaltype )
case "nis":
case "NIS":
case "nist":
case "NIST":
case "sph":
case "SPH":
  # NIST SPHERE -> RIFF WAVE, mono, 16 bit
  if ( $v > 0 ) echo "DEBUG: ${0:t} : Converting NIST SPHERE to RIFF WAVE, mono, 16 bit ${SIGNAL} -> $TEMP/${PID}${SIGNAL:r:t}.wav"
  sox -t sph ${SIGNAL} -t wav -e signed-integer -b 16 -c 1 $TEMP/${PID}${SIGNAL:r:t}.wav
  if ( $status != 0 ) then 
    echo "ERROR: ${0:t} : cannot convert NIST SPHERE input signal to RIFF WAVE 16bit PCM using sox - exiting" >> /dev/stderr
    if ( $CLEAN == "TRUE" ) rm -rf $TEMP/${PID}* >& /dev/null
    exit 5
  endif  
  chmod 666 $TEMP/${PID}${SIGNAL:r:t}.wav
  set SIGNAL = $TEMP/${PID}${SIGNAL:r:t}.wav
  set SIGNALRATE = `soxi -r $SIGNAL`
  breaksw
case "al":
case "AL":
case "dea":
case "DEA":
  # headerless A-law: read as mono with a fixed sampling rate of 8000 Hz
  set SIGNALRATE = 8000
  if ( $v > 0 ) echo "DEBUG: ${0:t} : Converting ALAW/8kHz raw into RIFF WAVE ${SIGNAL} -> $TEMP/${PID}${SIGNAL:r:t}.wav"
  sox -t raw -e a-law -c 1 -r 8000 ${SIGNAL} -t wav -e signed-integer -b 16 -c 1 $TEMP/${PID}${SIGNAL:r:t}.wav
  if ( $status != 0 ) then 
    echo "ERROR: ${0:t} : cannot convert ALAW input signal to RIFF WAVE 16kHz 16bit PCM using sox - exiting" >> /dev/stderr
    if ( $CLEAN == "TRUE" ) rm -rf $TEMP/${PID}* >& /dev/null
    exit 5
  endif  
  chmod 666 $TEMP/${PID}${SIGNAL:r:t}.wav
  set SIGNAL = $TEMP/${PID}${SIGNAL:r:t}.wav
  breaksw
case "wav":
case "WAV":
  # RIFF WAVE: work on a copy in $TEMP
  cp $SIGNAL $TEMP/${PID}${SIGNAL:t}
  set SIGNAL = $TEMP/${PID}${SIGNAL:t}
  set SIGNALRATE = `soxi -r $SIGNAL`
  chmod 666 ${SIGNAL}
  breaksw
case "mp4":
case "mpeg":
case "mpg":
case "avi":
case "flv":
case "MP4":
case "MPEG":
case "MPG":
case "AVI":
case "FLV":
  # video container: the audio track is extracted with ffmpeg
  which ffmpeg >& /dev/null
  if ( $status != 0 ) then 
    echo "ERROR: ${0:t} : ffmpeg is not installed; cannot convert signal file with extension $signaltype - exiting" >> /dev/stderr
    # CLEAN carries TRUE/FALSE like in every other clean-up line of this script
    if ( $CLEAN == "TRUE" ) rm -rf $TEMP/${PID}* >& /dev/null
    exit 5
  endif
  if ( $v > 0 ) echo "DEBUG: ${0:t} : Converting into RIFF WAVE ${SIGNAL} -> $TEMP/${PID}${SIGNAL:r:t}.wav"
  # try to determine the original sample rate of the audiotrack in the video 
  # why do we do this, if we later re-sample to 16000Hz anyway? Because then the resulting 
  # BPF or emuDB files are based on the original sampling rate of the video and not 16000Hz.
  # Video may contain more than one soundtrack or even none; therefore look how many audio
  # streams are there and then select the default
  set ffprobeNchannels = `ffprobe $SIGNAL |& grep 'Stream .* Audio' | wc -l`
  if ( "$ffprobeNchannels" == 0 ) then 
    echo "ERROR: ${0:t} : ffprobe detects no soundtrack found in $SIGNAL - exiting" >> /dev/stderr
    if ( $CLEAN == "TRUE" ) rm -rf $TEMP/${PID}* >& /dev/null
    exit 5
  else if ( "$ffprobeNchannels" == 1 ) then
    set VIDEOSAMPLERATE = `ffprobe $SIGNAL |& grep 'Stream .* Audio' | head -n 1 | sed 's/^.* \([0-9][0-9]*\) Hz.*$/\1/'`
  else
    # multiple sound tracks: select default
    set VIDEOSAMPLERATE = `ffprobe $SIGNAL |& grep 'Stream .* Audio.*(default)' | sed 's/^.* \([0-9][0-9]*\) Hz.*$/\1/'`
    # check language code in mp4 stream
    set ffprobeLng = `ffprobe $SIGNAL |& grep 'Stream .* Audio.*(default)' | sed 's/^.*(\(...\)): Audio.*$/\1/'`
    if ( "$ffprobeLng" != "" ) then
      set LANGUAGE_3code = `echo "$LANGUAGE" | sed 's/^\(...\).*$/\1/'`
      if ( "$LANGUAGE_3code" != "$ffprobeLng" ) echo "WARNING: ${0:t} : LANGUAGE code (${LANGUAGE}) does not match the language code in the MP4 input soundtrack (${ffprobeLng})" >> /dev/stderr
    endif
  endif
  # quoted, because the sed output may consist of several words (the whole
  # ffprobe line is passed through when the pattern does not match), which
  # would otherwise break the expression
  if ( "$VIDEOSAMPLERATE" == "" ) then 
    echo "WARNING: ${0:t} : cannot determine audio sample rate from input video; using 16000Hz" >> /dev/stderr
    echo "                  this means that e.g. BPF results are based on 16000Hz sampling rate" >> /dev/stderr
    set VIDEOSAMPLERATE = 16000
  else
    if ( $v > 0 ) echo "DEBUG: ${0:t} : extracting audiotrack with ${VIDEOSAMPLERATE}Hz sampling rate from video input"
  endif
  # we assume that '-ac 1' equals the default soundtrack (until now this always has been true):
  ffmpeg -loglevel quiet -y -i ${SIGNAL} -acodec pcm_s16le -ac 1 -ar $VIDEOSAMPLERATE $TEMP/${PID}${SIGNAL:r:t}.wav
  if ( $status != 0 ) then 
    echo "ERROR: ${0:t} : cannot convert input signal to RIFF WAVE ${VIDEOSAMPLERATE}Hz 16bit PCM - exiting" >> /dev/stderr
    if ( $CLEAN == "TRUE" ) rm -rf $TEMP/${PID}* >& /dev/null
    exit 5
  endif  
  chmod 666 $TEMP/${PID}${SIGNAL:r:t}.wav
  set SIGNAL = $TEMP/${PID}${SIGNAL:r:t}.wav
  set SIGNALRATE = `soxi -r $SIGNAL`
  breaksw
default:
  # any other extension is rejected
  echo "ERROR: ${0:t} : unknown signal type extension ${signaltype} - exiting" >> /dev/stderr
  if ( $CLEAN == "TRUE" ) rm -rf $TEMP/${PID}* >& /dev/null
  exit 5
  breaksw
endsw

# check other parameters
# The working copy of the signal is queried with soxi for coding, bits per
# sample, number of channels and number of samples; every violation except an
# undeterminable coding terminates the script with exit code 2.

# coding: must be 'Signed Integer PCM'; an empty answer is tolerated as 'pcm'
set NISTCODING = "`soxi -e $SIGNAL`"
if ( "$NISTCODING" == "" ) then
  echo "WARNING: ${0:t} : cannot determine coding - assuming 'pcm'" >> /dev/stderr
  set NISTCODING = "pcm"
else if ( "$NISTCODING" != "Signed Integer PCM" ) then
  echo "ERROR: ${0:t} : coding is $NISTCODING - only 'Signed Integer PCM' is allowed - exiting" >> /dev/stderr
  if ( $CLEAN == "TRUE" ) then
    rm -rf $TEMP/${PID}* >& /dev/null
  endif
  exit 2
endif

# sample width: 16 bits only
set NISTBITS = `soxi -b $SIGNAL`
if ( $NISTBITS != 16 ) then
  echo "ERROR: ${0:t} : wrong bit number $NISTBITS per sample; only 16bits allowed - exiting" >> /dev/stderr
  if ( $CLEAN == "TRUE" ) then
    rm -rf $TEMP/${PID}* >& /dev/null
  endif
  exit 2
endif

# channels: mono only
set NISTCHANNELS = `soxi -c $SIGNAL`
if ( $NISTCHANNELS != 1 ) then
  echo "ERROR: ${0:t} : number of channels is $NISTCHANNELS - only 1 channel is allowed - exiting" >> /dev/stderr
  if ( $CLEAN == "TRUE" ) then
    rm -rf $TEMP/${PID}* >& /dev/null
  endif
  exit 2
endif

# length: the number of samples in the original signal is needed later for
# the end segment correction; a length of 0 is an error
set SAMPLELENGTH = `soxi -s $SIGNAL`
if ( $SAMPLELENGTH == 0 ) then
  echo "ERROR: ${0:t} : something is wrong: I detect a zero length signal; maybe a conversion of SIGNAL went wrong - exiting" >> /dev/stderr
  if ( $CLEAN == "TRUE" ) then
    rm -rf $TEMP/${PID}* >& /dev/null
  endif
  exit 2
endif

# fork operation modus: if MODUS=standard/align, check/preprocess the BPF input (KANSTR input)
# [note that depending on BPF content in this block the helper MAUSTRN
# might be called (and then the remainder of the maus processing is skipped
# until the output format conversion]
if ( $MODUS == "standard" || $MODUS == "align" ) then 
  
  # option BPF supersedes option KANSTR: if a BPF file is given, KANSTR is cleared
  if ( $BPF != "" ) set KANSTR = ""
  
  if ( "$KANSTR" == "" ) then 
    # checking for file: the BPF input must exist
    if ( ! -e $BPF ) then 
      echo "ERROR: ${0:t} : cannot find BPF input file $BPF - exiting" >> /dev/stderr
      if ( $CLEAN == "TRUE" ) rm -rf $TEMP/${PID}* >& /dev/null
      exit 1
    endif  
    # check for standard extensions
    # (only par|PAR|csv|CSV are accepted; any other extension is an error)
    if ( ${BPF:e} != "csv" && ${BPF:e} != "CSV" && ${BPF:e} != "par" && ${BPF:e} != "PAR" ) then 
      echo "ERROR: ${0:t} : BPF input file $BPF has no standard extensions par|PAR|csv|CSV - exiting" >> /dev/stderr
      if ( $CLEAN == "TRUE" ) rm -rf $TEMP/${PID}* >& /dev/null
      exit 1
    endif
    # if the input BPF has extension csv|CSV, we assume that it is a two-column, ';'-separated spreadsheet
    # table with one word per line, orthography in the 1st and canonical pronunciation in the 2nd column.
    # We check the table, transform it into a BPF, in case OUTFORMAT is set to par we write
    # the BPF using the basename and location of the CSV file (write permissions must be set!), and then 
    # proceed for the rest of the script with this BPF file as input. 
    if ( ${BPF:e} == "csv" || ${BPF:e} == "CSV" ) then 
      if ( $v > 0 ) echo "DEBUG: ${0:t} : Detected CSV input - transforming into temporary BPF"
      # check for limiter symbol CSVLIMITER and proper table form:
      # every line of the file must match the well-formedness pattern
      # NOTE(review): the pattern and the awk calls below hard-code ';' next to ${CSVLIMITER};
      # presumably CSVLIMITER is always ';' - confirm
      set numcsvlines = `cat $BPF | wc -l`
      set numcsvlimiters = `grep -E "^[^;]+${CSVLIMITER}"'[^;]+$' $BPF | wc -l`
      if ( $numcsvlines != $numcsvlimiters ) then 
        echo "ERROR: ${0:t} : CSV input file $BPF does not contain well-formed table - exiting" >> /dev/stderr
        echo "       One line per word: <orthography>${CSVLIMITER}<pronunciation>" >> /dev/stderr
        echo "       <orthography> = UTF-8 encoded orthography (no ${CSVLIMITER} !)" >> /dev/stderr
        echo "       <pronunciation> = SAMPA/IPA encoded canonical pronunciation" >> /dev/stderr
        if ( $CLEAN == "TRUE" ) rm -rf $TEMP/${PID}* >& /dev/null
        exit 1
      endif
      # create output or temporary BPF (we replace the sample rate later)
      # the here-document writes the BPF header; ##SAMPLERATE## is a placeholder
      set CSVBPF = $TEMP/${PID}_CSVBPF.par
      cat << END >! ${CSVBPF}
LHD: Partitur 1.3
SAM: ##SAMPLERATE##
NCH: 1
LBD:
END
      if ( $status != 0 ) then 
        echo "ERROR: ${0:t} : cannot create temporary output BPF file ${CSVBPF} - exiting" >> /dev/stderr
        if ( $CLEAN == "TRUE" ) rm -rf $TEMP/${PID}* >& /dev/null
        exit 1
      endif
      # transform CSV table into ORT and KAN tier 
      # (1st column -> ORT, 2nd column -> KAN; the word index counts from 0; CRs are removed)
      awk 'BEGIN{FS=";";idxcnt=0}{printf("ORT:\t%d\t%s\n",idxcnt,$1);idxcnt++}' $BPF | tr -d '\r' >> ${CSVBPF}
      awk 'BEGIN{FS=";";idxcnt=0}{printf("KAN:\t%d\t%s\n",idxcnt,$2);idxcnt++}' $BPF | tr -d '\r' >> ${CSVBPF}
      if ( $OUTFORMAT == "par" ) then
        # BPF output requested: move the new BPF next to the CSV file and continue with it
        mv ${CSVBPF} ${BPF:r}.par
        if ( $status != 0 ) then 
          echo "ERROR: ${0:t} : cannot create output BPF file ${BPF:r}.par - exiting" >> /dev/stderr
          if ( $CLEAN == "TRUE" ) rm -rf $TEMP/${PID}* >& /dev/null
          exit 1
        endif
        set BPF = ${BPF:r}.par
        chmod 666 ${BPF:r}.par
      else
        # any other output format: continue with the temporary BPF
        set BPF = ${CSVBPF}
        chmod 666 ${BPF}
      endif
    endif
    # check if output file is writable
    # (for OUTFORMAT=par the input BPF itself must be writable)
    # NOTE(review): the message lists mau-append|par|bpf but only "par" is tested here;
    # presumably OUTFORMAT has been normalized earlier - confirm
    if ( $OUTFORMAT == "par" ) then
      if ( ! -w $BPF ) then 
        echo "ERROR: ${0:t} : option OUTFORMAT=mau-append|par|bpf but cannot write to BPF input file - exiting" >> /dev/stderr
        if ( $CLEAN == "TRUE" ) rm -rf $TEMP/${PID}* >& /dev/null
        exit 1
      endif  
    endif
    # checking for KAN tier
    # Without a KAN tier the BPF must at least contain an ORT tier; with a KAN
    # tier its entries are checked for stand-alone '#' words and for a minimum
    # of 3 columns per line.
    grep -a '^KAN:' $BPF >& /dev/null
    if ( $status != 0 ) then
      grep -a '^ORT:' $BPF >& /dev/null
      if ( $status != 0 ) then
        echo "ERROR: ${0:t} : partitur file $BPF contains no KAN nor ORT tier - exiting" >> /dev/stderr
        if ( $CLEAN == "TRUE" ) rm -rf $TEMP/${PID}* >& /dev/null
        exit 1
      endif 
    else
      # checking for singular silence models in KAN tier, which are not allowed because
      # if a word consists only of a silence model we get problem with word indices
      # (from version 3.33 all languages model '<p>' as non-optional silence model; 
      # only '<p:>', '#' and '&' are optional silence models).
      grep -a '^KAN:' $BPF | awk '{print $3}' | grep -q '^#$'
      if ( $status == 0 ) then
        echo "ERROR: ${0:t} : partitur file $BPF contains 'optional silence word' '#' in the KAN tier - exiting" >> /dev/stderr
        echo "       Optional silence model is a model that can be skipped; if you need to model a word as silence, use the '<p>' model, e.g. 'KAN: 8 <p>'" >> /dev/stderr
        if ( $CLEAN == "TRUE" ) rm -rf $TEMP/${PID}* >& /dev/null
        exit 1
      endif
      # roughly checking if KAN tier is of the right format
      # (grep -a like in all other BPF scans: without it grep prints a
      # 'Binary file ... matches' notice instead of the KAN lines for input it
      # classifies as binary, and the column check would pass unnoticed)
      if ( `grep -a '^KAN:' $BPF | awk 'BEGIN{err="ok"}{if(NF<3)err="error"}END{print err}'` == "error" ) then 
        echo "ERROR: ${0:t} : partitur file $BPF contains non-valid KAN lines with less than 3 columns - exiting" >> /dev/stderr
        if ( $CLEAN == "TRUE" ) rm -rf $TEMP/${PID}* >& /dev/null
        exit 1
      endif
    endif  
    # checking for optional TRN tier(s), call maus.trn or determine offset and duration of pre-segmentation
    # The number of TRN lines decides what happens:
    #   0       : warn and switch USETRN off
    #   1       : either a time-only entry (set TRNOFFSET/TRNDUR and continue normally)
    #             or a proper entry with word links (fall through to 'default')
    #   default : delegate to the helper $MAUSTRN and jump to the output format conversion
    if ( $USETRN == "TRUE" ) then 
      set trnanz = `grep -a '^TRN:' $BPF | wc -l`
      switch ( $trnanz )
      case 0:
        echo "WARNING: ${0:t} : partitur file $BPF contains no TRN tier although you set option USETRN=true" >> /dev/stderr
        echo "         will ignore option USETRN" >> /dev/stderr
        set USETRN = "FALSE"
        breaksw
      case 1:
        # For backward compatibility (and to be compatible to WAV2TRN output!) we check
        # whether this single TRN entry carries only the time information start and length as in WAV2TRN output, or a 
        # proper TRN tier entry consisting of start, length and list of word links, e.g. 'TRN: 0 1999 0,1,2,3,4 ...'
        # In the first case we do the normal MAUS segmentation but within the given segment,
        # in the latter case we pass the whole thing to maus.trn and hope for the best.
        # lnklist is 1 if the 4th column looks like a comma separated list of numbers, else 0
        set lnklist = `grep -a '^TRN:' $BPF | head -n 1 | awk '{if($4 ~ /^[0-9][0-9]*,*[0-9,]*$/){print 1}else{print 0} }'`
        if ( "$lnklist" == 0 ) then 
          # no proper TRN: determine offset and duration of utterance within the recording
          # in original samples (SIGNALRATE) and continue with normal MAUS procedure
          set TRNOFFSET = `grep -a '^TRN:' $BPF | head -n 1 | awk '{print $2}'`
          #################### DEBUG ########################
          #if ( $TRNOFFSET == 978496 ) sleep 20
          set TRNDUR = `grep -a '^TRN:' $BPF | head -n 1 | awk '{print $3}'`
          # the helper WAV2TRN delivers the duration *including* the start sample, but the following
          # trim operation expects duration *exclusive* the first sample; therefore we subtract
          # one sample from TRNDUR
          @ TRNDUR --
          breaksw
        endif
        # seems to be a proper single TRN: continue with 'default'
        # (no breaksw here on purpose: execution falls through)
      default:
        # at least one proper TRN entry -> recursive batch processing of chunk segmentation
        # the following WARNING became annoying - and it is not very helpful anyway
        #echo "WARNING: ${0:t} : $trnanz TRN entries found in BPF - calling chunk segmentation" >> /dev/stderr
        if( $v > 0 ) echo "DEBUG: ${0:t} : $trnanz TRN entries found in BPF - calling chunk segmentation"
        if ( ! -x $MAUSTRN ) then 
          echo "ERROR: ${0:t} : cannot execute helper $MAUSTRN - exiting" >> /dev/stderr
  	if ( $CLEAN == "TRUE" ) rm -rf $TEMP/${PID}* >& /dev/null
  	exit 5
        endif
        if ( $v > 0 ) echo "DEBUG: ${0:t} : $MAUSTRN BPFTHRESHOLD=$BPFTHRESHOLD RELAXMINDUR=$RELAXMINDUR $mausoptions OUT=${MAU}.mau SIGNAL=$SIGNAL"
        $MAUSTRN BPFTHRESHOLD=$BPFTHRESHOLD RELAXMINDUR=$RELAXMINDUR $mausoptions OUT="${MAU}.mau" SIGNAL="$SIGNAL"
        set maus_trn_err = $status
        if ( $maus_trn_err != 0 ) then 
          echo "ERROR: ${0:t} : helper $MAUSTRN failed - exiting" >> /dev/stderr
          # exit code 5 of the helper gets an additional explanation
          if ( $maus_trn_err == 5 ) then
            echo "       Chunk segmentation with EMU output requires synchronized TRN and KAN tier." >> /dev/stderr
            echo "       Check if the TRN tier in your input BPF covers the complete KAN tier." >> /dev/stderr
          endif
          if ( $CLEAN == "TRUE" ) rm -rf $TEMP/${PID}* >& /dev/null
          exit 5
        endif	
        # result of MAUSTRN should now be a MAU tier in ${MAU}.mau
        # skip the remaining processing and continue at label out_format_conversion
        goto out_format_conversion
        breaksw
      endsw
    endif  
    # checking for BPF input for Emu output, KAN might be created later
    # Emu output needs an ORT tier in the input BPF; a missing KAN tier is only
    # accepted if $PARAM equals $SOURCE/PARAM (which the error message below
    # equates with the language being German).
    if ( $OUTFORMAT == "emu" || $OUTFORMAT == "EMU" || $OUTFORMAT == "legacyEMU" || $OUTFORMAT == "emuR" ) then
      grep -a '^ORT:' $BPF >& /dev/null
      if ( $status != 0 ) then 
        echo "ERROR: ${0:t} : input BPF does not contain ORT tier for emu output" >> /dev/stderr
        echo "       Either use another OUTFORMAT than emu|EMU|legacyEMU|emuR|emuDB or correct input BPF" >> /dev/stderr
        if ( $CLEAN == "TRUE" ) rm -rf $TEMP/${PID}* >& /dev/null
        exit 1
      endif  
      grep -a '^KAN:' $BPF >& /dev/null
      if ( $status != 0 ) then 
        if ( $PARAM != $SOURCE/PARAM ) then 
          echo "ERROR: ${0:t} : input BPF does not contain KAN tier for emu output and language is not German" >> /dev/stderr
          # the option name printed here must be spelled the way the user has to type it
          echo "       Either use another OUTFORMAT than emu|EMU|legacyEMU|emuR|emuDB or provide a KAN tier or select LANGUAGE=deu" >> /dev/stderr
          if ( $CLEAN == "TRUE" ) rm -rf $TEMP/${PID}* >& /dev/null
          exit 1
        endif
      endif  
    endif
  endif
  # If the phonological form is given on the commandline in option KANSTR, create
  # a pseudo BPF to process
  if ( "$KANSTR" != "" ) then
    # switch USETRN off since we cannot read from a BPF
    if ( $USETRN == "TRUE" ) set USETRN = "FALSE"
    # if the input canonical string is given from command line, produce a 
    # pseudo BPF file containing only the KAN tier and proceed
    set BPF = $TEMP/${PID}${SIGNALORG:t:r}.par
    touch $BPF
    if ( $status != 0 ) then 
      echo "ERROR: ${0:t} : cannot create intermediate BPF file $BPF - exiting" >> /dev/stderr
      # remove temporary files like on every other error exit
      if ( $CLEAN == "TRUE" ) rm -rf $TEMP/${PID}* >& /dev/null
      exit 1
    endif  
    # The words of KANSTR are concatenated into KAN entries; each '#' closes the
    # current entry and opens the next one with an incremented word index.
    # NOTE(review): the word list of the foreach is still subject to filename
    # globbing; symbols like '?' may need 'noglob' - confirm how KANSTR is passed
    set linkcount = 0
    printf "KAN:\t%d\t" $linkcount >> $BPF
    foreach phon ( $KANSTR ) 
      # quoted, so that a symbol that looks like an operator or glob pattern
      # (e.g. '{', '?') is compared and printed as plain text
      if ( "${phon}" == "#" ) then 
        @ linkcount ++
        printf "\nKAN:\t%d\t" $linkcount >> $BPF
      else  
        printf "%s" "$phon" >> $BPF
      endif  
    end  
    printf "\n" >> $BPF
  endif  
  # Pre-validation of input
  # check if a KAN tier is present in BPF
  grep -a '^KAN:' $BPF >& /dev/null
  if ( $status != 0 ) then 
    # the message string must be terminated before the redirection, otherwise
    # the shell reports an unmatched quote instead of printing the message
    echo "ERROR: ${0:t} : input BPF $BPF does not contain a KAN tier - exiting" >> /dev/stderr
    if ( $CLEAN == "TRUE" ) rm -rf $TEMP/${PID}* >& /dev/null
    exit 5
  endif  
  # In the normal case simply count KAN lines in input BPF
  set numKANLines = `grep -a '^KAN:' $BPF | wc -l`
  # If USETRN==TRUE and a single TRN: line in input, count the number of words in the trn chunk
  # (the 4th column of the first TRN line is the comma separated list of word links;
  # grep -a like in all other BPF scans)
  if ( $USETRN == "TRUE" ) then 
    set numKANLines = `grep -a '^TRN:' $BPF | head -n 1 | awk '{print $4}' | tr ',' '\n' | wc -l`
  endif
  # refuse input with more words than BPFTHRESHOLD
  if ( $numKANLines > $BPFTHRESHOLD ) then 
    echo "ERROR: ${0:t} : number of words $numKANLines in BPF (or TRN chunk) exceeds threshold BPFTHRESHOLD = $BPFTHRESHOLD"  >> /dev/stderr
    echo "       maus cannot process effectively such large files; consider breaking the file up into" >> /dev/stderr
    echo "       smaller parts or use an automatic chunking service (Chunker) before MAUS - exiting" >> /dev/stderr
    echo "       PS: or maybe you just forgot to set the option USETRN=true ?" >> /dev/stderr
    if ( $CLEAN == "TRUE" ) rm -rf $TEMP/${PID}* >& /dev/null
    exit 2
  endif
  # If option INSYMBOL was set to IPA, we map the IPA symbols in $BPF to 
  # corresponding MAUS SAMPA (1st column) symbols as defined in the mapping table 
  # $IPATABLE; remember that IPA input requires blank-separated symbols in the KAN tier
  # non-IPA symbols (UTF-8!) in the KAN tier of $BPF will cause an error
  if ( $INSYMBOL == "ipa" ) then
    # the mapped BPF is built in a separate temporary file
    touch $TEMP/${PID}${SIGNALORG:t:r}.ipa.par
    if ( $status != 0 ) then
      echo "ERROR: ${0:t} : cannot create intermediate BPF file $TEMP/${PID}${SIGNALORG:t:r}.ipa.par - exiting" >> /dev/stderr
      if ( $CLEAN == "TRUE" ) rm -rf $TEMP/${PID}* >& /dev/null
      exit 1
    endif
    chmod 666 $TEMP/${PID}${SIGNALORG:t:r}.ipa.par
    # first all lines except the KAN tier are copied unchanged (CRs removed) ...
    grep -v '^KAN:' $BPF | tr -d '\r' >! $TEMP/${PID}${SIGNALORG:t:r}.ipa.par
    # ... then the KAN lines are appended with every symbol replaced: the awk script
    # loads the table (key = 3rd column, value = 1st column), maps the fields of each
    # KAN line starting at field 3, and reports an unmapped symbol on stderr with exit code 1
    awk -v IPATABLE=$IPATABLE 'BEGIN {err=""; while(getline < IPATABLE > 0){it[$3]=$1}} /^KAN:/ {i=3; printf("KAN:\t%d\t",$2); while($i!="") { if(it[$i]==""){err=$i}else{printf("%s ",it[$i])} i++} printf("\n")}END{if(err!=""){printf("ERROR: maus : unknown ipa symbol %s in input KAN tier - exiting\n",err,$0)>>"/dev/stderr";exit(1)}}' $BPF | tr -d '\r' >> $TEMP/${PID}${SIGNALORG:t:r}.ipa.par
    if ( $status != 0 ) then 
      # (the error message has already been printed by the awk script)
      #echo "ERROR: ${0:t} : at least one unknown ipa symbol found while mapping input KAN tier encoded in IPA to SAMPA - exiting" >> /dev/stderr
      if ( $CLEAN == "TRUE" ) rm -rf $TEMP/${PID}* >& /dev/null
      exit 1
    endif
    # continue with the mapped BPF
    set BPF = $TEMP/${PID}${SIGNALORG:t:r}.ipa.par
  endif
  
  # Finished Processing of phonological Input
  # By now $BPF must contain a valid BPF with a valid KAN tier
  # encoded as SAMPA or blank separated SAMPA 
  
  # Create a MLF file containing the chain of canonical phonemes as listed
  # in the KAN tier of the input partitur file. If unknown phonemes
  # are found, the script terminates with an error message. 
  # Accent, composita and function word markers ("'#+) are deleted; 
  # Words are separated by the word boundary symbol '#'
  # The chain starts with a '<' denoting silence and ends with a '>' denoting
  # final silence
  # The phoneme inventory KANINVENTAR defines all possible German SAM-PA
  # phonemes that can be handled by this step including the silence 
  # symbols '<p>' '<p:>' '<', '>', '#' and '&'
  # < : initial silence
  # > : final silence
  # #,<P:> : optional silence, automatically modelled  between words (can be of zero length; then no silence is assumed)
  # & : arbitrary word separator (a word separator that is caused by a rule
  #     that 'glues' two words together. For example the words 'ab Frankfurt'
  #     /ap#fra:nkfu:6t/  ->  /a&fra:nkfu:6t/; treated as '#'.
  # <p> : explicitly modelled silence (cannot be of zero length)
  # This step also does any mappings from KANINVENTAR to GRAPHINVENTAR which 
  # defines the symbols used in the rule set, in the graph and on the left 
  # side of the dictionary.
  set MLF = $TEMP/${PID}${SIGNALORG:t:r}.mlf
  # create MLF (linear pronunciation) as input to graph generator
  if ( $v > 0 ) echo "DEBUG: ${0:t} : Creating MLF file $MLF from BPF file $BPF"
  # NOTE(review): this notice is printed independent of the debug level $v - confirm that this is intended
  if ( $NOINITIALFINALSILENCE == "TRUE" ) echo "Suppressing initial/trailing optional silence modelling (NOINITIALFINALSILENCE=true)"
  # MLF header line and label file pattern ...
  echo '#\!MLF\!#' >! $MLF
  chmod 666 $MLF
  echo '"*/'${MLF:t:r}'.lab"' >> $MLF
  # ... followed by the output of kan2mlf.awk for the BPF (CRs removed)
  cat $BPF | tr -d '\r' | awk -v INVENTAR=$KANINVENTAR -v STARTWORD=$STARTWORD -v ENDWORD=$ENDWORD -v NOINITIALFINALSILENCE=$NOINITIALFINALSILENCE -f $SOURCE/kan2mlf.awk >> $MLF
  if ( $status != 0 ) then 
    echo "ERROR: ${0:t} : something went wrong while reading the BPF input, probably" >> /dev/stderr
    echo "       it contains a symbol that is not defined for this language - exiting" >> /dev/stderr
    if ( $CLEAN == "TRUE" ) rm -rf $TEMP/${PID}* >& /dev/null
    exit 1
  endif  
  
endif
# end fork operation modus standard/align


# do signal pre-processing
# (note that at this point the maus call is processing a single turn only;
# i.e. we either had just a single turn from the beginning, or we are now
# in one of several maus calls initiated by MAUSTRN in the previous section.
# Thus, TRNOFFSET etc. are either not set, or to a single turn read from the input BPF 
# (USETRN=true), or to one of many turns set by MAUSTRN.)

# handle online pre-segmentation
# The input signal (total) or the cut-out signal of a turn (MAUSTRN) are
# pre-segmented.
# the deprecated value USETRN=force is translated into PRESEG=TRUE / USETRN=FALSE
if ( $USETRN == "force" ) then
  set PRESEG = "TRUE"
  set USETRN = "FALSE"
  echo "WARNING ${0:t} : option value USETRN=force has been deprecated with version 4.11; use option PRESEG=true instead" >> /dev/stderr
endif
if ( $PRESEG == "TRUE" ) then
  # without the helper $WAV2TRN the option is ignored (with a warning)
  which $WAV2TRN >& /dev/null   
  if ( $status != 0 ) then 
    echo "WARNING: ${0:t} : option PRESEG=true but no helper $WAV2TRN found to perform the pre-segmentation" >> /dev/stderr
    echo "         ignoring option PRESEG=true" >> /dev/stderr
    set PRESEG = "FALSE"
  else
    # There are two cases possible here:
    # 1. Original maus call with USETRN=false => we perform a WAV2TRN to the entire input signal,
    #    set TRNOFFSET and TRNDUR and USETRN=true, so that the signal is later cut accordingly.
    # 2. Original maus call with USETRN=true and the input BPF contained one or multiple TRN entries
    #    => TRNOFFSET and TRNDUR are already set here (to a single TRN entry or one of the multiple 
    #    TRN entries (called by MAUSTRN!); in both cases we perform a WAV2TRN on the signal defined
    #    by TRNOFFSET and TRNDUR, and correct TRNOFFSET and TRNDUR accordingly; then we set 
    #    USETRN=true and let the signal processing cut out the signal later.
    # In both cases a failing helper call only causes a warning; processing continues
    # without online pre-segmentation.
    if ( "$USETRN" == "FALSE" ) then 
      # case 1
      set TRNOUT = `$WAV2TRN wav=$SIGNAL`
      if ( $status != 0 ) then 
        echo "WARNING: ${0:t} : helper call $WAV2TRN (1) failed - proceed without online pre-segmentation" >> /dev/stderr
      else	
        # 1st word of the helper output is the offset, 2nd word the duration
        set TRNOFFSET = `echo "$TRNOUT" | awk '{ print $1 }'`       
        set TRNDUR = `echo "$TRNOUT" | awk '{ print $2 }'` 
        # the helper WAV2TRN delivers the duration *including* the start sample, but the following 
        # trim operation expects duration *exclusive* the first sample; therefore we subtract 
        # one sample from TRNDUR
        @ TRNDUR --
        if ( $v > 0 ) echo "DEBUG: ${0:t} : Pre-segmentation (1: PRESEG=true) using $WAV2TRN : offset = $TRNOFFSET , duration = $TRNDUR"
        set USETRN = "TRUE"
      endif
    else
      # case 2
      # first trim the SIGNAL according to TRNOFFSET and TRNDUR into a temporary file as input for WAV2TRN
      sox $SIGNAL ${SIGNAL:r}_trim_tmp.wav trim ${TRNOFFSET}"s" ${TRNDUR}"s"
      chmod 666 ${SIGNAL:r}_trim_tmp.wav
      # then make the pre-segmentation and adjust offset and duration
      set TRNOUTTMP = `$WAV2TRN wav=${SIGNAL:r}_trim_tmp.wav`
      if ( $status != 0 ) then 
        echo "WARNING: ${0:t} : helper call $WAV2TRN (2) failed - proceed without online pre-segmentation" >> /dev/stderr
      else	
        set TRNOFFSETTMP = `echo "$TRNOUTTMP" | awk '{ print $1 }'`       
        set TRNDURTMP = `echo "$TRNOUTTMP" | awk '{ print $2 }'`
        # the helper WAV2TRN delivers the duration *including* the start sample, but the following 
        # trim operation expects duration *exclusive* the first sample; therefore we subtract 
        # one sample from TRNDURTMP
        @ TRNDURTMP --
        if ( $v > 0 ) echo "DEBUG: ${0:t} : Pre-segmentation (2: PRESEG=true) using $WAV2TRN : offset = $TRNOFFSETTMP , duration = $TRNDURTMP"
        # re-calculate TRNOFFSET and TRNDUR with the results of WAV2TRN
        # (the new offset is relative to the trimmed signal, hence it is added to the old one)
        @ TRNOFFSET = $TRNOFFSET + $TRNOFFSETTMP
        set TRNDUR = $TRNDURTMP
        set USETRN = "TRUE"
      endif
    endif
  endif
endif  
# if USETRN is set, cut out the relevant signal 
# (signal is still sampled in SIGNALRATE to be synchronous to BPF!)
# save offset and length of final cut off; we assume here that all our 
# signals have 16 bits per sample (has been tested)
if ( $USETRN == "TRUE" ) then 
  if ( $v > 0 ) echo "DEBUG: ${0:t} : Offset $TRNOFFSET, length $TRNDUR samples (option USETRN)"
  # soxi is taken from the PATH like in all other calls of this script
  set totalsam = `soxi -s $SIGNAL`
  # TRNFINAL = number of samples that remain after the segment
  set TRNFINAL = $totalsam
  @ TRNFINAL -= $TRNOFFSET
  @ TRNFINAL -= $TRNDUR
  if ( $v > 0 ) echo "DEBUG: ${0:t} :    Cut off final segment is $TRNFINAL samples long (option USETRN)"
  # a negative remainder means that the segment ends behind the end of the signal
  if ( $TRNFINAL < 0 ) then 
    if ( $v > 0 ) echo "DEBUG: ${0:t} : TRN segment exceeds the signal length; check the last TRN tier entry in BPF input"
    echo "ERROR: ${0:t} : faulty information in TRN tier: segment exceeds signal length by $TRNFINAL samples; check the last TRN tier entry in BPF input  - exiting" >> /dev/stderr
    if ( $CLEAN == "TRUE" ) then 
      rm -rf $TEMP/${PID}* >& /dev/null
    endif  
    exit 1
  endif  
  # cut out the segment (offset and duration are given in samples) and continue with it
  if ( $v > 0 ) echo "DEBUG: ${0:t} :    $SIGNAL -> ${SIGNAL:r}_trim.wav"
  sox $SIGNAL ${SIGNAL:r}_trim.wav trim ${TRNOFFSET}"s" ${TRNDUR}"s"
  chmod 666 ${SIGNAL:r}_trim.wav
  set SIGNAL = ${SIGNAL:r}_trim.wav
endif
# check if re-sampling is necessary
# If the sampling rate of the signal differs from the model rate, the signal is
# re-sampled with sox (only if allowresamp is TRUE, otherwise the script
# terminates with exit code 2). A rate below the model rate causes an
# additional warning.
if ( $SIGNALRATE != $MODELRATE ) then 
 if ( $SIGNALRATE < $MODELRATE ) then 
  echo "WARNING: ${0:t} : sample rate of signal $SIGNALRATE is less than $MODELRATE" >> /dev/stderr
  echo "         This will lead to very bad segmentation results! Use at least 16000Hz sampling rate!" >> /dev/stderr
 endif
 if ( $allowresamp == "TRUE" ) then 
  if ( $v > 0 ) then 
    # all three lines belong to one DEBUG message and go to the same stream
    echo "DEBUG: ${0:t} : input sampling rate $SIGNALRATE does not match model rate $MODELRATE" 
    echo "         re-sampling using sox standard resampling method"
    echo "         ${SIGNAL} -> ${SIGNAL:r}_resamp.wav"
  endif  
  # the following sox syntax does a rate conversion without dithering.
  # this works also for sox version 14.3 and higher if SOX_OPTS is set to '-D'
  # (-D is being ignored by lower sox versions, but not on the commandline!)
  # (in debug mode the messages of sox are shown, otherwise they are discarded)
  if ( $v > 0 ) then 
    echo "DEBUG: ${0:t} : sox ${SIGNAL} ${SIGNAL:r}_resamp.wav rate -s -a $MODELRATE"
    sox ${SIGNAL} ${SIGNAL:r}_resamp.wav rate -s -a $MODELRATE
    set err = $status
  else
    sox ${SIGNAL} ${SIGNAL:r}_resamp.wav rate -s -a $MODELRATE >& /dev/null
    set err = $status
  endif
  if ( $err != 0 ) then 
    echo "ERROR: ${0:t} : resampling using sox failed - exiting" >> /dev/stderr
    echo "       set debug option v > 0 to view error messages of sox" >> /dev/stderr
    if ( $CLEAN == "TRUE" ) rm -rf $TEMP/${PID}* >& /dev/null
    exit 5
  endif
  chmod 666 ${SIGNAL:r}_resamp.wav
  # continue with the re-sampled signal
  set SIGNAL = ${SIGNAL:r}_resamp.wav
 else
  echo "ERROR: ${0:t} : input sampling rate $SIGNALRATE does not match model rate $MODELRATE" >> /dev/stderr
  echo "       use option 'allowresamp=yes' for automatic re-sampling" >> /dev/stderr
  echo "       exiting" >> /dev/stderr
  if ( $CLEAN == "TRUE" ) rm -rf $TEMP/${PID}* >& /dev/null
  exit 2
 endif 
else
 if ( $v > 0 ) echo "DEBUG: ${0:t} : Sample rate of input is equal model sample rate - ok"
endif  
# from now on we assume that the signal has MODELRATE
if ( $v > 1 ) echo "DEBUG: ${0:t} : Original sample rate: $SIGNALRATE, processing sampling rate: $MODELRATE"
if ( $v > 1 ) echo "DEBUG: ${0:t} :     coding: $NISTCODING, bits: $NISTBITS"
if ( $v > 0 ) echo "DEBUG: ${0:t} : Creating HTK file $HTK"
# HCopy reads $SIGNAL and writes $HTK, configured by $PRECONFIG (trace level $TRACE);
# any non-zero exit status terminates the script with exit code 5
HCopy -T $TRACE -C $PRECONFIG $SIGNAL $HTK
if ( $status != 0 ) then 
  echo "ERROR: ${0:t} : HCopy crashed with error code <> 0 - exiting" >> /dev/stderr
  if ( $CLEAN == "TRUE" ) rm -rf $TEMP/${PID}* >& /dev/null
  exit 5
endif  
chmod 666 $HTK

# fork operation modus
# MODUS=standard : build a pronunciation graph (SLF) from the MLF with
#                  word_var-2.0 and let HVite search the best path through it
# MODUS=align    : forced alignment of the MLF only (no SLF is created)
# All error exits of this branch remove the temporary files if CLEAN is TRUE.
if ( $MODUS == 'standard' || $MODUS == "align" ) then 
  
  # write original sampling rate of input signal into input BPF (only necessary 
  # for CSV input); carriage returns are stripped on the way
  sed "s/##SAMPLERATE##/${SIGNALRATE}/" $BPF | tr -d '\r' >! $TEMP/${PID}_CSVBPFTMP.par
  mv $TEMP/${PID}_CSVBPFTMP.par $BPF
  # Create the graph from the MLF into a SLF 
  # If you use a phonological based rule set (e.g. regeln9.nrul) without
  # statistics, you need the option 'wwt=man' to word_var-2.0
  # The rule set must contain the same symbols as in GRAPHINVENTAR
  if ( ! -e $MLF ) then 
    echo "ERROR: ${0:t} : something went terrible wrong: the MLF I just created vanished" >> /dev/stderr
    echo "       exiting" >> /dev/stderr
    if ( $CLEAN == "TRUE" ) rm -rf $TEMP/${PID}* >& /dev/null
    exit 1
  endif  
  # If we only align to the input string of phonetic SAM-PA symbols,
  # we don't need a SLF file and skip the following
  if ( $MODUS == "standard" ) then 
    set SLF = ${MLF:r}.slf
    if ( $v > 0 ) echo "DEBUG: ${0:t} : Creating SLF file $SLF"
    if ( $v > 0 ) echo "DEBUG: ${0:t} :   using rule set $RULESET"
    # remove potential BOM and change line terminators to NL
    # ('touch' is only used as a test whether RULESET is writable)
    touch "$RULESET" >& /dev/null
    if ( $status != 0 ) then 
      # RULESET is not writable; make copy and then dos2unix the copy
      cp "$RULESET" $TEMP/${PID}_RULESET.${RULESET:e}
      set RULESET = $TEMP/${PID}_RULESET.${RULESET:e}
      chmod 666 "$RULESET"
    endif
    dos2unix -q "$RULESET"
    # The extension of the rule set selects the word_var mode:
    #   *.rul  : statistical rule set
    #   *.nrul : phonological rule set without statistics (needs wwt=man)
    # The extension is quoted so that a rule set name without any extension
    # reaches the final 'else' instead of breaking the 'if' expression.
    if ( "${RULESET:e}" == "rul" ) then 
      if ( $v > 1 ) then 
        echo "DEBUG: ${0:t} : $SOURCE/word_var-2.0 if=$MLF odir=$TEMP iv=$GRAPHINVENTAR rg=$RULESET jwk=-1" 
        $SOURCE/word_var-2.0 if=$MLF odir=$TEMP iv=$GRAPHINVENTAR rg="$RULESET" jwk=-1 
        set err = $status
      else
        $SOURCE/word_var-2.0 if=$MLF odir=$TEMP iv=$GRAPHINVENTAR \
          rg="$RULESET" jwk=-1 >& /dev/null
        set err = $status
      endif
      if ( $err != 0 ) then 
        echo "ERROR: ${0:t} : word_var crashed with error code <> 0 - exiting" >> /dev/stderr
        echo "       try running with option v=2 to view error messages" >> /dev/stderr
        if ( $CLEAN == "TRUE" ) rm -rf $TEMP/${PID}* >& /dev/null
        exit 5
      endif  
    else if ( "${RULESET:e}" == "nrul" ) then    
      if ( $v > 1 ) then 
        echo "DEBUG: ${0:t} : $SOURCE/word_var-2.0 if=$MLF odir=$TEMP iv=$GRAPHINVENTAR rg=$RULESET jwk=-1 wwt=man" 
        $SOURCE/word_var-2.0 if=$MLF odir=$TEMP iv=$GRAPHINVENTAR rg="$RULESET" jwk=-1 wwt=man 
        set err = $status
      else
        $SOURCE/word_var-2.0 if=$MLF odir=$TEMP iv=$GRAPHINVENTAR rg="$RULESET" jwk=-1 wwt=man >& /dev/null
        set err = $status
      endif
      if ( $err != 0 ) then 
        echo "ERROR: ${0:t} : word_var crashed with error code <> 0 - exiting" >> /dev/stderr
        echo "       try running with option v=2 to view error messages" >> /dev/stderr
        if ( $CLEAN == "TRUE" ) rm -rf $TEMP/${PID}* >& /dev/null
        exit 5
      endif  
    else
      echo "ERROR: ${0:t} : unknown extension in rule set name : ${RULESET:e}" >> /dev/stderr
      # clean up like every other error exit does
      if ( $CLEAN == "TRUE" ) rm -rf $TEMP/${PID}* >& /dev/null
      exit 1
    endif  
    chmod 666 $SLF
    # debug level > 2: display the lattice (runs in the background)
    if ( $v > 2 ) then
      $SOURCE/ShowLattice $SLF &
    endif
    # keep a copy of the SLF if requested by option OUTSLF
    if ( $OUTSLF != "" ) cp -f $SLF $OUTSLF
  
    # Align the signal file HTK to the SLF
    if ( $v > 0 ) then 
      echo "DEBUG: ${0:t} : Aligning SLF file to HTK file"
      HVite -A -C $HVITECONF -w -X slf -H $MMF -s $WEIGHT -p $INSPROB -T $TRACE \
      $DICT $HMMINVENTAR $HTK
    else  
      HVite -C $HVITECONF -w -X slf -H $MMF -s $WEIGHT -p $INSPROB  \
      $DICT $HMMINVENTAR $HTK >& /dev/null
    endif
    if ( $status != 0 ) then 
      # theoretically since we do not use any pruning, HVite should always produce a
      # result; however, if the signal and network are totally mis-matched HVite 
      # reports 'no tokens survived to final node' and exits with non-zero
      echo "ERROR: ${0:t} : HVite returns error code $status" >> /dev/stderr
      echo "       Try to run the command with 'v=1' to check the errors" >> /dev/stderr
      if ( $CLEAN == "TRUE" ) rm -rf $TEMP/${PID}* >& /dev/null
      exit 5
    endif  
  else  
    # If MODUS=align we simply make a forced alignment (no SLF available)
    # Align the signal file HTK to the MLF
    if ( $v > 0 ) then 
      echo "DEBUG: ${0:t} : Aligning MLF file to HTK file"
      HVite -A -C $HVITECONF -a -X lab -I $MLF -H $MMF -s $WEIGHT -p $INSPROB -T $TRACE \
      $DICT $HMMINVENTAR $HTK
    else  
      HVite -C $HVITECONF -a -X lab -I $MLF -H $MMF -s $WEIGHT -p $INSPROB  \
      $DICT $HMMINVENTAR $HTK >& /dev/null
    endif
    if ( $status != 0 ) then 
      # theoretically since we do not use any pruning, HVite should always produce a
      # result; however, if the signal and network are totally mis-matched HVite 
      # reports 'no tokens survived to final node' and exits with non-zero
      echo "ERROR: ${0:t} : HVite returns error code $status" >> /dev/stderr
      echo "       Try to run the command with 'v=1' to check the errors" >> /dev/stderr
      if ( $CLEAN == "TRUE" ) rm -rf $TEMP/${PID}* >& /dev/null
      exit 5
    endif  
  
  endif # MODUS 'standard' (graph search) versus 'align' (forced alignment)

# fork operation modus 'bigram'
# This branch is taken for every MODUS other than 'standard' and 'align'.
# No MLF/SLF is involved: HVite decodes the HTK file against the lattice
# $LATBIGRAM using the dictionary $DICTBIGRAM.
else
  
  # run a free HTK recognition cycle constrained by a phone bigram on SIGNAL
  # The two HVite calls differ only in the option -A, which is added when
  # debugging (v > 0).
  # NOTE(review): unlike the non-debug calls of the standard/align branch,
  # the non-debug call here keeps '-T $TRACE' and does not redirect output
  # to /dev/null - confirm whether this is intended.
  if ( $v > 0 ) then 
    echo "DEBUG: ${0:t} : Run phone recognition on HTK file"
    HVite -A -C $HVITECONF -w "$LATBIGRAM" -H $MMF -s $WEIGHT -p $INSPROB -T $TRACE \
    $DICTBIGRAM $HMMINVENTAR $HTK
  else  
    HVite -C $HVITECONF -w "$LATBIGRAM" -H $MMF -s $WEIGHT -p $INSPROB -T $TRACE \
    $DICTBIGRAM $HMMINVENTAR $HTK
  endif
  # $status is tested directly after the if/else above; do not insert any
  # command in between.
  if ( $status != 0 ) then 
    # theoretically since we do not use any pruning, HVite should always produce a
    # result; however, if the signal and network are totally mis-matched HVite 
    # reports 'no tokens survived to final node' and exits with non-zero
    echo "ERROR: ${0:t} : HVite returns error code $status" >> /dev/stderr
    echo "       Try to run the command with 'v=1' to check the errors" >> /dev/stderr
    if ( $CLEAN == "TRUE" ) rm -rf $TEMP/${PID}* >& /dev/null
    exit 5
  endif  

endif
# end fork operation modus

# Check on results
# HVite writes its result to the *.rec file next to the HTK file. A missing
# or zero length *.rec file means that no segmentation is available, even
# though HVite returned ok: clean up and exit with code 5.
if ( ! -e ${HTK:r}.rec || -z ${HTK:r}.rec ) then
  echo "ERROR: ${0:t} : HVite returns ok but no ${HTK:r}.rec found or is zero length" >> /dev/stderr
  echo "       Try to run the command with 'v=1' to check the errors" >> /dev/stderr
  if ( $CLEAN == "TRUE" ) rm -rf $TEMP/${PID}* >& /dev/null
  exit 5
endif  
# (A separate zero-length WARNING used to follow here. It could never be
# reached, because the check above already exits on a zero length *.rec
# file, so it has been removed.)
chmod 666 ${HTK:r}.rec

# Producing output

# Debug report of the settings that influence the MAU tier
if ( $v > 0 ) then
  echo "DEBUG: ${0:t} : Producing temporary mau output based on sample rate $SIGNALRATE in ${MAU}.mau"
endif
if ( $v > 0 && $MAUSSHIFT != 0 ) then
  echo "DEBUG: ${0:t} : Boundaries are increased by $MAUSSHIFT msec (option MAUSSHIFT)"
endif
# MINPAUSLEN is at least one frame
if ( $MINPAUSLEN < 1 ) then
  set MINPAUSLEN = 1
endif
if ( $v > 0 && $MINPAUSLEN > 1 ) then
  echo "DEBUG: ${0:t} : Inter-word pauses smaller than $MINPAUSLEN frames are spread to"
  echo "DEBUG: ${0:t} :   adjacent segments; except if the preceeding or following segment is a plosive"
  echo "DEBUG: ${0:t} :   in which case the pause is completely added to that plosive (option MINPAUSLEN)"
endif
# Produce a MAU tier in the TEMP area based on SIGNALRATE by extracting it
# from the HTK *.rec file.
# The HMM '#' is designed in a way that it has a minimum duration of zero
# frames. Since the graph contains a '#' between each word, '#' segments
# shorter than MINPAUSLEN frames can be detected and deleted, because they
# are not a 'real' pause between words. The silence is spread equally to the
# adjacent phonemes. However, if the initial phoneme of the following word
# happens to be a plosive, the deleted silence interval is given totally to
# that plosive. The list of plosives is read from $PARAM/PLOSIVES.
if ( ! -f $PARAM/PLOSIVES ) then
  echo "ERROR: ${0:t} : cannot find list of plosives $PARAM/PLOSIVES - exiting" >> /dev/stderr
  if ( $CLEAN == "TRUE" ) then
    rm -rf $TEMP/${PID}* >& /dev/null
  endif
  exit 1
endif
# rec2mau.awk reads the *.rec file on standard input
awk -v MINPAUSLEN=$MINPAUSLEN -v MODELRATE=$MODELRATE \
    -v FRAMERATE=$FRAMERATE -v SIGNALRATE=$SIGNALRATE \
    -v PLOSIVES=$PARAM/PLOSIVES -v STARTWORD=$STARTWORD \
    -v MAUSSHIFT=$MAUSSHIFT -f $SOURCE/rec2mau.awk \
    < ${HTK:r}.rec >! ${MAU}.mau
chmod 666 ${MAU}.mau


# With USETRN the signal has been cut before processing. The segments in the
# mau file are therefore shifted by TRNOFFSET, and the total length of the
# recording is too small by TRNFINAL. Both are corrected here by two awk
# helpers run in sequence.
if ( $USETRN == "TRUE" ) then
  if ( $v > 0 ) then
    echo "DEBUG: ${0:t} : Shifting and expanding mau results option (USETRN)"
  endif
  awk -v TRNOFFSET=$TRNOFFSET -f $SOURCE/correctusetrn.awk < ${MAU}.mau \
    | awk -v TRNFINAL=$TRNFINAL -f $SOURCE/finalusetrn.awk >! ${MAU}.mau.tmp
  mv -f ${MAU}.mau.tmp ${MAU}.mau
  chmod 666 ${MAU}.mau
endif

# The HTK recognizer does not deliver a last segment matching the exact length of the 
# input signal (i.e. the last segment ends before the end of the signal). Since some 
# tools (e.g. praat scripts) are sensitive against this mismatch, we take the exact 
# sample size of the input signal file (SAMPLELENGTH) and match the last segment in 
# the MAU tier against this number.

# correct the last line in temporary MAU so that the segment covers until the last sample
# (a MAU line consists of the tab separated fields: MAU: begin duration link label)
set lastmaubegin = `tail -n 1 ${MAU}.mau | awk '{print $2}'` 
set lastmaudur = `tail -n 1 ${MAU}.mau | awk '{print $3}'` 
# number of samples covered by the MAU tier = begin + duration + 1
@ lastmausample = $lastmaubegin + $lastmaudur
@ lastmausample ++
if ($lastmausample != $SAMPLELENGTH ) then
  if ( $v > 0 ) echo "DEBUG: ${0:t} : last segment does not match input signal length - correcting"
  # corrected duration = SAMPLELENGTH - begin - 1
  @ corlastmaudur = $SAMPLELENGTH - $lastmaubegin
  @ corlastmaudur --
  if ( $corlastmaudur < 1 ) then 
    echo "WARNING: ${0:t} : correction of last segment is larger than last segment - doing nothing" >> /dev/stderr
  else
    # copy all lines but the last one (GNU head syntax)
    head --lines=-1 ${MAU}.mau >! ${MAU}.mau.tmp
    # The label is never assigned to a shell variable, because labels starting
    # with a '{' cause an error in the assignment. Instead the last line is
    # passed through awk, which replaces the duration (third tab separated
    # field) only. Addressing the field by position is essential: a textual
    # replacement of '<TAB>duration<TAB>' hits the begin field instead
    # whenever begin and duration have the same value.
    tail -n 1 ${MAU}.mau | awk -v NEWDUR=$corlastmaudur 'BEGIN {FS = "\t"; OFS = "\t"} {$3 = NEWDUR; print}' >> ${MAU}.mau.tmp
    mv -f ${MAU}.mau.tmp ${MAU}.mau
    chmod 666 ${MAU}.mau
  endif 
endif
# if we requested symbols other than (default) SAMPA symbols we replace them here
# The mapping is read from the tab separated table IPATABLE: column 1 holds
# the SAMPA symbol; the replacement is taken from column 3 (ipa), column 7
# (manner) or column 8 (place). For every MAU label without a mapping a
# WARNING is written to stderr and the label in the output stays empty.
if ( $OUTSYMBOL != "sampa" ) then 
  if ( $v > 0 ) echo "DEBUG: ${0:t} : Coding phonetic symbols in output into $OUTSYMBOL"
  switch ( "$OUTSYMBOL" )
    case "ipa":
      gawk -v IPATABLE=$IPATABLE 'BEGIN {FS="\t"; while(getline < IPATABLE > 0){it[$1]=$3}} /^MAU:/ {printf("MAU:\t%d\t%d\t%d\t%s\n",$2,$3,$4,it[$5]);if(it[$5]=="")printf("WARNING: maus : no mapping to OUTSYMBOL 'ipa' found for SAMPA /%s/ - output will contain empty labels!\n",$5)>>"/dev/stderr"}' ${MAU}.mau >! ${MAU}.mau.tmp
      breaksw
    case "manner":
      gawk -v IPATABLE=$IPATABLE 'BEGIN {FS="\t"; while(getline < IPATABLE > 0){it[$1]=$7}} /^MAU:/ {printf("MAU:\t%d\t%d\t%d\t%s\n",$2,$3,$4,it[$5]);if(it[$5]=="")printf("WARNING: maus : no mapping to OUTSYMBOL 'manner' found for SAMPA /%s/ - output will contain empty labels!\n",$5)>>"/dev/stderr"}' ${MAU}.mau >! ${MAU}.mau.tmp
      breaksw
    case "place":
      gawk -v IPATABLE=$IPATABLE 'BEGIN {FS="\t"; while(getline < IPATABLE > 0){it[$1]=$8}} /^MAU:/ {printf("MAU:\t%d\t%d\t%d\t%s\n",$2,$3,$4,it[$5]);if(it[$5]=="")printf("WARNING: maus : no mapping to OUTSYMBOL 'place' found for SAMPA /%s/ - output will contain empty labels!\n",$5)>>"/dev/stderr"}' ${MAU}.mau >! ${MAU}.mau.tmp
      breaksw
  endsw
  # replace the MAU tier by the re-coded version
  mv -f ${MAU}.mau.tmp ${MAU}.mau
  chmod 666 ${MAU}.mau
endif

# In bigram mode the MAU tier has no links to words: every word link that is
# not already '-1' is rewritten to '-1', so that the following file format
# converters work. Only lines starting with 'MAU:' are kept.
if ( $MODUS == "bigram" ) then
  awk '/^MAU:/ { if ($4 == "-1") print $0; else printf("MAU:\t%d\t%d\t-1\t%s\n", $2, $3, $5) }' \
    ${MAU}.mau >! ${MAU}.mau.tmp
  mv -f ${MAU}.mau.tmp ${MAU}.mau
  chmod 666 ${MAU}.mau
endif

# Then decide what to do with the output:
out_format_conversion:

# Build the options string MAO from the version and the option list: double
# quotes are removed, ':' and ',' are replaced by '_', and finally all blanks
# are replaced by ','.
set MAO = `echo "VERSION=$VERSION $mausoptions" | tr -d '"' | sed -e 's%[:,]%_%g' -e 's% %,%g'`
if ( $v > 0 ) then
  echo "DEBUG: ${0:t} : Options string (MAO): $MAO"
endif

# TextGrid
# The helper PAR2TEXTGRID converts a temporary BPF (SAM header + input BPF
# without MAU/MAO/SAM entries + new MAU tier) into the praat TextGrid $OUT.
# If OUT is not given, the result is written next to the original signal.
# Exit codes: 1 = output or temporary file not writable, 5 = helper failed.
if ( $OUTFORMAT == "TextGrid" ) then 
  # If INSORTTEXTGRID is set and input is read from a BPF, we try to create
  # an additional word tier in the output TextGrid that is synchronized 
  # to the maus output. If input is read from command line the option is ignored
  # Extract a praat compatible TextGrid file from the MAU tier (and input BPF)
  # Note that there is no way to store the MAUS option string (MAO) in TextGrid.
  if ( $OUT == "" ) set OUT = ${SIGNALORG:r}.TextGrid
  # 'touch' serves as a test whether we can write to OUT
  touch $OUT >& /dev/null
  if ( $status != 0 ) then 
    echo "ERROR: ${0:t} : cannot write output to $OUT - exiting" >> /dev/stderr
    if ( $CLEAN == "TRUE" ) rm -rf $TEMP/${PID}* >& /dev/null
    exit 1
  endif
  echo -n "" >! $OUT # empty output file; do not attempt to remove it because this might fail even if you can write to the file
  # the helper PAR2TEXTGRID expects a full BPF as input
  set TEMPORARYPAR = $TEMP/${PID}_${SIGNAL:t:r}.par
  touch $TEMPORARYPAR >& /dev/null 
  if ( $status != 0 ) then 
    echo "ERROR: ${0:t} : cannot create temporary file $TEMPORARYPAR - exiting" >> /dev/stderr
    if ( $CLEAN == "TRUE" ) rm -rf $TEMP/${PID}* >& /dev/null
    exit 1
  endif
  chmod 666 $TEMPORARYPAR
  if ( $v > 0 ) echo "DEBUG: ${0:t} : Creating temporary BPF for TextGrid conversion: $TEMPORARYPAR"
  echo "SAM: $SIGNALRATE" >! $TEMPORARYPAR
  if ( $MODUS != 'bigram' ) then 
    # copy everything from the input BPF except MAU, MAO and SAM entries
    grep -v '^MAU:' $BPF | grep -v '^MAO:' | grep -v '^SAM:' | tr -d '\r' >> $TEMPORARYPAR
  endif
  cat ${MAU}.mau >> $TEMPORARYPAR
  # call helper
  $PAR2TEXTGRID v=$v BPF=$TEMPORARYPAR OUT=$OUT INSORTTEXTGRID=$INSORTTEXTGRID INSKANTEXTGRID=$INSKANTEXTGRID
  if ( $status != 0 ) then
    # error messages belong to stderr like everywhere else in this script
    echo "ERROR: ${0:t} : helper PAR2TEXTGRID reports error - exiting" >> /dev/stderr
    if ( $CLEAN == "TRUE" ) rm -rf $TEMP/${PID}* >& /dev/null
    exit 5
  endif


# legacy EMU output
# OUTFORMAT=emu             : write the Emu files *.hlb and *.phonetic
# OUTFORMAT=EMU|legacyEMU   : additionally pack both files into one *.EMU file
# The Emu files are created by the helper PAR2EMU from a BPF that is located
# next to the original signal file. All scratch files are created as
# $TEMP/${PID}BPF, so that they honour the TEMP setting and are covered by
# the regular clean up of $TEMP/${PID}*.
else if ( $OUTFORMAT == "emu" || $OUTFORMAT == "EMU" || $OUTFORMAT == "legacyEMU" ) then   

  if ( $v > 0 ) echo "DEBUG: ${0:t} : Extracting into legacy Emu files *.hlb and *.phonetic"
  # select location of output dir
  if ( $OUT != "" ) then 
    # take from OUT file
    echo "WARNING: ${0:t} : option OUT=$OUT provided together with Emu output (OUTFORMAT=emu|EMU|legacyEMU)" >> /dev/stderr
    echo "         Emu result files are named as the signal file and written into the location ${OUT:h}" >> /dev/stderr
    # for a name without directory part :h and :t deliver the same string
    if ( $OUT:h != $OUT:t ) then 
      set OUTEMU = $OUT:h
    else
      set OUTEMU = "."
    endif
  else
    # take from signal file
    if ( ${SIGNALORG:h} == ${SIGNALORG:t} ) then
      # SIGNALORG has no dir
      set OUTEMU = "."
    else
      set OUTEMU = ${SIGNALORG:h}
    endif 
  endif  
  # Since the tool PAR2EMU needs a fully equipped BPF as input and the 
  # Emu phonetic file needs the full path of the corresponding signal file,
  # we first create a temporary BPF file in the location of the signal file
  # (write permission needed!) and then call PAR2EMU
  set BPFTMP = ${SIGNALORG:r}.par
  if ( $BPFTMP == $BPF ) then
    # input BPF is already there: just add MAU tier
    # (bpftmp_e = 1 : BPFTMP is the user's input BPF and must not be deleted)
    set bpftmp_e = 1
    touch $BPFTMP
    if ( $status != 0 ) then 
      echo "ERROR: ${0:t} : cannot write to BPF input file for emu export - exiting" >> /dev/stderr
      if ( $CLEAN == "TRUE" ) rm -rf $TEMP/${PID}* >& /dev/null
      exit 1
    endif
    chmod 666 $BPFTMP
    # delete a possible MAU tier in input BPF
    grep -v '^MAU:' $BPFTMP >! $TEMP/${PID}BPF
    mv $TEMP/${PID}BPF $BPFTMP
  else
    # input BPF is elsewhere: create a temporary one in the location 
    # of the signal file; if this file already exists, try to overwrite
    # (bpftmp_e = 0 : BPFTMP is removed again after the conversion)
    set bpftmp_e = 0
    if ( -e $BPFTMP ) then 
      echo "WARNING: ${0:t} : Emu output requires temporary BPF file in signal file location, which already exists - trying to overwrite" >> /dev/stderr
      echo -n "" >! $BPFTMP # empty temp BPF file; do not attempt to remove it because this might fail even if you can write to the file
      if ( $status != 0 ) then 
        echo "ERROR: ${0:t} : cannot write to temporary BPF in $BPFTMP - exiting" >> /dev/stderr
        if ( $CLEAN == "TRUE" ) rm -rf $TEMP/${PID}* >& /dev/null
        exit 1
      endif
    endif
    grep -v '^MAU:' $BPFORG | tr -d '\r' >! $BPFTMP
  endif  
  # add MAU tier
  cat ${MAU}.mau >> $BPFTMP
  # if not already there, add SAM entry in BPF header (needed by par2emu):
  # the entry is inserted as second line of the BPF
  grep '^SAM:' $BPFTMP >& /dev/null
  if ( $status != 0 ) then
    head -n 1 $BPFTMP | tr -d '\r' >! $TEMP/${PID}BPF
    echo "SAM:	$SIGNALRATE" >> $TEMP/${PID}BPF
    tail -n +2 $BPFTMP | tr -d '\r' >> $TEMP/${PID}BPF
    mv $TEMP/${PID}BPF $BPFTMP
  endif  
  # create Emu files; the options string is passed with ':' instead of '='
  set PAR2EMUMAO = `echo "$MAO" | tr '=' ':'`
  if ( $v > 0 ) echo "DEBUG: ${0:t} : $PAR2EMU SOURCE=$SOURCE outdir=$OUTEMU force=yes hea=source:maus,${PAR2EMUMAO} $BPFTMP"
  $PAR2EMU SOURCE=$SOURCE outdir=$OUTEMU force=yes "hea=source:maus,${PAR2EMUMAO}" $BPFTMP
  # a failing helper is reported, but does not stop the script
  if ( $status != 0 ) echo "ERROR occured in $PAR2EMU - probably no Emu files created" >> /dev/stderr
  if ( $bpftmp_e == 0 ) rm -f $BPFTMP
  # check for non-Emu conform symbols and issue a warning
  # (fixed string search; avoids the obsolescent 'egrep')
  grep -q -F '{' ${MAU}.mau
  if ( $status == 0 ) echo "WARNING: ${0:t} : Emu does not tolerate some SAM-PA labels such as curly brackets ({})." >> /dev/stderr
  if ( $OUTFORMAT == "EMU" || $OUTFORMAT == "legacyEMU" ) then 
    if ( $v > 0 ) echo "DEBUG: ${0:t} : Packing Emu files *.hlb and *.phonetic into *.EMU"
    # *.EMU = *.hlb + separator line + *.phonetic
    mv -f $OUTEMU/${BPFTMP:r:t}.hlb $OUTEMU/${BPFTMP:r:t}.EMU
    echo '--- cut here ---' >> $OUTEMU/${BPFTMP:r:t}.EMU
    cat $OUTEMU/${BPFTMP:r:t}.phonetic >> $OUTEMU/${BPFTMP:r:t}.EMU
    rm -f $OUTEMU/${BPFTMP:r:t}.phonetic
  endif

# EMU output
# The helper PAR2EMUR converts a BPF into an Emu annotation file
# (*_annot.json). The BPF is assembled in the private directory
# $TEMP/${PID}_BPFDIR under the base name of the original signal; the result
# is then copied to OUT or, if OUT is not given, next to the original signal.
# All scratch files are created as $TEMP/${PID}BPF, so that they honour the
# TEMP setting and are covered by the regular clean up of $TEMP/${PID}*.
else if ( $OUTFORMAT == "emuR" ) then   
 
  if ( $v > 0 ) echo "DEBUG: ${0:t} : Extracting into Emu file (*_annot.json)"
  # use of helper requires a fully equipped BPF file with ORT,KAN and MAU
  # with the same base name as SIGNAL
  mkdir $TEMP/${PID}_BPFDIR >& /dev/null
  if ( $status != 0 ) then 
    echo "ERROR: ${0:t} : cannot create temporary $TEMP/${PID}_BPFDIR - exiting" >> /dev/stderr
    if ( $CLEAN == "TRUE" ) rm -rf $TEMP/${PID}* >& /dev/null
    exit 1
  endif
  set BPFTMP = $TEMP/${PID}_BPFDIR/${SIGNALORG:t:r}.par
  touch $BPFTMP
  if ( $status != 0 ) then 
    echo "ERROR: ${0:t} : cannot write to temporary file $BPFTMP - exiting" >> /dev/stderr
    if ( $CLEAN == "TRUE" ) rm -rf $TEMP/${PID}* >& /dev/null
    exit 1
  endif
  chmod 666 $BPFTMP
  echo -n "" >! $BPFTMP # empty temp BPF file; do not attempt to remove it because this might fail even if you can write to the file
  if ( $MODUS != "bigram" ) then 
    # copy the input BPF without MAU and MAO entries
    grep -v '^MAU:' $BPFORG | grep -v '^MAO:' | tr -d '\r' >! $BPFTMP
  else
    # bigram mode: write a minimal BPF header instead
    printf "LHD: Partitur 1.3\nLBD:\n" >! $BPFTMP
  endif
  # add MAU tier
  cat ${MAU}.mau >> $BPFTMP
  # if not already there, add SAM entry in BPF header (needed by helper):
  # the entry is inserted as second line of the BPF
  grep -q '^SAM:' $BPFTMP 
  if ( $status != 0 ) then
    head -n 1 $BPFTMP | tr -d '\r' >! $TEMP/${PID}BPF
    echo "SAM:	$SIGNALRATE" | tr -d '\r' >> $TEMP/${PID}BPF
    tail -n +2 $BPFTMP | tr -d '\r' >> $TEMP/${PID}BPF
    mv $TEMP/${PID}BPF $BPFTMP
  endif  
  # add MAO header entry (is later translated into an emuDB attribute to level 'bundle');
  # it is inserted as second line of the BPF as well
  head -n 1 $BPFTMP | tr -d '\r' >! $TEMP/${PID}BPF
  echo "MAO:  $MAO" >> $TEMP/${PID}BPF
  tail -n +2 $BPFTMP | tr -d '\r' >> $TEMP/${PID}BPF
  mv $TEMP/${PID}BPF $BPFTMP
  # create Emu file ('touch' tests whether the result file can be written)
  touch ${BPFTMP:r}_annot.json 
  if ( $status != 0 ) then 
      echo "ERROR: ${0:t} : cannot create (temporary) ${BPFTMP:r}_annot.json - exiting" >> /dev/stderr
      if ( $CLEAN == "TRUE" ) rm -rf $TEMP/${PID}* >& /dev/null
      exit 1
  endif
  if ( $v > 0 ) echo "DEBUG: ${0:t} : $PAR2EMUR v=$v force=TRUE validate=true $BPFTMP"
  $PAR2EMUR v=$v force=TRUE validate=true $BPFTMP
  if ( $status != 0 ) then 
    # TODO: insert proper ERROR and exit 5 here; currently a failing helper
    # is only reported and the script continues
    echo "ERROR occured in $PAR2EMUR - probably no Emu file created" >> /dev/stderr
  endif  
  # deliver the result
  if ( $OUT != "" ) then 
    if ( $v > 0 ) echo "Extracting into $OUT"
    cp ${BPFTMP:r}_annot.json $OUT >& /dev/null
  else 
    if ( $v > 0 ) echo "Extracting into ${SIGNALORG:r}_annot.json"
    cp ${BPFTMP:r}_annot.json ${SIGNALORG:r}_annot.json
  endif

# mau|par|bpf|mau-append|csv or UNKNOWN (bpf,BPF,PAR and mau-append have been mapped to par and CSV to csv earlier!)
#   par     : complete BPF (header + MAO entry + input BPF + MAU tier); written to
#             OUT or, if OUT is not given, back into the input BPF
#   mau|MAU : the bare MAU tier is copied to OUT (default <signal>.par)
#   csv     : the helper MAUSBPF2CSV creates a CSV table in OUT (default <signal>.csv)
# Exit codes: 1 = unknown format or output not writable, 5 = helper failed.
else
  if ( $OUTFORMAT != "mau" && $OUTFORMAT != "MAU" && $OUTFORMAT != "csv" && $OUTFORMAT != "par" ) then
    echo "ERROR: ${0:t} : Unknown output format $OUTFORMAT - exiting" >> /dev/stderr
    if ( $CLEAN == "TRUE" ) rm -rf $TEMP/${PID}* >& /dev/null
    exit 1
  endif  
  # OUTFORMAT = par|bpf|mau-append
  if ( $OUTFORMAT == "par" ) then
    # the new BPF header is collected in $TEMP/${PID}_BPF
    echo "LHD: Partitur 1.3" >! $TEMP/${PID}_BPF
    echo "MAO: $MAO" >> $TEMP/${PID}_BPF
    if ( $BPFORG != "" ) then
      # check for valid BPF header in input BPF: if there, use it and just insert MAO entry;
      # if not there (e.g. G2P output), create a minimal header; then filter the rest of the BPF
      # for MAU,MAO,LHD and append it
      grep -q '^LHD:' $BPFORG
      if ( $status != 0 ) then 
        echo "SAM: $SIGNALRATE" >> $TEMP/${PID}_BPF
        echo "LBD:" >> $TEMP/${PID}_BPF
        grep -v '^MAU:' $BPFORG | grep -v '^MAO:' | tr -d '\r' >> $TEMP/${PID}_BPF
      else
        grep -v '^MAU:' $BPFORG | grep -v '^MAO:' | grep -v 'LHD:' | tr -d '\r' >> $TEMP/${PID}_BPF
      endif
    else
      # there never was an input BPF (this can happen in MODUS=bigram only!): make a minimal BPF header,
      # and if OUT is not given, use the SIGNAL file name as output file name
      echo "SAM: $SIGNALRATE" >> $TEMP/${PID}_BPF
      echo "LBD:" >> $TEMP/${PID}_BPF
      if ( $OUT == "" ) then 
        set OUT = ${SIGNALORG:r}.par
        touch $OUT >& /dev/null
        if ( $status != 0 ) then
          echo "ERROR: ${0:t} : cannot write output to $OUT - exiting" >> /dev/stderr
          if ( $CLEAN == "TRUE" ) rm -rf $TEMP/${PID}* >& /dev/null
          exit 1
        endif
        echo -n "" >! $OUT # empty output file; do not attempt to remove it because this might fail even if you can write to the file
      endif
    endif
    if ( $OUT == "" ) then 
      if ( $v > 0 ) echo "DEBUG: ${0:t} : Appending results as MAU tier to input $BPF"
      cat $TEMP/${PID}_BPF ${MAU}.mau | tr -d '\r' >! $BPF
    else
      if ( $v > 0 ) echo "Extracting into BAS Partitur Format (BPF) $OUT"
      cat $TEMP/${PID}_BPF ${MAU}.mau | tr -d '\r' >! $OUT
    endif
  else
  # OUTFORMAT = csv|mau|MAU
    if ( $OUT == "" ) then 
      if ( $OUTFORMAT == "csv" ) then 
        set OUT = ${SIGNALORG:r}.csv
        if ( $v > 0 ) echo "DEBUG: ${0:t} : Extracting into CSV style table $OUT"
      else
        set OUT = ${SIGNALORG:r}.par
        if ( $v > 0 ) echo "DEBUG: ${0:t} : Extracting into BPF $OUT with single MAU tier"
      endif
    else
      if ( $OUTFORMAT == "csv" ) then 
        if ( $v > 0 ) echo "DEBUG: ${0:t} : Extracting into CSV style table $OUT"
      else
        if ( $v > 0 ) echo "DEBUG: ${0:t} : Extracting into BPF $OUT with single MAU tier"
      endif
    endif
    # 'touch' serves as a test whether we can write to OUT
    touch $OUT
    if ( $status != 0 ) then 
      echo "ERROR: ${0:t} : cannot write output to $OUT - exiting" >> /dev/stderr
      if ( $CLEAN == "TRUE" ) rm -rf $TEMP/${PID}* >& /dev/null
      exit 1
    endif
    # 'MAU' is accepted by the format check above as well and must deliver
    # the MAU tier, not a CSV table
    if ( $OUTFORMAT == "mau" || $OUTFORMAT == "MAU" ) then 
      cp -f ${MAU}.mau $OUT
    else 
      # CSV output : helper MAUSBPF2CSV transforms a BPF
      if ( "$BPF" != "" ) then 
        # if we have a BPF input, we just add the MAU tier and call the helper
        set TEMPORARYPAR = $TEMP/${PID}_${SIGNAL:t:r}.par
        touch $TEMPORARYPAR >& /dev/null 
        if ( $status != 0 ) then 
          echo "ERROR: ${0:t} : cannot create temporary file $TEMPORARYPAR - exiting" >> /dev/stderr
          if ( $CLEAN == "TRUE" ) rm -rf $TEMP/${PID}* >& /dev/null
          exit 1
        endif
        chmod 666 $TEMPORARYPAR
        if ( $v > 0 ) echo "DEBUG: ${0:t} : Creating temporary BPF for CSV conversion: $TEMPORARYPAR"
        grep -v '^MAU:' "$BPFORG" >> $TEMPORARYPAR
        cat ${MAU}.mau >> $TEMPORARYPAR
        # call the helper
        $MAUSBPF2CSV OUT=$OUT $TEMPORARYPAR
        if ( $status != 0 ) then
          # error messages belong to stderr like everywhere else in this script
          echo "ERROR: ${0:t} : helper MAUSBPF2CSV reports error - exiting" >> /dev/stderr
          if ( $CLEAN == "TRUE" ) rm -rf $TEMP/${PID}* >& /dev/null
          exit 5
        endif
      else
        # if we have no input BPF (KANSTR input), we just transform the MAU tier
        # call the helper
        $MAUSBPF2CSV OUT=$OUT ${MAU}.mau
        if ( $status != 0 ) then
          echo "ERROR: ${0:t} : helper MAUSBPF2CSV reports error - exiting" >> /dev/stderr
          if ( $CLEAN == "TRUE" ) rm -rf $TEMP/${PID}* >& /dev/null
          exit 5
        endif
      endif  
    endif           
  endif
endif	


# Regular end of the script: remove all temporary files of this run
# (everything matching $TEMP/${PID}*) unless CLEAN is switched off, report
# the end of processing when debugging, and return success.
if ( $CLEAN == "TRUE" ) then
  if ( $v > 0 ) then
    echo "DEBUG: ${0:t} : Cleaning up temp area"
  endif
  rm -rf $TEMP/${PID}* >& /dev/null
endif

if ( $v > 0 ) then
  echo "DEBUG: ${0:t} : Finishing maus on `date`"
endif

exit 0

