#!/bin/tcsh
#
# MOCCA - Measure of Confidence for Corpus Analysis - MAUS module that adds two layers reflecting
# the confidence measure for transcription (CMT) and segmentation (CMS).
#
# Example command line calls are:
# ./mocca SIGNALFILE=ai002s.wav PARFILE=ai002s.par OUTFILE=/tmp/ai002s.par SAMPLERATE=16000 STANDALONE=1 SEVERITYLEVEL=1
#
# Examples are:
#
# CMT: 3 0.394
# CMS: 2 0.634
#
# CMT: The number in the CMT tier is the probability output by the prediction, i.e. how certain
# the prediction is that the current word belongs to class ``correct'' (1) rather than
# ``incorrect'' (0). Ranges from 0 to 1.
# 
# CMS: The number in the CMS tier is the output of the prediction of the overlap ratio. Ranges from
# 0 to 1.
#
# first column:  tier name
# second column: reference to ORT/KAN tier
# third column:  transcription "good" class probability as output by the SVM or predicted OR value by
#                the SVM
#
#
#
# CONVENTION: UPPER CASE VARIABLE NAMES are either statically set (then words are divided by _) or
# are passed to MOCCA as parameters. Static example: MOCCA_VERSION. Parameter example: SAMPLERATE.
#
# Author T. Kisler (kisler@bas.uni-muenchen.de)
#
# TODO Real input parameter parsing
# TODO implement variable severity conditions ("good", "middle" , "bad") over the complete utterance
# TODO parallelize?
# TODO Only works for German (reference word lengths need to be calculated for SpkRate)
#
######################################
############# STATIC STUFF ###########
# Disabled hack to get the line numbers of errors:
#set _lineno = 0
#alias postcmd '@ _lineno++ && echo -n $_lineno\ '

# Disable file name expansion so that a string containing wildcard characters
# (e.g. set globTest = 'a ? k') is stored literally. VERY IMPORTANT
set noglob

# Version
set MOCCA_VERSION = 0.4

# v:     verbosity level, values above 0 print debugging output
# CLEAN: values above 0 remove the temporary files at the end of the run
set v     = 0
set CLEAN = 1


##### EXIT CODES
#
# 0: everything is okay
# 1: file that is crucial for execution not found
# 2: R call did exit with an exit code != 0
# 3: transcription: probabilities of prediction do not sum up to 1.0
# 4: transcription: the output format of the prediction (generated by R) seems to be broken
# 5: transcription: bc call did not work correctly
# 6: overlapratio:  the output format of the prediction (generated by R) seems to be broken
# 7: debugging:     the output of MAUS could not be parsed and therefore the temporary files are unknown

#setenv LANG en_US.UTF-8  # defines the behavior of text processing, sorting etc.
# LC_NUMERIC is needed to get a decimal point from the printf command
setenv LC_NUMERIC en_US.UTF-8

# canonical path of this script and the directory it is stored in
set SCRIPT = `readlink -f "$0"`
set SOURCE = `dirname "$SCRIPT"`
if ( $v > 0 ) echo $SCRIPT in $SOURCE

# the R scripts are expected in a subdirectory next to this script
set MOCCA_R_SUBDIRECTORY   = "R_mocca"
set MOCCA_R_WRAPPER_SCRIPT = "${SOURCE}/${MOCCA_R_SUBDIRECTORY}/rWrapper.R"

# file name extensions
set MOCCA_TMP_FILE_EXTENSION      = "mocca.tmp"
set MOCCA_REC_EXTENSION           = "rec"
set MOCCA_SLF_EXTENSION           = "slf"
set MOCCA_PAR_EXTENSION           = "par"
set MOCCA_MAU_EXTENSION           = "mau"
set ARFF_EXTENSION                = "arff"
set MOCCA_OUT_EXTENSION           = "mocca.par"
set MOCCA_PREDICTION_TRANSCRIPT   = "TR.pred"
set MOCCA_PREDICTION_SEGMENTATION = "OR.pred"

# name of the MAUS executable (the spelling of the variable name is kept on
# purpose, it is referenced further down)
#set MAUS_EXECTUABLE               = "MAUS_TOOL/maus"
set MAUS_EXECTUABLE               = "maus"

set NUMBER_OF_DECIMAL_PLACES = 4
set TRANSCRIPT_THRESHOLD     = 0.5

# FILE THAT DOES NOT WORK. INVESTIGATE!
#set recFile      = "$SOURCE/develFiles/awed5110-6.rec"
# END FILE THAT DOES NOT WORK

# Defaults of the input parameters. Only names that are set here (or above)
# are accepted as KEY=VALUE options on the command line.
set PARFILE       = ""
set RECFILE       = ""
set SLFFILE       = ""
set SIGNALFILE    = ""
set SAMPLERATE    = ""
set OUTFILE       = ""
set STANDALONE    = "0"
set SEVERITYLEVEL = "2"

##### INPUT PARAMETER PARSING
#
# Every argument has to have the form KEY=VALUE, where KEY is the name of a
# variable that is already set at this point (PARFILE, OUTFILE, STANDALONE,
# ...). Parsing stops at the first argument that does not contain a = sign.
# An unknown KEY terminates the script with exit code 1.

if ( $v > 0 ) echo "$0 $*"
while ( "$1" != "" )
	switch ("$1")
	case *=*:
		# KEY is everything in front of the first = sign
		set key = `echo "$1" | cut -d= -f1`
		# the option is known only if a variable of that name is set
		eval set checkoption = '$?'$key
		if ( $checkoption == 0 ) then
			echo "ERROR ${0:t} : unknown option $key - exiting" > /dev/stderr
			exit 1
		endif
		# VALUE is everything behind the first = sign; -f2- keeps further
		# = signs that are part of the value (e.g. in a file name) instead
		# of cutting the value off at the second = sign
		set val = `echo "$1" | cut -d= -f2-`
		# NOTE(review): the value is passed through eval inside single
		# quotes, so a value containing a single quote breaks the assignment
		eval "set $key "= \'"$val"\'
		unset key val
		shift
		breaksw
	default:
		break
	endsw
end

# end option parser


# Boolean variable check: the names of all boolean input parameters are listed
# in bool. Every accepted spelling is mapped to TRUE or FALSE, the special value
# force is kept unchanged, and anything else terminates the script with exit
# code 1. Labels that share a result fall through to one common assignment.

set bool = ( STANDALONE )
foreach booleanvariable ( $bool )
  eval set val = '$'$booleanvariable
  switch ( $val )
  case true:
  case True:
  case TRUE:
  case 1:
  case yes:
  case Yes:
  case YES:
    eval set $booleanvariable = TRUE
    breaksw
  case false:
  case False:
  case FALSE:
  case 0:
  case no:
  case No:
  case NO:
    eval set $booleanvariable = FALSE
    breaksw
  case force:
    eval set $booleanvariable = force
    breaksw
  default:
    echo "ERROR: ${0:t} : Boolean $booleanvariable=$val is not a boolean value. Use either '0,1,true,false,yes,no,(force)' - exiting" > /dev/stderr
    exit 1
  endsw
end

# Check that all parameters needed in the current mode were passed.
# In standalone mode MAUS is called by this script, so a signal and a par file
# are sufficient; otherwise the rec and slf files have to be passed as well.
# The first missing parameter is reported on stderr and printUsage is set to
# TRUE.
set printUsage = "FALSE"
if ( "$STANDALONE" == "TRUE" ) then
    set necessary = (PARFILE SIGNALFILE SAMPLERATE OUTFILE)
else
    set necessary = (PARFILE RECFILE SLFFILE SAMPLERATE OUTFILE)
endif

foreach currNecessary ( $necessary )
    # fetch the value of the variable whose name is stored in currNecessary
    eval set val = '$'$currNecessary
    # quoted, so that the comparison stays a valid expression for any value
    if ( "$val" == "" ) then
        echo "" > /dev/stderr
        echo "ERROR: $currNecessary parameter was not set, but is needed in this context" > /dev/stderr
        echo "       needed parameters are $necessary" > /dev/stderr
        echo "" > /dev/stderr
        set printUsage = "TRUE"
        break
    endif
end

# Print the usage and terminate with exit code 1 if printUsage is TRUE.
# The usage lines show the KEY=VALUE form, which is the only form the option
# parser accepts.
if ( "$printUsage" == "TRUE" ) then
    echo "--------------------------"
    echo "MOCCA version $MOCCA_VERSION"
    echo "--------------------------"
    echo "Not enough arguments provided" >/dev/stderr
    echo "Usage:   mocca PARFILE=<par-file> OUTFILE=<out-file> SAMPLERATE=<rate> RECFILE=<rec-file> SLFFILE=<slf-file> [SEVERITYLEVEL=<1-4>]" >/dev/stderr
    echo "         mocca PARFILE=<par-file> OUTFILE=<out-file> SAMPLERATE=<rate> SIGNALFILE=<signal-file> STANDALONE=1 [SEVERITYLEVEL=<1-4>]" >/dev/stderr
    echo "Example: mocca SIGNALFILE=ai002s.wav PARFILE=ai002s.par OUTFILE=/tmp/ai002s.par SAMPLERATE=16000 STANDALONE=1 SEVERITYLEVEL=1" >/dev/stderr
    echo "<SIGNALFILE>:         Optional signal file (only needed if called with MOCCA DEBUG option)." > /dev/stderr
    echo "" > /dev/stderr
    echo "<PARFILE>:            file that needs to contain at least a MAU tier after maus has been executed." > /dev/stderr
    echo "                      If this file contains a PHO tier as well, the overlap ratio between the MAU" > /dev/stderr
    echo "                      tier created by MAUS and the PHO tier will be calculcated as well." > /dev/stderr
    echo "" > /dev/stderr
    echo "<RECFILE>:            Location of the .rec file produced by MAUS." > /dev/stderr
    echo "" > /dev/stderr
    echo "<SLFFILE>:            Location of the .slf file produced by MAUS." > /dev/stderr
    echo "" > /dev/stderr
    echo "<SAMPLERATE>:         sample rate, needs to be present, if signal-file is as well." > /dev/stderr
    echo "" > /dev/stderr
    echo "[<SEVERITYLEVEL>]:    Optional severity level - how sensitive the check of the file will be recommeded." > /dev/stderr
    echo "                      It specifies the threshold of the percentage of wrong words after which a check is" > /dev/stderr
    echo "                      proposed to the user. The value ranges between 1 and 4, 2 is standard." > /dev/stderr
    echo "                      1 means after 55% words are classified being wrong, a check is proposed " > /dev/stderr
    echo "                      (2 = 65%, 3= 75%, and 4 = 85%). This means the higher the severity level" > /dev/stderr
    echo "                      the later the check will be proposed." > /dev/stderr
    echo "" > /dev/stderr
    echo "OUTFILE:              Necessary outfile." > /dev/stderr
    echo "" > /dev/stderr
    echo "STANDALONE:           Optional for MOCCA as a standalone application. If true, the par-file" > /dev/stderr
    echo "                      and the signal-file only are needed and then maus will be called. After that the" > /dev/stderr
    echo "                      temporary files of MAUS will be used for the processing (/tmp/394_...)." > /dev/stderr
    exit 1
endif


#####
# names and patterns collected in moccaTmpFiles are removed at the end of the
# run as long as CLEAN is above 0
set moccaTmpFiles = ""

# a verbose run keeps the temporary files
if ( $v > 0 ) then
    echo "Will not clean up temporary files"
    set CLEAN = "0"
endif

# Remove the extension from the input file name. The + quantifier does not
# work here, therefore * is used with the fixed pattern in front of it
# (a+ and aa* are equal).
set moccaBaseFileNoExt = `echo $PARFILE | sed 's/\.[a-zA-Z0-9][a-zA-Z0-9]*$//g'`

# If STANDALONE is on, maus is called and the temporary MAUS output is used for
# the processing. PARFILE, RECFILE and SLFFILE are redirected to the temporary
# files; a failed parse of the MAUS output terminates with exit code 7.
if ( "$STANDALONE" == "TRUE" ) then
    if ( $v > 1 ) echo "About to call maus with command: $MAUS_EXECTUABLE v=1 CLEAN=0 SIGNAL=$SIGNALFILE BPF=$PARFILE OUTFORMAT=mau-append"
    # the path of the HTK file is taken from the line "Creating HTK file ..."
    # of the MAUS output; all other temporary names are derived from it
    set htkFileTmp = `$MAUS_EXECTUABLE v=1 CLEAN=0 SIGNAL=$SIGNALFILE BPF=$PARFILE OUTFORMAT=mau-append | grep "Creating HTK file " | awk '{ print $7 }'`
    # quoted, so that the test also works if more than one line matched
    if ( "$htkFileTmp" == "" ) then
        echo "Temporary MAUS files could not be found (maybe the debug output of MAUS has changed)! Aborting..." > /dev/stderr
        exit 7
    endif
    #overwrite input par file location
    #echo $htkFileTmp
    set parFileBase = `echo $htkFileTmp | sed 's/\.[a-zA-Z0-9][a-zA-Z0-9]*$//g'`
    # need to set parFile here, so that all the other files are set correctly (rec, slf)
    set PARFILE     = "$parFileBase.$MOCCA_PAR_EXTENSION"
    set mauFile     = "$parFileBase.$MOCCA_MAU_EXTENSION"
    if ( $v > 1 ) echo "Basefile: $parFileBase"
    # Cut off the last underscore and everything behind it. Old way, from
    # before the second part with numbers in the file name:
    #   sed 's/_[a-zA-Z0-9\._-][a-zA-Z0-9\._-]*$//1'
    set bpfFileBase = `echo $parFileBase | sed 's/\(.*\)_\(.*\)/\1/'`
    # add the files to being removed after mocca finishes if CLEAN!=0
    if ( $v > 1 ) echo "Adding $bpfFileBase and derivates to being deleted after mocca has finished"
    set moccaTmpFiles = "$moccaTmpFiles ${bpfFileBase}_*"
    if ( $v > 1 ) echo "bpfFileBase: $bpfFileBase"
    # setting the correct temporary bpfFile name for copying
    set bpfFile = "${bpfFileBase}__BPF"
    # copy the BPF file to where it should be (and how it should be named) -> needs to be deleted in the end!!!
    # NOTE(review): a failure of cp or cat is not detected here
    set outTmp  = `cp $bpfFile $PARFILE`
    # copy the MAU part into the bpf file (needed otherwise the future processing will not work)
    set outTmp  = `cat $mauFile >> $PARFILE`
    if ( $v > 1 ) echo "Using temporary BPF content from $bpfFile and copying it to par file $PARFILE" > /dev/stderr

    # set necessary files
    #set RECFILE            = "$moccaBaseFileNoExt.$MOCCA_REC_EXTENSION"
    #set SLFFILE            = "$moccaBaseFileNoExt.$MOCCA_SLF_EXTENSION"
    set RECFILE            = "$parFileBase.$MOCCA_REC_EXTENSION"
    set SLFFILE            = "$parFileBase.$MOCCA_SLF_EXTENSION"
endif

# File locations that need to be set in any case. The working files share the
# base name that was derived from the par file given on the command line.
set moccaRScriptDirectory = "${SOURCE}/${MOCCA_R_SUBDIRECTORY}"

set moccaArffFile      = "${moccaBaseFileNoExt}.${ARFF_EXTENSION}"
set moccaPredFileTR    = "${moccaBaseFileNoExt}.${MOCCA_PREDICTION_TRANSCRIPT}"
set moccaPredFileOR    = "${moccaBaseFileNoExt}.${MOCCA_PREDICTION_SEGMENTATION}"
set moccaTemporaryFile = "${moccaBaseFileNoExt}.${MOCCA_TMP_FILE_EXTENSION}"

# register the working files for removal after the run
set moccaTmpFiles = "$moccaTmpFiles $moccaPredFileTR $moccaPredFileOR $moccaArffFile $moccaTemporaryFile"



#2018-02-19 Makes no sense: deletes the output file
#if($v > "1") echo "    Attempting to delete $OUTFILE if the file exists"
#if ( -f $OUTFILE ) then
    # if($v > "1") echo "    Removing the temporary file $OUTFILE"
    # set rmAnswer = `rm $OUTFILE`
#endif
# cleaning done

# Check of static files and directories: the directory with the R scripts has
# to exist, otherwise the script terminates with exit code 1.
# The built-in -d file test is used instead of the exit status of a backquoted
# ls, because whether the status of a backquote expansion is propagated to the
# status variable depends on the anyerror setting of tcsh.
if ( ! -d "$moccaRScriptDirectory" ) then
   echo "The file $moccaRScriptDirectory could not be found. Needed for execution. Aborting!" > /dev/stderr
   exit 1
endif

################### STATIC STUFF END

# Collect the word indices referenced by the MAU tier: select all MAU lines,
# take the 4th tab separated column, sort numerically and remove duplicates
# -> all the MAU indices available (even works with missing ones)
set targetWordIndices = `grep -E "MAU:\s+[0-9]+\s+[0-9]+\s+.+" $PARFILE | awk -F "\t" '{ print $4 }' | sort -n | uniq`

# An empty list cannot be subscripted below (the shell would stop with a
# subscript error), so terminate with an explicit message instead.
if ( $#targetWordIndices == 0 ) then
    echo "No MAU tier entries could be found in $PARFILE. Needed for execution. Aborting!" > /dev/stderr
    exit 1
endif

set targetWordIndexMax  = `echo $targetWordIndices | xargs -n1 | sort -rn | head -n1`

# remove first entry if it is -1, normally there have to exist at least two pauses at the beginning and end
if ( "${targetWordIndices[1]}" == "-1" ) then
    # TODO Flo: is it really true that the following line only works with
    # double quotes? The echo does not need them...
    set targetWordIndices = "$targetWordIndices[2-]"
endif
if ( $v > 1 ) echo "Found (cleaned and sorted) target word indices are: $targetWordIndices"

##### new way of doing things
#
# Call the R wrapper script with all file locations and parameters. Standard
# output and standard error of R are both captured in outputOfR, which is only
# printed at verbosity levels above 2.
# NOTE(review): the exit status of Rscript is not evaluated here, although
# exit code 2 is documented for a failed R call.

if ( $v > 0 ) echo "Calling: Rscript $MOCCA_R_WRAPPER_SCRIPT --directory $moccaRScriptDirectory --bpf $PARFILE --rec $RECFILE --slf $SLFFILE --arff $moccaArffFile --outfile $OUTFILE --wordIndices $targetWordIndices --sampleRate $SAMPLERATE --severityLevel $SEVERITYLEVEL >& /dev/stdout"

set outputOfR = `Rscript $MOCCA_R_WRAPPER_SCRIPT --directory $moccaRScriptDirectory --bpf $PARFILE --rec $RECFILE --slf $SLFFILE --arff $moccaArffFile --outfile $OUTFILE --wordIndices "$targetWordIndices" --sampleRate $SAMPLERATE --severityLevel $SEVERITYLEVEL >& /dev/stdout`

if ( $v > 2 ) echo "----------\nR Output:\n----------\n$outputOfR\n\n"


# Clean up the temporary files in case they do exist. Nothing is removed if
# CLEAN is 0, which is the case for verbose runs.

if($CLEAN > 0) then
    # moccaTmpFiles is one blank separated string; the unquoted expansion
    # splits it into one word per file name or pattern. As noglob is set, a
    # pattern ending in _* reaches the loop body unexpanded.
    foreach moccaTmpFile ( $moccaTmpFiles )
        #if ( -f $moccaTmpFile ) then
        set rmAnswer = `sh -c "rm -f $moccaTmpFile"` #sh -c is necessary, otherwise the wildcard is not handed over correctly to the rm
        #endif
    end
endif

##### end new way
