#!/bin/tcsh 

# transcription submodule of MOCCA

# Run the R transcription quality script. Its combined stdout/stderr is captured only so that
# it can be shown at higher verbosity levels; the actual result is read from $moccaPredFileTR.
if ($v > 1) echo "Calling: Rscript $MOCCA_R_TRANSCRIPTION_QUALITY_SCRIPT $SOURCE $moccaArffFile $moccaPredFileTR >& /dev/stdout"
set outputOfRPrediction = `Rscript $MOCCA_R_TRANSCRIPTION_QUALITY_SCRIPT $SOURCE $moccaArffFile $moccaPredFileTR >& /dev/stdout`

if ($v > 2) echo "R outputted: $outputOfRPrediction"

# Read the prediction file; semicolons are turned into blanks so that every field becomes one
# array element.
set prediction = `sed 's/\;/ /g' "$moccaPredFileTR"`

# The prediction array has the following structure: "bad" "good" 0.9 0.1
# Verify the size BEFORE indexing: accessing a missing element makes the shell abort with
# a subscript error, so a missing or truncated file would never reach a proper error message.
if ("$#prediction" != "4") then
   echo "The output format of the prediction (generated by R) seems to be broken. Aborting!" >/dev/stderr
   exit 4
endif

set probabilityWrong   = $prediction[3]
set probabilityRight   = $prediction[4]

##############################
########## CHECKS ############
# A check that tests if the probabilities for the wrong and the correct class add up to
# approximately 1. If not, something is going very wrong (maybe the input file is broken).
set probabilityOverall = `echo "$probabilityWrong + $probabilityRight" | bc -l`
set bcStatus = $status # save the status right away, before another command overwrites it

#####

# Check if bc could be executed correctly (otherwise maybe something is wrong with the operands).
# This is done before the sum is used any further, because a failed addition would otherwise be
# reported as a wrong sum further down. An empty result is treated as a failure as well.
if ("$bcStatus" != "0" || "$probabilityOverall" == "") then
   echo "bc could not be executed correctly (maybe the two operands are no numbers?). Aborting!" >/dev/stderr
   exit 5
endif

#####

# csh can not do floating point calculations, so bc is used as a workaround: it computes the
# absolute value of the difference between the sum of the two classes and 1.0 (as the square
# root of the squared difference). Afterwards diffFromOne should be very close to zero or be
# exactly zero.
set diffFromOne = `echo "sqrt(($probabilityOverall - 1.0)^2)" | bc -l`

# Several conditions have to be checked to decide if the value is close to 0: is it a very small
# fraction (e.g. 0.000000002), is it exactly 0, or is it a number that only possesses 0s before
# and after the decimal dot. Sidenote: expr returns the number of matched characters.
# Notes on the patterns:
#  - expr works with basic regular expressions, in which a plain + is a literal character and
#    not a repetition operator; repetition is therefore written as 00* or 0*.
#  - bc prints fractions without a leading zero (.0000000002), so the digits in front of the
#    decimal dot have to be optional in the first pattern.
#  - the first pattern demands at least four zeros after the dot, i.e. a difference below 0.0001.
set cond1 = `expr match "$diffFromOne" '0*\.0000.*'` # check for numbers like 0.0000000003 or .0000000003
set cond2 = `expr match "$diffFromOne" '^0$'` # check if value is exactly 0
set cond3 = `expr match "$diffFromOne" '^00*\.00*$'` # check for 0.0, 00.00 etc.
if ($v > 0) echo "Condition1 (0.00000000002): $cond1 | Condition2 (0): $cond2 | Condition3 (0.0): $cond3"

# in case all the conditions are 0 (diffFromOne obviously holds something else than a value
# close to 0) -> throw error
if ($cond1 == 0 && $cond2 == 0 && $cond3 == 0) then
   echo "The two probabilities of the correct and wrong class always have to add up to 1.0. Something is very wrong. Aborting!" >/dev/stderr
   exit 3
endif

#####

# check if the prediction array has the correct size/was read correctly
if ("$#prediction" != "4") then
   echo "The output format of the prediction (generated by R) seems to be broken. Aborting!" >/dev/stderr
   exit 4
endif
######### END CHECKS ##########
###############################


###### threshold test -> count the tier in cmtTiersBelowThresh (kept for later evaluation)

# bc prints 1 if the probability of a correct transcription is greater than the threshold and
# 0 otherwise, so a 0 marks a tier that did not exceed the threshold.
set belowThresh = `echo "$probabilityRight > $TRANSCRIPT_THRESHOLD" | bc`

if ("0" == $belowThresh) then
    @ cmtTiersBelowThresh++
    if ($v > 3) then
        echo "Found a CMT tier that is below threshold ($TRANSCRIPT_THRESHOLD). That makes $cmtTiersBelowThresh." > /dev/stderr
    endif
endif

# report both class probabilities and their sum
if ($v > 0) then
    echo "MOCCA: word is wrong with a probability of $probabilityWrong and correct with $probabilityRight (sum: $probabilityOverall)"
endif

# build the CMT tier line: the word index followed by the probability of a correct
# transcription, printed with NUMBER_OF_DECIMAL_PLACES decimals
set cmtTier = `printf "CMT:\t%i\t%.${NUMBER_OF_DECIMAL_PLACES}f\n" $targetWordIndex $probabilityRight`

if ($v > 0) then
    echo "The generated CMT tier is:\n$cmtTier"
endif
##########################################################################
