#!/bin/tcsh 

# wrapper for the service 'EMUMagic' calling simplified pipelines 
# to produce emuR output *_annot.json

# F. Schiel 

# version of this wrapper; printed when the script is called with --version
set VERSION = 1.6
# version history see ../DOCU/runEMUMagicHistory

# This wrapper reads either a single SIGNAL or a pair of SIGNAL + TEXT
# where SIGNAL can be one of wav,nis,nist,sph,mp4,mpeg,mpg, and
# TEXT can be one of txt,par,TextGrid,eaf,csv, possibly more that G2P can read without much fuss
# Output is always a single *_annot.json file with the same base name as
# SIGNAL or as defined in OUT.
# OUTFORMAT is accepted by the script but ignored (for compatibility with 
# Web API: the Web API needs this to allow the assembly of an emuDB)
# The only option is LANGUAGE.

# The script calls either maus.pipe with 
# PIPE=G2P_MAUS_PHO2SYL, if both, SIGNAL and TEXT (<3000 words), are given, or
# PIPE=CHUNKPREP_G2P_MAUS_PHO2SYL, if both, SIGNAL and TEXT (TextGrid,eaf,csv), are given, or
# PIPE=G2P_CHUNKER_MAUS_PHO2SYL, if both, SIGNAL and TEXT (bpf,txt, >3000 words), are given, or
# calls runASR, if only SIGNAL is given, and then decides what pipe to run.

# runASR is called with the following options (other than defaults):
# ASRType=autoSelect
# diarization=true
# maus.pipe is called with the following options (other than defaults):
# OUTFORMAT=emuDB
# OUT=$OUT or OUT=$SIGNAL:r

# Locate the installation: resolve the real path of this script (readlink -f
# follows symbolic links) and address the pipeline helper relative to it.
set SCRIPT = `readlink -f "$0"`
set SOURCE = `dirname "$SCRIPT"`
set runPIPE = ${SOURCE}/../Pipeline/maus.pipe

# Defaults of the command line options. The option parser only accepts
# key=value arguments whose key is already set as a variable, so every
# option must get a default here.
set v = 0   # verbose level (also for called programs!)
set SIGNAL = ''
set TEXT = ''
set LANGUAGE = ''
set OUTFORMAT = ''
set OUT = ''
set InputTierName = ''

# defaults that allow testing the script with ASR
set USERID = 'tomcat8'
set HOSTID = 'linux11'

# dummy, only present so that the Web API can pass it
set emuRDBname = ''
 
# Argument parsing: consume the leading key=value arguments.
#
# Each argument of the form key=value overrides the variable 'key', which
# must already be set (i.e. have a default) at this point; otherwise the
# script exits with 1. Parsing stops at the first argument that is empty or
# does not contain a '=' (e.g. --version), which is left in $1.

# 2016-08-03 : replaced 'cut' by awk, since we found that 
# parallel calls to script using 'cut' cause mysterious
# and sporadic shell errors 'cut: Command not found' 

while ( "$1" != "" )
        switch ("$1")
        case *=*:
                # key = text before the first '='. awk prints it only if it
                # is a plain variable name: the key is handed to 'eval' below
                # and must not be able to carry shell syntax.
                set key = `echo "$1" | awk -F= '$1 ~ /^[A-Za-z_][A-Za-z0-9_]*$/ { print $1 }'`
                if ( "$key" == "" ) then
                  echo "ERROR: malformed option $1 - exiting" >> /dev/stderr
                  exit 1
                endif
                #check if option is known (set)
                eval set checkoption = '$?'$key
                if ( $checkoption == 0 ) then
                  echo "ERROR: unknown option $key - exiting" >> /dev/stderr
                  exit 1
                endif
                # val = everything after the FIRST '=' (so a value may itself
                # contain '='). The parentheses keep all words of a value
                # that contains blanks; "$val" joins them with single blanks.
                set val = ( `echo "$1" | awk '{ print substr($0, index($0, "=") + 1) }'` )
                # assign without 'eval': the quoted value is not re-parsed,
                # so quotes or other shell syntax in the value are harmless
                set $key = "$val"
                unset key val
                shift
                breaksw
        default:
                # first non key=value argument: leave the while loop
                break
        endsw
end

# end option parser

# Normalisation of boolean options: list the names of all boolean input
# parameters in 'bool'. For each of them the values
#   true True TRUE 1 yes Yes YES  are rewritten to TRUE,
#   false False FALSE 0 no No NO  are rewritten to FALSE,
# and any other value terminates the script with exit code 1.
# The list is empty here, so the loop body is currently never executed.

set bool = (  )
foreach booleanvariable ( $bool )
  # fetch the current value of the variable whose name is in booleanvariable
  eval set val = '$'$booleanvariable
  switch ( $val )
  case true:
  case True:
  case TRUE:
  case 1:
  case yes:
  case Yes:
  case YES:
    eval set $booleanvariable = TRUE
    breaksw
  case false:
  case False:
  case FALSE:
  case 0:
  case no:
  case No:
  case NO:
    eval set $booleanvariable = FALSE
    breaksw
  default:
    echo "ERROR: ${0:t} : Boolean $booleanvariable=$val is not a boolean value. Use either '0,1,true,false,yes,no'"  >> /dev/stderr
    exit 1
  endsw
end

# --version: print the version number to stdout and stop.
# $1 is quoted so that an argument containing blanks cannot break the
# expression.
if ( "$1" == "--version" ) then 
  echo $VERSION
  exit 0
endif

# SIGNAL and LANGUAGE are mandatory: without them print the usage message
# (completely to stderr) and exit with 1. The values are quoted so that
# e.g. a SIGNAL path containing blanks does not cause an expression error.
if ( "$SIGNAL" == "" || "$LANGUAGE" == "" ) then 
  echo "usage: ${0:t} [v=0] SIGNAL=<input signal> LANGUAGE=<rfc-lang-code> [TEXT=<text input>][OUT=<output>][OUTFORMAT=emuDB][InputTierName=]" >> /dev/stderr
  echo "       ${0:t} --version" >> /dev/stderr
  echo "       service wrapper for special simplified Pipeline to produce EMU-SDMS files:" >> /dev/stderr
  echo "       if TEXT is not given, runASR is called followed by" >> /dev/stderr
  echo "       either the pipe G2P_MAUS_PHO2SYL or G2P_CHUNKER_MAUS_PHO2SYL (TEXT is bpf,txt) or" >> /dev/stderr
  echo "       CHUNKPREP_G2P_MAUS_PHO2SYL (TEXT is TextGrid,eaf,csv) with OUTFORMAT=emuDB." >> /dev/stderr
  echo "       if TEXT is a praat TextGrid or an ELAN eaf file, the option InputTierName must" >> /dev/stderr
  echo "       contain the tier name from which the text should be retrieved." >> /dev/stderr
  echo "       ERRORS are displayed to stderr and the script exits with 1." >> /dev/stderr
  echo "       WARNINGS are displayed to stderr and the script continues." >> /dev/stderr
  exit 1
endif

# Create a private working directory /tmp/<pid>_<seconds since epoch>.
# mkdir is called without -p, so it fails (and the script exits) if the
# directory already exists.
set TEMP = /tmp/$$_`date '+%s'`
mkdir $TEMP
if ( $status != 0 ) then 
  echo "ERROR: ${0:t} : cannot create temporary directory $TEMP  - exiting"
  exit 1
endif

# From here on TEMP exists: every error exit has to remove it again.

# the pipeline helper must be present and executable
if ( ! -x $runPIPE ) then 
  echo "ERROR: ${0:t} : cannot find helper 'maus.pipe' in $runPIPE  - exiting"
  rm -rf $TEMP >& /dev/null
  exit 1
endif
# the input signal must exist
if ( ! -e "$SIGNAL" ) then 
  echo "ERROR: ${0:t} : cannot find input SIGNAL = $SIGNAL  - exiting"
  rm -rf $TEMP >& /dev/null
  exit 1
endif

# Make sure there is a TEXT input for the pipeline:
# - TEXT given     -> it only has to exist
# - TEXT not given -> call runASR, which writes a BPF (*.par) with the same
#                     base name as SIGNAL into TEMP; this file becomes TEXT
# On any error TEMP is removed and the script exits with 1.
if ( "$TEXT" != "" ) then
  if ( ! -e "$TEXT" ) then
    echo "ERROR: ${0:t} : cannot find input TEXT = $TEXT  - exiting"
    rm -rf $TEMP >& /dev/null
    exit 1
  endif
else
  if ( $v > 0 ) echo "DEBUG: ${0:t} : no annotation/text given : use runASR"
  # runASR has to be found by the shell
  which runASR >& /dev/null
  if ( $status != 0 ) then 
    echo "ERROR: ${0:t} : helper 'runASR' missing  - exiting"
    rm -rf $TEMP >& /dev/null
    exit 1
  endif
  # file the recognition result is written to
  set asrBPF = "${TEMP}/${SIGNAL:t:r}.par"
  runASR v=$v ASRType='autoSelect' SIGNAL="$SIGNAL" LANGUAGE="$LANGUAGE" OUT="$asrBPF" OUTFORMAT=bpf diarization=true USERID="$USERID"
  if ( $status != 0 ) then 
    echo "ERROR: ${0:t} : helper 'runASR' returns error  - exiting"
    rm -rf $TEMP >& /dev/null
    exit 1
  endif
  set TEXT = "$asrBPF"
endif

#if ( "${SIGNAL:t:r}" != "${TEXT:t:r}" ) then 
#  echo "ERROR: ${0:t} : file name body of SIGNAL (${SIGNAL:t:r}) must match TEXT (${TEXT:t:r}) to create valid emuDB file - exiting"
#    rm -rf $TEMP >& /dev/null
#  exit 1
#endif

# Select the pipeline from the file extension of TEXT (user input or runASR
# output):
#   csv, TextGrid, eaf : CHUNKPREP_G2P_MAUS_PHO2SYL
#   par, txt           : G2P_MAUS_PHO2SYL, or G2P_CHUNKER_MAUS_PHO2SYL if the
#                        estimated number of words exceeds 3000
# Any other extension is an error.
set PIPE = "G2P_MAUS_PHO2SYL"
set wordCount = 0
switch ( "${TEXT:e}" )
case "csv":
case "CSV":
  # special case of TEXT input: the CSV table should contain a chunk segmentation (3 columns begin;dur;text)
  # better processed by CHUNKPREP
  set PIPE = "CHUNKPREP_G2P_MAUS_PHO2SYL"
  breaksw
case "textgrid":
case "TextGrid":
case "eaf":
case "EAF":
  # special case of TEXT input: praat/ELAN files should contain a chunk segmentation
  # better processed by CHUNKPREP; the tier to read the text from must be named
  set PIPE = "CHUNKPREP_G2P_MAUS_PHO2SYL"
  if ( "$InputTierName" == "" ) then 
    echo "ERROR: ${0:t} : input TEXT is EAF|TextGrid: option 'InputTierName' required but not given - exiting"
    rm -rf $TEMP >& /dev/null
    exit 1
  endif
  breaksw
case "txt":
case "TXT":
  # plain text: count the whitespace separated words
  set wordCount = `cat "$TEXT" | wc -w`
  breaksw
case "par":
case "PAR":
  # BPF: count the lines starting with 'ORT:'
  set wordCount = `grep '^ORT:' "$TEXT" | wc -l`
  breaksw
default:
  echo "ERROR: ${0:t} : unknown extension ${TEXT:e} of TEXT input - exiting"
  rm -rf $TEMP >& /dev/null
  exit 1
endsw
# long par/txt inputs go through the chunker (wordCount stays 0 otherwise)
if ( $wordCount > 3000 ) set PIPE = "G2P_CHUNKER_MAUS_PHO2SYL"

# whatever OUTFORMAT the caller passed is overwritten here
set OUTFORMAT = emuDB

# Output file: if OUT was not given, derive it from SIGNAL (extension
# replaced by _annot.json). Then write an empty string to it ('>!'
# overwrites an existing file) to find out whether it is writable.
if ( "$OUT" == "" ) then
  set OUT = "${SIGNAL:r}_annot.json"
endif
echo -n "" >! "$OUT"
if ( $status != 0 ) then
  echo "ERROR: ${0:t} : cannot write to $OUT - exiting"
  rm -rf $TEMP >& /dev/null
  exit 1
endif

# Make the Pipeline call: run maus.pipe with the selected PIPE on SIGNAL and
# TEXT; the output format is always emuDB. If the helper returns an error,
# the script exits with 1. In both cases the temporary directory is removed
# before leaving.

if ( $v > 0 ) echo "DEBUG: ${0:t} calling: $runPIPE v=$v PIPE=$PIPE SIGNAL=$SIGNAL TEXT=$TEXT OUT=$OUT OUTFORMAT=emuDB LANGUAGE=$LANGUAGE" 
$runPIPE v=$v PIPE=$PIPE SIGNAL="$SIGNAL" TEXT="$TEXT" OUT="$OUT" OUTFORMAT=emuDB LANGUAGE="$LANGUAGE" USERID="$USERID" HOSTID="$HOSTID" InputTierName="$InputTierName" 
if ( $status != 0 ) then
  echo "ERROR: ${0:t} : helper maus.pipe returns error - exiting"
  rm -rf $TEMP >& /dev/null
  exit 1
endif

# success: the working directory (which may hold the runASR output used as
# TEXT) is no longer needed
rm -rf $TEMP >& /dev/null

exit 0

