#!/bin/tcsh 

# This script is a meta script around the speaker diarization (SD) wrappers
# choosable in the BAS WebServices service 'SpeakDiar'.

# The diarization is returned in form of a class 2 segmentation layer 
# SPD that assigns parts of the input signal to speaker labels 'S1', 'S2'... and to 
# silence '<p:>', and a class 1 layer SPK that labels each word segment (ORT)
# with the corresponding speaker label (only if an input BPF with ORT/MAU tiers is
# provided via option 'TEXT').

# Independent of the SD method the speaker labels in the output annotation are 
# 'S1', 'S2', ... in the numerical order of the appearance of the speaker labels in 
# the diarization result.

# If the option speakMatch is not empty, the list of comma separated names given in this
# option is used to label the speakers in the order of 'S1', 'S2', ...; if the list
# contains colon separated name/value pairs (e.g. 'S3:Rudolph,S5:Mary,...'), only the 
# named S# speaker labels are patched, leaving the others unchanged.

# version string that is printed when the script is called with '--version'
set VERSION = 2.5

setenv SOX_OPTS "-D" # this prevents sox version 14.3 and higher from using
                     # automatic dithering in rate conversions, which causes
                     # video results to fluctuate randomly

# Arguments/Options
# SIGNAL (required): can be any of the usual signal formats supported by 
# the helper ENHANCEAUDIO.
# OUT (required): the file in which the diarization output is written.
# OUTFORMAT (required): the format of the output; all output formats of helper
# ANNOTCONV are allowed
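# MEMORY: the size of memory used by LIUM in MB units (default is 2024)
#       (legacy note: this script defines no variable MEMORY, so passing MEMORY=...
#        is rejected as an unknown option)
# TEXT: the (optional) input BPF; the created tiers SPD (and SPK) are appended to this
#       (the option must have the name 'TEXT'; otherwise the optional single file/pair 
#        dropping does not work in the webinterface!)
#       if the input BPF contains a VAD tier the file name is passed on to the SD method
#       DiarXvector in option --vad
#       (legacy note: this script only calls runDiarPyAnnote and does not evaluate a
#        VAD tier)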
# speakMatch: comma separated list of speaker names or name/value pairs for 
#             speaker label patching


# Requirements
# annotConv, audioEnhance, runDiarPyAnnote


# resolve the real path of this script; the helpers below are addressed relative to it
set SCRIPT = `readlink -f "$0"`
set SOURCE = `dirname "$SCRIPT"`  # location where the script is stored
                  # (even if we start via a symbolic link)

# public options
set SIGNAL = ""   # required: the input signal
set OUT = ""      # required: name and location of output annotation file

set TEXT = ""     # optional input annotation (only BPF!) to which the diarization
                  # tiers SPD/SPK are appended
set SAMPLERATE = 1      
                  # the sample rate is required for the conversion to TextGrid; it is
                  # usually taken from the SIGNAL (the default 1 means 'not given,
                  # determine it from the signal'); as a fallback it
                  # can be given on the command line 'SAMPLERATE=<rate>'
set OUTFORMAT = "bpf"
                  # format of the annotation file containing the diarization
set speakMatch = ""
                  # comma separated list of speaker names or name/value pairs for
                  # speaker label patching; if empty, the strict ordered labels 
                  # 'S1', 'S2', ... in the order of speaker appearance are used.
set speakNumber = 0 
                  # enforce number of diarized speakers; 0 = undefined number
set maxSpeakNumber = 0 
                  # enforce number of diarized speakers to a maximum
                  # NOTE(review): this option is accepted on the command line, but it
                  # is not passed on to $DIARPYNOTE anywhere in this script
set minSilenceLength = 200 
                  # Minimum length of silence intervals in the resulting diarization to be classified as silent part.
set minVoicedLength = 200
                  # Minimum length of voiced intervals in the resulting diarization to be classified as speech.
set preference = -2.97
                  # Preference for affinity propagation clustering. Higher values => more detected speakers. 

# Note that all definitions above can be overridden by defining an alternative
# definition as command line parameter

set v = 0   # verbose level (also for called programs!)
 
# helpers
set AUDIOENHANCE = $SOURCE/../AudioEnhance/audioEnhance
set ANNOTCONV = $SOURCE/../AnnotConv/annotConv
set DIARPYNOTE = runDiarPyAnnote  # there must be a script in /usr/local/bin or ~/bin etc.  to start the venv in tomcat8

#set DIARPYNOTE = $SOURCE/runDiarPyAnnote/bin/runDiarPyAnnote  # must be pip installed on server!
               # see README in this dir for details how to install a new version of runDiarPyAnnote

# Actually do the argument parsing here
#
# Every leading argument of the form key=value overrides the variable "key"
# defined above. A key that is not an already defined variable is rejected
# with an error. Parsing stops at the first argument that is not key=value
# (e.g. --version, -h, --help), which is left in $1 for the checks below.

# 2016-08-03 : replaced 'cut' by awk, since we found that 
# parallel calls to script using 'cut' cause mysterious
# and sporadic shell errors "cut: Command not found"

# remember the complete command line for the debug message below: the verbose
# level v is only known after the parsing, and by then the arguments have been
# shifted away
set cmdline = "$*"

while ( "$1" != "" )
        switch ("$1")
        case *=*:
                # the key is everything before the first "="
                set key = `echo "$1" | awk -F= '{ print $1 }'`
                #check if option is known (set)
                eval set checkoption = '$?'$key
                if ( $checkoption == 0 ) then
                  echo "ERROR: unknown option $key - exiting" >> /dev/stderr
                  exit 1
                endif
                # the value is everything after the FIRST "=", so that values which
                # contain "=" themselves (file names, speaker names) are not truncated
                set val = `echo "$1" | awk '{ print substr($0, index($0, "=") + 1) }'`
                # NOTE(review): this eval runs on caller supplied text; a value that
                # contains a single quote breaks out of the quoting - callers must
                # sanitize their input
                eval "set $key "= \'"$val"\'
                unset key val
                shift
                breaksw
        default:
                break
        endsw
end

# this message must come after the parsing, otherwise v is always 0 here
if ( $v > 1 ) echo "DEBUG: ${0:t} : called with arguments $cmdline"
unset cmdline

# end option parser

# boolean variable check; define all boolean input parameters here
#
# Each variable named in the list "bool" is normalised: the spellings
# true/True/TRUE/1/yes/Yes/YES become TRUE, false/False/FALSE/0/no/No/NO
# become FALSE; any other value terminates the script with an error.
# The list is currently empty, i.e. the loop body is never executed.

set bool = ( )
foreach flagname ( $bool )
  eval set flagvalue = '$'$flagname
  switch ( $flagvalue )
  case true:
  case True:
  case TRUE:
  case 1:
  case yes:
  case Yes:
  case YES:
    eval set $flagname = TRUE
    breaksw
  case false:
  case False:
  case FALSE:
  case 0:
  case no:
  case No:
  case NO:
    eval set $flagname = FALSE
    breaksw
  default:
    echo "ERROR: ${0:t} : Boolean $flagname=$flagvalue is not a boolean value. Use either '0,1,true,false,yes,no'"  >> /dev/stderr
    exit 1
  endsw
end

# --version: print the version string and terminate successfully
# ($1 is the first argument that the option parser did not consume; it is
#  quoted so that an empty or blank containing value cannot break the expression)
if ( "$1" == "--version" ) then 
  echo $VERSION
  exit 0
endif

# print the usage to stderr and exit with code 1 if a required option (SIGNAL, OUT)
# is missing or if help was requested; SIGNAL is quoted because signal file names
# may contain blanks
if ( "$SIGNAL" == "" || "$OUT" == "" || "$1" == '-h' || "$1" == '--help' ) then 
  echo "usage: ${0:t} [v=0] SIGNAL=<anySignalSupportedByAudioEnhance> OUT=<output segmentation> [TEXT=<annotationInputToWhichTheDiarizationIsAppended>][speakMatch=''][speakNumber=0][minSilenceLength=200][minVoicedLength=200][preference=-2.97][OUTFORMAT=bpf]" >> /dev/stderr
  echo "       ${0:t} --version" >> /dev/stderr
  echo "       script for speaker diarization method runDiarPyAnnote" >> /dev/stderr
  echo "       The signal (SIGNAL) is segmented and labelled into speaker turns ('S0', 'S1',...)" >> /dev/stderr
  echo "       and silence('<p:>'), and encoded as segmentation tier SPD." >> /dev/stderr
  echo "       The SPD tier is either output as single tier, or - if a BPF is input via option TEXT -" >> /dev/stderr
  echo "       augmented by a SPK tier (if input BPF contains a ORT/MAU combination)" >> /dev/stderr
  echo "       and added to the tiers of the input," >> /dev/stderr
  echo "       and then the output is converted into the required format (OUTFORMAT, default is 'bpf')." >> /dev/stderr 
  echo "       If speakMatch is set to a comma separated list of names, these names are used" >> /dev/stderr
  echo "       as speaker labels instead of 'S1', 'S2',... in the order of appearance; if the list" >> /dev/stderr
  echo "       contains 'S#:name' pairs instead of names, only selected S# labels can be patched, e.g." >> /dev/stderr
  echo "       speakMatch='S3:Ralph' only patches the third appearing speaker to 'Ralph'." >> /dev/stderr
  echo "       If speakNumber is set to a number >0, the SD will enforce this number of speakers" >> /dev/stderr
  echo "       to be diarized; default is 0 which means that the SD method will determine the" >> /dev/stderr
  echo "       number automatically (which almost always leads to double speaker labels!)." >> /dev/stderr
  echo "       Option minSilenceLength sets the minimum length of silence intervals in msecs; minVoicedLength" >> /dev/stderr
  echo "       sets the minimum length of speech intervals in msecs." >> /dev/stderr
  echo "       Option preference for affinity propagation clustering. Higher values => more detected speakers." >> /dev/stderr
  echo "       WARNINGS are displayed to stderr and the script continues." >> /dev/stderr
  echo "       ERRORS are displayed to stderr and the script exits with code > 0." >> /dev/stderr
  exit 1
endif

# check helpers: terminate with exit code 1 as soon as one of them is missing

# the SD method must be callable via the search path
which $DIARPYNOTE >& /dev/null
if ( $status ) then
  echo "ERROR: ${0:t} : cannot find SD method $DIARPYNOTE - exiting" >> /dev/stderr
  exit 1
endif

# the converter for the output annotation
if ( ! -e "$ANNOTCONV" ) then
  echo "ERROR: ${0:t} : cannot find output conversion helper $ANNOTCONV  - exiting" >> /dev/stderr
  exit 1
endif

# the signal preprocessing helper
if ( ! -e "$AUDIOENHANCE" ) then
  echo "ERROR: ${0:t} : cannot find helper audioEnhance $AUDIOENHANCE  - exiting" >> /dev/stderr
  exit 1
endif

# the two awk helpers that are stored next to this script
foreach awkHelper ( $SOURCE/${0:t}_SPK.awk $SOURCE/${0:t}_reorder.awk )
  if ( ! -e $awkHelper ) then
    echo "ERROR: ${0:t} : cannot find helper $awkHelper - exiting" >> /dev/stderr
    exit 1
  endif
end


# check parameters and inputs before we START

if ( $v > 0 ) echo "DEBUG: ${0:t} : SIGNAL=$SIGNAL OUT=$OUT TEXT=$TEXT speakNumber=$speakNumber minSilenceLength=$minSilenceLength minVoicedLength=$minVoicedLength SAMPLERATE=$SAMPLERATE"
# verbose level that is handed to the called helpers: one less than our own,
# but never below 0
set v_1 = 0
if ( $v > 0 ) @ v_1 = $v - 1
# common prefix of all temporary files of this run (process id + epoch seconds);
# every exit path from here on removes ${TEMP}*
set TEMP = /tmp/$$_`date '+%s'`
if ( ! -e "$SIGNAL" ) then 
  echo "ERROR: ${0:t} : cannot find input SIGNAL = $SIGNAL  - exiting" >> /dev/stderr
  rm -rf ${TEMP}*  >& /dev/null
  exit 1
endif
if ( "$TEXT" != "" && ! -e "$TEXT" ) then 
  echo "ERROR: ${0:t} : cannot find input TEXT = $TEXT  - exiting" >> /dev/stderr
  rm -rf ${TEMP}*  >& /dev/null
  exit 1
endif
# warn (but continue) if the input BPF has none of the standard extensions
# par|PAR|bpf|BPF that are named in the warning
if ( "$TEXT" != "" && "${TEXT:e}" != "par" && "${TEXT:e}" != "PAR" && "${TEXT:e}" != "bpf" && "${TEXT:e}" != "BPF" ) then 
  echo "WARNING: ${0:t} : input BPF $TEXT:t does not have standard extension par|PAR|bpf|BPF, could it be that you are processing the wrong files?" >> /dev/stderr
endif
# make sure that the output file can be written, then empty it
touch "$OUT" >& /dev/null
if ( $status != 0 ) then
  echo "ERROR: ${0:t} : cannot write to output file OUT = $OUT  - exiting"  >> /dev/stderr
  rm -rf ${TEMP}*  >& /dev/null
  exit 1
endif
echo -n "" >! "$OUT"
# if input BPF is given, copy to a temporary BPF (later we add the SPD tier), 
if ( "$TEXT" != "" ) cp "$TEXT" ${TEMP}_BPFOUT.par 

# check OUTFORMAT: all spellings of the BPF format are mapped to "par"; every
# other format must be in the list of output formats reported by $ANNOTCONV
if ( "$OUTFORMAT" == "bpf" || "$OUTFORMAT" == "BPF" || "$OUTFORMAT" == "par" || "$OUTFORMAT" == "PAR" ) then
  set OUTFORMAT = "par"
else
  set FORMATS = `$ANNOTCONV --listOutFormats`
  if ( $v > 0 ) echo "DEBUG: ${0:t} : got FORMATS = $FORMATS from $ANNOTCONV"
  # look for the requested format among the supported ones
  set formatKnown = 0
  foreach candidate ( $FORMATS )
    if ( $candidate == "$OUTFORMAT" ) set formatKnown = 1
  end
  if ( ! $formatKnown ) then
    echo "ERROR: $0:t : output format $OUTFORMAT not supported; supported formats are: par bpf $FORMATS - exiting" >> /dev/stderr
    rm -rf ${TEMP}*  >& /dev/null
    exit 1
  endif
endif

# debug:
#cp $TEXT ${TEMP}_SPD.par
#goto outputBPF

# start processing

# use audioEnhance to convert SIGNAL into the file ${TEMP}_SIGNAL.wav, which is
# the input of the SD method and of the soxi calls below; the helper is called
# with the reduced verbose level v_1
if ( $v > 0 ) echo "DEBUG: ${0:t} : preprocessing signal $AUDIOENHANCE v=$v_1 SIGNAL=$SIGNAL OUT=${TEMP}_SIGNAL.wav"
$AUDIOENHANCE v=$v_1 SIGNAL="$SIGNAL" OUT=${TEMP}_SIGNAL.wav
# save the exit code immediately, since every further command overwrites $status
set err_code = $status
if ( $err_code != 0 ) then 
    # audioEnhance is the signal preprocessing step, not the output converter
    echo "ERROR: $0:t : signal preprocessing helper $AUDIOENHANCE reports error code $err_code - exiting" >> /dev/stderr
    rm -rf ${TEMP}*  >& /dev/null
    exit $err_code
endif

# at this point a RIFF WAVE version of the input signal
# should reside in ${TEMP}_SIGNAL.wav

# check for sample rate: TextGrid output and video processing 
# require the sample rate for timing
# conversion and it should match the BPF input (SAM entry)
# (SAMPLERATE still being 1 means that no rate was given on the command line,
#  so the rate is read from the preprocessed signal)
if ( "$SAMPLERATE" == 1 ) set SAMPLERATE = `soxi -r "${TEMP}_SIGNAL.wav"`
# NOTE(review): $status is tested directly after the conditional assignment above;
# presumably it reflects the exit code of the soxi call - confirm for the tcsh
# version in use
if ( $status != 0 ) then 
  echo "ERROR: ${0:t} : cannot determine sampling rate from SIGNAL ${TEMP}_SIGNAL.wav ; as an override rate can be given on the command line (SAMPLERATE=<sampling rate>) - exiting" >> /dev/stderr
  rm -rf ${TEMP}*  >& /dev/null
  exit 1
endif
if ( $v > 0 ) echo "DEBUG: ${0:t} : determined signal sampling rate $SAMPLERATE"

if ( "$TEXT" != "" ) then
  # match rate to SAM entry in input BPF
  # (a mismatch only causes a warning; the rate of the signal is used)
  egrep -q "^SAM:" "$TEXT"  
  if ( $status == 0 ) then 
    # the rate is the second column of the SAM line
    set samrate = `egrep "^SAM:" "$TEXT" | awk '{print $2}'`
    if ( $SAMPLERATE != $samrate ) then 
      echo "WARNING: ${0:t} : mismatch sampling rate of signal $SAMPLERATE and BPF input $samrate  - using signal rate $SAMPLERATE and trying to continue"  >> /dev/stderr
    else
      if ( $v > 1 ) echo "DEBUG: ${0:t} : sampling rate of signal $SAMPLERATE and BPF input $samrate match" 
    endif
  else
    if ( $v > 1 ) echo "DEBUG: ${0:t} : input BPF contains no entry SAM sampling rate" 
  endif
endif

# diarization: run the SD method on the preprocessed signal; the result is
# written to ${TEMP}_SPD.par, from which the SPD lines are read further below
# (the options speakNumber, minSilenceLength, minVoicedLength and preference are
#  passed on; the helper runs with the reduced verbose level v_1)
if ( $v > 0 ) echo "DEBUG: ${0:t} : start diarization $DIARPYNOTE"
if ( $v_1 > 0 ) echo "DEBUG: ${0:t} : $DIARPYNOTE --verbosity $v_1 --INP ${TEMP}_SIGNAL.wav --OUT ${TEMP}_SPD.par --num-speakers $speakNumber --min-silence-length $minSilenceLength --min-voiced-length $minVoicedLength --preference $preference"
$DIARPYNOTE --verbosity $v_1 --INP ${TEMP}_SIGNAL.wav --OUT ${TEMP}_SPD.par --num-speakers "$speakNumber" --min-silence-length "$minSilenceLength" --min-voiced-length "$minVoicedLength" --preference "$preference"
# (previous call without the options --verbosity and --preference, kept for reference)
#if ( $v_1 > 0 ) echo "DEBUG: ${0:t} : $DIARPYNOTE --INP ${TEMP}_SIGNAL.wav --OUT ${TEMP}_SPD.par --num-speakers $speakNumber --min-silence-length $minSilenceLength --min-voiced-length $minVoicedLength"
#$DIARPYNOTE --INP ${TEMP}_SIGNAL.wav --OUT ${TEMP}_SPD.par --num-speakers "$speakNumber" --min-silence-length "$minSilenceLength" --min-voiced-length "$minVoicedLength"
# the comment lines above do not change $status, so this is still the exit code
# of the SD method
set err_code = $status
if ( $err_code != 0 ) then
  echo "ERROR: $0:t : SD method $DIARPYNOTE reports error code $err_code - exiting" >> /dev/stderr
  rm -rf ${TEMP}*  >& /dev/null
  exit $err_code
endif

# check/correct last segment so that it does not exceed length of signal
# (the SPD lines are read as: column 2 = begin sample, column 3 = duration in
#  samples, column 4 = label)
set durSamSignal = `soxi -s "${TEMP}_SIGNAL.wav"` 
set begSamLast = `grep '^SPD:' ${TEMP}_SPD.par | tail -n 1 | awk '{print $2}'` 
set durSamLast = `grep '^SPD:' ${TEMP}_SPD.par | tail -n 1 | awk '{print $3}'`
# without any SPD line the arithmetic below would abort the script with an
# expression error and leave the temporary files behind; exit cleanly instead
if ( "$begSamLast" == "" || "$durSamLast" == "" ) then
  echo "ERROR: ${0:t} : SD method $DIARPYNOTE did not produce any SPD segment - exiting" >> /dev/stderr
  rm -rf ${TEMP}*  >& /dev/null
  exit 1
endif
# number of samples that the signal must have to contain the last segment:
# begin + duration + 1
set endSamLast = $begSamLast
@ endSamLast += $durSamLast 
@ endSamLast += 1
if ( $endSamLast > $durSamSignal ) then
  # last SPD segment exceeds end of file -> correct
  set durSamDiff = $endSamLast
  @ durSamDiff -= $durSamSignal
  @ durSamLast -= $durSamDiff
  if ( $v > 0 ) echo "DEBUG: ${0:t} : correcting the last segment by $durSamDiff samples"
  set lblLast = `grep '^SPD:' ${TEMP}_SPD.par | tail -n 1 | awk '{print $4}'`
  # replace the last line of the file by the shortened segment
  # NOTE(review): this assumes that the last SPD line is also the last line of
  # ${TEMP}_SPD.par - confirm against the output of the SD method
  set numLines = `cat ${TEMP}_SPD.par | wc -l`
  @ numLines --
  head -n $numLines ${TEMP}_SPD.par >! ${TEMP}_SPD.par.tmp
  printf "SPD:\t%s\t%s\t%s\n" $begSamLast $durSamLast $lblLast >> ${TEMP}_SPD.par.tmp
  mv ${TEMP}_SPD.par.tmp ${TEMP}_SPD.par 
endif
# re-factor the speaker labels to 'S1', 'S2', ... in the order as they first appear in SPD
# and patch the speaker by the names provided in option list 'speakMatch' if set
# (both is done by the awk helper; its output replaces ${TEMP}_SPD.par)
if ( $v > 0 ) then
  if ( "$speakMatch" == "" ) then
    echo "DEBUG: ${0:t} : re-ordering speaker labels according to ascending appearance"
  else
    echo "DEBUG: ${0:t} : re-ordering speaker labels according to ascending appearance and patching label names using speakMatch = $speakMatch"
  endif
endif
awk -v SPD=${TEMP}_SPD.par -v speakMatch="$speakMatch" -f $SOURCE/${0:t}_reorder.awk ${TEMP}_SPD.par >! ${TEMP}_SPD.par.tmp
# do not continue with a truncated SPD tier if the helper failed
# (same handling as for the SPK helper further below)
if ( $status != 0 ) then
  echo "ERROR: $0:t : ${0:t}_reorder.awk reports error code != 0 - exiting" >> /dev/stderr
  rm -rf ${TEMP}*  >& /dev/null
  exit 1
endif
mv ${TEMP}_SPD.par.tmp ${TEMP}_SPD.par

# (jump label; it is only referenced by the commented-out debug goto further up)
outputBPF:
# compose output BPF in ${TEMP}_BPFOUT.par: with an input BPF (TEXT) the new SPD
# tier (and, if possible, a SPK tier) is added to the copy of the input;
# without an input BPF the output is the diarization result alone
if ( "$TEXT" != "" ) then
  # check for existing SPD tier in input BPF (e.g. from previous SD)
  grep -qs '^SPD:' ${TEMP}_BPFOUT.par
  if ( $status == 0 ) echo "WARNING: $0:t : SPD tier in input BPF (TEXT) is over-written" >> /dev/stderr
  # remove SPD from input
  grep -v '^SPD:' ${TEMP}_BPFOUT.par >! ${TEMP}_BPFOUTTEMP.par
  mv ${TEMP}_BPFOUTTEMP.par ${TEMP}_BPFOUT.par
  # write SPD tier to output
  grep '^SPD:' ${TEMP}_SPD.par >> ${TEMP}_BPFOUT.par
  # if ORT/MAU are in BPF input, create SPK tier
  # (technically we need only the MAU tier to do this, but we also check for the 
  # ORT tier, because if this is present, we can safely assume that the MAU tier has word links)
  # ortmauflag sums up the exit codes of both grep calls, i.e. it stays 0 only
  # if both tiers are found
  set ortmauflag = 0
  grep -q '^ORT:' "$TEXT"
  @ ortmauflag += $status
  grep -q '^MAU:' "$TEXT"
  @ ortmauflag += $status
  if ( $ortmauflag == 0 ) then
    if ( $v > 0 ) echo "DEBUG: $0:t : adding SPK layer based on overlap between MAU and SPD "
    # check for existing SPK tier in input BPF (e.g. from ASR module)
    grep -qs '^SPK:' ${TEMP}_BPFOUT.par
    if ( $status == 0 ) echo "WARNING: $0:t : SPK tier in input BPF (TEXT) is over-written" >> /dev/stderr
    # remove SPK from input
    grep -v '^SPK:' ${TEMP}_BPFOUT.par >! ${TEMP}_BPFOUTTEMP.par
    mv ${TEMP}_BPFOUTTEMP.par ${TEMP}_BPFOUT.par
    # the awk helper gets the MAU lines on stdin and the SPD file in variable SPD;
    # its output is appended to the output BPF; $status below is the exit code
    # of the awk call (last command of the pipe)
    grep '^MAU:' "$TEXT" | awk -v SPD=${TEMP}_SPD.par -f $SOURCE/${0:t}_SPK.awk >> ${TEMP}_BPFOUT.par
    if ( $status != 0 ) then
      echo "ERROR: $0:t : ${0:t}_SPK.awk reports error code != 0 - exiting" >> /dev/stderr
      rm -rf ${TEMP}*  >& /dev/null
      exit 1
    endif
  else
    echo "WARNING: $0:t : input BPF does not contain ORT/MAU tier combination - cannot create SPK tier" >> /dev/stderr 
  endif
else
  if ( $v > 0 ) echo "DEBUG: $0:t : output is single SPD tier"
  cp ${TEMP}_SPD.par ${TEMP}_BPFOUT.par
endif


# output conversion: the composed BPF is either copied to OUT as it is
# (OUTFORMAT par) or converted into OUTFORMAT by $ANNOTCONV
if ( "$OUTFORMAT" == "par" ) then
  cp ${TEMP}_BPFOUT.par "$OUT"
else
  if ( $v > 0 ) echo "DEBUG: ${0:t} : convert output BPF to $OUTFORMAT format using converter $ANNOTCONV"
  mkdir ${TEMP}_SPEAKDIARDIR
  # the converter input is named like the signal (basename without extension);
  # the path is built once and always used quoted, so that signal names with
  # blanks do not fall apart into several words
  set signalName = "${SIGNAL:t}"
  set convInput = "${TEMP}_SPEAKDIARDIR/${signalName:r}.par"
  cp ${TEMP}_BPFOUT.par "$convInput"
  $ANNOTCONV v=$v_1 INP="$convInput" OUT="$OUT" outFormat="$OUTFORMAT" SAMPLERATE=$SAMPLERATE
  # save the exit code immediately, since every further command overwrites $status
  set err_code = $status
  if ( $err_code != 0 ) then 
    echo "ERROR: $0:t : output converter $ANNOTCONV reports error code $err_code - exiting" >> /dev/stderr
    rm -rf ${TEMP}*  >& /dev/null
    exit $err_code
  endif
endif

# clean up: remove all temporary files and directories of this run
rm -rf  ${TEMP}* >& /dev/null
if ( $v > 0 ) echo "DEBUG: ${0:t} : normal termination on `date`"
exit 0

