#!/bin/tcsh 

# transforms ASR BPF files from command line into emuR *_annot.json
# files: only one ITEM level 'ORT', and optionally one ITEM level 'SPK'

# This is NOT a general-purpose converter, but a highly specialized
# tool to convert the output of runASR into an emuDB-usable *_annot.json file.
# The input *.par files must
# - have SAM: entry
# - optionally have a SAO: entry
# - have an ORT: tier 
# - optionally have a SPK: tier
# - have the same base name as the corresponding signal file (not checked!)
#   so that the entries in the *_annot.json regarding bundle name and signal
#   are correct
# All other tiers are ignored; files not conforming to those conditions
# cause an ERROR message on stderr and exit code 1.

# Compatible with runASR 2.1 and higher

# Accepted <body>.par files are converted to <body>_annot.json files,
# in the dir given in OUTDIR or in the same location as the input
# *.par file if OUTDIR is not set. Resulting *_annot.json files
# are checked against the schema server given in SCHEMA if the option
# validate=true is set.

# The resulting emuR *_annot.json files describe a structure of two layers:
# bundle,source -> ORT,SPK
# where bundle contains the utterance base name (*) of the input *.par and
# source is always 'asr'; the entry for the signal file is the basename + .wav.

# This script may be used to convert single files, or within a wrapper
# that converts a BPF collection, places the resulting *_annot.json
# files in an appropriate Session/Bundle structure and creates an
# appropriate emuDB file.

# --- option defaults; the option parser below accepts key=value for any
# --- variable that is already set at that point
set validate = TRUE   # TRUE: validate every generated *_annot.json
set force = FALSE     # FALSE: refuse to overwrite an existing output file
set v = 0             # values greater than 0 switch on DEBUG messages
set SCHEMA = "https://webapp2.phonetik.uni-muenchen.de:17890/_annot"
# alternative validation server, currently disabled:
#set SCHEMA = "http://webapp:9263/_annot"
set OUTDIR = ""       # empty: write the output next to the input file

# --- directory that holds this script; the awk helpers are read from there.
# --- readlink -f resolves symbolic links, so this works even if the script
# --- is started via a link.
#set SOURCE = /homes/schiel/MAUS/TOOL
set SCRIPT = `readlink -f "$0"`
set SOURCE = `dirname "$SCRIPT"`

#echo SOURCE = $SOURCE > /dev/stderr


# Option parser: consume leading key=value arguments.
# A key is accepted only if a variable of that name is already set;
# parsing stops at the first argument that does not contain an equal sign,
# which is then treated as the first input file.

while ( "$1" != "" )
  switch ("$1")
  case *=*:
    # $1 is quoted inside the command substitution so that values with
    # glob characters (for example a question mark in a URL) are passed
    # to cut unchanged instead of being filename-expanded
    set key = `echo "$1" | cut -d= -f1`
    # check if option is known (set)
    eval set checkoption = '$?'$key
    if ( $checkoption == 0 ) then
      echo "ERROR: unknown option $key - exiting"  > /dev/stderr
      exit 1
    endif
    # -f2- keeps everything after the first equal sign, so values that
    # contain further equal signs (URL query strings) are not truncated
    set val = `echo "$1" | cut -d= -f2-`
    # NOTE(review): key and val reach eval unescaped; a value containing
    # a single quote breaks this assignment
    eval "set $key "= \'"$val"\'
    unset key val
    shift
    breaksw
  default:
    break
  endsw
end

# end option parser

# boolean variable check; define all boolean input parameters here.
# Each listed variable is normalized to the string TRUE or FALSE; any
# other value ends the script with an error message and exit code 1.

set bool = ( force validate )
foreach booleanvariable ( $bool )
  # read the value by name; the eval-ed command is: set val = "$<name>"
  # so that empty values and values with blanks or glob characters stay
  # one word and reach the default branch below
  eval 'set val = "$'$booleanvariable'"'
  switch ( "$val" )
  case true:
  case True:
  case TRUE:
  case 1:
  case yes:
  case Yes:
  case YES:
    # control falls through the labels above, as in C
    eval set $booleanvariable = TRUE
    breaksw
  case false:
  case False:
  case FALSE:
  case 0:
  case no:
  case No:
  case NO:
    eval set $booleanvariable = FALSE
    breaksw
  default:
    echo "Boolean $booleanvariable=$val is not a boolean value. Use either '0,1,true,false,yes,no'"  > /dev/stderr
    exit 1
  endsw
end

# At least one input file must remain after option parsing; otherwise
# print the usage on stderr and exit with code 1.
# $1 is quoted so that a file name with blanks or glob characters does not
# break the expression.
if ( "$1" == "" ) then
  echo "usage: $0 [OUTDIR=dir][SCHEMA=URL-annot-val-server][validate=FALSE][force=TRUE][v=1] file1.par [file2.par ...]" > /dev/stderr
  echo "       transforms ASR generated BPFs with tier ORT[,SPK] into emuR *_annot.json files" > /dev/stderr
  echo "       if validate=TRUE, the resulting *_annot.json are validated against the validator" > /dev/stderr
  echo "       given in the URL 'SCHEMA'" > /dev/stderr
  echo "       force=TRUE overwrites existing *_annot.json files; v=1 prints DEBUG messages" > /dev/stderr
  exit 1
endif

while ( "$1" != "" ) 
  # current input BPF; quoted so that a path with blanks or glob characters
  # stays a single, unexpanded word
  set par = "$1"
  if ( $v > 0 ) echo "DEBUG: ${0:t} : working on $par"
  # a missing input file is fatal
  if ( ! -e "$par" ) then
    echo "ERROR: cannot find input BPF $par - exiting" > /dev/stderr
    exit 1
  endif
  # an unexpected extension only triggers a warning; par, PAR and emupar
  # are accepted silently
  set parext = "${par:e}"
  if ( "$parext" != "par" && "$parext" != "PAR" && "$parext" != "emupar" ) then
    echo "WARNING: input BPF $par:t does not have standard extension par/PAR," > /dev/stderr
    echo "         could be that you are processing the wrong files?" > /dev/stderr
  endif
  # check if input is suitable
  # optional SAO: header entry; SAO stays empty if the input has none
  set SAO = ""
  grep -q '^SAO:' "$par"
  if ( $status == 0 ) set SAO = `grep '^SAO:' "$par" | sed 's%^SAO:[[:blank:]]*%%'`
  # mandatory SAM: header entry (sample rate); only the first SAM: line is
  # used, and the comparison is quoted so that an unexpected value yields
  # the error message below instead of an expression syntax error
  set SAMPLERATE = `grep '^SAM:' "$par" | awk 'NR == 1 {print $2}'`
  if ( "$SAMPLERATE" == "" ) then
    echo "ERROR: cannot read sample rate (SAM:) from input $par - exiting" > /dev/stderr
    exit 1
  endif
  if ( $v > 0 ) echo "DEBUG: ${0:t} : input is assumed to be flat (ORT, optional SPK)"
  # mandatory ORT tier
  grep -q "^ORT:" "$par"
  if ( $status != 0 ) then
    echo "ERROR: input $par contains no ORT tier - exiting" > /dev/stderr
    exit 1
  endif
  # largest value found in the second column of the ORT tier
  set ortmax = `grep '^ORT:' "$par" | awk 'BEGIN{ortmax=0}{if($2>ortmax)ortmax=$2}END{print ortmax}'`
  # check for optional speaker diarization; spkmax stays 0 when there is no
  # SPK tier or when its largest second-column value differs from the ORT one
  set spkmax = 0
  grep -q "^SPK:" "$par"
  if ( $status == 0 ) then
    if ( $v > 0 ) echo "DEBUG: ${0:t} : found SPK tier in input"
    set spkmax = `grep '^SPK:' "$par" | awk 'BEGIN{spkmax=0}{if($2>spkmax)spkmax=$2}END{print spkmax}'`
    if ( $spkmax != $ortmax ) then
      echo "WARNING: ${0:t} : mismatch between ORT and SPK tier - ignoring SPK tier" > /dev/stderr
      set spkmax = 0
    endif
  endif
  # determine output file: next to the input if OUTDIR is empty, otherwise
  # in OUTDIR; OUTDIR is quoted so that a directory name with blanks does
  # not break the expression
  if ( "$OUTDIR" == "" ) then
    set out = "${par:r}_annot.json"
  else
    # report a missing output directory explicitly
    if ( ! -d "$OUTDIR" ) then
      echo "ERROR: output directory $OUTDIR does not exist - exiting" > /dev/stderr
      exit 1
    endif
    set out = "${OUTDIR}/${par:t:r}_annot.json"
  endif
  # never overwrite an existing output unless force is TRUE
  if ( -e "$out" && $force == "FALSE" ) then
    echo "ERROR: output JSON $out already exists, remove or use option force=true - exiting" > /dev/stderr
    exit 1
  endif
  # empty output file; do not attempt to remove it because this might fail
  # even if you can write to the file
  echo -n "" >! "$out"
  if ( $status != 0 ) then
    echo "ERROR: cannot write to output file $out - exiting" > /dev/stderr
    exit 1
  endif

  # start conversion
  # bundle name: input file name without directory and extension
  set nam = "${par:t:r}"
  # print preliminaries (this first write truncates the output file)
  printf '{\n  "name": "%s",\n  "annotates": "%s.wav",\n  "sampleRate": %d,\n  "levels": [\n' "$nam" "$nam" "$SAMPLERATE" >! "$out"
  # bundle level + attribute source (+ attribute SAO if the input has one)
  printf '        {\n          "name": "bundle",\n          "type": "ITEM",\n          "items": [\n' >> "$out"
  if ( "$SAO" == "" ) then
    printf '              {\n                    "id": 0,\n                    "labels": [\n                        {\n                            "name": "bundle",\n                            "value": "%s"\n                        },\n                        {\n                            "name": "source",\n                            "value": "asr"\n                        }\n                    ]\n              }\n          ]\n        },\n' "$nam" >> "$out"
  else
    printf '              {\n                    "id": 0,\n                    "labels": [\n                        {\n                            "name": "bundle",\n                            "value": "%s"\n                        },\n                        {\n                            "name": "source",\n                            "value": "asr"\n                        },\n                        {\n                            "name": "SAO",\n                            "value": "%s"\n                        }\n                    ]\n              }\n          ]\n        },\n' "$nam" "$SAO" >> "$out"
  endif

  # select the awk helper that writes the ORT level items: helper _1 if a
  # usable SPK tier was found (spkmax is not 0), helper _0 otherwise.
  # The helper path is quoted so that an install location with blanks works.
  # NOTE(review): the helpers are looked up under the name the script was
  # invoked with, in the directory of the resolved script - confirm that
  # this is intended when starting via a differently named link.
  if ( $spkmax == 0 ) then
    set ortawk = "${SOURCE}/${0:t}_0.awk"
  else
    set ortawk = "${SOURCE}/${0:t}_1.awk"
  endif

  # start ORT level item array
  printf '        {\n          "name": "ORT",\n          "type": "ITEM",\n          "items": [\n' >> "$out"
  # go over the input BPF and create the ORT level items
  awk -f "$ortawk" "$par" >> "$out"

  # close ORT level array
  printf "          ]\n        }\n" >> "$out"

  # close level array
  printf "    ],\n"  >> "$out"

  # begin links array
  printf '    "links": [\n' >> "$out"
  # link words to the single item in bundle level (ID = 0); go over ORT in BPF
  awk -f "${SOURCE}/${0:t}_2.awk" "$par" >> "$out"

  # close links array and file
  printf "    ]\n}\n" >> "$out"


  # validate : needs a validation server, the URL must be given in SCHEMA;
  # if SCHEMA is empty, fall back to a purely syntactic check with jsonlint
  # (if installed). A failed validation is fatal (exit code 1).
  if ( $validate == "TRUE" ) then
    # SCHEMA is quoted so that a URL with glob characters (query strings)
    # is not filename-expanded inside the expression
    if ( "$SCHEMA" != "" ) then
      if ( $v > 0 ) echo "DEBUG: ${0:t} : validating $out using $SCHEMA"
      # post the file to the server; the answer must contain SUCCESS.
      # The media type is application/json (the slash was missing before).
      curl -s -H "Content-Type: application/json" --data-binary "@$out" "$SCHEMA" | grep -q 'SUCCESS'
      if ( $status != 0 ) then
        echo "ERROR: _annot.json does not validate or schema server not functional - exiting" > /dev/stderr
        echo "$out"  > /dev/stderr
        # repeat the request to show the server answer on stderr
        curl -s -H "Content-Type: application/json" --data-binary "@$out" "$SCHEMA"  > /dev/stderr
        exit 1
      else
        if ( $v > 0 ) echo "  -> Ok."
      endif
    else
      which jsonlint >& /dev/null
      if ( $status == 0 ) then
        if ( $v > 0 ) echo "DEBUG: ${0:t} : validating $out using jsonlint"
        jsonlint "$out"
        if ( $status != 0 ) then
          echo "ERROR: _annot.json does not validate syntactially - exiting" > /dev/stderr
          echo "$out" > /dev/stderr
          exit 1
        else
          if ( $v > 0 ) echo "  -> Ok."
        endif
      else
        # neither validator is available: warn only, do not fail
        echo "WARNING: $0 : cannot validate _annot.json because neither jsonlint nor a schema validator are functional"  > /dev/stderr
      endif
    endif
  endif

  shift
end

exit 0
 
