#!/bin/tcsh 

# helper to transform a TDF describing a single segmentation tier into 
# a praat compatible TextGrid (normal format)

# Author F. Schiel (schiel@bas.uni-muenchen.de)

# Version 
set VERSION =  1.0  # (compatible with maus 4.25)

# To get the version number type in 'tdf2TextGrid --version'

# NOTE: every variable that is set before the option parser runs can be
# overridden on the command line as NAME=value, because the parser accepts
# any name that is already defined.  Do not rename or add variables here
# without considering that.

set SCRIPT = `readlink -f "$0"`
set SOURCE = `dirname "$SCRIPT"`  # location where the script is stored
                           # (even if we start via a symbolic link)
set TEMP = /tmp
setenv LANG en_US.UTF-8  # defines the behavior of text processing, sorting etc.
##########################################################################

# Defaults of the command line options
set TDF = ""                             # input TDF (mandatory)
set OUT = ""                             # output TextGrid (default: derived from TDF)
set TG_tier_name = "Default_Tier_Name"   # name of the tier in the TextGrid

set v = 0                                # verbosity level of the DEBUG messages

# The first argument is quoted so that an argument containing blanks or
# parentheses cannot break the expression.
# NOTE(review): the version request exits with status 1; kept unchanged
# because callers may rely on it.
if ( "$1" == '--version' ) then 
  echo $VERSION
  exit 1
endif


# Actually do the argument parsing here

#echo parsing commandline
#echo "$0 $*"
# Consume leading arguments of the form NAME=value.  NAME must be a variable
# that is already set (these are the known options), otherwise the script
# stops with exit code 1.  Parsing ends at the first argument without an
# equal sign.  The value is everything after the FIRST equal sign, so values
# may contain further equal signs and blanks.  Values containing a single
# quote are not supported because of the eval below.
while ( "$1" != "" )
	switch ("$1")
	case *=*:
		set key = `echo "$1" | awk -F= '{ print $1 }'`
		#check if option is known (set)
		eval set checkoption = '$?'$key
		if ( $checkoption == 0 ) then 
		  echo "ERROR: unknown option $key - exiting" >> /dev/stderr
		  exit 1
		endif  
		set val = ( `echo "$1" | awk '{ print substr($0, index($0, "=") + 1) }'` )
		eval "set $key "= \'"$val"\'
		unset key val
		shift
		breaksw
	default:
		break
	endsw
end

# end option parser

# boolean variable check; define all boolean input parameters here
#
# Every variable named in the list bool is normalised: the accepted spellings
# of true become TRUE, the accepted spellings of false become FALSE, the
# value force is kept as it is, and anything else stops the script with
# exit code 1.  The list is empty, so the loop body does not run at present.

set bool = ( )
foreach booleanvariable ( $bool )
  eval set val = '$'$booleanvariable
  switch ( $val ) 
  case true:
  case True:
  case TRUE:
  case 1:
  case yes:
  case Yes:
  case YES:
    eval set $booleanvariable = TRUE
    breaksw
  case false:
  case False:
  case FALSE:
  case 0:
  case no:
  case No:
  case NO:
    eval set $booleanvariable = FALSE
    breaksw
  case force:
    eval set $booleanvariable = force
    breaksw
  default:
    echo "ERROR: ${0:t} : Boolean $booleanvariable=$val is not a boolean value. Use either '0,1,true,false,yes,no'" >> /dev/stderr
    exit 1
  endsw    
end




# General remarks:

# Input TDF
# TDF that should be converted. It must contain 3 header lines that are ignored,
# and 13 columns separated by TABs in the body
# where column 3 contains the start of the segment in
# sec, column 4 the end, and column 8 the transcript label of the segment.

# Option OUT
# Write to OUT=file instead of inputbody.TextGrid

# Exit codes

# 0 : everything seems ok
# 1 : serious error
# 4 : main arguments missing, printing usage message to stderr


# The scratch directory must exist before any work is done.
if ( ! -d "$TEMP" ) then 
  echo "ERROR: cannot find temporary dir $TEMP - exiting" >> /dev/stderr
  echo "       please create such a dir and define it in the script" >> /dev/stderr
  echo "       or use the option 'TEMP=...'" >> /dev/stderr
  exit 1
endif  
# Prefix for scratch files below TEMP: shell process id plus epoch seconds
set PID = $$_`date "+%s"`_

# TDF is the only mandatory option; without it print the usage and exit 4
if ( "$TDF" == "" ) then 
  echo "usage: $0 TDF=<TDF> [TG_tier_name=<...>][OUT=<out.TextGrid>]" >> /dev/stderr
  exit 4
endif  
if ( ! -e "$TDF" ) then 
  echo "ERROR: ${0:t} : cannot open input TDF $TDF - exiting" >> /dev/stderr
  exit 1
endif

# Default output name: input name with its extension replaced by .TextGrid
if ( "$OUT" == "" ) set OUT = "${TDF:r}.TextGrid"
# The output file is removed and rewritten below, so it must never be the
# input file itself (this happens if the input already ends in .TextGrid).
if ( "$OUT" == "$TDF" ) then 
  echo "ERROR: ${0:t} : output $OUT would overwrite the input TDF - exiting" >> /dev/stderr
  exit 1
endif
if ( $v > 0 ) echo "DEBUG: ${0:t} : Extracting into TextGrid $OUT"
# Writability probe: create or touch the output file, then remove it again
touch "$OUT" >& /dev/null
if ( $status != 0 ) then 
  echo "ERROR: ${0:t} : cannot write output to $OUT - exiting" >> /dev/stderr
  exit 1
endif
rm -f "$OUT"
# End time of the tier = column 4 (segment end) of the last line of the TDF
set MAXTIME = `tail -n 1 "$TDF" | cut -f 4`
if ( $v > 1 ) echo "DEBUG: ${0:t} : MAXTIME = $MAXTIME"

# write header
# The here-documents are unquoted, so tcsh substitutes MAXTIME and the tier
# name directly.  This avoids passing them through sed, where a slash, an
# ampersand or a backslash in the tier name would break the substitution.
# The placeholder ##NUMSEG is left in place; it is replaced once the number
# of intervals is known.
cat <<END >! "$OUT"
File type = "ooTextFile"
Object class = "TextGrid"

xmin = 0 
xmax = $MAXTIME
tiers? <exists> 
size = 1
item []:
END


# keeping track which tier we are actually writing at the moment
set curr_tiers = 1 

# write init of tier
  cat <<END >> "$OUT"
    item [$curr_tiers]:
        class = "IntervalTier"
        name = "${TG_tier_name}"
        xmin = 0
        xmax = $MAXTIME
        intervals: size = ##NUMSEG
END

#  write segments
# One interval is written per TDF body line (column 3 = start, column 4 = end,
# column 8 = label).  If a segment does not start where the previous one
# ended (or at 0 for the first one), an empty filler interval is inserted
# first.  Double quotes inside a label are doubled, which is how the Praat
# text format represents a quote inside a quoted string.
# change the tail option to '1', if we don't have a column header line in TDF
tail -n +4 "$TDF" | awk 'BEGIN{FS="\t";segcnt=1;nextXmin=0.0}{xmin=$3;xmax=$4;label=$8;gsub(/"/,"\"\"",label);if(xmin!=nextXmin){printf("        intervals [%d]:\n            xmin = %f\n            xmax = %f\n            text = \"\"\n",segcnt,nextXmin,xmin);segcnt++} printf("        intervals [%d]:\n            xmin = %f\n            xmax = %f\n            text = \"%s\"\n",segcnt,xmin,xmax,label);nextXmin=xmax;segcnt++}' >> "$OUT"

# read number of segments (including inserted silence intervals)
# Only lines that start with the text attribute are counted, so that no
# other line of the file can be counted by accident.
set NUMSEG = `grep -c '^ *text = "' "$OUT"`
if ( $v > 1 ) echo "DEBUG: ${0:t} : NUMSEG = $NUMSEG"
# insert number of segments in result TG
sed "s/##NUMSEG/$NUMSEG/" "$OUT" >! "$TEMP/${PID}_OUT"
mv "$TEMP/${PID}_OUT" "$OUT"

# remove any scratch files that are left over
rm -rf $TEMP/${PID}* >& /dev/null
exit 0

