#!/bin/tcsh

# this script reads X-WAVES word segmentation files from commandline
# and converts them into BPF files with the same base name but 
# extension '.par'.
# Each label is assumed to be an orthographic word label and is mapped 
# to a BPF ORT entry; in parallel we create a TRN entry bearing 
# the timing information.

# The resulting BPF can be passed through g2p to add a KAN tier, and then 
# through MAUS (WebMAUS with Option 'Chunk Segmentation = yes') to add
# a MAU tier (where MAUS segments only in each word segment)

# To determine the sampling rate, you can either give the sampling rate as 
# command line option 'rate=22050', or the script will remove the extension 
# from the X-WAVES file and look for a WAV file with the same basename in the
# same location, to get the sample rate via soxi (sox must be installed!).


# usage: x-wave_wordseg_to_bpf [v=0][rate=samplerate][chunksize=1] file1.words [file2.words ...]

# NOTE(review): SOURCE is not referenced anywhere else in this file -
# presumably an installation leftover; confirm before removing.
set SOURCE = /homes/schiel/Desktop
##############################################

# Option defaults; each can be overridden by a leading key=value argument.
#   rate      - sampling rate in Hz; empty means "ask soxi about the WAV file"
#   chunksize - number of words grouped into one TRN chunk
#   v         - verbosity; values > 0 print progress messages
set rate = ""
set chunksize = 1
set v = 0

# Consume the leading key=value options.
# Only the documented keys (v, rate, chunksize) are accepted and the value is
# assigned directly instead of being passed through 'eval', so command line
# text can neither overwrite arbitrary shell variables (path, argv, ...) nor
# inject commands.  Any other argument - including a file name that merely
# contains '=' - ends option parsing and is treated as an input file.

while ( "$1" != "" )
	switch ("$1")
	case v=*:
	case rate=*:
	case chunksize=*:
		# split 'key=value' at the '=' signs: field 1 is the key, field 2 the value
		set key = `echo "$1" | awk -F= '{ print $1 }'`
		set val = `echo "$1" | awk -F= '{ print $2 }'`
		# $key expands to one of the whitelisted variable names
		set $key = "$val"
		unset key val
		shift
		breaksw
        default:
		# first non-option argument: leave the while loop
		break
        endsw
end

# end option parser

# Without at least one input file there is nothing to do: print the usage
# text and exit with status 1.  $1 is quoted so that a first file name
# containing whitespace is compared as one word instead of breaking the
# expression.
if ( "$1" == "" ) then
  echo "usage: $0 [v=0][rate=samplerate][chunksize=1] file1.words [file2.words ...]"
  echo "       transforms X-WAVES word segmentation files into BPF files"
  echo "       chunks in the TRN tier are grouped in chunksize words each"
  exit 1
endif

# Convert every remaining argument (an X-WAVES word segmentation file) into a
# BPF file with the same base name and the extension '.par'.
# All file names are quoted so that paths containing whitespace are handled
# as a single word.  The script exits with status 1 on the first error.
while ( "$1" != "" )
  if ( ! -e "$1" ) then
    echo "ERROR: cannot find input file $1 - exiting"
    exit 1
  endif
  if ( $v > 0 ) echo "Working on $1"
  # input path without its last extension
  set utt = "${1:r}"
  if ( "$rate" == "" ) then
    # no 'rate=' option given: take the rate from the WAV file that shares
    # the base name of the input file
    set wav = "${utt}.wav"
    if ( ! -e "$wav" ) then
      echo "ERROR: cannot determine sample rate - 'rate' not given and wav file not found - exiting"
      exit 1
    endif
    # 'soxi -r' prints just the sample rate, so no parsing of the full
    # soxi report is needed
    set srate = `soxi -r "$wav"`
    if ( $status != 0 || "$srate" == "" ) then
      echo "ERROR: cannot determine sample rate using soxi on $wav - exiting"
      exit 1
    endif
  else
    set srate = "$rate"
  endif
  if ( $v > 0 ) echo "Sampling rate: $srate"
  set bpf = "${utt}.par"
  # start from an empty output file; touch doubles as a writability check
  if ( -e "$bpf" ) rm -f "$bpf"
  touch "$bpf"
  if ( $status != 0 ) then
    echo "ERROR: cannot create output BPF $bpf - exiting"
    exit 1
  endif
  # make BPF header
  printf "LHD: Partitur 1.3\nSAM: %d\nLBD:\n" "$srate" >> "$bpf"
  # make ORT tier: strip carriage returns, then let the companion awk script
  # (this script's path plus '.awk') append its output to the BPF
  tr -d '\r' < "$1" | awk -v SRATE="$srate" -v CHUNKSIZE="$chunksize" -f "${0}.awk" >> "$bpf"
  shift
end

exit 0

