#!/bin/csh 
# creates preprocessing and alignment

# 

# usage: mausseg CD=<cd-root> <dirpattern> <filepattern>
# If <dirpattern> or <filepattern> is omitted, it defaults to '*', i.e. the
# script processes all directories in the volume and/or all files in each
# selected directory.
# Typical call for a total volume (VM21):
# mausseg CD=/cdrom/vm21 '[a-z][0-9][0-9][0-9][a-z]'
# Typical call for a single directory:
# mausseg CD=/cdrom/vm21 g101a
# Typical call for a single file:
# mausseg CD=/cdrom/vm21 g101a g101a000.a16
# statistik: ~kip/projekte/autoseg/erg/mansegreg/rml-0.95.rul

# Defaults. Any of these variables can be overridden on the command line
# with a leading KEY=value argument (see the argument parser below).

# gawk script that is fed the Partitur file(s) to produce the MLF entries
set MLF = /homes/beringer/VERBMOBIL/MAUSSEG/autoseg/tools/kan2mlf2.awk
# inventory handed to word_var as iv=   (former value: /homes/kip/htkinventar-1.txt)
set HTKINVENTAR = /homes/beringer/VERBMOBIL/MAUSSEG/autoseg/tools/htkinventarnew.txt
# not referenced in the visible part of this script
# (former value: /export/home/HTK_V2.1/bin.sun4_solaris)
set HTKSOURCE = /homes/htk
# configuration file passed to HCopy with -C
set HTKCONFIG = /homes/beringer/VERBMOBIL/MAUSSEG/HTK/DEF/preproconfig
# passed to the gawk MLF script as variable INVENTAR
set INVENTAR = /homes/beringer/VERBMOBIL/MAUSSEG/autoseg/tools/vminventar
# rule file handed to word_var as rg=
set PHONRUL = /homes/beringer/VERBMOBIL/MAUSSEG/PHONRUL/regeln9.nrul
# graph generator executable (word_var)
set GRAFERZEUGUNG = /homes/kip/projekte/autoseg/word_var-2.0/word_var
# BUILDXSLF and PREPROCESSING are not referenced in the visible part of this script
set BUILDXSLF = /homes/beringer/VERBMOBIL/MAUSSEG/ALIGNMENT/testset/buildxslf
set PREPROCESSING = /homes/beringer/VERBMOBIL/MAUSSEG/PREPROCESS/preprocess_pd2
# alignment script, run via "sh -x"
set SALIGN = /homes/beringer/VERBMOBIL/MAUSSEG/ALIGNMENT/alignment

# output root; one subdirectory per input directory is created below it
set TARGET = /data/data1/beringer
# root under which the Partitur directory is looked up when PARTITURSOURCE is empty
set PARTITURROOT = /raid/r35/BAS
# empty: derived from PARTITURROOT and the volume name
set PARTITURSOURCE = ""
# empty: defaults to $TARGET/<volume>.mlf
set MLF_FILE = ""
# speech file format: PHONDAT or NIST
set SFORMAT = PHONDAT

# filled in from the command line
set CD = ""
set DIRECTORY = ""
set FILE = ""

# set verbose # debugging

# Argument parsing: consume leading KEY=value arguments.
# Each such argument sets the shell variable KEY to value (this is how CD=...
# and overrides of the defaults above are applied). Parsing stops at the first
# argument that does not contain '=' (or at the end of the argument list);
# the remaining arguments stay in $1, $2 for the pattern handling below.

while ( "$1" != "" )
        switch ("$1")
        case *=*:
                set key = `echo "$1" | cut -d= -f1`
                # -f2- keeps everything after the first '=', so a value may
                # itself contain '=' without being truncated
                set val = `echo "$1" | cut -d= -f2-`
                eval "set $key "= \'"$val"\'
                unset key val
                shift
                breaksw
        default:
                # first non KEY=value argument: leave the while loop
                break
        endsw
end

# end option parser

# CD=<volume root> is mandatory; print the usage and stop without it.
# $CD is quoted so that an empty or unusual value cannot break the expression.
if ( "$CD" == '' ) then
 echo 'usage: mausseg CD=volume [icsioptions] [dirpattern] [filepattern]'
 echo CAUTION: If dirpattern or filepattern is ommited, the script will expand to '*'
 exit 1
endif
# volume name = last path component of CD (e.g. vm21 for /cdrom/vm21)
set cd = ${CD:t}

# Without an explicit PARTITURSOURCE, use $PARTITURROOT/<VOLUME>/par*,
# where <VOLUME> is the upper-cased volume name.
if ( $PARTITURSOURCE == '' ) then
  set volume = `echo $cd | gawk '{ print toupper($1) }'`
  set PARTITURSOURCE = $PARTITURROOT/$volume/par*
  unset volume
endif
# the Partitur source has to be an existing directory
if ( ! -d $PARTITURSOURCE ) then
  echo ERROR: $PARTITURSOURCE is not a directory for par files
  exit 1
endif

# First remaining argument: directory pattern (default '*')
if ( "$1" != '' ) then
  set DIRECTORY = "$1"
else
  set DIRECTORY = '*'
  echo script will expand to all directories
endif
# Second remaining argument: file pattern (default '*')
if ( "$2" != '' ) then
  set FILE = "$2"
else
  set FILE = '*'
  echo script will expand to all files
endif



# Start with an empty slflist (the list of HTK files that pass 1 appends to
# and that is later handed to the alignment script).
# The list is always recreated here, so the appends in pass 1 never depend on
# '>>' creating the file (which fails when noclobber is set in the environment).

echo using $TARGET as target directory
/bin/rm -f $TARGET/slflist
touch $TARGET/slflist

# initialize MLF file: default name is $TARGET/<volume>.mlf; an existing file
# is removed, writability is checked with touch, then the MLF header is written

if ( $MLF_FILE == '' ) then
  set MLF_FILE = $TARGET/${cd}.mlf
endif
if ( -e $MLF_FILE ) then
  /bin/rm -f $MLF_FILE
endif
touch $MLF_FILE >& /dev/null
if ( $status == 0 ) then
  echo "#\!MLF\!#" >! $MLF_FILE
else
  echo ERROR: cannot write MLF to $MLF_FILE
  exit 1
endif

cd $CD

echo DIRECTORY = "$DIRECTORY"
echo FILE = "$FILE"

# pass1: create MLF and htk files
#
# For every directory matching $DIRECTORY (relative to $CD) and every speech
# file matching $FILE in it:
#   - the HTK file name is appended to $TARGET/slflist
#   - a label line plus the gawk-converted Partitur file(s) are appended to
#     $MLF_FILE
#   - the HTK file is created with HCopy unless it already exists

echo PASS1:
foreach n ( $DIRECTORY )
        cd $n

        # create target subdirectory if not already there
        if ( ! -e $TARGET/$n ) mkdir $TARGET/$n

        foreach sf ( $FILE )
                set z = ${sf:r}
                echo processing $n/$z
                set HTK = $TARGET/$n/$z.htk
                set SLF = $TARGET/$n/$z.slf

                # Find the matching Partitur file and generate the MLF part

                if ( ! -e $PARTITURSOURCE/$n/${z}.par ) then
                        test_phondats $sf
                        if ( $status != 0 ) then
                                test_nist $sf
                                # read the first line of the file; the command
                                # substitution is required here (a bare
                                # "set nist = head -1 $sf" is a set syntax error)
                                set nist = "`head -1 $sf`"
                                if ( "$nist" != 'NIST_1A' ) then
                                        # neither PhonDat nor NIST: drop the
                                        # target subdirectory and leave this
                                        # directory's file loop
                                        rmdir $TARGET/$n
                                        break
                                endif
                        endif
                        echo "ERROR: cannot find Partitur-File:"
                        echo $PARTITURSOURCE/$n/${z}.par
                        # NOTE(review): processing continues after this message,
                        # as before; confirm whether the file should be skipped.
                endif

                echo $HTK >> $TARGET/slflist
                echo '"'$TARGET/$n/$z'"' >>  $MLF_FILE
                cat $PARTITURSOURCE/$n/${z}*.par |\
                  gawk -v INVENTAR=$INVENTAR -f $MLF  >> $MLF_FILE

                # test if htk file already present. if not, create one

                if ( ! -e $HTK ) then
                        switch ( $SFORMAT )
                        case PHONDAT:
                                echo test_phondats $sf
                                test_phondats $sf >& /dev/null
                                if ( $status != 0 ) then
                                        echo ERROR: speech input is no PhonDat file
                                        echo $sf
                                        exit 1
                                endif
                                # PhonDat input goes through a temporary NIST
                                # file that is removed after HCopy
                                phondat2nist phonfile=$sf nistfile=$TARGET/$n/${z}.nis
                                HCopy -C $HTKCONFIG $TARGET/$n/${z}.nis $HTK
                                /bin/rm -f $TARGET/$n/${z}.nis
                                breaksw
                        case NIST:
                                HCopy -C $HTKCONFIG $sf $HTK
                                breaksw
                        default:
                                echo ERROR: unknown speech file format (SFORMAT) $SFORMAT
                                exit 1
                                breaksw
                        endsw
                endif
        end
        cd ..
end

# MLF finished and htk files processed and ready for alignment

# Create the *.slf files from the MLF in one word_var call.
# word_var writes graphs to the filename label in the MLF with extension '.slf'
# The option wwt=man is passed only when the default rule file is in use.

cd $TARGET
cd /homes/kip/projekte/autoseg/word_var-2.0/

set wwt_opt = ( )
if ( $PHONRUL == "/homes/beringer/VERBMOBIL/MAUSSEG/PHONRUL/regeln9.nrul" ) set wwt_opt = ( wwt=man )
$GRAFERZEUGUNG if=$MLF_FILE odir=$TARGET iv=$HTKINVENTAR rg=$PHONRUL jwk=-1 $wwt_opt
unset wwt_opt

cd $TARGET

# If there is any .slf file directly in the current TARGET directory, move it
# into the subdirectory.
# The foreach is closed with its own 'end' (it was missing), and the move is
# guarded so that a pattern without a matching .slf file neither aborts the
# script nor calls mv on a non-existent name.

foreach n ( $DIRECTORY )
    # nonomatch: leave an unmatched pattern as a literal word instead of
    # raising "No match"; the -f test below then simply skips it
    set nonomatch
    foreach slf ( $FILE:r.slf )
        if ( -f $slf && -d $n ) mv $slf $n
    end
    unset nonomatch
end

# now align all the htk files to the slf files
# The alignment script is run once (with sh -x tracing) from $CD and receives
# the list of HTK files collected in pass 1.
# NOTE(review): in a csh script a '#' inside a word may already start a
# comment, in which case only "slfext=slf" reaches the alignment script and
# "xslf"/"wxslf" are disabled alternatives -- confirm.
cd $CD
    sh -x $SALIGN slflist=$TARGET/slflist slfext=slf#xslf#wxslf


#now write a partitur
# For every .rec file below $TARGET matching the directory/file patterns:
# run rec2mau on it, then concatenate the Partitur file with the same
# directory and base name from $PARTITURSOURCE and <rec-basename>.par.cor
# into $TARGET/<dir>/<basename> (no extension).
# (presumably rec2mau is what produces the .par.cor file -- confirm)

echo "rec2mau"
foreach file ($TARGET/$DIRECTORY/$FILE:r.rec)
csh -x ~beringer/VERBMOBIL/MAUSSEG/GENPARTITUR/rec2mau $file
cat $PARTITURSOURCE/$file:h:t/$file:r:t.par $file:r.par.cor > $TARGET/$file:h:t/$file:r:t
end

#add it to the current one
# mau2par gets the same CD/TARGET/PARTITURSOURCE settings plus the directory
# pattern and the file pattern without its extension.

csh -x ~beringer/VERBMOBIL/MAUSSEG/autoseg/mau2par CD=$CD TARGET=$TARGET PARTITURSOURCE=$PARTITURSOURCE $DIRECTORY $FILE:r



# For every .cor file below $TARGET: concatenate the Partitur file that
# belongs to it (same directory name and base name under $PARTITURSOURCE)
# with the .cor file and filter the result:
#   - gawk: lines whose third field matches "-[0-9]*" are printed as
#     "MAU:" plus the first four fields, all other lines as their first five
#     fields
#   - sed: every '-' followed by digits is replaced by "-1"
# The output goes to the .cor file name without its extension.
# The Partitur path is derived from the current file (as in the rec2mau loop)
# rather than from the $DIRECTORY/$FILE patterns, which would pull in every
# matching Partitur file for each single output.
foreach file ($TARGET/$DIRECTORY/$FILE:r.cor)
cat $PARTITURSOURCE/$file:h:t/$file:r:t.par $file |gawk '{if ($3 ~ "-[0-9]*") {print "MAU:", $1, $2, $3, $4} else {print $1, $2, $3, $4, $5}}' | sed -e 's/-[0-9]*/-1/g;'  > $file:r
end