#!/bin/csh

# This script converts a PhonDat speech signal file into
# a SAM description file and a SAM speech file (raw file)
# for use with SAM standard software.

# F. Schiel	05.08.96 / 05.08.96

# Description:
# The script strips off all PhonDat header parts from the input file and
# creates a 'raw' speech file without header named by the same prefix
# as the input file, but the suffix '.SES'
# E.g.  awed5480.16  ->  awed5480.SES
# The necessary contents of the PhonDat header are extracted into a
# SAM-compatible description file named with the same prefix as the input
# file, but with the suffix '.SEO'.
# The output files are created in the same location as the input file.

# This script requires
# - installed test_pho
# - read/write access to /tmp
# - read/write access to the directory where the input file is stored

# Usage:
# pho2sam phondat-file1 [phondat-file2 ...]

# Print a usage message and exit with status 1 when no input file
# (or an empty first argument) was given.
# $1 is quoted so that a first argument containing blanks or expression
# operators is compared as one single string.
if ( "$1" == "" ) then
  echo usage:  $0 phondat-file1 \[phondat-file2 ...\]
  exit 1
endif

# Convert every PhonDat file named on the command line, one per iteration.
# For each input file the loop
#   1. dumps the header with 'test_pho v=1' into a scratch file in /tmp,
#   2. picks the needed header fields out of that dump,
#   3. copies the signal without its header blocks to <prefix>.SES (dd),
#   4. writes the SAM description file <prefix>.SEO.
# The loop ends at the first empty argument, i.e. normally when the
# argument list is exhausted.  An input that cannot be converted is
# reported and skipped; the remaining files are still processed.
while ("$1" != "")
  set outfile = "${1:r}.SES"
  set labelfile = "${1:r}.SEO"
  # If :h returns the name unchanged the file has no directory part,
  # so the DIR: entry is left empty.
  set dir = "${1:h}"
  if ("$dir" == "$1") then
    set dir = ""
  endif
#debug  echo dir = $dir
#debug  echo input = $1
#debug  echo outspeech = $outfile
#debug  echo outdescr = $labelfile

  # Scratch file holding the header dump of the current input.
  set tmpfile = /tmp/$$.tmp
  test_pho v=1 "$1" >! $tmpfile
  # Save the exit status at once; later commands overwrite $status.
  set rc = $status
#debug  cat $tmpfile

  # Number of 512-byte header blocks that dd has to skip.
  set nhblks = `grep anz_header $tmpfile | awk '{print $2}'`

  set skip = 0
  if ($rc < 0) then
    # NOTE(review): an exit status seen by the shell is presumably never
    # negative, so this test may never fire - confirm which status
    # test_pho returns for an invalid file.
    echo "ERROR: the input file $1 is not a valid phondat file - skipping"
    set skip = 1
  else if ($#nhblks == 0) then
    # Without the header block count neither the dd copy nor the
    # description file can be produced.
    echo "ERROR: no header block count (anz_header) found for input file $1 - skipping"
    set skip = 1
  endif
  if ($skip) then
    # Skip only this file: remove the scratch file and go on with the
    # next argument.
    /bin/rm -f $tmpfile
    shift
    continue
  endif
  if ($rc > 0) then
    echo "ERROR: the input file $1 has errors - trying to continue"
  endif

  # Sampling rate, written to the SAM: entry.
  set rate = `grep isf $tmpfile | awk '{print $2}'`

  # Recording date; every component is padded to two digits.
  set day = `grep 'day:' $tmpfile | awk '{print $2}'`
  if ($day < 10) then
    set day = 0$day
  endif
  set month = `grep 'month:' $tmpfile | awk '{print $2}'`
  if ($month < 10) then
    set month = 0$month
  endif
  set year = `grep  'year:' $tmpfile | awk '{print $2}'`
  # Reduce the year to its offset within the century.
  if ($year < 2000) then
    @ year = $year - 1900
  else
    @ year = $year - 2000
  endif
  # Pad like day and month, so that e.g. 2005 gives 05 and not 5.
  if ($year >= 0 && $year < 10) then
    set year = 0$year
  endif

  # NOTE(review): SSB is written as the reported 'bits' value plus 12;
  # presumably test_pho reports the resolution relative to 12 bits - confirm.
  set bits = `grep  'bits:' $tmpfile | awk '{print $2}'`
  @ bits = $bits + 12

  # The speaker sex code W/w is written as F in the SPI: entry;
  # any other value is passed through unchanged.
  set sex = `grep  'sex:' $tmpfile | awk '{print $2}'`
  if ("$sex" == "W" || "$sex" == "w") then
    set sex = F
  endif

  # With more than one header block, lines 44 and 46 of the dump are
  # taken as orthography and canonic transcript.
  if ($nhblks > 1) then
    # noglob keeps text containing characters such as * ? [ from being
    # treated as filename patterns when it is assigned.
    set noglob
    set orth = `sed -n '44p' $tmpfile`
    set cano = `sed -n '46p' $tmpfile`
    unset noglob
  endif
#debug  echo nhblks = $nhblks

  # Copy the signal without the leading header blocks.  dd's output is
  # discarded.
  dd if="$1" of="$outfile" bs=512 skip=$nhblks >& /dev/null

  # Write the SAM description file; the first echo (re)creates it,
  # all further entries are appended.
  echo "LHD: pho2sam-1.0" >! "$labelfile"
  echo "FIL: label" >> "$labelfile"
  echo "TYP: " >> "$labelfile"
  echo "DBN: " >> "$labelfile"
  echo "VOL: " >> "$labelfile"
  echo "DIR: $dir" >> "$labelfile"
  echo "SRC: ${1:t}" >> "$labelfile"
  echo "CMT: This SAM description file was automatically produced by $0" >> "$labelfile"
  echo "SAM:" $rate >> "$labelfile"
  echo "RED: ${month}/${day}/${year}" >> "$labelfile"
  echo "RET: " >> "$labelfile"
  echo "REP: " >> "$labelfile"
  echo "SNB: 2" >> "$labelfile"
  echo "SBF: 01" >> "$labelfile"
  echo "SSB: $bits" >> "$labelfile"
  echo "NCH: 1" >> "$labelfile"
  echo "SPI: $sex" >> "$labelfile"
  echo "LBD: " >> "$labelfile"
  echo "ELF: " >> "$labelfile"
  if ($nhblks > 1) then
    echo "CMT: Orthography:" >> "$labelfile"
    echo "CMT: $orth" >> "$labelfile"
    echo "CMT: Canonic Transcript:" >> "$labelfile"
    echo "CMT: $cano" >> "$labelfile"
  endif
  /bin/rm -f $tmpfile

  shift
end
