#!/bin/tcsh -x

# Improve speech signals before BAS WebService Processing.

# This is the backend script for the BAS AudioEnhance service
# (runAudioEnhance). It reads a SIGNAL and performs several transformations
# mostly based on SoX that improve the speech signal for processing in 
# BAS WebServices. Depending on input and given options (in brackets) the service
# - extracts sound track from video input (always, if applicable),
# - converts non-RIFF sound formats into RIFF (always, using sox's extension recognition),
# - set bit width per sample to 16bits (always, if applicable),
# - normalize channels to -3dB (NORM=true),
# - merge multi-channel files into one channel (MONO=true),
# - re-sample to given sampling rate (RESAMPLE); RESAMPLE=0 : no re-sampling,
# - filters signal for constant background noise (NOISE); NOISE=0 : no filtering
#
# Motivation for this service:
# Users are often not aware that their recordings are not optimal for speech 
# processing. For instance, sampling rates and bit widths are too high and signal
# is stereo (although only one channel was recorded), format is video or non-RIFF, etc.
# This service can be used to 'enhance' speech signals before BAS WebService
# processing; it can be used as a stand-alone service, or at the very start of
# a processing pipeline (see service Pipeline).

# Programs required by this script: awk, wav2trn, sox, avconv, ffmpeg, ffprobe.

# The structure of this script is as follows:
# If video input, use ffmpeg to extract default sound track in generic sample rate.
# If MP3 input, convert to PCM (requires avconv, since sox does not support MP3 input)
# Then build a sox option string soxOpt and sox pipeline string soxPipe according to the following 
# command line options:
# - signal is not 16bit PCM : soxOpt '-e signed -b 16'
# - RESAMPLE=<RATE> : soxOpt '-r <RATE>', RESAMPLE=0 : no resampling
# - NORM=true : soxPipe = 'gain -3dB', NORM=false : no normalizing
# - MONO=true : soxOpt = '-c 1', MONO=false : multi-channel output
# - NOISE=<AMOUNT> (AMOUNT=0...1) : filtering, AMOUNT=0 : no filtering (default)
#   Before sox pipe is operated, use wav2trn to determine BEG/END of speech in recording;
#   calculate noise profile 'noiseprof' from concatenated leading/trailing silence; then
#   add to soxPipe: 'noisered noiseprof <AMOUNT>'
# Finally, perform : sox -D input.ext $soxOpt output.wav $soxPipe
 

# Version
# Printed by --version and in the banner shown when SIGNAL or OUT is missing.
set VERSION = 0.2

# "$1" is quoted on purpose: unquoted, a first argument that contains blanks
# (for example SIGNAL="/data/my file.wav") expands to several words inside the
# expression and tcsh stops with an expression syntax error.
if ( "$1" == '--version' ) then
  echo $VERSION
  exit 0
endif

##########################################################################
# Installation layout and environment.
# readlink -f resolves symbolic links, so SCRIPT is the real file and SOURCE
# the directory it is stored in, even if we start via a symbolic link.
set SCRIPT = `readlink -f "$0"`
set SOURCE = `dirname "$SCRIPT"`
set TEMP = /tmp                  # directory that receives all intermediate files
setenv LANG en_US.UTF-8          # defines the behavior of text processing, sorting etc.
##########################################################################

# Pre-set commandline and other options.
# The option parser further down accepts KEY=VALUE for every variable that is
# already set when it runs, so the values below act as defaults.

# command line
set SIGNAL = ""                  # input to be converted
set OUT = ""                     # converted output
set RESAMPLE = 16000             # target sampling rate
set NOISE = 0                    # noise reduction amount
set MONO = true                  # merge channels into one
set NORM = true                  # normalize channels

# debugging
set CLEAN = TRUE                 # if set to false, temporary files are not purged
set v = 0                        # debug level; values above 0 print DEBUG messages

# converter helpers
# (none defined at present)


# other
#
# ...
#
# -------------------------------------------------------------------------

#
# Exit codes
# 0 : everything seems ok
# 1 : serious error
# 4 : no arguments, printing help message to stdout
# 5 : missing or problem with necessary helper program

# Actually do the argument parsing here
#
# Every leading argument of the form KEY=VALUE overrides the variable KEY.
# KEY has to be a variable that is already set at this point, otherwise the
# script stops with exit code 1. Parsing ends at the first argument that
# does not contain a '=' character.

while ( "$1" != "" )
  switch ("$1")
  case *=*:
    # "$1" is quoted inside the command substitution so that blanks and
    # glob characters in the argument reach echo unchanged
    set key = `echo "$1" | awk -F= '{ print $1 }'`
    # check if option is known (set)
    eval set checkoption = '$?'$key
    if ( $checkoption == 0 ) then
      echo "ERROR: unknown option $key - exiting" >> /dev/stderr
      exit 1
    endif
    # the value is everything behind the FIRST '=' so that values which
    # themselves contain a '=' are not cut off
    set val = `echo "$1" | sed 's/^[^=]*=//'`
    eval "set $key "= \'"$val"\'
    unset key val
    shift
    breaksw
  default:
    # first argument without '=' : leave the parser loop
    break
  endsw
end

# end option parser

# preliminaries
# PID is the name prefix of every temporary file of this run; it is built
# from the process id of this shell and the start time in epoch seconds.
set beginsec = `date '+%s'`
set PID = "$$_${beginsec}_"
# v_minus : debug level for sub-scripts, one below $v while debugging is on
set v_minus = $v
if ( $v > 0 ) then
  echo "DEBUG: Starting $0:t on `date` (${beginsec}), TEMP/PID : $TEMP/$PID"
  @ v_minus --
endif

# boolean variable check; define all boolean input parameters here
#
# Each listed variable is rewritten to the canonical spelling TRUE or FALSE
# (or kept as force). Consecutive case labels share one action because a
# csh switch falls through from label to label until it reaches breaksw.

set boolNames = ( NORM MONO CLEAN )
foreach boolName ( $boolNames )
  eval set val = '$'$boolName
  switch ( $val )
  case true:
  case True:
  case TRUE:
  case 1:
  case yes:
  case Yes:
  case YES:
    eval set $boolName = TRUE
    breaksw
  case false:
  case False:
  case FALSE:
  case 0:
  case no:
  case No:
  case NO:
    eval set $boolName = FALSE
    breaksw
  case force:
    eval set $boolName = force
    breaksw
  default:
    echo "ERROR: ${0:t} : Boolean $boolName=$val is not a boolean value. Use either '0,1,true,false,yes,no,(force)' - exiting" >> /dev/stderr
    exit 1
  endsw
end

# check for helpers
# All external programs are probed first and the ones that are missing are
# collected, so that the error message names exactly what has to be installed.
# soxi is probed as well because the script calls it to read rate, bit width
# and sample count.
set missing = ""
foreach helper ( sox soxi ffmpeg ffprobe avconv wav2trn awk )
  which $helper >& /dev/null
  if ( $status != 0 ) set missing = "$missing $helper"
end
if ( "$missing" != "" ) then
  echo "ERROR: ${0:t} is missing one or more required programs:$missing - exiting" >> /dev/stderr
  exit 5
endif

# --help prints the help text that is embedded in the here-document of this
# file. The text is read from $SCRIPT (the resolved real path) so that it is
# also found when the script was started through a symbolic link with a
# different name. "$1" is quoted so that an argument with blanks cannot break
# the expression.
if ( "$1" == '--help' ) then
  awk '/  usage: /{pf=1}/  end usage/{pf=0}{if(pf==1)print}' "$SCRIPT"
  exit 4
endif

# Both SIGNAL and OUT are mandatory. If one of them is still empty after
# option parsing, print the version line and the help text to stdout and
# leave with exit code 4.
# The here-document below is runtime text: the --help branch extracts the
# lines from the first synopsis line up to (not including) the 'end usage'
# marker out of this file with awk, so both markers must stay as they are.
if ( "$SIGNAL" == "" || "$OUT" == "" ) then 
  echo "${0:t} : version $VERSION"
  cat <<ENDE

  usage: audioEnhance SIGNAL=inputFile OUT=outputFile [NORM=true][MONO=true][RESAMPLE=16000][NOISE=0]
  usage: audioEnhance --version
  usage: audioEnhance --help
  
  Improve speech signals before BAS WebSevice Processing.

  This is the backend script for the BAS AudioEnhance service
  (runAudioEnhance). It reads a SIGNAL and performs several transformations
  mostly based on SoX that improve the speech signal for processing in 
  BAS WebServices. Depending on input and given options (in brackets) the service
  - extracts sound track from video input (always, if applicable),
  - converts non-RIFF sound formats into RIFF (always, using sox's extension recognition),
  - set bit width per sample to 16bits (always, if applicable),
  - normalize channels to -3dB (NORM=true),
  - merge multi-channel files into one channel (MONO=true),
  - re-sample to given sampling rate (RATE); RATE=0 : no re-sampling,
  - filters signal for constant background noise (NOISE); NOISE=0 : no filtering
 
  Motivation for this service:
  Users are often not aware that their recordings are not optimal for speech 
  processing. For instance, sampling rates and bits withs are too high and signal
  is stereo (although only one channel was recorded), format is video or non-RIFF, etc.
  This service can be used to 'enhance' speech signals before BAS WebService
  processing; it can be used as a stand-alone service, or at the very start of
  a processing pipeline (see service Pipeline).

  ERRORs are reported to stderr and the exit code is >0.
  WARNINGs are reported to stderr and the exit code is 0
  end usage
  
ENDE

  # exit code 4 : help message was printed
  exit 4
endif

# Pre-checks
# Each failed check reports to stderr and leaves with exit code 1.

# RESAMPLE=0 is the documented way to switch re-sampling off, so it has to
# pass; every other value must lie within 8000 - 96000 Hz.
if ( "$RESAMPLE" != 0 ) then
  if ( "$RESAMPLE" < 8000 || "$RESAMPLE" > 96000 ) then
    echo "ERROR: ${0:t} : RESAMPLE = $RESAMPLE is outside 8000 - 96000 - exiting" >> /dev/stderr
    exit 1
  endif
endif
# -r (readable) instead of -e (exists): the message promises that the file
# can be read, and a file without read permission would only fail later
if ( ! -r "$SIGNAL" ) then
  echo "ERROR: ${0:t} : cannot read input signal file '$SIGNAL' - exiting" >> /dev/stderr
  exit 1
endif
# touch creates the output file if necessary and proves that it is writable
touch "$OUT"
if ( $status != 0 ) then
  echo "ERROR: ${0:t} : cannot write to output file '$OUT' - exiting" >> /dev/stderr
  exit 1
endif

# start code

# Re-coding non RIFF formats
#
# Depending on the file extension of SIGNAL one of three paths is taken:
#   video (mpg mpeg mp4 avi flv) : ffmpeg extracts the sound track into a WAV file in TEMP
#   mp3                          : avconv converts to PCM
#   anything else                : soxi must be able to read the file
# Afterwards SIGNAL points to the file to be processed and rate holds its
# sampling rate. On every error exitCode is set to a non-zero value and the
# script jumps to the clean-up label.

# compare the extension case-insensitively (mp4, MP4, Mp4 ...)
set ext = `echo "${SIGNAL:e}" | tr '[:upper:]' '[:lower:]'`

# if video input extract soundtrack
if ( "$ext" == "mpg" || "$ext" == "mpeg" || "$ext" == "mp4" || "$ext" == "avi" || "$ext" == "flv" ) then
  # video may contain multiple soundtracks:
  # the following takes the default audio stream in the input file; this might be a problem in case
  # that a user wants to process another sound track (stream)
  # the output of ffprobe gives an iso language code 3 (e.g. 'eng') per stream, if more than one.
  # Video may contain more than one soundtrack or even none; therefore count the audio streams
  # first and then select the default
  set ffprobeNchannels = `ffprobe "$SIGNAL" |& grep 'Stream .* Audio' | wc -l`
  if ( $status != 0 || "$ffprobeNchannels" == "" ) then
    echo "ERROR: ffprob failed, cannot determine number of channels of video input - exiting" >> /dev/stderr 
    set exitCode = 5
    goto clean
  endif
  if ( "$ffprobeNchannels" == 0 ) then
    echo "ERROR: ${0:t} : ffprobe detects no soundtrack found in $SIGNAL - exiting" >> /dev/stderr
    set exitCode = 1
    goto clean
  endif
  # one sound track: take it; multiple sound tracks: take the one flagged (default)
  if ( "$ffprobeNchannels" == 1 ) then
    set streamPattern = 'Stream .* Audio'
  else
    set streamPattern = 'Stream .* Audio.*(default)'
  endif
  # head -n 1 keeps rate a single word even if several lines match
  set rate = `ffprobe "$SIGNAL" |& grep "$streamPattern" | head -n 1 | sed 's/^.* \([0-9][0-9]*\) Hz.*$/\1/'`
  if ( $status != 0 || "$rate" == "" ) then
    echo "ERROR: ffprob failed, cannot determine sampling rate of video input - exiting" >> /dev/stderr 
    set exitCode = 5
    goto clean
  endif
  # Extract the default soundtrack into a temporary WAV file.
  # (ffmpeg itself has already been verified by the helper check at the top.)
  if ( $v > 0 ) echo "DEBUG: ${0:t} : extracting audiotrack with ${rate}Hz sampling rate from video input"
  # original assumption: '-ac 1' equals the default soundtrack (until now this always has been true)
  # NOTE(review): per the ffmpeg documentation -ac sets the number of output channels - confirm the intent
  set soundtrack = "$TEMP/${PID}${SIGNAL:r:t}_soundtrack.wav"
  ffmpeg -loglevel quiet -y -i "$SIGNAL" -acodec pcm_s16le -ac 1 -ar "$rate" "$soundtrack"
  if ( $status != 0 ) then
    echo "ERROR: ${0:t} : cannot convert input signal to RIFF WAVE ${rate}Hz 16bit PCM - exiting" >> /dev/stderr
    set exitCode = 5
    goto clean
  endif
  set SIGNAL = "$soundtrack"
  chmod 666 "$SIGNAL"
# if MP3, convert to PCM  
else if ( "$ext" == "mp3" ) then
  # use avconv to convert since native sox does not support mp3
  avconv "$SIGNAL" ...
  set SIGNAL = ...
  set rate = `soxi -r "$SIGNAL"`
  set exitCode = $status
  if ( $exitCode != 0 || "$rate" == "" ) then
    echo "ERROR: $0:t : sox does not recognize avconv output (MP3 conversion) - exiting" >> /dev/stderr
    # an empty rate with status 0 is an error as well: never leave with exit code 0 here
    if ( $exitCode == 0 ) set exitCode = 1
    goto clean
  endif
# if another sound format, let sox try to recognize it; if this fails, we probably
# cannot process the input signal anyway
else
  set rate = `soxi -r "$SIGNAL"`
  set exitCode = $status
  if ( $exitCode != 0 || "$rate" == "" ) then
    echo "ERROR: $0:t : sox does not recognize signal input $SIGNAL - exiting" >> /dev/stderr
    # an empty rate with status 0 is an error as well: never leave with exit code 0 here
    if ( $exitCode == 0 ) set exitCode = 1
    goto clean 
  endif
endif  
# check bit width: we only process 16bit in BAS WebServices
# soxi is asked once; any answer other than 16 triggers a re-coding into a
# temporary 16bit signed WAV file, which then replaces SIGNAL.
set bitWidth = `soxi -b "$SIGNAL"`
if ( "$bitWidth" != 16 ) then
  echo "WARNING: $0:t : input SIGNAL file is not 16bit encoded (${bitWidth}bit) - re-coding into 16bit" >> /dev/stderr
  set recoded = "$TEMP/${PID}${SIGNAL:r:t}_16bit.wav"
  sox "$SIGNAL" -e signed -b 16 "$recoded"
  # without this check a failed re-coding would let all later steps work on a missing file
  if ( $status != 0 ) then
    echo "ERROR: $0:t : cannot re-code input signal into 16bit PCM - exiting" >> /dev/stderr
    set exitCode = 1
    goto clean
  endif
  set SIGNAL = "$recoded"
  chmod 666 "$SIGNAL"
endif

# By now the input signal should be a RIFF WAVE PCM 16bit or another sound format 
# recognized by sox with sampling rate $rate stored in $SIGNAL

# Building the sox arguments and the sox pipeline
#
# soxArg    : global sox options, placed in front of the input file
# soxOutArg : options for the output file (none at present)
# soxPipe   : chain of sox effects, appended behind the output file

set soxArg = " -q -D"    # run silent and always switch off dithering to make result reproducible
set soxArg = "$soxArg --multi-threaded"    # run multi-channel files in parallel
set soxArg = "$soxArg --buffer 131072"     # larger buffer for effective multithreading
set soxPipe = ""
set soxOutArg = ""

# first re-sample if applicable, because this saves us processing time later;
# the option is called RESAMPLE (a variable RATE does not exist), RESAMPLE=0 : no re-sampling
if ( "$RESAMPLE" != 0 ) set soxPipe = "$soxPipe rate -v $RESAMPLE"
# if NORM is set, add gain effect to pipeline; this is done *before* the channel merge,
# so that each channel (-e) is normalized to -3dB!
if ( $NORM == "TRUE" ) set soxPipe = "$soxPipe gain -e -n -3"
# if MONO is set, merge all channels into one with the sox effect 'channels 1'
if ( $MONO == "TRUE" ) set soxPipe = "$soxPipe channels 1"


# if noise filtering is enabled, make a noise profile from the leading/trailing
# silence intervals of the signal; negative values for NOISE are treated like zero
#
# NOISE is a fraction (0...1) but csh expressions only compare integers, so
# awk does the classification:
#   0 : off (NOISE is zero, negative or not a number)
#   1 : NOISE is above 0 and not above 1
#   2 : NOISE is above 1 and gets clipped to 1
set noiseClass = `awk -v n="$NOISE" 'BEGIN { n += 0; if (n <= 0) print 0; else if (n > 1) print 2; else print 1 }'`
if ( "$noiseClass" != 0 ) then
  if ( "$noiseClass" == 2 ) then
    echo "WARNING: $0:t : noise amount (NOISE=${NOISE} is outside defined range 0...1 - setting noise amount to 1 (max. noise reduction)" >> /dev/stderr
    set NOISE = 1
  endif  
  # run wav2trn to determine begin/end of speech in samples
  set begSample = `wav2trn .... | cut -d ' ' -f 1`
  set durSample = `wav2trn .... | cut -d ' ' -f 2`
  set totalSample = `soxi -s "$SIGNAL"`
  # number of samples outside the speech interval = material for the noise profile
  set durNoise = $totalSample
  @ durNoise -= $durSample
  if ( $durNoise < 1000 ) then 
    echo "WARNING: $0:t : noise reduction (NOISE=${NOISE}) : leading/trailing silence intervals contain less than 1000 samples to estimate noise profile - this might result in a very poor noise reduction" >> /dev/stderr
  endif  
  if ( $durNoise < 100 ) then 
    echo "WARNING: $0:t : noise reduction (NOISE=${NOISE}) : leading/trailing silence intervals contain less than 100 samples to estimate noise profile - switching off noise reduction" >> /dev/stderr
    set NOISE = 0
  else
    # trim keeps the first begSample samples, skips durSample samples and keeps
    # the remainder; noiseprof writes the profile of what is left
    set noiseProfile = "$TEMP/${PID}${SIGNAL:r:t}_noiseprof"
    sox "$SIGNAL" -n trim 0s ${begSample}s ${durSample}s noiseprof "$noiseProfile"
    if ( $status == 0 ) then
      # the noisered effect is only added when a profile really exists, and
      # the requested amount is handed over as its second argument
      set soxPipe = "$soxPipe noisered $noiseProfile $NOISE"
    else
      echo "WARNING: $0:t : noise reduction (NOISE=${NOISE}) : cannot compute noise profile - switching off noise reduction" >> /dev/stderr
      set NOISE = 0
    endif
  endif  
endif

# finally run the sox pipeline
# The exit status of sox becomes the exit code of the script; execution then
# simply continues into the clean-up section below.
if ( $v > 0 ) then
  echo "DEBUG: ${0:t} : sox pipeline: 'sox $soxArg $SIGNAL $soxOutArg $OUT $soxPipe'" 
endif
sox $soxArg "$SIGNAL" $soxOutArg "$OUT" $soxPipe
set exitCode = $status


# clean up
# Jump target for every error path above; exitCode has been set by whoever
# jumps here. Temporary files of this run (prefix $TEMP/$PID) are removed
# unless CLEAN was switched off.
clean:
set endsec = `date '+%s'`
if ( "$CLEAN" == "TRUE" ) then
  rm -rf $TEMP/${PID}* >& /dev/null
endif
if ( $v > 0 ) then
  echo "DEBUG: $0:t : finished with exit = ${exitCode} at `date` (${endsec})"
endif

exit $exitCode

