#!/usr/bin/perl

# #####################################################
#
# This file is part of the Perl scripts of the MASV System.
# MASV = Munich Automatic Speaker Verification
#
# Copyright 2002-2003, Ulrich Tuerk
# Institute of Phonetics and Speech Communication
# University of Munich
# tuerk@phonetik.uni-muenchen.de
#
#
#   MASV is free software; you can redistribute it and/or modify
#   it under the terms of the GNU General Public License as published by
#   the Free Software Foundation; either version 2 of the License, or
#   (at your option) any later version.
#
#   MASV is distributed in the hope that it will be useful,
#   but WITHOUT ANY WARRANTY; without even the implied warranty of
#   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
#   GNU General Public License for more details.
#
#   You should have received a copy of the GNU General Public License
#   along with MASV; if not, write to the Free Software
#   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
#
# #####################################################

# Revision information kept as CVS keyword strings (the Id and Name keywords).
# Both assignments come before the `package MASV_db_desc` statement and before
# `use strict`, so they set undeclared globals outside package MASV_db_desc.
$CVS_Version_String = '$Id: MASV_db_desc.pm,v 1.4 2004/02/17 09:42:04 tuerk Exp $';
$CVS_Name_String = '$Name: rel-1-3 $';


## This is a template file. Adapt it to your database requirements and put it in the
#       directory to which the environment variable MASV_DATABASE_DESCRIPTION points.

package MASV_db_desc;

use strict;

use lib $ENV{"MASV_PERL_ROOT"};
use SR_lib;

use FileHandle;
use File::Path;
use Cwd;
use File::Basename;
use Parallel::ForkManager;

use vars qw(%training_sessions_lists 
			%evaluate_sessions_lists %sessions_lists @all_sessions 
			$audio_type
			$sampling_freq
			$label_file_ext
			$audio_file_ext
			@NoisesArray
			);


# Database-wide settings and the per-condition session lists.
#
# %sessions_lists maps a list name to a reference to an array of two-digit
# session ID strings. The four condition lists (FixedQuiet, FixedNoisy,
# GSMQuiet, GSMNoisy) are disjoint and together contain exactly the 20 IDs
# stored under {all}.
#
# @NoisesArray: inside single quotes '\\' yields one backslash, so the two
# elements are the texts \[int\] and \[sta\], i.e. the brackets are already
# backslash-escaped (presumably for use as regex fragments - confirm at the
# call site).
#
# NOTE(review): according to the marker comments the region below is also
# read by Matlab; how it is parsed is not visible here, so keep the statement
# syntax in that region unchanged.
%sessions_lists = ();

####
#### begin used also by matlab

# values: "raw_alaw", "wav_alaw"
$audio_type = "raw_alaw";

# values: integer, Hertz
$sampling_freq = 8000;


# file extension for label files
$label_file_ext = "DEO";

# file extension for audio files
$audio_file_ext = "DEA";

# noise labels in label files, used as basis for detecting noise contaminated recordings
@NoisesArray = ('\\[int\\]','\\[sta\\]');


$sessions_lists{FixedQuiet} = ['01','03','05','09','12','13','17'];
$sessions_lists{FixedNoisy} = ['07','14','19'];
$sessions_lists{GSMQuiet} = ['02','06','08','11','15','18','20'];
$sessions_lists{GSMNoisy} = ['04','10','16'];

# sessions
$sessions_lists{all} = ['01','02','03','04','05','06','07','08','09','10','11','12','13','14','15','16','17','18','19','20'];

#### end used also by matlab
####

# Training session IDs per list name. Each value is a reference to an array
# of two-digit session ID strings.
# NOTE(review): the S3* training lists are identical to the S2* lists.
%training_sessions_lists = (
	FixedQuiet   => [ qw(01 09 13 12) ],
	Quiet        => [ qw(01 09 02 06) ],
	Fixed        => [ qw(01 09 13 07) ],
	S1FixedQuiet => [ qw(01) ],
	S1FixedNoisy => [ qw(07) ],
	S1GSMQuiet   => [ qw(02) ],
	S1GSMNoisy   => [ qw(04) ],
	altTraining2 => [ qw(01 03 05 09) ],
	altTraining1 => [ qw(01 02 04 07) ],

	all          => [ qw(01 02 03 04) ],

	S1Fixed      => [ qw(01 07) ],
	S1GSM        => [ qw(02 04) ],
	S1Noisy      => [ qw(04 07) ],
	S1Quiet      => [ qw(01 02) ],

	S2Fixed      => [ qw(01 03 05 07) ],
	S2GSM        => [ qw(02 04 06 08) ],
	S2Noisy      => [ qw(04 07 10 14) ],
	S2Quiet      => [ qw(01 02 03 06) ],

	S3Fixed      => [ qw(01 03 05 07) ],
	S3GSM        => [ qw(02 04 06 08) ],
	S3Noisy      => [ qw(04 07 10 14) ],
	S3Quiet      => [ qw(01 02 03 06) ],
);

# Evaluation session IDs per list name, same layout as the training table.
%evaluate_sessions_lists = (
	FixedQuiet   => [ qw(03 05 17) ],
	Quiet        => [ qw(03 05 12 13 17 08 11 15 18 20) ],
	Fixed        => [ qw(03 05 12 17 14 19) ],
	altTraining2 => [ qw(02 04 06 07 08 10 11 12 13 14 15 16 17 18 19 20) ],
	altTraining1 => [ qw(03 05 06 08 09 10 11 12 13 14 15 16 17 18 19 20) ],

	all          => [ qw(05 06 07 08 09 10 11 12 13 14 15 16 17 18 19 20) ],

	S1Fixed      => [ qw(03 05 09 12 13 14 17 19) ],
	S1GSM        => [ qw(06 08 10 11 15 16 18 20) ],
	S1Noisy      => [ qw(10 14 16 19) ],
	S1Quiet      => [ qw(03 05 06 08 09 11 12 13 15 17 18 20) ],

	S1FixedQuiet => [ qw(03 05 09 12 13 17) ],
	S1FixedNoisy => [ qw(14 19) ],
	S1GSMQuiet   => [ qw(06 08 11 15 18 20) ],
	S1GSMNoisy   => [ qw(10 16) ],

	S2Fixed      => [ qw(09 12 13 17 14 19) ],
	S2GSM        => [ qw(01 03 05 07 09 10 11 12 13 14 15 16 17 18 19 20) ],
	S2Noisy      => [ qw(01 02 03 05 06 08 09 11 12 13 15 16 17 18 19 20) ],
	S2Quiet      => [ qw(04 05 07 08 09 10 11 12 13 14 15 16 17 18 19 20) ],

	S3Fixed      => [ qw(02 04 06 08 09 10 11 12 13 14 15 16 17 18 19 20) ],
	S3GSM        => [ qw(01 03 05 07 09 10 11 12 13 14 15 16 17 18 19 20) ],
	S3Noisy      => [ qw(01 02 03 05 06 08 09 11 12 13 15 16 17 18 19 20) ],
	S3Quiet      => [ qw(04 05 07 08 09 10 11 12 13 14 15 16 17 18 19 20) ],
);

# Every session ID of the database, in ascending order.
@all_sessions = qw(01 02 03 04 05 06 07 08 09 10 11 12 13 14 15 16 17 18 19 20);




# Expand a short session-list code into a session-list name.
#
# Parameter:
#   $_[0]  either a full name (more than three characters), which is returned
#          unchanged, or a short code: an environment code followed by one
#          final character, 't' (adds '_training') or 'e' (adds '_evaluate').
# Returns:
#   The list name, e.g. 'fqe' -> 'S_list_S1FixedQuiet_evaluate'.
#   An environment code that is not in the table leaves the base name
#   undefined; a final character other than 't'/'e' adds no suffix.
sub get_SessionsListName {
	my ($identifier) = @_;

	# Long identifiers are passed through untouched.
	return $identifier if length($identifier) > 3;

	# The last character selects the set, everything before it the environment.
	my $set_ident = substr($identifier, (length($identifier) - 1), 1);
	my $env_ident = substr($identifier, 0, (length($identifier) - 1));

	my %list_name_for = (
		'f'  => 'S_list_FixedQuiet',
		'fq' => 'S_list_S1FixedQuiet',
		'fn' => 'S_list_S1FixedNoisy',
		'gq' => 'S_list_S1GSMQuiet',
		'gn' => 'S_list_S1GSMNoisy',
		'a'  => 'S_list_all',
		'1'  => 'S_list_altTraining1',
		'2'  => 'S_list_altTraining2',
		'F'  => 'S_list_S1Fixed',
		'G'  => 'S_list_S1GSM',
		'N'  => 'S_list_S1Noisy',
		'Q'  => 'S_list_S1Quiet',
	);

	my %suffix_for = (
		't' => '_training',
		'e' => '_evaluate',
	);

	my $name = $list_name_for{$env_ident};

	if (exists $suffix_for{$set_ident}) {
		$name = $name . $suffix_for{$set_ident};
	}

	return $name;
}


# Read a label file and extract the prompted and the labeled word sequence.
#
# Parameter:
#   $_[0]  path of the label file; it is opened for reading through
#          SR_lib::open_file.
# Returns:
#   A two-element list (prompt_ref, real_ref). Each is a reference to an array
#   of whitespace-separated words, or undef when no matching line was found.
#     prompt_ref - from the last comma-separated field of an "LBR:" line
#     real_ref   - from the last comma-separated field of an "LBO:" line, with
#                  the content of a directly following "EXT:" line appended
#   If several LBR/LBO lines occur, the last one wins.
sub read_db_label {
		my ($label_file) = @_;

		my $fh = &SR_lib::open_file("<", "$label_file");
		my @lines = <$fh>;
		close $fh;

		my $prompt_ref;
		my $real_ref;

		foreach my $idx (0 .. $#lines) {
			my $current = $lines[$idx];

			if ($current =~ /^LBR:\s*(.*)/) {
				# prompted text
				my $last_field = (split /,/ , $1)[-1];
				@$prompt_ref = split ' ', $last_field;
				next;
			}

			next unless $current =~ /^LBO:\s*(.*)/;

			# labeled text; grab the field before the EXT match below resets $1
			my $text = (split /,/ , $1)[-1];

			if ($idx < $#lines) {
				my $following = $lines[$idx + 1];
				chomp($following);
				$text = $text . " " . $1 if $following =~ /^EXT:\s*(.*)/;
			}

			@$real_ref = split ' ', $text;
		}

		return ($prompt_ref, $real_ref);

}


# Convert a prompt text into a list of word units.
#
# Parameter:
#   $_[0]  prompt text; words are separated by whitespace.
# Returns:
#   Reference to an array of units. Per input word:
#     - exactly two digits: spelled out as one number by writeOutNumbers
#     - any other all-digit word: every digit spelled out separately
#     - a single capital letter A-Z: prefixed with '$' (spelled letter)
#     - anything else: kept as it is
#   The results are joined and split on whitespace again, so multi-unit
#   strings from writeOutNumbers become separate elements and empty
#   entries vanish.
sub convert_prompt_string {

	my ($promptString) = @_;

	my @converted = ();

	foreach my $word (split(/\s+/, $promptString)) {

		if ($word =~ /^\d+$/) {
			# only digits
			if (length($word) == 2) {
				push @converted, &writeOutNumbers($word);
			}
			else {
				# digit by digit
				my @spelled = map { scalar(&writeOutNumbers($_)) } split(//, $word);
				push @converted, join(" ", @spelled);
			}
		}
		elsif ($word =~ /^[A-Z]$/) {
			# single letter -> spelled
			push @converted, '$' . $word;
		}
		else {
			# leave as it was
			push @converted, $word;
		}
	}

	return [ split(" ", join(" ", @converted)) ];
}




# Normalise one label word and split it around the connective "und".
#
# Parameter:
#   $_[0]  label word.
# Returns:
#   A string with one unit per line ("\n"-separated):
#     - the word splits into two parts around "und": part1, "und", part2
#     - the word does not contain "und": the word itself
#     - otherwise (no parts or more than two): just "und"
#   "und" is matched anywhere in the word, also inside longer words such as
#   "hundert".
#
# Before splitting, u-umlaut is replaced by "ue" and sharp s by "ss".
# NOTE(review): the non-ASCII characters of these two patterns had been lost
# from the file, leaving empty patterns (which make s/// reuse the last
# successful pattern or match the empty string). They are restored from the
# replacement texts "ue"/"ss" and written as escapes; both the ISO-8859-1
# byte and the UTF-8 byte sequence are accepted because the encoding of the
# label files is not visible here - confirm.
sub convert_label_string {
	my $labelString = $_[0];

	$labelString =~ s/\xC3\xBC|\xFC/ue/g;    # u-umlaut (UTF-8 bytes | Latin-1)
	$labelString =~ s/\xC3\x9F|\xDF/ss/g;    # sharp s  (UTF-8 bytes | Latin-1)
	# disabled: $labelString =~ s/zwo/zwei/g;

	my @wordArray = split /und/, $labelString;

	if (scalar(@wordArray) == 2) {
		return join "\n", ($wordArray[0], "und", $wordArray[1]);
	}

	if (scalar(@wordArray) == 1) {
		return $wordArray[0];
	}

	return 'und';
}



# Spell out a number of one to three decimal digits as German word units.
#
# Parameter:
#   $_[0]  string of 1 to 3 digits (leading zeros allowed).
# Returns:
#   One string of space-separated units, for example
#     "21"  -> " ein und zwanzig"
#     "1"   -> " ein s"
#     "117" -> "ein hundert siebzehn"
#     "0"   -> " null"
#   The hundreds slot is always emitted, so values below 100 start with a
#   space; convert_prompt_string splits the result on whitespace again.
# Errors:
#   Prints a message to STDERR and exits with status 1 when the argument is
#   empty or longer than three characters.
#
# NOTE(review): the umlauts and the sharp s of the word tables had been lost
# from the file ("fnf", "dreiig", "fnfzig", "zwlf"). They are restored with
# \x escapes that produce the ISO-8859-1 bytes, which is assumed to be the
# original source encoding - confirm against the encoding expected by the
# consumers of these words. The error message is human-facing only and uses
# the ASCII spelling "gross".
sub writeOutNumbers {
	my $numberString = $_[0];

	my $max_length = 3;

	if ((length($numberString) > $max_length) || (length($numberString) < 1)) {
		print STDERR "writeOutNumbers: Zahlen-String leer oder Zahl zu gross: $numberString\n";
		exit 1;
	}

	# Index 0 is the empty string so that a digit can be used as index directly.
	my @einer = ("", "ein", "zwei", "drei", "vier", "f\xFCnf",
				 "sechs", "sieben", "acht", "neun");
	my @zehner = ("", "zehn", "zwanzig", "drei\xDFig", "vierzig", "f\xFCnfzig",
				  "sechzig", "siebzig", "achtzig", "neunzig");

	# pad with leading zeros up to the maximum length
	$numberString = ("0" x ($max_length - length($numberString))) . $numberString;

	my $hunderterIndex = substr($numberString, 0, 1);
	my $zehnerIndex    = substr($numberString, 1, 1);
	my $einerIndex     = substr($numberString, 2, 1);
	my $lastTwoDigits  = substr($numberString, 1, 2);

	my @returnArray;

	# hundreds; the (possibly empty) slot is always pushed
	push @returnArray, $einer[$hunderterIndex];
	if ($hunderterIndex > 0) {
		push @returnArray, "hundert";
	}

	# exceptions with their own word
	if ($lastTwoDigits == 11) {
		push @returnArray, "elf";
	}
	elsif ($lastTwoDigits == 12) {
		push @returnArray, "zw\xF6lf";
	}
	elsif ($lastTwoDigits == 17) {
		push @returnArray, "siebzehn";
	}
	else {
		# German order: ones, then "und" (only from 21 upwards), then tens
		if ($einerIndex > 0) {
			push @returnArray, $einer[$einerIndex];
		}
		if (($einerIndex > 0) && ($zehnerIndex > 1)) {
			push @returnArray, "und";
		}
		if ($zehnerIndex > 0) {
			push @returnArray, $zehner[$zehnerIndex];
		}
		# an 's' for numbers without tens ("ein" + "s")
		if (($zehnerIndex == 0) && ($einerIndex == 1)) {
			push @returnArray, "s";
		}
	}

	if ($numberString eq "000") {
		push @returnArray, "null";
	}

	return join " ", @returnArray;
}

