#!/usr/bin/perl

# #####################################################
#
# This file is part of the Perl scripts of the MASV System.
# MASV = Munich Automatic Speaker Verification
#
# Copyright 2002-2003, Ulrich Tuerk
# Institute of Phonetics and Speech Communication
# University of Munich
# tuerk@phonetik.uni-muenchen.de
#
#
#   MASV is free software; you can redistribute it and/or modify
#   it under the terms of the GNU General Public License as published by
#   the Free Software Foundation; either version 2 of the License, or
#   (at your option) any later version.
#
#   MASV is distributed in the hope that it will be useful,
#   but WITHOUT ANY WARRANTY; without even the implied warranty of
#   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
#   GNU General Public License for more details.
#
#   You should have received a copy of the GNU General Public License
#   along with MASV; if not, write to the Free Software
#   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
#
# #####################################################

# Revision identification strings in CVS keyword format (file id and release
# tag).  They are printed verbatim when the --version option is given.
my $CVS_Version_String = '$Id: create_feature_histogram.pl,v 1.8 2004/02/04 23:33:50 tuerk Exp $';
my $CVS_Name_String = '$Name: rel-1-4-01 $';



use lib $ENV{"MASV_PERL_ROOT"};

use SR_lib;
use strict;

use File::Copy;
use File::Path;
use Cwd;
use Getopt::Long qw( GetOptions );
use Pod::Usage;

# Exit status handed to exit() at the end of the main script.
my $exit_state = 0;

# Number of regular histogram bins.  Each feature row of @hist_array holds
# $bin_num + 2 counters (indices 0 .. $bin_num + 1).
my $bin_num = 100;

# Raw option values and the speaker / session lists derived from them.
my @envarray = ();
my @speakerarray = ();
my @speakertestarray = ();
my (%training_sessions_array_refs, %evaluate_sessions_array_refs);
my @selected_sessions_array;

# Default feature vector size; read_gauss_parameter() overwrites it with the
# size found in the parameter file.
my $vec_size = 39;

# Per-feature histogram range, defaulting to [-1, 1] for every feature.
my @min_exp = (-1) x $vec_size;
my @max_exp = (1) x $vec_size;

# One row of zeroed counters per feature: $hist_array[feature][bin].
my @hist_array = map { [ (0) x ($bin_num + 2) ] } 1 .. $vec_size;




###########################
# Default values for options
my $help = 0;
my $Identify = 0;



###########################
# Processing Options
#
# -s and -e collect their values into arrays; the main script later joins and
# re-splits them on ',' so both repeated options and comma-separated lists
# are accepted.  A parse error (e.g. an unknown option) aborts with the usage
# text instead of being silently ignored.
GetOptions(
	's|speakers=s'    => \@speakerarray,
	'e|environment=s' => \@envarray,
	'help|?'          => \$help,
	'version'         => \$Identify,
) or pod2usage(2);

if ($Identify) {
	# Plain print: the text is data, not a printf format (a '%' in $0 would
	# otherwise be interpreted as a conversion).
	print "$0\n$CVS_Version_String\n$CVS_Name_String\n\n";

	# A bare "--version" call is a complete request on its own.  When
	# positional arguments are present the run continues, so the version
	# still ends up at the top of the output.
	exit 0 if (!@ARGV && !$help);
}

# Exactly two positional arguments (pool name, parameter file) are required.
# pod2usage() prints the usage text and terminates the script with status 1.
if ((@ARGV != 2) || ($help)) {
	pod2usage(1);
}

# Positional arguments: name of the parameter pool and path of the file that
# holds the mean / variance vectors used to scale the histogram range.
my $poolname = $ARGV[0];
my $gauss_parameter = $ARGV[1];

my $source_param_dir = "${SR_lib::paramPool_dir}${poolname}${SR_lib::sign}";


unless (-e $source_param_dir) {
	die("\nThere's no pool $source_param_dir \n\n");
}


unless (-e $gauss_parameter) {
	die("\nThere's no parameter file $gauss_parameter \n\n");
}

# Sets $vec_size and the per-feature range (@min_exp / @max_exp).
read_gauss_parameter($gauss_parameter);


# generate list of speakers
# (options may be repeated and/or comma separated, hence join + split)
@speakerarray = split(/,/,join(',',@speakerarray));

foreach my $speakerlist_id (@speakerarray) {
	my $speaker_set = $SR_lib::speakerlist{$speakerlist_id};
	unless (defined $speaker_set) {
		# Without this check the dereference below fails with an opaque
		# "undefined value as an ARRAY reference" error.
		die("\nUnknown speaker set $speakerlist_id \n\n");
	}
	push (@speakertestarray, @{$speaker_set});
}
if (!@speakertestarray) {
	@speakertestarray = ('all');
}

if (!@envarray) {
	@envarray = ('all');
}

# generate list of test/training sessions
@envarray = split(/,/,join(',',@envarray));

# For every requested environment set, collect its training sessions followed
# by its evaluation sessions.
foreach my $envlist (@envarray) {
	my $training_ref = $MASV_db_desc::training_sessions_lists{$envlist};
	my $evaluate_ref = $MASV_db_desc::evaluate_sessions_lists{$envlist};
	unless (defined $training_ref && defined $evaluate_ref) {
		die("\nUnknown environment set $envlist \n\n");
	}
	$training_sessions_array_refs{$envlist} = $training_ref;
	$evaluate_sessions_array_refs{$envlist} = $evaluate_ref;
	push(@selected_sessions_array, @{$training_ref}, @{$evaluate_ref});
}


# Speaker directories are the four-digit entries of the pool directory.
opendir(my $pool_dh, $source_param_dir)
	or die("\nCannot read pool directory $source_param_dir : $!\n\n");
my @speaker_dirs = grep(/^\d{4}$/, readdir($pool_dh));
closedir($pool_dh);


# Report requested speakers that are missing from the pool.  This check does
# not depend on the directory being processed, so it is done once up front
# rather than once per speaker directory.
foreach my $speaker_allowed (@speakertestarray) {
	next if ($speaker_allowed eq "all");
	unless (&SR_lib::test_containedInArray($speaker_allowed, \@speaker_dirs)) {
		print "$speaker_allowed not available in pool $source_param_dir , update necessary ?\n";
	}
}


# NOTE(review): a speaker that occurs more than once in @speakertestarray
# (e.g. through overlapping speaker sets) is processed once per occurrence,
# as before - confirm whether that double counting is intended.
foreach my $speaker_dir (@speaker_dirs) {
	foreach my $speaker_allowed (@speakertestarray) {
		next unless ( ($speaker_dir eq $speaker_allowed) || ($speaker_allowed eq "all") );

		print "Current speaker: $speaker_dir \n";

		my $speaker_path = "$source_param_dir${SR_lib::sign}$speaker_dir";
		my $speaker_dh;
		unless (opendir($speaker_dh, $speaker_path)) {
			warn "cannot read directory $speaker_path : $!\n";
			next;
		}
		my @ses_dirs = grep ( /\d{2}/, readdir($speaker_dh));
		closedir($speaker_dh);

		foreach my $ses_dir (@ses_dirs) {
			# Only sessions of the selected environment sets contribute.
			next unless (&SR_lib::test_containedInArray($ses_dir, \@selected_sessions_array));

			my $session_path = "${speaker_path}${SR_lib::sign}${ses_dir}";
			my $session_dh;
			unless (opendir($session_dh, $session_path)) {
				warn "cannot read directory $session_path : $!\n";
				next;
			}
			my @src_files = grep(/\.param$/, readdir($session_dh));
			# The handle is a directory handle, so closedir (not close) is
			# required to release it.
			closedir($session_dh);

			foreach my $src_file (@src_files) {
				update_hist_array($session_path . $SR_lib::sign . $src_file);
			} # end foreach $src_file (@src_files)
		} # end foreach $ses_dir (@ses_dirs)

	} # end foreach $speaker_allowed
} # end foreach $speaker_dir (@speaker_dirs)


write_out_histogram();



exit $exit_state;


# Reads the mean and variance vectors from a parameter file and derives the
# histogram range of every feature from them.
#
# The file is scanned line by line: the line following a "<MEAN> N" tag is
# taken as the mean vector (N becomes $vec_size), the line following a
# "<VARIANCE>" tag as the variance vector.  If a tag occurs several times the
# last occurrence wins.
#
# Parameter:    path of the parameter file.
# Side effects: sets $vec_size and fills @min_exp / @max_exp with
#               mean -/+ 7 standard deviations for every feature.
# Dies if the file cannot be opened, if either vector does not contain
# exactly $vec_size values, or if a variance is not positive.
sub read_gauss_parameter {
	my ($file) = @_;

	my (@means, @vars);

	open(my $param_fh, '<', $file)
		or die "cannot open gauss parameter file $file: $!\n\n";
	my @content = <$param_fh>;
	close($param_fh);

	my $read_next_flag_mean = 0;
	my $read_next_flag_var = 0;

	foreach my $line (@content) {
		$line =~ s/^\s*//;
		if ($line =~ /<MEAN>\s+(\d+)/) {
			$vec_size = $1;
			$read_next_flag_mean = 1;
			next;
		}
		if ($read_next_flag_mean) {
			$read_next_flag_mean = 0;
			@means = split(' ', $line);
		}

		if ($line =~ /<VARIANCE>/) {
			$read_next_flag_var = 1;
			next;
		}
		if ($read_next_flag_var) {
			$read_next_flag_var = 0;
			@vars = split(' ', $line);
		}
	}

	# Both vectors must match the announced size; report element counts
	# (not last indices) so the message is directly comparable to $vec_size.
	my $mean_count = scalar(@means);
	my $var_count = scalar(@vars);
	if ( ($mean_count != $vec_size) || ($var_count != $vec_size) ) {
		die "length of parameters ( $mean_count, $var_count ) not equal to $vec_size\n";
	}

	# Half width of the histogram range in standard deviations.
	my $sigma_range = 7;

	for my $i (0 .. $#means) {
		# A non-positive variance would either make sqrt() fail or collapse
		# the range to zero width, which breaks the later bin computation.
		unless ($vars[$i] > 0) {
			die "variance of feature " . ($i + 1) . " in $file is not positive\n";
		}
		my $half_width = $sigma_range * sqrt($vars[$i]);
		$min_exp[$i] = $means[$i] - $half_width;
		$max_exp[$i] = $means[$i] + $half_width;
	}

	return;
}

# Writes the accumulated histogram to "histogram_Pool_<poolname>.txt" in the
# current working directory.
#
# For every feature (1-based in the output) three lines are written:
#   "Feature <n>"
#   "<bin_num + 1> <min> <max>"     (range limits with 9 decimals)
#   the $bin_num + 2 counters of that feature, each followed by a blank
# followed by an empty line.
#
# Reads $poolname, $vec_size, $bin_num, @min_exp, @max_exp and @hist_array.
# Dies if the output file cannot be opened or completely written.
#
# NOTE(review): the header line announces $bin_num + 1 while $bin_num + 2
# counters follow; kept unchanged because consumers of the file may rely on
# it - confirm which value is intended.
sub write_out_histogram {

	my $out_name = "histogram_Pool_$poolname.txt";

	open(my $hist_fh, '>', $out_name)
		or die "cannot open output file $out_name: $!\n\n";

	for my $i (0 .. $vec_size - 1) {
		print {$hist_fh} "Feature " . ($i + 1) . "\n";
		printf {$hist_fh} ("%d %.9f %.9f\n", $bin_num + 1, $min_exp[$i], $max_exp[$i]);

		# Counters never touched (possible for features beyond the rows
		# that were pre-initialised) are written as 0 instead of as an
		# empty field.
		my @counts = map { defined($hist_array[$i][$_]) ? $hist_array[$i][$_] : 0 }
			(0 .. $bin_num + 1);
		print {$hist_fh} join(" ", @counts), " ";
		print {$hist_fh} "\n\n";
	}

	# Buffered write errors (e.g. disk full) only surface at close time.
	close($hist_fh)
		or die "cannot write output file $out_name: $!\n\n";

	return;
}

# Adds the feature vectors of one parameter file to the global histogram.
#
# The file is dumped with "HList -r" (located via $SR_lib::htk_bin); every
# output line must consist of exactly $vec_size whitespace-separated numbers.
# Each number is scaled so that $min_exp[feature] maps to 0 and
# $max_exp[feature] maps to $bin_num, truncated to an integer and clamped to
# the counter range 0 .. $bin_num + 1; the matching counter in @hist_array is
# incremented.
#
# Parameter: path of the parameter file.
# Dies if HList cannot be started, if a line has the wrong number of values
# or if a feature has an empty range.  If HList terminates with an error the
# file is skipped with a warning.
sub update_hist_array {

	my ($file) = @_;

	# Read HList's output through a pipe (list form, no shell involved):
	# no shared fixed-name file in /tmp and no quoting problems with $file.
	my $hlist_cmd = "${SR_lib::htk_bin}HList";
	open(my $hlist_fh, '-|', $hlist_cmd, '-r', $file)
		or die "cannot run $hlist_cmd: $!\n\n";
	my @content = <$hlist_fh>;

	# close() on a pipe handle reports the exit status of the command.
	unless (close($hlist_fh)) {
		warn "$hlist_cmd failed for file $file (status $?), file skipped\n";
		return;
	}

	# Highest counter index; values beyond the range are collected there.
	my $last_index = $bin_num + 1;

	foreach my $line (@content) {
		my @numbers = split(' ', $line);
		if ((scalar @numbers) != $vec_size) {
			die "error in file $file : length of data string not matching with length $vec_size \n\n";
		}

		for my $feature_index (0 .. $#numbers) {
			my $range = $max_exp[$feature_index] - $min_exp[$feature_index];
			if ($range == 0) {
				die "empty histogram range for feature " . ($feature_index + 1) . "\n\n";
			}

			# renormalize value
			my $value_norm = ($numbers[$feature_index] - $min_exp[$feature_index]) / $range * $bin_num;
			my $index = int($value_norm);

			$index = 0 if ($index < 0);
			$index = $last_index if ($index > $last_index);

			$hist_array[$feature_index][$index]++;
		}
	}

	return;
}




__END__

=head1 NAME

create_feature_histogram.pl  - create a histogram of parameter files

=head1 SYNOPSIS

create_feature_histogram.pl [options] poolname hmmfile

Calculates a histogram for a given set of speakers and set of environments.

 Options:
 
 -s | speakers = s        specify set of speakers.
                          default: 'all'  = 150 speakers
                          One can expand the template file in
                          templates/speaker_sets/standard by additional sets.
                            
 -e | environment = s     specify environment set.
                          default: all.
                          possible values: 
                          FixedQuiet (training:01,09,13,12; evaluate:03,05,17)
                          all (training:01,02,03,04;
                               evaluate: 05,06,07,08,09,10,11,12,13,14,15,16,17,18,19,20)
                          Add more definitions in SR_lib.pm.
 
 -? | help                display this message.
 
 --version                print the script name and the CVS version strings.
 
=cut

