#!/usr/bin/perl


# #####################################################
#
# This file is part of the Perl scripts of the MASV System.
# MASV = Munich Automatic Speaker Verification
#
# Copyright 2002-2003, Ulrich Tuerk
# Institute of Phonetics and Speech Communication
# University of Munich
# tuerk@phonetik.uni-muenchen.de
#
#
#   MASV is free software; you can redistribute it and/or modify
#   it under the terms of the GNU General Public License as published by
#   the Free Software Foundation; either version 2 of the License, or
#   (at your option) any later version.
#
#   MASV is distributed in the hope that it will be useful,
#   but WITHOUT ANY WARRANTY; without even the implied warranty of
#   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
#   GNU General Public License for more details.
#
#   You should have received a copy of the GNU General Public License
#   along with MASV; if not, write to the Free Software
#   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
#
# #####################################################

# CVS keyword strings identifying this revision of the script; they are
# printed together with the script name when the script is called with
# the -version option.
my $CVS_Version_String = '$Id: get_bestWorldModel_GMM.pl,v 1.13 2004/05/05 16:13:11 tuerk Exp $';
my $CVS_Name_String = '$Name: rel-1-4-01 $';


use lib $ENV{"MASV_PERL_ROOT"};

use SR_lib;
use strict;

use File::Path;
use Getopt::Long qw( GetOptions );
use Pod::Usage;

# -----------------------------------------------------
# Main script.
#
# Collects the llh stored in the first line of every result file
#   <sv_systems_dir><sv_system_name>/world/
#       calc_GMM_world_llh_test_<hmmBaseDir>_<version>.mlf.txt
# and walks through the versions in ascending numeric order for as long as
# the llh grows by more than the step size.  The last version that still
# improved on its predecessor is printed on STDOUT.
#
# Exit status: 0 on success, 1 on usage errors or when no usable result
# file was found.
# -----------------------------------------------------

# llh of each world model, keyed by the version number of the model
my %results;

# minimum absolute llh gain a version must achieve over its predecessor
my $epsilon = 0.1;
# if non-zero: step size = |llh of the first version| * epsilon_factor,
# which then replaces $epsilon
my $epsilon_factor = 0.0;

my $help = 0;
my $Identify = 0;

# Show the usage message on unknown or malformed options instead of
# silently continuing with the default values.
GetOptions('epsilon=f' => \$epsilon,
           'epsilon_factor=f' => \$epsilon_factor,
           'help|?' => \$help,
           'version' => \$Identify)
	or pod2usage(1);

if ($Identify) {
	# plain print: the text must not be used as a printf format, a '%' in
	# the script path would otherwise be taken as a conversion directive
	print "$0\n$CVS_Version_String\n$CVS_Name_String\n\n";
}

# pod2usage(1) prints the usage text and exits with status 1
if ((@ARGV != 2) || ($help)) {
	pod2usage(1);
}

my $sv_system_name = $ARGV[0];
my $hmmBaseDir = $ARGV[1];

my $test_files_dir = "${SR_lib::sv_systems_dir}${sv_system_name}/world/";

my $src_dir;
if (!opendir($src_dir, $test_files_dir)) {
	print STDERR "$0: cannot open directory '$test_files_dir': $!\n";
	exit 1;
}
my @dir_entries = readdir($src_dir);
closedir $src_dir;

# \Q..\E: hmmBaseDir is a literal name, not a pattern.
# \z:     do not pick up backup copies such as "....mlf.txt~" which would
#         overwrite the result of the real file with the same version.
my $test_file_pattern = qr/^calc_GMM_world_llh_test_\Q${hmmBaseDir}\E_(\d+)\.mlf\.txt\z/;

foreach my $test_file (@dir_entries) {
	next unless ($test_file =~ $test_file_pattern);
	my $hmm_version = $1;

	# the accumulated llh is stored in the first line of the file
	my $test_handle = SR_lib::open_file("<", $test_files_dir . $test_file);
	my $llh = <$test_handle>;
	close $test_handle;

	# An empty file would otherwise count as llh 0 and could beat every
	# real (negative) llh value.
	if (!defined($llh) || $llh !~ /\S/) {
		print STDERR "$0: no llh value found in '$test_files_dir$test_file', file skipped\n";
		next;
	}
	chomp($llh);
	$results{$hmm_version} = $llh;
	# print STDERR "$hmm_version : $llh \n";
}

if (!%results) {
	print STDERR "$0: no usable result files for '$hmmBaseDir' in '$test_files_dir'\n";
	exit 1;
}

my @sorted_keys = sort sorting_num (keys(%results));

# The first (lowest) version is the starting point of the search.  Seeding
# the search with its llh avoids an arbitrary lower bound for llh values.
my ($best_version, @later_versions) = @sorted_keys;
my $current_perf = $results{$best_version};

# Relative step size (based on the llh of the first version) if requested,
# absolute step size otherwise.
my $eps_on_start_value = abs($current_perf) * $epsilon_factor;
my $step = ($eps_on_start_value == 0) ? $epsilon : $eps_on_start_value;

foreach my $key (@later_versions) {
	# print STDERR "nr. $key, $results{$key} \n";
	# stop at the first version that does not gain more than one step
	last unless ($results{$key} > ($current_perf + $step));
	$best_version = $key;
	$current_perf = $results{$key};
}

print STDOUT "$best_version\n";

exit 0;



# Comparison routine for sort(): orders the elements numerically in
# ascending order.
#
# Reads the two elements from the package variables $a and $b which are
# set by sort(); returns 1, 0 or -1 if $a is greater than, equal to or
# less than $b.
sub sorting_num {
	return $a <=> $b;
}


__END__

=head1 NAME

get_bestWorldModel_GMM.pl  - find best world model from GMM result files.

=head1 SYNOPSIS

get_bestWorldModel_GMM.pl  sv_system_name  hmmBaseDir 

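Reads the result files of the GMM world test (made with
calc_GMM_world_llh.pl) and walks through the model versions in ascending
order for as long as the overall llh keeps increasing. Each version must
improve the llh of the preceding version by more than the step size
(0.1 unless changed with the options below); the search stops at the first
version that does not, and the last version that did is reported as the
best model.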

Note: the result files must be in the "world/" directory of the SV system
"sv_system_name" and must be named
"calc_GMM_world_llh_test_<hmmBaseDir>_<version_of_model>.mlf.txt",
where <version_of_model> is a number.

 Options:
 
 -epsilon=f                minimum absolute step size when determining the 
                           best version;
                           default: 0.1

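 -epsilon_factor=f         if not 0, the step size is the absolute value of
                           the llh of the first version multiplied by the
                           given factor; this replaces -epsilon;
                           default: 0

 -help | -?                print the usage message and exit

 -version                  print the script name and its CVS version
                           information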

=cut

