#!/usr/bin/perl

# #####################################################
#
# This file is part of the Perl scripts of the MASV System.
# MASV = Munich Automatic Speaker Verification
#
# Copyright 2002-2003, Ulrich Türk
# Institute of Phonetics and Speech Communication
# University of Munich
# tuerk@phonetik.uni-muenchen.de
#
#
#   MASV is free software; you can redistribute it and/or modify
#   it under the terms of the GNU General Public License as published by
#   the Free Software Foundation; either version 2 of the License, or
#   (at your option) any later version.
#
#   MASV is distributed in the hope that it will be useful,
#   but WITHOUT ANY WARRANTY; without even the implied warranty of
#   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
#   GNU General Public License for more details.
#
#   You should have received a copy of the GNU General Public License
#   along with MASV; if not, write to the Free Software
#   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
#
# #####################################################

# Enable strictures and warnings for the whole script, before any
# declaration, so that mistakes (e.g. printing to a closed handle or
# using an undefined value) are reported instead of passing silently.
use strict;
use warnings;

# CVS keyword strings; they are printed by the -version option.
my $CVS_Version_String = '$Id: split_mlf.pl,v 1.2 2004/02/04 23:19:57 tuerk Exp $';
my $CVS_Name_String = '$Name: rel-1-4-01 $';

# File-scoped state shared by the sections of this script.

# Command line options.
my ($help, $Identify, $S_list_file, $outdir);

# Input MLF: file name, handle and the complete content (one line per element).
my ($mlf_file, $mlf_handle, @mlf_file_content);

# Filter list (option S_list): handle, raw lines and the lookup table whose
# keys are speaker, session and recording name concatenated.
my ($S_list_handle, @S_list_file_content, %rec_allowed);

# Output (option outdir): handle and name of the file currently written.
my ($fid, $current_outputfile);

use lib $ENV{"MASV_PERL_ROOT"};

use SR_lib;

use Getopt::Long qw( GetOptions );
use Pod::Usage;

# Option defaults: no help, no version banner, and neither of the two
# mutually exclusive modes (outdir / S_list) selected.
$help = 0;
$Identify = 0;
$S_list_file = '';
$outdir = '';

# GetOptions returns false when it meets an unknown or malformed option;
# treat that as a usage error instead of silently carrying on.
GetOptions('outdir=s' => \$outdir,
           'S_list=s' => \$S_list_file,
           'help|?' => \$help,
           'version' => \$Identify) or pod2usage(2);

if ($Identify) {
	# Plain print: the text is data, not a printf format, so a '%' in the
	# script path must not be interpreted as a conversion.
	print "$0\n$CVS_Version_String\n$CVS_Name_String\n\n";
}

# Exactly one positional argument (the MLF file) is required.
# pod2usage(1) prints the usage message and exits with status 1 itself.
if ((@ARGV != 1) || ($help)) {
	pod2usage(1);
}

# Exactly one of the two modes must be chosen.
if ( (!($outdir) && !($S_list_file) ) || (($outdir) && ($S_list_file) ) ){
	die("Please specify either a S_list_file for filtering or an output directory!\n\n");
}

$mlf_file = $ARGV[0];

# Read the complete MLF into memory; it is inspected once for the format
# check and then scanned line by line.
$mlf_handle = &SR_lib::open_file("<", $mlf_file);
@mlf_file_content = <$mlf_handle>;
close $mlf_handle;

# Read the filter list (option S_list) and remember every recording it
# names in %rec_allowed.
if ($S_list_file) {
	$S_list_handle = &SR_lib::open_file("<", $S_list_file);
	@S_list_file_content = <$S_list_handle>;
	close $S_list_handle;

	foreach my $line (@S_list_file_content) {
		# An entry must end in .../<4-digit speaker>/<2-digit session>/<name>.
		# The capture variables are only meaningful after a successful
		# match, so lines that do not fit (e.g. blank lines) are skipped.
		next unless $line =~ /\/(\d{4})\/(\d{2})\/(.*)$/;
		my ($spk, $ses, $rec) = ($1, $2, $3);

		# drop the file extension (everything from the first dot on)
		$rec =~ s/\..*//;

		# speaker and session have fixed widths, so plain concatenation
		# yields an unambiguous key
		$rec_allowed{"${spk}${ses}${rec}"} = 1;
	}
}


# Test the third line (first label line of the first record); it must look
# like HVite output, i.e. start with two numeric fields.
# Sample line (the trailing entries are not always present):
# 0 1100000 ...
# A file too short to have a third line contains no record and therefore
# nothing to validate; it is accepted and simply produces no records.
my @test_array = defined $mlf_file_content[2]
	? split( /\s/, $mlf_file_content[2])
	: ();
# Both fields have to be numeric. The negation must cover the whole
# conjunction: '!' binds tighter than '&&', so negating only the first
# match would let most malformed files through.
if ( defined $mlf_file_content[2] &&
	!( defined $test_array[0] && ($test_array[0] =~ /^\d+$/) &&
	   defined $test_array[1] && ($test_array[1] =~ /^\d+$/) ) ) {
		die ("not a correct mlf format\n");
}

	


# Identification of the record currently being scanned.
my ($current_speaker, $current_session, $current_recording);

# True while the lines of the current record are to be copied to the output.
# It is only set by a recognised filename line and cleared again at the end
# of that record, so lines of an unrecognised record are never attributed to
# the record before it.
my $print_flag = 0;

# In filter mode all selected records go into one MLF on STDOUT, so the MLF
# header is written exactly once up front.
if ($S_list_file) {
	print STDOUT "#!MLF!#\n";
}

foreach my $line (@mlf_file_content) {

	if ($line =~ /^\d+\s+\d+/) {
		# label line ("start end ..."): part of the current record
		next unless $print_flag;
		if ($S_list_file) {
			print STDOUT $line;
		}
		else {
			print $fid $line;
		}
	}
	elsif ($line =~ /^".*\/(\d{4})\/(\d{2})\/(.*)"/ ) {
		# quoted filename of a recording: a new record starts here
		($current_speaker, $current_session, $current_recording) = ($1, $2, $3);
		# drop the file extension (everything from the first dot on)
		$current_recording =~ s/\..*//;

		# split mode copies every record, filter mode only the listed ones
		$print_flag = !($S_list_file) || ($rec_allowed{"${current_speaker}${current_session}${current_recording}"});
		next unless $print_flag;

		if ($S_list_file) {
			print STDOUT $line;
		}
		else {
			# A handle is still open only if the previous record was not
			# terminated by a "." line; finish that file first.
			if ($fid) {
				close $fid or die "Cannot close $current_outputfile: $!\n";
				$fid = undef;
			}
			$current_outputfile = "${outdir}/${current_speaker}/${current_session}/${current_recording}.mlf";
			&SR_lib::check_and_create_dir("${outdir}/${current_speaker}/${current_session}");
			$fid = &SR_lib::open_file(">" , $current_outputfile);
			# every per-recording file is a complete MLF of its own
			print $fid "#!MLF!#\n";
			print $fid $line;
		}
	}
	elsif ($line =~ /^\./) {
		# single dot = end of the current record
		next unless $print_flag;
		if ($S_list_file) {
			print STDOUT $line;
		}
		else {
			print $fid $line;
			# buffered write errors surface at close, so check it
			close $fid or die "Cannot close $current_outputfile: $!\n";
			$fid = undef;
		}
		# nothing more belongs to this record
		$print_flag = 0;
	}

}

# The input ended without a terminating "." line: flush the last file.
if ($fid) {
	close $fid or die "Cannot close $current_outputfile: $!\n";
	$fid = undef;
}

exit 0;


__END__

=head1 NAME

split_mlf.pl  - Split a mlf file to individual mlfs containing only one recording
                per mlf (option outdir) or select individual recordings and print
                them to stdout (option S_list). Both options are mutually exclusive!

=head1 SYNOPSIS

split_mlf.pl  [options] mlf_file

 Options:

 -outdir = s              target directory when each recording 
                          is written in a single mlf_file.
                          default: no default, this option is mutually 
                                   exclusive with S_list

 -S_list = s              S_list file specifies which recordings are 
                          selected from mlf_file.
                          default: print all recordings in mlf_file

 -version                 print version information.

 -? | help                display this message.
 

=cut

