/***********************************************************************
************************************************************************

Module		: wav2trn.c
Title		: detects begin and end of speech in signal file

Author		: F. Schiel
Datum/Aenderung : 04.08.08 / 05.02.18

Description	: 
Reads a RIFF WAVE sound file from required argument wav=..., which contains a 
mono signal with a single word/utterance/sentence, detects the onset and offset 
of the speech signal by applying heuristic thresholds to the energy of 
the lowpass filtered signal and prints either the start sample and the length
in samples, or a BAS Partitur Format (BPF) tier TRN to stdout (if par file is
given via option par=...).
The TRN tier of BPF is defined in www.bas.uni-muenchen.de/Bas/BasFormatseng.html

First the signal is completely loaded into memory (beware of long sounds!),
then a winlen=... (WININC units msec) rectangular energy window is moved across the 
signal in WININC msec increments, the signal is lowpass filtered by a simple
averaging of width +/- AVER and the energy (power of 2) is calculated from 
the filtered signal within the window and normalized to window length. 
The maximum and minimum energy is determined by averaging the 3 highest and 3 lowest 
energy values.

Thresholds:
1. When the energy function reaches for the 
first time perstart=...
percent of the maximal energy value the start boundary is placed to the 
lefthand side of the corresponding energy window minus the security margin 
of seclen msec. 
2. Where the energy functions falls 
for the last time below perstop=... percent of the maximal energy value the 
end boundary is placed to the righthand side of the corresponding energy window
plus the security margin seclen msec.

These first two thresholds are a kind of 'emergency thresholds' for the case
that the more sophisticated following thresholds fail because of too much noise.

The absolute minimum energy in a window is multiplied by minsilbegfak and
minsilendfak to get two thresholds for start and end.
3. A window of minsildur energy values length is moved from left to right over 
the energy function. When all values within this window are above threshold
the start boundary is placed to the lefthand-side of the first energy window.
4. A window of minsildur energy values length is moved from right to left over 
the energy function. When all values within this window are above threshold
the end boundary is placed to the righthand-side of the last energy window.

The lowest start and the highest end boundary is selected.
Finally the start and end boundary are extended left and right by seclen msec.

Although the size minsildur can be set from command line (option minsildur=...),
it is recommended to keep the length in the range of a phoneme
length to robustly detect the speech boundaries (80-100 msec).
By increasing the constant factor seclen=... [msec] you can increase the 
security margin for both boundaries.

Segmentation in speech events
In case a recording contains several turns of a speaker interrupted
by silence or a far distant other speaker, the option 'mult=1' causes the
program to produce a speech/non-speech segmentation.
The boundaries are found according to the same rules as used above for the
fine detection of the initial boundary (minsildur windows must show an
energy value above minsilener*minsilbegfak to detect speech). For each 
found speech segment one TRN line is output to stdout giving the begin and
duration of the speech segment in samples. The list of speech segments is 
filtered according to a minimum length of the silence intervals between them 
(minsillength). Note that the default minsildurmult=5 differs from the default
minsildur=10 of the normal case and that the security margin SECLENMULT is 
subtracted from/added to the selected boundaries.
Option mult=1 does not work with option par=... since we cannot assign word
numbers to individual speech segments; therefore the word number in this
case is always set to '-1' (= unknown alignment to words). The labels 
of the speech segments are simply numbered starting from 0. E.g.
TRN:	67876	45000	-1	0
TRN:	129789	34879	-1	1
TRN:	234156	27689	-1	2
...

Usage:
wav2trn wav=file.wav [perstart=2 perstop=1 winlen=2 seclen=50 minsildur=10 minsilbegfak=100 minsilendfak=20 minsildurmult=5 minsillength=1000 par=file.par mult=no]

v=yes : prints infos to stderr (default=no)
wav=... : input file, WAV mono, all other formats are rejected
par=... : corresponding BPF file to retrieve number of words and file name
mult=no : if set to yes, switch to speech segmentation mode (see above)
perstart=... : threshold in percent of max energy for beginning speech
perstop=... : threshold in percent of max energy for end speech
winlen=... : window length in WININC units msec
minsilbegfak=... : multiplies the minimum energy to achieve the start threshold
minsilendfak=... : multiplies the minimum energy to achieve the end threshold
minsildur=... : minimum number of energy values for threshold
minsildurmult=... : same as minsildur, but used in speech segmentation mode
minsillength=... : minimum silence interval length in msec (segmentation mode)
seclen=... : found boundaries are extended by seclen msec to both sides

Linking modules or libraries:
ipkclib,SDL

Contained functions:
			: 

Return value:
0 : everything seems alright
-1 : some error (details are printed to stderr) 


*************************************************************************/
# include <stdio.h>
# include <stdlib.h>
# include <string.h>
# include <strings.h>
# include <ctype.h>
# include <ipkclib.h>
# include "/usr/include/SDL/SDL.h"

/* DEFINES, only used within this module ********************************/
# define WINLEN 2            /* default energy window length in WININC units
                               (option winlen=...) */
# define WININC 10            /* window increment in msec */
# define THRESSTART 2        /* default percent of max. energy that marks the
                               start of speech (option perstart=...) */
# define THRESSTOP 1        /* default percent of max. energy that marks the
                               end of speech (option perstop=...) */
# define SECLEN 50          /* default security margin in msec that is
			       subtracted from the start and added to the end
			       of the found segment (option seclen=...) */
# define SECLENMULT 200     /* security margin in msec used in segmentation
                               mode (mult=1) instead of SECLEN */

# define AVER 1		    /* filter width: signal is averaged in window
                               +/- AVER samples before squaring to dampen 
			       high frequency noise */
# define MINSILBEGFAK 100       /* start energy threshold: MINSILBEGFAK * minenergy */
# define MINSILENDFAK 20       /* end energy threshold: MINSILENDFAK * minenergy */
# define MINSILDUR 10           /* default number of consecutive energy values
                                  that must all lie above the energy threshold
                                  (option minsildur=...) */
# define MINSILDURMULT 5       /* same as MINSILDUR for mult=1: must be smaller to avoid cutoffs */ 
# define MINSILLENGTH 1000   /* minimum length of within-utterance silence 
                               intervals in msec (mult=1 mode only) */				  

/* TYPE DEFINITIONS, only used within this module  **********************/

/* GLOBAL VARIABLES (avoid, if possible !) ******************************/


/* FUNCTION PROTOTYPES, capsuled in this module *************************/

/* Print the fallback result that covers the complete signal (start 0,
   length nsamples): a plain number pair when parfilenam is NULL, otherwise
   a BPF TRN line built from the given link list and label. */
static void print_whole_signal(long nsamples,const char *parfilenam,
                               const char *links,const char *label)
{
  if(parfilenam == NULL)
    /* output results as simple number pair */
    printf("0\t%ld\n",nsamples);
  else
    /* output results in BPF tier TRN */
    printf("TRN:\t0\t%ld\t%s\t%s\n",nsamples,links,label);
}

/* main - detect begin and end of speech in a mono 16 bit RIFF WAVE file.

   Steps:
   1. parse and validate the command line (icsiargs),
   2. load the complete signal into memory (SDL_LoadWAV),
   3. optionally read the BPF file (par=...) to build the comma separated
      link list (one index per ORT: line) and derive the label from the
      WAV file name (last four characters, i.e. ".wav", removed),
   4. compute one energy value per analysis window from the signal
      averaged over +/- AVER samples,
   5. normal mode  : find one start and one end boundary and print them,
      mult mode    : find all speech segments, merge those separated by
                     less than minsillength msec and print one TRN line
                     per merged segment.

   Returns 0 on success (also when the signal is too short or no boundary
   was found and the whole signal is reported); calls exit(-1) on errors. */
int main(int argc,const char **argv)
{
	char 	*infilenam,
                *parfilenam=NULL,
		*appname,
		*cptr,
                line[65536],
		links[65536],
		label[65536];
	long    mult_flag = 0;
        long 	verb = 0;
	long    segcount,segnr = 0;
	short   sp_flag = 0;
	long	perstart=THRESSTART,perstop=THRESSTOP;
	short   *sptr,*ssptr;
	long    starts[65536],ends[65536];
	const long maxseg = (long)(sizeof starts / sizeof starts[0]);
	long    winlensam,winincsam,winanz,startsam = 0,stopsam = 0,i,k,l,
		wordnum,winlen=WINLEN,seclen=SECLEN,minsilbegfak=MINSILBEGFAK,
		minsilendfak=MINSILENDFAK,seclensam,minsildur = 0,
		minsillength=MINSILLENGTH,minsillengthsam,minsildurmult = 0,
		nsamples;
	size_t  room,namelen;
	int     written;
	double  ener,*energy,maxenergy1,minenergy1,thresstart,thresstop,
	        minsilener,maxenergy2,maxenergy3,minenergy2,minenergy3;
	FILE	*parfile;
	icsiargtab_t args[] =
        {
          { 0, "determine start and end of an utterance from a RIFF WAVE file\n  if par file is given, a valid BPF TRN tier is printed to stdout\n  if par file is not given, only start sample and length are printed to stdout", ARG_DESC},
          { "v","verbose", ARG_LONG, &verb, ARG_OPT },
	  { "mult","switch to segmentation mode", ARG_LONG, &mult_flag, ARG_OPT },
	  { "wav","input file WAV mono", ARG_STR, &infilenam, ARG_REQ },
	  { "perstart","threshold for beginning speech", ARG_LONG, &perstart, ARG_OPT },
	  { "perstop","threshold for ending speech", ARG_LONG, &perstop, ARG_OPT },
	  { "winlen","window length in WININC units (WININC=10msec)", ARG_LONG, &winlen, ARG_OPT },
	  { "minsilbegfak","silence energy threshold", ARG_LONG, &minsilbegfak, ARG_OPT },
	  { "minsilendfak","silence energy threshold", ARG_LONG, &minsilendfak, ARG_OPT },
	  { "minsildur","number of windows to be above threshold", ARG_LONG, &minsildur, ARG_OPT },
	  { "minsildurmult","dito for mult=1", ARG_LONG, &minsildurmult, ARG_OPT },
	  { "minsillength","minimum silence inverval length in msec", ARG_LONG, &minsillength, ARG_OPT },
	  { "seclen","+/- tolerance length in msec", ARG_LONG, &seclen, ARG_OPT },
	  { "par","input file BPF", ARG_STR, &parfilenam, ARG_OPT },
	  {0,0,0}
	};
	SDL_AudioSpec file_spec;
	Uint8 *buffer;
	Uint32 len;


  /* links and label are printed in several places; make sure they are
     always valid (empty) strings, even if the BPF file cannot be read */
  links[0] = '\0';
  label[0] = '\0';

  /* Test for icsiargs if any and check them*/
  icsiargs(args,&argc,&argv,&appname);
  if(verb) printargs(stderr,appname,args);
  if(perstart < 1 || perstart > 99 || perstop < 1 || perstop > 99 ) 
  {
    fprintf(stderr, "%s : ERROR: perstart and perstop must lie within 1...99\n",appname);
    exit(-1);
  } 
  /* negative window counts or a non-positive window length would lead to
     out-of-range indexing of the energy array further down */
  if(winlen < 1 || minsildur < 0 || minsildurmult < 0)
  {
    fprintf(stderr, "%s : ERROR: winlen must be >= 1, minsildur and minsildurmult must not be negative\n",appname);
    exit(-1);
  }
  if(mult_flag && verb) fprintf(stderr, "wav2trn: Switching speech segmentation mode on\n");
  if(mult_flag && parfilenam != NULL)
  {
    fprintf(stderr,"%s : ERROR: options mult and par cannot be used simultaneously\n",appname);
    exit(-1);
  }  


  /* open file and check for parameters */
  if ( SDL_LoadWAV(infilenam,&file_spec,&buffer,&len) == NULL )
  {
    fprintf(stderr, "%s : Could not load RIFF WAVE file %s:\n%s\n",appname,infilenam,SDL_GetError());
    exit(-1);
  } 
  if(verb)
  {
    fprintf(stderr,"wav2trn: Sample rate: %d\n",file_spec.freq);
    fprintf(stderr,"wav2trn: Channels: %d\n",file_spec.channels);
    fprintf(stderr,"wav2trn: Format: %x\n",file_spec.format);
    fprintf(stderr,"wav2trn: Number of samples: %u\n",len/2);
  }
  if(file_spec.channels != 1)
  {
    fprintf(stderr,"%s : ERROR: not a mono sound file - exiting\n",appname);
    SDL_FreeWAV(buffer);
    exit(-1);
  }
  if(file_spec.format != 0x8010)
  {
    fprintf(stderr,"%s : ERROR: not 16bit signed  - exiting\n",appname);
    SDL_FreeWAV(buffer);
    exit(-1);
  }
  nsamples = (long)len/2;   /* 2 bytes per sample */
  minsillengthsam = minsillength * file_spec.freq / 1000;

  /* get number of words and label (file name) from BPF file, if given */
  if(parfilenam != NULL)
  {
    if((parfile = fopen(parfilenam,"r")) == NULL)
    {
      fprintf(stderr,"%s: could not open %s\n        skipping BPF\n",appname,parfilenam);
      parfilenam = NULL;
    }
    else
    {
      /* build link list according to number of ORT entries in BPF */
      wordnum = 0;
      cptr = links;
      while(fgets(line,sizeof line,parfile) != NULL)
      {
        if(strncmp("ORT:",line,4) != 0) continue;
        /* cptr always stays inside links, so room is at least 1 */
        room = sizeof links - (size_t)(cptr - links);
        written = snprintf(cptr,room,"%ld,",wordnum);
        if(written < 0 || (size_t)written >= room)
        {
          /* entry does not fit any more: drop it and stop collecting */
          *cptr = '\0';
          fprintf(stderr,"%s: WARNING: link list truncated after %ld words\n",appname,wordnum);
          break;
        }
        cptr += written;
        wordnum++;
      }
      /* delete last comma (only if at least one entry was written) */
      if(cptr > links) *(cptr-1) = '\0';
      fclose(parfile);
      /* use filename body as label: strip directory and the last four
         characters (".wav"); names shorter than that are kept as is */
      if((cptr = strrchr(infilenam,'/')) == NULL) cptr = infilenam;
      else cptr++;
      namelen = strlen(cptr);
      if(namelen >= 4) namelen -= 4;
      if(namelen > sizeof label - 1) namelen = sizeof label - 1;
      memcpy(label,cptr,namelen);
      label[namelen] = '\0';
    }
    if(verb)
    {
      fprintf(stderr,"wav2trn: Link list: %s\n",links);
      fprintf(stderr,"wav2trn: Label: %s\n",label);
    }
  }
  /* determine parameters */
  winlensam = winlen * WININC * (long)file_spec.freq / 1000; /* win length in samples */
  winincsam = WININC * (long)file_spec.freq / 1000; /* win increment samples */
  if(winincsam < 1 || winlensam <= 2*AVER)
  {
    /* would divide by zero below (sample rate < 100 Hz) */
    fprintf(stderr,"%s : ERROR: sample rate %d too low for the analysis window - exiting\n",appname,file_spec.freq);
    SDL_FreeWAV(buffer);
    exit(-1);
  }
  /* number of complete windows; a signal shorter than one window yields 0
     (integer division truncates towards zero, so the plain formula would
     report one window that reaches beyond the end of the signal) */
  if(nsamples < winlensam) winanz = 0;
  else winanz = ( ( nsamples - winlensam ) / winincsam ) + 1;
  seclensam = seclen * (long)file_spec.freq / 1000; /* tolerance in samples */
  if(verb)
  {
    fprintf(stderr,"wav2trn: Lowpass filter width in samples: %d\n",AVER*2+1);
    fprintf(stderr,"wav2trn: Window length in msec: %ld\n",winlen*WININC);
    fprintf(stderr,"wav2trn: Window length in samples: %ld\n",winlensam);
    fprintf(stderr,"wav2trn: Window increment in msec: %d\n",WININC);
    fprintf(stderr,"wav2trn: Window increment in samples: %ld\n",winincsam);
    fprintf(stderr,"wav2trn: Tolerance +/- in samples: %ld\n",seclensam);
    fprintf(stderr,"wav2trn: Number of windows: %ld\n",winanz);
  }
  /* allocate at least one element so that a zero window count is not
     mistaken for an allocation failure */
  if((energy = calloc(winanz > 0 ? (size_t)winanz : 1,sizeof *energy)) == NULL)
  {
    fprintf(stderr,"%s : cannot allocate memory for energy - exiting\n",appname);
    perror(appname);
    exit(-1);
  }  

  /* lowpass filter signal in window and calculate energy */
  sptr = (short *)buffer;
  maxenergy1 = 0.0;
  maxenergy2 = 0.0;
  maxenergy3 = 0.0;
  minenergy1 = 9.99e+99;
  minenergy2 = 9.99e+99;
  minenergy3 = 9.99e+99;
  for(i=0;i<winanz;i++) 
  {
    /* first filtered sample is AVER samples into the window so that the
       averaging never reaches outside of the window */
    ssptr = sptr + (i * winincsam) + AVER;
    energy[i] = 0.0;
    for(k=AVER;k<winlensam-AVER;k++) 
    {
      ener = 0.0;
      for(l=1;l<=AVER;l++)
        ener += *(ssptr-l) + *(ssptr+l);
      ener += *(ssptr);
      ener /= 2 * AVER + 1;
      ener *= ener;
      energy[i] += ener;
      ssptr ++;
    }  
    energy[i] /= winlensam-2*AVER;
    /* NOTE(review): a new maximum/minimum overwrites maxenergy1/minenergy1
       without moving the old value down to the second/third slot, so the
       three slots are not strictly the three highest/lowest values as the
       file header describes. Left unchanged because fixing it shifts all
       thresholds and therefore the output - confirm intended behaviour. */
    if(energy[i] > maxenergy1) maxenergy1 = energy[i];
    else if(energy[i] > maxenergy2) maxenergy2 = energy[i];
    else if(energy[i] > maxenergy3) maxenergy3 = energy[i];
    if ( energy[i] != 0.0 )
    {
      /* windows with zero energy are ignored for the minimum */
      if(energy[i] < minenergy1) minenergy1 = energy[i];
      else if(energy[i] < minenergy2) minenergy2 = energy[i];
      else if(energy[i] < minenergy3) minenergy3 = energy[i];
    }  
  }
  if(verb) fprintf(stderr,"wav2trn: Max. Energy = %e\t%e\t%e\n",maxenergy1,maxenergy2,maxenergy3);
  if(verb) fprintf(stderr,"wav2trn: Min. Energy = %e\t%e\t%e\n",minenergy1,minenergy2,minenergy3);
  maxenergy1 = (maxenergy1+maxenergy2+maxenergy3)/3;
  minenergy1 = (minenergy1+minenergy2+minenergy3)/3;

  /* determine start and end thresholds */
  thresstart = maxenergy1 * perstart / 100.0;
  thresstop = maxenergy1 * perstop / 100.0;
  minsilener = minenergy1;
  if(verb) 
  {
    fprintf(stderr,"wav2trn: Threshold energy start = %e\n",thresstart);
    fprintf(stderr,"wav2trn: Threshold energy stop = %e\n",thresstop);
    fprintf(stderr,"wav2trn: Window threshold energy start = %e\n",minsilener*minsilbegfak);
    fprintf(stderr,"wav2trn: Window threshold energy end = %e\n",minsilener*minsilendfak);
  }  

/* normal processing: find begin and end of speech (internal silence ignored) */
  if(mult_flag == 0)
  {
  
  if(minsildur == 0) minsildur=MINSILDUR;
  if(verb) fprintf(stderr,"wav2trn: Window length = %ld msec\n",minsildur*WININC);
  if(winanz < (2*minsildur) )
  {
    fprintf(stderr,"wav2trn: Signal too short to process - doing nothing\n");
    fprintf(stderr,"wav2trn: you might try a smaller window length (winlen=...)\n");
    print_whole_signal(nsamples,parfilenam,links,label);
    exit(0);
  }  
  /* find start sample 
     start sample is on the lefthand side of the first energy value above threshold 
     thresstart OR
     start sample is on the lefthand side of the first energy value where this value
     and the next minsildur-1 energy values are all above minenergy*minsilbegfak */
  i=0; /* index over all energy values in signal */
  k=0; /* index within minsildur window */
  while(energy[i]<thresstart && k != minsildur && i < (winanz-minsildur) ) 
  {
    for(k=0;k<minsildur;k++) if(energy[i+k] < minsilener*minsilbegfak) break;
    i++;
  }  
  if ( i == (winanz-minsildur) )
  {
    fprintf(stderr,"wav2trn: Something is wrong: could not find a start boundary\n");
    fprintf(stderr,"wav2trn:  until the end of file was reached\n");
    fprintf(stderr,"wav2trn:  Maybe the signal is too short\n");
    print_whole_signal(nsamples,parfilenam,links,label);
    exit(0);
  }  
  i--;
  startsam = (i*winincsam) - seclensam;
  if(startsam<0) startsam = 0;

  /* find end sample 
     end sample is on the righthand side of the last energy value above threshold 
     thresstop OR
     end sample is on the righthand side of the last energy value where this value
     and the previous minsildur-1 energy values are all above minsilener*minsilendfak */
  i=winanz-1;
  k=0;
  while(energy[i]<thresstop && k != minsildur && i > minsildur-1 )
  {
    for(k=0;k<minsildur;k++) if(energy[i-k] < minsilener*minsilendfak) break;
    i--;
  }  
  /* the loop above runs out of windows at i == minsildur-1 (mirror image
     of the start search, which runs out at i == winanz-minsildur) */
  if ( i == minsildur-1 )
  {
    fprintf(stderr,"wav2trn: Something is wrong: could not find a end boundary\n");
    fprintf(stderr,"wav2trn:  until the begin of file was reached\n");
    fprintf(stderr,"wav2trn:  Maybe the signal is too short\n");
    print_whole_signal(nsamples,parfilenam,links,label);
    exit(0);
  }  
  i++;
  stopsam= (i*winincsam) + winlensam + seclensam;
  if(stopsam>nsamples) stopsam=nsamples;
  if(verb)
  {
    fprintf(stderr,"wav2trn: Start sample: %ld\n",startsam);
    fprintf(stderr,"wav2trn: End sample: %ld\n",stopsam);
    fprintf(stderr,"wav2trn: Duration sample: %ld\n",stopsam-startsam+1);
  }

  if(parfilenam == NULL)
    /* output results as simple number pair */
    printf("%ld\t%ld\n",startsam,stopsam-startsam+1);
  else
    /* output results in BPF tier TRN */
    printf("TRN:\t%ld\t%ld\t%s\t%s\n",startsam,stopsam-startsam+1,links,label);

  } /* end normal mode */

  else
  /* speech segmentation mode (mult=yes)
     going over all energy windows-minsildur and check whether the 
     following minsildur windows all have energy above minsilener*minsilbegfak.
     if so, a speech begin is stored; if not a speech end is stored */
  {
  /* i = index over all energy values in signal */
  /* k = index within minsildur window */
  if(minsildurmult == 0) minsildur=MINSILDURMULT;
  else minsildur=minsildurmult;
  if(verb) fprintf(stderr,"wav2trn: Window length = %ld msec\n",minsildur*WININC);
  if(verb) fprintf(stderr,"wav2trn: Minimum silence length = %ld samples\n",minsillengthsam);
  if(winanz < (2*minsildur) )
  {
    fprintf(stderr,"wav2trn: Signal too short to process - doing nothing\n");
    fprintf(stderr,"wav2trn:  you might try a smaller window length (winlen=...)\n");
    print_whole_signal(nsamples,parfilenam,links,label);
    exit(0);
  }  
  for(i=0;i<(winanz-minsildur);i++)
  {
    for(k=0;k<minsildur;k++) if(energy[i+k] < minsilener*minsilbegfak) break;
    if(k == minsildur)
    /* all k windows above threshold */
    {
      if(verb) fprintf(stderr,"wav2trn: Detected speech at window %ld\n",i);
      if(sp_flag == 0)
      /* new speech segment begins */
      {
        startsam = (i*winincsam);
        sp_flag=1;
        if(startsam<0) startsam = 0;
        if(verb) fprintf(stderr,"wav2trn: New segment begins at %ld\n",startsam);
      }
    }
    else
    /* at least one window below threshold */
    {
      if(verb) fprintf(stderr,"wav2trn: Detected non-speech at window %ld\n",i);
      if(sp_flag == 1)
      /* speech segment ends */
      {
        sp_flag = 0;
        stopsam = ((i+minsildur-1)*winincsam);
        if(stopsam>nsamples) stopsam=nsamples;
        if(verb) fprintf(stderr,"wav2trn: Segment ends at %ld\n",stopsam);
        if(segnr >= maxseg)
        {
          fprintf(stderr,"wav2trn: ERROR: more than %ld speech segments - exiting\n",maxseg);
          exit(-1);
        }
        starts[segnr]=startsam;
        ends[segnr]=stopsam;
        segnr++;
      }
    }
  }
  /* in case we haven't found the end of the last segment, put it to end */
  if(sp_flag == 1)
  {
    if(verb) fprintf(stderr,"wav2trn: Last segment without end: %ld\t%ld\t-1\n",startsam,nsamples);
    if(segnr >= maxseg)
    {
      fprintf(stderr,"wav2trn: ERROR: more than %ld speech segments - exiting\n",maxseg);
      exit(-1);
    }
    starts[segnr]=startsam;
    ends[segnr]=nsamples;
    segnr++;
  }
  /* seclensam must be longer and applied after the first segmentation
     to avoid overlapping segments. 2*seclensam must be less than 
     minsillengthsam! */
  seclensam = SECLENMULT * (long)file_spec.freq / 1000;
  if(2*seclensam>minsillengthsam)
  {
    fprintf(stderr,"wav2trn: ERROR: 2*seclensam>minsillengthsam - exiting\n");
    exit(-1);
  }
  /* filter all SILENCE intervals that are less than minsillengthsam and 
     produce output with boundaries extended by seclensam
     First/last silence segment can have any length 
     Go over all speech segments */
  if(verb) for(k=0;k<segnr;k++) fprintf(stderr,"wav2trn: starts[%ld] = %ld, ends[%ld] = %ld\n",k,starts[k],k,ends[k]);
  k=0;
  segcount=0;
  while(k<segnr)
  {
    /* extend the group that starts with segment k over all following
       segments as long as the silence interval to the next segment is
       not larger than minsillengthsam; only gaps between two existing
       segments are inspected, so the last segment always closes a group */
    i=k;
    while(i<segnr-1 && (starts[i+1]-ends[i])<=minsillengthsam) i++;
    /* output one speech segment covering segments k...i */
    startsam=starts[k]-seclensam;
    if(startsam<0) startsam = 0;
    stopsam=ends[i]+seclensam;
    if(stopsam>nsamples) stopsam=nsamples;
    printf("TRN:\t%ld\t%ld\t-1\t%ld\n",startsam,stopsam-startsam-1,segcount++);
    k=i+1;
  }
  } /* end speech segmentation mode */

  /* free buffer */
  SDL_FreeWAV(buffer);
  free(energy);
  

  return(0);
}
