/*
 *                        Copyright (c) 1992 by
 *              Philips GmbH Forschungslaboratorien, Aachen
 *
 * FACILITY:
 *  Language Modeler
 *
 * ABSTRACT:
 *  lmmain.h - Contains the declarations of the 99-II-integration
 *             interface routines for the language model plus an
 *             extension for handling the command class VM2:Command.
 *
 * ========================
 * EXTENDED FUNCTIONALITY :
 * ========================
 *       If the class VM2:Command is contained in the LM's class list,
 *       its history-independent probability may be increased using
 *       the new routine LMSetCmdWeight. In general, the class transition
 *       probabilities will have the following form:
 *
 *       p (class | class-hist) =
 *                             CmdWeight  * delta  (class, VM2:Command)
 *                      + (1 - CmdWeight) * p_text (class | class-hist)
 *
 *       Here, delta (x, y) is 1 if x==y and is 0 otherwise. 
 *       p_text is an LM trained for command free text (i.e. 
 *       p_text (VM2:Command | class-hist) is always very small).
 *       After LM initialization, CmdWeight is always set to zero.
 *       LMSetCmdWeight allows it to be changed to any value
 *       0 <= CmdWeight < 1.
 *
 * ====================
 * CHANGED INTERFACES !
 * ====================
 *       As opposed to previous LMs and their interface, the new
 *       LMs and their interface are class-based. Several routines
 *       are offered for different tasks. In particular, simplified
 *       routines are provided for word probabilities (if no class
 *       probabilities are needed).
 *
 * ================
 * CHANGED LMType !
 * ================
 *       The new LMType has to be VMClassMGramScore instead of the
 *       previously used LMType WordMgramScore.
 *
 *-----------------------------------------------------------------------------
 *
 * OVERVIEW:
 * 
 *  For those who do not want to care about word class mappings
 *  and who want to use _ONLY_ _WORD_ transition probabilities: 
 *  
 *   - LMWordInit      (this is used instead of LMInit)
 *   - LMWordScore     (this is used instead of LMScore)
 *   - LMSetCmdWeight  is used in the general form
 *   - LMFree          is used in the general form
 *   
 *  The general version of the new LM interface uses the routines:
 *  
 *   - LMInit
 *   - LMScore
 *   - LMSetCmdWeight
 *   - LMFree
 *   
 *  The following description will start with the simpler routines
 *  LMWordInit, LMWordScore, LMSetCmdWeight, and LMFree. Thereafter, 
 *  the general routines LMInit and LMScore are described.
 *
 *-----------------------------------------------------------------------------
 *
 *  LMWordInit:
 *      Initialize a language model. Read the needed data about the language 
 *      model from a data file. Read the mapping from words to classes and
 *      the class sizes from two other data files. Precalculate the lm scores
 *      for the wanted model. A message is put to stdout if an error occurs 
 *      and the routine returns NULL.
 *
 *      LMFile          - Name of lm data file.
 *      MapFile         - Two-column file listing all words and the names
 *                        of their associated classes.
 *      SizeFile        - Two-column file listing all classes and their
 *                        sizes (the number of words contained therein).
 *      RcgWords        - Array of pointers to the strings of the wanted
 *                        vocabulary. The order of the words determines
 *                        the index of the words, starting with index 0.
 *                        No references will be made later on (after
 *                        initialization) to either the pointers or the
 *                        strings, thus the memory may be freed after this
 *                        initialization.
 *      RcgNumWords     - Number of words in the RcgWords.
 *      Return Value    - Pointer to a structure specifying the language model.
 *                        NULL if an error occurred.
 *
 *  LMSetCmdWeight:
 *      Routine to change the command class probability as described
 *      under "EXTENDED FUNCTIONALITY". An error will occur, if the
 *      LM's class list does not contain VM2:Command.
 *
 *      LM              - Structure specifying the language model.
 *      CmdWeight       - New value 0 <= CmdWeight < 1. 
 *      
 *  LMWordScore:
 *      Routine to get the negative logarithm of the probability of a word
 *      given its history according to the language model. 
 *
 *      LM              - Structure specifying the language model.
 *      WordHist        - Array with the indexes of the word sequence for
 *                        which we want to get the transition probability:
 *                        WordHist[0] = index of the current (predicted) word
 *                        WordHist[1] = index of the last word of the history
 *                        WordHist[2] = index of the last but one history word
 *      HistLength      - Number of words in WordHist (incl. WordHist[0]).
 *      Return Value    - Negative logarithm of probability of the current
 *                        word.
 *
 *  LMFree:
 *      Free all resources allocated for a language model.
 *
 *      LM              - Structure specifying the language model
 *
 *-----------------------------------------------------------------------------
 *
 *  The general interface works as follows:
 *      During LMInit, a list of class names and class sizes is specified 
 *      instead of the word list. The scores returned by LMScore describe 
 *      the transition probability from the specified history to the 
 *      specified current (predicted) class multiplied by a uniform word 
 *      emission probability of 1 / ClassSizes[current class].
 *      If ClassSizes[*] is set to 1, the word emission probability drops 
 *      out, and a class transition probability score is returned.
 *  
 *  LMInit:
 *      Initialize a language model. Read the needed data about the language 
 *      model from a data file. Extract the information for the desired
 *      class list and precalculate the lm scores for the wanted model.
 *      A message is put to stdout if an error occurs and the routine returns
 *      NULL.
 *
 *      LMFile          - Name of lm data file.
 *      Classes         - Array of pointers to the strings of the wanted
 *                        classes. The order of the classes determines the
 *                        index of the classes, starting with index 0. 
 *                        No references will be made later on (after
 *                        initialization) to either the pointers or the
 *                        strings, thus the memory may be freed after this
 *                        initialization.
 *      ClassSizes      - For word transition scores: Array of integers
 *                             indicating the number of words in the different
 *                             classes (as indexed in the array Classes).
 *                         For class transition scores: Array with all
 *                             entries = 1.
 *      NumClasses      - Number of classes in arrays Classes and ClassSizes.
 *      Return Value    - Pointer to a structure specifying the language model.
 *                        NULL if an error occurred.
 *
 *  LMScore:
 *      Routine to get the negative logarithm of the probability of a word
 *      or a class given its history according to the language model. 
 *
 *      LM              - Structure specifying the language model.
 *      ClassHist       - Array with the indexes of the class sequence for
 *                        which we want to get the transition probability:
 *                        For word transition probabilities:
 *                            ClassHist[0] = index of the class containing
 *                                           the current (predicted) word
 *                            ClassHist[1] = index of the class containing
 *                                           the last word of the history
 *                            ClassHist[2] = index of the class containing
 *                                           the last but one history word
 *                        For class transition probabilities (LMInit must
 *                        have been called using ClassSizes[*] == 1):
 *                            ClassHist[0] = index of the current 
 *                                           (predicted) class 
 *                            ClassHist[1] = index of the class containing
 *                                           the last word of the history
 *                            ClassHist[2] = index of the class containing
 *                                           the last but one history word
 *                            etc.
 *      HistLength      - Number of entries in ClassHist (incl. ClassHist[0]).
 *      Return Value    - Negative logarithm of probability of the current
 *                         word or class.
 *
 *  LMFree:
 *      as described above
 *
 *-----------------------------------------------------------------------------
 *
 * ENVIRONMENT:
 *      VAX/VMS; ULTRIX
 *
 * AUTHOR:
 *  Reinhard Kneser, Jochen Peters
 *
 * CREATION DATE: 30-Jan-1992
 *
 * MODIFICATION HISTORY:
 *
 * $Log: lm.h,v $
 * Revision 2.3  1999/06/24  14:08:47  peters
 * Extended functionality: New routine LMSetCmdWeight added.
 *
 * Revision 2.2  1999/05/17  16:22:22  peters
 * Extended new LM interface for integration 99-II:
 * LMWordInit and LMWordScore added for ease of use.
 *
 * Revision 2.1  1999/05/15  13:06:22  peters
 * New LM interface for integration 99-II (class-based).
 *
 * SccsId "@(#)lmmain.h 1.2 2/27/92 Philips Forschungslaboratorien, Aachen, (c)1992"
 */

#ifndef lmmain_h
#define lmmain_h

/*
 * Handle type for a language model. struct LMStruct is only
 * forward-declared in this header (its members are not defined here),
 * so users of this interface work with LMType pointers as returned by
 * LMInit / LMWordInit and pass them to the other LM routines.
 */
typedef struct LMStruct LMType;

/*
 * General interface (word and class probabilities as explained above):
 */

/*
 * LMInit - Initialize a class-based language model from a data file.
 *
 *   LMFile      - Name of the lm data file.
 *   Classes     - Array of class name strings; the position of a name
 *                 in the array is the class index, starting with 0.
 *                 Neither the pointers nor the strings are referenced
 *                 after initialization, so the caller may free them.
 *   ClassSizes  - For word transition scores: number of words in each
 *                 class (indexed like Classes).
 *                 For class transition scores: all entries = 1.
 *   NumClasses  - Number of entries in Classes and ClassSizes.
 *
 *   Returns a pointer to the language model, or NULL if an error
 *   occurred (in which case a message is put to stdout).
 */
LMType *LMInit (char *LMFile, char *Classes[],
                 int *ClassSizes, int NumClasses);
/*
 * LMScore - Negative logarithm of the probability of a word or class
 *           given its history.
 *
 *   LM          - Language model returned by LMInit.
 *   ClassHist   - Class indexes of the sequence to be scored:
 *                 ClassHist[0] = class of the current (predicted) word,
 *                 ClassHist[1] = class of the last history word,
 *                 ClassHist[2] = class of the last but one, etc.
 *   HistLength  - Number of entries in ClassHist (incl. ClassHist[0]).
 *
 *   The score is the class transition probability multiplied by a
 *   uniform word emission probability 1 / ClassSizes[current class];
 *   with all ClassSizes == 1 it is the pure class transition score.
 */
double LMScore (LMType *LM, int *ClassHist, int HistLength);

/*
 * Simplified interface if only word probabilities are used:
 */

/*
 * LMWordInit - Initialize a language model for word-level scoring
 *              (used instead of LMInit).
 *
 *   LMFile      - Name of the lm data file.
 *   MapFile     - Two-column file listing all words and the names of
 *                 their associated classes.
 *   SizeFile    - Two-column file listing all classes and their sizes
 *                 (the number of words contained therein).
 *   RcgWords    - Array of word strings of the wanted vocabulary; the
 *                 position of a word in the array is its word index,
 *                 starting with 0. Neither the pointers nor the strings
 *                 are referenced after initialization, so the caller
 *                 may free them.
 *   RcgNumWords - Number of words in RcgWords.
 *
 *   Returns a pointer to the language model, or NULL if an error
 *   occurred (in which case a message is put to stdout).
 */
LMType *LMWordInit (char *LMFile, char *MapFile, char *SizeFile,
                    char *RcgWords[], int RcgNumWords);
/*
 * LMWordScore - Negative logarithm of the probability of a word given
 *               its history (used instead of LMScore).
 *
 *   LM          - Language model specifying structure.
 *   WordHist    - Word indexes of the sequence to be scored:
 *                 WordHist[0] = current (predicted) word,
 *                 WordHist[1] = last word of the history,
 *                 WordHist[2] = last but one history word.
 *   HistLength  - Number of words in WordHist (incl. WordHist[0]).
 */
double LMWordScore (LMType *LM, int *WordHist, int HistLength);

/*
 * General free routine:
 *
 * LMFree - Free all resources allocated for a language model. Used in
 *          the same form for the general and the simplified interface.
 *
 *   LM          - Structure specifying the language model.
 */

void LMFree (LMType *LM);

/*
 * General routine for changing the command class probability:
 *
 * LMSetCmdWeight - Set the history-independent weight of the class
 *                  VM2:Command. The class transition probability is
 *                    CmdWeight * delta (class, VM2:Command)
 *                    + (1 - CmdWeight) * p_text (class | class-hist).
 *                  After LM initialization, CmdWeight is zero.
 *
 *   LM          - Structure specifying the language model.
 *   CmdWeight   - New value, 0 <= CmdWeight < 1.
 *
 *   An error will occur if the LM's class list does not contain
 *   VM2:Command.
 */

void LMSetCmdWeight (LMType *LM, double CmdWeight);

#endif
