/*
 *                        Copyright (c) 1993 by
 *              PFA Philips GmbH Forschungslaboratorien Aachen
 *                            Reinhard Kneser
 *
 * $Log: lm.c,v $
 * Revision 2.3  1999/06/24  14:08:17  peters
 * Extended functionality: New routine LMSetCmdWeight added.
 *
 * Revision 2.2  1999/05/17  16:21:48  peters
 * Extended new LM interface for integration 99-II:
 * LMWordInit and LMWordScore added for ease of use.
 *
 * Revision 2.1  1999/05/15  13:07:06  peters
 * New LM interface for integration 99-II (class-based).
 *
 * Revision 1.1  1999/05/15  13:00:04  peters
 * Initial revision
 *
 * Revision 1.3  1997/03/17  11:46:30  klakow
 * Bug fixed in LMTree_Free
 *
 * Revision 1.2  1996/03/01  12:26:54  kneser
 * UnknownMap parameter implemented.
 *
 * Revision 1.1  1994/03/09  13:53:56  kneser
 * Initial revision
 *
 * =====================
 * CHANGED INTERFACE !!!
 * =====================
 *
 *  !  The new LMType has to be VMClassMgramScore instead of the
 *  !  previously used LMType WordMgramScore. The new interface
 *  !  is specified in the header file lm.h.
 *
 * This module contains an implementation of a new general 
 * language model interface which is given by the routines 
 *
 *     LMInit, LMScore and LMFree.
 *
 * If only word probabilities are used, simplified routines are provided:
 *
 *     LMWordInit and LMWordScore (plus LMFree).
 *
 * This implementation is able to read language models with
 *
 *     LMType = VMClassMgramScore.
 *
 * In this implementation we assume that the input file is ok and therefore
 * only the obvious format checks are performed. The execution of the program
 * is terminated with an error message when an error occurs.
 * The following error messages may occur:
 *
 * LMType ... not supported - This implementation can only handle language
 *    models of type VMClassMgramScore.
 * LMScore: invalid index ... - The specified m-gram in a call to LMScore
 *    contains indexes which are negative or not smaller than the number of
 *    classes given at initialization time.
 * LM file format error: ... - When the file is corrupted or inconsistent
 *    a number of errors may occur.
 * Error allocating memory - A call to malloc, calloc or realloc failed to
 *    reserve memory.
 *
 * The implementation is written in ANSI-C and should be machine independent.
 *
 * Please send bugs and comments to {klakow,peters}@pfa.research.philips.com
 */


#include <stdlib.h>
#include <stddef.h>
#include <stdio.h>
#include <errno.h>
#include <stdarg.h>
#include <string.h>
#include <math.h>

#include "lm.h"


/*
 * Typed allocation helpers.
 * GetMem sets ptr to a block of nmemb objects of the type ptr points to;
 * the block is obtained from GetMemRout (calloc, i.e. zero filled).
 * MoreMem resizes the block ptr refers to (or allocates a new one when ptr
 * is NULL) to nmemb such objects via MoreMemRout; new memory is not cleared.
 * Both macros mention ptr more than once, so ptr must not have side effects.
 * Allocation failures terminate the program inside the helper routines.
 */
#define GetMem(ptr,nmemb) ((ptr) = GetMemRout ((nmemb)*sizeof(*(ptr))))
#define MoreMem(ptr,nmemb) ((ptr) = MoreMemRout((ptr),(nmemb)*sizeof(*(ptr))))

/*
 * MaxLineLength is the size of the buffers used to read lines of the
 * language model, class size and map files. Since the buffers are filled by
 * fgets, at most MaxLineLength - 1 characters are read per call.
 */
#define MaxLineLength 256
/*
 * MGramAllocDefault is the initial number of nodes allocated for a level.
 * This will be only used when the exact number was not specified in file.
 */
#define MGramAllocDefault 2047
/*
 * EPSILON is not referenced in the visible part of this file.
 */
#define EPSILON 1e-14
/*
 * ListBlockLength is used for blockwise memory allocation while reading
 * lists of words or classes
 */
#define ListBlockLength 1024


/*
 * LMParamType contains a list of parameters.
 * Each parameter is specified by a string and holds a string as value.
 * The parameters are usually listed in a special section of the language
 * model file.
 */
typedef struct
{
  char          **Names;        /* list of the parameter name strings */
  char          **Values;       /* list of the value strings; Values[i] is */
                                /* the value of the parameter Names[i] */
  int           Num;            /* number of parameters, i.e. the length */
                                /* of both Names and Values */
}
LMParamType;


/*
 * LMTreeType contains a tree structure together with score information.
 * Each node in the tree corresponds to an m-gram, where going to a child node
 * with a given index is equivalent to appending the class with that index to
 * the right of the previous (m-1)-gram.
 * Each node is identified by its level within the tree and the position
 * within this level. The information which a node may hold is put in a
 * two dimensional array which gets addressed by the level and the position.
 * The children of a node are implemented as a sorted list of indexes which
 * must be searched when going to a child node. In order to speed up this
 * search a list of child nodes for each possible class index is given for
 * level 1.
 */

/*
 * LMNodeType addresses one node of the tree: Level selects the per-level
 * arrays of LMTreeType and Pos is the index into these arrays. The root is
 * addressed by Level 0, Pos 0.
 */
typedef struct
{
  int           Level;          /* level of the node within the tree */
  int           Pos;            /* position of the node within its level */
}
LMNodeType;

/*
 * Storage types of the tree arrays: LMIdxType holds class indexes and child
 * counts, LMScoreType holds scores and backoff weights. Values read from the
 * LM file as int / float are converted to these types when stored.
 */
typedef unsigned short int LMIdxType;
typedef float LMScoreType;

/*
 * All per-level members are arrays of length Depth + 1 which are indexed by
 * the level; the per-node arrays behind them are indexed by the position of
 * the node within its level. NumChildren, FirstChildPos and BOWeight are
 * only allocated for levels below Depth.
 */
typedef struct
{
  int           Depth;          /* highest level number in tree */
  int           *Alloc;         /* number of nodes allocated (per level) */
  int           *NextFree;      /* next unused node number (per level) */
  LMIdxType     **Idx;          /* index of node */
  LMScoreType   **Score;        /* score of node */
  LMIdxType     **NumChildren;  /* number of children of node */
  int           **FirstChildPos;/* position of first child of node */
  LMScoreType   **BOWeight;     /* backoff weight of node */
  int           *Lev1Pos;       /* level 1 position of given class index, */
                                /* -1 if there is no such level 1 node */
}
LMTreeType;


/*
 * LMStruct completes the definition of LMType in lm.h.
 */
struct LMStruct
{
  LMTreeType    *T;             /* tree structure holding score data */
  int           RcgNumClasses;  /* size of recognition class set */
  double        *EmitScores;    /* word emission scores for all classes: */
                                /* log of the class size, LM indexed */
  int           *RcgToLMIdx;    /* LUT to translate Rcg to LM class index */ 
  int           *Path;          /* tmp storage for LM indexes of ClassHist, */
                                /* T->Depth entries */
  int           CmdIdx;         /* LM index of class 'VM2:Command' or -1 */
  double        CmdWeight;      /* interpol. weight for command-only unigram */
  double        TxtScore;       /* - log (1. - CmdWeight) */
  /*
   * The following entries are used only for the LMWord*** interface:
   * they stay NULL / unused when the LM was created by LMInit directly.
   */
  int           *RcgWToCatIdx;  /* LUT to translate Rcg word to class index */
  int           *ClassHist;     /* tmp storage of Rcg class indexes of Hist */
  int           ClassHistAlloc; /* length of allocated ClassHist memory */
};


/*
 * Error writes a printf-style formatted message followed by a newline to
 * stderr and terminates the program with EXIT_FAILURE. It does not return.
 */
static void Error (char *Format, ...)
{
  va_list Args;

  va_start (Args, Format);
  (void) vfprintf (stderr, Format, Args);
  va_end (Args);
  (void) fputc ('\n', stderr);
  exit (EXIT_FAILURE);
}


/*
 * SysError works like Error but additionally prints, on a line of its own,
 * the system's description of errno if errno is set. It does not return.
 */
static void SysError (char *Format, ...)
{
  va_list Args;
  /*
   *    Look up the text for errno before anything is written: the output
   *    calls below could fail themselves and overwrite errno.
   */
  char *Reason = errno ? strerror (errno) : NULL;

  va_start (Args, Format);
  (void) vfprintf (stderr, Format, Args);
  va_end (Args);
  (void) fputc ('\n', stderr);
  if (Reason)
    (void) fprintf (stderr, "%s\n", Reason);
  exit (EXIT_FAILURE);
}


/*
 * GetMemRout returns a zero filled block of Alloc bytes and terminates the
 * program via Error when the memory cannot be obtained.
 *
 * Alloc is a size_t because the GetMem macro passes a product with sizeof;
 * an int parameter would silently truncate large requests. A request for
 * zero bytes is served with a one byte block, since calloc is allowed to
 * return NULL for a zero size and this must not be reported as a failure.
 */
static void *GetMemRout (size_t Alloc)
{
  void *ReturnPtr;

  ReturnPtr = calloc (Alloc ? Alloc : 1, 1);
  if (ReturnPtr == NULL)
    Error ("Error allocating memory");
  return ReturnPtr;
}


/*
 * MoreMemRout resizes the block Ptr to Alloc bytes, or allocates a new
 * block when Ptr is NULL, and returns the (possibly moved) block. Newly
 * obtained memory is not initialized. The program is terminated via Error
 * when the memory cannot be obtained.
 *
 * Alloc is a size_t because the MoreMem macro passes a product with sizeof;
 * an int parameter would silently truncate large requests. A request for
 * zero bytes is turned into a one byte request, since realloc and malloc
 * may return NULL for a zero size and this must not be reported as a failure.
 */
static void *MoreMemRout (void *Ptr, size_t Alloc)
{
  void  *ReturnPtr;

  if (Alloc == 0)
    Alloc = 1;
  if (Ptr == NULL)
    ReturnPtr = malloc (Alloc);
  else
    ReturnPtr = realloc (Ptr, Alloc);
  if (ReturnPtr == NULL)
    Error ("Error allocating memory");
  return ReturnPtr;
}


/*
 * GetLine reads the next line (at most MaxLineLength - 1 characters) from In
 * and returns it. The returned pointer refers to a static buffer which is
 * overwritten by the next call. When nothing can be read, be it because of
 * an error or because the end of the file is reached, the program is
 * terminated via SysError.
 */
static char *GetLine (FILE *In)
{
  static char Buffer[MaxLineLength];

  if (fgets (Buffer, MaxLineLength, In) == NULL)
    SysError ("Error reading from LM file");
  return Buffer;
}


/*
 * StringClone returns a copy of Str in memory obtained with GetMem.
 * The caller has to release the copy with free.
 */
char *StringClone (const char *Str)
{
  char *Copy;
  int Bytes;

  Bytes = strlen (Str);
  Bytes++;                      /* room for the terminating '\0' */
  GetMem (Copy, Bytes);
  (void) memcpy (Copy, Str, Bytes);
  return Copy;
}


/*
 * ReadParameters reads the section "# Parameters <n>" from In: the header
 * line followed by n lines, each holding a name and a value separated by
 * whitespace. The result is a newly allocated parameter list which has to be
 * released with FreeParameters. Format errors terminate the program.
 */
static LMParamType *ReadParameters (FILE *In)
{
  char Name[MaxLineLength], Value[MaxLineLength];
  LMParamType *List;
  int n;

  GetMem (List, 1);
  if (sscanf (GetLine (In), "# Parameters %d", &List->Num) != 1)
    Error ("LM file format error: expected section Parameters not found");

  GetMem (List->Names, List->Num);
  GetMem (List->Values, List->Num);
  for (n = 0; n < List->Num; n++)
  {
    if (sscanf (GetLine (In), "%s %s", Name, Value) != 2)
      Error ("LM file format error: line %d in section Parameters", n);

    /* keep private copies, Name and Value are reused for the next line */
    GetMem (List->Names[n], strlen (Name) + 1);
    GetMem (List->Values[n], strlen (Value) + 1);
    (void) strcpy (List->Names[n], Name);
    (void) strcpy (List->Values[n], Value);
  }
  return List;
}


/*
 * GetParameter returns the value string of the first parameter in Param
 * whose name equals Name, or NULL if there is no such parameter. The
 * returned string is owned by Param.
 */
static char *GetParameter (LMParamType *Param, char *Name)
{
  int k = 0;

  while (k < Param->Num)
  {
    if (strcmp (Param->Names[k], Name) == 0)
      return Param->Values[k];
    k++;
  }
  return NULL;
}


/*
 * FreeParameters releases a parameter list created by ReadParameters:
 * all name and value strings, both string lists and the list itself.
 */
static void FreeParameters (LMParamType *Param)
{
  int k = 0;

  while (k < Param->Num)
  {
    free (Param->Names[k]);
    free (Param->Values[k]);
    k++;
  }
  free (Param->Values);
  free (Param->Names);
  free (Param);
}


/*
 * LMTree_AllocLevel (re)sizes the node arrays of the given level to Alloc
 * nodes and records the new size in T->Alloc. The arrays describing the
 * children of a node and its backoff weight are only kept for levels below
 * T->Depth, since nodes of the deepest level have no children. A level
 * beyond T->Depth terminates the program.
 */
static void LMTree_AllocLevel (LMTreeType *T, int Level, int Alloc)
{
  if (Level > T->Depth)
    Error ("LM file format error: more than M levels");

  T->Alloc[Level] = Alloc;
  MoreMem (T->Idx[Level], Alloc);
  MoreMem (T->Score[Level], Alloc);
  if (Level >= T->Depth)
    return;

  MoreMem (T->NumChildren[Level], Alloc);
  MoreMem (T->FirstChildPos[Level], Alloc);
  MoreMem (T->BOWeight[Level], Alloc);
}


/*
 * LMTree_AllocChildren reserves NumChildren consecutive nodes on the level
 * below Node, records their number and the position of the first one in
 * Node, and enlarges the arrays of the child level if they are too small.
 *
 * NumChildren comes straight from the LM file. It is stored in an LMIdxType,
 * so a count which is negative or does not fit into that type is rejected
 * here: a truncated count would make later lookups search the wrong range
 * and a negative count would move NextFree backwards.
 */
static void LMTree_AllocChildren (LMTreeType *T, LMNodeType *Node,
                                  int NumChildren)
{
  int ChildLevel = Node->Level + 1;

  if (NumChildren < 0 || (int) (LMIdxType) NumChildren != NumChildren)
    Error ("LM file format error: invalid number of children %d at node (%d,%d)",
           NumChildren, Node->Level, Node->Pos);

  T->NumChildren[Node->Level][Node->Pos] = (LMIdxType) NumChildren;
  T->FirstChildPos[Node->Level][Node->Pos] = T->NextFree[ChildLevel];
  T->NextFree[ChildLevel] += NumChildren;

  /* grow geometrically; the "+ 1" also gets an empty level started */
  while (T->NextFree[ChildLevel] > T->Alloc[ChildLevel])
    LMTree_AllocLevel (T, ChildLevel, T->Alloc[ChildLevel] * 2 + 1);
}


/*
 * LMTree_Init creates an empty tree for levels 0 ... Depth. Only the
 * per-level tables are allocated here (zero filled by GetMem); the node
 * arrays of a level are created later by LMTree_AllocLevel. Lev1Pos is
 * left NULL.
 */
LMTreeType *LMTree_Init (int Depth)
{
  LMTreeType *Tree;
  int NumLevels = Depth + 1;

  GetMem (Tree, 1);
  Tree->Depth = Depth;

  /* bookkeeping per level */
  GetMem (Tree->Alloc, NumLevels);
  GetMem (Tree->NextFree, NumLevels);

  /* one (still empty) node array per level */
  GetMem (Tree->Idx, NumLevels);
  GetMem (Tree->Score, NumLevels);
  GetMem (Tree->NumChildren, NumLevels);
  GetMem (Tree->FirstChildPos, NumLevels);
  GetMem (Tree->BOWeight, NumLevels);
  return Tree;
}


/*
 * LMTree_Free releases a tree created by LMTree_Init including all node
 * arrays and the level 1 lookup table. Arrays which were never allocated
 * are NULL and free accepts NULL, so no checks are required.
 */
static void LMTree_Free (LMTreeType *T)
{
  int Level;

  for (Level = 0; Level <= T->Depth; Level++)
  {
    free (T->Idx[Level]);
    free (T->Score[Level]);
    /* the deepest level has no child and backoff arrays */
    if (Level < T->Depth)
    {
      free (T->NumChildren[Level]);
      free (T->FirstChildPos[Level]);
      free (T->BOWeight[Level]);
    }
  }

  free (T->Idx);
  free (T->Score);
  free (T->NumChildren);
  free (T->FirstChildPos);
  free (T->BOWeight);
  free (T->NextFree);
  free (T->Alloc);
  free (T->Lev1Pos);
  free (T);
}


/*
 * LMTree_FindChild tries to descend from Node to its child with class index
 * Idx. On success Node is moved to the child and 1 is returned; otherwise
 * Node is left unchanged and 0 is returned. Nodes on the deepest level have
 * no children. Idx is not range checked here.
 */
static int LMTree_FindChild (LMTreeType *T, int Idx, LMNodeType *Node)
{
  int Lo, Hi, Mid, Probe;
  LMIdxType *Siblings;

  if (Node->Level >= T->Depth)
    return 0;

  /* the children of the root are found directly via the lookup table */
  if (Node->Level == 0)
  {
    if (T->Lev1Pos[Idx] < 0)
      return 0;
    Node->Pos = T->Lev1Pos[Idx];
    Node->Level++;
    return 1;
  }

  /* everywhere else: binary search in the sorted list of children */
  Lo = T->FirstChildPos[Node->Level][Node->Pos];
  Hi = Lo + T->NumChildren[Node->Level][Node->Pos] - 1;
  Siblings = T->Idx[Node->Level + 1];
  while (Lo <= Hi)
  {
    Mid = (Lo + Hi) / 2;
    Probe = (int) Siblings[Mid];
    if (Probe == Idx)
    {
      Node->Pos = Mid;
      Node->Level++;
      return 1;
    }
    if (Probe < Idx)
      Lo = Mid + 1;
    else
      Hi = Mid - 1;
  }
  return 0;
}


/*
 * LMTree_FindNode walks down the tree starting at the root, following
 * ClassHist[HistLen-1], ClassHist[HistLen-2], ..., ClassHist[0] in this
 * order. The walk stops at the first class for which no child exists; Node
 * holds the last node reached, so Node->Level tells how many classes could
 * be followed.
 */
static void LMTree_FindNode (LMTreeType *T, int *ClassHist, int HistLen,
                             LMNodeType *Node)
{
  int Next = HistLen;

  Node->Pos = 0;
  Node->Level = 0;
  while (--Next >= 0 && LMTree_FindChild (T, ClassHist[Next], Node))
    ;
}


/*
 * ReadNode reads the node addressed by Father from In, followed recursively
 * by all of its descendants, and returns the class index of the node (0 for
 * the root).
 *
 * Line formats:
 *   root node (level 0):        <score> <backoff weight>
 *   inner node:                 <index> <score> <backoff weight>
 *   end node (level T->Depth):  <index> <score>
 * The root and inner nodes are followed by a line holding the number of
 * children and then by the children themselves, which have to be sorted by
 * strictly increasing index. Any violation terminates the program.
 */
static int ReadNode (FILE *In, LMTreeType *T, LMNodeType *Father)
{
  int Level = Father->Level, Pos = Father->Pos;
  int Idx, NumChildren, First, PrevIdx, CurIdx;
  float Score, BOWeight;
  LMNodeType Child;
  char *Line;

  Line = GetLine (In);

  /* end nodes carry neither a backoff weight nor children */
  if (Level == T->Depth)
  {
    if (sscanf (Line, "%d %f", &Idx, &Score) != 2)
      Error ("LM file format error: end node (%d,%d)", Level, Pos);
    T->Idx[Level][Pos] = Idx;
    T->Score[Level][Pos] = Score;
    return Idx;
  }

  if (Level != 0)
  {
    if (sscanf (Line, "%d %f %f", &Idx, &Score, &BOWeight) != 3)
      Error ("LM file format error: node (%d,%d)", Level, Pos);
  }
  else
  {
    if (sscanf (Line, "%f %f", &Score, &BOWeight) != 2)
      Error ("LM file format error: root node (%d,%d)", Level, Pos);
    Idx = 0;
  }
  T->Idx[Level][Pos] = Idx;
  T->Score[Level][Pos] = Score;
  T->BOWeight[Level][Pos] = BOWeight;

  Line = GetLine (In);
  if (sscanf (Line, "%d", &NumChildren) != 1)
    Error ("LM file format error: node (%d,%d)", Level, Pos);
  LMTree_AllocChildren (T, Father, NumChildren);

  First = T->FirstChildPos[Level][Pos];
  Child.Level = Level + 1;
  PrevIdx = -1;
  for (Child.Pos = First; Child.Pos < First + NumChildren; Child.Pos++)
  {
    CurIdx = ReadNode (In, T, &Child);
    /* LMTree_FindChild searches the children by bisection */
    if (CurIdx <= PrevIdx)
      Error ("LM file format error: tree not sorted");
    PrevIdx = CurIdx;
  }
  return Idx;
}


/*
 * SearchEntry looks up Key in the list Classes by bisection and returns the
 * index of the matching entry in Classes, or -1 if there is none.
 * RankToIdx has to list the indexes of Classes in sorted order (see
 * SortEntries), NumClasses is the length of both arrays.
 *
 * The routine keeps the rank found by the previous successful call in a
 * static variable and probes the rank following it first. This pays off
 * when the keys are looked up more or less in sorted order; it is only a
 * first guess and does not affect the result.
 */
static int SearchEntry (char *Key, char **Classes, int *RankToIdx,
                        int NumClasses)
{
  static int Guess = -1;
  int Lo = 0, Hi = NumClasses - 1, Order;

  /* start right behind the last hit, or in the middle if that is outside */
  Guess++;
  if (Guess > Hi)
    Guess = (Lo + Hi) / 2;

  for (; Lo <= Hi; Guess = (Lo + Hi) / 2)
  {
    Order = strcmp (Classes[RankToIdx[Guess]], Key);
    if (Order == 0)
      return RankToIdx[Guess];
    if (Order < 0)
      Lo = Guess + 1;
    else
      Hi = Guess - 1;
  }

  Guess = -1;
  return -1;
}


/*
 * SortEntries fills Idx[0 ... Num-1] with the indexes of Classes in
 * ascending strcmp order of the strings, i.e. Classes[Idx[0]] is the
 * smallest string. Classes itself is not modified. The sort is an in-place
 * heapsort on Idx.
 *
 * Lists with fewer than two entries are already sorted and are returned
 * right after the initialization: the heap loop below needs at least two
 * entries, otherwise it would access Idx[-1] and never reach its exit.
 */
static void SortEntries (char **Classes, int *Idx, int Num)
{
  int Test, Child, Father, Last, IdxTest;
  char *TestClass;

  for (Test = 0; Test < Num; Test++)
    Idx[Test] = Test;
  if (Num < 2)
    return;

  Test = Num/2;
  Last = Num - 1;

  for (;;)
  {
    if (Test > 0)
    {
      /* heap construction phase: sift down the next inner node */
      IdxTest = Idx[--Test];
    }
    else
    {
      /*
       * selection phase: move the maximum to its final place and sift
       * down the entry which was stored there
       */
      IdxTest = Idx[Last];
      Idx[Last] = Idx[0];
      if (--Last == 0)
      {
        Idx[0] = IdxTest;
        return;
      }
    }
    TestClass = Classes[IdxTest];

    Father = Test;
    Child = 2 * Father + 1;
    while (Child <= Last)
    {
      /* continue with the larger one of the two children */
      if (Child < Last &&
          strcmp (Classes[Idx[Child]], Classes[Idx[Child+1]]) < 0)
        Child++;
      if (strcmp (TestClass, Classes[Idx[Child]]) >= 0)
        break;
      Idx[Father] = Idx[Child]; Father = Child;
      Child = 2 * Father + 1;
    }
    Idx[Father] = IdxTest;
  }
}


/*
 * ReadClassList reads the section "# ClassList <n>" from In and fills
 * RcgToLMIdx, which maps each index of the recognizer's class list
 * RcgClasses to the index of the same class in the LM's list (its line
 * number within the section, counted from 0). If the class "VM2:Command"
 * occurs, its LM index is stored in *CmdIdx; otherwise *CmdIdx is left
 * untouched. The number of LM classes is returned.
 *
 * The program is terminated unless both lists have the same length and
 * every recognizer class is found in the LM's list.
 */
static int ReadClassList (FILE *In, char **RcgClasses, int RcgNumClasses,
                          int *RcgToLMIdx, int *CmdIdx)
{
  int LMIdx, Rcg, LMNumClasses;
  int *Rank;
  char *Name;

  /* sorted view of the recognizer's classes for the lookups below */
  GetMem (Rank, RcgNumClasses);
  SortEntries (RcgClasses, Rank, RcgNumClasses);

  if (sscanf (GetLine (In), "# ClassList %d", &LMNumClasses) != 1)
    Error ("LM file format error: expected section ClassList not found");
  if (LMNumClasses != RcgNumClasses)
    Error ("LMInit error: Class lists of LM and recognizer differ");

  for (Rcg = 0; Rcg < RcgNumClasses; Rcg++)
    RcgToLMIdx[Rcg] = -1;

  for (LMIdx = 0; LMIdx < LMNumClasses; LMIdx++)
  {
    /* the class name is the text up to the first whitespace character */
    Name = GetLine (In);
    Name[strcspn (Name, " \t\r\n\f\v")] = '\0';

    if (strcmp (Name, "VM2:Command") == 0)
      *CmdIdx = LMIdx;

    Rcg = SearchEntry (Name, RcgClasses, Rank, RcgNumClasses);
    if (Rcg >= 0)
      RcgToLMIdx[Rcg] = LMIdx;
  }

  /* every class of the recognizer must have got an LM index */
  for (Rcg = 0; Rcg < RcgNumClasses; Rcg++)
  {
    if (RcgToLMIdx[Rcg] < 0)
      Error ("LMInit error: Class lists of LM and recognizer differ");
  }

  free (Rank);
  return LMNumClasses;
}


/*
 * ReadSizes reads the class size file SizeFile, which holds one class per
 * line as "<class name> <size>". The names and sizes are returned in the
 * newly allocated lists *ClassList and *ClassSizes (both in file order);
 * the return value is the number of classes read. The caller has to free
 * every name, and both lists.
 *
 * A file which cannot be opened or a line which does not consist of exactly
 * two columns terminates the program.
 */
static int ReadSizes (char *SizeFile, char ***ClassList, int **ClassSizes)
{
  char Line[MaxLineLength], Class[MaxLineLength], Rest[MaxLineLength];
  int Capacity, Count, Size;
  FILE *In;

  In = fopen (SizeFile, "r");
  if (In == NULL)
    SysError ("Error opening class size file %s", SizeFile);

  Capacity = ListBlockLength;
  GetMem (*ClassList, Capacity);
  GetMem (*ClassSizes, Capacity);

  for (Count = 0; fgets (Line, MaxLineLength, In) != NULL; )
  {
    Count++;
    /* Rest only serves to detect a third column */
    if (sscanf (Line, "%s %d %s", Class, &Size, Rest) != 2)
      Error ("Two column input expected in line %d of class size file",
             Count);
    if (Count > Capacity)
    {
      Capacity += ListBlockLength;
      MoreMem (*ClassList, Capacity);
      MoreMem (*ClassSizes, Capacity);
    }
    (*ClassSizes)[Count-1] = Size;
    (*ClassList)[Count-1] = StringClone (Class);
  }

  (void) fclose (In);
  return Count;
}


/*
 * ReadMap reads the map file MapFile, which holds one line "<word> <class>"
 * for each word of the recognizer, and returns a newly allocated table with
 * RcgNumWords entries which maps the index of a word in RcgWords to the
 * index of its class in ClassList. The caller has to free the table.
 *
 * The program is terminated when the file cannot be opened, when a line
 * does not consist of exactly two columns, when the file has more lines
 * than the recognizer has words, when a line names a word or a class which
 * is not contained in RcgWords or ClassList respectively, or when a word of
 * the recognizer is missing in the file.
 */
static int *ReadMap (char *MapFile, char **ClassList, int NumClasses,
                     char **RcgWords, int RcgNumWords)
{
  int i, NumWords, CatIdx, RcgWIdx;
  int *ClassRankToIdx, *RcgWRankToIdx, *RcgWToCatIdx;
  char Line[MaxLineLength], Class[MaxLineLength], Word[MaxLineLength],
       Rest[MaxLineLength];
  FILE *In;

  /* sorted views of both lists for the lookups with SearchEntry */
  GetMem (ClassRankToIdx, NumClasses);
  GetMem (RcgWRankToIdx, RcgNumWords);
  SortEntries (ClassList, ClassRankToIdx, NumClasses);
  SortEntries (RcgWords, RcgWRankToIdx, RcgNumWords);

  In = fopen (MapFile, "r");
  if (!In)
    SysError ("Error opening map file %s", MapFile);

  GetMem (RcgWToCatIdx, RcgNumWords);
  for (i = 0; i < RcgNumWords; i++)
    RcgWToCatIdx[i] = -1;

  NumWords = 0;
  while (fgets (Line, MaxLineLength, In))
  {
    NumWords++;
    if (NumWords > RcgNumWords)
      Error ("Map file contains more words than recognizer's word list");
    /* Rest only serves to detect a third column */
    if (sscanf (Line, "%s %s %s", Word, Class, Rest) != 2)
      Error ("Two column input expected in line %d of map file", NumWords);

    /*
     * SearchEntry returns -1 for unknown entries. This must never be used
     * as an index into RcgWToCatIdx, and an unknown class is reported here
     * where the offending line is still known.
     */
    RcgWIdx = SearchEntry (Word, RcgWords, RcgWRankToIdx, RcgNumWords);
    if (RcgWIdx < 0)
      Error ("Word %s in line %d of map file not contained in recognizer's word list",
             Word, NumWords);
    CatIdx = SearchEntry (Class, ClassList, ClassRankToIdx, NumClasses);
    if (CatIdx < 0)
      Error ("Class %s in line %d of map file not contained in class list",
             Class, NumWords);
    RcgWToCatIdx[RcgWIdx] = CatIdx;
  }

  for (i = 0; i < RcgNumWords; i++)
    if (RcgWToCatIdx[i] == -1)
      Error ("Recognizer's word %s not contained in map file", RcgWords[i]);

  (void) fclose (In);
  free (ClassRankToIdx);
  free (RcgWRankToIdx);
  return RcgWToCatIdx;
}
  

/*
 * LMScore_LMIdx returns the tree score of the class ClassHist[0] given the
 * preceding classes ClassHist[1 ... HistLen-1], all given as LM class
 * indexes.
 *
 *  - Without any class (HistLen <= 0) the score of the root is returned.
 *  - If the preceding classes do not form a node of the tree, the oldest
 *    one (the one with the highest index in ClassHist) is dropped.
 *  - If they do and the node has a child for ClassHist[0], the score of
 *    that child is returned.
 *  - Otherwise the backoff weight of the node is added to the score which
 *    is obtained after dropping the oldest class.
 */
static double LMScore_LMIdx (LMTreeType *T, int *ClassHist, int HistLen)
{
  LMNodeType Ctx;
  double BackedOff;

  if (HistLen <= 0)
    return T->Score[0][0];

  LMTree_FindNode (T, ClassHist + 1, HistLen - 1, &Ctx);
  if (Ctx.Level != HistLen - 1)
    return LMScore_LMIdx (T, ClassHist, HistLen - 1);

  if (LMTree_FindChild (T, ClassHist[0], &Ctx))
    return T->Score[Ctx.Level][Ctx.Pos];

  /* Ctx was not moved, so Ctx.Level is the length of the shorter history */
  BackedOff = T->BOWeight[Ctx.Level][Ctx.Pos];
  BackedOff += LMScore_LMIdx (T, ClassHist, Ctx.Level);
  return BackedOff;
}


/*
 * LMInit reads the language model file LMFile and returns a newly allocated
 * language model which has to be released with LMFree.
 *
 * RcgClasses       the recognizer's class list (RcgNumClasses names)
 * RcgClassSizes    the size of each of these classes, must be positive
 * RcgNumClasses    the number of recognizer classes
 *
 * The file has to consist of the sections Parameters, ClassList and
 * ScoreTree in this order. The parameter LMType must be VMClassMgramScore
 * and the parameter M, the depth of the score tree, must be at least 1.
 * The optional parameters Num<m>Gram give the number of nodes on level m.
 * Any error terminates the program with a message.
 */
LMType *LMInit (char *LMFile, char *RcgClasses[], int *RcgClassSizes,
                int RcgNumClasses)
{
  FILE *In;
  LMParamType *Param;
  LMTreeType *T;
  LMType *LM;
  LMNodeType Node;
  int i, LMNumClasses, NumLines, Level, Pos, Depth, ClassIdx;
  char *Value, *Line;

  In = fopen (LMFile, "r");
  if (!In)
    SysError ("Error opening LM file %s", LMFile);

  Param = ReadParameters (In);
  Value = GetParameter (Param, "LMType");
  if (!Value) Error ("LM file format error: parameter LMType not specified");
  if (strcmp (Value, "VMClassMgramScore"))
    Error ("LMType %s not supported", Value);

  GetMem (LM, 1);
  LM->CmdIdx = -1;
  LM->CmdWeight = 0.;
  LM->TxtScore = 0.;
  LM->ClassHist = NULL;
  LM->RcgWToCatIdx = NULL;
  LM->RcgNumClasses = RcgNumClasses;
  GetMem (LM->RcgToLMIdx, RcgNumClasses);
  LMNumClasses = ReadClassList (In, RcgClasses, RcgNumClasses, LM->RcgToLMIdx,
                                &(LM->CmdIdx));

  /*
   * Define zerogram emission scores for each class (LM indexed)
   */
  GetMem (LM->EmitScores, RcgNumClasses);
  for (i = 0; i < RcgNumClasses; i++)
  {
    if (RcgClassSizes[i] <= 0)
    {
      Error ("LMInit error: non-positive class size %d not allowed",
             RcgClassSizes[i]);
    }
    LM->EmitScores[LM->RcgToLMIdx[i]] = log ((double) RcgClassSizes[i]);
  }

  /*
   * A tree without any level below the root has no child arrays on level 0,
   * which are needed further down. Therefore M has to be at least 1.
   */
  Value = GetParameter (Param, "M");
  if (!Value) Error ("LM file format error: parameter M not specified");
  Depth = atoi (Value);
  if (Depth < 1)
    Error ("LM file format error: parameter M must be a positive integer");
  LM->T = T = LMTree_Init (Depth);
  GetMem (LM->Path, T->Depth);
  LMTree_AllocLevel (T, 0, 1);
  for (Level = 1; Level <= T->Depth; Level++)
  {
    char String[30];
    int MGramAlloc = 0;

    (void) sprintf (String, "Num%dGram", Level);
    Value = GetParameter (Param, String);
    if (Value)
      MGramAlloc = atoi (Value);
    if (MGramAlloc <= 0)
      MGramAlloc = MGramAllocDefault;
    LMTree_AllocLevel (T, Level, MGramAlloc);
  }

  /*
   * The number given in the section header is read for the format check
   * only; the tree is read recursively starting at the root.
   */
  Line = GetLine (In);
  if (sscanf (Line, "# ScoreTree %d", &NumLines) != 1)
    Error ("LM file format error: expected section ScoreTree not found");
  Node.Level = 0; Node.Pos = 0;
  (void) ReadNode (In, T, &Node);

  /*
   * Get a list for the position of all level 1 nodes in order to be able
   * to speed up the search. The class indexes come from the file and are
   * checked against the size of the list before they are used as index.
   */
  GetMem (T->Lev1Pos, LMNumClasses);
  for (i = 0; i < LMNumClasses; i++)
    T->Lev1Pos[i] = -1;
  for (Pos = T->FirstChildPos[0][0];
       Pos < T->FirstChildPos[0][0] + T->NumChildren[0][0];
       Pos++)
  {
    ClassIdx = (int) T->Idx[1][Pos];
    if (ClassIdx >= LMNumClasses)
      Error ("LM file format error: class index %d exceeds class list",
             ClassIdx);
    T->Lev1Pos[ClassIdx] = Pos;
  }

  FreeParameters (Param);
  (void) fclose (In);
  return LM;
}


/*
 * LMWordInit creates a language model for the word based interface
 * (LMWordScore). The classes and their sizes are taken from SizeFile, the
 * assignment of the recognizer's words RcgWords to these classes from
 * MapFile, and the model itself from LMFile via LMInit. The sum of all
 * class sizes has to be equal to RcgNumWords. The result has to be released
 * with LMFree.
 */
LMType *LMWordInit (char *LMFile, char *MapFile, char *SizeFile,
                    char *RcgWords[], int RcgNumWords)
{
  char **Classes;
  int *Sizes;
  int c, NumClasses, Total;
  LMType *LM;

  NumClasses = ReadSizes (SizeFile, &Classes, &Sizes);
  for (Total = 0, c = 0; c < NumClasses; c++)
    Total += Sizes[c];
  if (Total != RcgNumWords)
    Error ("RcgNumWords differs from sum of class sizes");

  LM = LMInit (LMFile, Classes, Sizes, NumClasses);
  LM->RcgWToCatIdx = ReadMap (MapFile, Classes, NumClasses,
                              RcgWords, RcgNumWords);

  /* start with room for a trigram, LMWordScore enlarges this on demand */
  GetMem (LM->ClassHist, 3);
  LM->ClassHistAlloc = 3;

  /* the class list was only needed for the initialization */
  for (c = NumClasses - 1; c >= 0; c--)
    free (Classes[c]);
  free (Classes);
  free (Sizes);

  return LM;
}


/*
 * LMScore returns the score of the class ClassHist[0] given the preceding
 * classes ClassHist[1 ... HistLen-1], where all classes are given as
 * indexes into the recognizer's class list. Only the first T->Depth entries
 * of ClassHist are used. An index outside of the class list terminates the
 * program.
 *
 * The result is made up of the tree score, TxtScore, the interpolation with
 * CmdWeight if ClassHist[0] is the class VM2:Command, and the emission
 * score of ClassHist[0].
 *
 * For HistLen <= 0 there is no class ClassHist[0]. In this case only the
 * score of the tree's root plus TxtScore is returned; the class specific
 * terms are left out instead of being taken from the Path entry which a
 * previous call happened to leave behind.
 */
double LMScore (LMType *LM, int *ClassHist, int HistLen)
{
  int PathLen, i;
  int *Path;
  double Score;

  Path = LM->Path;
  PathLen = LM->T->Depth;
  if (HistLen < PathLen)
    PathLen = HistLen;

  /* translate the recognizer's class indexes into LM class indexes */
  for (i = 0; i < PathLen; i++)
  {
    if (ClassHist[i] < 0 || ClassHist[i] >= LM->RcgNumClasses)
      Error ("LMScore: invalid index %d", ClassHist[i]);
    Path[i] = LM->RcgToLMIdx[ClassHist[i]];
  }
  Score = LMScore_LMIdx (LM->T, Path, PathLen);
  Score += LM->TxtScore;

  if (PathLen <= 0)
    return Score;

  if (Path[0] == LM->CmdIdx)
    Score = - log (LM->CmdWeight + exp (- Score));

  Score += LM->EmitScores[Path[0]];

  return Score;
}


/*
 * LMWordScore is the word based counterpart of LMScore: WordHist holds
 * HistLen indexes into the word list which was passed to LMWordInit. Each
 * word is replaced by its class and the class history is scored by LMScore.
 * The word indexes are not range checked. The program is terminated when LM
 * was not created by LMWordInit.
 */
double LMWordScore (LMType *LM, int *WordHist, int HistLen)
{
  int w;
  int *WordToClass = LM->RcgWToCatIdx;

  if (WordToClass == NULL)
    Error ("LM not initialized via LMWordInit");

  /* make sure the scratch buffer can take the whole history */
  if (LM->ClassHistAlloc < HistLen)
  {
    MoreMem (LM->ClassHist, HistLen);
    LM->ClassHistAlloc = HistLen;
  }

  for (w = 0; w < HistLen; w++)
    LM->ClassHist[w] = WordToClass[WordHist[w]];

  return LMScore (LM, LM->ClassHist, HistLen);
}


/*
 * LMFree releases a language model created by LMInit or LMWordInit.
 * The entries of the word interface are NULL for models created by LMInit;
 * free accepts NULL, so they need no special treatment.
 */
void LMFree (LMType *LM)
{
  LMTree_Free (LM->T);

  free (LM->EmitScores);
  free (LM->RcgToLMIdx);
  free (LM->Path);

  free (LM->ClassHist);
  free (LM->RcgWToCatIdx);

  free (LM);
}


/*
 * LMSetCmdWeight sets the interpolation weight which LMScore applies to the
 * class VM2:Command, together with the score - log (1. - CmdWeight) which
 * LMScore adds for every class. CmdWeight has to be in the interval [0,1);
 * a weight other than 0 requires VM2:Command to be in the LM's class list.
 * Violations terminate the program.
 */
void LMSetCmdWeight (LMType *LM, double CmdWeight)
{
  if (CmdWeight >= 1. || CmdWeight < 0.)
    Error ("CmdWeight not in interval [0,1)");
  if (CmdWeight != 0. && LM->CmdIdx == -1)
    Error ("VM2:Command not contained in LM's class list -> do not set CmdWeight");

  LM->TxtScore = - log (1. - CmdWeight);
  LM->CmdWeight = CmdWeight;
}
