//****************************************************************************
//
// CCfile: CwordVar.cc
//
// Autor: A. Kipp
// erstellt: Fri Dec 6 13:59:41 MET 1996
// veraendert:
//
// Enthaelt:
//***************************************************************************
#include <stdio.h>
#include <math.h>
#include "CwordVar.h"
#include "CCexceptions.h"
#include "iotoolbox.h"

// Splits one rule side into inventory symbols; defined further down in this
// file.
int parseString(char* str,char*** target,char** inv,int anzinv);
// Comparator with the qsort callback signature over ruleappl* elements;
// defined at the end of this file.
int cmprapl(const void* p1,const void* p2);

// Characters parseString() skips between the symbols of a rule side.
// NOTE(review): both statics below bind a string literal to a non-const
// char*, which current C++ rejects; the type of CwordVar::workdir would have
// to become const char* as well - confirm against the header.
static char* rulesepchars = "-,.;";
// Value the constructor assigns to CwordVar::workdir.
static char* defaultworkdir = "./";

// Symbols given to the two joker nodes that insMonoBack() creates.
char Cvargraph::insertsym[] = "_ins_";
char Cvargraph::replsym[] = "_rep_";

//***************************************************************************
// Implementierung der Klasse: CwordVar
// Autor: A. Kipp
// erstellt: Fri Dec 6 13:59:41 MET 1996 
// veraendert:
//***************************************************************************
// Default constructor: allocates the graph and the label-file reader and
// sets the tuning parameters to their defaults.
CwordVar::CwordVar()
{
  // helper objects
  vargraph = new Cvargraph();
  mlf      = new Cmlf();

  // no rules loaded yet
  regellist = NULL;
  allrapanz = 0;

  // defaults; monoback is not > 0, so run() skips insMonoBack()
  jumpwk   = 0.5;
  monoback = -1.0;
  workdir  = defaultworkdir;
}

void CwordVar::mlf2vargraph()
{
  mlfsegment_rec** srpp;
  Cnode* nd,*lnd = NULL;
  int i,k;

  for( i=0,srpp = mlf->slist.skipThru(init) ; 
       srpp!=NULL ; 
       srpp=mlf->slist.skipThru(next),i++)
    {
      for( k=0 ; k<vargraph->anzinv ; k++ )
	{
	  if( !strcmp( (*srpp)->symbol,vargraph->inventar[k] ) )
	    {
	      nd = vargraph->newNode();
	      ((Chgrnode*)nd)->symbol = vargraph->inventar[k];
	      nd->nodenr = i;
	      vargraph->addNode(nd);
	      if( lnd != NULL )
		{
		  vargraph->addEdge(NULL,lnd,nd);
		}
	      lnd = nd;
	      break;
	    }
	}
      if( k == vargraph->anzinv )
	{
	  warning(__FILE__,__LINE__,"%s unknown. ignoring it",(*srpp)->symbol);
	}
    }
}
  
// Opens the .slf output file for the label file that was just read and hands
// it to the graph via setOut().
// Candidates are tried in this order and the first one that can be opened
// for writing wins:
//   1. <workdir>/<path>/<body>.slf
//   2. <workdir>/<body>.slf
//   3. <body>.slf
// If none can be opened the graph is written to stdout.
// The third candidate used to be formatted as "%s.slf" with workdir as the
// only consumed argument, which produced "<workdir>.slf"; the body is what
// the surplus argument shows was meant.
// Names are built with snprintf and a candidate that does not fit the
// buffer is skipped instead of overflowing it.
// NOTE(review): path/body/ext are filled by lobotomo() without a length
// argument, and the strdup'ed filename replaces the previous pointer without
// freeing it - whether vargraph->clear() releases it is not visible here.
void CwordVar::openGraphOutfile()
{
  char path[200],body[100],ext[50];
  char fn[512];
  FILE* fp = NULL;
  int attempt,len;

  lobotomo(mlf->getFilename(),path,body,ext);
  vargraph->filename=strdup(mlf->getFilename());

  for( attempt=0 ; attempt<3 && fp==NULL ; attempt++ )
    {
      switch( attempt )
        {
        case 0:
          len = snprintf(fn,sizeof(fn),"%s/%s/%s.slf",workdir,path,body);
          break;
        case 1:
          len = snprintf(fn,sizeof(fn),"%s/%s.slf",workdir,body);
          break;
        default:
          len = snprintf(fn,sizeof(fn),"%s.slf",body);
          break;
        }
      // len >= sizeof(fn) means the name was truncated: do not open it
      if( len > 0 && (size_t)len < sizeof(fn) )
        {
          fp = fopen(fn,"w");
        }
    }

  vargraph->setOut( fp != NULL ? fp : stdout );
}

// Main loop: converts every label file delivered by mlf->readFile() into a
// variant graph and writes it out.
// Per file: open the output, build the linear graph, apply the rules,
// optionally insert the backoff path (monoback > 0), compute the entropy,
// write the graph and the header comments.
// The output stream is closed only when it is a real file. When
// openGraphOutfile() fell back to stdout the stream is flushed instead, so
// that later files can still be written to it.
// After the last file the symbol inventory and the rule list are released.
// NOTE(review): a non-zero, non-EOF result of readFile() is retried without
// limit; confirm that readFile() advances on error.
void CwordVar::run()
{
  int err;

  while( (err=mlf->readFile()) != EOF )
    {
      if( err != 0 )
        {
          fprintf(stderr,"Warning: couldn't read mlf\n");
          continue;
        }
      openGraphOutfile();

      mlf2vargraph();
      vargraph->compact();
      vtrndanz = vargraph->nodes.compact(&ndlist);

      // Assigning the new-expression directly to the member caused an
      // internal compiler error on the original toolchain, hence the
      // temporary.
      vtrnoderec* tmp = new vtrnoderec[vtrndanz];
      vtrrecndlist = tmp;

      applyRules();
      //setWordnums();
      if( jumpwk > 0.0 )
        {
          //vargraph->jumpWordseps( log(jumpwk) );
          warning(__FILE__,__LINE__,"obolete: use HBuild with sublat!");
        }
      if( monoback > 0.0 )
        {
          insMonoBack();
        }
      vargraph->entCalc();
      vargraph->writeHGraph();
      //write Header comments
      writeHead();

      // never close stdout: it is the shared fallback for all files
      if( vargraph->out == stdout )
        {
          fflush(stdout);
        }
      else
        {
          fclose(vargraph->out);
        }
      vargraph->clear();

      delete [] vtrrecndlist;
      vtrrecndlist = NULL;
    }

  delete [] vargraph->inventar;
  delete [] regellist;
}

// Appends three "##" header lines to the graph output: the entropy and path
// count stored in the graph, and the number of vtr nodes of the current file.
void CwordVar::writeHead()
{
  FILE* const sink = vargraph->out;

  fprintf(sink,"##entropy: %lf\n",vargraph->entropy);
  fprintf(sink,"##npath: %lf\n",vargraph->pathes);
  fprintf(sink,"##nvtr: %d\n",vtrndanz);
}

// Reads the rule file and stores the parsed rules in regellist.
//   regelfilename  path of the rule file, one rule per line
//   returns        number of rules stored (also kept in regelanz), or -1 if
//                  the file cannot be opened
// Lines that rule::createRegel() rejects are skipped; accepted rules are
// numbered consecutively from 0 in file order.
// The file is now closed once it has been read; it used to stay open.
// NOTE(review): lines are read in pieces of at most 99 characters, so a
// longer line reaches createRegel() as several fragments.
int CwordVar::readRegeln(char* regelfilename)
{
  char line[100];
  int rno = 0;
  rule* rulep;
  Cverklist<rule*> v_regellist;

  regelanz=0;
  FILE* fp = fopen(regelfilename,"r");
  if( fp == NULL )
    {
      return -1;
    }

  while( fgets(line,sizeof(line),fp) != NULL )
    {
      rulep = rule::createRegel(line,vargraph->inventar,vargraph->anzinv);
      if( rulep != NULL )
        {
          rulep->rulenr = rno++;
          v_regellist.addEl(&rulep);
        }
    }
  fclose(fp);

  return (regelanz = v_regellist.compact(&regellist));
}

// Adds a backoff path through null nodes so that variants not produced by
// any rule are still covered.
// Pass 1 puts a null node between every pair of neighbouring vtr nodes that
// does not have one yet. Pass 2 links each null node to the next one
// (optionally also through two joker nodes) and moves the probability mass
// monoback from the regular edge onto that bypass.
// Reads: vtrndanz, vtrrecndlist, monoback, jokers. Modifies: vargraph and the
// nullnode fields of vtrrecndlist.
void CwordVar::insMonoBack()
{
  //insertion of nullnodes between every vtr-node to cover unseen
  //variants

  int i;
  Cedge* edptr,*nnedptr;

  // Pass 1: make sure a null node sits between node i and node i+1.
  for( i=0 ; i<vtrndanz-1 ; i++ )
    {
      // add a null-node if necessary
      if( vtrrecndlist[i].nullnode == NULL )
	{
	  Cvgredge* ved = NULL;
	  vtrrecndlist[i].nullnode = (Cvgrnode*)vargraph->newNode();
	  vtrrecndlist[i].nullnode->symbol = strdup(NULLNODESYM);
	  if( vargraph->addNode(vtrrecndlist[i].nullnode) == NULL ) 
	    {
	      failure(__FILE__,__LINE__,"Internal Error1");
	    }
	  // A direct edge i -> i+1 is replaced by i -> null -> i+1; its data
	  // is copied onto the new first half before the edge is deleted.
	  if( (edptr = vargraph->isIn(vtrrecndlist[i].vgrndp,
			    vtrrecndlist[i + 1].vgrndp) ) )
	    {
	      ved = new Cvgredge;
	      *ved = *(Cvgredge*)edptr;
	      ved->startnode =vtrrecndlist[i].vgrndp;
	      ved->endnode = vtrrecndlist[i].nullnode;
	      vargraph->delEdge(edptr);
	    }
	  // ved is NULL when there was no direct edge to copy
	  vargraph->addEdge(ved,vtrrecndlist[i].vgrndp,
			    vtrrecndlist[i].nullnode);
	  vargraph->addEdge(NULL,vtrrecndlist[i].nullnode,
			    vtrrecndlist[i+1].vgrndp);

	}
      
    }
  // Pass 2: bypass node i+1 by linking null node i to null node i+1.
  for( i=0 ; i<vtrndanz-2 ; i++ )
    {
      // null-nodes there?
      // probability of a null->null edge that existed before; -1 means none
      double addprob = -1.0;

      // nodes with symbol "#" are never bypassed
      if( !strcmp(vtrrecndlist[i+1].vgrndp->symbol,"#") )
	{
	  continue;
	}
      if( vtrrecndlist[i].nullnode == NULL || 
	  vtrrecndlist[i+1].nullnode == NULL)
	{
	  failure(__FILE__,__LINE__,"Internal Error2");
	}
      
      // edptr: the bypass edge null(i) -> null(i+1), created if missing
      if( (edptr = vargraph->isIn(vtrrecndlist[i].nullnode,
				vtrrecndlist[i+1].nullnode) ) == NULL )
	{
	  edptr = vargraph->addEdge(NULL,vtrrecndlist[i].nullnode,
				vtrrecndlist[i+1].nullnode);
	}
      else
	{
	  // remember the old probability; it is added back further down
	  addprob = exp( edptr->log_uewkt);
	}
	
      if( edptr == NULL )
	{
	  failure(__FILE__,__LINE__,"Internal Error33");
	}
      // nnedptr: the regular edge null(i) -> node(i+1); it must exist
      nnedptr = 
	vargraph->isIn(vtrrecndlist[i].nullnode,vtrrecndlist[i+1].vgrndp);
      if( nnedptr == NULL )
	{
	  failure(__FILE__,__LINE__,"Internal Error3");
	}

      if( jokers )
	{
	  // With jokers three edges leave null(i) besides the regular one;
	  // each is weighted with monoback/3.
	  Cvgrnode* vgrndp;
	  Cedge* jedptr;
	  edptr->log_uewkt = nnedptr->log_uewkt + log( monoback/3);
	  vgrndp = (Cvgrnode*)vargraph->newNode();
	  //Jokernode between consecutive nullnode
	  vgrndp->symbol = strdup(vargraph->replsym);
	  vgrndp->sublatentropy = log(vargraph->anzinv); 
	  if( vargraph->addNode(vgrndp) == NULL ) 
	    {
	      failure(__FILE__,__LINE__,"Internal Error11");
	    }
	  if( (jedptr = 
	      vargraph->addEdge(NULL,vtrrecndlist[i].nullnode,vgrndp)) 
	      == NULL )
	    {
	      failure(__FILE__,__LINE__,"Internal Error12");
	    }
	  if( vargraph->addEdge(NULL,vgrndp,vtrrecndlist[i+1].nullnode)
	      == NULL )
	    {
	      failure(__FILE__,__LINE__,"Internal Error13");
	    }
	  jedptr->log_uewkt = nnedptr->log_uewkt + log( monoback/3);
	  //Jokernode between null and vtr-node
	  vgrndp = (Cvgrnode*)vargraph->newNode();
	  vgrndp->symbol = strdup(vargraph->insertsym);
	  vgrndp->sublatentropy = log(vargraph->anzinv); 
	  if( vargraph->addNode(vgrndp) == NULL ) 
	    {
	      failure(__FILE__,__LINE__,"Internal Error14");
	    }
	  if( (jedptr = 
	      vargraph->addEdge(NULL,vtrrecndlist[i].nullnode,vgrndp)) 
	      == NULL )
	    {
	      failure(__FILE__,__LINE__,"Internal Error15");
	    }
	  jedptr->log_uewkt = nnedptr->log_uewkt + log( monoback/3);
	  if( vargraph->addEdge(NULL,vgrndp,vtrrecndlist[i+1].vgrndp)
	      == NULL )
	    {
	      failure(__FILE__,__LINE__,"Internal Error16");
	    }

	}
      else
	{
	  // without jokers the bypass edge takes the whole backoff mass
	  edptr->log_uewkt = nnedptr->log_uewkt + log( monoback);

	}
      if( addprob > 0 )
	{
	  // sum old and new probability in the linear domain
	  edptr->log_uewkt = log( exp(edptr->log_uewkt) + addprob);
	} 
      // the regular edge keeps the remaining share 1 - monoback
      nnedptr->log_uewkt += log( 1.0 - monoback);
    }
  
}

// Empty placeholder. Within this file applyRules() calls
// vargraph->scoreEdges(), not this method.
void CwordVar::scoreEdges()
{
}


void CwordVar::applyRules() 
{
  int i,j,jj,k,l;
  Cvgrnode* vgrnode;
  Cnode* nd;
  Cedge* edptr;
  ruleappl* arapl,**rapp;
  Cverklist<ruleappl*> raplist;
  double subwk;
  
  
  for( i=0 ; i<vtrndanz ; i++ )
    {
      vgrnode = (Cvgrnode*)ndlist[i];
      vtrrecndlist[i].vgrndp = vgrnode;
    }

  for( i=0 ; i<vtrndanz ; i++ )
    {
      vgrnode = (Cvgrnode*)ndlist[i];
      
      for( k=0 ; k<regelanz ; k++ )
	{ 
	  for( j=i ; j < vtrndanz  && 
		 (j-i) < regellist[k]->lsanz &&
		 ((Cvgrnode*)ndlist[j])->symbol ==
		 regellist[k]->ls[j-i] ; j++ );

	  if( (j-i) == regellist[k]->lsanz )
	    {
	      //match!!
	      arapl = new ruleappl;
	      arapl->therule = regellist[k];
	      arapl->sind = i;
	      arapl->eind = j-1;
	      vtrrecndlist[i].rapplstarts.addEl(&arapl);
	      for( jj=i ; jj<j ; jj++ )
		{
		  vtrrecndlist[jj].rappls.addEl(&arapl);
		}
	      firstPass( arapl );
	    }
	    
	} 
    }

  vargraph->scoreEdges();
}

// Builds the alternative path for one rule application and wires it into
// the graph.
//   rap  rule application; sind/eind are the first and last index of the
//        matched span in vtrrecndlist. csplit, crecom, rentry and rexit are
//        filled in here.
// One node is created for every right-hand-side symbol between the shared
// prefix (inident symbols) and the shared suffix (endident symbols) of the
// rule. The chain starts at the last shared prefix node, or - without a
// shared prefix - at the null node between the node before the span and the
// first node of the span. It ends at the first shared suffix node, or -
// without a shared suffix - at the null node between the last node of the
// span and the node behind it. Missing null nodes are created on the way.
// NOTE(review): with inident == 0 and sind == 0, vtrvor is -1 and
// vtrrecndlist[vtrvor] is indexed; with endident == 0, vtrnach is eind+1 and
// vtrrecndlist[vtrnach] is indexed when the null node has to be created.
// Presumably the node list always has a node on either side of a match -
// confirm against the callers and the rule files.
void CwordVar::firstPass( ruleappl* rap )
{
  Cvgrnode* vgrnode=NULL,*lnp,**vgrpp;
  Cedge* edptr;
  // nodes of the new path in order, including both attachment points
  Cverklist<Cvgrnode*> connodes;
  int i;
  
  // new nodes for the part of the right-hand side that differs
  for( i=rap->therule->inident ; 
       i<rap->therule->rsanz -rap->therule->endident;
       i++)
    {
      vgrnode = (Cvgrnode*)vargraph->addNode();
      vgrnode->symbol = rap->therule->rs[i];
      connodes.addEl(&vgrnode);
    }

  // index of the record the path branches off from / returns to
  int vtrvor = rap->sind + rap->therule->inident  -1;
  int vtrnach = rap->eind - rap->therule->endident +1;


  if( rap->therule->inident > 0 )
    {
      // branch off at the last node of the shared prefix
      connodes.addIniEl(&vtrrecndlist[vtrvor].vgrndp);
    }
  else
    {
      // add a null-node if necessary
      if( vtrrecndlist[vtrvor].nullnode == NULL )
	{
	  // the direct edge is replaced by the detour through the null node
	  if( (edptr = vargraph->isIn(vtrrecndlist[vtrvor].vgrndp,
			    vtrrecndlist[vtrvor + 1].vgrndp) ) )
	    {
	      vargraph->delEdge(edptr);
	    }
	  vtrrecndlist[vtrvor].nullnode = (Cvgrnode*)vargraph->newNode();
	  vtrrecndlist[vtrvor].nullnode->symbol = strdup(NULLNODESYM);
	  if( vargraph->addNode(vtrrecndlist[vtrvor].nullnode) == NULL ) 
	    {
	      failure(__FILE__,__LINE__,"Internal Error");
	    }
	  vargraph->addEdge(NULL,vtrrecndlist[vtrvor].vgrndp,
			    vtrrecndlist[vtrvor].nullnode);
	  vargraph->addEdge(NULL,vtrrecndlist[vtrvor].nullnode,
			    vtrrecndlist[vtrvor+1].vgrndp);
			    
	}
      // branch off at the null node
      connodes.addIniEl(&vtrrecndlist[vtrvor].nullnode);	  
    }
  
  if( rap->therule->endident > 0 )
    {
      // return at the first node of the shared suffix
      connodes.addEl( &vtrrecndlist[vtrnach].vgrndp);
    }
  else
    {
      // add a null-node if necessary
      if( vtrrecndlist[vtrnach - 1].nullnode == NULL )
	{
	  if( (edptr = vargraph->isIn(vtrrecndlist[vtrnach - 1].vgrndp,
			    vtrrecndlist[vtrnach].vgrndp) ) )
	    {
	      vargraph->delEdge(edptr);
	    }
	  vtrrecndlist[vtrnach - 1].nullnode = (Cvgrnode*)vargraph->newNode();
	  vtrrecndlist[vtrnach - 1].nullnode->symbol = strdup(NULLNODESYM);
	  if( vargraph->addNode(vtrrecndlist[vtrnach - 1].nullnode) == NULL ) 
	    {
	      failure(__FILE__,__LINE__,"Internal Error");
	    }
	  vargraph->addEdge(NULL,vtrrecndlist[vtrnach -1 ].vgrndp,
			    vtrrecndlist[vtrnach - 1].nullnode);
	  vargraph->addEdge(NULL,vtrrecndlist[vtrnach -1].nullnode,
			    vtrrecndlist[vtrnach].vgrndp);
	}
      // return at the null node
      connodes.addEl( &vtrrecndlist[vtrnach-1].nullnode);
      
      // NOTE(review): this value is assigned again in the "connect it"
      // branch below, and the other branch reports a failure - it looks
      // like it has no lasting effect; confirm what failure() does.
       rap->rexit = connodes.getAnzel() > 0 ? 
	 connodes[ connodes.getAnzel() -1 ] : 
	vtrrecndlist[rap->eind - rap->therule->endident + 1].vgrndp;
    }
  rap->csplit = vtrvor;
  rap->crecom = vtrnach;
  
  //connect it
  // connodes holds at least the two attachment points at this stage
  if( connodes.getAnzel() > 1 )
    {
      lnp = *connodes.skipThru(init);
      // entry/exit are taken one position in from either end of connodes
      rap->rentry = connodes[1];
      rap->rexit = connodes[ connodes.getAnzel() - 2]; 
      // one edge between every pair of neighbours in connodes
      for( vgrpp = connodes.skipThru(next);
	   vgrpp !=NULL  ; lnp=*vgrpp, vgrpp = connodes.skipThru(next) )
	{
	  edptr= (Cvgredge*)vargraph->addEdge(NULL,lnp,*vgrpp);
	}
    }
  else
    {
      failure(__FILE__,__LINE__,"internal error");
    }
}

// Parses one line of the rule file into a newly allocated rule.
//   line      text of the form "<left side> > <right side> [weight [cellwk]]";
//             it is modified: the '>' is overwritten with a terminator
//   inventar  symbol inventory the rule sides are matched against
//   anzinv    number of entries in inventar
//   returns   the rule, or NULL if the line has no '>' or its left side
//             yields no symbols
// A right side that yields no symbols gives a rule with rsanz == 0.
// inident / endident count the symbols that left and right side share at
// the start and at the end.
// Changes against the previous version:
//  - ruleweight and cellwk are 0.0 when the line does not supply them; they
//    used to be copied from uninitialised locals
//  - the right side is read with a field width, so a long token can no
//    longer overrun the buffer
//  - the prefix/suffix scan checks the index against lsanz and rsanz before
//    it reads ls[] / rs[]; it used to read one element past either array
// NOTE(review): the condition "inident + endident <= lsanz" is kept as it
// was, so prefix and suffix may still overlap (e.g. for identical sides) -
// confirm whether that is intended.
rule* rule::createRegel(char* line,char** inventar,int anzinv)
{
  double wk = 0.0,cwk = 0.0;
  char *cp,right[50];

  if( ( cp = strchr(line,'>')) == NULL )
    {
      return NULL;
    }
  *cp = '\0';

  // 49 characters plus terminator fit into right[]
  if( sscanf(cp+1,"%49s %lf %lf",right,&wk,&cwk) < 1 )
    {
      right[0] = '\0';
    }

  rule* aregel = new rule;
  aregel->lsanz = parseString(line,&aregel->ls,inventar,anzinv);
  aregel->rsanz = parseString(right,&aregel->rs,inventar,anzinv);
  aregel->ruleweight=wk;
  aregel->cellwk=cwk;
  aregel->inident=0;
  aregel->endident=0;

  if( aregel->lsanz == 0  )
    {
      delete aregel;
      return NULL;
    }
  if( aregel->rsanz == 0 )
    {
      // nothing on the right side to compare with
      return aregel;
    }

  int ld,rd;

  // grow the shared prefix and the shared suffix alternately until neither
  // can be extended any more
  do 
    {
      ld=0;
      rd=0;
      if( aregel->inident < aregel->lsanz &&
          aregel->inident < aregel->rsanz &&
          aregel->inident + aregel->endident <= aregel->lsanz &&
          aregel->ls[aregel->inident] == aregel->rs[aregel->inident] )
        {
          aregel->inident++;
          ld=1;
        }
      if( aregel->endident < aregel->lsanz &&
          aregel->endident < aregel->rsanz &&
          aregel->inident + aregel->endident <= aregel->lsanz &&
          aregel->ls[aregel->lsanz - 1 - aregel->endident] == 
          aregel->rs[aregel->rsanz - 1 - aregel->endident] )
        {
          aregel->endident++;
          rd=1;
        }
    }
  while( rd || ld);

  return aregel;
}



// Splits a rule side into a sequence of inventory symbols.
//   str     text to split; characters from rulesepchars between symbols are
//           skipped
//   target  receives the array of symbol pointers (pointers into inv)
//   inv     symbol inventory
//   anzinv  number of entries in inv
//   returns number of symbols found, or 0 as soon as the text at the current
//           position starts with no inventory symbol; target is not written
//           in that case
// The first inventory entry that is a prefix of the remaining text wins, so
// the order of the inventory matters when one symbol is a prefix of another.
// Empty inventory entries are ignored: they used to match without consuming
// any text, which made the loop run forever.
int parseString(char* str,char*** target,char** inv,int anzinv)
{
  Cverklist<char*> v_string;
  int k;

  for( str += strspn(str,rulesepchars) ; 
       *str != '\0' ;
       str += strspn(str,rulesepchars))
    {
      for( k=0 ; k<anzinv ; k++ )
        {
          const size_t symlen = strlen(inv[k]);
          if( symlen > 0 && !strncmp(inv[k],str,symlen) )
            {
              v_string.addEl(inv + k);
              str += symlen;
              break;
            }
        }
      if(k==anzinv)
        {
          // unknown symbol: give up on the whole string
          return 0;
        }
    }
  return v_string.compact(target);
}

void CwordVar::setWordnums()
{
  int i,maxrank,wordno;
  Cvgrnode* vgrnode;
  Cnode** ndlist;
  int ndanz;
  Cedge** edptrptr;
  
  maxrank = vargraph->getRanks();
  ndanz = vargraph->nodes.compact(&ndlist);
  qsort((char*)ndlist,ndanz,sizeof(Cnode*),compranks);
  for( i=0 ; i<ndanz ; i++ )
    {
      vgrnode = (Cvgrnode*)ndlist[i];
      for( edptrptr=vgrnode->predecs.skipThru(init),
	     wordno = edptrptr==0 ? -1 : ((Cvgrnode*)(*edptrptr)->startnode)->wordnum ; 
	       edptrptr!=NULL ;
	       edptrptr=vgrnode->predecs.skipThru(next) )
	{
	  if( wordno != ((Cvgrnode*)(*edptrptr)->startnode)->wordnum )
	    {
	      warning(__FILE__,__LINE__,"Inconsistent graph %s, nodenr %d",
		      vargraph->filename,vgrnode->nodenr);
	    }
	}
      vgrnode->wordnum = 
	strpbrk(vgrnode->symbol,"#&") != NULL ? wordno+1 : wordno;
    }
}

// Tells whether two rule applications branch off and rejoin at the same
// positions and share the same prefix and suffix lengths.
//   returns 1 if csplit, crecom, inident and endident all agree, else 0
int CwordVar::sameContext(ruleappl* r1,ruleappl* r2)
{
  if( r1->csplit != r2->csplit )
    {
      return 0;
    }
  if( r1->crecom != r2->crecom )
    {
      return 0;
    }
  if( r1->therule->inident != r2->therule->inident )
    {
      return 0;
    }
  if( r1->therule->endident != r2->therule->endident )
    {
      return 0;
    }
  return 1;
}

  
int cmprapl(const void* p1,const void* p2)
{
  return (*(ruleappl**)p2)->therule->lsanz - 
    (*(ruleappl**)p1)->therule->lsanz;
}
  
