#!/bin/sh
# Positional defaults for the two inventory files; a later
# inventar1=... / inventar2=... option overrides them.
inventar1=$1 # e.g. /home/kip/projekte/autoseg/mansegreg/kielinventar.txt
inventar2=$2 # e.g. /home/kip/projekte/vorschlagstrans/inventar.txt

# Generic option parser for speech scripts.
#
# Options are supplied in keyword=value format and must precede the
# input files; parsing stops at the first argument that is not a valid
# keyword=value pair.

#######################################
# Assign one keyword=value argument to the shell variable "keyword".
# The keyword must be a valid shell identifier.  The value is assigned
# verbatim: it is never re-parsed by the shell, so quotes, blanks and
# command substitutions inside it are harmless.
# Globals:   the variable named by the keyword (written);
#            key, val (scratch, unset again before returning)
# Arguments: $1 - candidate argument
# Returns:   0 if the assignment was made, 1 if $1 is not an option
#######################################
set_option() {
        case "$1" in
        [A-Za-z_]*=*) ;;
        *) return 1 ;;
        esac
        key=${1%%=*}
        val=${1#*=}
        case "$key" in
        *[!A-Za-z0-9_]*)
                unset key val
                return 1 ;;
        esac
        # Only the (validated) name is expanded before eval runs; the
        # value is expanded by the assignment itself.
        eval "$key=\$val"
        unset key val
}

# Actually do the argument parsing here
while [ $# -gt 0 ]; do
        set_option "$1" || break
        shift
done

# end option parser 

# Run the conversion as a single gawk program; its input files are the
# arguments left over after option parsing.
gawk '
# Start in the "kan" state with all buffers empty.
BEGIN {
  n = 0
  state = "kan"
  kanbuf = rbuf = wtstack = ""
}

# inventar1 and inventar2 reach awk as command-line assignments, which
# are not yet applied while BEGIN runs, so the inventories are loaded
# when the first input record arrives.
NR == 1 {
  anzinv1 = readinvent1(inventar1)
  anzinv2 = readinvent2(inventar2)
}
  
# Rule for every record matching "kend": read the following records up
# to and including one whose first field is "hend", and print their
# tokens as symbol lines followed by one empty line.
#
# Per token:
#   "*"         stands for a double blank in the input (see the first
#               gsub) and queues the marker "#" in wtstack;
#   "-", "-h"   are skipped;
#   anything else goes through parse(); results containing "unk" are
#   dropped.
# Labels containing "-" are collected in state "var" (left side in
# kanbuf, right side in rbuf), all other labels in state "kan".  Every
# state change flushes the finished run: a "kan" run as one
# comma-separated list, a "var" run as "<left list>-<right list>".
/kend/{
  n=1;
  wl=0;            # 1 while a "#" marker is queued in wtstack
  wtstack = ""
  kanbuf = ""
  rbuf= ""
  state = "kan"
  while( getline > 0 )
    {
      if( $1 == "hend" )
        {
          break;
        }
      # Turn double blanks into a "*" token, then delete the characters
      # # , . ? = + % ; _ and both quote characters from the record.
      gsub(/  /," * ");
      gsub( /[#'\'\"',.?=+%;_]/ ,"", $0 );
      for( i=1 ; i<=NF ; i++ )
        {
          if( $i == "*" )
            {
              if( wl == 0 )
                {
                  wtstack = "#"
                  wl=1;
                }
              continue;
            }
          else if( $i== "-" || $i == "-h" )
            {
              continue
            }

          label = parse($i);
          if( label !~ /unk/ )
            {
              if( label ~ /-/ )
                {
                  if( state != "var" )
                    {
                      # kan -> var: flush the plain run with its marker.
                      print splitinsym(kanbuf wtstack)
                      wtstack = ""
                      kanbuf = ""
                      rbuf= ""
                    }
                  state = "var"
                }
              else
                {
                  if( state != "kan" )
                    {
                      # var -> kan: flush the finished pair run.
                      print splitinsym(kanbuf) "-" splitinsym(rbuf)
                      kanbuf = ""
                      rbuf= ""
                    }
                  state = "kan"
                }
              # A label without "-" leaves a[2] empty, so rbuf only
              # grows by the queued marker in that case.
              split(label,a,"-")
              kanbuf = kanbuf wtstack a[1]
              rbuf = rbuf wtstack a[2]
              wl = 0;
              wtstack = ""
            }
        }
    }
  # Final flush.  The pending pair run is always printed, exactly as on
  # a var -> kan change; only the extra marker line depends on wl.
  # (Previously the pair itself was printed only when wl was 1, which
  # dropped the last pair of an utterance that ended without a double
  # blank.)
  if(state=="var")
    {
      print splitinsym(kanbuf) "-" splitinsym(rbuf)
      if( wl == 1 )
        {
          print wtstack
        }
    }
  else
    {
      print splitinsym(kanbuf wtstack)
    }
  printf("\n")
}
# After all input: write every label that check() could not find in the
# first inventory to the file "unklog", one label per line.
END {
  for( unknown in unklist )
    print unknown > "unklog"
}
# parse(string): validate one transcription token with check().
#   A token without "-" is returned as check() reports it.  For a token
#   of the form "x-y" both sides are checked; the result is the checked
#   label alone when both sides agree, otherwise "<left>-<right>".
#   Only the first two "-"-separated parts are looked at.
# The names after the wide gap in the parameter list are awk locals, so
# the function no longer overwrites the globals n and label that the
# main rule uses.  Callers keep passing a single argument.
function parse( string,    parts, count, leftside, rightside )
{
  count = split(string, parts, "-")
  if( count == 1 )
    {
      return check(string)
    }

  leftside = check(parts[1])
  rightside = check(parts[2])
  if( leftside == rightside )
    {
      return leftside
    }
  return leftside "-" rightside
}
# check(label): look label up among the entries of the symbol array.
#   Returns the matching entry, or the empty string for an empty label.
#   Any other label is counted in unklist and reported as "(unk)".
function check( label )
{
  if( label != "" )
    {
      for( j in symbol )
        if( symbol[j] == label )
          return symbol[j]

      # Not found: remember the label so the END rule can log it.
      unklist[label] += 1
      return "(unk)"
    }

  return label
}
# splitinsym(seq): cut seq into symbols of the second inventory
# (osymbol[0 .. anzinv2-1]) and return them joined by ",".
#   At every position the first inventory entry that is a prefix of the
#   remaining text wins, so the order of the inventory file matters.
#   If no entry fits, the symbols found so far are returned with
#   "(error)" appended.
# Symbols are compared literally rather than as regular expressions, so
# entries containing characters such as ( [ | or \ match themselves.
# Empty entries are skipped; an empty entry used to match without
# consuming anything, which made the loop run forever.
# The names after the wide gap in the parameter list are awk locals.
function splitinsym( seq,    rest, out, k, sym, hit )
{
  rest = seq
  out = ""
  while( length(rest) > 0 )
    {
      hit = 0
      for( k=0 ; k<anzinv2 ; k++ )
        {
          sym = osymbol[k]
          if( sym != "" && substr(rest, 1, length(sym)) == sym )
            {
              rest = substr(rest, length(sym)+1)
              out = out ( out=="" ? "" : "," ) sym
              hit = 1
              break
            }
        }
      if( !hit )
        {
          return out "(error)"
        }
    }
  return out
}
# readinvent1(filename): load the first inventory.
#   Stores the first whitespace-separated field of every line of
#   filename in symbol[0], symbol[1], ... and returns the number of
#   lines read (0 if the file cannot be read; a warning is then written
#   to standard error).
# Each line is read into a local variable so that $0 and NF of the
# input record being processed by the caller stay untouched, and the
# file is closed afterwards so it can be read again later.
# The names after the wide gap in the parameter list are awk locals.
function readinvent1(filename,    count, line, fields, status)
{
  count = 0
  while( (status = (getline line < filename)) > 0 )
    {
      split(line, fields)
      symbol[count++] = fields[1]
    }
  if( status < 0 )
    {
      print "readinvent1: cannot read " filename > "/dev/stderr"
    }
  close(filename)
  return count
}
# readinvent2(filename): load the second inventory.
#   Stores the first whitespace-separated field of every line of
#   filename in osymbol[0], osymbol[1], ... and returns the number of
#   lines read (0 if the file cannot be read; a warning is then written
#   to standard error).
# Each line is read into a local variable so that $0 and NF of the
# input record being processed by the caller stay untouched, and the
# file is closed afterwards so it can be read again later.
# The names after the wide gap in the parameter list are awk locals.
function readinvent2(filename,    count, line, fields, status)
{
  count = 0
  while( (status = (getline line < filename)) > 0 )
    {
      split(line, fields)
      osymbol[count++] = fields[1]
    }
  if( status < 0 )
    {
      print "readinvent2: cannot read " filename > "/dev/stderr"
    }
  close(filename)
  return count
}' inventar1="$inventar1" inventar2="$inventar2" "$@"


