+ rulper=0.7
+ inventar2=/home/kip/projekte/vorschlagstrans/inventar.txt
+ discan=0.01
+ disrule=0.01
+ '[' 2 -gt 0 ']'
+ break
+ rulout=rlind-0.7.rul
+ cellout=rlind-0.7.cel
+ gawk '
# Start-up: initialise the state shared by the rules of this program.
#   kanbuf   - text accumulated since the last empty record; flushed to
#              "kanpfile" on an empty record and once more at the end
#   lkontext - last comma-separated symbol of the most recent line, used
#              as the left context of the next pair line
# The former inr/canpart/realpart/wt assignments were removed: nothing in
# this program ever reads those variables.
BEGIN { kanbuf = ""; lkontext = "" }
# Empty record: write the buffered text as one line of "kanpfile" and start
# a fresh buffer. The buffer is written even when it is itself empty, and
# no later rule sees the empty record.
$0 == "" {
  print kanbuf > "kanpfile"
  kanbuf = ""
  next
}
# Record without any "-": treated as a plain line. The last comma-separated
# symbol of its first field becomes the left context for a following pair
# line, and the complete record is appended to the buffer.
index($0, "-") == 0 {
  nsym = split($1, ctx, ",")
  lkontext = ctx[nsym]
  kanbuf = kanbuf $0
}
# Record containing "-": the first field is split at "-" into a left part
# and a right part (NOTE(review): presumably canonical form vs. realised
# form - confirm against the producer of this input). The record that
# follows is consumed here, bypassing the other rules, and supplies the
# right context of the rule being counted.
$0 ~ /-/ {
  split($1, items, "-")
  if (items[1] == "")
    next                        # nothing before the "-": ignore the record

  kanbuf = kanbuf items[1]      # the buffer keeps the left part with commas
  gsub(",", "", items[1])
  gsub(",", "", items[2])
  lhs = (items[1] == "" ? "_" : items[1])   # "_" marks an empty side
  rhs = (items[2] == "" ? "_" : items[2])

  # The core count does not depend on the context, so record it first.
  rulecore[lhs "-" rhs]++

  # Fetch the following record. At end of input getline returns 0 and
  # leaves $0 untouched; previously that made this very record act as its
  # own right context, counting a bogus rule and appending the pair to the
  # buffer a second time. Without a following record there is no right
  # context, so no rule is counted.
  if ((getline) <= 0)
    next

  n = split($1, syms, ",")
  rule[lkontext "-" lhs "-" rhs "-" syms[1]]++
  lkontext = syms[n]
  kanbuf = kanbuf $1
}
# After all input: flush the last buffer, keep only the frequent rule
# cores, and write the surviving rules plus their context inventories to
# "rawrules", "lsstr", "rsstr" and "rcsstr".
END {
  # Flush a final group that was not terminated by an empty record.
  if (kanbuf != "")
    print kanbuf > "kanpfile"

  # hist[c] = number of rule cores seen exactly c times; total is the sum
  # of all core counts and maxin the largest single count.
  maxin = 0
  total = 0
  for (core in rulecore) {
    cnt = rulecore[core]
    if (cnt > maxin)
      maxin = cnt
    hist[cnt]++
    total += cnt
  }

  # Walk the counts downwards, adding up the occurrences they cover, and
  # stop at the first count where more than 0.7 of all occurrences are
  # covered. cutoff ends at 0 only when there are no rule cores at all.
  # NOTE(review): 0.7 equals the rulper value in the trace header and was
  # presumably substituted by the calling script - confirm.
  covered = 0
  cutoff = maxin
  while (cutoff > 0) {
    if (cutoff in hist) {
      covered += hist[cutoff] * cutoff
      if (covered > total * 0.7)
        break
    }
    cutoff--
  }
  printf("[dicarding rules with count less than %d (%d)]\n", cutoff, maxin)

  # Drop every core that is rarer than the cutoff.
  for (core in rulecore)
    if (rulecore[core] < cutoff)
      delete rulecore[core]

  # Keep each full rule whose core survived. Rule keys have the form
  # left-context "-" left-part "-" right-part "-" right-context.
  for (key in rule) {
    split(key, pts, "-")
    if ((pts[2] "-" pts[3]) in rulecore) {
      print rule[key] " " key > "rawrules"
      lrulek[pts[1] "-" pts[2]]++
      rrulek[pts[2] "-" pts[4]]++
      can[pts[2]]++
    }
  }

  # Distinct left-context/left-part pairs, left-part/right-context pairs
  # and left parts, one per line.
  for (key in lrulek)
    print key > "lsstr"
  for (key in rrulek)
    print key > "rsstr"
  for (key in can)
    print key > "rcsstr"
}'
