#! /usr/bin/python2.7

import re
import os
import sys
import tempfile
import time
import subprocess
import shutil

class ChunkerWrapper(object):
    """Command-line front end for the ``chunker.binary`` executable.

    The wrapper collects ``--option value`` pairs from ``sys.argv``, merges
    the master and per-language configuration files into one config file in
    a temporary work directory, derives the paths of the language parameter
    files and finally runs the chunker binary with the resulting options.

    Attributes set in ``__init__``:
        VERSION       version string printed by ``print_version``
        PARAMPREFIX   name prefix of the language directories in the MAUS root
        MAUSFILES     option name -> file name inside the language directory
    """

    def __init__(self):
        """Set the constants, empty option/config tables and all paths."""
        self.VERSION = "0.19"
        self.PARAMPREFIX = "PARAM."

        self.MAUSFILES = {
            "--hmminventar": "HMMINVENTAR",
            "--graphinventar": "GRAPHINVENTAR",
            "--kaninventar": "KANINVENTAR",
            "--ruleset": "rml-0.95.rul",
            "--dict": "DICT",
            "--mmffile": "MMF.mmf",
        }

        self._configkeys = {}   # lower-cased config key -> value
        self._arguments = {}    # "--option" -> value, passed to the binary
        self._clear = True      # remove the work directory after the run
        self.set_paths()

    @staticmethod
    def _emit(stream, *items):
        """Write *items* separated by single spaces plus a newline.

        Produces the same text as the Python 2 statement
        ``print >> stream, a, b`` for the strings used in this class, while
        also being valid syntax on Python 3.
        """
        stream.write(" ".join(str(item) for item in items) + "\n")

    def infer_arguments(self):
        """Fill in the options whose values are derived from the paths.

        Requires ``get_language`` to have set ``_paramdir``. Overwrites any
        value of the same option that was read from the command line.
        """
        self._arguments["--wordvar"] = os.path.join(self._mausroot, "word_var-2.0")
        self._arguments["--exceptionfile"] = os.path.join(self._configdir, "EXCEPTIONS")
        self._arguments["--workdir"] = self._workdir
        self._arguments["--logfile"] = os.path.join(self._workdir, "log")
        self._arguments["--config"] = self._configfile

        for option, filename in self.MAUSFILES.items():
            self._arguments[option] = os.path.join(self._paramdir, filename)

        self.get_warning_duration()

    def make_ipa_dict(self):
        """Write IPA versions of the dictionary and inventories to the work dir.

        Reads the tab-separated table ``PARAM.SAMPA/KANINVENTAR.inv`` (first
        column -> third column) and rewrites the file given by ``--dict``:

        * lines with fewer than two fields are copied unchanged;
        * entries whose first label is ``#`` or whose second field starts
          with ``<`` are copied unchanged, and the label is added to the
          graph inventory;
        * every other entry has its first label replaced by the mapped
          symbol, which is also added to the graph and kan inventories.

        A leading ``P`` directly followed by a digit is removed from the
        label before lookup. Labels without a (non-empty) mapping are
        reported on stderr and skipped. Afterwards ``--dict``,
        ``--graphinventar`` and ``--kaninventar`` point at the new files.
        """
        mapping = os.path.join(self._mausroot, "PARAM.SAMPA", "KANINVENTAR.inv")
        olddict = self._arguments["--dict"]
        newdict = os.path.join(self._workdir, "IPADICT")
        newgraph = os.path.join(self._workdir, "IPAGRAPHINVENTAR")
        newkan = os.path.join(self._workdir, "IPAKANINVENTAR")

        maus_ipa = {}
        with open(mapping) as mappinghandle:
            for line in mappinghandle:
                # Drop the line terminator first: otherwise a three-column
                # row would store its symbol with a trailing newline.
                columns = line.rstrip("\r\n").split("\t")
                if len(columns) >= 3:
                    maus_ipa[columns[0]] = columns[2]

        # All four files are managed by one "with" so none of them is left
        # open when an exception interrupts the conversion.
        with open(olddict) as olddicthandle, \
                open(newdict, "w") as newdicthandle, \
                open(newgraph, "w") as newgraphhandle, \
                open(newkan, "w") as newkanhandle:
            for line in olddicthandle:
                fields = line.split()
                if len(fields) <= 1:
                    newdicthandle.write(line)
                    continue

                fromlabel, tolabel = fields[0], fields[1]
                if fromlabel.startswith("P") and len(fromlabel) > 1 \
                        and re.match("[0-9]", fromlabel[1]):
                    fromlabel = fromlabel[1:]

                if fromlabel == "#" or tolabel.startswith("<"):
                    newdicthandle.write(line)
                    newgraphhandle.write(fromlabel + "\n")
                    continue

                ipalabel = maus_ipa.get(fromlabel, "")
                if ipalabel == "":
                    self._emit(sys.stderr, "ERROR: Could not match ", fromlabel,
                               " in IPA-SAMPA mapping")
                    # Skip the entry instead of failing with a KeyError or
                    # writing a line with an empty symbol.
                    continue

                newdicthandle.write(ipalabel + " " + tolabel + "\n")
                newgraphhandle.write(ipalabel + "\n")
                newkanhandle.write(ipalabel + "\n")

        self._arguments["--dict"] = newdict
        self._arguments["--graphinventar"] = newgraph
        self._arguments["--kaninventar"] = newkan

    def set_paths(self):
        """Derive every path used by the wrapper from this file's location.

        The MAUS root is the parent of the directory containing this file.
        The work directory name is built from the current time and the
        process id; the directory itself is created later by ``run_chunker``.
        Exits with status 1 if the chunker binary does not exist.
        """
        self._thisdir = os.path.abspath(os.path.dirname(os.path.realpath(__file__)))
        self._mausroot = os.path.dirname(self._thisdir)
        self._configdir = os.path.join(self._mausroot, "Chunker", "Config")
        self._masterconfig = os.path.join(self._configdir, "master.config")
        self._docfile = os.path.join(self._thisdir, "README")
        self._chunkerexe = os.path.join(self._thisdir, "chunker.binary")
        self._workdir = os.path.join(
            tempfile.gettempdir(),
            "chunker_" + str(int(time.time())) + "_" + str(os.getpid()))
        self._configfile = os.path.join(self._workdir, "config")

        if not os.path.exists(self._chunkerexe):
            self._emit(sys.stderr, "ERROR: Could not find chunker binary executable:",
                       self._chunkerexe)
            sys.exit(1)

    def read_arguments(self):
        """Store ``option value`` pairs from the command line.

        Every command-line token that starts with ``-`` and is followed by
        another token is stored as an option with that following token as
        its value. Single-dash options are normalised to the double-dash
        form (``-language`` becomes ``--language``).
        """
        tokens = sys.argv[1:]
        for option, value in zip(tokens, tokens[1:]):
            if not option.startswith("-"):
                continue
            if not option.startswith("--"):
                option = "-" + option
            self._arguments[option] = value

    def read_masterconfig(self):
        """Load default config values from the master config file.

        Each line is lower-cased and split on whitespace; the first field is
        the key and the second the value. Keys that are already present in
        ``_configkeys`` are not overwritten. Exits with status 1 if the file
        is missing.
        """
        if not os.path.exists(self._masterconfig):
            self._emit(sys.stderr, "ERROR: Cannot find master config file:",
                       self._masterconfig)
            sys.exit(1)

        with open(self._masterconfig) as masterconfighandle:
            for line in masterconfighandle:
                fields = line.lower().split()
                if len(fields) > 1:
                    self._configkeys.setdefault(fields[0], fields[1])

    def print_documentation(self):
        """Print the README help section and the language list to stderr.

        Only the lines between a line starting with ``<help>`` and a line
        starting with ``</help>`` are printed. For a line that starts with
        ``--name`` where ``name`` is a key of the master config, the default
        value is appended as ``<default=value>``. Exits with status 1 if the
        README is missing.
        """
        self.read_masterconfig()
        if not os.path.exists(self._docfile):
            self._emit(sys.stderr,
                       "ERROR: Tried to print parameter information but the README does not seem to exist:",
                       self._docfile)
            sys.exit(1)

        with open(self._docfile) as handle:
            writing = False
            for line in handle:
                line = line.rstrip()

                if line.startswith("</help>"):
                    writing = False

                if writing:
                    default = ""
                    if line.startswith("--"):
                        option = line.split()[0][2:]
                        if option in self._configkeys:
                            default = "<default=" + self._configkeys[option] + ">"
                    self._emit(sys.stderr, line, default)

                # Checked last so that the "<help>" marker line itself is
                # not printed.
                if line.startswith("<help>"):
                    writing = True

        self.print_all_languages()

    def print_all_languages(self):
        """Print the available ``--language`` values to stderr.

        The values are the names of the entries in the MAUS root that start
        with ``PARAMPREFIX``, with that prefix removed, sorted without regard
        to case and printed six per line.
        """
        prefix = self.PARAMPREFIX
        # Strip the literal prefix; treating it as a regular expression would
        # let the "." match any character and remove later matches as well.
        languages = sorted(
            (entry[len(prefix):] for entry in os.listdir(self._mausroot)
             if entry.startswith(prefix)),
            key=lambda name: name.lower())

        self._emit(sys.stderr, "#" * 69)
        self._emit(sys.stderr, "#" * 29 + " LANGUAGES " + "#" * 29)
        self._emit(sys.stderr, "#" * 69 + "\n")

        self._emit(sys.stderr, "Possible values for the --language option")
        self._emit(sys.stderr, "(note that some of these are duplicates, e.g. 'deu-DE' and 'deu'):\n")

        lines = [", ".join(languages[i:i + 6]) for i in range(0, len(languages), 6)]
        self._emit(sys.stderr, ",\n".join(lines))

        self._emit(sys.stderr, "\n")

    def print_version(self):
        """Print the wrapper version to stdout."""
        self._emit(sys.stdout, self.VERSION)

    def run_chunker(self):
        """Prepare the work directory, run the chunker binary and exit.

        Always terminates the process via ``sys.exit``: with the binary's
        return code, or with 1 if the binary could not be started or
        returned 0 without producing the output file. The work directory is
        removed afterwards unless ``--clear`` was given as ``false`` or ``0``.
        """
        self.read_arguments()
        self.read_masterconfig()

        self.get_language()
        self.infer_arguments()

        self.get_outfile()
        self.get_clear()

        os.mkdir(self._workdir)
        try:
            if self._arguments.get("--insymbols") == "ipa":
                self.make_ipa_dict()

            self.write_configfile()
            self.write_argumentstring()

            # Pass the options as a list so that values taken from the
            # command line are not re-interpreted by a shell.
            command = [self._chunkerexe]
            for option, value in self._arguments.items():
                command.extend([option, value])

            try:
                errorcode = subprocess.call(command)
            except OSError as error:
                self._emit(sys.stderr, "ERROR: Could not execute chunker binary:",
                           self._chunkerexe, "(" + str(error) + ")")
                sys.exit(1)
        finally:
            # Also reached when a preparation step fails, so the temporary
            # directory is not left behind.
            if self._clear:
                shutil.rmtree(self._workdir)

        if errorcode == 0 and not os.path.exists(self._outfile):
            self._emit(sys.stderr,
                       "ERROR: Chunker exited with code 0 but there is no output file: ",
                       self._outfile)
            sys.exit(1)

        sys.exit(errorcode)

    def write_configfile(self):
        """Merge the language config into ``_configkeys`` and write the result.

        Entries of the language config file (if it exists) replace entries
        with the same key. The merged table is written to ``_configfile`` as
        ``KEY value`` lines with upper-cased keys.
        """
        if os.path.exists(self._languageconfig):
            with open(self._languageconfig) as languageconfighandle:
                for line in languageconfighandle:
                    fields = line.lower().split()
                    if len(fields) > 1:
                        self._configkeys[fields[0]] = fields[1]

        with open(self._configfile, "w") as confighandle:
            for key, value in self._configkeys.items():
                confighandle.write(key.upper() + " " + value + "\n")

    def write_argumentstring(self):
        """Store all options as one space-separated string in ``_argumentstring``."""
        self._argumentstring = " ".join(
            option + " " + value for option, value in self._arguments.items())

    def get_warning_duration(self):
        """Set ``--warnchunkduration`` from the output of ``maus GETBPFTHRESHOLD=yes``.

        If the command fails or its output does not start with a digit, a
        warning is printed and the option is left unset.
        """
        cmd = "maus GETBPFTHRESHOLD=yes"
        proc = subprocess.Popen(cmd, shell=True,
                                stdout=subprocess.PIPE, stderr=subprocess.PIPE)
        # communicate() drains both pipes while waiting; calling wait() first
        # can block forever once a pipe buffer is full.
        stdout, _ = proc.communicate()
        if not isinstance(stdout, str):
            # Python 3 returns bytes from the pipe.
            stdout = stdout.decode("utf-8", "replace")

        if proc.returncode == 0 and re.match(r"\d+", stdout):
            self._arguments["--warnchunkduration"] = stdout.strip()
        else:
            self._emit(sys.stderr,
                       "WARNING: Could not read chunk duration threshold from MAUS. Using default.")

    def get_clear(self):
        """Disable work-directory cleanup if ``--clear`` is ``false`` or ``0``."""
        if self._arguments.get("--clear") in ("false", "0"):
            self._clear = False

    def get_outfile(self):
        """Remember the ``--outfile`` value; exit with status 1 if it is missing."""
        if "--outfile" not in self._arguments:
            self._emit(sys.stderr, "ERROR: Missing option value: --outfile")
            sys.exit(1)
        self._outfile = self._arguments["--outfile"]

    def get_language(self):
        """Resolve ``--language`` to its parameter directory and config file.

        Exits with status 1 if the option is missing or if the directory
        ``PARAMPREFIX + language`` does not exist in the MAUS root.
        """
        if "--language" not in self._arguments:
            self._emit(sys.stderr, "ERROR: Missing option value: --language")
            sys.exit(1)

        self._language = self._arguments["--language"]
        self._paramdir = os.path.join(self._mausroot, self.PARAMPREFIX + self._language)
        self._languageconfig = os.path.join(self._configdir, self._language + ".config")
        if not os.path.exists(self._paramdir):
            self._emit(sys.stderr, "ERROR: Not a known language:", self._language)
            sys.exit(1)
        

if __name__ == "__main__":
    # Entry point: the first command-line token selects the action. Without
    # any token the documentation is shown, exactly as for --help.
    wrapper = ChunkerWrapper()

    first_option = sys.argv[1] if len(sys.argv) > 1 else "--help"

    if first_option in ("--help", "-help"):
        wrapper.print_documentation()
    elif first_option in ("--version", "-version"):
        wrapper.print_version()
    else:
        # run_chunker() terminates the process itself with the exit status
        # of the chunker run.
        wrapper.run_chunker()

    sys.exit(0)

