#!/usr/bin/python3

import time
import sys
import os
import argparse
import tempfile
import shutil

from pathlib import Path #needed for check if the file exists

from SRC.align import main as align_main
from SRC.format import main as format_main

# Tool version; printed by the --version / -v command-line switch.
VERSION = 0.9

# Tier names that must occur (as "<NAME>:") in the input BPF file;
# the script aborts with exit status 1 when one of them is missing.
TIERS_MANDATORY = ["SAM", "ORT", "MAU"]

def parse_arguments():
    """Parse the command line of the subtitle tool.

    ``--version`` / ``-v`` given as the first argument prints VERSION and
    exits with status 0 before any other parsing takes place. When the
    program is called without any argument, the help text is shown instead
    of an error about missing required arguments (argparse then exits with
    status 0). Invalid command lines make argparse exit with status 2.

    Returns:
        argparse.Namespace with the attributes ``bpf``, ``transcription``,
        ``outfile``, ``marker``, ``tier``, ``maxlength``, ``outformat``,
        ``windowsize``, ``verbose`` and ``no_clean``.
    """
    cli_args = sys.argv[1:]

    if cli_args and cli_args[0] in ("--version", "-v"):
        print(VERSION)
        sys.exit(0)

    # A bare invocation shows the help. Work on a local list so that the
    # process-wide sys.argv is not modified as a side effect.
    if not cli_args:
        cli_args = ["-h"]

    # add_help is disabled so that -h/--help can live in its own group below.
    parser = argparse.ArgumentParser(add_help = False)
    helpNamed = parser.add_argument_group('help')
    requiredNamed = parser.add_argument_group('required arguments')
    optionalNamed = parser.add_argument_group('optional arguments')

    requiredNamed.add_argument('--bpf', required = True, type = str, help = 'BPF file containing at least an ORT tier and a MAU tier')
    # NOTE: --transcription is optional (empty string by default); it is kept
    # in this group only to preserve the established layout of the help text.
    requiredNamed.add_argument('--transcription', required = False, type = str, help = 'Original UTF-8 encoded transcription. If None, assume that BPF already contains TRO tier.', default = "")
    requiredNamed.add_argument('--outfile', required = True, type = str, help = 'Output path')

    helpNamed.add_argument("-h", "--help", action="help")

    optionalNamed.add_argument('--marker', type = str, help = 'Marker used to split transcription into subtitles. If set to punct (default), the transcription is split after some punctuation marks (currently [.!?:]). If set to newline, the transcription is split at newlines (\\n or \\r\\n). If set to tag, the program expects a special <BREAK> tag inside the transcription.', choices = ['tag', 'newline', 'punct'], default = 'punct')
    optionalNamed.add_argument('--tier', type = str, help = 'Tier to create subtitles from. If set to TRO, the transcription is aligned with the ORT tier to create subtitles. If set to ORT, the subtitles are based on the ORT tier. In this case, <break> tags are expected in the ORT tier.', choices = ['TRO', 'ORT'], default = 'TRO')
    optionalNamed.add_argument('--maxlength', type = int, help = 'Maximum subtitle length. If set to 0 (default), subtitles of indefinite length are created, based only on the distance of the split markers. If set to a value greater than 0, subtitles are split whenever a stretch between two neighbouring split markers is longer than that value (in words). Caution: This may lead to subtitle splits in suboptimal locations (e.g. inside syntactic phrases).', default = 0)
    optionalNamed.add_argument('--outformat', type = str, help = 'Output format. srt (default) or sub means SubRip or SubViewer subtitle format. bpf means a BAS Partitur file containing an additional TRO tier. bpf+trn means the BPF with an appended TRN tier corresponding to the subtitles. trn returns the TRN tier on its own. vtt returns WEBVTT subtitles.', choices = ['srt', 'sub', 'bpf', 'bpf+trn', 'trn', 'vtt'], default = 'srt')
    optionalNamed.add_argument('--windowsize', type = int, default = 100, help = 'Window size used during alignment (default 100). A low value speeds up performance but risks incorrect alignments in cases where the ORT tier and the transcription do not match well. The default should be sufficient unless there are substantial amounts of text in the transcription that do not feature in the ORT tier, or vice versa.')

    optionalNamed.add_argument('--verbose', action = "store_true", help = 'Set this flag for progress information')
    optionalNamed.add_argument('--no_clean', action = "store_true", help = 'Set this flag to keep temporary files after the process is done')

    return parser.parse_args(cli_args)

def main():
    """Validate the inputs, run alignment and formatting, then clean up.

    Exits with status 1 (message on stderr) when the BPF input file is
    missing or lacks one of the tiers listed in TIERS_MANDATORY.

    Raises:
        Exception: if the output file cannot be opened for writing.
    """
    args = parse_arguments()

    ###### CHECKS
    #check all mandatory input files for existence
    files_mandatory = [args.bpf]

    for currFile in files_mandatory:
        if not Path(currFile).is_file():
            print("Mandatory input file " + currFile + " does not exist. Aborting!", file=sys.stderr)
            sys.exit(1)

    # Only the ASCII tier labels are searched for, so undecodable bytes are
    # replaced instead of aborting the check with a UnicodeDecodeError.
    with open(args.bpf, encoding="utf-8", errors="replace") as bpfHandle:
        fileContent = bpfHandle.read()

    # check if SAM/MAU/ORT tier is present (works for files that are not too big):
    # (tested for a large file with 13780 tokens - more than can be processed by MAUS online - takes around 4 ms)
    for currTier in TIERS_MANDATORY:
        if currTier + ':' not in fileContent:
            print("Input file does not contain " + currTier + " tier. Aborting!", file=sys.stderr)
            sys.exit(1)

    ###### END CHECKS

    # Probe that the output path is writable before doing any real work.
    # Append mode creates a missing file and never truncates an existing one.
    try:
        with open(args.outfile, "a"):
            pass
    except OSError as err:
        raise Exception("Cannot write to " + args.outfile) from err

    # mkdtemp yields a unique directory that only the current user can access.
    tempdir = tempfile.mkdtemp(prefix="subtitle_")
    try:
        bpf = args.bpf
        if args.tier == "TRO" and args.transcription:
            # Align the transcription first and continue with the result,
            # which is written into the temporary directory.
            splitpunct = [".", "!", ":", "?", "…"]
            temp = os.path.join(tempdir, os.path.basename(args.bpf))
            align_main(bpf, args.transcription, temp, args.verbose, args.windowsize, splitpunct)
            bpf = temp

        format_main(bpf, args.bpf, args.outfile, args.marker, args.outformat, args.maxlength, tier = args.tier)
    finally:
        # Remove the temporary files even when a processing step failed,
        # unless the user asked to keep them.
        if not args.no_clean:
            shutil.rmtree(tempdir)


if __name__ == "__main__":
    main()


