#!/usr/bin/env python3

###### mausbpf2exb
#   author: David Huss
#   email: david.huss@phonetik.uni-muenchen.de
#   requires: python version >= 3.4
#   For more information, consult README.md or run
#       `python3 mausbpf2exb.py --help`
######


# TODO:
# check out whether '[ \t]+' is valid separator intra-tier as well
# ah, speaker_id has to be an ID in the xml sense, as does tier_id
# and language_lang would be an NMTOKEN, whatever that is
# implement .ags parser?
# ask Flo again if xmllint failing should produce a warning or an error
# add debug information for changes introduced in branch handle-incomplete-headers
# add debug information to xmllint block
# add debug information for all changes post lockdown basically
# add error message in the style of 'could not parse the following line:'
# format with black
# add missing docstrings
# use textwrap to format error messages properly
# document public attributes of classes as well
# maybe call the class SubElement instead of creating an instance of Element and then appending it
# fix IndexError that appears when the program relies on a reference tier which actually doesn't cover every link, such as SAP
#   you got this, for example, with:
#   py ../../../mausbpf2exb/mausbpf2exb.py ORT-KAN-TR2-MAU-SAP.par -o /dev/null --ignore MAU
#   also, update README concerning that
# maybe check not only that no start time < 1 sample, but also no end time > length of audio file?
# is referenced-file compulsory according to the DTD? if not, can you get rid of it?
# adjust program name and get rid of 'py' in help message and readme
# convert all tier names passed via --ignore to uppercase


import argparse
from os import path
import sys
import re
from collections import OrderedDict
from xml.etree.ElementTree import *
from xml.dom import minidom
import shutil
import subprocess
from textwrap import indent


class ParFile:
    """
    A class which represents/encapsulates files in the BAS Partitur Format.

    Arguments:
        inputstr (str, optional): a string with the contents of a BPF file.
        If this argument is provided, the method parse() will automatically be called.
        An alternative way to instantiate the class is to use the class method fromFilepath() -
        this method expects a filename and will automatically retrieve the contents of the file and parse them.

    Attributes:
        header (OrderedDict): header labels mapped to their (string) values.
        tiers (list): the successfully parsed tier objects, in file order.
        link_times (list): only present after infer_link_times() has run.
    """
    def __init__(self, inputstr = None):
        """See class docstring."""
        self.header = OrderedDict()
        self.raw_tiers = OrderedDict()
        self.tiers = []
        self.obligatory_header_elements = {"LHD", "REP", "SNB", "SAM",
                                           "SBF", "SSB", "NCH", "SPN"}
        self.optional_header_elements = {"FIL", "TYP", "DBN", "VOL",
                                         "DIR", "SRC", "BEG", "END",
                                         "RED", "RET", "RCC", "CMT",
                                         "SPI", "PCF", "PCN", "EXP",
                                         "SYS", "DAT", "SPA", "MAO",
                                         "GPO", "SAO"}
        self.all_possible_header_elements = self.obligatory_header_elements.union(self.optional_header_elements)
        if inputstr:
            self.parse(inputstr)

    def parse(self, inputstr):
        """
        Take the contents of a BPF file, parse it, and encapsulate it in the class's properties and methods.

        Problems are reported through echo(): unparsable lines as errors,
        missing/unknown header elements and unknown tiers as warnings.

        Arguments:
            inputstr (str): The contents of the BPF file.
        """
        if not re.match(r"[A-Z]{3}:\s", inputstr):
            # this is to check that the input is actually a BPF file.
            # it won't catch everything, but at least the files that
            # are very clearly something different.
            echo("The input does not seem to be a valid BPF file", type = "error")

        # a single shared iterator, so that the tier loop below resumes
        # right after the line on which the header loop stopped
        lines = iter(inputstr.splitlines())
        # general regex for valid lines of a BPF file.
        # Its first group is the 3-letter tier or header element
        # name, its second group are the contents of the line
        # after the colon and whitespace separator
        bpf_line = re.compile(r"^([A-Z0-9]{3}):[ \t]+(.*)$")

        # extract and parse header
        for line in lines:
            if line.startswith("LBD:"):
                # the LBD line signifies the end of the header
                break
            match = bpf_line.match(line)
            if match:
                label, value = match.groups()
                self.header[label] = value
            else:
                echo("Could not parse the following line:\n\t{}".format(line),
                     type = "error")
        if not self.obligatory_header_elements.issubset(self.header):
            # check if all obligatory header elements are present in header after parsing
            # (sorted so that the message is deterministic)
            echo("The BPF header is missing the following obligatory element(s): {}".format(
                 ", ".join(sorted(self.obligatory_header_elements.difference(self.header)))),
                 type = "warning")
        if not set(self.header).issubset(self.all_possible_header_elements):
            # check that there are no unknown header elements
            # NOTE(review): the message says "discarded", but the elements
            # stay in self.header - confirm which of the two is intended
            echo("Detected one or more unknown BPF header elements, which will be discarded: {}".format(
                 ", ".join(sorted(set(self.header).difference(self.all_possible_header_elements)))),
                 type = "warning")

        # extract and parse tiers
        for line in lines:
            match = bpf_line.match(line)
            if match:
                line_tiername, line_values = match.groups()
                # creates the entry for this tier on first sight
                self.raw_tiers.setdefault(line_tiername, []).append(line_values)
            else:
                echo("Could not parse the following line:\n\t{}".format(line),
                     type = "error")

        for tier_name, raw_items in self.raw_tiers.items():
            try:
                self.tiers.append(ParTier(tier_name).parse(raw_items))
            except AssertionError:
                echo("The input contains the following unknown BPF tier, which will be discarded: {}".format(tier_name),
                     type = "warning")

        del self.raw_tiers

    @classmethod
    def fromFilepath(cls, filepath):
        """
        Open a BPF file and pass its contents to the class constructor, returning an instance of the class.

        Arguments:
            filepath (str): The name of the BPF file.
        """
        with open(filepath) as file:
            return cls(file.read())

    def pop_header_element(self, label):
        """
        Pop an element from the BPF header.

        Arguments:
            label (str): The name of the header element.

        Raises:
            KeyError: if the element is not present in the header.
        """
        return self.header.pop(label)

    def in_header(self, label):
        """
        Return a boolean indicating whether the specified element is present in the BPF header.

        Arguments:
            label (str): The name of the header element.
        """
        return label in self.header

    def samples_to_seconds(self, sample):
        """
        Convert samples to seconds in accordance with the BPF file's sampling rate.

        Arguments:
            sample (int, float): Number of samples.

        Returns (float): Number of seconds.
        """
        return sample / int(self.header["SAM"])

    def remove_tier(self, name):
        """
        Remove a BPF tier belonging to the BPF file object.

        Arguments:
            name (str): The name of the tier.
        """
        # rebuild in place instead of calling remove() while iterating,
        # which would skip the element following each removed one
        self.tiers[:] = [tier for tier in self.tiers if tier.name != name]

    def get_tier_object(self, name):
        """
        Return a BPF tier object (which belongs to the BPF file object).

        Arguments:
            name (str): The name of the tier.

        Returns: the first tier with that name, or None if there is none.
        """
        for tier in self.tiers:
            if tier.name == name:
                return tier
        return None

    def tier_exists(self, name):
        """
        Return a boolean indicating whether a tier of the given name exists.

        Arguments:
            name (str): The name of the tier.
        """
        return any(tier.name == name for tier in self.tiers)

    @staticmethod
    def get_reference_tier_hierarchy():
        """
        Return the 'hierarchy' of reference tiers used for symbolic link inference.
        For more information have a look at the method infer_link_times().
        """
        return ["MAU", "SAP", "WOR", "PHO", "MAS"]

    def get_reference_tier(self):
        """
        Return the name of the first tier of the reference tier hierarchy
        which is present in the file.

        Raises:
            AssertionError: if none of the reference tiers is present
                (this exception will be caught in par_to_exb).
        """
        for reference_tier in ParFile.get_reference_tier_hierarchy():
            if self.tier_exists(reference_tier):
                return reference_tier
        # raised explicitly (instead of via an assert statement) so that
        # the check is not stripped when python runs with -O
        raise AssertionError("no valid reference tiers")

    def get_tiers_requiring_link_inference(self):
        """
        Return a list of all tiers of class 1 or 5, i.e. those whose items
        have a symbolic link but no start and duration of their own.
        """
        return [tier for tier in self.tiers if tier.tier_class in (1, 5)]

    def infer_link_times(self):
        """
        Compute for every symbolic link the earliest start and the latest end
        (in samples) of all reference tier items which point to that link, and
        store the result in self.link_times (a list indexed by link number).

        Only items with a single link other than -1 are taken into account.
        Links which the reference tier does not cover keep None as their times.

        Raises:
            AssertionError: if no reference tier is present (see get_reference_tier()).
        """
        reference_tier = self.get_tier_object(self.get_reference_tier())
        self.link_times = []
        for item in reference_tier.tier_items:
            if item["linktype"] != "single" or item["link"] == -1:
                continue
            i = item["link"]
            start = item["start"]
            end = start + item["duration"]
            # pad up to index i, so that links which are skipped by the
            # reference tier or appear out of order don't cause an IndexError
            while len(self.link_times) <= i:
                self.link_times.append({"start": None, "end": None})
            link = self.link_times[i]
            if link["start"] is None or start < link["start"]:
                link["start"] = start
            if link["end"] is None or end > link["end"]:
                link["end"] = end

    def get_link_times(self, linkindex):
        """
        Return the inferred times of a symbolic link. Requires infer_link_times()
        to have been called first.

        Arguments:
            linkindex (int): The number of the symbolic link.

        Returns (tuple): start and end of the link in samples.

        Raises:
            IndexError: if the reference tier does not cover the link.
        """
        link = self.link_times[linkindex]
        if link["start"] is None or link["end"] is None:
            raise IndexError("symbolic link {} is not covered by the reference tier".format(linkindex))
        return (link["start"], link["end"])


class ParTier:
    """
    A class which represents/encapsulates tiers of a BPF file.

    Arguments:
        name (str): The name of the tier.

    Raises:
        AssertionError: if the name is not a known BPF tier name
            (this exception will be caught in ParFile.parse).
    """
    # maps every known tier name to its tier class (1-5).
    # shared by all instances, so it is only built once
    tier_classes = {
        "KAN": 1, "KSS": 1, "MRP": 1, "KAS": 1, "PTR": 1, "ORT": 1,
        "TRL": 1, "TR2": 1, "TRO": 1, "SUP": 1, "DAS": 1, "PRS": 1,
        "NOI": 1, "PRO": 1, "SYN": 1, "FUN": 1, "LEX": 1, "POS": 1,
        "LMA": 1, "TRS": 1, "TLN": 1, "TRW": 1, "SPK": 1,
        "IPA": 2, "GES": 2, "USH": 2, "USM": 2, "OCC": 2, "SPD": 2,
        "VAD": 2,
        "LBP": 3, "LBG": 3, "PRM": 3,
        "PHO": 4, "SAP": 4, "MAU": 4, "WOR": 4, "TRN": 4, "USP": 4,
        "MAS": 4,
        "PRB": 5,
    }

    def __init__(self, name):
        self.name = name
        self.tier_items = []
        if self.name not in self.tier_classes:
            # raised explicitly (instead of via an assert statement) so that
            # the check is not stripped when python runs with -O
            raise AssertionError("unknown tier")

    def parse(self, content):
        """
        Parse the raw text contents of a BPF tier (as taken from the file) into a list of dictionaries,
        where each entry represents one item of the tier.

        Arguments:
            content: a list, representing for example a BPF tier like this:
                `ORT:    0    das
                 ORT:    1    Dunkel
                 ORT:    2    war`
            and passed to this function as:
                 ["0    das",
                  "1    Dunkel",
                  "2    war"]
            becomes:
                 [{"link": 0, "linktype": "single", "content": "das"},
                  {"link": 1, "linktype": "single", "content": "Dunkel"},
                  {"link": 2, "linktype": "single", "content": "war"}]

        Returns (ParTier): itself.

        Raises:
            ValueError: if an item has too few fields or a non-numeric
                value where a number is expected.
        """
        sep = re.compile(r"[ \t]+")  # regex that matches the separator between intra-tier values, such as "108885    2056    1    d_s"
        tier_class = self.tier_class
        for item in content:
            new_tier_item = {}
            # The reason we're operating with split here instead of simply indexing the items
            # is that we're "deconstructing" each tier item. first we remove those elements
            # which are specific to the items of certain tier classes, then we remove those
            # which are shared by all
            rest = item
            try:
                if tier_class in (2, 4):
                    start, duration, rest = sep.split(rest, maxsplit = 2)
                    new_tier_item["start"] = int(start)
                    new_tier_item["duration"] = int(duration)
                if tier_class in (3, 5):
                    time, rest = sep.split(rest, maxsplit = 1)
                    new_tier_item["time"] = int(time)
                if tier_class in (1, 4, 5):
                    link, rest = sep.split(rest, maxsplit = 1)
                    new_tier_item["link"], new_tier_item["linktype"] = self._parse_link(link)
            except ValueError as err:
                # same exception type as before, but now it tells the user
                # which item of which tier is the malformed one
                raise ValueError("could not parse the following item of tier '{}': {!r}".format(self.name, item)) from err
            new_tier_item["content"] = rest
            self.tier_items.append(new_tier_item)
        return self

    @staticmethod
    def _parse_link(link):
        """
        Convert the symbolic link field of a tier item.

        Returns (tuple): the link and its link type - a list of ints and
        'multiple' for comma-separated links, a list of ints and 'between'
        for semicolon-separated links, otherwise an int and 'single'.
        """
        if "," in link:
            return [int(num) for num in link.split(",")], "multiple"
        if ";" in link:
            return [int(num) for num in link.split(";")], "between"
        return int(link), "single"

    @property
    def tier_class(self):
        """
        Return the tier class.
        """
        return self.tier_classes[self.name]


class ExbFile:
    """
    A class which represents/encapsulates files in the EXMARaLDA Partitur-Editor format (.exb).

    Attributes:
        tiers (list): the tier objects which generate() will write to the output.
        times (list): all event start and end times, filled by construct_common_timeline().
        tree (ElementTree): the XML tree of the file, initialised from self.template.
    """
    def __init__(self):
        self.timeline = []
        self.tiers = []
        self.times = []
        self.template = '<basic-transcription><head><meta-information><project-name></project-name><transcription-name></transcription-name><referenced-file url=""/><ud-meta-information></ud-meta-information><comment></comment><transcription-convention></transcription-convention></meta-information><speakertable></speakertable></head><basic-body><common-timeline></common-timeline></basic-body></basic-transcription>'
        self.tree = ElementTree(fromstring(self.template))

    def set_header_element(self, element, content):
        """
        Set the content of an element in <head> section of the .exb file object.

        Arguments:
            element (str): The name of the header element.
            content (str): The text to be inserted between the opening and closing tag of the header element.
        """
        self.tree.find("./head//" + element).text = content

    def set_header_attribute(self, element, attribute, value):
        """
        Set an attribute of an element in the <head> section of the .exb file object.

        Arguments:
            element (str): The name of the header element.
            attribute (str): The name of the element's attribute to be modified.
            value (str): The value of the attribute.
        """
        self.tree.find("./head//" + element).set(attribute, value)

    def add_meta_information(self, name, content):
        """
        Add a <ud-information> element as child of the <ud-meta-information> element in the <head> section of the .exb file object.

        Arguments:
            name (str): The attribute name of the piece of meta information.
            content (str): The content of the piece of meta information, which will be inserted between the opening and closing tag.
        """
        information_element = Element("ud-information", {"attribute-name": name})
        information_element.text = content
        self.tree.find("./head/meta-information/ud-meta-information").append(information_element)

    def add_speaker(self, speaker_id, abbreviation = ""):
        """
        Add a <speaker> element to the <speakertable> in the <head> section of the .exb file object.

        Arguments:
            speaker_id (str): The value of the speaker's id attribute.
            abbreviation (str, optional): The text of the speaker's <abbreviation> element.
        """
        speaker_element = Element("speaker", {"id": speaker_id})
        speaker_element.extend([
            Element("abbreviation"),
            Element("sex", {"value": "u"}),  # 'unknown' or 'undefined' since this information is not provided in a .par file
            Element("languages-used"),
            Element("l1"),
            Element("l2"),
            Element("ud-speaker-information"),
            Element("comment")
        ])
        speaker_element.find("./abbreviation").text = abbreviation
        self.tree.find("./head/speakertable").append(speaker_element)

    def add_tier(self, *args, **kwargs):
        """
        A sort of wrapper function which instantiates the ExbTier class and adds a reference to said instance the list self.tiers.
        Globally, the ExbTier class should generally not need to be instantiated through any means other than this function.

        Arguments:
            Same as for ExbTier's constructor (see that class's docstring).

        Returns (ExbTier): The newly created tier.
        """
        new_tier = ExbTier(*args, **kwargs)
        self.tiers.append(new_tier)
        return new_tier

    def remove_tier(self, tier):
        """
        Remove an ExbTier object belonging to the ExbFile object.

        Arguments:
            tier (ExbTier): A reference to the tier instance.
        """
        self.tiers.remove(tier)

    def construct_common_timeline(self):
        """
        Collect the start and end times of all events of all tiers, append one
        <tli> element per distinct time (in ascending order) to <common-timeline>,
        and store the ID of the matching <tli> in each event's 'start_ID' and 'end_ID'.
        """
        common_timeline = self.tree.find("./basic-body/common-timeline")
        for tier in self.tiers:
            for event in tier.events:
                self.times.append(event["start_time"])
                self.times.append(event["end_time"])
        self.times = sorted(set(self.times))  # the set removes duplicate values
        # one lookup table instead of rescanning every event for every time
        time_ids = {time: "T%d" % i for i, time in enumerate(self.times)}
        for tier in self.tiers:
            for event in tier.events:
                event["start_ID"] = time_ids[event["start_time"]]
                event["end_ID"] = time_ids[event["end_time"]]
        for time in self.times:
            tli = Element("tli", {
                "id": time_ids[time],
                "time": "{:.8f}".format(time)
                    # we could just be using str(time), but this sometimes
                    # generates scientific notation. With the format option
                    # we can avoid this, and also ensure it uses precisely
                    # eight digits after the decimal point.
            })
            common_timeline.append(tli)

    def construct_tiers(self):
        """
        Append one <tier> element per tier to <basic-body>, each containing one
        <event> element per event. Relies on the event IDs which are set by
        construct_common_timeline().
        """
        basic_body = self.tree.find("./basic-body")
        for i, tier in enumerate(self.tiers):
            attributes = {"id": "TIE%d" % i}
            if tier.speaker is not None:
                # a tier without speaker (the default of ExbTier) must not get
                # the attribute at all - None cannot be serialized to XML
                attributes["speaker"] = tier.speaker
            attributes["category"] = tier.category
            attributes["display-name"] = tier.display_name
            attributes["type"] = tier.tier_type
            tier_element = Element("tier", attributes)
            basic_body.append(tier_element)
            for event in tier.events:
                event_element = Element("event", {
                    "start": event["start_ID"],
                    "end": event["end_ID"],
                })
                event_element.text = event["content"]
                tier_element.append(event_element)

    def generate(self):
        """
        Generate the XML output of the current state of the ExbFile object.

        This appends the timeline and the tiers to self.tree, so it is meant
        to be called once per object.

        Returns (str): The output string, ready for writing to file.
        """
        self.construct_common_timeline()
        self.construct_tiers()
        outputstr = minidom.parseString(
            tostring(self.tree.getroot())
        ).toprettyxml()  # this just prettifies the output
        return outputstr


class ExbTier:
    """
    Representation of a single tier of an .exb file, together with its events.

    Arguments:
        # Each argument ends up as an XML attribute of the tier element.
        category (str): The tier's category.
        tier_type (str): The tier's type.
        display_name (str): The tier name shown prominently in the EXMARaLDA GUI.
        speaker (str): The ID of the speaker the tier belongs to.
    """
    def __init__(self, category, tier_type, display_name = "", speaker = None):
        self.category, self.tier_type = category, tier_type
        self.display_name, self.speaker = display_name, speaker
        self.events = []

    def add_event(self, start_time, end_time, content):
        """
        Append a new event to the tier's list of events.

        Arguments:
            start_time (int, float): When the event starts, in seconds.
            end_time (int, float): When the event ends, in seconds.
            content (str): The text placed between the opening and closing tag of the event element.
        """
        # the two IDs are unknown at this point, hence None
        new_event = dict(
            start_ID = None,
            end_ID = None,
            start_time = start_time,
            end_time = end_time,
            content = content,
        )
        self.events.append(new_event)


def par_to_exb(par, exb, referenced_file = None):
    """
    Convert a ParFile object's attributes to an ExbFile object's attributes.

    The conversion happens in two stages: first the BPF header is transferred
    to the <head> section of the .exb file (popping the handled elements from
    par.header along the way), then every BPF tier is converted to an .exb tier.
    The function modifies both par and exb in place and returns nothing.

    Progress is reported via echo(), which terminates the program for messages
    of type 'error'. The function also reads the module-level variable `args`
    (args.verbosity) directly.

    Arguments:
        par (ParFile): the input ParFile object.
        exb (ExbFile): the output ExbFile object.
        referenced_file (str, optional): the name of the audio file which the BPF file annotates.
            If given, it takes precedence over the 'SRC' header element.
    """
    echo("--- Starting header conversion ---\n", required_verbosity = 1)

    echo("Setting header attributes", required_verbosity = 1)
    if par.in_header("DBN"):
        project_name = par.pop_header_element("DBN")
        # the reason we're popping elements from the par.header dictionary instead of
        # simply reading them is that we're trying to "dismantle" it, so to speak.
        # once a handful of "special" header elements (the ones forming part of this
        # and the following if statements) have been handled, the other ones will be
        # inserted into the .exb file hierarchy using exb.add_meta_information
        exb.set_header_element("project-name", project_name)
        echo("Converted header elements: 'DBN' --> <project-name>", required_verbosity = 1)

    if referenced_file:
        # in this branch 'SRC' is not popped, so if present it will end up
        # in <ud-meta-information> further below
        exb.set_header_attribute("referenced-file", "url", referenced_file)
        echo("Inserted header element passed via command line parameter: -r --> <referenced-file>", required_verbosity = 1)
    elif par.in_header("SRC"):
        referenced_file = par.pop_header_element("SRC")
        exb.set_header_attribute("referenced-file", "url", referenced_file)
        echo("Converted header elements: 'SRC' --> <referenced-file>", required_verbosity = 1)

    if par.in_header("CMT"):
        comment = par.pop_header_element("CMT")
        exb.set_header_element("comment", comment)
        echo("Converted header elements: 'CMT' --> <comment>", required_verbosity = 1)

    if par.in_header("SPN"):
        speaker_id = par.pop_header_element("SPN")
        exb.add_speaker(speaker_id, abbreviation = speaker_id)
    else:
        # the SPN entry is obligatory for BPF headers, but nonetheless there are many files without it.
        # the default value for such cases is 'DefaultSpeaker'
        speaker_id = "DefaultSpeaker"
        exb.add_speaker(speaker_id, abbreviation = speaker_id)
        echo("Obligatory BPF header element 'SPN' (speaker ID) missing from input - resorting to default value 'DefaultSpeaker'", type = "warning")
    echo("Added speaker with ID '{}' to <speakertable>".format(speaker_id), required_verbosity = 1)

    if par.in_header("SYS"):
        transcription_convention = par.pop_header_element("SYS")
        exb.set_header_element("transcription-convention", transcription_convention)
        echo("Converted header elements: 'SYS' --> <transcription-convention>", required_verbosity = 1)

    echo("Appending remaining header elements to <ud-meta-information>", required_verbosity = 1)
    for element, content in par.header.items():
        # iterate over all remaining BPF header elements and add them
        # to the .exb file as part of the user-defined meta information
        exb.add_meta_information(element, content)
        echo("Appended header element '{}' as a <ud-information> element to <ud-meta-information>".format(element), required_verbosity = 2)

    # visually separates the two stages in the debug output
    if args.verbosity > 0:
        print()
    echo("--- Finished header conversion ---\n", required_verbosity = 1)

    echo("--- Starting tier conversion ---\n", required_verbosity = 1)
    if par.get_tiers_requiring_link_inference():
        # class 1 and class 5 tiers are present, so the times of the symbolic
        # links have to be inferred from a reference tier up front
        try:
            par.infer_link_times()
        except AssertionError:
            echo("No valid reference tiers could be found - exiting...\n\tIf the input contains any class 1 or class 5 tiers, the program will cycle through a hierarchy of selected class 4 tiers (in the order MAU->SAP->WOR->PHO->MAS) and use the first one it finds to infer the times of the symbolic links. If there is not at least one of these tiers present which contains a reference to every symbolic link (and provided the reference is a singular link, not a comma or semicolon-separated list of links), the file cannot be converted.\n\tIf you would instead like to discard all class 1 and class 5 tiers in your file, you can append the option '--ignore {}'".format(
                 ",".join([tier.name for tier in par.get_tiers_requiring_link_inference()])),
                 type = "error")

    for par_tier in par.tiers:
        # the .exb tier is created optimistically and removed again
        # further below if it turns out that the tier cannot be converted
        exb_tier = exb.add_tier(category = par_tier.name, tier_type = "t", display_name = par_tier.name, speaker = speaker_id)
        echo("Starting conversion of tier '{}'".format(par_tier.name),
             "Starting conversion of tier '{}' (class {})".format(par_tier.name, par_tier.tier_class),
             type = "debug_multiple")
        # NOTE(review): inference_necessary is only assigned for the tier
        # classes 1, 2, 4 and 5 (class 3 tiers are skipped via continue)
        if par_tier.tier_class == 3:
            # discard the current tier and continue with the next one
            echo("Class 3 tiers are currently not supported. Skipping tier '{}'".format(par_tier.name), type = "warning")
            exb.remove_tier(exb_tier)
            continue
        elif par_tier.tier_class in (1, 5):
            inference_necessary = True
            exb_tier.tier_type = "a"
            echo("The tier '{}' is a class {} tier - will use symbolic link time inference".format(par_tier.name, par_tier.tier_class),
                 "The tier '{}' is a class {} tier - it is necessary to infer the time of the symbolic links, which will be accomplished by making use of a selection of class 4 tiers. See --list-reference-tiers for more information".format(par_tier.name, par_tier.tier_class),
                 type = "debug_multiple")
        elif par_tier.tier_class in (2, 4):
            inference_necessary = False
            # every class 2/4 tier other than the reference tier gets a
            # speaker of its own (the reason is given in the message below)
            if par.get_tiers_requiring_link_inference() and par_tier.name != par.get_reference_tier():
                additional_speaker_id = speaker_id + "-" + par_tier.name
                exb.add_speaker(additional_speaker_id, abbreviation = additional_speaker_id)
                exb_tier.speaker = additional_speaker_id
                echo("Correction for header conversion: added speaker with ID '{}' to <speakertable>".format(additional_speaker_id),
                     "correction for header conversion: EXMARaLDA allows only one tier of type = 't' per speaker. Since the file's main speaker ID '{}' is already in use for the tier '{}', we will add an additional speaker with ID '{}' to <speakertable>".format(speaker_id, par.get_reference_tier(), additional_speaker_id),
                     type = "debug_multiple")
        for i, tier_item in enumerate(par_tier.tier_items):
            # determine start and end of the item in samples
            if inference_necessary:
                if tier_item["linktype"] == "single":
                    start, end = par.get_link_times(tier_item["link"])
                    echo("Performing symbolic link time inference of link '{}'".format(tier_item["link"]), required_verbosity = 2)
                elif tier_item["linktype"] == "multiple":
                    # the item spans from the start of its lowest link
                    # to the end of its highest link
                    start, _ = par.get_link_times(min(tier_item["link"]))
                    _, end = par.get_link_times(max(tier_item["link"]))
                    echo("Performing symbolic link time inference of link '{}'".format(tier_item["link"]), required_verbosity = 2)
                elif tier_item["linktype"] == "between":
                    # discard the current tier and continue with the next one
                    # NOTE(review): the break only leaves the item loop, so the
                    # "Added tier" message below is still displayed for the
                    # removed tier - confirm whether that is intended
                    echo("Cannot handle semicolon-separated symbolic links (such as '2;3'). Skipping tier '{}'".format(par_tier.name), type = "warning")
                    exb.remove_tier(exb_tier)
                    break
            else:
                start = tier_item["start"]
                end = start + tier_item["duration"]

            if end <= start:
                # if the item has a duration of 0 samples
                # or lower (yes, that does happen),
                # discard it
                continue

            # widen the item by half a sample on both sides (on the left only
            # if the start is at least 1) before converting it to seconds
            # NOTE(review): presumably this makes adjacent items share their
            # boundary on the common timeline - confirm
            if start >= 1:
                start -= 0.5
            end += 0.5

            start, end = par.samples_to_seconds(start), par.samples_to_seconds(end)
            exb_tier.add_event(start, end, tier_item["content"])
            echo("Converted BPF tier item {} of index {} to .exb tier event".format(repr(tier_item["content"]), i), required_verbosity = 2)
        echo("Added tier '{}'\n".format(par_tier.name), required_verbosity = 1)

    echo("--- Finished tier conversion ---\n", required_verbosity = 1)


def echo(*messages, type = "debug", required_verbosity = None):
    """
    Display debug information, a warning, or an error to the user.
    (to the console - this function does not raise ordinary python errors;
    note, however, that type = 'error' terminates the program via sys.exit)
    
    The current verbosity is read from the module-level `args` namespace that
    is created when the command line is parsed. If the command line has not
    been parsed (e.g. because the module was imported rather than run as a
    script), a verbosity of 0 is assumed, i.e. no debug output is produced.
    
    Arguments:
        messages (arbitrary number of objects, usually str): The messages to be displayed. For all values of type
            except 'debug_multiple', the messages will be joined with spaces. If the value of type is
            'debug_multiple', then one message should be provided for each debug verbosity level (starting
            at verbosity = 1), in ascending order. If the verbosity exceeds the number of messages,
            the last (most detailed) message is displayed. Example usage:
            `echo('message for verbosity = 1', 'message for verbosity = 2', type = 'debug_multiple')`
        type (str): either 'debug', 'debug_multiple', 'warning', or 'error'. Any other value is silently ignored.
        required_verbosity (int, optional): if the value of type is 'debug', then this is the minimum required
            verbosity necessary for the message(s) to be displayed. If omitted, the message(s) will be
            displayed at any verbosity >= 1.
    """
    # `args` is only defined once the __main__ block has parsed the command
    # line; look it up defensively so that echo() also works on import
    verbosity = getattr(globals().get("args"), "verbosity", 0)
    
    if type == "debug":
        # comparing an int against None would raise a TypeError, so fall back
        # to the lowest level at which debug information is shown at all
        minimum = 1 if required_verbosity is None else required_verbosity
        if verbosity >= minimum:
            print("DEBUG: mausbpf2exb:", *messages)
    elif type == "debug_multiple":
        if verbosity > 0 and messages:
            # clamp the index: a verbosity above the number of messages
            # provided must not cause an IndexError
            print("DEBUG: mausbpf2exb:", messages[min(verbosity, len(messages)) - 1])
    elif type == "warning":
        print("WARNING: mausbpf2exb:", *messages, file = sys.stderr)
    elif type == "error":
        # str() keeps this branch consistent with the print-based branches,
        # which accept non-string messages as well
        sys.exit("ERROR: mausbpf2exb: " + " ".join(str(message) for message in messages))


def split_tier_names(raw_names):
    """
    Split the value of the --ignore option into individual tier names.
    
    Arguments:
        raw_names (str): a single tier name or a comma-separated list of tier names.
    
    Returns:
        list of str: the individual names in their original order, with surrounding
            whitespace removed. Empty entries (e.g. caused by a trailing comma)
            are dropped.
    """
    stripped_names = (name.strip() for name in raw_names.split(","))
    return [name for name in stripped_names if name]


if __name__ == "__main__":
    
    ###### this block implements the parser as well as the help page etc
    parser = argparse.ArgumentParser(description = "This program converts speech annotation files in the BAS Partitur Format (BPF) to files for the Partitur-Editor of the EXMARaLDA speech software suite.\nFor more detailed information, consult README.md.")
    parser.add_argument("input", nargs = "?", help = "input file (.par, .bpf). If not specified, STDIN will be used")
    parser.add_argument("-o", "--output", help = "output file (.exb). If not specified, STDOUT will be used")
    parser.add_argument("-v", "--verbosity", type = int, default = 0, help = "verbosity level of debug information on a scale of 0 to 2 (default = 0, meaning only errors and warnings, but no debug information)")
    parser.add_argument("-r", "--referenced-file", help = "name of referenced audio file")
    parser.add_argument("--ignore", dest = "ignored_tier", help = "name (or comma-separated list of names) of BPF tier(s) to be ignored during conversion")
    parser.add_argument("--version", action = "version", version = "0.3")
    parser.add_argument("--list-reference-tiers", action = "store_true", help = "print the hierarchy of class 4 tiers used to infer times of symbolic links and exit")
    # note: `args` is deliberately a module-level name, echo() reads the verbosity from it
    args = parser.parse_args()
    ######
    
    ###### this block is for when the user passes the --list-reference-tiers parameter
    if args.list_reference_tiers:
        for tier in ParFile.get_reference_tier_hierarchy():
            print(tier)
        print("If the input contains any class 1 or class 5 tiers (which it usually does), the program will cycle through a hierarchy of selected class 4 tiers and use the first one it finds to infer the times of the symbolic links. If there is not at least one of these tiers present which contains a reference to every symbolic link (and provided the reference is a singular link, not a comma or semicolon-separated list of links), the file cannot be converted. However, you can instead choose to discard all class 1 and class 5 tiers; for this, run the program once with your input, the resulting error message will tell you exactly which option you will have to use to do so. (This annotation has been printed to stderr, so you can still safely pipe the above list)",
              file = sys.stderr)
        sys.exit()
    ######
    
    ###### this block takes care of the actual conversion
    # if an input file name has been provided, use that, otherwise, use STDIN
    if args.input:
        if path.exists(args.input):
            parFile = ParFile.fromFilepath(args.input)
        else:
            # echo() with type = "error" terminates the program
            echo("The input file '{}' could not be found.".format(args.input), type = "error")
    else:
        parFile = ParFile(sys.stdin.read())
    exbFile = ExbFile()
    # remove tier(s) that the user decided to ignore
    if args.ignored_tier:
        for ignored_tier in split_tier_names(args.ignored_tier):
            if parFile.tier_exists(ignored_tier):
                parFile.remove_tier(ignored_tier)
            elif parFile.tier_exists(ignored_tier.upper()):
                # the name was not found as typed, but its uppercase form was
                # (e.g. the user passed 'mau' instead of 'MAU')
                parFile.remove_tier(ignored_tier.upper())
            else:
                echo("The tier you have chosen to ignore ('{}') is not present in the input anyway".format(
                     ignored_tier),
                     type = "warning")
    par_to_exb(parFile, exbFile, args.referenced_file)
    outputstr = exbFile.generate()
    ######
    
    ######
    # this block will attempt to use the program xmllint (pre-installed on many Unix systems)
    # to check whether the output complies with the DTD (document type definition) for
    # EXMARaLDA basic transcriptions, as specified in basic-transcription.dtd
    # (if you don't have it, you can download it here: https://www.exmaralda.org/files/basic-transcription.dtd)
    # note that the check runs before anything is written: if xmllint reports
    # an error, the program exits without producing any output
    if shutil.which("xmllint") is not None:  # check if xmllint is installed
        scriptdir = path.dirname(path.realpath(__file__))  # location of mausbpf2exb.py
        dtdpath = path.join(scriptdir, "basic-transcription.dtd")
        if path.isfile(dtdpath):  # check if basic-transcription.dtd exists
            try:
                subprocess.run(
                    # this runs the shell command responsible for checking the output against the DTD
                    ["xmllint", "--dtdvalid", "basic-transcription.dtd", "--noout", "-"], # the last argument (dash) is xmllint's placeholder for stdin
                    input = outputstr, # pass our output to stdin
                    stdout = subprocess.PIPE, # capture stdout
                    stderr = subprocess.PIPE, # capture stderr
                    cwd = scriptdir, # execute subprocess in directory of script
                    universal_newlines = True, # this allows us to use strings instead of bytes objects for the standard streams
                    check = True # raise an error if xmllint fails
                )
            except subprocess.CalledProcessError as linterror:
                errormessage = indent(linterror.stderr, "\t")
                echo("The verification of the converter's output against the document type definition using xmllint produced the following error:\n{}".format(errormessage),
                     type = "error")
        else:
            echo("The document type definition 'basic-transcription.dtd' could not be found, the output could therefore not be verified using xmllint. You can download the DTD here: https://www.exmaralda.org/files/basic-transcription.dtd", type = "warning")
    else:
        echo("The program 'xmllint' is not installed on your system. The output of the converter could not be verified with the document type definition (basic-transcription.dtd, which can be downloaded here: https://www.exmaralda.org/files/basic-transcription.dtd)", type = "warning")
    ######
    
    # if an output file name has been provided, use that, otherwise, use STDOUT
    if args.output:
        with open(args.output, mode = "w", encoding = "utf-8", newline = "\n") as outputfile:
            outputfile.write(outputstr)
    else:
        print(outputstr)
