# 2017-05-31 : mask double quotes and backslashes in label strings, and restore
#              blanks in label strings, e.g. a label like bEr\O u"pt is now
#              translated into the JSON-conformant "bEr\\O u\"pt"
# 2017-10-19 : generic script for all class 4 BPF tiers: creates emu items list
#              without attributes string with emu id EMUID; input is the filtered
#              BPF class 4 tier.
# 2019-03-22 : BPF segments with begin = dur = 0 are ignored (can happen in SAP/PHO)
# 2019-12-29 : add TON attribute to MAS tier, and delete tone marker from MAS
# 2020-01-18 : delete tone marker from MAU
# 2020-03-31 : if input segments are not covering all of the signal, add empty labelled
#              segments in the gap; this is mostly relevant for TRN tiers which are not 
#              accepted by emu-webapp if they contain gaps.

# Set up the running item id and the state flags used by the per-record rule.
# EMUID is not assigned anywhere in this script; presumably the caller passes
# it in (e.g. with -v EMUID=...) -- confirm against the calling script.
BEGIN {
  # running id of the next item to be stored
  idx = EMUID
  # id of the first item of this tier; the END rule starts printing here
  first_idx = EMUID
  # stays 1 until the first input segment has been stored
  first_segment = 1
  # switched to 1 as soon as a MAS label with a tone marker is seen
  toneExist = 0
}
# Per-record rule: stores one input segment (plus an empty-labelled filler
# segment in front of it if there is a gap) in the tier* arrays.
# Columns as used below: $1 = tier key (colons are stripped), $2 = begin
# sample, $3 = duration, $5..$NF = label, which may contain blanks.
# Needs gawk because of gensub().
{
  # ignore segments that have duration = 0 (can happen in SAP/PHO: deleted segments)
  if ($3 == 0) next

  key = $1
  gsub(/:/, "", key)

  # get everything from column 5 to the end (could be white space in the MAS
  # label!); the fields are re-joined with single blanks
  lab = $5
  for (i = 6; i <= NF; i++)
    lab = lab " " $i

  # mask \ and " so that the label is a valid JSON string body
  gsub(/\\/,"\\\\",lab)
  gsub(/"/,"\\\"",lab)

  # get tone marker (_1 .. _9) if present; gensub() returns the label
  # unchanged when the pattern does not match, so tone != lab means "found"
  tone = gensub(/^.*_([1-9]).*$/,"\\1",1,lab)
  has_tone = (tone != lab)

  # TON value of this segment; stays empty if the label carries no tone
  # (other labels of the tier may still carry one)
  segtone = ""
  if (key == "MAS" && has_tone) {
    # create a TON attribute with just the tone number and delete the tone
    # marker from MAS
    toneExist = 1
    segtone = tone
    gsub(/_[1-9]/,"",lab)
  }

  # delete the tone marker from MAU
  # we can do this because emuDB output cannot be further processed in a pipe where
  # we might need the tonal information; if the input contains no MAS tier the tone
  # information is still in the KAN tier.
  if (key == "MAU" && has_tone) {
    gsub(/_[1-9]/,"",lab)
  }

  if (first_segment == 1) {
    # gap before the first segment: insert a non-labelled segment from sample 0
    if ($2 != 0) {
      tierlab[idx]  = ""
      tiertone[idx] = ""
      tierbeg[idx]  = 0
      tierdur[idx]  = $2 - 1
      idx++
    }
  } else if ($2 > (tierbeg[idx-1] + tierdur[idx-1] + 1)) {
    # gap to the previous segment: insert a non-labelled segment that fills it
    tierlab[idx]  = ""
    tiertone[idx] = ""
    tierbeg[idx]  = tierbeg[idx-1] + tierdur[idx-1] + 1
    tierdur[idx]  = $2 - tierbeg[idx] - 1
    idx++
  }

  # process input segment; the tone has to be stored here, after the gap
  # handling, because an inserted filler segment advances idx and the tone
  # must end up under the same id as its label
  tierlab[idx]  = lab
  tiertone[idx] = segtone
  tierbeg[idx]  = $2
  tierdur[idx]  = $3
  idx++
  first_segment = 0
}
# Closes a possible gap up to ENDSAMPLE and prints all stored segments as a
# comma separated list of JSON item objects (ids first_idx .. idx-1).
END {
  # first sample after the last stored segment
  # NOTE(review): if no segment was stored at all, tierbeg/tierdur of idx-1 are
  # unset and evaluate to 0, so the filler below would start at sample 1 --
  # confirm whether empty input can occur.
  prev = idx - 1
  free_from = tierbeg[prev] + tierdur[prev] + 1

  # gap between the last input segment and the end of the signal:
  # append a non-labelled segment
  if (ENDSAMPLE > free_from) {
    tierlab[idx] = ""
    tierbeg[idx] = free_from
    tierdur[idx] = ENDSAMPLE - free_from - 1
    idx++
  }

  for (id = first_idx; id < idx; id++) {
    if (toneExist != 1) {
      # plain item with a single label named after the tier key
      printf("              {\n                    \"id\": %d,\n                    \"sampleStart\": %d,\n                    \"sampleDur\": %d,\n                    \"labels\": [\n                        {\n                            \"name\": \"%s\",\n                            \"value\": \"%s\"\n                        }\n                    ]\n              }",id,tierbeg[id],tierdur[id],key,tierlab[id])
    } else {
      # MAS tier contains tone markers: add attribute TON
      printf("              {\n                    \"id\": %d,\n                    \"sampleStart\": %d,\n                    \"sampleDur\": %d,\n                    \"labels\": [\n                        {\n                            \"name\": \"%s\",\n                            \"value\": \"%s\"\n                        },\n                        {\n                            \"name\": \"TON\",\n                            \"value\": \"%s\"\n                        }\n                    ]\n              }",id,tierbeg[id],tierdur[id],key,tierlab[id],tiertone[id])
    }
    # items are separated by commas; the last one only gets a newline
    separator = (id < idx - 1) ? ",\n" : "\n"
    printf("%s", separator)
  }
}
