#
# Specific post analysis routines for Sibilant 2017
#

##################################################################
# Root directory of the experiment: adjust this path whenever the
# experiment is moved to another location.
pfad2 <- "/homes/schiel/ABM/PublicABM.3"
##################################################################

# Post-ABM analyses over multiple runs. The input are the res2.df data
# frames stored as LogDirDate/Dataframe_*.df (* = ABM number).
# Source this script from the ABM root dir
# (e.g. SibilantsAustralianEnglish2017).

# Parameters; a value that already exists in the workspace is kept,
# otherwise the default below is used:
#   LogDirDate - results directory from which the data frames are read
#   simGroups  - ABM simulation group that is analysed as the final one
#                (must lie within the range 1...simGroups of the ABM!)
if (!exists("LogDirDate")) {
  LogDirDate <- "Results/20170328090740"
}
if (!exists("simGroups")) {
  simGroups <- 60
}

# Required sources:
if(!exists("equal_class")) source("Rcmd/pathsAndLibraries.R") 
# Colors
# Names of the equivalence labels (very specific for this example!). This
# vector is used to sort agent-specific equivalence phone cluster statistics
# and must stay in character sort order. The colors below are given in the
# same order: blue, pink, darkgrey, cyan, red, orange, green.
# Useful for looking up channel values: col2rgb("orange")
phonClusterLog.names <- c("S", "S+s", "S+s+str", "S+str", "s", "s+str", "str")

# Nearly opaque palette (alpha 250 of 255), one color per equivalence label.
strongColors <- rgb(
  red   = c(  0, 255, 60,   0, 255, 255,   0),
  green = c(  0,   0, 60, 255,   0, 165, 255),
  blue  = c(255, 255, 60, 255,   0,   0,   0),
  alpha = 250, maxColorValue = 255
)

# Faded colors: alpha value in 1...255 (255 = not faded).
fadedStrength <- 75
# The faded palette is derived from the strong one so that both always share
# the same hues. The previous hand-written list used yellow (255,255,0) for
# "s+str", whereas strongColors (and the order comment above) use orange
# (255,165,0).
fadedColors <- rgb(t(col2rgb(strongColors)),
                   alpha = fadedStrength, maxColorValue = 255)

# Legend description shared by the plots: one point and one text entry per
# equivalence label, laid out in 4 columns.
generalLegend <- list(
  points = list(pch = 1, col = strongColors),
  text = list(phonClusterLog.names, col = strongColors),
  columns = 4,
  col = strongColors
)

#########################
# Load all log data from before and after the ABM.
# To make later plotting easier, the 'orig' entries in Cond are replaced by
# '0' and the Cond column is converted to numeric. The before data exist
# only once (they are the same for all ABM runs), whereas the after data
# exist once per ABM run. res2.df.before has the same column structure as
# res2.df.after, but its ABM column is always 0.
cat("Post analysis: loading log files from ", LogDirDate, " (may take a few minutes)\n")

# All ABMs start with the same memory content, so it is enough to load the
# res2.df of the first ABM run and keep only its 'orig' rows.
res2.df.before <- read.table(
  paste0(LogDirDate, "/Dataframe_1.df"),
  header = TRUE, stringsAsFactors = FALSE
)
res2.df.before <- res2.df.before[res2.df.before$Cond == "orig", ]
res2.df.before$Cond[res2.df.before$Cond == "orig"] <- "0"
res2.df.before$Cond <- as.numeric(res2.df.before$Cond)
res2.df.before$ABM <- 0

# Build one large table with the memory data after the last (simGroups) ABM
# group. This is done with system calls because the tables are very large
# and take forever to load in R. The table is only built when it does not
# exist yet.
if (!file.exists(paste0(LogDirDate, "/DataframeAfter", simGroups, ".df"))) {
  # header line, taken from the first data frame file
  system(paste0(
    "cat ", LogDirDate, "/Dataframe_1.df | head -n 1 > ",
    LogDirDate, "/DataframeAfter", simGroups, ".df"
  ))
  # data rows of all ABM runs: the simGroup is in the penultimate column,
  # followed by the numeric ABM; 'cut' drops the first field of each row
  system(paste0(
    "cat ", LogDirDate, "/Dataframe_*.df | grep \'\"", simGroups,
    "\" [0-9]*$\' | cut -d ' ' -f 2- >> ",
    LogDirDate, "/DataframeAfter", simGroups, ".df"
  ))
}
res2.df.after <- read.table(
  paste0(LogDirDate, "/DataframeAfter", simGroups, ".df"),
  header = TRUE, stringsAsFactors = FALSE
)
res2.df.after$Cond <- as.numeric(res2.df.after$Cond)

# Attach equivalence labels. equiv.labels have to be computed separately
# for each agent and memory state (ABM x Cond)!
cat("Post analysis: computing equiv.labels (may take a few minutes)\n")

# Before the ABM: one memory state per agent. The label vector starts out as
# a copy of V and is overwritten agent by agent.
eclassLabels <- as.character(res2.df.before$V)
for (spkr in unique(res2.df.before$Vpn)) {
  temp <- res2.df.before$Vpn == spkr
  eclassLabels[temp] <- equal_class(
    as.character(res2.df.before$Initial[temp]),
    as.character(res2.df.before$V[temp])
  )
}
res2.df.before <- cbind(res2.df.before, eclassLabels)

# After the ABM: one memory state per agent and ABM run.
eclassLabels <- as.character(res2.df.after$V)
for (spkr in unique(res2.df.after$Vpn)) {
  for (abm in unique(res2.df.after$ABM)) {
    temp <- res2.df.after$Vpn == spkr & res2.df.after$ABM == abm
    eclassLabels[temp] <- equal_class(
      as.character(res2.df.after$Initial[temp]),
      as.character(res2.df.after$V[temp])
    )
  }
}
res2.df.after <- cbind(res2.df.after, eclassLabels)

# Some plots need special logs, which are loaded here.
phonClusterLog <- read.table(
  paste0(LogDirDate, "/PhonClusterLog.txt"),
  stringsAsFactors = FALSE
)
DistanceValuesOverABMLog <- read.table(
  paste0(LogDirDate, "/DistanceValuesOverABMLog.txt"),
  stringsAsFactors = FALSE
)
# as for the res2 data: 'orig' becomes 0 and Cond is made numeric
DistanceValuesOverABMLog$Cond[DistanceValuesOverABMLog$Cond == "orig"] <- "0"
DistanceValuesOverABMLog$Cond <- as.numeric(DistanceValuesOverABMLog$Cond)

# now we are ready for analysis:

#########################
# Correlation between individual cluster configuration (merged S+str) and
# acoustics (lowering of M1 tracks):
# Search for ABM runs in which X agents have an S+str equivalence class in
# the same simGroup, then plot the average M1 DCT-0 of the str words of these
# ABM runs as an ensemble plot over the course of the ABM; X cycles from
# MergeAgents to MaxMergeAgents.
# This analysis requires the additional table DistanceValuesOverABMLog.txt in
# LogDirDate (average P1 value of all tokens of all agents per ABM, simGroup
# and Initial) and the standard cluster configuration log PhonClusterLog.txt.
# Hypothesis:
# In ABMs that show a higher number of agents that possess a merged S+str
# cluster (X), the acoustic lowering of M1 DCT-0 should be stronger.

MergeAgents <- 1      # minimum number of agents that possess an equiv label at the same simGroup
MaxMergeAgents <- 10  # maximum number of agents that possess an equiv label at the same simGroup

log1 <- TRUE
while (log1) {
  # find the ABM numbers with exactly MergeAgents agents
  # NOTE(review): column 6 is presumably the S+str agent count and column 1
  # the ABM number - confirm against the writer of PhonClusterLog.txt
  temp <- phonClusterLog[, 6] == MergeAgents
  foundABM <- unique(phonClusterLog[temp, 1])
  # logged (distance) measures of these ABMs; only the 'str' words are used
  # to avoid double counts
  temp <- DistanceValuesOverABMLog$ABM %in% foundABM &
    DistanceValuesOverABMLog$Initial == "str"
  DF <- DistanceValuesOverABMLog[temp, ]
  Dplot <- xyplot(P1 ~ Cond, group = ABM, data = DF, type = c("l", "g"),
                  main = paste("ABMs with ", MergeAgents, " agents with equiv.label S+str", sep = ""),
                  xlab = "Simulation groups",
                  ylab = "DCT-0 of M1 tracks of 'str' words",
                  ylim = c(0.09, 0.3))
  print(Dplot)

  inpu <- readline("Continue? (Y/n) Print ? (P)")
  if (inpu %in% c("N", "n")) {
    log1 <- FALSE
  }
  if (inpu %in% c("P", "p")) {
    pdf(paste(LogDirDate, "/Postanalysis_S+str_DCT0_", MergeAgents, ".pdf", sep = ""))
    print(Dplot)
    dev.off()
  }

  if (MergeAgents >= MaxMergeAgents) {
    # wrap around so that an interactive user can browse the plots again
    MergeAgents <- 1
    # In a non-interactive session readline() returns "" immediately, so the
    # user can never answer 'n'. Stop after one complete pass instead of
    # looping forever.
    if (!interactive()) {
      log1 <- FALSE
    }
  } else {
    MergeAgents <- MergeAgents + 1
  }
}

##########################
# Estimate rates of word labels, stress class and gender in 'str' tokens
# with merged S+str equiv.clusters after the multiple ABM.

# restrict the after-ABM log data to 'str' tokens
res2.Initialstr.df <- res2.df.after[res2.df.after$Initial == "str", ]

# Prosodic features Position, Stress and Syllables are derived by matching
# the word labels; each feature starts with a default value that is then
# overwritten for the listed words.
# NOTE(review): the first five Position entries are lower-case while all
# other word labels are capitalised - confirm against the labels in W.

# position of the sibilant in the word (default: medial)
Position <- rep("medial", nrow(res2.Initialstr.df))
Position[res2.Initialstr.df$W %in% c(
  "seem", "sane", "sheep", "Shane", "stream",
  "Soak", "Show", "Strong", "Strut"
)] <- "initial"

# location relative to lexical stress (w_s, s_w, w_w; default: w_s)
Stress <- rep("w_s", nrow(res2.Initialstr.df))
Stress[res2.Initialstr.df$W %in% c(
  "Fascinating", "Information", "Catastrophe", "Pedestrian",
  "Possible", "Passionate", "Astronaut", "Oestrogen",
  "Messy", "Tissue", "Gastro", "District"
)] <- "s_w"
Stress[res2.Initialstr.df$W %in% c(
  "Motorcycle", "Perishable", "Administrate", "Claustrophobic",
  "Policy", "Polishing", "Chemistry", "Orchestra"
)] <- "w_w"

# number of syllables in the word (up to 4; default: 4)
Syllables <- rep("4", nrow(res2.Initialstr.df))
Syllables[res2.Initialstr.df$W %in% c(
  "Assembly", "Disheveled", "Pastrami", "Astringent",
  "Possible", "Passionate", "Astronaut", "Oestrogen",
  "Policy", "Polishing", "Chemistry", "Orchestra"
)] <- "3"
Syllables[res2.Initialstr.df$W %in% c(
  "Assault", "Machine", "Destroy", "Restrict",
  "Messy", "Tissue", "Gastro", "District"
)] <- "2"
Syllables[res2.Initialstr.df$W %in% c("Soak", "Show", "Strong", "Strut")] <- "1"

res2.Initialstr.df <- cbind(res2.Initialstr.df, Position, Stress, Syllables)

# subset of these 'str' tokens whose equivalence label is the merger S+str
res2.SMstr.df <- res2.Initialstr.df[res2.Initialstr.df$eclassLabels == "S+str", ]

#
# Words combined with Stress
#

# word labels that occur among the merged S+str tokens (one pie slice each)
Names <- names(table(res2.SMstr.df$W))

# Pie chart colored by stress class: red = w_s (default), blue = s_w,
# green = w_w
Colors <- rep("red", length(Names))
Colors[Names %in% c(
  "Fascinating", "Information", "Catastrophe", "Pedestrian",
  "Possible", "Passionate", "Astronaut", "Oestrogen",
  "Messy", "Tissue", "Gastro", "District"
)] <- "blue"
Colors[Names %in% c(
  "Motorcycle", "Perishable", "Administrate", "Claustrophobic",
  "Policy", "Polishing", "Chemistry", "Orchestra"
)] <- "green"

pie(table(res2.SMstr.df$W), Names,
    main = "Tokens with S+str equiv.cluster (green = w_w, red= w_s, blue = s_w)",
    col = Colors)
inpu <- readline("Press ENTER to continue, press 'P' to print this figure into the LogDir ")
if (inpu %in% c("P", "p")) {
  # draw the same chart once more, this time into a PDF file
  pdf(paste0(LogDirDate, "/Postanalysis_Pie_Stress.pdf"))
  pie(table(res2.SMstr.df$W), Names,
      main = "Tokens with S+str equiv.cluster (green = w_w, red= w_s, blue = s_w)",
      col = Colors)
  dev.off()
}

#
# Words combined with Position
#

# Pie chart colored by position: red = medial (default), blue = initial
Names <- names(table(res2.SMstr.df$W))
Colors <- rep("red", length(Names))
Colors[Names %in% c(
  "seem", "sane", "sheep", "Shane", "stream",
  "Soak", "Show", "Strong", "Strut"
)] <- "blue"

pie(table(res2.SMstr.df$W), Names,
    main = "Tokens with S+str equiv.cluster (red = medial, blue = initial)",
    col = Colors)
inpu <- readline("Press ENTER to continue, press 'P' to print this figure into the LogDir ")
if (inpu %in% c("P", "p")) {
  # draw the same chart once more, this time into a PDF file
  pdf(paste0(LogDirDate, "/Postanalysis_Pie_Position.pdf"))
  pie(table(res2.SMstr.df$W), Names,
      main = "Tokens with S+str equiv.cluster (red= medial, blue = initial)",
      col = Colors)
  dev.off()
}

#
# Words combined with Syllable Number
#

# Pie chart colored by number of syllables: red = 4 (default), blue = 3,
# green = 2, magenta = 1
Names <- names(table(res2.SMstr.df$W))
Colors <- rep("red", length(Names))
Colors[Names %in% c(
  "Assembly", "Disheveled", "Pastrami", "Astringent",
  "Possible", "Passionate", "Astronaut", "Oestrogen",
  "Policy", "Polishing", "Chemistry", "Orchestra"
)] <- "blue"
Colors[Names %in% c(
  "Assault", "Machine", "Destroy", "Restrict",
  "Messy", "Tissue", "Gastro", "District"
)] <- "green"
Colors[Names %in% c("Soak", "Show", "Strong", "Strut")] <- "magenta"

pie(table(res2.SMstr.df$W), Names,
    main = "Tokens with S+str equiv.cluster (red = 4, blue = 3, green = 2, magenta = 1)",
    col = Colors)
inpu <- readline("Press ENTER to continue, press 'P' to print this figure into the LogDir ")
if (inpu %in% c("P", "p")) {
  # draw the same chart once more, this time into a PDF file
  pdf(paste0(LogDirDate, "/Postanalysis_Pie_Syllables.pdf"))
  pie(table(res2.SMstr.df$W), Names,
      main = "Tokens with S+str equiv.cluster (red = 4, blue = 3, green = 2, magenta = 1)",
      col = Colors)
  dev.off()
}



##########################
# Gender: number of rows of res2.SMstr.df (tokens that are part of an S+str
# merger / equiv. cluster) per gender, divided by the number of agents of
# that gender.
# Original example: if 100 ABMs a 30 simGroups are run, the memory of each
# agent is inspected 3100 times; there are 190 str words in the agents'
# memory, hence total 190*3100 = 589000 potential str tokens. Hence the
# average female has 6125 of 589000 that has a merged equiv.label.
# NOTE(review): res2.SMstr.df is built from the after-ABM data only - confirm
# that the example numbers above still describe this table.
cat("Mean number of tokens per agent that at some point in the ABMs posses a S+str merger (eqiv. cluster) sorted by Gender:\n")
# we have 13 females and 6 males
agentsPerGender <- c(F = 13, M = 6)
genderCounts <- table(res2.SMstr.df$G)
# Divide by name, not by position: if one gender has no merged tokens it is
# absent from the table, and a positional c(13,6) divisor would then fail or
# pair a count with the wrong group size. Gender codes other than F/M yield NA.
print(genderCounts / unname(agentsPerGender[names(genderCounts)]))
# example output:
#         F         M 
# 6125.462 20065.000


  
##########################
# Bar plot with 3 bars (w_w, s_w, w_s). 100% = number of 'str' tokens of the
# stress class; counted value = memory tokens of that class that carry an
# S+str equiv.cluster in the final memories (defined by simGroups!)

# number of 'str' tokens per stress class
bar_w_w <- sum(res2.Initialstr.df$Stress == "w_w")
bar_s_w <- sum(res2.Initialstr.df$Stress == "s_w")
bar_w_s <- sum(res2.Initialstr.df$Stress == "w_s")

# percentage of merged S+str tokens within each stress class
barv <- c(
  w_w = sum(res2.Initialstr.df$Stress == "w_w" &
              res2.Initialstr.df$eclassLabels == "S+str") / bar_w_w * 100,
  s_w = sum(res2.Initialstr.df$Stress == "s_w" &
              res2.Initialstr.df$eclassLabels == "S+str") / bar_s_w * 100,
  w_s = sum(res2.Initialstr.df$Stress == "w_s" &
              res2.Initialstr.df$eclassLabels == "S+str") / bar_w_s * 100
)

barch <- barchart(
  barv,
  xlab = "% tokens in final memories",
  ylab = "Stress Pattern",
  main = paste0("Proportion merged S+str cluster after ", simGroups * 1000, " interactions")
)
print(barch)
inpu <- readline("Press ENTER to continue, press 'P' to print this figure into the LogDir ")
if (inpu %in% c("P", "p")) {
  # write the same chart into a PDF file
  pdf(paste0(LogDirDate, "/Postanalysis_Barchart_Stress.pdf"))
  print(barch)
  dev.off()
}



##########################
# Boxplot: 2 panels (f/m), each panel 3 boxes: s, S, str words,
# value: DCT-0(simGroups) - DCT-0(orig)

# averaged P1 before the ABM by Initial and Vpn
bwplot1.df <- aggregate(P1 ~ Initial * Vpn, FUN = mean, data = res2.df.before)
# averaged P1 after the ABM by Initial and Vpn
bwplot2.df <- aggregate(P1 ~ Initial * Vpn, FUN = mean, data = res2.df.after)

# Pair every 'before' row with the 'after' row of the same Initial x Vpn
# combination. Subtracting the two P1 columns position by position is only
# correct if both aggregates contain exactly the same combinations in the
# same order; a combination missing after the ABM would shift or recycle the
# values without notice.
afterRow <- match(
  paste(bwplot1.df$Initial, bwplot1.df$Vpn, sep = "\r"),
  paste(bwplot2.df$Initial, bwplot2.df$Vpn, sep = "\r")
)
if (anyNA(afterRow)) {
  warning("Some Initial x Vpn combinations have no data after the ABM; their P1Diff is NA",
          call. = FALSE)
}

# add P1Diff (afterP1 - beforeP1): the hypothesis is that this should be
# negative for 'str' words (= a lowering of the M1 tracks) and stable for
# 's' and 'S' words
bwplot1.df <- cbind(bwplot1.df, P1Diff = bwplot2.df$P1[afterRow] - bwplot1.df$P1)
# add Gender, taken as the first character of the agent id Vpn
bwplot1.df <- cbind(bwplot1.df, Gender = substr(bwplot1.df$Vpn, 1, 1))

bwpl <- bwplot(P1Diff ~ Initial | Gender, data = bwplot1.df,
               xlab = "word token class",
               ylab = "Change of DCT-0 of M1 tracks (after ABM - before ABM)",
               main = "Change of M1 track heights")
print(bwpl)
inpu <- readline("Press ENTER to continue, press 'P' to print this figure into the LogDir ")
if (inpu %in% c("P", "p")) {
  pdf(paste(LogDirDate, "/Postanalysis_DCT-0Change.pdf", sep = ""))
  print(bwpl)
  dev.off()
}

# ditto with only the 'str' tokens
bwpl <- bwplot(P1Diff ~ Gender, data = bwplot1.df[bwplot1.df$Initial == "str", ],
               xlab = "Gender",
               ylab = "Change of DCT-0 of M1 tracks (after ABM - before ABM)",
               main = "Change of M1 track heights of 'str' tokens")
print(bwpl)
inpu <- readline("Press ENTER to continue, press 'P' to print this figure into the LogDir ")
if (inpu %in% c("P", "p")) {
  pdf(paste(LogDirDate, "/Postanalysis_DCT-0ChangeStr.pdf", sep = ""))
  print(bwpl)
  dev.off()
}



##########################
# Group-level averaged M1 tracks before (solid) and after (dashed) the ABM.

# aggregate the DCT coefficients by sibilant initial (averaged over agents!)
# for the start and the end of the ABM
res2.df <- rbind(res2.df.before, res2.df.after)
res2.agg <- aggregate(cbind(P1, P2, P3) ~ Initial + Cond, mean, data = res2.df)

# build_dct_df.sibilant() needs Age, V, Vpn and W columns, so constant dummy
# columns are added
Age <- rep(1, times = nrow(res2.agg))
res2.agg <- data.frame(res2.agg, Vpn = Age, V = Age, Age = Age, W = Age)

# calculate the (smoothed) M1 tracks from the DCT values
dctTracks.df <- build_dct_df.sibilant(res2.agg)
# the first column (F2Pop) is renamed to M1smooth
names(dctTracks.df)[1] <- "M1smooth"

# common y range of both plots
ylim <- c(-2.5, 2.5)

# tracks of the original data (Cond == 0), solid lines
plot.orig <- xyplot(
  M1smooth ~ times, group = Initial, type = c("l", "g"),
  data = dctTracks.df[dctTracks.df$Cond == 0, ],
  ylim = ylim, col = strongColors[c(1, 5, 7)], lty = 1,
  main = "Averaged M1 tracks before (solid) / after (dashed) ABM, canonical labels"
)
# the same tracks after plotSimGroup * simGroupSize interactions
# (Cond == simGroups), dashed lines
plot.end <- xyplot(
  M1smooth ~ times, group = Initial, type = c("l", "g"),
  data = dctTracks.df[dctTracks.df$Cond == simGroups, ],
  ylim = ylim, col = strongColors[c(1, 5, 7)], lty = 2
)

# overlay both plots
print(plot.orig + plot.end)
inpu <- readline("Press ENTER to continue, press 'P' to print this figure into the LogDir ")
if (inpu %in% c("P", "p")) {
  pdf(paste(LogDirDate, "AverageTracks.pdf", sep = "/"))
  print(plot.orig + plot.end)
  dev.off()
}

 
