#
# all functions with '###ADAPT###' in the leading comment need probably changes, if you decide
# to change the contents of agent's memory.
#

###ADAPT###
cmem.timedelay <-
function(df){
  # this is to initialise the memory of each agent
  # version with variable dimensionality (20170220): 

  # df : a data-frame with components Speaker (the speakers), Word (word class), labels (initial phoneme class),
  # Gender (speaker group, can be anything that separates agents), Initial (the phonological or canonical label)
  # the number of columns named "P..." in df define the dimensionality of the feature space
     
  # contrary to cmem() the Age component of memory is a random assigned time index for token age (numeric)
  # for each *Word class*

  # get all columns out of df that have names "P..." and put them into a matrix
  params = as.matrix(df[,grep("P.", names(df))])
  colnames(params) = paste("P", 1:ncol(params), sep="")
  

    initMemory = list()
    ageIndex = 1:length(df$Gender)  # arbitrary *numeric* parallel vector to dataframe
    k = 1
    for(j in unique(df$Speaker)){
        temp = df$Speaker == j
        memLength = sum(temp)
        for(wClass in unique(df$Word)){
          temp1 = temp & (df$Word == wClass)   # index to elements of a *word class* in speaker j
          ageIndex[temp1] <- sample(1:sum(temp1))  # asign random (full) sample to index Age
        }  # this loop should cover all elements in column ageindex for temp (= speaker j)
        # initialize memory - note that the list elements partly differ in their label names to be 
        # compatibel with ABM functions that follow
        initMemory[[k]] = list(P = params[temp,], Word = as.character(df$Word[temp]), V = as.character(df$labels[temp]), Age = ageIndex[temp], Vpn = as.character(df$Speaker[temp]), Initial = as.character(df$Initial[temp]), Gender = as.character(df$Gender[temp]), nrOfTimesHeard= rep(1, sum(temp)))
        if(!is.matrix(initMemory[[k]]$P ))
        initMemory[[k]]$P = cbind(initMemory[[k]]$P)
        k = k+1
    }
    initMemory
    
}

cmem.timedelay.obsolete.20170220 <- function(df){
    # this is to initialise the memory of each agent

    # df is a data-frame with components Speaker (the speakers), Word (word class), labels (sibilant class /s/ or /S/),
    # gender (speaker group), k0-k2 (the DCT-0...2), Initial (s S str)
    
    # contrary to cmem() the Age component of memory is a random assigned time index for token age (numeric)
    # for each *Word class*

    initMemory = list()
    ageIndex = 1:length(df$gender)  # arbitrary *numeric* parallel vector to dataframe
    k = 1
    for(j in unique(df$Speaker)){
        temp = df$Speaker == j
        memLength = sum(temp)
        for(wClass in unique(df$Word)){
          temp1 = temp & (df$Word == wClass)   # index to elements of a *word class* in speaker j
          ageIndex[temp1] <- sample(1:sum(temp1))  # asign random (full) sample to index Age
        }  # this loop should cover all elements in column ageindex for temp (= speaker j)
        # initialize memory - note that the list elements partly differ in their label names to be 
        # compatibel with ABM functions that follow: V is the sibilant class, Age is the age of token, Vpn is Speaker
        initMemory[[k]] = list(k0 = df$k0[temp], k1 = df$k1[temp], k2 = df$k2[temp], Word = as.character(df$Word[temp]), V = as.character(df$labels[temp]), Age = ageIndex[temp], Vpn = as.character(df$Speaker[temp]), Initial = as.character(df$Initial[temp]), nrOfTimesHeard= rep(1, sum(temp)))
        k = k+1
    }
    initMemory
    
}

# the original production function used in Harrington 2017 Language
prod <- function(agent){
    # randomly sample a word
    randomIdx = sample(1:length(agent$memory$Word), 1)
    Word = agent$memory$Word[randomIdx]
    # generate single random sample from a Gaussian formed from the corresponding DCT distribution of the same words
    temp = agent$memory$Word == Word
    # this is the data
    dat = cbind(agent$memory$k0[temp], agent$memory$k1[temp], agent$memory$k2[temp])
    # we need the mean and covariance matrix
    m = apply(dat, 2, mean); s = cov(dat)
    # generate one sample
    Fdat = rmvnorm(1, m, s)
    # this is the vowel label
    V = agent$memory$V[randomIdx]
    # speaker's age group
    Age = agent$memory$Age[randomIdx]
    # token's initial
    Initial = agent$memory$Initial[randomIdx]
    # speaker's id
    Vpn = agent$memory$Vpn[randomIdx]
    # bind into a data-frame
    prodObj = data.frame(k0 = Fdat[1], k1=Fdat[2], k2 = Fdat[3], Word=Word, V=V, Age = Age, Vpn = Vpn, Initial = Initial, stringsAsFactors = F)
    prodObj 
}

# production in Sibilant ABM 1B + 1C (Flo: experimental!)
# prod of 'str' word is special:
# firstly, a 3-dim Gaussian is estimated based on all stored sibilant data labelled with the same phoneme class as the 
# selected 'str' token. The idea here is that the the speaker
# does not care about the sibilant quality when producing a word that has no sibilant contrast.
# Secondly we compress the estimated Gaussian around its mean by a global constant compressSD (if compressSD = 1.0, no
# compression takes place (radical ABM 1B). This compression can be justified for efficiency reasons: it is unlikely 
# that an agent, when give the full freedom to produce whatever sibilant she/he likes, that she/he will actually 
# sample the whole range, but rather stick to a convenient smaller range around the mean.
prod.sibilant.1b <- function(agent){
    # randomly sample a word
    randomIdx = sample(1:length(agent$memory$Word), 1)
    # this is the selected word for production
    Word = agent$memory$Word[randomIdx]
    # distinguish 'str' and non-'str' words
    if(substr(Word,1,3) != "str") {
      # producing a non-'str' word
      # generate single random sample from a Gaussian formed from the corresponding DCT distribution of the same words
      temp = agent$memory$Word == Word
      # this is the data
      dat = cbind(agent$memory$k0[temp], agent$memory$k1[temp], agent$memory$k2[temp])
      # we need the mean and covariance matrix
      m = apply(dat, 2, mean); s = cov(dat)
      # generate one sample
      Fdat = rmvnorm(1, m, s)
      # this is the phoneme label
      V = agent$memory$V[randomIdx]
      # token's age 
      Age = agent$memory$Age[randomIdx]
      # speaker's id
      Vpn = agent$memory$Vpn[randomIdx]
      Initial = agent$memory$Initial[randomIdx]
      # bind into a data-frame
      prodObj = data.frame(k0=Fdat[1], k1=Fdat[2], k2=Fdat[3], Word=Word, V=V, Age=Age, Vpn=Vpn, Initial=Initial, stringsAsFactors=F)
    }
    else {
      # producing a 'str' word
      # Gaussian estimated from all memorized sibilants with the same label as the selected 'str' token
      # this is the phoneme label
      V = agent$memory$V[randomIdx]
      # these are the tokens
      temp = agent$memory$V == V
      dat = cbind(agent$memory$k0[temp], agent$memory$k1[temp], agent$memory$k2[temp])
      # Gaussian
      m = apply(dat, 2, mean); s = cov(dat)
      # compress Gaussian by scaling the variances in the diagonal of the covariance matrix
      s[1,1] <- compressSD * s[1,1]
      s[2,2] <- compressSD * s[2,2]
      s[3,3] <- compressSD * s[3,3]
      # generate one sample from that Gaussian      
      Fdat = rmvnorm(1, m, s)     
      # other data to pass on
      Age = agent$memory$Age[randomIdx]
      Vpn = agent$memory$Vpn[randomIdx]
      Initial = agent$memory$Initial[randomIdx]
      # bind into a data-frame
      prodObj = data.frame(k0=Fdat[1], k1=Fdat[2], k2=Fdat[3], Word=Word, V=V, Age=Age, Vpn=Vpn, Initial=Initial, stringsAsFactors=F)
    }
      prodObj
}

# production (ABM Sibilants 3)
# ###ADAPT###
prod.sibilant.3 <-
function(agent){
    # JMH/Flo 20170220 : this version supports variable dim in feature space but does not 
    #                    support the articulSpace and the several experiments about phonetic coarticulation
    #                    thus, the globals articulSpace,phoneticCoarticulate1,phoneticCoarticulate2 have 
    #                    no effect here, even is set by the calling context.
    #                    Reason: we don't want artificial biases in the production data and the articulSpace 
    #                    turned out not to benecessary.

    # randomly sample a word
    randomIdx = sample(1:length(agent$memory$Word), 1)
    Word = agent$memory$Word[randomIdx]
    # generate single random sample from a Gaussian formed from the corresponding DCT distribution of the same words
    # Note that production is thus word-based and not phonological-based: even if the same phonological category
    # is shared by many other words, their tokens are *not* considered for production!
    # Since a word category can only contain one phonological category, we can filter by Word label
    temp = agent$memory$Word == Word
    # this is the data
    # JMH modified here to endJMH
    if(ncol(agent$memory$P) > 1) {
      # we need the mean and covariance matrix
      m = apply(agent$memory$P[temp,], 2, mean); s = cov(agent$memory$P[temp,])
      Fdat = rmvnorm(1, m, s)
    }
    else {
      m = mean(agent$memory$P[temp,]); s = var(agent$memory$P[temp,])
      Fdat = rnorm(1, m, s)
    }
  
    # this is the phoneme label
    V = agent$memory$V[randomIdx]
    # time index (not used by receiving agent)
    Age = agent$memory$Age[randomIdx]
    # token's initial (= grouping factor)
    Initial = agent$memory$Initial[randomIdx]
    # speaker's id
    Vpn = agent$memory$Vpn[randomIdx]
    # bind into a data-frame
    # JMH modified this output
    prodObj = data.frame(P = Fdat, Word=Word, V=V, Age = Age, Vpn = Vpn, Initial = Initial, stringsAsFactors = F)
    names(prodObj)[1:length(Fdat)] = paste("P", 1:length(Fdat), sep="")
    prodObj
}


prod.sibilant.2 <- function(agent){
    # This version suppresses productions that occur outside an 'articulation space' defined by articulSpace (2x3 matrix)
    # Each column in articulSpace contains the minimum and maximum value for a feature value

    # A very crude way to introduce phonetic coarticulation:
    # If the boolean phoneticCoarticulate1 is TRUE, only randomly created tokens for a /str/ word are 
    # produced if they are located between the centroid of the /str/ tokens and the centroid of the /S/ tokens.
    # with other words; only randomly sampled /str/ productions that 'coarticulate in the direction of /s/ are 
    # produced. 
    # If the boolean phoneticCoarticulate2 is TRUE, the centroid of the /str/ words is shifted by by 
    # (shiftStrGaussionFactor*100)% in the direction of the /S/ centroid 


    # randomly sample a word
    randomIdx = sample(1:length(agent$memory$Word), 1)
    Word = agent$memory$Word[randomIdx]
    # generate single random sample from a Gaussian formed from the corresponding DCT distribution of the same words
    # Note that production is thus word-based and not phonological-based: even if the same phonological category
    # is shared by many other words, their tokens are *not* considered for production!
    # Since a word category can only contain one phonological category, we can filter by Word label
    temp = agent$memory$Word == Word
    # this is the data
    dat = cbind(agent$memory$P1[temp], agent$memory$P2[temp], agent$memory$P3[temp])
    # we need the mean and covariance matrix
    m = apply(dat, 2, mean); s = cov(dat)
    
    # Phonetic articulation type 2
    # shift the centroid of the /str/ Gaussian by (shiftStrGaussionFactor*100)% in the direction of the /S/ centroid
    if(phoneticCoarticulate2 == T & agent$memory$Initial[randomIdx] == "str") {
          # calculate the mean of current /S/ tokens of agent
          temp = agent$memory$Initial == "S"
          dat = cbind(agent$memory$P1[temp], agent$memory$P2[temp], agent$memory$P3[temp])
          m.S = apply(dat, 2, mean)
          # the vector from /str/ centroid to /S/ centroid
          m.str.S.diff = m.S - m
          m = m + shiftStrGaussionFactor * m.str.S.diff
    }
    # generate samples until one is within the articulSpace and on the same side of the m.str plane as m.S
    isOutside = T
    while(isOutside) {
      # generate one sample
      Fdat = rmvnorm(1, m, s)
      # test if within articulSpace ...
      if( sum(c(Fdat < articulSpace[1,],Fdat > articulSpace[2,])) == 0 ) { 
        # Phonetic coarticulation type 1
        # ... and then test if the word is a /str/ word and coarticulated in direction of /S/
        if(phoneticCoarticulate1 == T & agent$memory$Initial[randomIdx] == "str") {
          # First calculate the plane that is orthogonal to the difference vector between the 
          # m.str and m.S centroid and goes through m.str;then we can later simply look if a produced
          # /str/ token is on teh same side as the m.S. of this plane to check for phonetic coarticulation
       
          # calculate the mean of current /S/ tokens of agent
          temp = agent$memory$Initial == "S"
          dat = cbind(agent$memory$P1[temp], agent$memory$P2[temp], agent$memory$P3[temp])
          m.S = apply(dat, 2, mean)
          # calculate the mean of current /str/ tokens of agent
          temp = agent$memory$Initial == "str"
          dat = cbind(agent$memory$P1[temp], agent$memory$P2[temp], agent$memory$P3[temp])
          m.str = apply(dat, 2, mean)
          # vector from m.S to m.str
          m.diff = m.str - m.S
          # the orthogonal plane to m.diff that goes through m.str has the 
          # formula  p.0 * k0 + p.1 * k1 + p.2 * k2 + p.k = 0    with:
          p.0 = m.diff[1]
          p.1 = m.diff[2]
          p.2 = m.diff[3]
          p.k = - m.diff %*% m.str
          # check if formula is correct: put m.str into formula must result in zero:
          #p.0 * m.str[1] + p.1 * m.str[2] + p.2 * m.str[3] + p.k
          # test on which 'side' (pos or neg) is the centroid of m.S of this plane
          if(p.0 * m.S[1] + p.1 * m.S[2] + p.2 * m.S[3] + p.k > 0 ) { 
            sideOfPlane = 1
          } else {
            sideOfPlane = -1
          }
          # => all points that when inserted in above plane formula result in a scalar of the same
          # sign as sideOfPlane, are in the 'direction' of the /s/ centroid.
          # check the side of the plane this token is located; if the plane formula results in the same 
          # sign as sideOfPlane (see above) then the point is on the same side of the plane as m.S
          if((p.0 * Fdat[1] + p.1 * Fdat[2] + p.2 * Fdat[3] + p.k)*sideOfPlane > 0) {
            isOutside = F
          } 
        } else {
          isOutside = F
        }
      }
    }
    # this is the phoneme label
    V = agent$memory$V[randomIdx]
    # time index (not used by receiving agent)
    Age = agent$memory$Age[randomIdx]
    # token's initial (= grouping factor)
    Initial = agent$memory$Initial[randomIdx]
    # speaker's id
    Vpn = agent$memory$Vpn[randomIdx]
    # bind into a data-frame
    prodObj = data.frame(P1 = Fdat[1], P2=Fdat[2], P3 = Fdat[3], Word=Word, V=V, Age = Age, Vpn = Vpn, Initial = Initial, stringsAsFactors = F)
    prodObj
}

# perception in sibilant ABM 1
###ADAPT###
perc.sibilant.1 <- function(agent, percThis, removePriortoUpdate=T)

# Further changes:
# Flo 20170327 : bug fix : the threshold of posterior 1/3 was replaced by the 1/nr_of_classes (1/3 was 
#                probably motivated by the case with three classes as in the u-fronting experiment!). If maxPosteriorMatch=T,
#                this 'soft' decision rule is replaced by the more rigid rule that the incoming token must
#                satisfy a maximum posterior match: the incoming token must have the maximum 
#                posterior prob of the full qda model (with the 'soft' rule there could be other classes that have a 
#                higher posterior!) Also,
#                we took out the apriors of qda(); that way inhomogenous sets of derived clusters (always
#                if you use split&merge!) are handled correctly.
#                Multiple ABM runs on teh Australian s-retraction experiment show that this 'correct' decision rule
#                outperforms the 'soft' decision rule; since it is also better theoretically based, we recommend 
#                to use this function only with maxPosteriorMatch=T in the future (set as default, if variable 
#                maxPosteriorMatch does not exist!)

# The first ABM strategy used by Harrington in his u-fronting experiments (if timeDelayStrategy=F and 
# splitAndMerge=F).
# The original outlayer removal strategy (timeDelayStrategy = F) causes phonological Gaussian models to
# collapse very quickly during ABM, and was therefore later 
# replaced by F. Schiel with the ABM 2 strategy (see perc.sibilant.2()).
# Tip: you can use the original Harrington Rule together with time-delay token removal by setting the 
# parameter timeDelayStrategy=T in the calling context; this will avoid the collapse of distributions.
# Tip: you can use the original Harrington Rule together with split&merge by setting the 
# parameter splitAndMerge=T in the calling context.

# Incoming token is tested on all phonological classes ($V), if recognized correctly 
# (= the posterior prob of the correct derived cluster is larger than 1/nr_of_clusters), the token is accepted for 
# memorization, i.e. percThis is added to memory, and the new token gets an memory$Age element 
# to indicate the time index when this token was added to the memory.

# In contrast to earlier versions of perc() here the memorized token does not get the $memory$V label of the 
# incoming token, but rather the $memory$V label of the already stored word tokens of the same class, i.e. 
# misclassification is not possible. The reason is
# that with the introduction of splitandmerge() it is possible (and happens) that an incoming word has a (derived) phonological 
# label that differs from the phonological labels in memory. Since the memorization of such a class makes no sense and leads to further errors
# (when an agent carries the same word with different phonological labels!), we simply give the memorized 
# word token the same phonological category as the already stored tokens of the same word.

# timeDelayStrategy=T : A newly memorized token gets a time index one higher than the highest time index within the word class.

# removePriortoUpdate=T : if percThis is added, then an observation from the agent's memory is 
# removed following criteria: 
# (i) the maximum number of storable tokens in agent's memory (parameter 'maxTokens') has been reached
#     (maxTokens is defined in the calling context)
# (ii) the removed word is of the same type e.g. if percThis$Word 
# is 'stream', then all the data corresponding to one of the agent's 'stream' words is removed. 
# (iii) which token is removed depends on the parameter 'timeDelayStrategy': 
# if set TRUE, the token removed is the one of the word type that has 
# been kept in memory for the longest number of interactions. We simulate this by storing an 
# 'interaction time index' in the element Age of the agent memory.  During initialization we assign random time indices to Age.
# if set FALSE, the token removed is the one that has the largest Mahalanobis distance to the phonological
# Gaussian model, i.e. the most 'out-lying' token is removed, not the oldest. This is the original strategy by 
# Harrington in his first experiments on u-fronting.

# If splitAndMerge=T (from main context), the function checks the agents memory for possible 
# splits and mergers of phonological categories; this merely regards the $memory$V labels, all
# other agent data stay untouched. The splitAndMerge check is only triggered, when 
# the total number of received tokens (= the number of logged updates in memory + 1) %% splitAndMergeInterval 
# (set by calling context) equal 0 (= every splitAndMergeInterval updates of the agent).

{
  debug = F
  # FLO 20170327 : default is rigid (max) decision rule
  if(!exists("maxPosteriorMatch")) maxPosteriorMatch = T

  numberTokens = length(agent$memory$Word)
  if(debug) { 
    cat("\nperc.sib.1: Agent ",agent$agentNr," receives incoming token:\n"); print(percThis)
    cat("perc.sib.1: tokens in agent's memory = ",numberTokens,"\n") 
  }

  recognized = F
  # JMH 20170220: the next three lines are to find the parameters, assuming they are always named 'Pn', 1 <= n <= 20
  nm = paste("P", 1:20, sep="")
  temp = names(percThis) %in% nm
  incomingdct = cbind(percThis[,temp])
  #incomingdct = cbind(percThis$k0, percThis$k1, percThis$k2)    
  
  # determine phonological class of incoming token by word label, that is we assume
  # that the word is always recognized by the agent (this is necessary, if splitAndMerge is
  # activated, since then the incoming label depends on the individual phonological system 
  # of the speaker).
  incomingV = unique(agent$memory$V[agent$memory$Word == percThis$Word])  
  if(debug) { cat("perc.sib.1: agents V class for incoming token ",percThis$Word,": ",incomingV,"\n") }

  # The original Harrington test of memorization (see function perc() in this file)
  # test if the posterior probability P(incomingdct|percThis$V) of the incoming token 
  # given a quadratic discriminant model trained on all exemplars in memory labeled in derived cluster classes (V) is 
  # higher than the 'worst case' posterior prob 1/nr_of_classes; the idea is that if the incoming token 
  # is classified with less probability, then one other class must be classified higher.
  # Flo 20170327
  # threshold = 1/3
  threshold = 1/length(unique(agent$memory$V))
  # calculate means and covariance matrices 
  # JMH 20170220 changed this
  #dat = cbind(agent$memory$k0, agent$memory$k1, agent$memory$k2)
  dat = as.matrix(agent$memory$P)
  labs = agent$memory$V
  # Flo 20170327
  #qdat = qda(dat, labs, prior = rep(1/length(unique(labs)), length(unique(labs))))
  qdat = qda(dat, labs)
  # calculate the posterior probability of the incoming signal
  pincoming = predict(qdat, incomingdct)$posterior
  # find out whether the posterior-probability exceeds 'threshold'
  posteri = pincoming[colnames(pincoming) == incomingV]
  # Flo 20170327
  if(maxPosteriorMatch) {
    recognized = colnames(pincoming)[which.max(pincoming)] == incomingV
  } else {
    recognized = posteri > threshold
  }
  if(debug) { cat("perc.sib.1: posterior prob of incoming token ",incomingV," given class ",incomingV," is: ",posteri,"\n") }

  # See discusssion in perc() in this file; alternatively we could use a real Bayessian 
  # classifyer and accept the incoming token only if it is classified correctly:
  # test if the incoming token is correctly recognized as incomingV
  # build bayes models based on all memorized tokens of agent
  #dat = agent$memory$P
  #labs = agent$memory$V
  #tdat = train(dat,labs)
  # classify the incoming sibilant
  #recognizedClass = classify(incomingdct, tdat, metric = "bayes")
  #if(debug) { cat("perc.sib.1: incoming token ",incomingV," classified as: ",recognizedClass,"\n") }
  #recognized = recognizedClass == incomingV
  # There are some good reasons *not* to use a classification constraint, though. See
  # the discussion in function perc() in this file.

  if(debug & recognized) { cat("perc.sib.1: Agent accepts incoming token\n") }
  if(debug & !recognized) { cat("perc.sib.1: Agent rejects incoming token\n") }

  # do the memorization and model update
  incominglabs = c(percThis$Vpn, percThis$V, percThis$Age, percThis$Word, percThis$Initial, percThis$Gender)  
  if(recognized)
  # then update 
  {
        incominglabs = c(incominglabs, "y")
        if(removePriortoUpdate && numberTokens >= maxTokens) {
          if(timeDelayStrategy) {
            # discard the word in the same category with lowest memory$Age index
            ageTemp = agent$memory$Age
            ageTemp[agent$memory$Word != percThis$Word] <- NA  # just look for incoming Word in Age
            onetoremove = which.min(ageTemp)            # index to min value just in Word group
            # remove the above from the agent's memory
            agent$memory$nrOfTimesHeard =  agent$memory$nrOfTimesHeard[-onetoremove]
            # JMH 20170220 changed this 3 next lines
            agent$memory$P = agent$memory$P[-onetoremove,]
            #agent$memory$k2 = agent$memory$k2[-onetoremove]
            #agent$memory$k1 =  agent$memory$k1[-onetoremove]
            #agent$memory$k0 = agent$memory$k0[-onetoremove]
            agent$memory$Word =  agent$memory$Word[-onetoremove]
            agent$memory$V =  agent$memory$V[-onetoremove]
            agent$memory$Age = agent$memory$Age[-onetoremove]
            agent$memory$Initial = agent$memory$Initial[-onetoremove]
            agent$memory$Vpn = agent$memory$Vpn[-onetoremove]
            agent$memory$Gender = agent$memory$Gender[-onetoremove]
            if(debug) { cat("perc.sib.1: remove oldest token with index ",onetoremove,"\n") }
          } else {
            # discard the word in the same category with highest Mahalanobis distance to the 
            # centroid of the corresponding vowel category (ignoring the time index)
            # calculate centroid and covariance matrix for the agent's label incomingV (training phase)
            temp.mahal = agent$memory$V == incomingV
            # JMH 20170220 changed this
            tdat.mahal = train(as.matrix(agent$memory$P[temp.mahal,]))
            # establish which of the agent's words correspond to the incoming word
            temp.word.mahal = agent$memory$W == percThis$W
            # next two lines: we need a numerical index to identify relative to agent$memory which observation to remove
            nums.mahal = 1:length(agent$memory$W)
            nums.mahal = nums.mahal[temp.word.mahal]
            # calculate the Mahalanobis distances from all the agent's words of label percThis$W to the agent's incomingV-centroid (testing phase)
            # JMH 20170220 changed this
            #dist.mahal = distance(cbind(agent$memory$k0[temp.word.mahal],agent$memor$k1[temp.word.mahal], agent$memory$k2[temp.word.mahal]), tdat.mahal, metric = "mahal")
            dist.mahal = distance(agent$memory$P[temp.word.mahal,], tdat.mahal, metric = "mahal")
            # get numerical index relative to agent$memory of the observation that is to be removed depending on which Mahalanobis distance is greatest
            onetoremove = nums.mahal[which.max(dist.mahal)]
            # remove the above from the agent's memory
            agent$memory$nrOfTimesHeard =  agent$memory$nrOfTimesHeard[-onetoremove]
            #agent$memory$k2 = agent$memory$k2[-onetoremove]
            #agent$memory$k1 =  agent$memory$k1[-onetoremove]
            #agent$memory$k0 = agent$memory$k0[-onetoremove]
            # JMH 20170220 changed this 3 next lines
            agent$memory$P = agent$memory$P[-onetoremove,]
            if(!is.matrix(agent$memory$P))
              agent$memory$P = cbind(agent$memory$P)
            agent$memory$Word =  agent$memory$Word[-onetoremove]
            agent$memory$V =  agent$memory$V[-onetoremove]
            agent$memory$Age = agent$memory$Age[-onetoremove]
            agent$memory$Vpn = agent$memory$Vpn[-onetoremove]
            agent$memory$Gender = agent$memory$Gender[-onetoremove]
            agent$memory$Initial = agent$memory$Initial[-onetoremove]
            if(debug) { cat("perc.sib.1: remove outlier token with index ",onetoremove,"\n") }
          }
        }
        # now update with percThis, but set the phonological label $memory$V to the label of the same words in memory
        agent$memory$update = c(agent$memory$update, "y")
        agent$memory$nrOfTimesHeard = c(agent$memory$nrOfTimesHeard, unique(agent$memory$nrOfTimesHeard[agent$memory$Word == percThis$Word]))
        # JMH 20170220 changed this
        agent$memory$P = rbind(agent$memory$P, incomingdct)
        #agent$memory$k2 = c(agent$memory$k2, percThis$k2)
        #agent$memory$k1 = c(agent$memory$k1, percThis$k1)
        #agent$memory$k0 = c(agent$memory$k0, percThis$k0)
        agent$memory$V = c(agent$memory$V, incomingV)
        agent$memory$Initial = c(agent$memory$Initial, percThis$Initial)
        agent$memory$Vpn = c(agent$memory$Vpn, agent$memory$Vpn[1])
        agent$memory$Gender = c(agent$memory$Gender, agent$memory$Gender[1])
        # assign next free time index within the word group to the new learned token
        newIndex = max(agent$memory$Age[agent$memory$Word==percThis$Word]) + 1
        agent$memory$Age <- c(agent$memory$Age, newIndex)
        # cat("agent$memory$Word==percThis$Word : ",agent$memory$Word==percThis$Word,"\npercThis$Word = ",percThis$Word," newIndex = ",newIndex,"\n")
        agent$memory$Word = c(agent$memory$Word, percThis$Word)
        # this next line stores as rows all observations ever perceived by the agent
        agent$memory$Incoming = rbind(agent$memory$Incoming, incominglabs)
        temp = agent$memory$Word == percThis$Word
        agent$memory$nrOfTimesHeard[temp] = agent$memory$nrOfTimesHeard[temp] + 1
        
    }
    else
    # no update
    {
      agent$memory$update = c(agent$memory$update, "n")
      incominglabs = c(incominglabs, "n")
      agent$memory$Incoming = rbind(agent$memory$Incoming, incominglabs)
    }
    # check for splits and mergers
    numReceivedTokens = length(agent$memory$update) 
    if(splitAndMerge == T & numReceivedTokens%%splitAndMergeInterval == 0) { 
      if(debug) {
         cat("perc.sib.1: performing split and merge on agent number ",agent$agentNr,"\n")
         cat("perc.sib.1: numReceivedTokens = ", numReceivedTokens,"\n")
         print(agent)
      }
      agent = splitandmerge(agent) 
      if(debug) {
        cat("perc.sib.1: updated agent$memory$V : \n",agent$memory$V,"\n")
      }
    }
    agent
}


# perception in sibilant ABM 2
###ADAPT###

perc.sibilant.3 <-
function(agent, percThis, removePriortoUpdate=T)

# Modified to cover variable dim in features agent$memory$P (20170220)

# Modified perception test for s-retraction experiment of Mary Stevens (2016)
# Main differences to perc():
# 1. Apply Harrington Rule in form of a Mahalanobis threshold instead posterior 
# probalility (as in the second experiment in Harrington&Schiel, Language, 2017)
# based on all exemplars that have the same (agent-individual) phonological class
# as the incoming *word* (not phonological label!)
# 2. Apply time-delayed token removal instead of outlier-removal
# 3. Allow up to maxTokens to be learned in each agent's memory, before removing tokens
# (4. Optionally apply split&merge operations on agent memories.)
# (5. Optionally apply a different Harrington Rule on /s/ tokens that are from 'str' context.)

# Incoming non-'str' tokens: 
# The Harrington Rule (with simMahalThreshold) is applied;
# tokens beyond simMahalThreshold are not memorized. 
# Incoming 'str' tokens: 
# The Harrington Rule is applied (with threshold.str).
# If the log MH distance is smaller than the respective threshold, then 
# percThis is added to memory, and the new token gets an memory$Age element to indicate the time index 
# when this token was added to the memory.
# (We use two different thresholds here; for one thing, we can let agents memorize all incoming 'str' tokens
# by setting threshold.str to a very high number, or we can assume that the Harrington Rule is relaxed 
# for 'str' tokens to be memorized, since these don't endanger phonological contrast. In most cases both 
# thresholds will be set to the same value, to satisfy conceptual simplicity.)

# In contrast to earlier versions of perc() here the memorized token does not get the $memory$V label of the 
# incoming token, but rather the $memory$V label of the already stored word tokens of the same class. The reason is
# that with the introduction of splitandmerge() it is possible (and happens) that an incoming word has a phonological 
# label that differs from the phonological labels in memory. Since the memorization of such a class makes no sense and leads to further errors
# (when an agent carries the same word with different phonological labels!), we simply give the memorized 
# word token the same phonological category as the already stored tokens of the same word.
# Since phonological classes can be shared by several word types, the Gaussian used for the Harrington Rule
# is then calculated on all tokens with this phonological label $V.

# A newly memorized token gets a time index one higher than the highest time index within the word class.

# removePriortoUpdate: if percThis is added, then an observation from the agent's memory is 
# removed following criteria. (i) the removed word is of the same type e.g. if percThis$Word 
# is 'stream', then all the data corresponding to one of the agent's 'stream' words is removed. 
# (ii) the one removed is the one that has 
# been kept in memory for the longest number of interactions. We simulate this by storing an 
# 'interaction time index' in the element Age of the agent memory (which was redundant anyway). 
# During initialization we assign random time indices to Age.
# (iii) the maximum number of storable tokens in agent's memory maxTokens has reached

# If splitAndMerge==T (from main context), the function checks the agents memory for possible 
# splits and mergers of phonological categories; this merely regards the $memory$V labels, all
# other agent data stay untouched. The splitAndMerge check is only triggered, when the number of received 
# tokens (=) %% splitAndMergeInterval (set by calling context) is 0.

{
  debug = F
  numberTokens = length(agent$memory$Word)
  if(debug) { 
    cat("\nperc.sib.3: Agent ",agent$agentNr," receives incoming token:\n"); print(percThis)
    cat("perc.sib.3: tokens in agent's memory = ",numberTokens,"\n") 
  }
  # JMH 20170220: the next three lines are to find the parameters, assuming they are always named 'Pn', 1 <= n <= 20
  nm = paste("P", 1:20, sep="")
  temp = names(percThis) %in% nm
  incomingdct = cbind(percThis[,temp])
  
  # determine phonological class of incoming token by word label, that is we assume
  # that the word is always recognized by the agent; this is necessar, because following individual 
  # splits in the sending agents memory the phonological label could be anything.
  incomingV = unique(agent$memory$V[agent$memory$Word == percThis$Word])  
  if(debug) { cat("perc.sib.3: agents V class for incoming token ",percThis$Word,": ",incomingV,"\n") }

  # apply Harrington Rule with Mahalanobis threshold
  # estimate mean and covariance matrix from all tokens in memory that have class incomingV
  temp = agent$memory$V == incomingV
  # JMH 20170220: modified next line. This is the typical multidimensional case
  if(ncol(agent$memory$P) > 1) {
    dat = as.matrix(agent$memory$P)
    V.mahal = train(dat)
    # calculate the MH distance of the incoming signal to this V model
    V.dist = log(as.numeric(distance(incomingdct, V.mahal, metric = "mahal")))
  } else {
    # JMH 20170220: this as far as I can see is the equivalent for one parameter
    # Flo: quite right, except that I'd use the quadratic form as in the HM case (compare the 
    # one-dim and multi-dim Gauss probability):
    #V.dist = log(abs((incomingdct - mean(agent$memory$P))/sd(agent$memory$P)))
    V.dist = log(((incomingdct - mean(agent$memory$P))/sd(agent$memory$P))^2)
  }
    
  # test whether the MH distance is below threshold, meaning the agent accepts the incoming signal 
  # as of his word class percThis$Word and phonological class incomingV
  
  if(percThis$Initial != "str") {
    p1temp = V.dist < simMahalThreshold
    if(debug) { cat("perc.sib.3: MH simMahalThreshold = ",simMahalThreshold,"\n") }
  } else {
    p1temp = V.dist < threshold.str
    if(debug) { cat("perc.sib.3: MH threshold.str = ",threshold.str,"\n") }
  }

  if(debug) { cat("perc.sib.3: Agent log MH distance for incoming token ",percThis$Word," = ",V.dist,"\n") }
  if(debug & p1temp) { cat("perc.sib.3: Agent accepts incoming token\n") }
  if(debug & !p1temp) { cat("perc.sib.3: Agent rejects incoming token\n") }

  # do the memorization and model update
  incominglabs = c(percThis$Vpn, percThis$V, percThis$Age, percThis$Word, percThis$Initial, percThis$Gender)  
  if(p1temp)
  # then update 
  {
        incominglabs = c(incominglabs, "y")
        if(removePriortoUpdate && numberTokens >= maxTokens)
        {
            # discard the word in the same category with lowest memory$Age index
            ageTemp = agent$memory$Age
            ageTemp[agent$memory$Word != percThis$Word] <- NA  # just look for incoming Word in Age
            onetoremove = which.min(ageTemp)            # index to min value just in Word group
            # remove the above from the agent's memory
            agent$memory$nrOfTimesHeard =  agent$memory$nrOfTimesHeard[-onetoremove]
            # JMH 20170220 changed this 3 next lines
            agent$memory$P = agent$memory$P[-onetoremove,]
            if(!is.matrix(agent$memory$P))
              agent$memory$P = cbind(agent$memory$P)
            agent$memory$Word =  agent$memory$Word[-onetoremove]
            agent$memory$V =  agent$memory$V[-onetoremove]
            agent$memory$Age = agent$memory$Age[-onetoremove]
            agent$memory$Initial = agent$memory$Initial[-onetoremove]
            agent$memory$Vpn = agent$memory$Vpn[-onetoremove]
            agent$memory$Gender = agent$memory$Gender[-onetoremove]
            if(debug) { cat("perc.sib.3: remove oldest token with index ",onetoremove,"\n") }
        }
        # now update with percThis, but set the phonological label $memory$V to the label of the same words in memory
        agent$memory$update = c(agent$memory$update, "y")
        agent$memory$nrOfTimesHeard = c(agent$memory$nrOfTimesHeard, unique(agent$memory$nrOfTimesHeard[agent$memory$Word == percThis$Word]))
        # JMH 20170220 changed this next line
        agent$memory$P = rbind(agent$memory$P, incomingdct)
        agent$memory$V = c(agent$memory$V, incomingV)
        agent$memory$Initial = c(agent$memory$Initial, percThis$Initial)
        agent$memory$Vpn = c(agent$memory$Vpn, agent$memory$Vpn[1])
        agent$memory$Gender = c(agent$memory$Gender, agent$memory$Gender[1])
        # assign next free time index within the word group to the new learned token
        newIndex = max(agent$memory$Age[agent$memory$Word==percThis$Word]) + 1
        agent$memory$Age <- c(agent$memory$Age, newIndex)
        # cat("agent$memory$Word==percThis$Word : ",agent$memory$Word==percThis$Word,"\npercThis$Word = ",percThis$Word," newIndex = ",newIndex,"\n")
        agent$memory$Word = c(agent$memory$Word, percThis$Word)
        # this next line stores as rows all observations ever perceived by the agent
        agent$memory$Incoming = rbind(agent$memory$Incoming, incominglabs)
        temp = agent$memory$Word == percThis$Word
        agent$memory$nrOfTimesHeard[temp] = agent$memory$nrOfTimesHeard[temp] + 1
        
    }
    else
    # no update
    {
      agent$memory$update = c(agent$memory$update, "n")
      incominglabs = c(incominglabs, "n")
      agent$memory$Incoming = rbind(agent$memory$Incoming, incominglabs)
    }
    # split and merge, first time after splitAndMergeInterval memory updates
    numReceivedTokens = length(agent$memory$update)
    if(splitAndMerge == T & numReceivedTokens%%splitAndMergeInterval == 0) { 
      if(debug) {
         cat("perc.sib.3: performing split and merge on agent number ",agent$agentNr,"\n")
         cat("numReceivedTokens = ", numReceivedTokens,"\n")
         print(agent)
      }
      agent = splitandmerge(agent) 
      if(debug) {
        cat("perc.sib.3: updated agent$memory$V : \n",agent$memory$V,"\n")
      }
    }
    agent
}


perc.sibilant.2 <- function(agent, percThis, removePriortoUpdate=T)

# Modified perception test for s-retraction experiment of Mary Stevens
# Main differences to perc():
# 1. Apply Harrington Rule in form of a Mahalanobis threshold instead posterior 
# probalility (as in the second experiment in Harrington&Schiel, Language, 2017)
# based on all exemplars that have the same (agent-individual) phonological class
# as the incoming *word* (not phonological label!)
# 2. Apply time-delayed token removal instead of outlier-removal
# 3. Allow up to maxTokens to be learned in each agent's memory, before removing tokens
# (4. Optionally apply split&merge operations on agent memories.)
# (5. Optionally apply a different Harrington Rule on /s/ tokens that are from 'str' context.)

# Incoming non-'str' tokens: 
# The Harrington Rule (with simMahalThreshold) is applied;
# tokens beyond simMahalThreshold are not memorized. 
# Incoming 'str' tokens: 
# The Harrington Rule is applied (with threshold.str).
# If the log MH distance is smaller than the respective threshold, then 
# percThis is added to memory, and the new token gets an memory$Age element to indicate the time index 
# when this token was added to the memory.
# (We use two different thresholds here; for one thing, we can let agents memorize all incoming 'str' tokens
# by setting threshold.str to a very high number, or we can assume that the Harrington Rule is relaxed 
# for 'str' tokens to be memorized, since these don't endanger phonological contrast. In most cases both 
# thresholds will be set to the same value, to satisfy conceptual simplicity.)

# In contrast to earlier versions of perc() here the memorized token does not get the $memory$V label of the 
# incoming token, but rather the $memory$V label of the already stored word tokens of the same class. The reason is
# that with the introduction of splitandmerge() it is possible (and happens) that an incoming word has a phonological 
# label that differs from the phonological labels in memory. Since the memorization of such a class makes no sense and leads to further errors
# (when an agent carries the same word with different phonological labels!), we simply give the memorized 
# word token the same phonological category as the already stored tokens of the same word.
# Since phonological classes can be shared by several word types, the Gaussian used for the Harrington Rule
# is then calculated on all tokens with this phonological label $V.

# A newly memorized token gets a time index one higher than the highest time index within the word class.

# removePriortoUpdate: if percThis is added, then an observation from the agent's memory is 
# removed following criteria. (i) the removed word is of the same type e.g. if percThis$Word 
# is 'stream', then all the data corresponding to one of the agent's 'stream' words is removed. 
# (ii) the one removed is the one that has 
# been kept in memory for the longest number of interactions. We simulate this by storing an 
# 'interaction time index' in the element Age of the agent memory (which was redundant anyway). 
# During initialization we assign random time indices to Age.
# (iii) the maximum number of storable tokens in agent's memory maxTokens has reached

# If splitAndMerge==T (from main context), the function checks the agents memory for possible 
# splits and mergers of phonological categories; this merely regards the $memory$V labels, all
# other agent data stay untouched. The splitAndMerge check is only triggered, when the number of received 
# tokens (=) %% splitAndMergeInterval (set by calling context) is 0.

{
  debug = F
  numberTokens = length(agent$memory$Word)
  if(debug) { 
    cat("\nperc.sib.2: Agent ",agent$agentNr," receives incoming token:\n"); print(percThis)
    cat("perc.sib.2: tokens in agent's memory = ",numberTokens,"\n") 
  }

  incomingdct = cbind(percThis$P1, percThis$P2, percThis$P3)    
  
  # determine phonological class of incoming token by word label, that is we assume
  # that the word is always recognized by the agent
  incomingV = unique(agent$memory$V[agent$memory$Word == percThis$Word])  
  if(debug) { cat("perc.sib.2: agents V class for incoming token ",percThis$Word,": ",incomingV,"\n") }

  # apply Harrington Rule with Mahalanobis threshold
  # estimate mean and covariance matrix from all tokens in memory that have class incomingV
  temp = agent$memory$V == incomingV
  dat = as.matrix(cbind(agent$memory$P1[temp], agent$memory$P2[temp], agent$memory$P3[temp]))
  V.mahal = train(dat)
  # calculate the MH distance of the incoming signal to this V model
  V.dist = log(as.numeric(distance(incomingdct, V.mahal, metric = "mahal")))
  # test whether the MH distance is below threshold, meaning the agent accepts the incoming signal 
  # as of his iword class percThis$Word and phonological class incomingV
  if(percThis$Initial != "str") {
    p1temp = V.dist < simMahalThreshold
    if(debug) { cat("perc.sib.2: MH simMahalThreshold = ",simMahalThreshold,"\n") }
  } else {
    p1temp = V.dist < threshold.str
    if(debug) { cat("perc.sib.2: MH threshold.str = ",threshold.str,"\n") }
  }

  if(debug) { cat("perc.sib.2: Agent log MH distance for incoming token ",percThis$Word," = ",V.dist,"\n") }
  if(debug & p1temp) { cat("perc.sib.2: Agent accepts incoming token\n") }
  if(debug & !p1temp) { cat("perc.sib.2: Agent rejects incoming token\n") }

  # do the memorization and model update
  incominglabs = c(percThis$Vpn, percThis$V, percThis$Age, percThis$Word, percThis$Initial, percThis$Gender)  
  if(p1temp)
  # then update 
  {
        incominglabs = c(incominglabs, "y")
        if(removePriortoUpdate && numberTokens >= maxTokens)
        {
            # discard the word in the same category with lowest memory$Age index
            ageTemp = agent$memory$Age
            ageTemp[agent$memory$Word != percThis$Word] <- NA  # just look for incoming Word in Age
            onetoremove = which.min(ageTemp)            # index to min value just in Word group
            # remove the above from the agent's memory
            agent$memory$nrOfTimesHeard =  agent$memory$nrOfTimesHeard[-onetoremove]
            agent$memory$P3 = agent$memory$P3[-onetoremove]
            agent$memory$P2 =  agent$memory$P2[-onetoremove]
            agent$memory$P1 = agent$memory$P1[-onetoremove]
            agent$memory$Word =  agent$memory$Word[-onetoremove]
            agent$memory$V =  agent$memory$V[-onetoremove]
            agent$memory$Age = agent$memory$Age[-onetoremove]
            agent$memory$Initial = agent$memory$Initial[-onetoremove]
            agent$memory$Vpn = agent$memory$Vpn[-onetoremove]
            agent$memory$Gender = agent$memory$Gender[-onetoremove]
            if(debug) { cat("perc.sib.2: remove oldest token with index ",onetoremove,"\n") }
        }
        # now update with percThis, but set the phonological label $memory$V to the label of the same words in memory
        agent$memory$update = c(agent$memory$update, "y")
        agent$memory$nrOfTimesHeard = c(agent$memory$nrOfTimesHeard, unique(agent$memory$nrOfTimesHeard[agent$memory$Word == percThis$Word]))
        agent$memory$P3 = c(agent$memory$P3, percThis$P3)
        agent$memory$P2 = c(agent$memory$P2, percThis$P2)
        agent$memory$P1 = c(agent$memory$P1, percThis$P1)
        agent$memory$V = c(agent$memory$V, incomingV)
        agent$memory$Initial = c(agent$memory$Initial, percThis$Initial)
        agent$memory$Vpn = c(agent$memory$Vpn, agent$memory$Vpn[1])
        agent$memory$Gender = c(agent$memory$Gender, agent$memory$Gender[1])
        # assign next free time index within the word group to the new learned token
        newIndex = max(agent$memory$Age[agent$memory$Word==percThis$Word]) + 1
        agent$memory$Age <- c(agent$memory$Age, newIndex)
        # cat("agent$memory$Word==percThis$Word : ",agent$memory$Word==percThis$Word,"\npercThis$Word = ",percThis$Word," newIndex = ",newIndex,"\n")
        agent$memory$Word = c(agent$memory$Word, percThis$Word)
        # this next line stores as rows all observations ever perceived by the agent
        agent$memory$Incoming = rbind(agent$memory$Incoming, incominglabs)
        temp = agent$memory$Word == percThis$Word
        agent$memory$nrOfTimesHeard[temp] = agent$memory$nrOfTimesHeard[temp] + 1
        
    }
    else
    # no update
    {
      agent$memory$update = c(agent$memory$update, "n")
      incominglabs = c(incominglabs, "n")
      agent$memory$Incoming = rbind(agent$memory$Incoming, incominglabs)
    }
    # check for splits and mergers
    numReceivedTokens = length(agent$memory$update) 
    if(splitAndMerge == T & numReceivedTokens%%splitAndMergeInterval == 0) { 
      if(debug) {
         cat("perc.sib.2: performing split and merge on agent number ",agent$agentNr,"\n")
         cat("numReceivedTokens = ", numReceivedTokens,"\n")
         print(agent)
      }
      agent = splitandmerge(agent) 
      if(debug) {
        cat("perc.sib.2: updated agent$memory$V : \n",agent$memory$V,"\n")
      }
    }
    agent
}


# the original perception of Harringtons u-fronting experiment Language 2017
perc <- function(agent, percThis, threshold = 1/3, removePriortoUpdate=T)

# threshold: the posterior probability that percThis$V could be a member of agent$memory$V is computed. 
# If it is larger than threshold, then percThis$V is added to memory
#
# removePriortoUpdate: if percThis is added, then an observation from the agent's memory is removed following two criteria. (i) the removed word is of the same type e.g. if percThis$W is 'queued',then all the data corresponding to one of the agent's 'queued' words is removed. (ii) the one removed has the biggest Mahalanobis distance to the centroid of the corresponding vowel. Thus for this example, the Mahalanobis distances are calculated of all the agent's 'queued' words to the agent's /ju:/-centroid (this does not include percThis). Then the 'queued' word which has the biggests mahal-distance is removed.

{
    # this is the incoming signal
    incomingdct = cbind(percThis$k0, percThis$k1, percThis$k2)
    # some lab. details on the incoming labels
    incominglabs = c(percThis$Vpn, percThis$V, percThis$Age, percThis$Word)
    # dat and labs are all of the agent's DCT coefficients and labels
    dat = cbind(agent$memory$k0, agent$memory$k1, agent$memory$k2)
    labs = agent$memory$V
    # calculate means and covariance matrices assuming equal priors (in this case 1/3) between all categories
    qdat = qda(dat, labs, prior = rep(1/length(unique(labs)), length(unique(labs))))
    # calculate the posterior probability of the incoming signal
    pincoming = predict(qdat, incomingdct)$posterior
    # find out whether the posterior-probability exceeds 'threshold'
    p1temp = pincoming[colnames(pincoming) == percThis$V] > threshold

    # FLO discussion: trying to interpret what is happening here:
    # The qda estimates 3-dim means and covariances for each V; class priors are set to
    # be equal (even if the number of samples per class would differ!); then the 
    # posterior probabilities P(incomingdct|V) are calculated (since no method is given 
    # to the qda() I assume that these are simply the Gaussian probabilities given the means
    # and covariances); then only the posterior of the incoming class is compared to 
    # a fixed threshold 1/3; if larger, we assume that the incoming token is to be accepted.
    # A posterior of more than 1/3 does not mean that the qda model would predict a correct 
    # classification: lets assume C1 is the incoming class and C2 and C3 the others, the 
    # qda model predict a posterior P(incomingdct|C1) = 0.35 > 0.33, but the other posteriors
    # are P(incomingdct|C2) = 0.05 and P(incomingdct|C3) = 0.60. Thus, the agent would 'recognize'
    # C3 instead of C1! 
    # (This problem does not occur ifi the number of classes is 2!)
    # Following these argumentation I replaced the qda() method by a real Bayessian classifier
    # in perc.sibilant.2, but this caused the phonological distributions to explode 
    # (in the s-retraction experiment). The reason is probably that using a classification rather 
    # than a local posterior probability evaluation causes *more* tokens to be accepted, for 
    # instance tokens that are somewhere in the middle between three well separated Gaussians,
    # and these tokens mess up the distributions.    
    # It is interesting that using a fixed threshold for the posterior is very similar to the 
    # fixed Mahalanobis threshold as used in perc.sibilant.2. I can't figure out a way to
    # relate P(dct|class) = 1/3 and log MH = 2.5 at the moment, but I suspect that they are 
    # basically the same constraint. 

       
    if(p1temp)
    # then update if the threshold condition is met
    {
        incominglabs = c(incominglabs, "y")
        if(removePriortoUpdate)
        {
            # discard an existing word in the same category with highest Mahalanobis distance to the centroid of the corresponding vowel category
            # calculate centroid and covariance matrix for the agent's vowels corresponding to percThis$V (training phase)
            temp.mahal = agent$memory$V == percThis$V
            tdat.mahal = train(cbind(agent$memory$k0[temp.mahal],    agent$memory$k1[temp.mahal], agent$memory$k2[temp.mahal]))
            # establish which of the agent's words (e.g. 'queued') correspond to the incoming word
            temp.word.mahal = agent$memory$W == percThis$W
            # next two lines: we need a numerical index to identify relative to agent$memory which observation to remove
            nums.mahal = 1:length(agent$memory$W)
            nums.mahal = nums.mahal[temp.word.mahal]
            # calculate the Mahalanobis distances from e.g. all the agent's 'queued' words to the agent's /ju:/-centroid' (testing phase)
            dist.mahal = distance(cbind(agent$memory$k0[temp.word.mahal],agent$memor$k1[temp.word.mahal], agent$memory$k2[temp.word.mahal]), tdat.mahal, metric = "mahal")
            # this is a numerical index relative to agent$memory of the observation that is to be removed depending on which Mahalanobis distance is greatest
            onetoremove = nums.mahal[which.max(dist.mahal)]
            
            # remove the above from the agent's memory
            agent$memory$nrOfTimesHeard =  agent$memory$nrOfTimesHeard[-onetoremove]
            agent$memory$k2 = agent$memory$k2[-onetoremove]
            agent$memory$k1 =  agent$memory$k1[-onetoremove]
            agent$memory$k0 = agent$memory$k0[-onetoremove]
            agent$memory$Word =  agent$memory$Word[-onetoremove]
            agent$memory$V =  agent$memory$V[-onetoremove]
            agent$memory$Age = agent$memory$Age[-onetoremove]
        }
        # now update with percThis
        agent$memory$update = c(agent$memory$update, "y")
        agent$memory$nrOfTimesHeard = c(agent$memory$nrOfTimesHeard, unique(agent$memory$nrOfTimesHeard[agent$memory$Word == percThis$Word]))
        agent$memory$k2 = c(agent$memory$k2, percThis$k2)
        agent$memory$k1 = c(agent$memory$k1, percThis$k1)
        agent$memory$k0 = c(agent$memory$k0, percThis$k0)
        agent$memory$Word = c(agent$memory$Word, percThis$Word)
        agent$memory$V = c(agent$memory$V, percThis$V)
        agent$memory$Age = c(agent$memory$Age, percThis$Age)
        # this next line stores as rows all observations ever perceived by the agent
        agent$memory$Incoming = rbind(agent$memory$Incoming, incominglabs)
        temp = agent$memory$Word == percThis$Word
        agent$memory$nrOfTimesHeard[temp] = agent$memory$nrOfTimesHeard[temp] + 1
    }
    else
    {
        agent$memory$update = c(agent$memory$update, "n")
        incominglabs = c(incominglabs, "n")
        agent$memory$Incoming = rbind(agent$memory$Incoming, incominglabs)
    }
    agent
}


# function to convert myPop into data-frame
###ADAPT###
myPoptodf <-
function(Population, Cond = "x")
{
    # Population is a list
    # changed 20170220 to cover variable dimensions in agent$memory$P
#    res = NULL
#    n  = length(Population)
#    for(j in 1:n){
#        res$params = rbind(res$params, Population[[j]]$memory$P)
#        res$W = c(res$W, Population[[j]]$memory$Word)
#        res$V = c(res$V, Population[[j]]$memory$V)
#        res$Age = c(res$Age, Population[[j]]$memory$Age)
#        res$Vpn = c(res$Vpn, Population[[j]]$memory$Vpn)
#        res$Gender = c(res$Gender, substr(Population[[j]]$memory$Vpn,1,1))
#        res$Initial = c(res$Initial, Population[[j]]$memory$Initial)
#    }
#    df = data.frame(P = res$params, W = factor(res$W), V = factor(res$V), Age = factor(res$Age), Vpn = factor(res$Vpn), Gender = factor(res$Gender), Initial = factor(res$Initial))
#    cond = rep(Cond, nrow(df))
#    df = data.frame(df, Cond = factor(cond))
    # changed Flo 20170303
    n  = length(Population)
    params = NULL
    W = NULL
    V = NULL
    Age = NULL
    Vpn = NULL
    Gender = NULL
    Initial = NULL
    for(j in 1:n){
        params = rbind(params, Population[[j]]$memory$P)
        W = c(W, Population[[j]]$memory$Word)
        V = c(V, Population[[j]]$memory$V)
        Age = c(Age, Population[[j]]$memory$Age)
        Vpn = c(Vpn, Population[[j]]$memory$Vpn)
        Gender = c(Gender, Population[[j]]$memory$Gender)
        Initial = c(Initial, Population[[j]]$memory$Initial)
    }
    cond = rep(Cond, nrow(params))
    df = data.frame(P = params, W = factor(W), V = factor(V), Age = Age, Vpn = factor(Vpn), Gender = factor(Gender), Initial = factor(Initial), Cond = factor(cond))
    names(df) = c(paste("P", 1:ncol(params), sep=""), "W", "V", "Age", "Vpn", "Gender", "Initial", "Cond")
    df
}



myPoptodf.obsolete.20170220 = function(Population, Cond = "x")
{
    # Population is a list
    res = NULL
    n  = length(Population)
    for(j in 1:n){
        res$k0 = c(res$k0, Population[[j]]$memory$k0)
        res$k1 = c(res$k1, Population[[j]]$memory$k1)
        res$k2 = c(res$k2, Population[[j]]$memory$k2)
        res$W = c(res$W, Population[[j]]$memory$Word)
        res$V = c(res$V, Population[[j]]$memory$V)
        res$Age = c(res$Age, Population[[j]]$memory$Age)
        res$Vpn = c(res$Vpn, Population[[j]]$memory$Vpn)
        res$Gender = c(res$Gender, substr(Population[[j]]$memory$Vpn,1,1))
        res$Initial = c(res$Initial, Population[[j]]$memory$Initial)
    }
    df = data.frame(res$k0, res$k1, res$k2, W = factor(res$W), V = factor(res$V), Age = factor(res$Age), Vpn = factor(res$Vpn), Gender = factor(res$Gender), Initial = factor(res$Initial))
    cond = rep(Cond, nrow(df))
    df = data.frame(df, Cond = factor(cond))
    names(df) = c("k0", "k1", "k2", "W", "V", "Age", "Vpn", "Gender", "Initial", "Cond")
    df
}

###ADAPT###
build_dct_df.sibilant <-
function(df)
{
    # the purpose of this function is to (a) carry out inverse DCT (b) create a trackdata object and (c) build a corresponding data-frame from the trackdata object
    # df has components:  P1,P2,P3,W,V,Age,Vpn,Initial,Cond
    # (a)
    coeffs = with(df, cbind(P1, P2, P3))
    print("carrying out invdct")
    # NB I've hard coded 21 here because these are the number of points after carrying out linear time normalisation by default - see args(tracklinear)
    coeffs.dat = invdct(coeffs, rep(21, nrow(coeffs)))
    print("done")
    # (b) Build a trackdata object from the above
    # total number of observations
    N = nrow(coeffs)
    
    # build $ftime: between 0 and 1 - see comment 2. above
    coeffs.tz = cbind(rep(0, N), rep(1, N))
    
    # build $index
    coeffs.right = rep(21, N) * (1:N)
    coeffs.left = coeffs.right - 21 + 1
    coeffs.inds = cbind(coeffs.left, coeffs.right)
    colnames(coeffs.inds) = c("left", "right")
    
    # build track times (each track has 21 equally spaced time points between 0 and 1)
    coeffs.tracktimes = rep(seq(0, 1, length=21), N)
    # each signal has 21 equally spaced times between 0 and 1
    names(coeffs.dat) = coeffs.tracktimes
    
    # build the track data
    coeffs.trk = as.trackdata(coeffs.dat,coeffs.inds, coeffs.tz)
    # give the track a name
    colnames(coeffs.trk$data) = "F2Pop"
    
    # (c) data-frame corponding to trackdata object
    coeffs.trk.df = tracktodf(coeffs.trk)
    
    # the corresponding labels: W  V Age AgeGroup Vpn Cond
    coeffs.W = rep(as.character(df$W), table(coeffs.trk.df$segno))
    coeffs.V = rep(as.character(df$V), table(coeffs.trk.df$segno))
    coeffs.Vpn = rep(as.character(df$Vpn), table(coeffs.trk.df$segno))
    coeffs.Cond = rep(as.character(df$Cond), table(coeffs.trk.df$segno))
    coeffs.Initial = rep(as.character(df$Initial), table(coeffs.trk.df$segno))
    coeffs.Age = rep(as.character(df$Age), table(coeffs.trk.df$segno))
    
    # make the data-frame
    data.frame(coeffs.trk.df, W = factor(coeffs.W), V = factor(coeffs.V), Vpn = factor(coeffs.Vpn), Cond = factor(coeffs.Cond), Initial = factor(coeffs.Initial), Age = factor(coeffs.Age))
}

build_dct_df.sibilant.obsolete.20170220 <-
function(df)
{
    # the purpose of this function is to (a) carry out inverse DCT (b) create a trackdata object and (c) build a corresponding data-frame from the trackdata object
    # df has components:  k0, k1,k2,W, V, Age,Vpn,Initial,Cond
    # (a)
    coeffs = with(df, cbind(k0, k1, k2))
    print("carrying out invdct")
    # NB I've hard coded 21 here because these are the number of points after carrying out linear time normalisation by default - see args(tracklinear)
    coeffs.dat = invdct(coeffs, rep(21, nrow(coeffs)))
    print("done")
    # (b) Build a trackdata object from the above
    # total number of observations
    N = nrow(coeffs)
    
    # build $ftime: between 0 and 1 - see comment 2. above
    coeffs.tz = cbind(rep(0, N), rep(1, N))
    
    # build $index
    coeffs.right = rep(21, N) * (1:N)
    coeffs.left = coeffs.right - 21 + 1
    coeffs.inds = cbind(coeffs.left, coeffs.right)
    colnames(coeffs.inds) = c("left", "right")
    
    # build track times (each track has 21 equally spaced time points between 0 and 1)
    coeffs.tracktimes = rep(seq(0, 1, length=21), N)
    # each signal has 21 equally spaced times between 0 and 1
    names(coeffs.dat) = coeffs.tracktimes
    
    # build the track data
    coeffs.trk = as.trackdata(coeffs.dat,coeffs.inds, coeffs.tz)
    # give the track a name
    colnames(coeffs.trk$data) = "F2Pop"
    
    # (c) data-frame corponding to trackdata object
    coeffs.trk.df = tracktodf(coeffs.trk)
    
    # the corresponding labels: W  V Age AgeGroup Vpn Cond
    coeffs.W = rep(as.character(df$W), table(coeffs.trk.df$segno))
    coeffs.V = rep(as.character(df$V), table(coeffs.trk.df$segno))
    coeffs.Vpn = rep(as.character(df$Vpn), table(coeffs.trk.df$segno))
    coeffs.Cond = rep(as.character(df$Cond), table(coeffs.trk.df$segno))
    coeffs.Initial = rep(as.character(df$Initial), table(coeffs.trk.df$segno))
    coeffs.Age = rep(as.character(df$Age), table(coeffs.trk.df$segno))
    
    # make the data-frame
    data.frame(coeffs.trk.df, W = factor(coeffs.W), V = factor(coeffs.V), Vpn = factor(coeffs.Vpn), Cond = factor(coeffs.Cond), Initial = factor(coeffs.Initial), Age = factor(coeffs.Age))
}

###ADAPT###
get.cgroups <-
function(o1)
{
  # this function reads a dataframe containing a population set in sibilant2 fashion and 
  # produces a two-colum table with Vpn ID in the first and a string representing a phonological
  # cluster (cgroup) of that Vpn by concatenating the word initial of the words that share this cluster,
  # e.g.   'F01   s+str'  means: 'Speaker F01 has a phonological cluster that is shared by at least two words
  # with initials 's' and 'str' (e.g. 'seen', 'sane' and 'stream'). 
  # Note that for each derived label in memory a cgroup is printed
  # i.e. if for instance the string 's+str' appears two times for 
  # for an agent, then this means that he has two different derived clusters, that *both* are 
  # used in minimum two words (so there must be at least 4 words with 's' or 'str' in the memory);
  #
  ctable=table(o1$Initial,o1$V,o1$Vpn)
  cgroups = NULL
  # go over all speaker sub-tables
  for(vpn in 1:length(unique(o1$Vpn))) {
    # logical table with TRUE at data points of interest (mapping Initial - V)
    clogtab=ctable[,,vpn]!=0
    # go over columns, skip all empty columns, build class strings from non-empty rows
    cgroup = NULL
    for(cidx in 1:ncol(clogtab)) {
      # logical that points to rows with values != 0; if sum(clogrows) is 0, all rows in this column are 0
      clogrows = clogtab[, cidx]!=0
      if(sum(clogrows != 0)) {
        cnames = names(clogrows)
        # cgroup = paste(cnames[clogrows], collapse = "+")
        cgroup = paste(unique(cnames[clogrows]), collapse = "+")
        cgroups = rbind(cgroups,data.frame(Vpn=levels(o1$Vpn)[vpn],cGroup=cgroup,stringsAsFactors=F)) 
      }
    }
  }
  cgroups
}
###ADAPT###
calc.clusterStat <-
function(o1)
{
    # gives one line dataframe with the number of agents per equivalence cluster
    # global phonClusterLog.names must contain the names of the possible equivalence
    # labels in sorted order

    if(printMessages) cat("Equivalence phone clusters in agents at this point:\n\n")
    cgroups = get.cgroups(o1)
    # make cgroups in conform string orders and remove double clusters, since in the statistics
    # we don't want to distinguish between agent configurations that only differ in the number
    # of a equivalence cluster type, e.g. an agent
    # F01 : s+str s+str S S should be counted as 'agent F01 has s+str and S'
    cgroups = unique(cgroups[ order(cgroups$Vpn,cgroups$cGroup),])
    if(printMessages) print(cgroups)
    cgroups$cGroup=factor(cgroups$cGroup,levels=phonClusterLog.names)
    # create a compressed statistics with the number of agents that contain at least one of the 
    # seven possible equivalence clusters:
    # phonClusterLog.names = c("S","S+s","S+s+str","S+str","s","s+str","str") with '+' = merge
    # create a line in a fixed order of clusterconfigurations (phonClusterLog.names) with occurance counters
    # (the reason I do this manually is that we often do not see all configurations in cgroups)
    cLine = data.frame(ABM=abmRun,simGroup=sG,rbind(table(cgroups$cGroup)))
    colnames(cLine) = c("ABM","simGroup",phonClusterLog.names)
    cLine
}

get.ratioOfRejections <-
function(agent)
{
  numTokens = 0
  numRejections = 0
  for(i in 1:length(agent)) {
    numTokens = numTokens + length(agent[[i]]$memory$update)
    numRejections = numRejections + sum(agent[[i]]$memory$update=="n")
  }
  numRejections/numTokens
}

# the following overwrites the standard emuR::train() function to build a statistical model
# we need this hack for the variable dimensionality thing introduced by jmh 20170220
 
train.jmh <-
function (x, lab = rep("x", nrow(x))) 
{
  # JMH added next two lines 7.2.17
  if(is.vector(x))
    x = as.matrix(x)
    mat <- NULL
    # JMH added 7.2.17
    if (ncol(x) > 1) {
        summeanvals <- NULL
        sumcovvals <- NULL
        sumcovvals.inv <- NULL
        for (j in unique(lab)) {
            temp <- lab == j
            if (sum(temp) == 1) {
                stop("\n\tData passed to train has only one entry for one of the labels.\n\tA gaussian model can't be generated for this data.")
            }
            values <- x[temp, ]
            meanvals <- apply(values, 2, mean)
            covvals <- var(values, values)
            covvals.inv <- solve(covvals)
            summeanvals <- rbind(summeanvals, meanvals)
            sumcovvals <- rbind(sumcovvals, covvals)
            sumcovvals.inv <- rbind(sumcovvals.inv, covvals.inv)
        }
        mat$label <- unique(lab)
        mat$means <- summeanvals
        mat$cov <- sumcovvals
        mat$invcov <- sumcovvals.inv
    }
    else {
        mat <- NULL
        mat$means <- NULL
        mat$cov <- NULL
        for (j in unique(lab)) {
            #cat("data for ", j, " \n")
            temp <- lab == j
            mat$means <- c(mat$means, mean(x[temp]))
            mat$cov <- c(mat$cov, sqrt(var(x[temp])))
        }
        mat$label <- unique(lab)
        mat$invcov <- 1/mat$cov
    }
    mat
}

