#####
##
## Script that provides some helper functions I need in more than 
## one script (like means and scaling with extra checking)
##
####


###
#
# STATIC VARIABLES
#
EPS <- .Machine$double.eps


#old Values from deutsch heute: MID_POINT_LAT_DH <- 50.02326
#old values from deutsch heute: MID_POINT_LON_DH <- 10.41336

#only Germany: MID_POINT_LAT_DH <- 51.10719 #calculated with the germany map from the "maps" package and the gCentroid function from the rgeos package
#only Germany: MID_POINT_LON_DH <- 10.38547 #calculated with the germany map from the "maps" package and the gCentroid function from the rgeos package

MID_POINT_LAT_DH <- 50.01903 #if calculated the mean with the speakers (minimal possible error)
MID_POINT_LON_DH <- 10.41484 #if calculated the mean with the speakers (minimal possible error)

#MID_POINT_LON_DH <- 10.42673 #for only the sites (so it doesnt matter how many speakers per site)
#MID_POINT_LAT_DH <- 50.03497 #for only the sites (so it doesnt matter how many speakers per site)

BLUE = colorspace::sRGB(0.230, 0.299, 0.754) # sRGB here and RGB in the diverging.colormap function is needed for the nice colors
RED = colorspace::sRGB(0.706, 0.016, 0.150) # sRGB here and RGB in the diverging.colormap function is needed for the nice colors
GREY = colorspace::RGB(0.665, 0.665, 0.665) # gray value almost like in the pulication of Kenneth Moreland

####################################################

findColumnNamesWithNA <- function(dataframe){
  colnames(dataframe)[colSums(is.na(dataframe)) > 0]
}

###
# Function that calculates the eman of a vector. It makes some checks,
# e.g. if the vector is numeric, if so it calculates the mean, if not it checks 
# if all entries are the same and if so returns the first entry.
###
mymean <- function(x,na.rm=FALSE) {
  #print(paste("X is:", paste(x, collapse=", ")))
  if(is.numeric(x)){
    return(base::mean(x,na.rm=na.rm))
  } else{
    if(!(all(!(is.na(x))))){
      #print("B")
      Sys.sleep(5)
      stop(paste("[MYMEAN] Column contains NA values in non-numeric column (entry ", paste(x,collapse=","),"). Do not know what to do. Aborting!",sep=""))
    } else{
      if(all(x == x[1])){
        return (x[1]); # return first occurence if non-numeric
      } else{
        stop(paste("[MYMEAN] Not all values of non-numeric column are the same (", paste(x,collapse=",")," in this case). Do not know what to do. Aborting!",sep=""))
      }
    }
  }
}

## 
#
# Function that standardizes (scales and cernteres) a vector. It makes some checks,
# e.g. if the vector is numeric, if so it checks if all entries are the same and if so
# returns the first entry.
# 
myscale <- function(x,center=T,scale=T) {
  if(is.numeric(x)){
    if(sum(abs(x))<=EPS){ # if the sum is smalle than the smallest, safely storable float (precision wise), assume all values are zero
      return(x) #do nothing
    } else{ #as it seems that not all values are zero => do scaling
      return(base::scale(x,center=center,scale=scale))
    }
  } else{
      return (x) # return everything if non-numeric
  }
}

###
# normal min max normalization
# param x : data
# param a : lower target value
# param b : upper target value
###
normalize <- function(x,a,b) { 
  if(is.numeric(x)){ # if sum(abs(x)) is zero, just return zero 
    xMax = max(x)
    xMin = min(x)
    
    if(is.na(xMin) || is.na(xMax) || (xMin == 0 && xMax == 0)){
      return(x) #return x (otherwise dividing by zero, or doing something with na)
    }
    
    x = a+(((x - xMin)*(b-a)) / (xMax-xMin))
    #do it on a complete data frame (very fast, what people write on StackOverflow)
    #x <- sweep(x, 2, apply(x, 2, min))
    #x <- sweep(x, 2, apply(x, 2, max), "/") 
    #2*x - 1
  } else{
    #do nothing
  }
  return(x)
}

normalizeQuartile <- function(x,a,b) { 
  if(is.numeric(x)){ # if sum(abs(x)) is zero, just return zero 
    
    #using 5% and 95% quartile instead of minimal and maximal values (outliers)
    quantiles = quantile(x,probs=seq(0,1,0.05))
    xMin = quantiles[2]
    xMax = quantiles[length(quantiles)-1]
    
    #clipping the values
    x[x<xMin]<-xMin
    x[x>xMax]<-xMax
    
    if(is.na(xMin) || is.na(xMax) || (xMin == 0 && xMax == 0)){
      return(x) #return x (otherwise dividing by zero, or doing something with na)
    }
    
    x = a+(((x - xMin)*(b-a)) / (xMax-xMin))
    #do it on a complete data frame (very fast, what people write on StackOverflow)
    #x <- sweep(x, 2, apply(x, 2, min))
    #x <- sweep(x, 2, apply(x, 2, max), "/") 
    #2*x - 1
  } else{
    #do nothing
  }
  return(x)
}

##
## Creating a color palette ranging from RGB1 to RGB2 with the possibility to specify a MIDPOINT
## a good idea is:
## BLUE = colorspace::sRGB(0.230, 0.299, 0.754) # sRGB here and RGB in the diverging.colormap function is needed for the nice colors
## RED = colorspace::sRGB(0.706, 0.016, 0.150) # sRGB here and RGB in the diverging.colormap function is needed for the nice colors
## GREY = colorspace::RGB(0.665, 0.665, 0.665) # gray value almost like in the pulication of Kenneth Moreland
##
## param nColorVals : how many steps between rgb1 and rgb2 should exist (e.g. 500)
## param rgb1 : start color
## param midPoint : color that should be in between the two
## param rgb2 : end color
##
getColorPalette <- function(nColorVals,rgb1=BLUE,midPoint=GREY,rgb2=RED){
  source("~/work/diss/code/deutschHeuteEvaluation/divergingColor-kisler.r")
  #nice, but doesn't work on white map: my_palette <- colorRampPalette(c("dark red", "white", "dark blue"))(n = 500)
  #my_palette <- colorRampPalette(c("red", "blue"))(n = 500)
  #my_palette <- colorRampPalette(c("yellow", "blue"))(n = 500)
  #best: my_palette <- colorRampPalette(c("red", "grey", "blue"),space = "Lab")(n = 500)
  sequenceVals = seq(from=0,to=1,length.out = nColorVals)
  
  colorMapRGB <-diverging.colormap(sequenceVals,rgb1=rgb1,rgb2=rgb2, midPoint=midPoint, outColorspace = "RGB")
  
  #clipping values that are bigger than 1 (to 1)
  colorMapRGB[colorMapRGB > 1.0] <- 1.0
  
  my_paletteMSH <- colorspace::hex(colorspace::RGB(R=colorMapRGB[,1],G=colorMapRGB[,2],B=colorMapRGB[,3]))
  return(my_paletteMSH)
}


###
# Function that converts from arff file format to the sparse 
# matrix data representation used by libSVM. It uses the methods
# from e1071, methods, SparseM, data.table and foreign. They will
# be loaded.
# It can handle a column called "sex" to specify gender, which will be transformed
# to numerical
##
arffToLibSVMFormatDH <- function(dt,targetColumn,fileOut){
  
  
  #DEBUGGING targetColumn = "y"
  #DEBUGGING dframe = data.frame(x1=c(1,2,3,4,5),x3=c(4,5,6,7,8),sex=as.factor(c("w","m","w","m","w")), y=c(6,2,5,3,6))
  #dt=data.table(dframe,keep.rownames = F)
  
  ##########################
  ####correct sex to numeric, if it exists
  if("sex" %in% colnames(dt)){
    print("  Converting the column 'sex' from m/w to numerical 1/2 (so m==1,w==2)")
    dt$sexNew = rep(0,dim(dt)[1])
    #replace all 'w' through 1 and 'm' through 2
    dt[like(sex,"m"),sexNew:=1]
    dt[like(sex,"w"),sexNew:=2]
    
    dt$sex = dt$sexNew
    
    dt[,c("sexNew"):=NULL]
    #### correct sex to numeric done
    ################################
  }

  ############################################################
  ## much faster
  dtToWrite = copy(dt)
  data.table.fm(data=dtToWrite,fileName = fileOut,target=targetColumn)
  
  # not needed anymore
  #
  #library(foreign)
  #library(e1071)
  #library(methods) #to get the as.matrix.csr
  #library(SparseM) #to get the as.matrix.csr
  #library(data.table)
  ###
  #print("done faster faster")
  #print(date())
  #for performance test get the complete data table:
  # a bit faster
  #dfCompleteM = as.matrix(dt)
  #write.libsvm(dfCompleteM,paste(fileOut,"-faster.dat",sep=""),class = length(colnames))
  #print("Done faster")
  #print(date())
  ############################################################
  ### old/slow?
  #Only the target Column
  #onlyTargetVector =  t(dt[,targetColumn,with=FALSE])
  # transform to sparse matrix
  #smNoTargetCSR = as.matrix.csr(as.matrix(allExceptTarget))
  #write.matrix.csr(x=smNoTargetCSR,file=fileOut,y=onlyTargetVector)
  #print(date())
}

##
# Function that writes svm data, is about 1/3 faster than the standard implemenation
##
write.libsvm <- function(data, filename = "out.dat", class = 1) {
  out = file(filename) 
  writeLines(apply(data, 1, function(X){
    paste(X[class],apply(cbind(which(X != 0)[-class], X[which(X != 0)[-class]]), 1, paste, 
                     collapse = ":"), collapse = " "
    ) }), out) 
  close(out)
}

#
# writes the fm format, which is supposed to be the same as libsvm, this function is 3 times as fast
# as the original code from the e1071 package and twice as fast as the code above. Not very readable though,
# but okay.
#
data.table.fm <- function (data = X, fileName = "../out.fm", target = "y_train", train = TRUE) {
  if (train) {
    print("    (if you want to process logical values, please recheck the code)")
    #for now we have no logical values
    #if (is.logical(data[[target]]) | sum(levels(factor(data[[target]])) == 
    #                                     levels(factor(c(0, 1)))) == 2) {
    #  data[[target]][data[[target]] == TRUE] = 1
    #  data[[target]][data[[target]] == FALSE] = -1
    #}
  }
  specChar = "\\(|\\)|\\||\\:"
  specCharSpace = "\\(|\\)|\\||\\:| "
  parsingNames <- function(x) {
    ret = c()
    for (el in x) ret = append(ret, gsub(specCharSpace, "_", 
                                         el))
    ret
  }
  parsingVar <- function(x, keepSpace, hard_parse) {
    if (!keepSpace) 
      spch = specCharSpace
    else spch = specChar
    if (hard_parse) 
      gsub("(^_( *|_*)+)|(^_$)|(( *|_*)+_$)|( +_+ +)", 
           " ", gsub(specChar, "_", gsub("(^ +)|( +$)", 
                                         "", x)))
    else gsub(spch, "_", x)
  }
  setnames(data, names(data), parsingNames(names(data)))
  target = parsingNames(target)
  format_vw <- function(column, formater) {
    ifelse(as.logical(column), sprintf(formater, j, column), 
           "")
  }
  all_vars = names(data)[!names(data) %in% target]
  #cat("Reordering data.table if class isn't first\n")
  target_inx = which(names(data) %in% target)
  rest_inx = which(!names(data) %in% target)
  #cat("Adding Variable names to data.table\n")
  for (j in rest_inx) {
    column = data[[j]]

    formater = "%s:%.8f"  
    set(data, i = NULL, j = j, value = format_vw(column, formater))
    #cat(sprintf("Fixing %s\n", j))
  }
  data = data[, c(target_inx, rest_inx), with = FALSE]
  drop_extra_space <- function(x) {
    gsub(" {1,}", " ", x)
  }
  #cat("Pasting data - Removing extra spaces\n")
  data = apply(data, 1, function(x) drop_extra_space(paste(x, 
                                                           collapse = " ")))
  #cat("Writing to disk\n")
  write.table(data, file = fileName, sep = " ", row.names = FALSE, 
              col.names = FALSE, quote = FALSE)
}

## from http://stackoverflow.com/questions/5173692/how-to-return-number-of-decimal-places-in-r
num.decimals <- function(x) {
  stopifnot(class(x)=="numeric") # can be used in writing libsvm format, should not happen anyway
  x <- sub("0+$","",x)
  x <- sub("^.+[.]","",x)
  nchar(x)
}

#
# Wrapper function to help me stick to the latitude-first-rule
#
latLondegreeToKilometers <- function(latDegree, lonDegree){
  require(fossil)
  lonLatKM = lonLatdegreeToKilometers(lonDegree,latDegree)
  return(c(lonLatKM[2],lonLatKM[1]))
}

#
# Returns a two dimensional structure with first the longitude
# in kms and then the latitude in kms
#
lonLatdegreeToKilometers <- function(lonDegree,latDegree){
  require(fossil)
  #using the midpoint of our data instead of lat=0,lon=0 for
  #calculating the distance. Being around the equator might
  #change the distances (we are not working on a plane)
  nullPosition = data.frame(latMAE = MID_POINT_LAT_DH,lonMAE = MID_POINT_LON_DH)
  currErrorLat = latDegree
  currErrorLon = lonDegree
  
  ##check if lat or lon are outside the defined area
  #if(abs(currErrorLat)>85){
  #  #get -1 or +1 => multiply by 85
  #  currErrorLat = (currErrorLat/currErrorLat) * 85
  #}
  #if(abs(currErrorLon)>180){
  #  #get -1 or +1 => multiply by currErrorLon
  #  currErrorLon = (currErrorLon/currErrorLon) * 180
  #}
  
  inputLat =  data.frame(latMAE = MID_POINT_LAT_DH+currErrorLat, lonMAE = MID_POINT_LON_DH)
  currDFLat = rbind(inputLat,nullPosition)
  distanceLat = earth.dist(currDFLat, dist = TRUE)
  
  
  inputLon =  data.frame(latMAE = MID_POINT_LAT_DH, lonMAE = MID_POINT_LON_DH+currErrorLon)
  currDFLon = rbind(inputLon,nullPosition)
  distanceLon = earth.dist(currDFLon, dist = TRUE)
  
  return(c(distanceLon, distanceLat))
}

#
#
# Function that returns the distance between two points. 
#
#
lonLatKmDistanceBetweenTwoPoints <- function(point1Lon, point1Lat, point2Lon, point2Lat){
  require(fossil)
  ## DEBUG
  #ZIT
  #point1Lon=14.81
  #point1Lat=50.90
  #ZEL
  #point2Lon=12.80
  #point2Lat=47.32
  
  position1 = data.frame(longitude = point1Lon,latitude = point1Lat)
  position2 = data.frame(longitude = point2Lon,latitude = point2Lat)
  
  positions = rbind(position1,position2)
  distanceResult = earth.dist(positions, dist = TRUE)
  
  return(distanceResult)  
}

###
#
# Extract the basename from a path (containing no path and extension information)
#
###
basenameNoExtension <- function(filename){
    res = sub("^([^.]*).*", "\\1", basename(filename)) #first get the basename with extension with basename and then strip the extension
    return(res)
}

###
#
# Extract the filename including the path and dropping the extension(containing no path and extension information)
#
###
filenameNoExtension <- function(filename){
    res = sub("^([^.]*).*", "\\1", filename) #first get the basename with extension with basename and then strip the extension
    return(res)
}

###
#
# Delete files according to pattern from path. ATTENTION it is a partial match anywhere in the filename
#
###
deleteFilesFromFolder <- function(path, pattern){
  filesToDelete = list.files(path = path,pattern=pattern) #get all files from folder
  if(length(filesToDelete)>0){
    print(paste("I am about to delete the following files... ! Be warned. You got 10 seconds to stop that!"))
    for(currFile in filesToDelete){
      print(currFile)
    }
    maxTime = 10
    for(i in 1:maxTime){
      Sys.sleep(1) #sleep 1 second
      #print(paste("You got",maxTime-i,"seconds left..."))
      cat("You got a few seconds left. Counting down: ")
      cat(maxTime-i, " \r") 
    }
    for(currFile in filesToDelete){
      file.remove(currFile)
    }
    print("All files deleted!")
  } else{
    print(paste("No files found with pattern >",pattern,"< in PATH >",path,"< that could be deleted!",sep=""))
  }
}


##
#
# Function that plots a circle around a certain location with radius specified in kms
# (from: http://stackoverflow.com/questions/23071026/drawing-circle-on-r-map)
#
##
plotCircleMap <- function(LatDec, LonDec, Km, lty=2, col="blue") {#Corrected function
  #call the plotEllipse function with the same radius in x- and y-direction
  plotEllipseMap(LatDec, LonDec, Km, Km, lty, col)
}

##
#
# function that plots a circle around a certain location 
# It takes a latitude and a longitude and two 
# radii (in degree) and prints it to the plot
# (from: http://stackoverflow.com/questions/23071026/drawing-circle-on-r-map)
#
# LatDec = latitude in decimal degrees of the center of the circle
# LonDec = longitude in decimal degrees
# a = radius in latitude direction
# b = radius in longitude direction
# lty = line type
# col = color of line
##
plotEllipseMap <- function(LonDec, LatDec, aLon, bLat, lty=2, col="blue") {#Corrected function
  require(mapdata)#For the worldHires database
  require(mapproj)#For the mapproject function
  
  ER <- 6371 #Mean Earth radius in kilometers. Change this to 3959 and you will have your function working in miles.
  
  AngDeg <- seq(1:360) #angles in degrees 
  AngRad <- AngDeg*(pi/180)#angles in radians
  
  Lat1Rad <- LatDec*(pi/180) #Latitude of the center of the circle in radians
  Lon1Rad <- LonDec*(pi/180) #Longitude of the center of the circle in radians
  
  Lat2Rad <-asin(sin(Lat1Rad)*cos(aLon/ER)+cos(Lat1Rad)*sin(aLon/ER)*cos(AngRad)) #Latitude of each point of the circle rearding to angle in radians
  Lon2Rad <- Lon1Rad+atan2(sin(AngRad)*sin(bLat/ER)*cos(Lat1Rad),cos(bLat/ER)-sin(Lat1Rad)*sin(Lat2Rad))#Longitude of each point of the circle rearding to angle in radians
  
  #transformation back to degree
  Lat2Deg <- Lat2Rad*(180/pi)#Latitude of each point of the circle rearding to angle in degrees (conversion of radians to degrees deg = rad*(180/pi) )
  Lon2Deg <- Lon2Rad*(180/pi)#Longitude of each point of the circle rearding to angle in degrees (conversion of radians to degrees deg = rad*(180/pi) )
  
  polygon(x=Lon2Deg,y=Lat2Deg,lty=lty, border=col,lwd=3)
}

#
# function that shows all calling functions
#
traceCallers <- function(){
  allCallers = sys.calls()
  index = length(allCallers)-1
  for (caller in allCallers) {
    print(paste(index, caller,sep=". "))
    index = index-1
  }
}

#setting some reference points on the map (from http://www.gpskoordinaten.de)
MAP_REFERENCE_POINTS = data.frame(name="Berlin",latitude=52.520007,longitude=13.40)
MAP_REFERENCE_POINTS = rbind(MAP_REFERENCE_POINTS, data.frame(name="Hamburg",latitude=53.57, longitude=10.04))
MAP_REFERENCE_POINTS = rbind(MAP_REFERENCE_POINTS, data.frame(name="München", latitude=48.14, longitude=11.57))
MAP_REFERENCE_POINTS = rbind(MAP_REFERENCE_POINTS, data.frame(name="Köln", latitude=50.94, longitude= 6.960))
MAP_REFERENCE_POINTS = rbind(MAP_REFERENCE_POINTS, data.frame(name="Frankfurt", latitude=50.11, longitude=8.68))
MAP_REFERENCE_POINTS = rbind(MAP_REFERENCE_POINTS, data.frame(name="Stuttgart", latitude=48.78, longitude=9.18))
MAP_REFERENCE_POINTS = rbind(MAP_REFERENCE_POINTS, data.frame(name="Bremen", latitude=53.08, longitude=8.81))
MAP_REFERENCE_POINTS = rbind(MAP_REFERENCE_POINTS, data.frame(name="Leipzig", latitude=51.33, longitude=12.38))
MAP_REFERENCE_POINTS = rbind(MAP_REFERENCE_POINTS, data.frame(name="Kassel", latitude=51.32, longitude=9.5))
MAP_REFERENCE_POINTS = rbind(MAP_REFERENCE_POINTS, data.frame(name="Wien", latitude=48.21, longitude=16.37))
MAP_REFERENCE_POINTS = rbind(MAP_REFERENCE_POINTS, data.frame(name="Graz", latitude=47.07, longitude=15.44))
MAP_REFERENCE_POINTS = rbind(MAP_REFERENCE_POINTS, data.frame(name="Linz", latitude=48.30, longitude=14.29))
MAP_REFERENCE_POINTS = rbind(MAP_REFERENCE_POINTS, data.frame(name="Innsbruck", latitude=47.27, longitude=11.39))
MAP_REFERENCE_POINTS = rbind(MAP_REFERENCE_POINTS, data.frame(name="Zürich", latitude= 47.38, longitude= 8.54))
MAP_REFERENCE_POINTS = rbind(MAP_REFERENCE_POINTS, data.frame(name="Bern", latitude= 46.9479, longitude= 7.444))

#english version
MAP_REFERENCE_POINTS_ENG = MAP_REFERENCE_POINTS
MAP_REFERENCE_POINTS_ENG$name = gsub("München", "Munich", MAP_REFERENCE_POINTS_ENG$name)
MAP_REFERENCE_POINTS_ENG$name = gsub("Köln", "Cologne", MAP_REFERENCE_POINTS_ENG$name)
MAP_REFERENCE_POINTS_ENG$name = gsub("Wien", "Vienna", MAP_REFERENCE_POINTS_ENG$name)
MAP_REFERENCE_POINTS_ENG$name = gsub("Zürich", "Zurich", MAP_REFERENCE_POINTS_ENG$name)

#nam: "Duesseldorf","Dortmund","Essen", "Basel",
#lat: 51.227,51.513,51.455,47.567
#lon: 6.773,7.465,7.0115,7.597

##
# Function that takes a vector and returns the element, that appear the most often. In case of a tie, the one that comes first in the
# table is returned.
##
majorityVote <- function(x) {
  tabulatedOutcomes = table(x)
  #print(tabulatedOutcomes)
  sortedOutcomes = sort(tabulatedOutcomes, decreasing=TRUE)
  mostCommonLabel = names(sortedOutcomes)[1]
  #print(paste("Ret",mostCommonLabel))
  return(mostCommonLabel)
}


##
# Function that generates a German Map based on Germany, Austria and the german speaking parts
# of Switzerland. This is needed as the Deutsch Heute Corpus was recorded (at least partially)
# in those areas.
#
# Solution from: http://stackoverflow.com/questions/37924849/merging-two-and-a-half-countries-from-maps-package-to-one-map-object-in-r
##
getGermanMapFromRaster <- function(){
  require(raster)
  deu <- getData('GADM', country='DEU', level=0)
  aut <- getData('GADM', country='AUT', level=0)
  lie <- getData('GADM', country='LIE', level=0)
  swi <- getData('GADM', country='CHE', level=1)
  ita <- getData('GADM', country='ITA', level=1)
  
  ######## FIX SWITZERLAND #######
  
  #Subset the Swiss cantons there is no need for a loop for such things in R.
  #The list is from the census report of all german speaking and multilingual
  #cantones if at least 50% of the speakers are speaking German
  kantoneDeusch = c("Uri", 
                    "Appenzell Innerrhoden", 
                    "Nidwalden", 
                    "Obwalden", 
                    "Appenzell Ausserrhoden", 
                    "Schwyz", 
                    "Lucerne", 
                    "Thurgau", 
                    "Solothurn", 
                    "Sankt Gallen", 
                    "Schaffhausen", 
                    "Basel-Landschaft", 
                    "Aargau", 
                    "Glarus", 
                    "Zug", 
                    "Zürich", 
                    "Basel-Stadt",
                    "Bern",
                    "Graubünden")
  GermanSwiss <- swi[swi$NAME_1 %in% kantoneDeusch,]
  
  #Aggregate (dissolve) Swiss internal boundaries
  GermanSwiss <- aggregate(GermanSwiss)
  
  ######## END SWITZERLAND #######
  ######## FIX ITALY #######
  provinciaDeutsch = c("Trentino-Alto Adige")
  GermanItaly <- ita[ita$NAME_1 %in% provinciaDeutsch,]
  
  ######## END ITALY #######
  
  german <- bind(deu, aut, lie, GermanSwiss, GermanItaly)
  german <- aggregate(german)
  return(german)
}

getNecessaryFilesAndPathsForRBasedImagePrinting <- function(dataFileExtension, imageFileExtension){
  scriptLocation = NULL
  scriptLocation <- sys.frame(1)$ofile
  if(is.null(scriptLocation)){
    stop("Could not read current file, that means the script will not work as expected")
  }
  #DEBUG: print(scriptLocation)
  
  # estimate current path
  currDirectory = dirname(scriptLocation)

  dataFileLocation = paste(basenameNoExtension(scriptLocation), dataFileExtension, sep="")
  imageFileLocation = paste(basenameNoExtension(scriptLocation), imageFileExtension, sep="")
  
  information = data.frame(currDirectory = as.character(currDirectory),
                           dataFileLocation = as.character(dataFileLocation),
                           imageFileLocation = as.character(imageFileLocation),
                           scriptLocation = as.character(scriptLocation),
                           stringsAsFactors = F)
  return(information)
}


testFunctions <- function(){
  ######## Test 1 #########
  currTestName = "lonLatKmDistanceBetweenTwoPoints"
  #ZIT
  point1Lon=14.81
  point1Lat=50.90
  #ZEL
  point2Lon=12.80
  point2Lat=47.32
  
  correctDist = 424.189709598301
  res = lonLatKmDistanceBetweenTwoPoints(point1Lon=point1Lon, point1Lat=point1Lat,point2Lon=point2Lon,point2Lat=point2Lat)
  if(res-correctDist<EPS){ #if difference is below machine epsilon, result is correct
    print(paste(currTestName, ": Test passed, result correct", sep=""))
  } else{
    stop(paste(currTestName, ": Test failed, result IS INCORRECT. Should be: ", correctDist, " but is: ", res, " GO AND FIX THIS!!!!", sep=""))
  }
  ######## Test 1 END #######
}

######
#
# correct variable names to accord to R specification
#
correctColumnNames <- function(colName){
  retVal = colName
  
  retVal = gsub("<","",retVal)
  retVal = gsub(">","",retVal)
  
  retVal = gsub(":", "_c", retVal)
  retVal = gsub("@","at",retVal)
  
  return(retVal)
}

#
# Function that checks if a vector is contained in in another vector and returns the start
# index.
#
getStartPositionOfSubvector <- function(vectorToFind, vectorToSearch){
  patrn = vectorToFind
  exmpl = vectorToSearch
  
  m <- length(patrn) 
  n <- length(exmpl) 
  candidate <- seq.int(length=n-m+1) 
  for (i in seq.int(length=m)) { 
    candidate <- candidate[patrn[i] == exmpl[candidate + i - 1]] 
  } 
  return (candidate)
}
