From a5a098e0ef116d0045c42b155e21ffc28330710a Mon Sep 17 00:00:00 2001
From: Pavlo-Andrianatos <andpavlo@gmail.com>
Date: Sat, 11 Feb 2023 15:53:22 +0200
Subject: [PATCH] First commit, added all needed files

---
 .../analysis/cpandrianatos/DataEntry.py       |  52 ++
 .../analysis/cpandrianatos/DataReader.py      |  31 ++
 .../analysis/cpandrianatos/FunctionalNode.py  |  47 ++
 .../analysis/cpandrianatos/Genetic_Program.py | 452 ++++++++++++++++
 .../analysis/cpandrianatos/Individual.py      |  77 +++
 .../analysis/cpandrianatos/LoadParameters.py  |  43 ++
 challenge1/analysis/cpandrianatos/Main.py     |  53 ++
 challenge1/analysis/cpandrianatos/Node.py     |  29 +
 .../cpandrianatos/OutputPredictions.py        |  54 ++
 challenge1/analysis/cpandrianatos/README.md   |  72 +++
 .../analysis/cpandrianatos/Run_script.bat     |   3 +
 .../analysis/cpandrianatos/TerminalNode.py    |  34 ++
 .../analysis/cpandrianatos/environment.yml    |  20 +
 challenge1/analysis/cpandrianatos/mape.txt    |   1 +
 .../analysis/cpandrianatos/parameters.config  |  15 +
 .../predicted_energy_production.csv           | 501 ++++++++++++++++++
 16 files changed, 1484 insertions(+)
 create mode 100644 challenge1/analysis/cpandrianatos/DataEntry.py
 create mode 100644 challenge1/analysis/cpandrianatos/DataReader.py
 create mode 100644 challenge1/analysis/cpandrianatos/FunctionalNode.py
 create mode 100644 challenge1/analysis/cpandrianatos/Genetic_Program.py
 create mode 100644 challenge1/analysis/cpandrianatos/Individual.py
 create mode 100644 challenge1/analysis/cpandrianatos/LoadParameters.py
 create mode 100644 challenge1/analysis/cpandrianatos/Main.py
 create mode 100644 challenge1/analysis/cpandrianatos/Node.py
 create mode 100644 challenge1/analysis/cpandrianatos/OutputPredictions.py
 create mode 100644 challenge1/analysis/cpandrianatos/README.md
 create mode 100644 challenge1/analysis/cpandrianatos/Run_script.bat
 create mode 100644 challenge1/analysis/cpandrianatos/TerminalNode.py
 create mode 100644 challenge1/analysis/cpandrianatos/environment.yml
 create mode 100644 challenge1/analysis/cpandrianatos/mape.txt
 create mode 100644 challenge1/analysis/cpandrianatos/parameters.config
 create mode 100644 challenge1/analysis/cpandrianatos/predicted_energy_production.csv

diff --git a/challenge1/analysis/cpandrianatos/DataEntry.py b/challenge1/analysis/cpandrianatos/DataEntry.py
new file mode 100644
index 000000000..413be0f23
--- /dev/null
+++ b/challenge1/analysis/cpandrianatos/DataEntry.py
@@ -0,0 +1,52 @@
+"""
+    @author Pavlo Andrianatos
+    Date: 11/02/2023
+"""
+
+
+class DataEntry:
+    """A single row of the dataset, exposed through one accessor per column.
+
+    Values are kept exactly as they were passed in; nothing is converted.
+    """
+
+    ID: float
+    Label: float
+    House: float
+    Year: float
+    Month: float
+    Temperature: float
+    Daylight: float
+    EnergyProduction: float
+
+    def __init__(self, id_num, label, house, year, month, temperature, daylight, energy_production):
+        """Store the eight column values of one dataset row on the instance."""
+        self.ID, self.Label, self.House, self.Year = id_num, label, house, year
+        self.Month, self.Temperature = month, temperature
+        self.Daylight, self.EnergyProduction = daylight, energy_production
+
+    # Accessors, in the same order as the constructor arguments.
+
+    def getID(self):
+        return self.ID
+
+    def getLabel(self):
+        return self.Label
+
+    def getHouse(self):
+        return self.House
+
+    def getYear(self):
+        return self.Year
+
+    def getMonth(self):
+        return self.Month
+
+    def getTemperature(self):
+        return self.Temperature
+
+    def getDaylight(self):
+        return self.Daylight
+
+    def getEnergyProduction(self):
+        return self.EnergyProduction
diff --git a/challenge1/analysis/cpandrianatos/DataReader.py b/challenge1/analysis/cpandrianatos/DataReader.py
new file mode 100644
index 000000000..2cd8232da
--- /dev/null
+++ b/challenge1/analysis/cpandrianatos/DataReader.py
@@ -0,0 +1,31 @@
+import csv
+from DataEntry import DataEntry
+
+"""
+    @author Pavlo Andrianatos
+    Date: 11/02/2023
+"""
+
+
+class DataReader:
+    """Loads the rows of a dataset CSV file, training or test, as DataEntry objects."""
+
+    fileName: str
+
+    def __init__(self, fileName):
+        """Remember the name of the CSV file to read from ../../data/."""
+        self.fileName = fileName
+
+    def ReadInData(self) -> list:
+        """Return one DataEntry per data row of the file.
+
+        The first row is treated as the header and skipped. The first eight
+        columns of every other row are handed to DataEntry unconverted, so a
+        row with fewer than eight columns raises IndexError.
+        """
+        with open("../../data/" + self.fileName) as csv_file:
+            rows = csv.reader(csv_file, delimiter=',')
+            # Consume the header row; an empty file simply yields no entries.
+            next(rows, None)
+            entries = [DataEntry(*[row[col] for col in range(8)]) for row in rows]
+        return entries
diff --git a/challenge1/analysis/cpandrianatos/FunctionalNode.py b/challenge1/analysis/cpandrianatos/FunctionalNode.py
new file mode 100644
index 000000000..eb0c05140
--- /dev/null
+++ b/challenge1/analysis/cpandrianatos/FunctionalNode.py
@@ -0,0 +1,47 @@
+from Node import Node
+
+"""
+    @author Pavlo Andrianatos
+    Date: 11/02/2023
+"""
+
+
+class FunctionalNode(Node):
+    """Operator node of an expression tree.
+
+    The label is one of the operators + - * / and the node sits above the
+    terminal nodes, either as the root or as an inner node. It holds its
+    operands as children (at most two).
+    """
+
+    label: str
+
+    children: list
+
+    def __init__(self, label: str, children: list):
+        """Create an operator node with the given label and list of children."""
+        super().__init__()
+        self.label = label
+        self.children = children
+
+    def getLabel(self) -> str:
+        return self.label
+
+    def setLabel(self, newLabel: str):
+        self.label = newLabel
+
+    def getChildren(self) -> list:
+        return self.children
+
+    def setChildren(self, newChildren: list):
+        self.children = newChildren
+
+    def CountNodes(self) -> int:
+        """Return the size of the subtree rooted here, this node included.
+
+        Recurses into every child that is not None; it is usually called on
+        the root of a tree.
+        """
+        if not self.children:
+            return 1
+        return 1 + sum(child.CountNodes() for child in self.children if child is not None)
diff --git a/challenge1/analysis/cpandrianatos/Genetic_Program.py b/challenge1/analysis/cpandrianatos/Genetic_Program.py
new file mode 100644
index 000000000..edf137a5a
--- /dev/null
+++ b/challenge1/analysis/cpandrianatos/Genetic_Program.py
@@ -0,0 +1,452 @@
+import random
+import sys
+
+from TerminalNode import TerminalNode
+from Node import Node
+from FunctionalNode import FunctionalNode
+from Individual import Individual
+from DataReader import DataReader
+
+import copy
+import re
+
+"""
+    @author Pavlo Andrianatos
+    Date: 11/02/2023
+"""
+
+
+class Genetic_Program:
+    seed: int
+    max_generations: int
+    pop_size: int
+    max_depth: int
+    crossover_chance: float
+    mutation_chance: float
+    reproduction_chance: float
+
+    ramped_half_half: float
+
+    population: list
+    newPopulation: list
+
+    matingPool: list
+
+    bestIndividual: Individual
+
+    totalAdjusted: float
+
+    def __init__(self):
+        """Set up placeholder settings and empty per-run state.
+
+        Perform_Run overwrites every setting below with the configured values.
+        """
+        self.seed = 0
+        self.max_generations, self.pop_size, self.max_depth = 1, 1, 3
+        self.crossover_chance = 0.6
+        self.mutation_chance = 0.3
+        self.reproduction_chance = 0.1
+        self.ramped_half_half = 0.5
+
+        # Current generation and the generation being bred from it.
+        self.population, self.newPopulation = [], []
+
+        # Fitness Proportionate Selection state: the pool individuals are drawn
+        # from and the sum of adjusted fitnesses used to normalise them.
+        self.matingPool = []
+        self.totalAdjusted = 0.0
+
+        # Lower fitness is better, so the initial best is given an arbitrary
+        # large fitness (100000.0) that training is not expected to reach.
+        self.bestIndividual = Individual(Node(), 100000.0)
+
+    def Perform_Run(self, filename, seed, maxGen, popSize, maxDepth, cross, mutate, reproduction,
+                    ramped_half_half, random_seed):
+        """Evolve a population on the training data and return the best tree found.
+
+        Each generation scores every individual with the Mean Absolute Percentage
+        Error over all training rows (times 100, lower is better) and breeds the
+        next generation from individuals picked by Fitness Proportionate Selection.
+
+        :param filename: name of the training CSV file handed to DataReader
+        :param seed: seed for the random module, replaced when random_seed is truthy
+        :param maxGen: number of generations to run
+        :param popSize: number of individuals in each generation
+        :param maxDepth: depth limit used when generating trees
+        :param cross: chance that crossover is used to breed
+        :param mutate: chance that mutation is used to breed
+        :param reproduction: stored only; reproduction gets whatever chance is left
+        :param ramped_half_half: chance that a METHOD_GROW branch stops early
+        :param random_seed: when truthy a randomly drawn seed is used instead of seed
+        :return: list of [best individual over the whole run, seed that was used]
+        """
+        self.seed = seed
+        if random_seed:
+            self.seed = random.randrange(sys.maxsize)
+        random.seed(self.seed)
+        self.max_generations = maxGen
+        self.pop_size = popSize
+        self.max_depth = maxDepth
+        self.crossover_chance = cross
+        self.mutation_chance = mutate
+        self.reproduction_chance = reproduction
+        self.ramped_half_half = ramped_half_half
+
+        data = DataReader(filename).ReadInData()
+        # Data is shuffled to try and make each training unique
+        random.shuffle(data)
+        # The targets never change, so they are converted to float only once
+        targets = [float(entry.getEnergyProduction()) for entry in data]
+
+        self.InitialPopulationGeneration()
+
+        # One draw picks exactly one operator: crossover up to crossover_chance,
+        # mutation up to mutationLimit and reproduction for everything above it
+        mutationLimit = self.crossover_chance + self.mutation_chance
+
+        for currentGeneration in range(self.max_generations):
+            print("Generation: ", currentGeneration)
+
+            for individual in self.population:
+                root = individual.getRoot()
+                error = 0.0
+                # Every prediction is compared with the target of the same row
+                for entry, target in zip(data, targets):
+                    prediction = float(self.RunGP(individual, root, entry))
+                    error += abs((target - prediction) / target)
+
+                # Mean of the relative errors, expressed as a percentage
+                fitness = error * (1 / len(data)) * 100
+                individual.setFitness(fitness)
+                if self.bestIndividual.getFitness() > fitness:
+                    self.bestIndividual = copy.deepcopy(individual)
+                individual.setStandardisedFitness(fitness)
+                individual.setAdjustedFitness(1 / (1 + fitness))
+                self.totalAdjusted += individual.getAdjustedFitness()
+
+            self.CreateFitnessProportionateSelection()
+
+            # Crossover adds two individuals, so it is only used while at least
+            # two places are free; the draw then falls through to mutation
+            while len(self.newPopulation) < self.pop_size:
+                rand = random.random()
+                if rand <= self.crossover_chance and len(self.newPopulation) <= self.pop_size - 2:
+                    self.CrossOver()
+                elif rand <= mutationLimit:
+                    self.Mutation()
+                else:
+                    self.Reproduction()
+
+            # The bred individuals are already copies, so they are taken over as is
+            self.population = self.newPopulation
+            self.newPopulation = []
+            self.totalAdjusted = 0.0
+            self.matingPool = []
+
+            print("Best Individual Fitness: ", self.bestIndividual.getFitness())
+            self.bestIndividual.printTree(self.bestIndividual.getRoot(), "-")
+
+        print("Best Final Individual Fitness: ", self.bestIndividual.getFitness())
+        self.bestIndividual.printTree(self.bestIndividual.getRoot(), "-")
+        print("Seed: ", self.seed)
+        return [self.bestIndividual, self.seed]
+
+    def InitialPopulationGeneration(self):
+        """Fill self.population with pop_size randomly generated trees.
+
+        Depths 1 to max_depth - 1 each receive the same number of trees, half
+        built with METHOD_GROW and half with METHOD_FULL. METHOD_GROW may end
+        a branch early (chance ramped_half_half) with a terminal node, while
+        METHOD_FULL always builds down to the given depth.
+
+        Places that this even split leaves open are filled with trees of random
+        depth and method, so that pop_size trees are added in total.
+        """
+        treeDepthTemp = self.max_depth - 1
+
+        if treeDepthTemp <= 0:
+            treeDepthTemp = 1
+
+        numberOfTreesForEachDepth = self.pop_size // treeDepthTemp  # Math.floorDiv
+        treesPerMethod = numberOfTreesForEachDepth // 2
+
+        count = 0
+
+        for depthNumber in range(1, treeDepthTemp + 1):
+            for method in ("METHOD_GROW", "METHOD_FULL"):
+                for _ in range(treesPerMethod):
+                    self.population.append(self.GenerateTree(depthNumber, method))
+                    count += 1
+
+        # The integer divisions above can leave more than one place open (for
+        # example pop_size 10 with max_depth 4 gives only 6 trees), so generate
+        # until all places are filled instead of adding a single extra tree
+        while count < self.pop_size:
+            # max() keeps randint valid when max_depth is configured below 1
+            randTreeDepth = random.randint(1, max(1, self.max_depth))
+            method = "METHOD_GROW" if random.random() < 0.5 else "METHOD_FULL"
+            self.population.append(self.GenerateTree(randTreeDepth, method))
+            count += 1
+
+    """
+        Will generate a tree, this starts out with the root, the root will always be a functional node
+    """
+
+    def GenerateTree(self, maxDepth, method) -> Individual:
+        """Build a new individual around a randomly generated tree.
+
+        The root gets a randomly drawn operator and two subtrees from
+        GenRndExpr(maxDepth - 1, method); the fitness starts at 100000.0.
+        """
+        draw = random.random()
+
+        # Each operator owns a quarter of the range the draw comes from.
+        operator = ""
+        for limit, symbol in ((0.25, "+"), (0.50, "-"), (0.75, "*"), (1.0, "/")):
+            if draw <= limit:
+                operator = symbol
+                break
+
+        root = FunctionalNode(operator, [])
+        # The left subtree is generated before the right one.
+        left = self.GenRndExpr(maxDepth - 1, method)
+        right = self.GenRndExpr(maxDepth - 1, method)
+        root.setChildren([left, right])
+
+        return Individual(root, 100000.0)
+
+    """
+        Recursive function that will generate the rest of the tree started in GenerateTree
+    """
+
+    def GenRndExpr(self, maxDepth, method) -> Node:
+        """Recursively generate a subtree for GenerateTree.
+
+        A terminal node is produced once maxDepth is used up or, for METHOD_GROW
+        only, with chance ramped_half_half; otherwise a functional node with a
+        random operator and two generated children.
+        """
+        # "<= 0" rather than "== 0": a budget that is already negative (as
+        # after GenerateTree(0, ...)) must end the branch as well, otherwise
+        # METHOD_FULL would recurse until Python's recursion limit is hit.
+        if maxDepth <= 0 or (method == "METHOD_GROW" and random.random() < self.ramped_half_half):
+            node = TerminalNode("")
+            rand = random.random()
+
+            if rand <= 0.16:
+                # Will make a constant between 0 and 1000 inclusive
+                node.setLabel(str(float(random.randint(0, 1000))))
+            elif rand <= 0.30:
+                node.setLabel("Label")
+            elif rand <= 0.44:
+                node.setLabel("House")
+            elif rand <= 0.58:
+                node.setLabel("Year")
+            elif rand <= 0.72:
+                node.setLabel("Month")
+            elif rand <= 0.86:
+                node.setLabel("Temperature")
+            else:
+                node.setLabel("Daylight")
+
+            return node
+
+        rand = random.random()
+        # Each operator owns a quarter of the range the draw comes from
+        operators = ((0.25, "+"), (0.50, "-"), (0.75, "*"), (1.0, "/"))
+        node = FunctionalNode(next(symbol for limit, symbol in operators if rand <= limit), [])
+        # The left child is generated before the right one
+        left = self.GenRndExpr(maxDepth - 1, method)
+        right = self.GenRndExpr(maxDepth - 1, method)
+        node.setChildren([left, right])
+
+        return node
+
+    """
+        An interpreter that interprets a tree, will return a prediction.
+    """
+
+    def RunGP(self, individual, node, dataEntry) -> float:
+        """Interpret the tree below node for one data row and return its value.
+
+        :return: the value as a float; a division by zero yields 100000.0
+        :raises ValueError: if a node carries a label that is not known
+        """
+        label = node.getLabel()
+        # Constant terminals carry their value as text, for example "523.0"
+        if isinstance(label, str) and re.search(r"\d", label):
+            return float(label)
+        match label:
+            case "Label":
+                return float(dataEntry.getLabel())
+            case "House":
+                return float(dataEntry.getHouse())
+            case "Year":
+                return float(dataEntry.getYear())
+            case "Month":
+                return float(dataEntry.getMonth())
+            case "Temperature":
+                return float(dataEntry.getTemperature())
+            case "Daylight":
+                return float(dataEntry.getDaylight())
+            case "+" | "-" | "*" | "/":
+                first, second = node.getChildren()[0], node.getChildren()[1]
+                left = self.RunGP(individual, first, dataEntry)
+                right = self.RunGP(individual, second, dataEntry)
+                if label == "+":
+                    return left + right
+                if label == "-":
+                    return left - right
+                if label == "*":
+                    return left * right
+                # Check if the denominator is 0, can't divide by 0
+                return 100000.0 if right == 0 else left / right
+            case _:
+                raise ValueError(f"Unknown node label: {label!r}")
+
+    """
+        Returns a random individual in matingPool
+    """
+
+    def FitnessProportionateSelection(self) -> Individual:
+        return self.matingPool[random.randrange(len(self.matingPool))]  # uniform draw from the pool
+
+    """
+        Populates the matingPool list with individuals based on its normalisedFitness.
+        An individual who has a better fitness has a higher chance of being picked in FitnessProportionateSelection
+        because it occurs more in the mating pool list
+    """
+
+    def CreateFitnessProportionateSelection(self):
+        """Give each individual round(normalised fitness * pop_size) pool places."""
+        for member in self.population:
+            member.setNormalisedFitness(member.getAdjustedFitness() / self.totalAdjusted)
+            occurrences = round(member.getNormalisedFitness() * self.pop_size)
+            self.matingPool.extend(copy.deepcopy(member) for _ in range(occurrences))
+
+    """
+        Crossover will select 2 random individuals and 2 random points (one point in each individual).
+        The subtrees at each point are then swapped.
+        The resulting tree is added to the next population for teh next generation.
+    """
+
+    def CrossOver(self):
+        """Breed two children by exchanging random subtrees of two parents.
+
+        Two individuals are selected and copied, one node below the root is
+        drawn in each copy and the subtrees rooted at the two nodes trade
+        places. Both resulting trees are added to the next generation.
+        The roots themselves are never drawn, so every tree keeps a functional
+        node as its root.
+        """
+        parentOne = copy.deepcopy(self.FitnessProportionateSelection())
+        parentTwo = copy.deepcopy(self.FitnessProportionateSelection())
+
+        numNodesOne = parentOne.getRoot().CountNodes()
+        numNodesTwo = parentTwo.getRoot().CountNodes()
+
+        # A tree that consists of a root only has no subtree to give away
+        if numNodesOne >= 2 and numNodesTwo >= 2:
+            # Point 1 is the root, so the points are drawn from 2 upwards
+            pointOne = random.randint(2, numNodesOne)
+            pointTwo = random.randint(2, numNodesTwo)
+
+            nodeOne = self.CrossOverHelper(parentOne.getRoot(), pointOne)
+            nodeTwo = self.CrossOverHelper(parentTwo.getRoot(), pointTwo)
+
+            nodeOneParent = self.CrossOverParentHelper(parentOne.getRoot(), nodeOne)
+            nodeTwoParent = self.CrossOverParentHelper(parentTwo.getRoot(), nodeTwo)
+
+            # Each parent receives the subtree that was cut out of the other one
+            self._ReplaceChild(nodeOneParent, nodeOne, nodeTwo)
+            self._ReplaceChild(nodeTwoParent, nodeTwo, nodeOne)
+
+        parentOne.resetValues()
+        parentTwo.resetValues()
+        self.newPopulation.append(parentOne)
+        self.newPopulation.append(parentTwo)
+
+    def _ReplaceChild(self, parent, oldChild, newChild):
+        """Put newChild into the place that oldChild has among parent's children.
+
+        The child is found by identity, not by label, because several nodes
+        of a tree can carry the same label. Nothing happens when parent is
+        None or oldChild is not one of its children.
+        """
+        if parent is None:
+            return
+        children = parent.getChildren()
+        for i in range(0, len(children)):
+            if children[i] is oldChild:
+                children[i] = newChild
+                break
+
+    def CrossOverHelper(self, node, point) -> Node:
+        """Return the node at position point, counted breadth-first from node.
+
+        Position 1 (or anything below it) is node itself. Child entries that
+        are None are skipped, as FunctionalNode.CountNodes does. None is
+        returned when the tree has fewer than point nodes.
+        """
+        queue = [node]
+        visited = 0
+        # The list is only appended to, so an index walks it like a queue
+        while visited < len(queue):
+            current = queue[visited]
+            visited += 1
+            if visited >= point:
+                return current
+            children = current.getChildren()
+            if children:
+                queue.extend(child for child in children if child is not None)
+        return None
+
+    def CrossOverParentHelper(self, nodeParent, node) -> Node:
+        """Return the node that has node as a direct child.
+
+        :param nodeParent: root of the tree to search
+        :param node: the node whose parent is wanted, compared by identity
+        :return: the parent, or None if node is the root or not in the tree
+        """
+        queue = [nodeParent]
+        visited = 0
+        while visited < len(queue):
+            current = queue[visited]
+            visited += 1
+            children = [child for child in (current.getChildren() or []) if child is not None]
+            if any(child is node for child in children):
+                return current
+            queue.extend(children)
+        return None
+
+    def Mutation(self):
+        """Breed one child by replacing a random subtree with a new one.
+
+        An individual is selected and copied, one node below the root is
+        drawn and the subtree rooted there is replaced with a freshly
+        generated METHOD_GROW tree of depth max_depth, so the result can
+        become deeper than max_depth. The tree is added to the next generation.
+        """
+        parent = copy.deepcopy(self.FitnessProportionateSelection())
+        numNodes = parent.getRoot().CountNodes()
+
+        # A tree that consists of a root only has no subtree to replace
+        if numNodes >= 2:
+            # Point 1 is the root, so the point is drawn from 2 upwards
+            point = random.randint(2, numNodes)
+            node = self.CrossOverHelper(parent.getRoot(), point)
+            nodeParent = self.CrossOverParentHelper(parent.getRoot(), node)
+            newSubTree = self.GenerateTree(self.max_depth, "METHOD_GROW")
+            self._ReplaceChild(nodeParent, node, newSubTree.getRoot())
+
+        parent.resetValues()
+        self.newPopulation.append(parent)
+
+    """
+        Reproduction copies over a random individual from the old population to the next population.
+    """
+
+    def Reproduction(self):
+        """Copy a selected individual, reset its fitness values and keep it."""
+        self.newPopulation.append(copy.deepcopy(self.FitnessProportionateSelection()))
+        self.newPopulation[-1].resetValues()
diff --git a/challenge1/analysis/cpandrianatos/Individual.py b/challenge1/analysis/cpandrianatos/Individual.py
new file mode 100644
index 000000000..e46c94fb5
--- /dev/null
+++ b/challenge1/analysis/cpandrianatos/Individual.py
@@ -0,0 +1,77 @@
+from Node import *
+
+"""
+    @author Pavlo Andrianatos
+    Date: 11/02/2023
+"""
+
+
+class Individual:
+    """One candidate solution: an expression tree together with its fitness.
+
+    The fitness is the MAPE over every data entry (row in the dataset)
+    multiplied by 100. The standardised, adjusted and normalised fitness
+    are used during Fitness Proportionate Selection to select the
+    individuals for the next generation.
+    """
+
+    root: Node
+    fitness: float
+
+    standardisedFitness: float
+    adjustedFitness: float
+    normalizedFitness: float
+
+    def __init__(self, newRoot: Node, newFitness: float):
+        """Wrap the tree rooted at newRoot and start with the given fitness."""
+        self.root = newRoot
+        self.fitness = newFitness
+        self.standardisedFitness = self.adjustedFitness = self.normalizedFitness = 0
+
+    def getRoot(self):
+        return self.root
+
+    def getFitness(self):
+        return self.fitness
+
+    def setFitness(self, newFitness):
+        self.fitness = newFitness
+
+    def getStandardisedFitness(self):
+        return self.standardisedFitness
+
+    def setStandardisedFitness(self, newStandardisedFitness):
+        self.standardisedFitness = newStandardisedFitness
+
+    def getAdjustedFitness(self):
+        return self.adjustedFitness
+
+    def setAdjustedFitness(self, newAdjustedFitness):
+        self.adjustedFitness = newAdjustedFitness
+
+    def getNormalisedFitness(self):
+        return self.normalizedFitness
+
+    def setNormalisedFitness(self, newNormalisedFitness):
+        self.normalizedFitness = newNormalisedFitness
+
+    def resetValues(self):
+        """Set the fitness to 0.0 and the three derived values to 0."""
+        self.fitness = 0.0
+        self.standardisedFitness = self.adjustedFitness = self.normalizedFitness = 0
+
+    def printTree(self, node, appender):
+        """Print the label of node and, recursively, of every node below it.
+
+        Each line starts with appender, which grows by one "-" per level.
+        Nothing below a node is printed when its label or children are None.
+        """
+        if node is None:
+            return
+        print(appender, node.getLabel())
+        if node.getLabel() is None or node.getChildren() is None:
+            return
+        deeper = appender + "-"
+        for child in node.getChildren():
+            if child is not None:
+                self.printTree(child, deeper)
diff --git a/challenge1/analysis/cpandrianatos/LoadParameters.py b/challenge1/analysis/cpandrianatos/LoadParameters.py
new file mode 100644
index 000000000..153c64f4c
--- /dev/null
+++ b/challenge1/analysis/cpandrianatos/LoadParameters.py
@@ -0,0 +1,43 @@
+import configparser
+
+"""
+    @author Pavlo Andrianatos
+    Date: 11/02/2023
+"""
+
+
+class LoadParameters:
+    """Settings of a run, read from parameters.config by ReadInParameters."""
+    fileNameTrain: str
+    random_seed: bool
+    seed: int
+    max_generations: int
+    pop_size: int
+    max_depth: int
+    crossover_chance: float
+    mutation_chance: float
+    reproduction_chance: float
+    num_of_runs: int
+    ramped_half_half: float
+    fileNameTest: str
+
+    def ReadInParameters(self):
+        """Read parameters.config (current working directory) into attributes.
+
+        random_seed is on for True, 1, yes or on (any letter case), otherwise off.
+        """
+        config = configparser.ConfigParser()
+        config.read("parameters.config")
+        training = config["Training"]
+        self.fileNameTrain = training["filename"]
+        self.random_seed = training["random_seed"].strip().lower() in ("true", "1", "yes", "on")
+        self.seed = int(training["seed"])
+        self.max_generations = int(training["max_generations"])
+        self.pop_size = int(training["pop_size"])
+        self.max_depth = int(training["max_depth"])
+        self.crossover_chance = float(training["crossover_chance"])
+        self.mutation_chance = float(training["mutation_chance"])
+        self.reproduction_chance = float(training["reproduction_chance"])
+        self.num_of_runs = int(training["num_of_runs"])
+        self.ramped_half_half = float(training["ramped_half_half"])
+        self.fileNameTest = config["Testing"]["filename"]
diff --git a/challenge1/analysis/cpandrianatos/Main.py b/challenge1/analysis/cpandrianatos/Main.py
new file mode 100644
index 000000000..b60d71c35
--- /dev/null
+++ b/challenge1/analysis/cpandrianatos/Main.py
@@ -0,0 +1,53 @@
+from Genetic_Program import Genetic_Program
+from Individual import Individual
+from LoadParameters import LoadParameters
+from OutputPredictions import OutputPredictions
+
+"""
+    @author Pavlo Andrianatos
+    Date: 11/02/2023
+"""
+
+if __name__ == "__main__":
+    # Load Parameters
+    lp = LoadParameters()
+    lp.ReadInParameters()
+
+    fileName = lp.fileNameTrain
+    seed = lp.seed
+    max_generations = lp.max_generations
+    pop_size = lp.pop_size
+    max_depth = lp.max_depth
+    crossover_chance = lp.crossover_chance
+    mutation_chance = lp.mutation_chance
+    reproduction_chance = lp.reproduction_chance
+    num_of_runs = lp.num_of_runs
+    ramped_half_half = lp.ramped_half_half
+    random_seed = lp.random_seed
+
+    # Without a single run there is no individual to make predictions with
+    if num_of_runs < 1:
+        raise SystemExit("num_of_runs in parameters.config must be at least 1")
+
+    # One [best individual, seed] pair per run, as returned by Perform_Run
+    data_from_runs = []
+
+    for _ in range(num_of_runs):
+        gp = Genetic_Program()
+        data_from_runs.append(gp.Perform_Run(fileName, seed, max_generations, pop_size, max_depth, crossover_chance,
+                                             mutation_chance, reproduction_chance, ramped_half_half, random_seed))
+        # This is mainly if you want to have an increasing seed each run, otherwise using a static seed will
+        # result in the same outcome every run
+        seed += 10
+
+    # The run with the lowest fitness wins (the first one if several are
+    # equal). Taking the minimum also works when no run got below the
+    # starting fitness of 100000.0, where an index search would find nothing.
+    bestIndividual: Individual
+    bestRun = min(data_from_runs, key=lambda run: run[0].getFitness())
+    bestIndividual = bestRun[0]
+    bestSeed = bestRun[1]
+
+    op = OutputPredictions()
+
+    op.OutputEneryPredictions(lp, bestIndividual, bestSeed)
diff --git a/challenge1/analysis/cpandrianatos/Node.py b/challenge1/analysis/cpandrianatos/Node.py
new file mode 100644
index 000000000..7993c53aa
--- /dev/null
+++ b/challenge1/analysis/cpandrianatos/Node.py
@@ -0,0 +1,29 @@
+"""
+    @author Pavlo Andrianatos
+    Date: 11/02/2023
+"""
+
+
+class Node(object):
+    """Base class for functional and terminal nodes.
+
+    Every method here is a placeholder that does nothing and returns None.
+    """
+
+    def __init__(self):
+        return None
+
+    def getLabel(self) -> str:
+        return None
+
+    def setLabel(self, newLabel: str):
+        return None
+
+    def getChildren(self) -> list:
+        return None
+
+    def setChildren(self, newChildren: list):
+        return None
+
+    def CountNodes(self) -> int:
+        return None
diff --git a/challenge1/analysis/cpandrianatos/OutputPredictions.py b/challenge1/analysis/cpandrianatos/OutputPredictions.py
new file mode 100644
index 000000000..f2af824b6
--- /dev/null
+++ b/challenge1/analysis/cpandrianatos/OutputPredictions.py
@@ -0,0 +1,54 @@
+import csv
+from DataReader import DataReader
+from Genetic_Program import Genetic_Program
+
+"""
+    @author Pavlo Andrianatos
+    Date: 11/02/2023
+"""
+
+
+class OutputPredictions:
+    """Applies the best individual to the test dataset and writes the results."""
+
+    def OutputEneryPredictions(self, lp, bestIndividual, bestSeed):
+        """Predict the EnergyProduction of every test row and save the outcome.
+
+        The predictions go to predicted_energy_production.csv (columns House
+        and EnergyProduction) and the Mean Absolute Percentage Error, as a
+        fraction, to mape.txt. The tree, MAPE and seed are also printed.
+
+        :param lp: loaded parameters; only fileNameTest is read
+        :param bestIndividual: individual whose tree makes the predictions
+        :param bestSeed: seed of the run that produced it (printed only)
+        """
+        data = DataReader(lp.fileNameTest).ReadInData()
+
+        gpTest = Genetic_Program()
+        root = bestIndividual.getRoot()
+
+        testFitness = 0.0
+        output_house_energy = []
+
+        for entry in data:
+            actual = float(entry.getEnergyProduction())
+            prediction = float(gpTest.RunGP(bestIndividual, root, entry))
+            testFitness += abs((actual - prediction) / actual)
+
+            # The file must hold what the model predicts, not the known value
+            output_house_energy.append([entry.getHouse(), prediction])
+
+        testFitness = testFitness * (1 / len(data))
+
+        bestIndividual.printTree(root, "-")
+
+        print("MAPE: ", testFitness)
+        print("Seed: ", bestSeed)
+
+        with open("predicted_energy_production.csv", "w", encoding='UTF8', newline='') as CSV_file:
+            writer = csv.writer(CSV_file)
+            writer.writerow(["House", "EnergyProduction"])
+            writer.writerows(output_house_energy)
+
+        with open("mape.txt", "w", encoding='UTF8', newline='') as MAPE_txt:
+            MAPE_txt.write(str(testFitness))
diff --git a/challenge1/analysis/cpandrianatos/README.md b/challenge1/analysis/cpandrianatos/README.md
new file mode 100644
index 000000000..a61894d75
--- /dev/null
+++ b/challenge1/analysis/cpandrianatos/README.md
@@ -0,0 +1,72 @@
+This submission is from Pavlo Andrianatos on the 11th February 2023
+
+This submission was made using Pycharm 2022.1, Anaconda 4.13.0, and Python 3.10. An environment.yml file is provided to replicate the programming environment.
+
+The program can be run by running Run_script.bat, which runs Main.py.
+
+I opted to use Genetic Programming to solve challenge 1, this uses symbolic regression to obtain an optimal mathematical function that
+predicts the EnergyProduction.
+
+This involves generating trees that are interpreted using the values of each data entry (Label, House, Year, Month, Temperature, Daylight) to predict an
+EnergyProduction.
+
+There is a parameters.config file where parameters can be changed to alter the generation of solutions.
+
+The parameters are as follows:
+
+[Training]
+filename = training_dataset_500.csv 	(Filename of training dataset)
+random_seed = True						(If True the program will use a random seed)
+seed = 123								(Seed to use during training; if random_seed is set to True the user-provided seed is ignored)
+max_generations = 5						(Number of generations in each run, will perform crossover, mutation, and reproduction at the end of each generation)
+pop_size = 10							(Number of trees in each generation)
+max_depth = 4							(The max depth of trees, Mutation can cause a tree to exceed the max_depth. Recommended 2 - 6, can make it more 
+										but the trees become quite big)
+crossover_chance = 0.7					(Chance crossover will be used to create a new tree for the next generation)
+mutation_chance = 0.2					(Chance mutation will be used to create a new tree for the next generation)
+reproduction_chance = 0.1				(Chance reproduction will be used to create a new tree for the next generation. Don't recommend setting 
+										this too high, because then you are just copying over the last population to the next. We want to apply
+										selection pressure)
+num_of_runs = 3							(Number of runs)
+ramped_half_half = 0.5					(Chance a tree will stop generating a full tree and the generating branch terminates with a terminal node)
+
+[Testing]
+filename = test_dataset_500.csv 		(Filename of test dataset)
+
+The fitness for each individual in the population is the MAPE for the individual given all training data and multiplied by 100. The best fitness is 0.
+
+The fitness is used in fitness proportionate selection, where random individuals are selected out of a mating pool to be used in either crossover, mutation, or reproduction.
+
+Crossover selects a random point on two different individuals and swaps the sub branches at those nodes.
+
+Mutation selects a random point in an individual and generates a new sub tree at that point.
+
+Reproduction copies over an individual from the old population into the new population.
+
+Best Individual found:
+- -
+-- +
+--- 557.0
+--- Label
+-- +
+--- Label
+--- Temperature
+MAPE:  0.13358284671566079
+Seed: 3054022391716465016
+
+Best Individual parameters:
+[Training]
+filename = training_dataset_500.csv
+random_seed = False
+seed = 3054022391716465016
+max_generations = 3
+pop_size = 10
+max_depth = 3
+crossover_chance = 0.7
+mutation_chance = 0.2
+reproduction_chance = 0.1
+num_of_runs = 3
+ramped_half_half = 0.5
+
+[Testing]
+filename = test_dataset_500.csv
\ No newline at end of file
diff --git a/challenge1/analysis/cpandrianatos/Run_script.bat b/challenge1/analysis/cpandrianatos/Run_script.bat
new file mode 100644
index 000000000..3dae8950a
--- /dev/null
+++ b/challenge1/analysis/cpandrianatos/Run_script.bat
@@ -0,0 +1,3 @@
+@echo off
+python Main.py
+pause
\ No newline at end of file
diff --git a/challenge1/analysis/cpandrianatos/TerminalNode.py b/challenge1/analysis/cpandrianatos/TerminalNode.py
new file mode 100644
index 000000000..471e4f1db
--- /dev/null
+++ b/challenge1/analysis/cpandrianatos/TerminalNode.py
@@ -0,0 +1,36 @@
+from Node import Node
+
+"""
+    @author Pavlo Andrianatos
+    Date: 11/02/2023
+"""
+
+
+class TerminalNode(Node):
+    """
+        Each terminal node represents one variable in each data entry (row in dataset), excluding the ID and
+        EnergyProduction, you can't use EnergyProduction during training since that is what you are trying to
+        predict.
+        Terminal nodes are considered leaf nodes and do not have children.
+    """
+
+    # variables in data
+    # ID Label House Year Month Temperature Daylight EnergyProduction
+    label: str
+
+    def __init__(self, label: str) -> None:
+        """Create a leaf node holding the given label."""
+        super().__init__()
+        self.label = label
+
+    def getLabel(self) -> str:
+        """Return the label stored in this node."""
+        return self.label
+
+    def setLabel(self, newLabel: str) -> None:
+        """Replace the label stored in this node."""
+        self.label = newLabel
+
+    # Return 1 since these are leaf nodes in the tree
+    def CountNodes(self) -> int:
+        return 1
diff --git a/challenge1/analysis/cpandrianatos/environment.yml b/challenge1/analysis/cpandrianatos/environment.yml
new file mode 100644
index 000000000..4c6513332
--- /dev/null
+++ b/challenge1/analysis/cpandrianatos/environment.yml
@@ -0,0 +1,20 @@
+channels:
+  - defaults
+dependencies:
+  - bzip2=1.0.8=he774522_0
+  - ca-certificates=2023.01.10=haa95532_0
+  - certifi=2022.12.7=py310haa95532_0
+  - libffi=3.4.2=hd77b12b_6
+  - openssl=1.1.1s=h2bbff1b_0
+  - pip=22.3.1=py310haa95532_0
+  - python=3.10.9=h966fe2a_0
+  - setuptools=65.6.3=py310haa95532_0
+  - sqlite=3.40.1=h2bbff1b_0
+  - tk=8.6.12=h2bbff1b_0
+  - tzdata=2022g=h04d1e81_0
+  - vc=14.2=h21ff451_1
+  - vs2015_runtime=14.27.29016=h5e58377_2
+  - wheel=0.37.1=pyhd3eb1b0_0
+  - wincertstore=0.2=py310haa95532_2
+  - xz=5.2.10=h8cc25b3_1
+  - zlib=1.2.13=h8cc25b3_0
diff --git a/challenge1/analysis/cpandrianatos/mape.txt b/challenge1/analysis/cpandrianatos/mape.txt
new file mode 100644
index 000000000..1210a9afe
--- /dev/null
+++ b/challenge1/analysis/cpandrianatos/mape.txt
@@ -0,0 +1 @@
+0.13358284671566079
\ No newline at end of file
diff --git a/challenge1/analysis/cpandrianatos/parameters.config b/challenge1/analysis/cpandrianatos/parameters.config
new file mode 100644
index 000000000..96d4043a2
--- /dev/null
+++ b/challenge1/analysis/cpandrianatos/parameters.config
@@ -0,0 +1,15 @@
+[Training]
+filename = training_dataset_500.csv
+random_seed = False
+seed = 3054022391716465016
+max_generations = 3
+pop_size = 10
+max_depth = 3
+crossover_chance = 0.7
+mutation_chance = 0.2
+reproduction_chance = 0.1
+num_of_runs = 3
+ramped_half_half = 0.5
+
+[Testing]
+filename = test_dataset_500.csv
\ No newline at end of file
diff --git a/challenge1/analysis/cpandrianatos/predicted_energy_production.csv b/challenge1/analysis/cpandrianatos/predicted_energy_production.csv
new file mode 100644
index 000000000..9956382fe
--- /dev/null
+++ b/challenge1/analysis/cpandrianatos/predicted_energy_production.csv
@@ -0,0 +1,501 @@
+House,EnergyProduction
+1,778
+2,627
+3,735
+4,533
+5,533
+6,670
+7,673
+8,560
+9,517
+10,455
+11,455
+12,670
+13,565
+14,455
+15,778
+16,627
+17,586
+18,518
+19,627
+20,467
+21,684
+22,560
+23,471
+24,670
+25,471
+26,668
+27,673
+28,736
+29,627
+30,523
+31,736
+32,523
+33,736
+34,584
+35,584
+36,565
+37,735
+38,673
+39,518
+40,735
+41,523
+42,471
+43,684
+44,735
+45,668
+46,522
+47,455
+48,471
+49,455
+50,668
+51,455
+52,684
+53,627
+54,517
+55,735
+56,471
+57,585
+58,534
+59,627
+60,736
+61,628
+62,523
+63,670
+64,455
+65,517
+66,565
+67,778
+68,523
+69,467
+70,565
+71,467
+72,586
+73,522
+74,585
+75,534
+76,627
+77,670
+78,534
+79,467
+80,534
+81,778
+82,670
+83,627
+84,627
+85,628
+86,886
+87,455
+88,455
+89,451
+90,670
+91,565
+92,533
+93,533
+94,736
+95,668
+96,886
+97,467
+98,584
+99,517
+100,534
+101,518
+102,460
+103,684
+104,735
+105,586
+106,736
+107,467
+108,517
+109,886
+110,778
+111,533
+112,560
+113,517
+114,627
+115,460
+116,471
+117,451
+118,670
+119,533
+120,533
+121,576
+122,684
+123,523
+124,586
+125,560
+126,585
+127,517
+128,673
+129,560
+130,627
+131,576
+132,684
+133,585
+134,534
+135,565
+136,576
+137,471
+138,533
+139,628
+140,585
+141,673
+142,523
+143,467
+144,518
+145,565
+146,518
+147,576
+148,467
+149,534
+150,576
+151,560
+152,886
+153,668
+154,534
+155,471
+156,483
+157,627
+158,565
+159,522
+160,886
+161,736
+162,517
+163,586
+164,460
+165,467
+166,684
+167,517
+168,460
+169,565
+170,560
+171,576
+172,451
+173,670
+174,736
+175,455
+176,471
+177,778
+178,534
+179,736
+180,534
+181,627
+182,576
+183,585
+184,668
+185,778
+186,522
+187,736
+188,518
+189,735
+190,778
+191,534
+192,735
+193,627
+194,585
+195,673
+196,576
+197,467
+198,471
+199,518
+200,673
+201,517
+202,467
+203,533
+204,560
+205,560
+206,455
+207,460
+208,673
+209,483
+210,735
+211,471
+212,778
+213,460
+214,517
+215,684
+216,522
+217,522
+218,534
+219,455
+220,455
+221,736
+222,673
+223,560
+224,522
+225,518
+226,467
+227,483
+228,523
+229,451
+230,736
+231,533
+232,627
+233,523
+234,668
+235,523
+236,735
+237,517
+238,584
+239,518
+240,517
+241,455
+242,533
+243,518
+244,467
+245,460
+246,522
+247,534
+248,560
+249,576
+250,523
+251,560
+252,460
+253,523
+254,523
+255,455
+256,586
+257,518
+258,684
+259,460
+260,560
+261,517
+262,778
+263,483
+264,455
+265,735
+266,735
+267,455
+268,455
+269,576
+270,455
+271,673
+272,736
+273,565
+274,455
+275,483
+276,560
+277,886
+278,565
+279,523
+280,735
+281,778
+282,627
+283,668
+284,523
+285,517
+286,576
+287,483
+288,523
+289,736
+290,684
+291,735
+292,533
+293,522
+294,471
+295,565
+296,455
+297,585
+298,584
+299,668
+300,628
+301,451
+302,467
+303,560
+304,735
+305,576
+306,565
+307,627
+308,565
+309,460
+310,451
+311,560
+312,517
+313,673
+314,670
+315,628
+316,517
+317,628
+318,628
+319,628
+320,451
+321,460
+322,585
+323,585
+324,627
+325,673
+326,534
+327,565
+328,518
+329,628
+330,523
+331,670
+332,455
+333,736
+334,668
+335,467
+336,627
+337,576
+338,778
+339,628
+340,673
+341,778
+342,736
+343,523
+344,533
+345,467
+346,533
+347,586
+348,736
+349,584
+350,684
+351,735
+352,471
+353,586
+354,523
+355,628
+356,585
+357,483
+358,585
+359,483
+360,736
+361,467
+362,735
+363,628
+364,668
+365,886
+366,668
+367,467
+368,455
+369,460
+370,518
+371,735
+372,627
+373,518
+374,684
+375,670
+376,684
+377,585
+378,628
+379,455
+380,523
+381,735
+382,673
+383,471
+384,684
+385,586
+386,778
+387,565
+388,673
+389,668
+390,522
+391,778
+392,560
+393,673
+394,584
+395,886
+396,627
+397,584
+398,517
+399,735
+400,522
+401,627
+402,455
+403,565
+404,627
+405,627
+406,533
+407,670
+408,684
+409,668
+410,533
+411,673
+412,585
+413,628
+414,576
+415,585
+416,534
+417,735
+418,576
+419,684
+420,523
+421,585
+422,684
+423,735
+424,455
+425,467
+426,670
+427,668
+428,522
+429,522
+430,523
+431,560
+432,517
+433,586
+434,673
+435,522
+436,533
+437,628
+438,483
+439,483
+440,584
+441,483
+442,560
+443,886
+444,451
+445,585
+446,522
+447,778
+448,586
+449,451
+450,584
+451,522
+452,627
+453,684
+454,736
+455,585
+456,534
+457,628
+458,736
+459,668
+460,522
+461,778
+462,471
+463,455
+464,533
+465,627
+466,460
+467,565
+468,576
+469,483
+470,534
+471,736
+472,533
+473,471
+474,584
+475,522
+476,576
+477,684
+478,576
+479,451
+480,585
+481,517
+482,585
+483,471
+484,518
+485,668
+486,560
+487,627
+488,585
+489,560
+490,584
+491,534
+492,670
+493,736
+494,533
+495,455
+496,483
+497,628
+498,673
+499,735
+500,586