Changes from all commits
26 commits
ca9e4e1
Ignore local configurations
diogodebastos Aug 9, 2017
dd6ffde
Plotting make-up. Add weights to histogram plot for normalization
diogodebastos Aug 9, 2017
f5320e2
Delete localConfig.py
diogodebastos Aug 9, 2017
7ce6350
fix plots
g-marques Aug 10, 2017
9d6b566
more plot aesthetics
g-marques Aug 10, 2017
937e6cb
Set2 added + KS test
g-marques Aug 10, 2017
55a9290
Added gridSearch
diogodebastos Aug 11, 2017
a00330c
update with cbeirao
diogodebastos Aug 22, 2017
a75a412
alteration in commonFunctions - different Dev and Val signal samples.
g-marques Aug 22, 2017
6b78537
update
g-marques Aug 22, 2017
ec56f28
DM = 30 data
diogodebastos Aug 22, 2017
bcec950
fixed trainNN print
diogodebastos Aug 22, 2017
24924d6
commonFunctions now with different train and test signal data sets
g-marques Aug 22, 2017
36ab261
train_loop - grid search on #layers and #neurons
g-marques Aug 22, 2017
1c4a7b4
Merge branch 'master' of github.com:g-marques/StopNN
g-marques Aug 22, 2017
06eeddb
Set2 fix
diogodebastos Aug 22, 2017
859fa65
Maximize FOM
diogodebastos Aug 22, 2017
90f8e97
Separated DATA preparation from training and fixed gridsearch
diogodebastos Aug 23, 2017
c925984
optimize trainNN
diogodebastos Aug 23, 2017
e74c1eb
NN testing software
g-marques Aug 23, 2017
e27516f
Added testNN
diogodebastos Aug 23, 2017
28a7e61
trainNN only trains one NN and saves its parameters
diogodebastos Aug 23, 2017
4cc9d1e
created trainNN, testNN, preparedDATA, updated commonFunctions
g-marques Aug 23, 2017
fd0dfae
Delete trainNN_loop.py
g-marques Aug 23, 2017
f1f2aa6
manualGridSearch uploaded
g-marques Aug 23, 2017
bef885b
Merge branch 'master' of github.com:g-marques/StopNN
g-marques Aug 23, 2017
6 changes: 6 additions & 0 deletions .gitignore
@@ -4,4 +4,10 @@ keras-tf/*
*.pcm
*.root
*.h5
*.png
*.json
*.pyc
*.txt
*.sav
scaler.sav
localConfig.py
Empty file modified README.md
100644 → 100755
Empty file.
Empty file modified Stop4Body.sublime-project
100644 → 100755
Empty file.
Empty file modified applyNN.py
100644 → 100755
Empty file.
113 changes: 88 additions & 25 deletions commonFunctions.py
100644 → 100755
@@ -1,9 +1,34 @@
import root_numpy
import pandas
import numpy as np
from math import log

signalMap = {
"DM30" : [],
"DM30" : ["T2DegStop_250_220",
"T2DegStop_275_245",
"T2DegStop_300_270",
"T2DegStop_325_295",
"T2DegStop_350_320",
"T2DegStop_375_345",
"T2DegStop_400_370",
"T2DegStop_425_395",
"T2DegStop_450_420",
"T2DegStop_475_445",
"T2DegStop_500_470",
"T2DegStop_525_495",
"T2DegStop_550_520",
"T2DegStop_575_545",
"T2DegStop_600_570",
"T2DegStop_625_595",
"T2DegStop_650_620",
"T2DegStop_675_645",
"T2DegStop_700_670",
"T2DegStop_725_695",
"T2DegStop_750_720",
"T2DegStop_775_745",
"T2DegStop_800_770"],
"300_270" : ["T2DegStop_300_270"],
"550_520" : ["T2DegStop_550_520"]
}
bkgDatasets = [
"Wjets_70to100",
@@ -27,9 +52,11 @@
]


def StopDataLoader(path, features, selection="", treename="bdttree", suffix="", signal="DM30", fraction=1.0):
def StopDataLoader(path, features, test="550_520", selection="", treename="bdttree", suffix="", signal="DM30", fraction=1.0):
if signal not in signalMap:
raise KeyError("Unknown signal requested ("+signal+")")
raise KeyError("Unknown training signal requested ("+signal+")")
if test not in signalMap:
raise KeyError("Unknown test signal requested ("+test+")")
if fraction >= 1.0:
fraction = 1.0
if fraction < 0.0:
@@ -39,43 +66,40 @@ def StopDataLoader(path, features, selection="", treename="bdttree", suffix="",
if "weight" not in features:
features.append("weight")



sigDev = None
sigVal = None
for sigName in signalMap[signal]:
stopM = int(sigName[10:13])


for sigName_test in signalMap[test]:
tmp = root_numpy.root2array(
path + "/train/" + sigName + suffix + ".root",
path + "test/" + sigName_test + suffix + ".root",
treename=treename,
selection=selection,
branches=features
)
if fraction < 1.0:
tmp = tmp[:int(len(tmp)*fraction)]
if sigDev is None:
sigDev = pandas.DataFrame(tmp)
sigDev["stopM"] = stopM
if sigVal is None:
sigVal = pandas.DataFrame(tmp)
else:
tmp2 = pandas.DataFrame(tmp)
tmp2["stopM"] = stopM
sigDev = sigDev.append(tmp2, ignore_index=True)
sigVal = sigVal.append(pandas.DataFrame(tmp), ignore_index=True)


for sigName in signalMap[signal]:
tmp = root_numpy.root2array(
path + "/test/" + sigName + suffix + ".root",
path + "train/" + sigName + suffix + ".root",
treename=treename,
selection=selection,
branches=features
)
if fraction < 1.0:
tmp = tmp[:int(len(tmp)*fraction)]
if sigVal is None:
sigVal = pandas.DataFrame(tmp)
sigVal["stopM"] = stopM
if sigDev is None:
sigDev = pandas.DataFrame(tmp)
else:
tmp2 = pandas.DataFrame(tmp)
tmp2["stopM"] = stopM
sigVal = sigVal.append(tmp2, ignore_index=True)
sigDev = sigDev.append(pandas.DataFrame(tmp), ignore_index=True)



bkgDev = None
bkgVal = None
@@ -115,9 +139,6 @@ def StopDataLoader(path, features, selection="", treename="bdttree", suffix="",
bkgDev["sampleWeight"] = 1
bkgVal["sampleWeight"] = 1

bkgDev["stopM"] = -1
bkgVal["stopM"] = -1

if fraction < 1.0:
sigDev.weight = sigDev.weight/fraction
sigVal.weight = sigVal.weight/fraction
@@ -138,5 +159,47 @@ def StopDataLoader(path, features, selection="", treename="bdttree", suffix="",
dev = dev.append(bkgDev.copy(), ignore_index=True)
val = sigVal.copy()
val = val.append(bkgVal.copy(), ignore_index=True)

return dev, val

def FOM1(sIn, bIn):
    s, sErr = sIn
    b, bErr = bIn
    fom = s / (b**0.5)
    fomErr = ((sErr / (b**0.5))**2+(bErr*s / (2*(b)**(1.5)) )**2)**0.5
    return (fom, fomErr)

def FOM2(sIn, bIn):
    s, sErr = sIn
    b, bErr = bIn
    fom = s / ((s+b)**0.5)
    fomErr = ((sErr*(2*b + s)/(2*(b + s)**1.5))**2 + (bErr * s / (2*(b + s)**1.5))**2)**0.5
    return (fom, fomErr)

def FullFOM(sIn, bIn, fValue=0.2):
    s, sErr = sIn
    b, bErr = bIn
    fomErr = 0.0 # Add the computation of the uncertainty later
    fomA = 2*(s+b)*log(((s+b)*(b + (fValue*b)**2))/(b**2 + (s + b) * (fValue*b)**2))
    fomB = log(1 + (s*b*b*fValue*fValue)/(b*(b+(fValue*b)**2)))/(fValue**2)
    fom = (fomA - fomB)**0.5
    return (fom, fomErr)

def getYields(dataVal, cut=0.5, luminosity=35866, splitFactor=2):
    # select the test data passing the NN cut
    selectedVal = dataVal[dataVal.NN>cut]

    # separate the true positives (signal) from the false positives (background)
    selectedSig = selectedVal[selectedVal.category == 1]
    selectedBkg = selectedVal[selectedVal.category == 0]

    sigYield = selectedSig.weight.sum()
    sigYieldUnc = np.sqrt(np.sum(np.square(selectedSig.weight)))
    bkgYield = selectedBkg.weight.sum()
    bkgYieldUnc = np.sqrt(np.sum(np.square(selectedBkg.weight)))

    sigYield = sigYield * luminosity * splitFactor # the splitFactor compensates for the train/test splitting
    sigYieldUnc = sigYieldUnc * luminosity * splitFactor
    bkgYield = bkgYield * luminosity * splitFactor
    bkgYieldUnc = bkgYieldUnc * luminosity * splitFactor

    return ((sigYield, sigYieldUnc), (bkgYield, bkgYieldUnc))
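
The new helpers are meant to be chained: getYields turns a scored validation frame (with NN, category and weight columns) into luminosity-scaled signal and background yields, and FullFOM turns those yields into a figure of merit. A minimal sketch of the cut scan behind the "Maximize FOM" commit, assuming such a frame is available; the scan helper itself is illustrative and not part of this PR:

import numpy as np
from commonFunctions import FullFOM, getYields

def scanNNCut(dataVal, cuts=np.linspace(0.0, 0.95, 20)):
    # Evaluate the figure of merit at each candidate NN cut and keep the best one
    bestCut, bestFOM = None, -1.0
    for cut in cuts:
        sigYield, bkgYield = getYields(dataVal, cut=cut)
        if bkgYield[0] <= 0:  # FullFOM is undefined when no background survives
            continue
        fom, _ = FullFOM(sigYield, bkgYield)
        if fom > bestFOM:
            bestCut, bestFOM = cut, fom
    return bestCut, bestFOM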
62 changes: 62 additions & 0 deletions graphicsNN.py
@@ -0,0 +1,62 @@
#from matplotlib.colors import LogNorm
import matplotlib.pyplot as plt
import numpy as np
from mpl_toolkits.mplot3d import Axes3D

f = open('DATA_loop_test_trash_550_520.txt', 'r')

layers = []
neurons = []
cohen_kappa =[]
FOM_max = []
FOM_cut = []
KS_test_stat = []
KS_test_pval = []
layers_legend = []
line_index=0

for line in f:
    if line_index%7==0:
        layers.append(float(line))
    elif line_index%7==1:
        neurons.append(float(line))
    elif line_index%7==2:
        cohen_kappa.append(float(line))
    elif line_index%7==3:
        FOM_max.append(float(line))
    elif line_index%7==4:
        FOM_cut.append(float(line))
    elif line_index%7==5:
        KS_test_stat.append(float(line))
    elif line_index%7==6:
        KS_test_pval.append(float(line))
    line_index = line_index + 1


for l in list(set(layers)):
    layers_legend.append(str(l)+" layers")


nol = len(list(set(layers)))
non = len(list(set(neurons)))



plt.figure(figsize=(7,6))
plt.xlabel('Number of neurons per layer')
plt.ylabel('F.O.M.')
#plt.title("Cohen's kappa: {0}".format(cohen_kappa), fontsize=10)
plt.suptitle("FOM for several configurations of Neural Nets", fontsize=13, fontweight='bold')
#plt.title("Cohen's kappa: {0}\nKolmogorov Smirnov test: {1}".format(cohen_kappa, km_value[1]), fontsize=10)
for x in range(0, nol):
    plt.plot(neurons[int(x*non):int((x+1)*non)], FOM_max[int(x*non):int((x+1)*non)])
#plt.plot(neurons[0:18], FOM_max[0:18], "b")
#plt.plot(neurons[18:36], FOM_max[18:36], "r")
#plt.plot(neurons[36:54], FOM_max[36:54], "g")
#plt.plot(neurons[54:72], FOM_max[54:72], "c")
plt.legend(layers_legend, loc='best')
plt.show()

plt.hist2d(neurons, layers, bins=[non,nol], weights=FOM_max)
plt.colorbar()
plt.show()
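
graphicsNN.py assumes the grid-search log is a flat text file with one value per line and seven lines per trained network, in the fixed order: layers, neurons, Cohen's kappa, maximum FOM, FOM cut, KS statistic, KS p-value. The slicing by non in the plot loop further assumes that all neuron counts for one layer count appear consecutively. A hypothetical one-network record, purely to illustrate the layout (the numbers are made up):

2
14
0.63
1.87
0.71
0.043
0.29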
49 changes: 49 additions & 0 deletions gridSearch.py
@@ -0,0 +1,49 @@
import os
os.environ['TF_CPP_MIN_LOG_LEVEL']='2'
import numpy
from sklearn.model_selection import GridSearchCV
from keras.wrappers.scikit_learn import KerasClassifier
import time
from keras.models import Sequential
from keras.layers import Dense, Dropout, AlphaDropout
from keras.constraints import maxnorm

from prepareDATA import *

compileArgs = {'loss': 'binary_crossentropy', 'optimizer': 'adam', 'metrics': ["accuracy"]}

# Fix seed for reproducibility
seed = 42
numpy.random.seed(seed)

# Tune the Number of Neurons in the Hidden Layer
def myClassifier(nIn=len(trainFeatures), nOut=1, compileArgs=compileArgs, layers=1, neurons=1):
    model = Sequential()
    model.add(Dense(nIn, input_dim=nIn, kernel_initializer='he_normal', activation='relu'))
    for i in range(0,layers):
        model.add(Dense(neurons, kernel_initializer='he_normal', activation='relu'))
    model.add(Dense(nOut, activation="sigmoid", kernel_initializer='glorot_normal'))
    model.compile(**compileArgs)
    print("Training with %i layers and %i neurons" % (layers, neurons))
    return model


model = KerasClassifier(build_fn=myClassifier, epochs = 10, batch_size = 20, verbose = 1)

neurons = [10,12,14,16]
layers = [1,2,3]
#layers = [1,2,3,4,5]
param_grid = dict(neurons=neurons, layers=layers)
grid = GridSearchCV(estimator = model, param_grid = param_grid, n_jobs=-1) #n_jobs = -1 -> use all available CPU cores
print("Starting the training")
start = time.time()
grid_result = grid.fit(XDev,YDev)
print("Training took ", time.time()-start, " seconds")

print("Best: %f using %s" % (grid_result.best_score_, grid_result.best_params_))
means = grid_result.cv_results_['mean_test_score']
stds = grid_result.cv_results_['std_test_score']
params = grid_result.cv_results_['params']
for mean, stdev, param in zip(means, stds, params):
    print("%f (%f) with: %r" % (mean, stdev, param))
3 changes: 0 additions & 3 deletions localConfig.py

This file was deleted.
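
With localConfig.py removed here and added to .gitignore, each user is expected to keep a private, untracked copy. A hypothetical minimal template, assuming the file only has to point the scripts at the local sample area (the real settings were machine-specific and are not shown in this PR):

# localConfig.py -- hypothetical template, kept out of git via .gitignore
loc = "/path/to/local/samples/"  # illustrative variable name and path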
