-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathcycleTest.py
More file actions
122 lines (98 loc) · 3.91 KB
/
cycleTest.py
File metadata and controls
122 lines (98 loc) · 3.91 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
from sklearn.metrics import precision_score, recall_score, roc_auc_score
from sklearn.model_selection import train_test_split
from sklearn.neural_network import MLPClassifier
from data import load_adult, preprocess_adult
from metrics import DP, FTU
from train import train_decaf
# import tensorflow as tf
import tensorflow as tf
tf.config.run_functions_eagerly(True)
dataset = load_adult()
dataset = preprocess_adult(dataset)
dataset.head()
dataset_train, dataset_test = train_test_split(dataset, test_size=2000, stratify=dataset['income'])
# Define DAG for Adult dataset
dag = [
##can add these edges to make feedback loops
# ['income', 'education'],
# ['income', 'occupation'],
# ['income', 'relationship'],
# Edges from race
['race', 'occupation'],
['race', 'income'],
['race', 'hours-per-week'],
['race', 'education'],
['race', 'marital-status'],
# Edges from age
['age', 'occupation'],
['age', 'hours-per-week'],
['age', 'income'],
['age', 'workclass'],
['age', 'marital-status'],
['age', 'education'],
['age', 'relationship'],
# Edges from sex
['sex', 'occupation'],
['sex', 'marital-status'],
['sex', 'income'],
['sex', 'workclass'],
['sex', 'education'],
['sex', 'relationship'],
# Edges from native country
['native-country', 'marital-status'],
['native-country', 'hours-per-week'],
['native-country', 'education'],
['native-country', 'workclass'],
['native-country', 'income'],
['native-country', 'relationship'],
# Edges from marital status
['marital-status', 'occupation'],
['marital-status', 'hours-per-week'],
['marital-status', 'income'],
['marital-status', 'workclass'],
['marital-status', 'relationship'],
['marital-status', 'education'],
# Edges from education
['education', 'occupation'],
['education', 'hours-per-week'],
['education', 'income'],
['education', 'workclass'],
['education', 'relationship'],
# All remaining edges
['occupation', 'income'],
['hours-per-week', 'income'],
['workclass', 'income'],
['relationship', 'income']
]
def dag_to_idx(df, dag):
"""Convert columns in a DAG to the corresponding indices."""
dag_idx = []
for edge in dag:
dag_idx.append([df.columns.get_loc(edge[0]), df.columns.get_loc(edge[1])])
return dag_idx
# Convert the DAG to one that can be provided to the DECAF model
dag_seed = dag_to_idx(dataset, dag)
def eval_model(dataset_train, dataset_test):
"""Helper function that prints evaluation metrics."""
X_train, y_train = dataset_train.drop(columns=['income']), dataset_train['income']
X_test, y_test = dataset_test.drop(columns=['income']), dataset_test['income']
clf = MLPClassifier()
clf.fit(X_train, y_train)
y_pred = clf.predict(X_test)
precision = precision_score(y_test, y_pred)
recall = recall_score(y_test, y_pred)
auroc = roc_auc_score(y_test, y_pred)
dp = DP(clf, X_test)
ftu = FTU(clf, X_test)
return {'precision': precision, 'recall': recall, 'auroc': auroc,
'dp': dp, 'ftu': ftu}
synth_data = train_decaf(dataset_train, dag_seed, fairness_type=None, restricted='sex', protected=None, target='income', df=dataset)
results={}
results['biased']=eval_model(synth_data, dataset_test)
synth_data = train_decaf(dataset_train, dag_seed, fairness_type='FTU', restricted='sex', protected=None, target='income', df=dataset)
results['FTU']=eval_model(synth_data, dataset_test)
synth_data = train_decaf(dataset_train, dag_seed, fairness_type='CF', restricted='sex', protected=['occupation', 'hours-per-week', 'workclass', 'education'], target='income', df=dataset)
results['CF']=eval_model(synth_data, dataset_test)
synth_data = train_decaf(dataset_train, dag_seed, fairness_type='DP', restricted='sex', protected=None, target='income', df=dataset)
results['DP']=eval_model(synth_data, dataset_test)
print(results)