Skip to content

Commit 63abfbb

Browse files
authored
Merge pull request #7 from 24Beast/rahul/add-racial-attr
Adding race attribute to our experiments for WACV 2026
2 parents 7378a77 + 48b68d5 commit 63abfbb

1 file changed

Lines changed: 215 additions & 0 deletions

File tree

Lines changed: 215 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,215 @@
1+
import os
import sys
import torch
import random
import argparse
import numpy as np
import pandas as pd

# Fixed: the project root must be on sys.path BEFORE the project-local
# imports below, otherwise appending it has no effect on resolving them.
sys.path.append("/home/nshah96/DIC")

from LIC import LIC
from utils.text import CaptionProcessor
from utils.datacreator_race import CaptionRaceDataset
from attackerModels.NetModel import LSTM_ANN_Model, RNN_ANN_Model

# Make cuDNN deterministic so repeated runs with the same seed agree.
torch.backends.cudnn.deterministic = True

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# os.environ["PYTORCH_CUDA_ALLOC_CONF"] = "max_split_size_mb:512"

print("GPU Available:", torch.cuda.is_available())
# Print the currently active GPU
if torch.cuda.is_available():
    print("Current GPU:", torch.cuda.get_device_name(torch.cuda.current_device()))
    print("GPU Device Index:", torch.cuda.current_device())
    print("Total GPUs:", torch.cuda.device_count())

# Define thresholds for contextual LIC: 0.55, 0.60, 0.65, 0.70, 0.75.
contextual_thresholds = [round(x * 0.05, 2) for x in range(11, 16)]

# Step 1: Define Race Words and Token
race_words = [
    "white",
    "caucasian",
    "black",
    "african",
    "asian",
    "latino",
    "latina",
    "latinx",
    "hispanic",
    "native",
    "indigenous",
]
race_token = "race"
def calculate_lic(data_obj, processor, lic_model, mode="non-contextual", threshold=0.5):
    """Compute the amortized LIC (leakage) score for one dataset/mode.

    Args:
        data_obj: Dataset wrapper exposing ``getDataCombined()``, which must
            return a frame-like object with ``caption_human``,
            ``caption_model`` and ``race`` columns.
        processor: Caption pre-processor.
            NOTE(review): currently unused in this function — confirm whether
            captions should be run through it before scoring.
        lic_model: LIC instance exposing ``getAmortizedLeakage``.
        mode: Label used only in log messages ("contextual"/"non-contextual").
        threshold: Contextual threshold, used only in log messages.
            NOTE(review): the threshold is never forwarded to ``lic_model`` —
            verify this is intentional for contextual mode.

    Returns:
        The result of ``lic_model.getAmortizedLeakage`` (callers in ``main``
        read "Mean", "std" and "num_trials" keys from it).
    """
    # Fixed: a threshold of 0 was previously reported as "N/A" because the
    # old message used truthiness instead of an explicit None comparison.
    print(
        f"\nCalculating LIC for mode: {mode}, "
        f"Threshold: {threshold if threshold is not None else 'N/A'}"
    )

    combined_data = data_obj.getDataCombined()
    print("\nLoaded Combined Dataset:")
    print(f"Total Samples: {len(combined_data)}")

    # Extract features: the two caption columns plus the race attribute as a
    # float column vector on the active device.
    # NOTE(review): assumes ``combined_data["race"]`` holds numeric labels —
    # confirm the encoding against CaptionRaceDataset.
    human_ann = combined_data["caption_human"]
    model_ann = combined_data["caption_model"]
    feat = torch.tensor(
        combined_data["race"].values, dtype=torch.float, device=device
    ).reshape(-1, 1)

    print("\nPreprocessing Captions...")

    # Calculate the (un-normalized) LIC score.
    lic_score = lic_model.getAmortizedLeakage(
        feat, human_ann, model_ann, normalized=False
    )
    print(f"\nLIC Score for mode {mode}, Threshold {threshold}: {lic_score}")
    return lic_score
def main():
    """Parse CLI arguments, compute LIC (or contextual LIC) scores for the
    race attribute, and save the results to a CSV file."""
    parser = argparse.ArgumentParser(
        description="Test LIC and Contextual LIC calculations for race"
    )
    parser.add_argument(
        "--human_path", required=True, help="Path to human annotations pickle file"
    )
    parser.add_argument(
        "--model_path", required=True, help="Path to model annotations pickle file"
    )
    parser.add_argument(
        "--glove_path",
        required=True,
        help="Path to GloVe embeddings in word2vec format",
    )
    parser.add_argument(
        "--output_file", default="lic_scores_race.csv", help="Output file to save LIC scores"
    )
    parser.add_argument(
        "--mode",
        required=True,
        choices=["contextual", "non-contextual"],
        help="Choose mode: 'contextual' or 'non-contextual'",
    )
    parser.add_argument(
        "--use_rnn", action="store_true", help="Use RNN instead of LSTM"
    )
    parser.add_argument(
        "--bidirectional", action="store_true", help="Use bidirectional LSTM/RNN"
    )
    parser.add_argument(
        "--seed",
        # Fixed: without type=int a CLI-supplied seed arrives as a string and
        # np.random.seed raises; argparse now converts (default stays 0).
        type=int,
        default=0,
        help="Set random seed for the experiment. Helps ensure reproducibility.",
    )
    args = parser.parse_args()

    # Setting random seed for python, numpy and torch RNGs.
    random.seed(args.seed)
    np.random.seed(args.seed)
    torch.manual_seed(args.seed)

    # Initialize dataset and caption processor. The "gender" parameter names
    # are reused by the underlying API to carry the race attribute here.
    data_obj = CaptionRaceDataset(args.human_path, args.model_path)
    processor = CaptionProcessor(
        gender_words=race_words,
        obj_words=[],
        glove_path=args.glove_path,
        tokenizer="nltk",
        gender_token=race_token,
    )

    # Select the attacker architecture; both variants share the same ANN head.
    if args.use_rnn:
        model_type = RNN_ANN_Model
        model_params = {
            "embedding_dim": 250,
            "pad_idx": 0,
            "rnn_hidden_size": 256,
            "rnn_num_layers": 2,
            "rnn_bidirectional": args.bidirectional,
            "ann_output_size": 1,
            "num_ann_layers": 5,
            "ann_numFirst": 64,
        }

    else:
        model_type = LSTM_ANN_Model
        model_params = {
            "embedding_dim": 250,
            "pad_idx": 0,
            "lstm_hidden_size": 256,
            "lstm_num_layers": 2,
            "lstm_bidirectional": args.bidirectional,
            "ann_output_size": 1,
            "num_ann_layers": 5,
            "ann_numFirst": 64,
        }

    # Initialize LIC
    lic_model = LIC(
        model_params={
            "attacker_class": model_type,
            "attacker_params": model_params,
        },
        train_params={
            "learning_rate": 0.01,
            "loss_function": "bce",
            "epochs": 50,
            "batch_size": 1024,
        },
        gender_words=race_words,
        obj_words=[],
        gender_token=race_token,
        obj_token="obj",
        glove_path=args.glove_path,
        device=device,
        eval_metric="bce",
    )

    # Initialize results storage
    results = []

    if args.mode == "non-contextual":
        non_contextual_lic = calculate_lic(
            data_obj, processor, lic_model, mode="non-contextual"
        )
        results.append(
            {
                "mode": "non-contextual",
                "threshold": "N/A",
                "lic_score_mean": non_contextual_lic["Mean"].item(),
                "lic_score_std_dev": non_contextual_lic["std"].item(),
                "Number of Trials": non_contextual_lic["num_trials"],
            }
        )

    elif args.mode == "contextual":
        # One run per threshold; each appends a row to the results table.
        for threshold in contextual_thresholds:
            contextual_lic = calculate_lic(
                data_obj, processor, lic_model, mode="contextual", threshold=threshold
            )
            results.append(
                {
                    "mode": "contextual",
                    "threshold": threshold,
                    "lic_score_mean": contextual_lic["Mean"].item(),
                    "lic_score_std_dev": contextual_lic["std"].item(),
                    "Number of Trials": contextual_lic["num_trials"],
                }
            )

    # Save results to CSV. Fixed: the old string-splitting produced "" for a
    # bare filename (the default), and os.makedirs("") raises; now the
    # directory is created only when the path actually contains one.
    output_dir = os.path.dirname(args.output_file)
    if output_dir:
        os.makedirs(output_dir, exist_ok=True)
    results_df = pd.DataFrame(results)
    results_df.to_csv(args.output_file, index=False)
    print(f"\nResults saved to {args.output_file}")


if __name__ == "__main__":
    main()

0 commit comments

Comments
 (0)