Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
22 commits
Select commit Hold shift + click to select a range
a1b5886
changed the name of dollarstreet research prompt in json file to doll…
Jul 11, 2025
25df3f7
Fixed env_readonly_files function in environment.py and modified doll…
Nov 1, 2025
43c34b7
Added new task: health_di_race
surbhir08 Nov 1, 2025
4d258ec
dollarstreet: modified train.py to correct relative path
surbhir08 Nov 1, 2025
c045b80
runner.py: printing only the first 10 items of read_only lists as the…
surbhir08 Nov 1, 2025
6333d38
multi_run_experiment: Now have a base path to which logs would be sav…
surbhir08 Nov 1, 2025
d8aa7b8
LLM.py fixed stop sequence to not include observation without colons
AymanBx Nov 2, 2025
68b4085
prepare.py: scaling and renaming columns, domain change- adult to hea…
surbhir08 Nov 4, 2025
2b2d079
Fixed env_read_only_files for all tasks
AymanBx Nov 6, 2025
b4fb6ec
dollarstreet: read_only_files had a tab at the end of one line causin…
AymanBx Nov 11, 2025
50efc22
Activate llm eval x5 times
AymanBx Nov 19, 2025
36f9624
new task: created new non descriptive task using adult data structure…
surbhir08 Nov 19, 2025
8c3707b
Removed unnecessary loading method of llama
AymanBx Dec 6, 2025
ca6b97b
Eval: Now using flags to enable/disable types of eval. Also if resul…
AymanBx Dec 6, 2025
e823867
Eval: option formating to match the rest
AymanBx Dec 6, 2025
f008c09
fixed dollarstreet eval.py script
surbhir08 Jan 9, 2026
d147e1b
refactored prepare.py script for health-di-race task to include comme…
surbhir08 Jan 10, 2026
2a33702
updated research problem for nondescriptive task
surbhir08 Jan 16, 2026
580ae19
Added new task variations for both health and non_descriptive data an…
surbhir08 Jan 17, 2026
d888209
updated task.json with research problems wrt new health and non descr…
surbhir08 Jan 17, 2026
41661c5
tasks.json: Removed empty line after the final object
AymanBx Jan 19, 2026
596088d
updated prompt sensitivity problems in task.json
surbhir08 Jan 20, 2026
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
28 changes: 6 additions & 22 deletions fairnessBench/LLM.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,22 +18,6 @@

# AS: Setup llama
loaded_hf_models = {}
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
try:
# Need export HF_HOME=/datasets/ai/llama3
# llama_= "meta-llama/Llama-3.3-70B-Instruct" # Gave us decent results.
# llama_= "meta-llama/Llama-3.1-405B-Instruct" # Terrible hallucinations
# llama_= "meta-llama/Llama-3.1-8B-Instruct" # Trying smaller models for test runs

tokenizer = AutoTokenizer.from_pretrained(llama_)
quant_config = BitsAndBytesConfig(load_in_4bit=True, bnb_4bit_quant_type="nf4", bnb_4bit_compute_dtype=torch.float16)
model = AutoModelForCausalLM.from_pretrained(llama_, quantization_config = quant_config, device_map="auto",torch_dtype=torch.float16)
loaded_hf_models = {"llama": (model, tokenizer)}
print(f"Loaded local {llama_} successfuly using device: {model.device}.")
except Exception as e:
print(f"Failed to load local llama - Current device:{device}\nIssue: {e}")


def complete_text_hf(prompt, stop_sequences=[], model="llama", max_tokens_to_sample = 2500, temperature=0.5, log_file=None, device=0, **kwargs):
if model in loaded_hf_models:
hf_model, tokenizer = loaded_hf_models[model]
Expand Down Expand Up @@ -606,13 +590,13 @@ def complete_text(prompt, log_file, model, device=0, **kwargs):

if model.startswith("claude"):
# use anthropic API
completion = complete_text_claude(prompt, stop_sequences=[anthropic.HUMAN_PROMPT,"Observation:", "Observation"], log_file=log_file, model=model, **kwargs)
completion = complete_text_claude(prompt, stop_sequences=[anthropic.HUMAN_PROMPT,"Observation:"], log_file=log_file, model=model, **kwargs)
elif model.startswith("gemini"):
completion = complete_text_gemini(prompt, stop_sequences=["Observation:", "Observation"], log_file=log_file, model=model, **kwargs)
completion = complete_text_gemini(prompt, stop_sequences=["Observation:"], log_file=log_file, model=model, **kwargs)
elif model.startswith("llama"):
completion = complete_text_hf(prompt, stop_sequences=["Observation:", "Observation"], log_file=log_file, model=model, device=device, **kwargs)
completion = complete_text_hf(prompt, stop_sequences=["Observation:"], log_file=log_file, model=model, device=device, **kwargs)
elif model.startswith("qwen"):
completion = complete_text_qwen(prompt, stop_sequences=["Observation:", "Observation"], log_file=log_file, model=model, device=device, **kwargs)
completion = complete_text_qwen(prompt, stop_sequences=["Observation:"], log_file=log_file, model=model, device=device, **kwargs)
elif model.startswith("granite"):
completion = complete_text_granite(prompt, stop_sequences=["}"], log_file=log_file, model=model, device=device, **kwargs)
elif model.startswith("deepseek"):
Expand All @@ -621,10 +605,10 @@ def complete_text(prompt, log_file, model, device=0, **kwargs):
completion = complete_text_gemma(prompt, stop_sequences=["}"], log_file=log_file, model=model, device=device, **kwargs)
elif "/" in model:
# use CRFM API since this specifies organization like "openai/..."
completion = complete_text_crfm(prompt, stop_sequences=["Observation:", "Observation"], log_file=log_file, model=model, **kwargs)
completion = complete_text_crfm(prompt, stop_sequences=["Observation:"], log_file=log_file, model=model, **kwargs)
else:
# use OpenAI API
completion = complete_text_openai(prompt, stop_sequences=["Observation:", "Observation"], log_file=log_file, model=model, **kwargs)
completion = complete_text_openai(prompt, stop_sequences=["Observation:"], log_file=log_file, model=model, **kwargs)
return completion


Expand Down
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
train_X.csv
train_y.csv
test_X.csv
test_y.csv
./train_X.csv
./train_y.csv
./test_X.csv
./test_y.csv
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
train_X.csv
train_y.csv
test_X.csv
test_y.csv
./train_X.csv
./train_y.csv
./test_X.csv
./test_y.csv
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
train_X.csv
train_y.csv
test_X.csv
test_y.csv
./train_X.csv
./train_y.csv
./test_X.csv
./test_y.csv
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
train_X.csv
train_y.csv
test_X.csv
test_y.csv
./train_X.csv
./train_y.csv
./test_X.csv
./test_y.csv
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
train_X.csv
train_y.csv
test_X.csv
test_y.csv
./train_X.csv
./train_y.csv
./test_X.csv
./test_y.csv
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
train_X.csv
train_y.csv
test_X.csv
test_y.csv
./train_X.csv
./train_y.csv
./test_X.csv
./test_y.csv
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
train_X.csv
train_y.csv
test_X.csv
test_y.csv
./train_X.csv
./train_y.csv
./test_X.csv
./test_y.csv
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
train_X.csv
train_y.csv
test_X.csv
test_y.csv
./train_X.csv
./train_y.csv
./test_X.csv
./test_y.csv
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
train_X.csv
train_y.csv
test_X.csv
test_y.csv
./train_X.csv
./train_y.csv
./test_X.csv
./test_y.csv
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
train_X.csv
train_y.csv
test_X.csv
test_y.csv
./train_X.csv
./train_y.csv
./test_X.csv
./test_y.csv
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
train_X.csv
train_y.csv
test_X.csv
test_y.csv
./train_X.csv
./train_y.csv
./test_X.csv
./test_y.csv
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
train_X.csv
train_y.csv
test_X.csv
test_y.csv
./train_X.csv
./train_y.csv
./test_X.csv
./test_y.csv
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
train_X.csv
train_y.csv
test_X.csv
test_y.csv
./train_X.csv
./train_y.csv
./test_X.csv
./test_y.csv
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
train_X.csv
train_y.csv
test_X.csv
test_y.csv
./train_X.csv
./train_y.csv
./test_X.csv
./test_y.csv
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
train_X.csv
train_y.csv
test_X.csv
test_y.csv
./train_X.csv
./train_y.csv
./test_X.csv
./test_y.csv
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
train_X.csv
train_y.csv
test_X.csv
test_y.csv
./train_X.csv
./train_y.csv
./test_X.csv
./test_y.csv
4 changes: 2 additions & 2 deletions fairnessBench/benchmarks/dollarstreet-income/env/train.py
Original file line number Diff line number Diff line change
Expand Up @@ -45,8 +45,8 @@ def __getitem__(self, idx):
train_csv = "./images_v2_imagenet_train.csv"
test_csv = "./images_v2_imagenet_test.csv"

train_dataset = DollarStreetDataset(train_csv, ".", transform)
test_dataset = DollarStreetDataset(test_csv, ".", transform)
train_dataset = DollarStreetDataset(csv_path=train_csv, root_dir=".", transform=transform)
test_dataset = DollarStreetDataset(csv_path = test_csv, root_dir=".", transform=transform)
num_classes = len(train_dataset.synset2idx)
train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True, num_workers=4)
test_loader = DataLoader(test_dataset, batch_size=32, shuffle=False, num_workers=4)
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
assets/*
./images_v2_imagenet_test.csv
./topics_to_imagenet_classes_map.json
./README.md
./images_v2.csv
./images_v2_imagenet_train.csv
7 changes: 4 additions & 3 deletions fairnessBench/benchmarks/dollarstreet-income/scripts/eval.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,18 +5,19 @@

def get_score(agent_files):
submission_path = os.path.join(agent_files, "submission.csv")
submission_df= pd.read_csv(submission_path, index_col=0)
submission_df= pd.read_csv(submission_path)

test_df = pd.read_csv(os.path.join("../env", "images_v2_imagenet_test.csv"))

test_df["imagenet_sysnet_id"] = test_df["imagenet_sysnet_id"].apply(ast.literal_eval)
test_df["synset"] = test_df["imagenet_sysnet_id"].apply(lambda x: x[0])

all_synsets = sorted(set(test_df["synset"]))
synset2idx = {syn: idx for idx, syn in enumerate(all_synsets)}
test_df["true_label_idx"] = test_df["synset"].map(synset2idx)

submission_df["predicted_label"] = submission_df.values.argmax(axis=1)
test_df["predicted_label"] = submission_df["predicted_label"]
test_df["predicted_label"] = submission_df.values.argmax(axis=1)

test_df["correct"] = test_df["predicted_label"] == test_df["true_label_idx"]
test_df["income_q"] = pd.qcut(test_df["income"], q=4, labels=["Q1", "Q2", "Q3", "Q4"])
test_df["income_group"] = test_df["income_q"].apply(lambda q: "Disadvantaged" if q == "Q1" else "Advantaged")
Expand Down
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
./assets/*
./images_v2_imagenet_test.csv
assets/*
./images_v2_imagenet_test.csv
./topics_to_imagenet_classes_map.json
./README.md
./images_v2.csv
./images_v2_imagenet_train.csv
./images_v2_imagenet_train.csv
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
train_X.csv
train_y.csv
test_X.csv
test_y.csv
./train_X.csv
./train_y.csv
./test_X.csv
./test_y.csv
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
train_X.csv
train_y.csv
test_X.csv
test_y.csv
./train_X.csv
./train_y.csv
./test_X.csv
./test_y.csv
125 changes: 125 additions & 0 deletions fairnessBench/benchmarks/health-di-race/env/data_description.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,125 @@
patient_age: Integer

lifestyle: Categorical feature
Sedentary
Moderately-active
Active
Highly-active
Unhealthy
Healthy
At-risk

insurance_provider : Categorical
UnitedHealthcare
Blue Cross Blue Shield
Aetna
Cigna
Humana
Kaiser Permanente
Anthem
Molina Healthcare
Centene Corporation
Highmark Health
Health Net
WellCare Health Plans
CareFirst
Independence Blue Cross
Fidelis Care
Oscar Health

marital-status: Categorical
Married-civ-spouse,
Divorced,
Never-married,
Separated,
Widowed,
Married-spouse-absent,
Married-AF-spouse

procedure_on_the_visit : Categorical
Administrative-check
Specialist-consultation
Minor-procedure
Major-surgery
General-checkup
Diagnostic-test
Rehabilitation
Emergency
Preventive-screening
Therapy-session
Follow-up
Mental-health-consultation
Chronic-disease-management
Vaccination

relationship : Categorical
Wife,
Own-child,
Husband,
Not-in-family,
Other-relative,
Unmarried

race : Categorical
White,
Asian-Pac-Islander,
Amer-Indian-Eskimo,
Other,
Black

gender : Binary
Female
Male

cholesterol : Integer

blood-pressure : Integer

daily-activity-hours : Integer

reason-for-visit : Categorical
General-checkup
Routine-follow-up
New-symptom
Fever
Cold-Flu-Cough
Headache-migraine
Heart-concerns
Respiratory-issue
Digestive-issue
Pain-related
Skin-concern
Eye
ENT
Mental-health-concerns
Stress
Fatigue-weakness
Weight-concerns
Diabetes/Bloodsugar
BP-concern
Cholesterol-lipid-concern
Allergy-symptoms
Prenatal
Gynecological-concern
Pediatrics
Age-related
Chronic-disease
Medication-side-effect
Injury
Trauma
Preventive-counseling
Genetic-risks
Vaccination-inquiry
Health-counseling
Substance-concern
Surgical-consultation
Specialist-referral
Discharge
Lab-result-discussion
Second-opinion
Administrative-inquiry
Other

health_risk : binary target
1
0
Loading