-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathtest_nan_check_one_csv.py
More file actions
37 lines (29 loc) · 1.01 KB
/
test_nan_check_one_csv.py
File metadata and controls
37 lines (29 loc) · 1.01 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
import pandas as pd
import numpy as np
import math
# === SET YOUR FILE PATH HERE ===
CSV_PATH = "datasets/refined/og_unified/unified_dataset.csv"  # <-- change this to your actual CSV file


def count_bad_values(df):
    """Count null/NaN and infinite values per column of *df*.

    Args:
        df: The pandas DataFrame to inspect.

    Returns:
        A dict mapping each column label that has at least one bad value to
        a ``(null_count, inf_count)`` tuple of plain ints. Columns without
        any null, NaN, or inf value are omitted, so an empty dict means the
        frame is clean.
    """
    report = {}
    # items() yields one Series per column even when labels are duplicated,
    # whereas df[label] would return a DataFrame in that case.
    for col, series in df.items():
        null_count = int(series.isnull().sum())
        inf_count = 0
        # Only check for inf in numeric columns; np.isinf is not defined for
        # object/string data.
        if pd.api.types.is_numeric_dtype(series):
            inf_count = int(np.isinf(series).sum())
        if null_count + inf_count > 0:
            report[col] = (null_count, inf_count)
    return report


def main(csv_path=CSV_PATH):
    """Load a CSV and print a per-column report of null, NaN, and inf values.

    Args:
        csv_path: Anything ``pd.read_csv`` accepts (a path or a file-like
            object). Defaults to ``CSV_PATH``.

    Returns:
        Process exit status: 1 when the CSV could not be loaded, 0 otherwise
        (bad values are reported but do not change the status).
    """
    # === Load CSV ===
    try:
        df = pd.read_csv(csv_path)
    except Exception as e:  # top-level boundary: report the failure and stop
        print(f"❌ Failed to load CSV: {e}")
        return 1

    # === Check for problematic values per column ===
    print("🔍 Scanning for null, NaN, and inf values...\n")
    report = count_bad_values(df)
    for col, (null_count, inf_count) in report.items():
        print(f"⚠️ Column: {col}")
        print(f" - null / NaN: {null_count}")
        print(f" - inf: {inf_count}")
        print(f" - total bad rows: {null_count + inf_count}\n")

    if not report:
        print("✅ No null, NaN, or inf values found in any column.")
    return 0


# Guarded so that importing this module (e.g. pytest collecting a file named
# test_*.py) does not read the CSV or terminate the interpreter.
if __name__ == "__main__":
    raise SystemExit(main())