demos/01_data_io_demo.py (59 changes: 35 additions & 24 deletions)
@@ -27,50 +27,56 @@ def demo_basic_loading():
sample_data.to_csv('temp_data/sample.csv', index=False)
sample_data.to_excel('temp_data/sample.xlsx', index=False)
sample_data.to_json('temp_data/sample.json', orient='records')
sample_data.to_parquet('temp_data/sample.parquet', index=False)


# Load from CSV
print("\n📁 Loading CSV file...")
df_csv = load('temp_data/sample.csv')
print(f"✓ Loaded {len(df_csv)} rows from CSV")
print(df_csv.head())

# Load from Excel
print("\n📊 Loading Excel file...")
df_excel = load('temp_data/sample.xlsx')
print(f"✓ Loaded {len(df_excel)} rows from Excel")

# Load from JSON
print("\n📋 Loading JSON file...")
df_json = load('temp_data/sample.json')
print(f"✓ Loaded {len(df_json)} rows from JSON")
# Load from Parquet

# Load from Parquet (safely wrapped to prevent crashes)
print("\n🗂️ Loading Parquet file...")
df_parquet = load('temp_data/sample.parquet')
print(f"✓ Loaded {len(df_parquet)} rows from Parquet")
try:
# We attempt to save AND load here so we catch the missing engine error
sample_data.to_parquet('temp_data/sample.parquet', index=False)
df_parquet = load('temp_data/sample.parquet')
print(f"✓ Loaded {len(df_parquet)} rows from Parquet")
except ImportError:
print("⚠️ Skipped Parquet demo: 'pyarrow' or 'fastparquet' not installed.")
except Exception as e:
print(f"⚠️ Skipped Parquet demo: {str(e)}")


def demo_folder_loading():
"""Demo 2: Batch load multiple files from folder"""
print("\n" + "=" * 60)
print("DEMO 2: Batch Loading from Folder")
print("=" * 60)

# Create multiple CSV files
os.makedirs('temp_data/batch', exist_ok=True)

for i in range(3):
df = pd.DataFrame({
'id': range(i*10, (i+1)*10),
'value': range(100, 110)
})
df.to_csv(f'temp_data/batch/file_{i+1}.csv', index=False)

print("\n📂 Loading all CSV files from folder...")
dfs = read_folder('temp_data/batch', file_type='csv')
print(f"✓ Loaded {len(dfs)} files")

for i, df in enumerate(dfs, 1):
print(f" File {i}: {len(df)} rows")

@@ -80,34 +80,39 @@ def demo_save_operations():
print("\n" + "=" * 60)
print("DEMO 3: Saving Data")
print("=" * 60)

# Create sample data
df = pd.DataFrame({
'x': range(1, 6),
'y': [10, 20, 30, 40, 50]
})

os.makedirs('temp_data/output', exist_ok=True)

# Save as CSV
print("\n💾 Saving as CSV...")
save(df, 'temp_data/output/result.csv')
print("✓ Saved to result.csv")

# Save as Excel
print("\n💾 Saving as Excel...")
save(df, 'temp_data/output/result.xlsx')
print("✓ Saved to result.xlsx")

# Save as JSON
print("\n💾 Saving as JSON...")
save(df, 'temp_data/output/result.json')
print("✓ Saved to result.json")
# Save as Parquet

# Save as Parquet (safely wrapped to prevent crashes)
print("\n💾 Saving as Parquet...")
save(df, 'temp_data/output/result.parquet')
print("✓ Saved to result.parquet")
try:
save(df, 'temp_data/output/result.parquet')
print("✓ Saved to result.parquet")
except ImportError:
print("⚠️ Skipped Parquet save: 'pyarrow' or 'fastparquet' not installed.")
except Exception as e:
print(f"⚠️ Skipped Parquet save: {str(e)}")


def cleanup():
@@ -122,14 +122,14 @@ def cleanup():
print("\n" + "🚀" * 30)
print("DATA I/O OPERATIONS DEMO".center(60))
print("🚀" * 30 + "\n")

try:
demo_basic_loading()
demo_folder_loading()
demo_save_operations()
finally:
cleanup()

print("\n" + "✅" * 30)
print("ALL DEMOS COMPLETED".center(60))
print("✅" * 30 + "\n")
print("✅" * 30 + "\n")
demos/03_eda_demo.py (26 changes: 18 additions & 8 deletions)
@@ -84,19 +84,29 @@ def demo_health_check():
report = data_health_check(df)

print("\n✓ Health Check Report:")
print(f" Missing Values: {report.get('missing_values', {})}")
print(f" Data Types: {report.get('dtypes', {})}")
print(f" Shape: {report.get('shape', {})}")

# --- FIX START: Logic Bug Prevention ---
# The data_health_check function might return a float (score) instead of a dictionary.
# We check the type before trying to access keys to prevent an AttributeError.
if isinstance(report, dict):
print(f" Missing Values: {report.get('missing_values', {})}")
print(f" Data Types: {report.get('dtypes', {})}")
print(f" Shape: {report.get('shape', {})}")
else:
# Fallback if it returns a score (float/int)
print(f" Overall Health Score: {report}")
print(" (Detailed dictionary report was not returned by the function)")
# --- FIX END ---


def demo_feature_analysis():
"""Demo 5: Feature analysis report"""
print("\n" + "=" * 60)
print("DEMO 5: Feature Analysis Report")
print("=" * 60)

df = create_sample_data()

print("\n📊 Generating feature analysis report...")
try:
report = feature_analysis_report(df, target_col='performance')
@@ -110,13 +110,13 @@ def demo_feature_analysis():
print("\n" + "📊" * 30)
print("EXPLORATORY DATA ANALYSIS DEMO".center(60))
print("📊" * 30 + "\n")

demo_basic_stats()
demo_quick_eda()
demo_comprehensive_eda()
demo_health_check()
demo_feature_analysis()

print("\n" + "✅" * 30)
print("ALL DEMOS COMPLETED".center(60))
print("✅" * 30 + "\n")
print("✅" * 30 + "\n")