From bf77c5c2139b9a4f14292717620ee53ab977c173 Mon Sep 17 00:00:00 2001 From: Dev Studyyy Date: Sat, 24 Jan 2026 13:09:24 +0530 Subject: [PATCH 1/2] Fix: Prevent crash in 01_data_io_demo.py when pyarrow is missing --- demos/01_data_io_demo.py | 59 ++++++++++++++++++++++++---------------- 1 file changed, 35 insertions(+), 24 deletions(-) diff --git a/demos/01_data_io_demo.py b/demos/01_data_io_demo.py index 1071feb..7e00972 100644 --- a/demos/01_data_io_demo.py +++ b/demos/01_data_io_demo.py @@ -27,28 +27,34 @@ def demo_basic_loading(): sample_data.to_csv('temp_data/sample.csv', index=False) sample_data.to_excel('temp_data/sample.xlsx', index=False) sample_data.to_json('temp_data/sample.json', orient='records') - sample_data.to_parquet('temp_data/sample.parquet', index=False) - + # Load from CSV print("\n📁 Loading CSV file...") df_csv = load('temp_data/sample.csv') print(f"✓ Loaded {len(df_csv)} rows from CSV") print(df_csv.head()) - + # Load from Excel print("\n📊 Loading Excel file...") df_excel = load('temp_data/sample.xlsx') print(f"✓ Loaded {len(df_excel)} rows from Excel") - + # Load from JSON print("\n📋 Loading JSON file...") df_json = load('temp_data/sample.json') print(f"✓ Loaded {len(df_json)} rows from JSON") - + # Load from Parquet + + # Load from Parquet (Safely wrapped to prevent crashes) print("\n🗂️ Loading Parquet file...") - df_parquet = load('temp_data/sample.parquet') - print(f"✓ Loaded {len(df_parquet)} rows from Parquet") + try: + # We attempt to save AND load here so we catch the missing engine error + sample_data.to_parquet('temp_data/sample.parquet', index=False) + df_parquet = load('temp_data/sample.parquet') + print(f"✓ Loaded {len(df_parquet)} rows from Parquet") + except ImportError: + print("⚠️ Skipped Parquet demo: 'pyarrow' or 'fastparquet' not installed.") + except Exception as e: + print(f"⚠️ Skipped Parquet demo: {str(e)}") def demo_folder_loading(): @@ -56,21 +62,21 @@ def demo_folder_loading(): 
print("\n" + "=" * 60) print("DEMO 2: Batch Loading from Folder") print("=" * 60) - + # Create multiple CSV files os.makedirs('temp_data/batch', exist_ok=True) - + for i in range(3): df = pd.DataFrame({ 'id': range(i*10, (i+1)*10), 'value': range(100, 110) }) df.to_csv(f'temp_data/batch/file_{i+1}.csv', index=False) - + print("\n📂 Loading all CSV files from folder...") dfs = read_folder('temp_data/batch', file_type='csv') print(f"✓ Loaded {len(dfs)} files") - + for i, df in enumerate(dfs, 1): print(f" File {i}: {len(df)} rows") @@ -80,34 +86,39 @@ def demo_save_operations(): print("\n" + "=" * 60) print("DEMO 3: Saving Data") print("=" * 60) - + # Create sample data df = pd.DataFrame({ 'x': range(1, 6), 'y': [10, 20, 30, 40, 50] }) - + os.makedirs('temp_data/output', exist_ok=True) - + # Save as CSV print("\n💾 Saving as CSV...") save(df, 'temp_data/output/result.csv') print("✓ Saved to result.csv") - + # Save as Excel print("\n💾 Saving as Excel...") save(df, 'temp_data/output/result.xlsx') print("✓ Saved to result.xlsx") - + # Save as JSON print("\n💾 Saving as JSON...") save(df, 'temp_data/output/result.json') print("✓ Saved to result.json") - + # Save as Parquet + + # Save as Parquet (Safely wrapped to prevent crashes) print("\n💾 Saving as Parquet...") - save(df, 'temp_data/output/result.parquet') - print("✓ Saved to result.parquet") + try: + save(df, 'temp_data/output/result.parquet') + print("✓ Saved to result.parquet") + except ImportError: + print("⚠️ Skipped Parquet save: 'pyarrow' or 'fastparquet' not installed.") + except Exception as e: + print(f"⚠️ Skipped Parquet save: {str(e)}") def cleanup(): @@ -122,14 +133,14 @@ def cleanup(): print("\n" + "🚀" * 30) print("DATA I/O OPERATIONS DEMO".center(60)) print("🚀" * 30 + "\n") - + try: demo_basic_loading() demo_folder_loading() demo_save_operations() finally: cleanup() - + print("\n" + "✅" * 30) print("ALL DEMOS COMPLETED".center(60)) - print("✅" * 30 + "\n") + 
print("✅" * 30 + "\n") \ No newline at end of file From b478503437e13959c95dac148a9e8f0dc593ad6d Mon Sep 17 00:00:00 2001 From: Dev Studyyy Date: Sat, 24 Jan 2026 14:19:56 +0530 Subject: [PATCH 2/2] Fix: Handle float return type in demo_health_check to prevent AttributeError --- demos/03_eda_demo.py | 26 ++++++++++++++++++-------- 1 file changed, 18 insertions(+), 8 deletions(-) diff --git a/demos/03_eda_demo.py b/demos/03_eda_demo.py index a19bacd..2b131f7 100644 --- a/demos/03_eda_demo.py +++ b/demos/03_eda_demo.py @@ -84,9 +84,19 @@ def demo_health_check(): report = data_health_check(df) print("\n✓ Health Check Report:") - print(f" Missing Values: {report.get('missing_values', {})}") - print(f" Data Types: {report.get('dtypes', {})}") - print(f" Shape: {report.get('shape', {})}") + + # --- FIX START: Logic Bug Prevention --- + # The data_health_check function might return a float (score) instead of a dictionary. + # We check the type before trying to access keys to prevent an AttributeError. 
+ if isinstance(report, dict): + print(f" Missing Values: {report.get('missing_values', {})}") + print(f" Data Types: {report.get('dtypes', {})}") + print(f" Shape: {report.get('shape', {})}") + else: + # Fallback if it returns a score (float/int) + print(f" Overall Health Score: {report}") + print(" (Detailed dictionary report was not returned by the function)") + # --- FIX END --- def demo_feature_analysis(): @@ -94,9 +104,9 @@ def demo_feature_analysis(): print("\n" + "=" * 60) print("DEMO 5: Feature Analysis Report") print("=" * 60) - + df = create_sample_data() - + print("\n📊 Generating feature analysis report...") try: report = feature_analysis_report(df, target_col='performance') @@ -110,13 +120,13 @@ def demo_feature_analysis(): print("\n" + "📊" * 30) print("EXPLORATORY DATA ANALYSIS DEMO".center(60)) print("📊" * 30 + "\n") - + demo_basic_stats() demo_quick_eda() demo_comprehensive_eda() demo_health_check() demo_feature_analysis() - + print("\n" + "✅" * 30) print("ALL DEMOS COMPLETED".center(60)) - print("✅" * 30 + "\n") + print("✅" * 30 + "\n") \ No newline at end of file