demos/01_data_io_demo.py (59 changes: 35 additions & 24 deletions)
@@ -27,50 +27,56 @@ def demo_basic_loading():
sample_data.to_csv('temp_data/sample.csv', index=False)
sample_data.to_excel('temp_data/sample.xlsx', index=False)
sample_data.to_json('temp_data/sample.json', orient='records')
sample_data.to_parquet('temp_data/sample.parquet', index=False)


# Load from CSV
print("\n📁 Loading CSV file...")
df_csv = load('temp_data/sample.csv')
print(f"✓ Loaded {len(df_csv)} rows from CSV")
print(df_csv.head())

# Load from Excel
print("\n📊 Loading Excel file...")
df_excel = load('temp_data/sample.xlsx')
print(f"✓ Loaded {len(df_excel)} rows from Excel")

# Load from JSON
print("\n📋 Loading JSON file...")
df_json = load('temp_data/sample.json')
print(f"✓ Loaded {len(df_json)} rows from JSON")
# Load from Parquet

# Load from Parquet (safely wrapped to prevent crashes)
print("\n🗂️ Loading Parquet file...")
df_parquet = load('temp_data/sample.parquet')
print(f"✓ Loaded {len(df_parquet)} rows from Parquet")
try:
# We attempt to save AND load here so we catch the missing engine error
sample_data.to_parquet('temp_data/sample.parquet', index=False)
df_parquet = load('temp_data/sample.parquet')
print(f"✓ Loaded {len(df_parquet)} rows from Parquet")
except ImportError:
print("⚠️ Skipped Parquet demo: 'pyarrow' or 'fastparquet' not installed.")
except Exception as e:
print(f"⚠️ Skipped Parquet demo: {str(e)}")


def demo_folder_loading():
"""Demo 2: Batch load multiple files from folder"""
print("\n" + "=" * 60)
print("DEMO 2: Batch Loading from Folder")
print("=" * 60)

# Create multiple CSV files
os.makedirs('temp_data/batch', exist_ok=True)

for i in range(3):
df = pd.DataFrame({
'id': range(i*10, (i+1)*10),
'value': range(100, 110)
})
df.to_csv(f'temp_data/batch/file_{i+1}.csv', index=False)

print("\n📂 Loading all CSV files from folder...")
dfs = read_folder('temp_data/batch', file_type='csv')
print(f"✓ Loaded {len(dfs)} files")

for i, df in enumerate(dfs, 1):
print(f" File {i}: {len(df)} rows")

@@ -80,34 +80,39 @@ def demo_save_operations():
print("\n" + "=" * 60)
print("DEMO 3: Saving Data")
print("=" * 60)

# Create sample data
df = pd.DataFrame({
'x': range(1, 6),
'y': [10, 20, 30, 40, 50]
})

os.makedirs('temp_data/output', exist_ok=True)

# Save as CSV
print("\n💾 Saving as CSV...")
save(df, 'temp_data/output/result.csv')
print("✓ Saved to result.csv")

# Save as Excel
print("\n💾 Saving as Excel...")
save(df, 'temp_data/output/result.xlsx')
print("✓ Saved to result.xlsx")

# Save as JSON
print("\n💾 Saving as JSON...")
save(df, 'temp_data/output/result.json')
print("✓ Saved to result.json")
# Save as Parquet

# Save as Parquet (safely wrapped to prevent crashes)
print("\n💾 Saving as Parquet...")
save(df, 'temp_data/output/result.parquet')
print("✓ Saved to result.parquet")
try:
save(df, 'temp_data/output/result.parquet')
print("✓ Saved to result.parquet")
except ImportError:
print("⚠️ Skipped Parquet save: 'pyarrow' or 'fastparquet' not installed.")
except Exception as e:
print(f"⚠️ Skipped Parquet save: {str(e)}")


def cleanup():
@@ -122,14 +122,14 @@ def cleanup():
print("\n" + "🚀" * 30)
print("DATA I/O OPERATIONS DEMO".center(60))
print("🚀" * 30 + "\n")

try:
demo_basic_loading()
demo_folder_loading()
demo_save_operations()
finally:
cleanup()

print("\n" + "✅" * 30)
print("ALL DEMOS COMPLETED".center(60))
print("✅" * 30 + "\n")
print("✅" * 30 + "\n")
demos/03_eda_demo.py (26 changes: 18 additions & 8 deletions)
@@ -84,19 +84,29 @@ def demo_health_check():
report = data_health_check(df)

print("\n✓ Health Check Report:")
print(f" Missing Values: {report.get('missing_values', {})}")
print(f" Data Types: {report.get('dtypes', {})}")
print(f" Shape: {report.get('shape', {})}")

# --- FIX START: Logic Bug Prevention ---
# The data_health_check function might return a float (score) instead of a dictionary.
# We check the type before trying to access keys to prevent an AttributeError.
if isinstance(report, dict):
print(f" Missing Values: {report.get('missing_values', {})}")
print(f" Data Types: {report.get('dtypes', {})}")
print(f" Shape: {report.get('shape', {})}")
else:
# Fallback if it returns a score (float/int)
print(f" Overall Health Score: {report}")
print(" (Detailed dictionary report was not returned by the function)")
# --- FIX END ---


def demo_feature_analysis():
"""Demo 5: Feature analysis report"""
print("\n" + "=" * 60)
print("DEMO 5: Feature Analysis Report")
print("=" * 60)

df = create_sample_data()

print("\n📊 Generating feature analysis report...")
try:
report = feature_analysis_report(df, target_col='performance')
@@ -110,13 +110,13 @@ def demo_feature_analysis():
print("\n" + "📊" * 30)
print("EXPLORATORY DATA ANALYSIS DEMO".center(60))
print("📊" * 30 + "\n")

demo_basic_stats()
demo_quick_eda()
demo_comprehensive_eda()
demo_health_check()
demo_feature_analysis()

print("\n" + "✅" * 30)
print("ALL DEMOS COMPLETED".center(60))
print("✅" * 30 + "\n")
print("✅" * 30 + "\n")