FLock-io · astridesa · Jul 15, 2025 · Jul 15, 2025 · Jul 21, 2025 · Jul 21, 2025
diff --git a/.DS_Store b/.DS_Store
diff --git a/.vscode/settings.json b/.vscode/settings.json
@@ -0,0 +1,5 @@
+{
+    "githubPullRequests.ignoredPullRequestBranches": [
+        "main"
+    ]
+}
diff --git a/configs/onnx.json b/configs/onnx.json
@@ -0,0 +1,7 @@
+{
+    "per_device_eval_batch_size": 8,
+    "sequence_length": 64,
+    "model_type": "forecasting",
+    "evaluation_metrics": ["mae", "rmse", "mape", "smape"],
+    "output_dir": "/tmp/onnx_validation"
+}
diff --git a/test_onnx.py b/test_onnx.py
@@ -0,0 +1,186 @@
+import numpy as np
+import pandas as pd
+from unittest.mock import MagicMock, patch
+from pathlib import Path
+import sys
+
+
+project_root = Path(__file__).parent
+sys.path.insert(0, str(project_root))
+
+from validator.validation_runner import ValidationRunner
+from validator.modules.onnx import (
+    ONNXValidationModule,
+    ONNXConfig,
+    ONNXInputData,
+    ONNXMetrics,
+)
+
+
+def load_and_preprocess_demo_data():
+    """Load demo CSV data and apply feature engineering"""
+    from pathlib import Path
+
+    demo_path = (
+        Path(__file__).parent
+        / "validator"
+        / "modules"
+        / "onnx"
+        / "demo_data"
+        / "test.csv"
+    )
+
+    if not demo_path.exists():
+        print(f"Demo data not found at: {demo_path}")
+        return None
+
+    # Read the demo CSV file
+    df = pd.read_csv(demo_path)
+    print(f"Loaded demo data with shape: {df.shape}")
+    print(f"Original columns: {df.columns.tolist()}")
+
+    df["Date"] = pd.to_datetime(df["Date"])
+    df["year"] = df["Date"].dt.year
+    df["month"] = df["Date"].dt.month
+    df["day"] = df["Date"].dt.day
+    df["dayofweek"] = df["Date"].dt.dayofweek
+    df["dayofyear"] = df["Date"].dt.dayofyear
+
+    df = df.sort_values(["store", "product", "Date"])
+
+    for lag in [1, 2, 3, 7]:
+        df[f"number_sold_lag_{lag}"] = df.groupby(["store", "product"])[
+            "number_sold"
+        ].shift(lag)
+
+    for window in [3, 7, 14]:
+        df[f"number_sold_rolling_{window}"] = (
+            df.groupby(["store", "product"])["number_sold"]
+            .rolling(window=window)
+            .mean()
+            .values
+        )
+
+    df = df.dropna()
+
+    # Select only numerical feature columns
+    feature_columns = [
+        "store",
+        "product",
+        "year",
+        "month",
+        "day",
+        "dayofweek",
+        "dayofyear",
+        "number_sold_lag_1",
+        "number_sold_lag_2",
+        "number_sold_lag_3",
+        "number_sold_lag_7",
+        "number_sold_rolling_3",
+        "number_sold_rolling_7",
+        "number_sold_rolling_14",
+        "number_sold",  # Keep target column
+    ]
+
+    df_final = df[feature_columns]
+
+    print(f"After feature engineering: {df_final.shape}")
+    print(f"Final columns: {df_final.columns.tolist()}")
+
+    processed_csv = df_final.to_csv(index=False)
+    return processed_csv
+
+
+@patch("validator.validation_runner.FedLedger")
+@patch("requests.get")
+def test_onnx_validation_works(mock_requests, mock_fedledger):
+    """Test that ONNX validation can complete successfully using real HuggingFace model"""
+
+    test_csv = load_and_preprocess_demo_data()
+    if test_csv is None:
+        print("Failed to load demo data")
+        return False
+
+    # Mock API
+    mock_api = MagicMock()
+    mock_api.list_tasks.return_value = [
+        {"id": 1, "task_type": "onnx", "title": "Test", "data": {}}
+    ]
+    mock_api.mark_assignment_as_failed = MagicMock()
+    mock_fedledger.return_value = mock_api
+
+    # Mock HTTP requests for CSV data (use real HuggingFace download for model)
+    def mock_get_side_effect(url):
+        response = MagicMock()
+        response.raise_for_status.return_value = None
+        response.text = test_csv  # CSV contains both features and target
+        return response
+
+    mock_requests.side_effect = mock_get_side_effect
+
+    runner = ValidationRunner(
+        module="onnx",
+        task_ids=[1],
+        flock_api_key="test_key",
+        hf_token="test_token",
+        test_mode=True,
+    )
+
+    input_data = ONNXInputData(
+        model_repo_id="Fan9494/test_onnx",
+        model_filename="model.onnx",
+        revision="main",
+        test_data_url="https://example.com/test.csv",
+        target_column="number_sold",
+        task_type="forecasting",
+        task_id=1,
+        required_metrics=[
+            "mae",
+            "rmse",
+            "mape",
+            "smape",
+            "r2_score",
+            "directional_accuracy",
+        ],
+    )
+
+    # Perform validation
+    print("Running ONNX validation...")
+    metrics = runner.perform_validation("assignment_123", 1, input_data)
+
+    print(f"Validation result: {metrics}")
+
+    if metrics is None:
+        print("Validation returned None - something went wrong")
+        print("Checking mocks:")
+        print(f"  - HTTP requests called: {mock_requests.call_count}")
+        return False
+    else:
+        print("Validation completed successfully!")
+        print(f"   - Type: {type(metrics)}")
+        if hasattr(metrics, "mae"):
+            print(f"   - MAE: {metrics.mae}")
+        if hasattr(metrics, "rmse"):
+            print(f"   - RMSE: {metrics.rmse}")
+        if hasattr(metrics, "mape"):
+            print(f"   - MAPE: {metrics.mape}")
+        if hasattr(metrics, "smape"):
+            print(f"   - SMAPE: {metrics.smape}")
+
+        return True
+
+
+if __name__ == "__main__":
+    print("Testing ONNX Module")
+    print("=" * 50)
+
+    # Run tests
+    print()
+    test_passed = test_onnx_validation_works()
+
+    if test_passed:
+        print("\nAll ONNX tests passed!")
+        sys.exit(0)
+    else:
+        print("\nSome tests failed")
+        sys.exit(1)
diff --git a/validator/.DS_Store b/validator/.DS_Store
diff --git a/validator/modules/.DS_Store b/validator/modules/.DS_Store
diff --git a/validator/modules/base.py b/validator/modules/base.py
@@ -1,15 +1,19 @@
 from abc import ABC, abstractmethod
 from pydantic import BaseModel
 
+
 class BaseConfig(BaseModel, frozen=True):
     pass
 
+
 class BaseInputData(BaseModel, frozen=True):
     pass
 
+
 class BaseMetrics(BaseModel, frozen=True):
     pass
 
+
 class BaseValidationModule(ABC):
     config_schema: type[BaseConfig]
     input_data_schema: type[BaseInputData]
@@ -18,17 +22,13 @@ class BaseValidationModule(ABC):
 
     @abstractmethod
     def __init__(self, config: BaseConfig, **kwargs):
-        """
+        """.
-        """.
+        """
-        """.
+        """
         Perform any global, one-time setup needed for this module.
         """
         pass
 
     @abstractmethod
-    def validate(
-        self,
-        data: BaseInputData,
-        **kwargs
-    ) -> BaseMetrics:
+    def validate(self, data: BaseInputData, **kwargs) -> BaseMetrics:
         """
         Download/prep the repo/revision, run validation, and return metrics parsed into a Pydantic model.
         """
@@ -39,4 +39,4 @@ def cleanup(self):
         """
         Clean up any resources (e.g., temp files, models in memory).
         """
-        pass
+        pass
diff --git a/validator/modules/onnx/.DS_Store b/validator/modules/onnx/.DS_Store