metadetect-ai
diff --git a/‎bin/.DS_Store‎
4 KB b/‎bin/.DS_Store‎
4 KB
diff --git a/‎citations.md‎
Lines changed: 117 additions & 0 deletions b/‎citations.md‎
Lines changed: 117 additions & 0 deletions
diff --git a/‎pom.xml‎
Lines changed: 11 additions & 0 deletions b/‎pom.xml‎
Lines changed: 11 additions & 0 deletions
diff --git a/‎src/main/java/dev/coms4156/project/metadetect/config/SecurityConfig.java‎
Lines changed: 22 additions & 3 deletions b/‎src/main/java/dev/coms4156/project/metadetect/config/SecurityConfig.java‎
Lines changed: 22 additions & 3 deletions
diff --git a/‎src/main/java/dev/coms4156/project/metadetect/dto/Dtos.java‎
Lines changed: 3 additions & 1 deletion b/‎src/main/java/dev/coms4156/project/metadetect/dto/Dtos.java‎
Lines changed: 3 additions & 1 deletion
diff --git a/‎src/main/java/dev/coms4156/project/metadetect/service/AnalyzeService.java‎
Lines changed: 31 additions & 3 deletions b/‎src/main/java/dev/coms4156/project/metadetect/service/AnalyzeService.java‎
Lines changed: 31 additions & 3 deletions
diff --git a/‎src/main/java/dev/coms4156/project/metadetect/service/FeatureExtractor.java‎
Lines changed: 2 additions & 0 deletions b/‎src/main/java/dev/coms4156/project/metadetect/service/FeatureExtractor.java‎
Lines changed: 2 additions & 0 deletions
diff --git a/‎src/main/java/dev/coms4156/project/metadetect/service/LogisticRegressionService.java‎
Lines changed: 76 additions & 0 deletions b/‎src/main/java/dev/coms4156/project/metadetect/service/LogisticRegressionService.java‎
Lines changed: 76 additions & 0 deletions
@@ -1,3 +1,47 @@
+### **Commit / Ticket Reference**
+- **Commit:** test: add coverage for model loader and logistic regression service
+- **Ticket:** none
+- **Date:** 2026-02-17
+- **Team Member:** Jalen Stephens
+
+---
+
+### **AI Tool Information**
+- **Tool Used:** OpenAI ChatGPT (GPT-5) via Codex CLI
+- **Access Method:** Local Codex CLI (sandboxed; no paid API calls)
+- **Configuration:** Default model settings
+- **Cost:** $0 (course-provided access)
+
+---
+
+### **Purpose of AI Assistance**
+Added unit tests to raise branch/instruction coverage for model loading and logistic regression inference, including cache validation, path resolution, invalid weight handling, C2PA flag behavior, and sigmoid branches.
+
+---
+
+### **Prompts / Interaction Summary**
+- “give me a commit message and fill out a template in citations.md for the work we did”
+- “can you write test for these to increase branch and instruction coverage please”
+
+---
+
+### **Resulting Artifacts**
+- `src/test/java/dev/coms4156/project/metadetect/service/ModelLoaderTest.java`
+- `src/test/java/dev/coms4156/project/metadetect/service/LogisticRegressionServiceTest.java`
+- `src/test/resources/model/test-model.json`
+
+---
+
+### **Verification**
+- `./mvnw -q -Dtest=ModelLoaderTest,LogisticRegressionServiceTest test`
+
+---
+
+### **Attribution Statement**
+> Portions of this work were generated with assistance from OpenAI ChatGPT (GPT-5) on 2026-02-17. All AI-generated content was reviewed and finalized by the development team.
+
+---
+
 ### **Commit / Ticket Reference**
 - **Commit:** fix(storage): encode Supabase paths and normalize project base URL
 - **Ticket:** N/A (prod bugfix)
@@ -3062,3 +3106,76 @@ Expanded CI coverage and optional live E2E hook:
 > Portions of this work were generated with assistance from OpenAI ChatGPT (GPT-5) on 2026-02-17. All AI-generated content was reviewed and finalized by the development team.
 
 ---
+
+### **Commit / Ticket Reference**
+- **Commit:** [feat] Implemented and Trained Logistic Regression Model  
+- **Ticket:** (#66) Implementation of Logistic Regression ML Model for Confidence Score  
+- **Date:** 12/1/2025  
+- **Team Member:** Isaac Schmidt
+
+---
+
+### **AI Tool Information**
+- **Tool Used:** OpenAI ChatGPT (GPT-5.1)  
+- **Access Method:** ChatGPT Web (.edu academic access)  
+- **Configuration:** Default model settings  
+- **Cost:** $0 (no paid API calls)
+
+---
+
+### **Purpose of AI Assistance**
+AI assistance was used to design, structure, and validate the machine-learning component of the MetaDetect system. This included help with:
+- Creating a feature extraction–based ML pipeline for AI-image detection  
+- Designing the workflow for offline model training (without including Python code in the repository)  
+- Advising on the correct model type, dataset preparation, cross-validation strategy, and model export  
+- Generating the Java inference architecture (ModelLoader, LogisticRegressionModel, AnalyzeService integration)  
+- Debugging dataset preparation issues and ensuring compatibility between training-time features and runtime inference
+
+---
+
+### **Prompts / Interaction Summary**
+Key interactions included:
+- Requesting recommendations for ML models appropriate for OpenCV feature vectors  
+- Asking how to train a logistic regression model offline and export weights for Java inference  
+- Debugging DatasetBuilder and CSV formatting issues to generate valid ML training data  
+- Setting up cross-validation for model evaluation  
+- Requesting a final AnalyzeService integration that correctly combines C2PA overrides with ML fallback  
+- Asking how and where `model.json` should be loaded in the service layer  
+- Requesting fixes and refactoring for ModelLoader, LogisticRegressionService, and FeatureExtractor interactions  
+- Clarifying model runtime behavior, including how C2PA features interact with ML predictions
+
+---
+
+### **Resulting Artifacts**
+The following deliverables were created or refined with AI assistance:
+- **DatasetBuilder.java** — Generates ML-ready feature CSVs from raw images and metadata  
+- **train_model.py (offline use only)** — Script used externally to train the logistic regression model  
+- **export_model.py (offline use only)** — Exports trained LR weights to a Java-readable `model.json`  
+- **model.json** — Serialized logistic regression weights and bias used in production  
+- **LogisticRegressionModel.java** — Runtime inference implementation compatible with exported weights  
+- **ModelLoader.java** — Loads `model.json` from classpath and constructs the inference model  
+- **LogisticRegressionService.java** — Bridges feature extraction and ML prediction  
+- **Updated AnalyzeService.java** — Integrates C2PA logic + ML fallback with clear override hierarchy  
+- Various debugging utilities, architectural recommendations, and corrections to CSV parsing logic
+
+---
+
+### **Verification**
+AI-assisted work was validated by:
+- Manual inspection and testing of DatasetBuilder output  
+- Successful cross-validation runs on ~80,000 training samples  
+- Confirming stable and consistent LR validation metrics across folds  
+- Verifying that exported weights from Python produced correct inference behavior in Java  
+- Manually testing AnalyzeService end-to-end with multiple categories of images:
+  - Images with valid AI manifests  
+  - Images with valid camera manifests  
+  - Images with no C2PA manifest  
+  - Images with corrupted or tampered manifests  
+- Ensuring the Java inference pipeline correctly loads model.json from classpath and returns deterministic probability scores
+
+---
+
+### **Attribution Statement**
+> Portions of this commit or configuration were generated with assistance from OpenAI ChatGPT (GPT-5.1) on 12/1/2025. All AI-generated content was reviewed, verified, and finalized by the development team.
+
+---
@@ -226,6 +226,17 @@
                 </configuration>
             </plugin>
 
+            <!-- ML Model -->
+            <plugin>
+                <groupId>org.codehaus.mojo</groupId>
+                <artifactId>exec-maven-plugin</artifactId>
+                <version>3.1.0</version>
+                <configuration>
+                    <cleanupDaemonThreads>false</cleanupDaemonThreads>
+                </configuration>
+            </plugin>
+
+
             <!-- PMD -->
             <plugin>
                 <groupId>org.apache.maven.plugins</groupId>
 
@@ -8,6 +8,7 @@
 import org.springframework.context.annotation.Configuration;
 import org.springframework.core.annotation.Order;
 import org.springframework.http.HttpMethod;
+import org.springframework.http.HttpStatus;
 import org.springframework.security.config.Customizer;
 import org.springframework.security.config.annotation.web.builders.HttpSecurity;
 import org.springframework.security.config.annotation.web.configuration.EnableWebSecurity;
@@ -19,6 +20,7 @@
 import org.springframework.security.oauth2.jwt.JwtValidators;
 import org.springframework.security.oauth2.jwt.NimbusJwtDecoder;
 import org.springframework.security.web.SecurityFilterChain;
+import org.springframework.security.web.authentication.HttpStatusEntryPoint;
 import org.springframework.web.cors.CorsConfiguration;
 import org.springframework.web.cors.CorsConfigurationSource;
 import org.springframework.web.cors.UrlBasedCorsConfigurationSource;
@@ -54,6 +56,8 @@ public SecurityFilterChain apiSecurityFilterChain(HttpSecurity http) throws Exce
             // Everything else under /api/** requires auth
             .anyRequest().authenticated()
         )
+        .exceptionHandling(e -> e.authenticationEntryPoint(
+            new HttpStatusEntryPoint(HttpStatus.UNAUTHORIZED)))
         .oauth2ResourceServer(oauth -> oauth.jwt(Customizer.withDefaults()));
 
     return http.build();
@@ -90,12 +94,27 @@ public SecurityFilterChain webSecurityFilterChain(HttpSecurity http) throws Exce
                   "/js/**",
                   "/images/**",
                   "/fonts/**",
-                  "/webjars/**"
+                  "/webjars/**",
+
+                  // Swagger / OpenAPI docs
+                  "/swagger-ui.html",
+                  "/swagger-ui/**",
+                  "/v3/api-docs/**",
+                  "/api-docs/**"
+              ).permitAll()
+
+              // Public non-API endpoints (health/auth pages used by tests + clients)
+              .requestMatchers(
+                  "/health",
+                  "/actuator/**",
+                  "/auth/**"
               ).permitAll()
 
-              // Everything else (non-API) is allowed
-              .anyRequest().permitAll()
+              // Everything else (non-API) requires authentication
+              .anyRequest().authenticated()
         );
+    http.exceptionHandling(e -> e.authenticationEntryPoint(
+        new HttpStatusEntryPoint(HttpStatus.UNAUTHORIZED)));
 
     return http.build();
   }
 
@@ -67,7 +67,9 @@ public record AnalysisManifestResponse(
   public record AnalyzeConfidenceResponse(
       String analysisId,
       String status,
-      Double score              // nullable until we implement a real scorer
+      Double confidenceScore,
+      boolean c2paUsed,
+      String modelVersion
   ) { }
 
   /**
 
@@ -2,13 +2,15 @@
 
 import static dev.coms4156.project.metadetect.model.AnalysisReport.ReportStatus;
 
+import com.fasterxml.jackson.databind.JsonNode;
 import com.fasterxml.jackson.databind.ObjectMapper;
 import dev.coms4156.project.metadetect.c2pa.C2paToolInvoker;
 import dev.coms4156.project.metadetect.dto.Dtos;
 import dev.coms4156.project.metadetect.model.AnalysisReport;
 import dev.coms4156.project.metadetect.model.AnalysisReport.ReportStatus;
 import dev.coms4156.project.metadetect.model.Image;
 import dev.coms4156.project.metadetect.repository.AnalysisReportRepository;
+import dev.coms4156.project.metadetect.service.LogisticRegressionService.InferenceResult;
 import dev.coms4156.project.metadetect.service.errors.MissingStoragePathException;
 import dev.coms4156.project.metadetect.service.errors.NotFoundException;
 import java.io.File;
@@ -44,6 +46,7 @@ public class AnalyzeService {
   private final AnalysisReportRepository analysisRepo;
   private final SupabaseStorageService storage;
   private final UserService userService;
+  private final LogisticRegressionService logisticRegressionService;
   private final Clock clock;
 
   // Lightweight mapper for error JSON assembly.
@@ -64,12 +67,14 @@ public AnalyzeService(C2paToolInvoker c2paToolInvoker,
                         AnalysisReportRepository analysisRepo,
                         SupabaseStorageService storage,
                         UserService userService,
+                        LogisticRegressionService logisticRegressionService,
                         Clock clock) {
     this.c2paToolInvoker = c2paToolInvoker;
     this.imageService = imageService;
     this.analysisRepo = analysisRepo;
     this.storage = storage;
     this.userService = userService;
+    this.logisticRegressionService = logisticRegressionService;
     this.clock = clock;
   }
 
@@ -163,7 +168,9 @@ public Dtos.AnalyzeConfidenceResponse getConfidence(UUID analysisId) {
     return new Dtos.AnalyzeConfidenceResponse(
       report.getId().toString(),
       report.getStatus().name(),
-      report.getConfidence()   // null until a real scorer exists
+      report.getConfidence(),
+      deriveC2paUsed(report.getDetails()),
+      logisticRegressionService.getModelVersion()
     );
   }
 
@@ -207,11 +214,17 @@ private void runExtractionAndFinalize(UUID analysisId, String storagePath) {
       // 2) Run C2PA extraction into ML-ready metadata
       C2paToolInvoker.C2paMetadata meta = c2paToolInvoker.extractMetadata(tempFile);
 
-      // 3) Serialize metadata and mark COMPLETED
+      // 3) Compute logistic-regression score using OpenCV + C2PA features
+      InferenceResult inference = logisticRegressionService.predict(
+          tempFile.getAbsolutePath(),
+          meta
+      );
+
+      // 4) Serialize metadata and mark COMPLETED with a confidence score
       String json = objectMapper.writeValueAsString(meta);
 
       // The details field now stores the C2PA metadata schema, not raw manifest JSON.
-      markCompleted(analysisId, json, /*confidence*/ null);
+      markCompleted(analysisId, json, inference.confidenceScore());
 
     } catch (IOException ioe) {
       // IO-level failures (download, JSON serialization) are genuine failures.
@@ -298,6 +311,21 @@ private Instant now() {
     return Instant.now(clock);
   }
 
+  /**
+   * Decides whether a stored details payload describes a C2PA manifest that was
+   * present and read without error.
+   *
+   * @param detailsJson serialized details of an analysis report; may be null or blank
+   * @return {@code true} only when {@code c2paHasManifest} is 1 and {@code c2paErrorFlag}
+   *     is 0; {@code false} for blank input, absent fields, or unparseable JSON
+   */
+  private boolean deriveC2paUsed(String detailsJson) {
+    boolean used = false;
+    if (StringUtils.hasText(detailsJson)) {
+      try {
+        JsonNode root = objectMapper.readTree(detailsJson);
+        // Absent or non-numeric fields fall back to 0 via asInt(0).
+        boolean manifestPresent = root.path("c2paHasManifest").asInt(0) == 1;
+        boolean errorFree = root.path("c2paErrorFlag").asInt(0) == 0;
+        used = manifestPresent && errorFree;
+      } catch (Exception e) {
+        // Unreadable details: stay conservative and report the manifest as unused.
+        used = false;
+      }
+    }
+    return used;
+  }
+
   /** Truncates a string to a maximum length, null-safe. */
   private static String truncate(String s, int max) {
     if (s == null) {
 
@@ -13,6 +13,7 @@
 import org.opencv.core.Size;
 import org.opencv.imgcodecs.Imgcodecs;
 import org.opencv.imgproc.Imgproc;
+import org.springframework.stereotype.Service;
 
 
 /**
@@ -29,6 +30,7 @@
  * NOTE: C2PA metadata is obtained separately via C2paToolInvoker. This class does
  * not call C2PA directly, but is designed to combine its results into the final feature vector.
  */
+@Service
 public class FeatureExtractor {
 
   static {
 
@@ -0,0 +1,76 @@
+package dev.coms4156.project.metadetect.service;
+
+import dev.coms4156.project.metadetect.c2pa.C2paToolInvoker.C2paMetadata;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+import org.springframework.stereotype.Service;
+
+/**
+ * Scores an image with a logistic-regression model. The feature vector comes from
+ * {@link FeatureExtractor}; weights, bias, and version are obtained from
+ * {@link ModelLoader#loadModel()} on each call (any caching is left to the loader).
+ */
+@Service
+public class LogisticRegressionService {
+
+  private static final Logger log = LoggerFactory.getLogger(LogisticRegressionService.class);
+
+  private final FeatureExtractor featureExtractor;
+  private final ModelLoader modelLoader;
+
+  /**
+   * Wires the collaborators used for inference.
+   *
+   * @param featureExtractor produces the numeric feature vector for an image
+   * @param modelLoader supplies the model parameters (weights, bias, version)
+   */
+  public LogisticRegressionService(FeatureExtractor featureExtractor, ModelLoader modelLoader) {
+    this.featureExtractor = featureExtractor;
+    this.modelLoader = modelLoader;
+  }
+
+  /**
+   * Computes the model's probability for the given image.
+   *
+   * @param imagePath path to the image file on disk
+   * @param c2pa C2PA metadata extracted beforehand; a null value yields {@code c2paUsed == false}
+   * @return the sigmoid of the weighted feature sum plus bias, the C2PA usage flag,
+   *     and the version reported by the loaded model
+   */
+  public InferenceResult predict(String imagePath, C2paMetadata c2pa) {
+    final ModelLoader.ModelParameters params = modelLoader.loadModel();
+    final double[] featureVector = featureExtractor.extractAllFeatures(imagePath, c2pa);
+
+    final double logit = dot(params.weights(), featureVector) + params.bias();
+    return new InferenceResult(sigmoid(logit), isC2paUsed(c2pa), params.version());
+  }
+
+  /** Exposes the version string of the model returned by the loader. */
+  public String getModelVersion() {
+    final ModelLoader.ModelParameters params = modelLoader.loadModel();
+    return params.version();
+  }
+
+  /** True when metadata is present, reports a manifest, and carries no error flag. */
+  private static boolean isC2paUsed(C2paMetadata c2pa) {
+    if (c2pa == null) {
+      return false;
+    }
+    if (c2pa.getc2paHasManifest() != 1) {
+      return false;
+    }
+    return c2pa.getc2paErrorFlag() == 0;
+  }
+
+  /**
+   * Inner product over the common prefix of both arrays. A length mismatch is logged
+   * as a warning and the longer array's tail is ignored.
+   */
+  private double dot(double[] weights, double[] features) {
+    final int weightCount = weights.length;
+    final int featureCount = features.length;
+    final int usable = Math.min(weightCount, featureCount);
+    if (weightCount != featureCount) {
+      log.warn("Model/feature length mismatch (w={}, f={}); truncating to {}", weightCount,
+          featureCount, usable);
+    }
+
+    // Plain left-to-right accumulation keeps the floating-point result order-stable.
+    double acc = 0.0;
+    int i = 0;
+    while (i < usable) {
+      acc += weights[i] * features[i];
+      i++;
+    }
+    return acc;
+  }
+
+  /**
+   * Logistic function evaluated via {@code exp(-|z|)} so the exponential never
+   * receives a large positive argument.
+   */
+  private double sigmoid(double z) {
+    final double decay = Math.exp(-Math.abs(z));
+    // For z >= 0 this is 1 / (1 + e^-z); otherwise e^z / (1 + e^z).
+    final double numerator = (z >= 0) ? 1.0 : decay;
+    return numerator / (1.0 + decay);
+  }
+
+  /** Immutable inference result. */
+  public record InferenceResult(double confidenceScore, boolean c2paUsed, String modelVersion) { }
+}