Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
46 commits
Select commit Hold shift + click to select a range
a277d3b
initial migration
laurensWe Jan 20, 2026
5090857
matlab vs python profile_correlator tests
vergep Jan 23, 2026
c4837a8
rename tests resources + make them connect
laurensWe Jan 23, 2026
3d4f735
Fix absolute imports
laurensWe Jan 23, 2026
6087e9a
Change tests + removed fluff + finetune types to use
laurensWe Jan 23, 2026
7c77246
Change docstrings + remove redundant code
laurensWe Jan 23, 2026
00f1cb5
Merge branch 'feature/step_3' into feature/migrate_ProfileCorrelateSi…
laurensWe Jan 23, 2026
d7642bb
use already implemented filtering + resolve unit differences in unit …
laurensWe Jan 23, 2026
cc3d6b8
remove the filtering.py and use existing functionality + remove remov…
laurensWe Jan 23, 2026
cb36067
Merge branch 'feature/step_3' into feature/migrate_ProfileCorrelateSi…
laurensWe Jan 23, 2026
7f59620
matlab file tests with ccf
vergep Jan 26, 2026
8428968
in between push
laurensWe Jan 27, 2026
c00b06d
further debugging
laurensWe Jan 27, 2026
317b140
Slightly different optimization algo
laurensWe Jan 28, 2026
5a37910
more docs for deviation
laurensWe Jan 28, 2026
651cbc4
Merge branch 'main' into feature/migrate_ProfileCorrelateSingle.m
laurensWe Jan 29, 2026
098d3ef
delete debug files + add tests with synthetic data
laurensWe Jan 29, 2026
526e7bd
Add extra test
laurensWe Jan 29, 2026
415de0d
only brute force, no distinction between partial and full profile, mi…
vergep Jan 30, 2026
6b4c529
Remove is_partial_profile distinction, use position_shift for visuali…
laurensWe Feb 2, 2026
d1e194c
Add visualization images for MATLAB test cases
laurensWe Feb 2, 2026
1d1cbb7
Fix visualization to handle profiles with different pixel sizes
laurensWe Feb 2, 2026
52ab924
Improve on image generation for comparing striation profiles
laurensWe Feb 2, 2026
a449bf3
Remove Dead code
laurensWe Feb 2, 2026
742c21c
Remove legacy code + divert docs to own python implementation
laurensWe Feb 2, 2026
27881f1
Remove the references to the multi-scale (matlab) approach
laurensWe Feb 3, 2026
0966776
remove dead transform code + unify the resampling/interpolating
laurensWe Feb 3, 2026
df423fe
test cases Coen for profile_correlator
vergep Feb 3, 2026
a89e132
cleanup the sample profiles
laurensWe Feb 3, 2026
0630cd8
Increase minimum overlap region to 350 micrometer
laurensWe Feb 3, 2026
258600b
bring back corr vs overlap
laurensWe Feb 3, 2026
2abe7ca
Add the profile numbers to the plot
laurensWe Feb 3, 2026
60a81db
remove all the images
laurensWe Feb 4, 2026
d4f9cc0
cleanup tests
laurensWe Feb 4, 2026
8ee639f
Merge branch 'main' into feature/migrate_ProfileCorrelateSingle.m
laurensWe Feb 4, 2026
4302210
Have datatype consistent with the project + use np pearson correlatio…
laurensWe Feb 4, 2026
e322d61
Change variable depth_data to heights
laurensWe Feb 4, 2026
ecf61df
move resample to resample.py + make apply_scaling more clear
laurensWe Feb 4, 2026
ec15b8d
add anti aliasing to the downsampling for the correlator
laurensWe Feb 4, 2026
e0e2c69
Make data_type required or return None
laurensWe Feb 4, 2026
f774543
Break method to smaller functions + consolidate correlation functions
laurensWe Feb 4, 2026
7afa4be
feedback cfs-data
laurensWe Feb 4, 2026
2e80c73
Merge branch 'main' into feature/migrate_ProfileCorrelateSingle.m
laurensWe Feb 4, 2026
c06b31b
making scoring agnostic to ref or comp, simplified rescaling
vergep Feb 5, 2026
32aa384
do not copy overlap_ref and comp multiple times
vergep Feb 5, 2026
a47e4a4
comments Thomas.
vergep Feb 6, 2026
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@
)
from conversion.preprocess_impression.tilt import apply_tilt_correction
from conversion.preprocess_impression.utils import update_mark_data, Point2D
from conversion.resample import get_scaling_factors, resample_image_array
from conversion.resample import get_scaling_factors, resample_array_2d


def preprocess_impression_mark(
Expand Down Expand Up @@ -77,7 +77,7 @@ def preprocess_impression_mark(
factors = get_scaling_factors(
scales=original_scales, target_scale=params.pixel_size
)
fitted_surface = resample_image_array(fitted_surface, factors=factors)
fitted_surface = resample_array_2d(fitted_surface, factors=factors)

# Stage 7: High-pass filter
if params.highpass_cutoff is not None:
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,66 @@
"""
Profile correlator module for striated mark comparison.

This module provides functions for comparing 1D profiles of striated marks
(scratch marks, toolmarks, etc.) using global brute-force search and
correlation analysis.

The main entry point is :func:`correlate_profiles`, which handles the complete
comparison workflow including:

- Sampling distance equalization
- Global brute-force search over all shift positions and scale factors
- Selection of alignment with maximum cross-correlation
- Computation of comparison metrics

Submodules
----------
- data_types: Core data structures (Profile, AlignmentParameters, StriationComparisonResults)
- correlator: Main entry point function (global brute-force search)
- transforms: Resampling operations for pixel scale equalization
- statistics: Statistical metrics (correlation, roughness, overlap ratio)
"""

# Core data types
from conversion.profile_correlator.data_types import (
AlignmentParameters,
StriationComparisonResults,
Profile,
)

# Main entry point
from conversion.profile_correlator.correlator import correlate_profiles

# Transform functions
from conversion.profile_correlator.transforms import equalize_pixel_scale

# Statistics functions
from conversion.profile_correlator.statistics import (
compute_cross_correlation,
compute_overlap_ratio,
compute_roughness_sa,
compute_roughness_sq,
compute_normalized_square_based_roughness_differences,
)

# Re-export cutoff_to_gaussian_sigma from conversion.filter for convenience
from conversion.filter.gaussian import cutoff_to_gaussian_sigma

__all__ = [
# Main entry point
"correlate_profiles",
# Data types
"Profile",
"AlignmentParameters",
"StriationComparisonResults",
# Transforms
"equalize_pixel_scale",
# Statistics
"compute_cross_correlation",
"compute_overlap_ratio",
"compute_roughness_sa",
"compute_roughness_sq",
"compute_normalized_square_based_roughness_differences",
# Filtering (re-exported from conversion.filter)
"cutoff_to_gaussian_sigma",
]
274 changes: 274 additions & 0 deletions packages/scratch-core/src/conversion/profile_correlator/correlator.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,274 @@
"""
Main entry point for profile correlation.
This module provides the primary interface for comparing striated mark profiles.
It uses a brute-force approach: try all possible shifts and scales between
profiles and select the one with maximum cross-correlation, subject to a
minimum overlap constraint.
All length and height measurements are in meters (SI units).
Algorithm Overview
------------------
The algorithm uses a global brute-force search strategy:
1. **Equalizes pixel scales** between profiles (downsample to common resolution)
2. **Tries multiple scale factors** (e.g., 0.95, 0.97, ..., 1.03, 1.05)
3. **For each scale, tries all shifts** with at least min_overlap_distance overlap
4. **Computes correlation** at each shift position
5. **Selects maximum** correlation as the optimal alignment
This approach is guaranteed to find the global maximum correlation, which may
be at a position far from zero shift for repetitive patterns.
"""

import numpy as np

from container_models.base import FloatArray1D
from conversion.profile_correlator.data_types import (
AlignmentParameters,
AlignmentResult,
StriationComparisonResults,
Profile,
RoughnessMetrics,
)
from conversion.profile_correlator.transforms import equalize_pixel_scale
from conversion.profile_correlator.statistics import (
compute_cross_correlation,
compute_overlap_ratio,
compute_roughness_sa,
compute_roughness_sq,
compute_normalized_square_based_roughness_differences,
)
from conversion.resample import resample_array_1d


def correlate_profiles(
profile_ref: Profile,
profile_comp: Profile,
params: AlignmentParameters = AlignmentParameters(),
) -> StriationComparisonResults | None:
"""
Compare two striated mark profiles and compute similarity metrics.
This function performs profile alignment using a brute-force approach:
it tries multiple scale factors and all possible shifts between the profiles
(subject to a minimum overlap constraint) and selects the combination with
maximum cross-correlation.
The workflow is:
1. **Equalize sampling distances**: If the two profiles have different pixel
sizes, the higher-resolution one is downsampled to match the lower resolution.
2. **Brute-force search**: Try all shift/scale combinations and compute
cross-correlation at each position. For each scale factor in the range
defined by params.max_scaling, determine valid shifts that maintain at
least min_overlap_distance of overlap, and select the shift/scale with
maximum correlation.
3. **Compute metrics**: Calculate comprehensive comparison metrics including
correlation coefficient, roughness parameters, and signature differences.
All measurements are in meters (SI units).
:param profile_ref: Reference profile to compare against.
:param profile_comp: Compared profile to align to the reference.
:param params: Alignment parameters. Key parameters:
- max_scaling: Maximum scaling deviation (e.g., 0.05 for ±5%)
- min_overlap_distance: Minimum overlap distance in meters (default 200 μm)
:returns: StriationComparisonResults containing all computed metrics, or None if no
valid alignment could be found.
"""
# Step 1: Equalize pixel scales
profile_ref_eq, profile_comp_eq = equalize_pixel_scale(profile_ref, profile_comp)
pixel_size = profile_ref_eq.pixel_size

heights_ref = profile_ref_eq.heights
heights_comp = profile_comp_eq.heights
len_ref = len(heights_ref)
len_comp = len(heights_comp)

# Minimum overlap in samples
min_overlap_samples = int(np.ceil(params.min_overlap_distance / pixel_size))

# Early exit if either profile is too short
if len_ref < min_overlap_samples or len_comp < min_overlap_samples:
return None

# Generate scale factors to try (7 steps gives ~1.7% intervals for ±5%)
scale_factors = np.linspace(
1.0 - params.max_scaling, 1.0 + params.max_scaling, params.n_scale_steps
)

# make scaling symmetric to what you choose as ref or comp
scale_factors = np.unique(np.concatenate((scale_factors, 1 / scale_factors)))
Copy link
Collaborator

@cfs-data cfs-data Feb 6, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Comments in code usually suggests you should perhaps use more functions:

def _get_scale_factors(max_scaling: float, n_steps: int) -> NDArray:
    """Generate symmetric scale factors."""
    scales = np.linspace(1.0 - max_scaling, 1.0 + max_scaling, n_steps)
    scales = np.unique(np.concatenate((scale_factors, 1 / scale_factors)))
    return scales


# Step 2: Find best alignment
alignment = _find_best_alignment(
heights_ref, heights_comp, scale_factors, min_overlap_samples
)
if alignment is None:
return None

# Step 3: Compute and return metrics
return _compute_metrics(alignment, pixel_size, len_ref, len_comp)


def _calculate_idx_parameters(
shift: int, len_small: int, len_large: int
) -> tuple[int, int, int]:
"""
Find starting idx for both striations, and compute overlap length
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
Find starting idx for both striations, and compute overlap length
Find starting index for both striations, and compute overlap length

"""

if shift >= 0:
idx_large_start = shift
idx_small_start = 0
overlap_length = min(len_large - shift, len_small)
else:
idx_large_start = 0
idx_small_start = -shift
overlap_length = min(len_large, len_small + shift)

return idx_small_start, idx_large_start, overlap_length
Copy link
Collaborator

@cfs-data cfs-data Feb 6, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I understand it was like this in MATLAB, so this is not a suggestion for this PR, but: shouldn't the overlap length be a variable as well (ranging from min_overlap_samples to maximum_overlap? Now the comparison profile is cut/truncated either from the beginning or from the end, which allows for "small" overlapping regions with high correlation at the endings of the profiles, but not anywhere else. This seems arbitrary?

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@laurensWe @vergep What do you think?



def _find_best_alignment(
heights_ref: FloatArray1D,
heights_comp: FloatArray1D,
scale_factors: FloatArray1D,
min_overlap_samples: int,
) -> AlignmentResult | None:
"""
Find the best alignment between two profiles using brute-force search.
Tries all combinations of scale factors and shifts, returning the one
with maximum correlation.
:param heights_ref: Reference profile heights.
:param heights_comp: Comparison profile heights.
:param scale_factors: Array of scale factors to try.
:param min_overlap_samples: Minimum required overlap in samples.
:returns: Best alignment result, or None if no valid alignment found.
"""
len_ref = len(heights_ref)

best_correlation = -np.inf
best_shift = None
best_scale = None

for scale in scale_factors:
heights_comp_scaled = resample_array_1d(heights_comp, scale)
len_comp = len(heights_comp_scaled)
# Calculate shift range (ensure minimum overlap)
min_shift = -(len_comp - min_overlap_samples)
max_shift = len_ref - min_overlap_samples

for shift in range(min_shift, max_shift + 1):
idx_comp_start, idx_ref_start, overlap_length = _calculate_idx_parameters(
shift, len_comp, len_ref
) # Calculate overlap region for this shift

if overlap_length < min_overlap_samples:
continue

partial_ref = heights_ref[idx_ref_start : idx_ref_start + overlap_length]
partial_comp = heights_comp_scaled[
idx_comp_start : idx_comp_start + overlap_length
]

correlation = compute_cross_correlation(partial_ref, partial_comp)

if correlation and correlation > best_correlation:
best_correlation = correlation
best_shift = shift
best_scale = scale

if best_correlation is None:
return None

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

NTH: You could consider splitting the function at the point into two separate functions by returning the parameters found here. Now your function is doing more than one thing (which is generally undesirable). I think for now it's okay, since the intended meaning is quite clear, but in general the functions become very "script"-like if they do too many things

# Redo computations for best_cale and best_shift (instead of copying partial_ref and partial_comp above multiple times. This saves time.)
heights_comp_scaled = resample_array_1d(heights_comp, best_scale)
len_comp = len(heights_comp_scaled)
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This line can be removed

idx_comp_start, idx_ref_start, overlap_length = _calculate_idx_parameters(
best_shift, len_comp, len_ref
)

best_ref_overlap = heights_ref[idx_ref_start : idx_ref_start + overlap_length]
best_comp_overlap = heights_comp_scaled[
idx_comp_start : idx_comp_start + overlap_length
]

return AlignmentResult(
correlation=best_correlation,
shift=best_shift,
scale=best_scale,
ref_overlap=best_ref_overlap,
comp_overlap=best_comp_overlap,
)


def _compute_metrics(
alignment: AlignmentResult,
pixel_size: float,
len_ref: int,
len_comp: int,
) -> StriationComparisonResults:
"""
Compute comparison metrics from an alignment result.
:param alignment: The best alignment found.
:param pixel_size: Pixel size in meters.
:param len_ref: Length of reference profile in samples.
:param len_comp: Length of comparison profile in samples.
:returns: Full comparison results.
"""
ref_overlap = alignment.ref_overlap
comp_overlap = alignment.comp_overlap

# Convert to meters
position_shift = alignment.shift * pixel_size
overlap_length = len(ref_overlap) * pixel_size
ref_length = len_ref * pixel_size
comp_length = len_comp * pixel_size

overlap_ratio = compute_overlap_ratio(overlap_length, ref_length, comp_length)

# Roughness metrics
sa_ref = compute_roughness_sa(ref_overlap)
mean_square_ref = compute_roughness_sq(ref_overlap)
sa_comp = compute_roughness_sa(comp_overlap)
mean_square_comp = compute_roughness_sq(comp_overlap)

# Difference profile roughness
diff_profile = comp_overlap - ref_overlap
sa_diff = compute_roughness_sa(diff_profile)
mean_square_of_difference = compute_roughness_sq(diff_profile)

# Signature differences
roughness = RoughnessMetrics(
mean_square_ref=mean_square_ref,
mean_square_comp=mean_square_comp,
mean_square_of_difference=mean_square_of_difference,
)
signature_diff = compute_normalized_square_based_roughness_differences(roughness)

return StriationComparisonResults(
pixel_size=pixel_size,
position_shift=position_shift,
scale_factor=alignment.scale,
similarity_value=alignment.correlation,
overlap_length=overlap_length,
overlap_ratio=overlap_ratio,
correlation_coefficient=alignment.correlation,
sa_ref=sa_ref,
mean_square_ref=mean_square_ref,
sa_comp=sa_comp,
mean_square_comp=mean_square_comp,
sa_diff=sa_diff,
mean_square_of_difference=mean_square_of_difference,
ds_roughness_normalized_to_reference=signature_diff.roughness_normalized_to_reference,
ds_roughness_normalized_to_compared=signature_diff.roughness_normalized_to_compared,
ds_roughness_normalized_to_reference_and_compared=signature_diff.roughness_normalized_to_reference_and_compared,
)
Loading
Loading