From 2592f2b378d02b8d252ffc1a88f119447cd4f4a5 Mon Sep 17 00:00:00 2001
From: Dor Adiv <doradiv12@gmail.com>
Date: Mon, 17 Nov 2025 14:46:35 +0200
Subject: [PATCH 1/3] Change module path from 'adrg' to 'dorzzz'

Changed URL for fork
---
 go.mod | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/go.mod b/go.mod
index 9a61806..b5b4608 100644
--- a/go.mod
+++ b/go.mod
@@ -1,4 +1,4 @@
-module github.com/adrg/strutil
+module github.com/dorzzz/strutil
 
 go 1.19
 

From 329b7f8a5c85d5e5d4f291915b9bf392d26e0bc9 Mon Sep 17 00:00:00 2001
From: Dor Adiv <dor@fincom.co>
Date: Mon, 17 Nov 2025 15:03:55 +0200
Subject: [PATCH 2/3] Fixed import URL

---
 README.md                              | 36 +++++++++++++-------------
 example_test.go                        |  4 +--
 internal/mathutil/mathutil_test.go     |  2 +-
 internal/ngram/ngram.go                |  2 +-
 internal/ngram/ngram_test.go           |  2 +-
 internal/stringutil/stringutil_test.go |  2 +-
 metrics/examples_test.go               |  2 +-
 metrics/jaccard.go                     |  2 +-
 metrics/jaro.go                        |  2 +-
 metrics/jaro_winkler.go                |  2 +-
 metrics/levenshtein.go                 |  2 +-
 metrics/metrics_test.go                |  2 +-
 metrics/overlap_coefficient.go         |  4 +--
 metrics/smith_waterman_gotoh.go        |  2 +-
 metrics/sorensen_dice.go               |  2 +-
 strutil.go                             |  8 +++---
 16 files changed, 38 insertions(+), 38 deletions(-)

diff --git a/README.md b/README.md
index 0a269d8..714459b 100644
--- a/README.md
+++ b/README.md
@@ -1,22 +1,22 @@
 <h1 align="center">strutil</h1>
 
 <p align="center">
-    <a href="https://github.com/adrg/strutil/actions/workflows/tests.yml">
-        <img alt="Tests status" src="https://github.com/adrg/strutil/actions/workflows/tests.yml/badge.svg">
+    <a href="https://github.com/dorzzz/strutil/actions/workflows/tests.yml">
+        <img alt="Tests status" src="https://github.com/dorzzz/strutil/actions/workflows/tests.yml/badge.svg">
     </a>
     <a href="https://codecov.io/gh/adrg/strutil">
         <img alt="Code coverage" src="https://codecov.io/gh/adrg/strutil/branch/master/graphs/badge.svg?branch=master" />
     </a>
-    <a href="https://pkg.go.dev/github.com/adrg/strutil">
-        <img alt="pkg.go.dev documentation" src="https://pkg.go.dev/badge/github.com/adrg/strutil" />
+    <a href="https://pkg.go.dev/github.com/dorzzz/strutil">
+        <img alt="pkg.go.dev documentation" src="https://pkg.go.dev/badge/github.com/dorzzz/strutil" />
     </a>
     <a href="https://opensource.org/licenses/MIT" rel="nofollow">
         <img alt="MIT license" src="https://img.shields.io/github/license/adrg/strutil" />
     </a>
-    <a href="https://goreportcard.com/report/github.com/adrg/strutil">
-        <img alt="Go report card" src="https://goreportcard.com/badge/github.com/adrg/strutil" />
+    <a href="https://goreportcard.com/report/github.com/dorzzz/strutil">
+        <img alt="Go report card" src="https://goreportcard.com/badge/github.com/dorzzz/strutil" />
     </a>
-    <a href="https://github.com/adrg/strutil/issues">
+    <a href="https://github.com/dorzzz/strutil/issues">
         <img alt="GitHub issues" src="https://img.shields.io/github/issues/adrg/strutil" />
     </a>
     <a href="https://ko-fi.com/T6T72WATK">
@@ -26,12 +26,12 @@
 
 strutil provides a collection of string metrics for calculating string similarity as well as
 other string utility functions.  
-Full documentation can be found at https://pkg.go.dev/github.com/adrg/strutil.
+Full documentation can be found at https://pkg.go.dev/github.com/dorzzz/strutil.
 
 ## Installation
 
 ```
-go get github.com/adrg/strutil
+go get github.com/dorzzz/strutil
 ```
 
 ## String metrics
@@ -60,7 +60,7 @@ func Similarity(a, b string, metric StringMetric) float64 {
 ```
 
 All defined string metrics can be found in the
-[metrics](https://pkg.go.dev/github.com/adrg/strutil/metrics) package.
+[metrics](https://pkg.go.dev/github.com/dorzzz/strutil/metrics) package.
 
 #### Hamming
 
@@ -77,7 +77,7 @@ fmt.Printf("%d\n", ham.Distance("one", "once")) // Output: 2
 ```
 
 More information and additional examples can be found on
-[pkg.go.dev](https://pkg.go.dev/github.com/adrg/strutil/metrics#Hamming).
+[pkg.go.dev](https://pkg.go.dev/github.com/dorzzz/strutil/metrics#Hamming).
 
 #### Levenshtein
 
@@ -106,7 +106,7 @@ fmt.Printf("%d\n", lev.Distance("graph", "giraffe")) // Output: 4
 ```
 
 More information and additional examples can be found on
-[pkg.go.dev](https://pkg.go.dev/github.com/adrg/strutil/metrics#Levenshtein).
+[pkg.go.dev](https://pkg.go.dev/github.com/dorzzz/strutil/metrics#Levenshtein).
 
 #### Jaro
 
@@ -116,7 +116,7 @@ fmt.Printf("%.2f\n", similarity) // Output: 0.78
 ```
 
 More information and additional examples can be found on
-[pkg.go.dev](https://pkg.go.dev/github.com/adrg/strutil/metrics#Jaro).
+[pkg.go.dev](https://pkg.go.dev/github.com/dorzzz/strutil/metrics#Jaro).
 
 #### Jaro-Winkler
 
@@ -126,7 +126,7 @@ fmt.Printf("%.2f\n", similarity) // Output: 0.80
 ```
 
 More information and additional examples can be found on
-[pkg.go.dev](https://pkg.go.dev/github.com/adrg/strutil/metrics#JaroWinkler).
+[pkg.go.dev](https://pkg.go.dev/github.com/dorzzz/strutil/metrics#JaroWinkler).
 
 #### Smith-Waterman-Gotoh
 
@@ -152,7 +152,7 @@ fmt.Printf("%.2f\n", similarity) // Output: 0.96
 ```
 
 More information and additional examples can be found on
-[pkg.go.dev](https://pkg.go.dev/github.com/adrg/strutil/metrics#SmithWatermanGotoh).
+[pkg.go.dev](https://pkg.go.dev/github.com/dorzzz/strutil/metrics#SmithWatermanGotoh).
 
 #### Sorensen-Dice
 
@@ -174,7 +174,7 @@ fmt.Printf("%.2f\n", similarity) // Output: 0.53
 ```
 
 More information and additional examples can be found on
-[pkg.go.dev](https://pkg.go.dev/github.com/adrg/strutil/metrics#SorensenDice).
+[pkg.go.dev](https://pkg.go.dev/github.com/dorzzz/strutil/metrics#SorensenDice).
 
 #### Jaccard
 
@@ -214,7 +214,7 @@ where SD is the Sorensen-Dice coefficient and J is the Jaccard index.
 ```
 
 More information and additional examples can be found on
-[pkg.go.dev](https://pkg.go.dev/github.com/adrg/strutil/metrics#Jaccard).
+[pkg.go.dev](https://pkg.go.dev/github.com/dorzzz/strutil/metrics#Jaccard).
 
 #### Overlap Coefficient
 
@@ -236,7 +236,7 @@ fmt.Printf("%.2f\n", similarity) // Output: 0.57
 ```
 
 More information and additional examples can be found on
-[pkg.go.dev](https://pkg.go.dev/github.com/adrg/strutil/metrics#OverlapCoefficient).
+[pkg.go.dev](https://pkg.go.dev/github.com/dorzzz/strutil/metrics#OverlapCoefficient).
 
 ## References
 
diff --git a/example_test.go b/example_test.go
index 3da3d77..22858d8 100644
--- a/example_test.go
+++ b/example_test.go
@@ -3,8 +3,8 @@ package strutil_test
 import (
 	"fmt"
 
-	"github.com/adrg/strutil"
-	"github.com/adrg/strutil/metrics"
+	"github.com/dorzzz/strutil"
+	"github.com/dorzzz/strutil/metrics"
 )
 
 func ExampleSimilarity() {
diff --git a/internal/mathutil/mathutil_test.go b/internal/mathutil/mathutil_test.go
index 32a1475..94c4fe1 100644
--- a/internal/mathutil/mathutil_test.go
+++ b/internal/mathutil/mathutil_test.go
@@ -3,7 +3,7 @@ package mathutil_test
 import (
 	"testing"
 
-	"github.com/adrg/strutil/internal/mathutil"
+	"github.com/dorzzz/strutil/internal/mathutil"
 	"github.com/stretchr/testify/require"
 )
 
diff --git a/internal/ngram/ngram.go b/internal/ngram/ngram.go
index 10e9041..5fc31be 100644
--- a/internal/ngram/ngram.go
+++ b/internal/ngram/ngram.go
@@ -1,6 +1,6 @@
 package ngram
 
-import "github.com/adrg/strutil/internal/mathutil"
+import "github.com/dorzzz/strutil/internal/mathutil"
 
 // Count returns the n-gram count of the specified size for the
 // provided term. An n-gram size of 1 is used if the provided size is
diff --git a/internal/ngram/ngram_test.go b/internal/ngram/ngram_test.go
index c9e0332..3a1600b 100644
--- a/internal/ngram/ngram_test.go
+++ b/internal/ngram/ngram_test.go
@@ -3,7 +3,7 @@ package ngram_test
 import (
 	"testing"
 
-	"github.com/adrg/strutil/internal/ngram"
+	"github.com/dorzzz/strutil/internal/ngram"
 	"github.com/stretchr/testify/require"
 )
 
diff --git a/internal/stringutil/stringutil_test.go b/internal/stringutil/stringutil_test.go
index b04dcd2..5175744 100644
--- a/internal/stringutil/stringutil_test.go
+++ b/internal/stringutil/stringutil_test.go
@@ -3,7 +3,7 @@ package stringutil_test
 import (
 	"testing"
 
-	"github.com/adrg/strutil/internal/stringutil"
+	"github.com/dorzzz/strutil/internal/stringutil"
 	"github.com/stretchr/testify/require"
 )
 
diff --git a/metrics/examples_test.go b/metrics/examples_test.go
index 9496e28..16f78e8 100644
--- a/metrics/examples_test.go
+++ b/metrics/examples_test.go
@@ -3,7 +3,7 @@ package metrics_test
 import (
 	"fmt"
 
-	"github.com/adrg/strutil/metrics"
+	"github.com/dorzzz/strutil/metrics"
 )
 
 func ExampleHamming() {
diff --git a/metrics/jaccard.go b/metrics/jaccard.go
index cf3215a..d4eb795 100644
--- a/metrics/jaccard.go
+++ b/metrics/jaccard.go
@@ -3,7 +3,7 @@ package metrics
 import (
 	"strings"
 
-	"github.com/adrg/strutil/internal/ngram"
+	"github.com/dorzzz/strutil/internal/ngram"
 )
 
 // Jaccard represents the Jaccard index for measuring the similarity
diff --git a/metrics/jaro.go b/metrics/jaro.go
index 7b394b2..1cf1e7d 100644
--- a/metrics/jaro.go
+++ b/metrics/jaro.go
@@ -4,7 +4,7 @@ import (
 	"strings"
 	"unicode/utf8"
 
-	"github.com/adrg/strutil/internal/mathutil"
+	"github.com/dorzzz/strutil/internal/mathutil"
 )
 
 // Jaro represents the Jaro metric for measuring the similarity
diff --git a/metrics/jaro_winkler.go b/metrics/jaro_winkler.go
index e89f00d..d2d4a65 100644
--- a/metrics/jaro_winkler.go
+++ b/metrics/jaro_winkler.go
@@ -4,7 +4,7 @@ import (
 	"strings"
 	"unicode/utf8"
 
-	"github.com/adrg/strutil/internal/stringutil"
+	"github.com/dorzzz/strutil/internal/stringutil"
 )
 
 // JaroWinkler represents the Jaro-Winkler metric for measuring the similarity
diff --git a/metrics/levenshtein.go b/metrics/levenshtein.go
index e0e9e9c..3a19432 100644
--- a/metrics/levenshtein.go
+++ b/metrics/levenshtein.go
@@ -3,7 +3,7 @@ package metrics
 import (
 	"strings"
 
-	"github.com/adrg/strutil/internal/mathutil"
+	"github.com/dorzzz/strutil/internal/mathutil"
 )
 
 // Levenshtein represents the Levenshtein metric for measuring the similarity
diff --git a/metrics/metrics_test.go b/metrics/metrics_test.go
index f2bf94e..b6502a1 100644
--- a/metrics/metrics_test.go
+++ b/metrics/metrics_test.go
@@ -4,7 +4,7 @@ import (
 	"fmt"
 	"testing"
 
-	"github.com/adrg/strutil/metrics"
+	"github.com/dorzzz/strutil/metrics"
 	"github.com/stretchr/testify/require"
 )
 
diff --git a/metrics/overlap_coefficient.go b/metrics/overlap_coefficient.go
index caeb350..2095c0a 100644
--- a/metrics/overlap_coefficient.go
+++ b/metrics/overlap_coefficient.go
@@ -3,8 +3,8 @@ package metrics
 import (
 	"strings"
 
-	"github.com/adrg/strutil/internal/mathutil"
-	"github.com/adrg/strutil/internal/ngram"
+	"github.com/dorzzz/strutil/internal/mathutil"
+	"github.com/dorzzz/strutil/internal/ngram"
 )
 
 // OverlapCoefficient represents the overlap coefficient for measuring the
diff --git a/metrics/smith_waterman_gotoh.go b/metrics/smith_waterman_gotoh.go
index ab6b49e..0f4089a 100644
--- a/metrics/smith_waterman_gotoh.go
+++ b/metrics/smith_waterman_gotoh.go
@@ -3,7 +3,7 @@ package metrics
 import (
 	"strings"
 
-	"github.com/adrg/strutil/internal/mathutil"
+	"github.com/dorzzz/strutil/internal/mathutil"
 )
 
 // SmithWatermanGotoh represents the Smith-Waterman-Gotoh metric for measuring
diff --git a/metrics/sorensen_dice.go b/metrics/sorensen_dice.go
index c48b751..506a4e9 100644
--- a/metrics/sorensen_dice.go
+++ b/metrics/sorensen_dice.go
@@ -3,7 +3,7 @@ package metrics
 import (
 	"strings"
 
-	"github.com/adrg/strutil/internal/ngram"
+	"github.com/dorzzz/strutil/internal/ngram"
 )
 
 // SorensenDice represents the Sorensen-Dice metric for measuring the
diff --git a/strutil.go b/strutil.go
index 8e69d5c..155586e 100644
--- a/strutil.go
+++ b/strutil.go
@@ -1,7 +1,7 @@
 /*
 Package strutil provides string metrics for calculating string similarity as
 well as other string utility functions. Documentation for all the metrics can
-be found at https://pkg.go.dev/github.com/adrg/strutil/metrics.
+be found at https://pkg.go.dev/github.com/dorzzz/strutil/metrics.
 
 Included string metrics:
   - Hamming
@@ -17,8 +17,8 @@ Included string metrics:
 package strutil
 
 import (
-	"github.com/adrg/strutil/internal/ngram"
-	"github.com/adrg/strutil/internal/stringutil"
+	"github.com/dorzzz/strutil/internal/ngram"
+	"github.com/dorzzz/strutil/internal/stringutil"
 )
 
 // StringMetric represents a metric for measuring the similarity between
@@ -32,7 +32,7 @@ import (
 //  - Jaccard
 //  - Overlap coefficient
 //
-// For more information see https://pkg.go.dev/github.com/adrg/strutil/metrics.
+// For more information see https://pkg.go.dev/github.com/dorzzz/strutil/metrics.
 type StringMetric interface {
 	Compare(a, b string) float64
 }

From 08e927c4dd92afa10ef1963d7ac78bedf3fd1b0a Mon Sep 17 00:00:00 2001
From: Dor Adiv <dor@fincom.co>
Date: Mon, 17 Nov 2025 17:56:01 +0200
Subject: [PATCH 3/3] feat: make Jaro and Jaro-Winkler implementations more
 canonical and formula-accurate

---
 metrics/jaro.go         | 123 +++++++++++++++++++++++++++++++++++++---
 metrics/jaro_winkler.go |  16 +++++-
 2 files changed, 130 insertions(+), 9 deletions(-)

diff --git a/metrics/jaro.go b/metrics/jaro.go
index 1cf1e7d..07e4602 100644
--- a/metrics/jaro.go
+++ b/metrics/jaro.go
@@ -12,31 +12,36 @@ import (
 //   For more information see https://en.wikipedia.org/wiki/Jaro-Winkler_distance.
 type Jaro struct {
 	// CaseSensitive specifies if the string comparison is case sensitive.
-	CaseSensitive bool
+	CaseSensitive     bool
+	UseStandardWindow int
 }
 
 // NewJaro returns a new Jaro string metric.
 //
 // Default options:
 //   CaseSensitive: true
+//   UseStandardWindow: 0 (original strutil algorithm; 1 selects the Apache-style matching)
 func NewJaro() *Jaro {
 	return &Jaro{
-		CaseSensitive: true,
+		CaseSensitive:     true,
+		UseStandardWindow: 0,
 	}
 }
 
 // Compare returns the Jaro similarity of a and b. The returned similarity is
 // a number between 0 and 1. Larger similarity numbers indicate closer matches.
 func (m *Jaro) Compare(a, b string) float64 {
-	// Check if both terms are empty.
+	// Use rune counts (UTF-8 code points) for lengths.
 	lenA, lenB := utf8.RuneCountInString(a), utf8.RuneCountInString(b)
+
+	// Check if both terms are empty.
 	if lenA == 0 && lenB == 0 {
-		return 1
+		return 1.0
 	}
 
 	// Check if one of the terms is empty.
 	if lenA == 0 || lenB == 0 {
-		return 0
+		return 0.0
 	}
 
 	// Lower terms if case insensitive comparison is specified.
@@ -45,7 +50,28 @@ func (m *Jaro) Compare(a, b string) float64 {
 		b = strings.ToLower(b)
 	}
 
-	// Get matching runes.
+	// UseStandardWindow == 1 selects the Apache-style path; any other value uses the original one.
+	if m.UseStandardWindow == 1 {
+		// Apache Commons implementation
+		if a == b {
+			return 1.0
+		}
+
+		ra := []rune(a)
+		rb := []rune(b)
+
+		matches, halfTranspositions := jaroMatches(ra, rb, m.UseStandardWindow)
+		if matches == 0 {
+			return 0.0
+		}
+
+		mFloat := float64(matches)
+		return (mFloat/float64(lenA) +
+			mFloat/float64(lenB) +
+			(mFloat-float64(halfTranspositions)/2.0)/mFloat) / 3.0
+	}
+
+	// Original strutil implementation (default)
 	halfLen := mathutil.Max(0, mathutil.Max(lenA, lenB)/2)
 	mrA := matchingRunes(a, b, halfLen)
 	mrB := matchingRunes(b, a, halfLen)
@@ -55,12 +81,78 @@ func (m *Jaro) Compare(a, b string) float64 {
 		return 0.0
 	}
 
-	// Return similarity.
 	return (float64(fmLen)/float64(lenA) +
 		float64(smLen)/float64(lenB) +
 		float64(fmLen-transpositions(mrA, mrB)/2)/float64(fmLen)) / 3.0
 }
 
+// jaroMatches mirrors Apache's JaroWinklerSimilarity.matches(...) logic on rune slices (not Java chars).
+// It returns the match count and the number of matched positions whose runes differ (half-transpositions).
+func jaroMatches(first, second []rune, useStandardWindow int) (matches int, halfTranspositions int) {
+	var maxRunes, minRunes []rune
+	if len(first) > len(second) {
+		maxRunes = first
+		minRunes = second
+	} else {
+		maxRunes = second
+		minRunes = first
+	}
+
+	// Apache: range = Math.max(max.length()/2 - 1, 0); useStandardWindow supplies the subtracted 1.
+	rng := maxInt(len(maxRunes)/2-useStandardWindow, 0)
+
+	matchIndexes := make([]int, len(minRunes))
+	for i := range matchIndexes {
+		matchIndexes[i] = -1
+	}
+	matchFlags := make([]bool, len(maxRunes))
+
+	// Find matches
+	for mi, c1 := range minRunes {
+		start := maxInt(mi-rng, 0)
+		end := minInt(mi+rng+1, len(maxRunes))
+		for xi := start; xi < end; xi++ {
+			if !matchFlags[xi] && c1 == maxRunes[xi] {
+				matchIndexes[mi] = xi
+				matchFlags[xi] = true
+				matches++
+				break
+			}
+		}
+	}
+
+	// Build the two matched sequences ms1, ms2
+	ms1 := make([]rune, matches)
+	ms2 := make([]rune, matches)
+
+	si := 0
+	for i := 0; i < len(minRunes); i++ {
+		if matchIndexes[i] != -1 {
+			ms1[si] = minRunes[i]
+			si++
+		}
+	}
+
+	si = 0
+	for i := 0; i < len(maxRunes); i++ {
+		if matchFlags[i] {
+			ms2[si] = maxRunes[i]
+			si++
+		}
+	}
+
+	// Count half-transpositions
+	for i := 0; i < len(ms1); i++ {
+		if ms1[i] != ms2[i] {
+			halfTranspositions++
+		}
+	}
+
+	return matches, halfTranspositions
+}
+
+// matchingRunes returns the matching runes between a and b within the specified limit.
+// This is the original strutil implementation.
 func matchingRunes(a, b string, limit int) []rune {
 	var (
 		runesA      = []rune(a)
@@ -83,6 +175,8 @@ func matchingRunes(a, b string, limit int) []rune {
 	return runesCommon
 }
 
+// transpositions counts the number of transpositions between two rune slices.
+// This is the original strutil implementation.
 func transpositions(a, b []rune) int {
 	var count int
 
@@ -95,3 +189,18 @@ func transpositions(a, b []rune) int {
 
 	return count
 }
+
+// Local int helpers; go.mod targets go 1.19, which predates the built-in min/max.
+func minInt(a, b int) int {
+	if a < b {
+		return a
+	}
+	return b
+}
+
+func maxInt(a, b int) int {
+	if a > b {
+		return a
+	}
+	return b
+}
diff --git a/metrics/jaro_winkler.go b/metrics/jaro_winkler.go
index d2d4a65..cdc2ae4 100644
--- a/metrics/jaro_winkler.go
+++ b/metrics/jaro_winkler.go
@@ -12,16 +12,22 @@ import (
 //   For more information see https://en.wikipedia.org/wiki/Jaro-Winkler_distance.
 type JaroWinkler struct {
 	// CaseSensitive specifies if the string comparison is case sensitive.
-	CaseSensitive bool
+	CaseSensitive     bool
+	Threshold         float64
+	UseStandardWindow int
 }
 
 // NewJaroWinkler returns a new Jaro-Winkler string metric.
 //
 // Default options:
 //   CaseSensitive: true
+//   Threshold: 0 (Winkler bonus applies when the Jaro similarity >= Threshold; 0 always applies it)
+//   UseStandardWindow: 0 (uses original strutil algorithm)
 func NewJaroWinkler() *JaroWinkler {
 	return &JaroWinkler{
-		CaseSensitive: true,
+		CaseSensitive:     true,
+		Threshold:         0,
+		UseStandardWindow: 0,
 	}
 }
 
@@ -43,8 +49,14 @@ func (m *JaroWinkler) Compare(a, b string) float64 {
 
 	jaro := NewJaro()
 	jaro.CaseSensitive = m.CaseSensitive
+	jaro.UseStandardWindow = m.UseStandardWindow
 
 	// Return similarity.
 	similarity := jaro.Compare(a, b)
+	if similarity < m.Threshold {
+		return similarity
+	}
+
+	// Apply Winkler bonus.
 	return similarity + (0.1 * float64(lenPrefix) * (1.0 - similarity))
 }