-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathexample_advanced.py
More file actions
147 lines (131 loc) · 4.67 KB
/
example_advanced.py
File metadata and controls
147 lines (131 loc) · 4.67 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
"""
ExisEcho API - Advanced Example
This example demonstrates advanced matching options including:
- Weighted columns
- Phonetic matching
- Custom preprocessing
- Multiple columns
Get your FREE test API key by emailing: exisllc@gmail.com
Documentation: https://www.fuzzy-logic.com/Api
"""
from exisecho import ExisEchoClient
# Placeholder API key handed to ExisEchoClient in main(); replace it with
# your own key before running the example.
API_KEY = "your-api-key-here"
def main():
    """Run the advanced ExisEcho duplicate-detection example.

    Builds five sample customer records, sends them to
    ``client.find_duplicates`` together with per-column matching options
    and a 0.65 threshold, then prints the summary, every reported match
    and the API usage. If the response is not marked successful, the
    error message carried by the response is printed instead.
    """
    # Initialize the client
    client = ExisEchoClient(API_KEY)

    # Sample customer records with multiple fields
    records = [
        {
            "id": "1",
            "fields": {
                "first_name": "Robert",
                "last_name": "Smith",
                "phone": "(555) 123-4567",
                "address": "123 Main Street",
            },
        },
        {
            "id": "2",
            "fields": {
                "first_name": "Bob",
                "last_name": "Smyth",
                "phone": "555-123-4567",
                "address": "123 Main St.",
            },
        },
        {
            "id": "3",
            "fields": {
                "first_name": "Dr. Robert",
                "last_name": "Smith Jr.",
                "phone": "5551234567",
                "address": "123 Main Street, Apt 4",
            },
        },
        {
            "id": "4",
            "fields": {
                "first_name": "Jennifer",
                "last_name": "Johnson",
                "phone": "(555) 987-6543",
                "address": "456 Oak Avenue",
            },
        },
        {
            "id": "5",
            "fields": {
                "first_name": "Jenny",
                "last_name": "Johnson",
                "phone": "555.987.6543",
                "address": "456 Oak Ave",
            },
        },
    ]

    print("ExisEcho API - Advanced Example")
    print("=" * 50)
    print()
    print("Using weighted columns and specialized matching options...")
    print()

    # Advanced column options with different settings per column.
    # NOTE: the option semantics are defined by the ExisEcho API; the inline
    # notes below describe the intended effect - confirm against the API docs.
    column_options = {
        "first_name": {
            "weight": 2.0,  # Name is important
            "usePhoneticMatching": True,  # Match names that sound alike
            "equateSynonyms": True,  # Robert = Bob as synonyms
            "removeHonorifics": True,  # Remove Dr., Mr., etc.
            "ignoreCase": True,
        },
        "last_name": {
            "weight": 2.5,  # Last name is most important
            "usePhoneticMatching": True,  # Smith = Smyth
            "removePunctuation": True,  # e.g. the "." in "Jr."
            "ignoreCase": True,
        },
        "phone": {
            "weight": 1.5,
            "removeNonDigits": True,  # Only compare digits
        },
        "address": {
            "weight": 1.0,
            "equateSynonyms": True,  # Street = St, Avenue = Ave
            "equateAddressAbbreviations": True,  # NYC = New York, SF = San Francisco
            "removePunctuation": True,
            "ignoreCase": True,
        },
    }

    result = client.find_duplicates(
        records=records,
        threshold=0.65,
        column_options=column_options,
    )

    # Guard clause: a response without a truthy "success" is reported as an error.
    if not result.get("success"):
        print(f"Error: {result.get('error', 'Unknown error')}")
        return

    summary = result["summary"]
    print("Results:")
    print(f" - Records analyzed: {summary['totalRecordsAnalyzed']}")
    print(f" - Duplicates found: {summary['totalMatches']}")
    print(f" - Average score: {summary['averageScore']:.0%}")
    print(f" - Processing time: {summary['processingTimeMs']}ms")
    print()

    if result["matches"]:
        # Index the input once so each match resolves its records with a
        # dict lookup instead of rescanning the whole list twice per match.
        records_by_id = {record["id"]: record for record in records}

        print("Potential Duplicates:")
        print("-" * 60)
        for match in result["matches"]:
            print(f"Match Score: {match['score']:.0%} ({match['quality']})")
            _print_record(match["record1Id"], records_by_id)
            _print_record(match["record2Id"], records_by_id)
            print()

    usage = result["usage"]
    print(f"API Usage: {usage['unitsConsumed']} units consumed, {usage['unitsRemaining']} remaining")


def _print_record(record_id, records_by_id):
    """Print the name, phone and address of one record taking part in a match.

    Args:
        record_id: ID reported by the API for one side of the match.
        records_by_id: Mapping from record ID to the locally built record dict.
    """
    print(f" Record {record_id}:")
    record = records_by_id.get(record_id)
    if record is None:
        # The response referenced an ID that was not in the submitted input;
        # say so instead of aborting the whole report.
        print(" (record not found in local input)")
        return
    fields = record["fields"]
    print(f" Name: {fields['first_name']} {fields['last_name']}")
    print(f" Phone: {fields['phone']}")
    print(f" Address: {fields['address']}")
# Run the example only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()