-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathverify_installation.py
More file actions
227 lines (180 loc) · 6.66 KB
/
verify_installation.py
File metadata and controls
227 lines (180 loc) · 6.66 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
#!/usr/bin/env python3
"""
Quick verification script to test the InferQ installation and basic functionality.
"""
import sys
import os
# Add src to path for direct execution
sys.path.insert(0, os.path.join(os.path.dirname(__file__), 'src'))
import pandas as pd
import numpy as np
def test_imports():
    """Check that the InferQ package and the names this script relies on import.

    The imported names are deliberately unused: a successful import is the
    whole check.

    Returns:
        bool: True when every import succeeds, False otherwise (the error is
        printed rather than raised).
    """
    print("Testing imports...")
    succeeded = False
    try:
        # Top-level package entry points.
        from inferq import get_default_registry, QualityMetricRegistry  # noqa: F401
        # Metric functions.
        from inferq.quality_metrics import compute_completeness  # noqa: F401
        from inferq.quality_metrics import compute_outlier_rate  # noqa: F401
        from inferq.quality_metrics import compute_duplicate_rate  # noqa: F401
        # Utility helpers.
        from inferq.utils import detect_outliers_iqr  # noqa: F401
        from inferq.utils import validate_range_constraint  # noqa: F401
        from inferq.utils import compute_data_profile  # noqa: F401

        print("✓ All imports successful")
        succeeded = True
    except Exception as e:
        print(f"✗ Import failed: {e}")
    return succeeded
def test_basic_metrics():
    """Check that the default registry computes core metrics within [0, 1].

    Builds a small two-column DataFrame (column ``a`` contains one missing
    value) and computes ``completeness``, ``outlier_rate`` and
    ``duplicate_rate`` through the registry returned by
    ``inferq.get_default_registry``.

    The range checks raise ``AssertionError`` explicitly instead of using
    ``assert`` statements, so they are still enforced when Python runs with
    ``-O`` (which strips ``assert``).

    Returns:
        bool: True when all three metrics fall in ``[0.0, 1.0]``; False if the
        import, a computation, or a range check fails (the error is printed
        rather than raised).
    """
    print("\nTesting basic metrics...")
    try:
        from inferq import get_default_registry

        # Create sample data
        df = pd.DataFrame({
            'a': [1, 2, 3, None, 5],
            'b': [10, 20, 30, 40, 50],
        })

        registry = get_default_registry()

        # Test completeness
        completeness = registry.compute('completeness', df)
        if not (0.0 <= completeness <= 1.0):
            raise AssertionError("Completeness out of range")

        # Test outlier rate
        outlier_rate = registry.compute('outlier_rate', df)
        if not (0.0 <= outlier_rate <= 1.0):
            raise AssertionError("Outlier rate out of range")

        # Test duplicate rate
        duplicate_rate = registry.compute('duplicate_rate', df)
        if not (0.0 <= duplicate_rate <= 1.0):
            raise AssertionError("Duplicate rate out of range")

        print(f"✓ Basic metrics working (completeness: {completeness:.3f})")
        return True
    except Exception as e:
        print(f"✗ Basic metrics failed: {e}")
        return False
def test_registry():
    """Check that the default registry is populated with the expected entries.

    Verifies that the registry returned by ``inferq.get_default_registry`` is
    non-empty, that ``list_categories()`` includes ``'completeness'`` and
    ``'outliers'``, and that the ``'completeness'`` and ``'outlier_rate'``
    metrics are registered.

    The checks raise ``AssertionError`` explicitly instead of using ``assert``
    statements, so they are still enforced when Python runs with ``-O``.

    Returns:
        bool: True when every check passes; False if the import or any check
        fails (the error is printed rather than raised).
    """
    print("\nTesting registry...")
    try:
        from inferq import get_default_registry

        registry = get_default_registry()

        # Check registry has metrics
        if len(registry) == 0:
            raise AssertionError("Registry is empty")

        # Check categories exist
        categories = registry.list_categories()
        if 'completeness' not in categories:
            raise AssertionError("Missing completeness category")
        if 'outliers' not in categories:
            raise AssertionError("Missing outliers category")

        # Check specific metrics exist
        if 'completeness' not in registry:
            raise AssertionError("Missing completeness metric")
        if 'outlier_rate' not in registry:
            raise AssertionError("Missing outlier_rate metric")

        print(f"✓ Registry working ({len(registry)} metrics, {len(categories)} categories)")
        return True
    except Exception as e:
        print(f"✗ Registry failed: {e}")
        return False
def test_custom_metric():
    """Check that a user-defined metric can be registered and computed.

    Creates a fresh ``QualityMetricRegistry``, registers a constant-valued
    metric under the name ``'test_metric'`` via ``register_function``, and
    confirms that ``compute`` returns that constant for a small DataFrame.

    The result check raises ``AssertionError`` explicitly instead of using an
    ``assert`` statement, so it is still enforced when Python runs with ``-O``.

    Returns:
        bool: True when the registered metric returns the expected value;
        False if the import, registration, computation, or check fails (the
        error is printed rather than raised).
    """
    print("\nTesting custom metrics...")
    try:
        from inferq.quality_metrics import QualityMetricRegistry

        registry = QualityMetricRegistry()

        # Create and register custom metric
        def my_metric(df):
            return 0.75

        registry.register_function(
            name='test_metric',
            func=my_metric,
            category='test',
        )

        # Test it
        df = pd.DataFrame({'a': [1, 2, 3]})
        result = registry.compute('test_metric', df)
        if result != 0.75:
            raise AssertionError("Custom metric returned wrong value")

        print("✓ Custom metrics working")
        return True
    except Exception as e:
        print(f"✗ Custom metrics failed: {e}")
        return False
def test_constraints():
    """Check that the ``constraint_violation`` metric flags invalid rows.

    Uses a DataFrame whose ``age`` column contains values outside 0..120 and
    whose ``status`` column contains a value outside the allowed set, then
    computes ``constraint_violation`` once with a range constraint and once
    with an enum constraint. Each result must be greater than zero.

    The checks raise ``AssertionError`` explicitly instead of using ``assert``
    statements, so they are still enforced when Python runs with ``-O``.

    Returns:
        bool: True when both constraint types report violations; False if the
        import, a computation, or a check fails (the error is printed rather
        than raised).
    """
    print("\nTesting constraint validation...")
    try:
        from inferq import get_default_registry

        df = pd.DataFrame({
            'age': [25, 30, -5, 150],
            'status': ['active', 'inactive', 'active', 'invalid'],
        })

        registry = get_default_registry()

        # Test range constraint
        constraints = [
            {'type': 'range', 'column': 'age', 'min': 0, 'max': 120}
        ]
        violations = registry.compute('constraint_violation', df, constraints=constraints)
        if not (violations > 0):
            raise AssertionError("Should detect violations")

        # Test enum constraint
        constraints = [
            {'type': 'enum', 'column': 'status', 'values': ['active', 'inactive']}
        ]
        violations = registry.compute('constraint_violation', df, constraints=constraints)
        if not (violations > 0):
            raise AssertionError("Should detect violations")

        print("✓ Constraint validation working")
        return True
    except Exception as e:
        print(f"✗ Constraint validation failed: {e}")
        return False
def test_utils():
    """Check the outlier, range-validation and normalization utilities.

    Runs three helpers from ``inferq.utils`` on the series ``[1, 2, 3, 100]``:

    * ``detect_outliers_iqr`` must flag at least one element.
    * ``validate_range_constraint`` with bounds 0..50 must not mark every
      element as valid.
    * ``normalize_numeric_data`` with ``method='minmax'`` must yield values
      whose minimum is >= 0 and maximum is <= 1.

    The checks raise ``AssertionError`` explicitly instead of using ``assert``
    statements, so they are still enforced when Python runs with ``-O``.

    Returns:
        bool: True when all three checks pass; False if the import, a helper
        call, or a check fails (the error is printed rather than raised).
    """
    print("\nTesting utilities...")
    try:
        from inferq.utils import (
            detect_outliers_iqr,
            validate_range_constraint,
            normalize_numeric_data,
        )

        # Test outlier detection
        data = pd.Series([1, 2, 3, 100])
        outliers = detect_outliers_iqr(data)
        if not outliers.any():
            raise AssertionError("Should detect outliers")

        # Test range validation
        valid = validate_range_constraint(data, min_val=0, max_val=50)
        if valid.all():
            raise AssertionError("Should detect invalid values")

        # Test normalization
        normalized = normalize_numeric_data(data, method='minmax')
        if not (normalized.min() >= 0 and normalized.max() <= 1):
            raise AssertionError("Normalization failed")

        print("✓ Utilities working")
        return True
    except Exception as e:
        print(f"✗ Utilities failed: {e}")
        return False
def main():
    """Run every verification check and print an overall summary.

    Prints a banner, executes each check in a fixed order, then reports how
    many passed.

    Returns:
        int: 0 when every check returned True, 1 otherwise. The value is
        intended for use as the process exit status.
    """
    banner = "=" * 70
    print(banner)
    print("InferQ Installation Verification")
    print(banner)

    checks = (
        test_imports,
        test_basic_metrics,
        test_registry,
        test_custom_metric,
        test_constraints,
        test_utils,
    )

    # Every check runs regardless of earlier failures so all problems are shown.
    results = [check() for check in checks]

    print("\n" + banner)
    passed = sum(results)
    total = len(results)

    if passed != total:
        print(f"⚠️ SOME TESTS FAILED ({passed}/{total} passed)")
        print(banner)
        return 1

    print(f"✅ ALL TESTS PASSED ({passed}/{total})")
    print(banner)
    print("\nInferQ is ready to use!")
    print("Run 'python examples/basic_usage.py' to see it in action.")
    return 0
# Script entry point: exit with main()'s return value as the process status.
if __name__ == '__main__':
    raise SystemExit(main())