-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathzad33.py
More file actions
84 lines (60 loc) · 2.16 KB
/
zad33.py
File metadata and controls
84 lines (60 loc) · 2.16 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
import random
import matplotlib.pyplot as plt
class EstimateUniqueWithKStatistics:
def __init__(self, k: int):
self.k = k
self.smallest_hashes = []
# Dowód, że to k-niezlaeżna rodzina funkcji: https://www.mimuw.edu.pl/~kowalik/teach/mpa/hash1.pdf
self.p = 2**89 - 1
self.a = random.randint(1, self.p - 1)
self.b = random.randint(0, self.p - 1)
def _hash(self, x: int) -> float:
h_int = (self.a * x + self.b) % self.p
return h_int / self.p
def add(self, item):
h_val = self._hash(item)
if len(self.smallest_hashes) == self.k and h_val >= self.smallest_hashes[-1]:
return
if h_val not in self.smallest_hashes:
self.smallest_hashes.append(h_val)
self.smallest_hashes.sort()
if len(self.smallest_hashes) > self.k:
self.smallest_hashes.pop()
def estimate(self) -> int:
if len(self.smallest_hashes) < self.k:
return len(self.smallest_hashes)
U_k = self.smallest_hashes[-1]
if U_k == 0: return 0
return int((self.k - 1) / U_k)
k = 400
true_n_limit = 20000
steps = 50
euk = EstimateUniqueWithKStatistics(k)
true_counts = []
estimated_counts = []
relative_errors = []
stream = range(true_n_limit)
for i, item in enumerate(stream):
euk.add(item)
current_n = i + 1
est = euk.estimate()
true_counts.append(current_n)
estimated_counts.append(est)
err = (est - current_n) / current_n
relative_errors.append(err)
fig, (ax1, ax2) = plt.subplots(2, 1, figsize=(10, 10))
ax1.plot(true_counts, true_counts, 'g-', label='Rzeczywista liczność', linewidth=2)
ax1.plot(true_counts, estimated_counts, 'b--', label='Estymacja', alpha=0.8)
ax1.set_title(f'Dokładność estymacji')
ax1.set_ylabel('Liczba unikalnych elementów')
ax1.legend()
ax1.grid(True)
ax2.plot(true_counts, relative_errors, 'r.-')
ax2.axhline(0, color='black', lw=1)
ax2.set_title('Błąd względny estymacji')
ax2.set_xlabel('Liczba przetworzonych elementów')
ax2.set_ylabel('Błąd')
ax2.legend()
ax2.grid(True)
plt.tight_layout()
plt.show()