-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathzad26.py
More file actions
111 lines (85 loc) · 2.83 KB
/
zad26.py
File metadata and controls
111 lines (85 loc) · 2.83 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
import random
import matplotlib.pyplot as plt
import numpy as np
from scipy.stats import chisquare
import collections
class ReservoirSampler:
    """Reservoir sampler that retains a single element of a stream.

    ``count`` is the number of elements offered so far and ``sample`` is
    the currently retained ``(item, global_index)`` pair, or ``None``
    before the first element arrives.
    """

    def __init__(self):
        self.count = 0
        self.sample = None

    def add(self, item, global_index):
        """Offer one element; it replaces the kept one with probability 1/count.

        Args:
            item: The stream element.
            global_index: Position of ``item`` in the whole stream; stored
                together with the item.
        """
        self.count += 1
        keep_probability = 1.0 / self.count
        # For the first element the probability is 1.0 and random.random()
        # is always below 1.0, so the first element is always stored.
        if random.random() < keep_probability:
            self.sample = (item, global_index)

    def get_content(self):
        """Return the stored ``(item, global_index)`` pair, or ``None`` if empty."""
        return self.sample
class EquivalentWidthPartitions:
    """Single-element sampler over a sliding window of the last ``W`` items.

    The stream is cut into consecutive partitions of ``W`` elements. Each
    partition is summarised by one ``ReservoirSampler``:

    * ``bucket_C`` - sampler of the partition currently being filled,
    * ``bucket_A`` - sampler of the most recently completed partition
      (``None`` until the first partition is complete).

    ``t`` counts the elements processed so far.
    """

    def __init__(self, window_size):
        self.W = window_size
        self.t = 0
        self.bucket_A = None
        self.bucket_C = ReservoirSampler()

    def process(self, item):
        """Feed the next stream element, tagged with its stream index ``t``."""
        filling = self.bucket_C
        filling.add(item, self.t)
        self.t += 1
        if filling.count < self.W:
            return
        # The partition is complete: it becomes the "previous" bucket and a
        # fresh sampler starts collecting the next partition.
        self.bucket_A = filling
        self.bucket_C = ReservoirSampler()

    def get_window_sample(self):
        """Return an ``(item, index)`` sample for the current window, or ``None``.

        The sample of the completed partition is returned when its index is
        still inside the window ``[t - W, t)``; otherwise the sample of the
        partition being filled is returned (``None`` if that one is empty).
        """
        previous = self.bucket_A
        # Before W elements have been seen only the filling bucket is consulted.
        if self.t >= self.W and previous:
            candidate = previous.get_content()
            if candidate is not None:
                oldest_live_index = self.t - self.W
                _, candidate_index = candidate
                if candidate_index >= oldest_live_index:
                    return candidate
        return self.bucket_C.get_content()
# --- Experiment 1: histogram of the sampled element's position in the window ---
# After every step (once i >= W_hist) the sampler is queried and the distance
# between the current stream index and the sampled index is recorded.
N_hist = 10000
W_hist = 5
sampler = EquivalentWidthPartitions(W_hist)
positions = []
for i in range(N_hist):
    sampler.process(i)
    if i < W_hist:
        # Warm-up steps are not recorded.
        continue
    res = sampler.get_window_sample()
    if res is None:
        continue
    idx_sampled = res[1]
    # 0 means the newest element was sampled; larger values are older ones.
    relative_pos = i - idx_sampled
    positions.append(relative_pos)

plt.figure(figsize=(10, 6))
plt.hist(
    positions,
    bins=range(W_hist + 1),  # bin edges 0..W_hist -> one bin per position
    density=True,
    edgecolor='black',
    label='Symulacja',
)
plt.title(f"Histogram ppb pozycji wylosowanego elementu \nWindow={W_hist}, Iteracje={N_hist}")
plt.xlabel("Pozycja w oknie")
plt.ylabel("Gęstość")
plt.legend()
plt.grid(axis='y', alpha=0.3)
plt.show()
# --- Experiment 2: chi-square goodness-of-fit test for uniformity ---
# H0: the position of the sampled element inside the window is uniform on
# {0, ..., W_test - 1}.
# Imported here under an alias because the name ``chi2`` is used below for
# the test statistic.
from scipy.stats import chi2 as chi2_dist

W_test = 5
N_test = 10000
# Significance level. The previously hard-coded critical value 11.345 is the
# 0.99 quantile of chi-square with 3 degrees of freedom, so alpha = 0.01 is
# presumably what was intended - TODO confirm.
ALPHA = 0.01
sampler_test = EquivalentWidthPartitions(W_test)
observed_counts = collections.defaultdict(int)
valid_samples = 0
# NOTE(review): the same sampler instance is queried after every step, so
# consecutive observations are not independent draws, while the chi-square
# test assumes independent observations - confirm the intended methodology.
for i in range(N_test):
    sampler_test.process(i)
    if i < W_test:
        continue
    res = sampler_test.get_window_sample()
    if res is None:
        continue
    idx_sampled = res[1]
    pos = i - idx_sampled
    # Count only positions that fall inside the window.
    if 0 <= pos < W_test:
        observed_counts[pos] += 1
        valid_samples += 1

obs = [observed_counts[j] for j in range(W_test)]
exp = [valid_samples / W_test] * W_test
chi2, p_val = chisquare(obs, exp)
# W_test categories give W_test - 1 degrees of freedom; derive the critical
# value from the distribution instead of hard-coding a table entry (the old
# constant matched 3 degrees of freedom, not the 4 used by this test).
critical_val = chi2_dist.ppf(1 - ALPHA, df=W_test - 1)
print("\nWynik testu:")
print(f"Chi2 stat: {chi2:.4f}")
print(f"p-value: {p_val:.4f}")
print(f"Wartość krytyczna: {critical_val:.3f}")
if chi2 < critical_val:
    print("Nie ma podstaw do odrzucenia H0 - rozkład jest jednostajny")
else:
    print("Rozkład nie jest jednostajny.")