-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathbd_explore_1.py
More file actions
67 lines (58 loc) · 2.27 KB
/
bd_explore_1.py
File metadata and controls
67 lines (58 loc) · 2.27 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
import os
import polars as pl
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.manifold import MDS
DATA_DIR = "datasets/raw/1h"
def load_asset_closes_polars(folder_path):
    """Load the ``close`` series of every ``.csv`` file in *folder_path*.

    For each file, only the ``timestamp`` and ``close`` columns are read.
    ``close`` is renamed to the asset name (the file name without its
    trailing ``.csv``) and ``timestamp`` is parsed from string into
    ``pl.Datetime``. Files that cannot be read or parsed are reported on
    stdout and skipped.

    Files are visited in sorted name order so that the returned list (and
    any column order derived from it) is reproducible across runs and
    platforms; ``os.listdir`` itself returns entries in arbitrary order.

    Args:
        folder_path: Directory containing one CSV file per asset.

    Returns:
        A list with one Polars DataFrame per successfully loaded file, each
        holding a ``timestamp`` column and a column named after the asset.
        The list is empty when nothing could be loaded.
    """
    suffix = ".csv"
    dfs = []
    for filename in sorted(os.listdir(folder_path)):
        if not filename.endswith(suffix):
            continue
        # Strip only the trailing extension; str.replace would also remove
        # any ".csv" appearing earlier in the file name.
        asset_name = filename[: -len(suffix)]
        filepath = os.path.join(folder_path, filename)
        try:
            df = pl.read_csv(filepath, columns=["timestamp", "close"]).rename({"close": asset_name})
            df = df.with_columns(
                pl.col("timestamp").str.strptime(pl.Datetime)  # no format given: Polars infers it
            )
        except Exception as e:
            # Deliberately best-effort: one unreadable file must not abort
            # the whole load, so report it and move on.
            print(f"Skipping {filename}: {e}")
        else:
            dfs.append(df)
    return dfs
def merge_assets_polars(dfs):
    """Combine per-asset frames into one wide frame keyed by ``timestamp``.

    The frames are folded left to right with a full join on ``timestamp``
    (join keys coalesced into a single column), and the result is sorted by
    ``timestamp``.

    Args:
        dfs: Sequence of frames, each carrying a ``timestamp`` column.

    Returns:
        The joined frame, sorted by ``timestamp``.

    Raises:
        ValueError: If *dfs* is empty.
    """
    if not dfs:
        raise ValueError("No dataframes were loaded. Check for parse errors or missing files.")

    remaining = iter(dfs)
    combined = next(remaining)
    for frame in remaining:
        combined = combined.join(frame, on="timestamp", how="full", coalesce=True)

    return combined.sort("timestamp")
def compute_correlation_matrix(df_merged):
    """Return the pairwise correlation matrix of the merged asset columns.

    The frame is converted to pandas, ``timestamp`` becomes the index (so it
    is excluded from the correlation), and ``DataFrame.corr()`` is applied
    with its default settings.

    Args:
        df_merged: Frame exposing ``to_pandas()`` and containing a
            ``timestamp`` column plus one column per asset.

    Returns:
        A pandas DataFrame of correlations between the asset columns.
    """
    prices = df_merged.to_pandas()
    prices = prices.set_index("timestamp")
    return prices.corr()
def plot_correlation_heatmap(corr_matrix):
    """Display *corr_matrix* as a heatmap with square cells.

    The plot uses the ``coolwarm`` colormap centered at 0 on a 16x14 inch
    figure and is shown via ``plt.show()``.

    Args:
        corr_matrix: Correlation matrix to visualise.
    """
    fig, ax = plt.subplots(figsize=(16, 14))
    sns.heatmap(corr_matrix, cmap="coolwarm", center=0, square=True, ax=ax)
    ax.set_title("Asset Correlation Heatmap")
    fig.tight_layout()
    plt.show()
def plot_mds_projection(corr_matrix):
    """Plot a 2D MDS embedding of the assets based on their correlations.

    Dissimilarity is ``1 - |corr|``; missing correlations are first replaced
    by 0, so they count as the maximum dissimilarity of 1. The embedding is
    computed with a fixed ``random_state`` of 42, and every asset is drawn
    as a labelled point.

    Args:
        corr_matrix: Correlation matrix whose columns name the assets.
    """
    abs_corr = corr_matrix.fillna(0).abs()
    dissimilarity = 1 - abs_corr

    embedder = MDS(n_components=2, dissimilarity="precomputed", random_state=42)
    coords = embedder.fit_transform(dissimilarity)

    plt.figure(figsize=(12, 10))
    # One embedding row per asset, in the same order as the matrix columns.
    for (x, y), asset in zip(coords, corr_matrix.columns):
        plt.scatter(x, y, label=asset)
        plt.text(x, y, asset, fontsize=8)
    plt.title("MDS 2D Projection of Asset Correlations")
    plt.grid(True)
    plt.tight_layout()
    plt.show()
# --- MAIN ---
# Pipeline: load per-asset closes, merge on timestamp, correlate, then plot.
# NOTE: this runs at module level, i.e. also whenever the file is imported.
dfs = load_asset_closes_polars(DATA_DIR)
if dfs:
    df_merged = merge_assets_polars(dfs)
    corr_matrix = compute_correlation_matrix(df_merged)
    plot_correlation_heatmap(corr_matrix)
    plot_mds_projection(corr_matrix)
else:
    # Nothing could be loaded: report it instead of failing in the merge step.
    print("🚨 No data loaded! Please check your dataset files and format.")