-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathOCT_raw_data_inspection.py
More file actions
49 lines (42 loc) · 1.69 KB
/
OCT_raw_data_inspection.py
File metadata and controls
49 lines (42 loc) · 1.69 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
import polars as pl
import os
import sys
from datetime import datetime
# Directory that holds the raw per-token CSV files; adjust if needed.
DATA_DIR: str = "datasets/raw/full_scope/1h"

# Symbol whose CSV file this script inspects.
TOKEN: str = "LINKUSDT"

# Tiny offset added to prices before taking logarithms, so a zero price
# does not produce log(0).
EPS: float = 1e-9
def load_raw_link() -> pl.DataFrame:
    """
    Read the `{TOKEN}_1h_historical_data.csv` file found under DATA_DIR.

    The process is terminated through sys.exit (with an error message) when
    the file does not exist, or when one of the required columns is absent;
    in the latter case only the first missing column, in the order listed
    below, is reported.

    Returns:
        The CSV contents with "timestamp" parsed from
        "%Y-%m-%d %H:%M:%S" strings into pl.Datetime, sorted by timestamp.
    """
    csv_path = os.path.join(DATA_DIR, f"{TOKEN}_1h_historical_data.csv")
    if not os.path.exists(csv_path):
        sys.exit(f"ERROR: Missing file {csv_path}")

    frame = pl.read_csv(csv_path)

    expected_columns = (
        "timestamp",
        "open",
        "high",
        "low",
        "close",
        "volume",
        "quote_asset_volume",
        "number_of_trades",
        "taker_buy_base_asset_volume",
        "taker_buy_quote_asset_volume",
    )
    # Stop at the first absent column so the message names exactly one column.
    first_missing = next(
        (name for name in expected_columns if name not in frame.columns),
        None,
    )
    if first_missing is not None:
        sys.exit(f"ERROR: column {first_missing} missing in {TOKEN} file")

    # The timestamp column is read as text; convert it before sorting.
    parsed_timestamp = pl.col("timestamp").str.strptime(
        pl.Datetime, "%Y-%m-%d %H:%M:%S"
    )
    return frame.with_columns(parsed_timestamp).sort("timestamp")
def compute_low_vs_prev_close(df: pl.DataFrame) -> pl.DataFrame:
    """
    Append a "low_vs_prev_close" column to the frame.

    The value is the log return of the current low against the previous
    row's close: log(low + EPS) - log(prev_close + EPS). EPS keeps the
    logarithm finite when a price is zero.

    The previous close comes from shifting "close" down by one row, so the
    input is expected to be in chronological order already.
    """
    log_low = (pl.col("low") + EPS).log()
    log_prev_close = (pl.col("close").shift(1) + EPS).log()
    log_return = log_low - log_prev_close
    return df.with_columns(log_return.alias("low_vs_prev_close"))
def flag_anomalies(df: pl.DataFrame, threshold: float = -5):
    """
    Print the rows whose "low_vs_prev_close" is strictly below `threshold`.

    Prints "No anomalies found." when no row qualifies; otherwise prints a
    header followed by one line per flagged row with its timestamp and
    value. Nothing is returned.
    """
    suspicious = df.filter(pl.col("low_vs_prev_close") < threshold)

    # Guard clause: nothing to report.
    if suspicious.is_empty():
        print("No anomalies found.")
        return

    print("Anomalies detected:")
    timestamps = suspicious["timestamp"]
    values = suspicious["low_vs_prev_close"]
    for when, value in zip(timestamps, values):
        print(f"Timestamp: {when}, Value: {value}")
if __name__ == "__main__":
    # Script entry point: load the raw data, derive the low-vs-previous-close
    # log return, then print every row that falls below the threshold.
    df = compute_low_vs_prev_close(load_raw_link())
    flag_anomalies(df, threshold=-5)