-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathingest.py
More file actions
49 lines (40 loc) · 1.23 KB
/
ingest.py
File metadata and controls
49 lines (40 loc) · 1.23 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
import gzip
import io
import json
import logging
import os
from datetime import datetime, timedelta

import pandas as pd
from dotenv import load_dotenv
from qcloud_cos import CosConfig, CosS3Client

# Load variables from a .env file into the process environment before any of
# them are read below.
load_dotenv()

# COS connection settings; each value is read from an environment variable
# and is None when that variable is not set.
config = CosConfig(
    Region=os.environ.get("COS_REGION"),
    SecretId=os.environ.get("TENCENT_SECRET_ID"),
    SecretKey=os.environ.get("TENCENT_SECRET_KEY"),
)

# Module-level COS client shared by the functions in this file.
client = CosS3Client(config)
def _iter_gz_keys(bucket, prefix):
    """Yield the key of every ``.gz`` object under *prefix* in *bucket*.

    One ``list_objects`` call returns a single page of results, so this keeps
    requesting pages until the response no longer reports truncation.
    """
    marker = ""
    while True:
        response = client.list_objects(Bucket=bucket, Prefix=prefix, Marker=marker)
        contents = response.get("Contents", [])
        for obj in contents:
            key = obj["Key"]
            if key.endswith(".gz"):
                yield key

        # IsTruncated may arrive as the string "true"/"false"; normalise it
        # before comparing so a real bool is handled as well.
        if str(response.get("IsTruncated", "false")).lower() != "true":
            return

        # Resume after NextMarker; fall back to the last key of this page when
        # the response does not carry one.
        next_marker = response.get("NextMarker") or (
            contents[-1]["Key"] if contents else ""
        )
        if not next_marker or next_marker == marker:
            # No way to advance the listing; stop rather than loop forever.
            return
        marker = next_marker


def _parse_json_lines(compressed):
    """Decode a gzip payload holding one JSON document per line.

    Returns a ``(records, skipped)`` tuple: the successfully parsed values and
    the number of non-blank lines that could not be parsed.
    """
    records = []
    skipped = 0
    with gzip.open(io.BytesIO(compressed)) as f:
        for line in f:
            if not line.strip():
                continue  # blank lines carry no record
            try:
                records.append(json.loads(line))
            except (ValueError, RecursionError):
                # ValueError covers json.JSONDecodeError and UnicodeDecodeError.
                # Deliberately best-effort: one bad line must not discard the
                # rest of the file, but Ctrl-C / SystemExit still propagate.
                skipped += 1
    return records, skipped


def fetch_logs(days_back=0):
    """Load one day's gzip-compressed JSON-lines logs from COS into a DataFrame.

    The objects read are those whose key starts with
    ``<COS_LOG_PREFIX>/<YYYYMMDD>/`` in the bucket named by ``COS_BUCKET``
    (both taken from environment variables) and ends with ``.gz``.

    Args:
        days_back: How many days before today to fetch; 0 means today. The
            date comes from ``datetime.now()``, i.e. the host's local time.
            NOTE(review): if the log folders are named by UTC date this can
            select the wrong folder around midnight - confirm.

    Returns:
        A ``pandas.DataFrame`` built from every parsed log line. It is empty
        when no matching objects exist.

    Lines that are not valid JSON are skipped; if any were skipped, a single
    warning with the total count is logged.
    """
    bucket = os.getenv("COS_BUCKET")
    date = datetime.now() - timedelta(days=days_back)
    prefix = f"{os.getenv('COS_LOG_PREFIX')}/{date.strftime('%Y%m%d')}/"

    logs = []
    skipped = 0
    for key in _iter_gz_keys(bucket, prefix):
        res = client.get_object(Bucket=bucket, Key=key)
        compressed = res["Body"].get_raw_stream().read()
        records, bad = _parse_json_lines(compressed)
        logs.extend(records)
        skipped += bad

    if skipped:
        logging.getLogger(__name__).warning(
            "Skipped %d unparseable log line(s) under %s", skipped, prefix
        )
    return pd.DataFrame(logs)
if __name__ == "__main__":
    # Script entry point: fetch today's logs, preview the first rows, then
    # report how many records were loaded.
    frame = fetch_logs()
    preview = frame.head()
    total = len(frame)
    print(preview)
    print(f"\n총 {total} 개 로그 로드됨")