# Stock/exercise.py at main · seriserious/Stock · GitHub

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
from selenium import webdriver
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.chrome.options import Options
from webdriver_manager.chrome import ChromeDriverManager
from selenium.webdriver.common.by import By
from transformers import pipeline
from keybert import KeyBERT
from datetime import datetime, timedelta
import pytz
import time
import csv
import yfinance as yf

# ========================================
# 📌 ETF 키워드 매핑 룰셋
# ========================================
# Rule table linking a keyword fragment to an ETF ticker.
#   key       -> fragment compared case-insensitively against the keyphrases
#                extracted from a tweet (see map_keywords_to_etfs).
#   'ticker'  -> symbol later handed to get_etf_returns for the price lookup.
#   'comment' -> Korean-language trading rationale printed next to the ticker;
#                it is runtime output, so it is intentionally left untranslated.
# Several keys share one ticker (e.g. 'oil' and 'energy' both map to 'XLE').
ETF_RULES = {
    'oil':         {'ticker': 'XLE',  'comment': '유가 발언, 최근 흐름 따라 판단 → 조건부 매수/매도'},
    'energy':      {'ticker': 'XLE',  'comment': '에너지 섹터 언급 → 조건부 매수'},
    'tariff':      {'ticker': 'XLI',  'comment': '관세 정책 → 산업 보호 기대 → 매수'},
    'inflation':   {'ticker': 'XLF',  'comment': '금리 인상 우려 → 금융 약세 → 매도'},
    'interest':    {'ticker': 'XLF',  'comment': '금리 인하 압박 → 금융 약세 가능 → 매도'},
    'tech':        {'ticker': 'XLK',  'comment': '기술 낙관론이면 매수, 규제면 매도'},
    # Mixed-case key: the matcher lowercases both sides before comparing.
    'AI':          {'ticker': 'XLK',  'comment': 'AI 호재 기대감 → 매수'},
    'healthcare':  {'ticker': 'XLV',  'comment': '의료 개입 언급 → 규제면 매도, 감세면 매수'},
    'virus':       {'ticker': 'XLV',  'comment': '보건 위기 → 단기 방어적 수요 → 매수'},
    'military':    {'ticker': 'XLI',  'comment': '국방 투자 증가 기대 → 산업 매수'},
    'defense':     {'ticker': 'XLI',  'comment': '국방 강화 메시지 → 산업 매수'},
    'media':       {'ticker': 'XLC',  'comment': '미디어 언급 증가 → 통신 섹터 수요 증가 → 매수'},
    'communications': {'ticker': 'XLC', 'comment': '언론·SNS 관련 언급 → XLC 연관'},
    'consumer':    {'ticker': 'XLY',  'comment': '소비 확대 발언 → 소비재(경기민감) 매수'},
    'retail':      {'ticker': 'XLY',  'comment': '소매 소비 회복 신호 → XLY 매수'},
    'electricity': {'ticker': 'XLU',  'comment': '전력망 안정성 강조 → XLU 방어적 수요'},
    'utilities':   {'ticker': 'XLU',  'comment': '공공요금/전력 언급 → XLU 중립 또는 매수'},
    # Multi-word keys only match keyphrases that contain the whole phrase.
    'real estate': {'ticker': 'XLRE', 'comment': '부동산 언급 → 금리 흐름 따라 반대 방향 판단'},
    'mortgage':    {'ticker': 'XLRE', 'comment': '모기지 부담 해소 발언 → XLRE 매수'},
    'American stocks': {'ticker': 'IVV', 'comment': '미국 주식 매수 권유 → IVV 매수'},
    # NOTE(review): the rationale text names IVV, QQQ and DIA, but only IVV is
    # mapped as the ticker here — confirm whether that is intended.
    'stock market': {'ticker': 'IVV', 'comment': '시장 전반 호재 → IVV, QQQ, DIA 긍정'},
}

# ========================================
# 📌 1. 트윗 본문 추출 함수
# ========================================
def extract_text_from_article(article):
    """Return the text of a tweet ``<article>`` element as one string.

    Collects every ``<span>`` under the ``div[data-testid="tweetText"]``
    container and joins their text with single spaces.

    Args:
        article: A Selenium element (anything exposing ``find_elements``).

    Returns:
        The joined span text, or ``''`` when the lookup fails for any reason
        (for example the element went stale, or ``article`` is not an element).
    """
    try:
        spans = article.find_elements(By.XPATH, './/div[@data-testid="tweetText"]//span')
        return ' '.join(span.text for span in spans)
    except Exception:
        # Best-effort by design: one unreadable tweet must not abort the whole
        # scrape. `Exception` (not a bare except) lets Ctrl-C / SystemExit
        # still propagate.
        return ''

# ========================================
# 📌 2. 트윗 수집기 (본문 + 작성시각 포함)
# ========================================
def get_trump_posts_with_timestamp(max_scrolls=5):
    """Scrape tweets from the @realDonaldTrump timeline with headless Chrome.

    Performs ``max_scrolls`` rounds; each round reads every tweet article
    currently in the DOM and then scrolls to the bottom of the page. Tweets
    are de-duplicated by their text.

    Args:
        max_scrolls: Number of read-then-scroll rounds to perform.

    Returns:
        A list of ``{"timestamp": ..., "text": ...}`` dicts in first-seen
        order. ``timestamp`` is the tweet time converted to US/Eastern and
        formatted as ``"%Y-%m-%d %H:%M"``, or ``None`` when the tweet's
        ``<time>`` tag is missing or cannot be parsed.
    """
    options = Options()
    for flag in (
        "--headless",
        "--no-sandbox",
        "--disable-dev-shm-usage",
        "--window-size=1920,1080",
        "--disable-blink-features=AutomationControlled",
    ):
        options.add_argument(flag)

    eastern = pytz.timezone("US/Eastern")  # loop-invariant, so looked up once
    collected = []
    seen_texts = set()  # O(1) duplicate check instead of rescanning `collected`

    driver = webdriver.Chrome(service=Service(ChromeDriverManager().install()), options=options)
    try:
        driver.get("https://twitter.com/realDonaldTrump")
        time.sleep(3)  # fixed wait for the initial render

        for _ in range(max_scrolls):
            articles = driver.find_elements(By.XPATH, '//article[@data-testid="tweet"]')
            for article in articles:
                content = extract_text_from_article(article)
                if not content or content in seen_texts:
                    continue

                try:
                    time_tag = article.find_element(By.XPATH, './/time')
                    utc_time_str = time_tag.get_attribute("datetime")
                    # fromisoformat may reject a trailing "Z" on older Pythons,
                    # so spell the UTC offset out explicitly.
                    utc_time = datetime.fromisoformat(utc_time_str.replace("Z", "+00:00"))
                    timestamp = utc_time.astimezone(eastern).strftime("%Y-%m-%d %H:%M")
                except Exception:
                    # Keep the tweet even without a usable timestamp.
                    timestamp = None

                seen_texts.add(content)
                collected.append({"timestamp": timestamp, "text": content})

            driver.execute_script("window.scrollTo(0, document.body.scrollHeight);")
            time.sleep(2)  # give lazily loaded tweets time to appear
    finally:
        # Always shut the browser down, even if scraping raised part-way,
        # so no orphaned Chrome/chromedriver processes are left behind.
        driver.quit()

    return collected

# ========================================
# 📌 3. NLP 분석기 (요약 + 키워드)
# ========================================
# Shared NLP models, constructed once at module level and reused by
# analyze_tweet on every call.
# NOTE(review): building these at import time presumably loads (and on a first
# run downloads) the model weights, which would make importing this module
# slow — confirm before importing it from other code.
summarizer = pipeline("summarization", model="facebook/bart-large-cnn")  # transformers summarization pipeline
kw_model = KeyBERT(model="all-MiniLM-L6-v2")  # keyphrase extractor used for ETF matching

def analyze_tweet(text, max_length=60):
    """Summarize a tweet and extract its top keyphrases.

    Args:
        text: Raw tweet text.
        max_length: ``max_length`` forwarded to the summarizer.

    Returns:
        A ``(summary, keyword_list)`` tuple. ``summary`` is ``text`` itself
        when it has fewer than 20 whitespace-separated words, otherwise the
        summarizer's output. ``keyword_list`` holds the first item of each of
        the (up to five) keyphrase entries returned by the keyword model.
    """
    word_count = len(text.split())
    if word_count >= 20:
        outputs = summarizer(text, max_length=max_length, min_length=15, do_sample=False)
        summary = outputs[0]['summary_text']
    else:
        # Short tweets are already their own summary.
        summary = text

    scored_phrases = kw_model.extract_keywords(
        text,
        keyphrase_ngram_range=(1, 3),
        stop_words='english',
        top_n=5,
    )
    # Each entry is indexable; position 0 is the phrase.
    keyword_list = [entry[0] for entry in scored_phrases]
    return summary, keyword_list

# ========================================
# 📌 4. ETF 매핑 및 수익률 계산
# ========================================
def map_keywords_to_etfs(keywords, rules=None):
    """Map extracted keyphrases to ``(ticker, comment)`` pairs.

    A rule key matches a keyphrase case-insensitively and only at a word
    boundary, so ``'AI'`` no longer fires on "said" or "again", ``'oil'`` on
    "soil", or ``'media'`` on "immediately". Keys longer than three characters
    match as a word *prefix* so inflected forms still hit ("tariffs",
    "technology"); keys of three characters or fewer must match a whole word
    because they are too ambiguous as prefixes ("air", "aim", "oily").

    Args:
        keywords: Iterable of keyphrase strings.
        rules: Optional mapping ``{key: {'ticker': ..., 'comment': ...}}``.
            Defaults to the module-level ``ETF_RULES``.

    Returns:
        A list of unique ``(ticker, comment)`` tuples in first-matched order
        (deterministic, unlike a ``set`` round-trip).
    """
    import re  # local import keeps this function self-contained

    if rules is None:
        rules = ETF_RULES

    whole_word_max_len = 3
    compiled = []
    for rule, info in rules.items():
        tail = r'\b' if len(rule) <= whole_word_max_len else ''
        pattern = re.compile(r'\b' + re.escape(rule.lower()) + tail)
        compiled.append((pattern, (info['ticker'], info['comment'])))

    # A dict gives de-duplication while preserving insertion order.
    matched = {}
    for kw in keywords:
        lowered = kw.lower()
        for pattern, hit in compiled:
            if pattern.search(lowered):
                matched.setdefault(hit, None)
    return list(matched)

def get_etf_returns(ticker, start_date, download=None):
    """Compute forward returns of ``ticker`` starting at ``start_date``.

    The first available close on or after ``start_date`` is the base price;
    the returns compare it with the close 1, 3 and 7 rows (trading days)
    later.

    Args:
        ticker: Symbol to look up.
        start_date: ``datetime`` marking the start of the price window.
        download: Optional callable with the ``(ticker, start=, end=)``
            signature of ``yf.download``; defaults to ``yf.download``. Lets
            callers supply cached or offline price data.

    Returns:
        ``{'1d': ..., '3d': ..., '7d': ...}`` with percentage returns rounded
        to two decimals. A value is ``None`` when the window holds too few
        rows; all three are ``None`` when no data is available or the lookup
        fails.
    """
    no_data = {'1d': None, '3d': None, '7d': None}
    if download is None:
        download = yf.download

    # The '7d' figure needs 8 trading rows. A 10-calendar-day window holds at
    # most 8 and frequently only 6-7 (weekends/holidays), so use 15 days.
    lookahead_days = 15

    try:
        data = download(ticker, start=start_date, end=start_date + timedelta(days=lookahead_days))
        close = data['Close']
        # Some yfinance versions return MultiIndex columns, which makes
        # data['Close'] a one-column DataFrame rather than a Series.
        if getattr(close, 'ndim', 1) == 2:
            close = close.iloc[:, 0]
        close = close.dropna()
        if len(close) == 0:
            return no_data

        base = close.iloc[0]
        return {
            label: round(float(close.iloc[offset] / base - 1) * 100, 2) if len(close) > offset else None
            for label, offset in (('1d', 1), ('3d', 3), ('7d', 7))
        }
    except Exception:
        # Best-effort: a failed lookup for one ticker must not abort the run.
        return no_data

# ========================================
# ✅ 메인 실행 흐름
# ========================================
# Script entry point: scrape tweets, then for each one print its timestamp,
# text, summary, keywords, and the forward returns of every matched ETF.
if __name__ == "__main__":
    print("🚀 트럼프 트윗 수집 중...")
    posts = get_trump_posts_with_timestamp(max_scrolls=10)

    print("🧠 NLP 분석 및 수익률 계산:")
    for i, post in enumerate(posts, 1):
        print(f"\n[{i}번 트윗]")
        print()

        print("🕒 작성시각:", post['timestamp'])
        print()

        print("📌 원문:", post['text'].strip())
        print()

        summary, keywords = analyze_tweet(post['text'])
        print("📝 요약:", summary)
        print()

        print("🔑 키워드:", keywords)
        print()

        mapped = map_keywords_to_etfs(keywords)
        if not mapped:
            print("⚠️ 키워드와 매핑된 ETF 없음")
            continue

        # The scraper stores None when a tweet's time tag could not be read;
        # strptime(None, ...) would raise TypeError and abort the whole run.
        dt = None
        if post['timestamp']:
            dt = datetime.strptime(post['timestamp'], "%Y-%m-%d %H:%M")

        for ticker, comment in mapped:
            print(f"➡️ 관련 ETF: {ticker} | {comment}")
            if dt is None:
                # Without a start date there is no window to price.
                print("⚠️ 작성시각 없음 → 수익률 계산 생략")
                continue
            returns = get_etf_returns(ticker, dt)
            print(f"📈 수익률: +1d: {returns['1d']}% | +3d: {returns['3d']}% | +7d: {returns['7d']}%")