From 262cd044b921e3c2fa1c49fae21238d4eb1877a8 Mon Sep 17 00:00:00 2001
From: twq110 <aaa11093@gmail.com>
Date: Mon, 6 Apr 2026 14:55:32 +0900
Subject: [PATCH] =?UTF-8?q?[AI]=20[FEAT]=20=EA=B3=84=EC=A0=95=20ID=20?=
 =?UTF-8?q?=EC=83=9D=EC=84=B1?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 .../compare_transformer_feature_schemas.py    | 498 ++++++++++++++++++
 ...mer_refactor_change_analysis_2026-04-03.md | 119 +++++
 .../run_config.json                           |  23 +
 AI/config/trading.default.json                |   1 +
 AI/config/trading.py                          |   4 +
 AI/libs/database/repository.py                | 138 ++++-
 AI/pipelines/daily_routine.py                 |   5 +-
 schema.sql                                    |  58 +-
 8 files changed, 820 insertions(+), 26 deletions(-)
 create mode 100644 AI/backtests/compare_transformer_feature_schemas.py
 create mode 100644 AI/backtests/out/transformer_refactor_change_analysis_2026-04-03.md
 create mode 100644 AI/backtests/out/transformer_schema_compare_smoke/run_config.json

diff --git a/AI/backtests/compare_transformer_feature_schemas.py b/AI/backtests/compare_transformer_feature_schemas.py
new file mode 100644
index 00000000..3edc5563
--- /dev/null
+++ b/AI/backtests/compare_transformer_feature_schemas.py
@@ -0,0 +1,498 @@
+from __future__ import annotations
+
+import argparse
+import json
+import os
+import pickle
+import random
+import sys
+from dataclasses import asdict, dataclass
+from datetime import datetime
+from pathlib import Path
+from typing import Any
+
+import numpy as np
+import pandas as pd
+import tensorflow as tf
+from sklearn.model_selection import train_test_split
+from tensorflow.keras.callbacks import EarlyStopping, ModelCheckpoint, ReduceLROnPlateau
+
+
+PROJECT_ROOT = Path(__file__).resolve().parents[2]
+if str(PROJECT_ROOT) not in sys.path:
+    sys.path.append(str(PROJECT_ROOT))
+
+from AI.modules.signal.core.data_loader import DataLoader
+from AI.modules.signal.models.transformer.architecture import build_transformer_model
+
+
+PRE307_DYNAMIC_23 = [
+    "log_return",
+    "open_ratio",
+    "high_ratio",
+    "low_ratio",
+    "vol_change",
+    "ma_5_ratio",
+    "ma_20_ratio",
+    "ma_60_ratio",
+    "rsi",
+    "macd_ratio",
+    "bb_position",
+    "us10y",
+    "yield_spread",
+    "vix_close",
+    "dxy_close",
+    "credit_spread_hy",
+    "nh_nl_index",
+    "ma200_pct",
+    "sentiment_score",
+    "risk_keyword_cnt",
+    "per",
+    "pbr",
+    "roe",
+]
+
+CURRENT_FIXED_17 = [
+    "log_return",
+    "open_ratio",
+    "high_ratio",
+    "low_ratio",
+    "vol_change",
+    "ma5_ratio",
+    "ma20_ratio",
+    "ma60_ratio",
+    "rsi",
+    "macd_ratio",
+    "bb_position",
+    "week_ma20_ratio",
+    "week_rsi",
+    "week_bb_pos",
+    "week_vol_change",
+    "month_ma12_ratio",
+    "month_rsi",
+]
+
+
+@dataclass
+class TrainArtifacts:
+    schema_name: str
+    requested_features: list[str]
+    effective_features: list[str]
+    n_features: int
+    n_samples: int
+    epochs_ran: int
+    history_csv: str
+    summary_json: str
+    model_path: str
+    scaler_path: str
+    best_val_loss: float | None
+    best_epoch: int | None
+    final_val_loss: float | None
+
+
+def _set_global_seed(seed: int) -> None:
+    random.seed(seed)
+    np.random.seed(seed)
+    tf.keras.utils.set_random_seed(seed)
+    try:
+        tf.config.experimental.enable_op_determinism()
+    except Exception:
+        pass
+
+
+def _safe_float(value: Any) -> float | None:
+    if value is None:
+        return None
+    try:
+        return float(value)
+    except (TypeError, ValueError):
+        return None
+
+
+def _parse_horizons(raw_horizons: str) -> list[int]:
+    horizons = [int(item.strip()) for item in raw_horizons.split(",") if item.strip()]
+    if not horizons:
+        raise ValueError("prediction horizons must not be empty.")
+    return horizons
+
+
+def _prepare_output_dir(output_dir: str | None) -> Path:
+    if output_dir:
+        target = Path(output_dir).resolve()
+    else:
+        stamp = datetime.now().strftime("%Y%m%d_%H%M%S")
+        target = (PROJECT_ROOT / "AI" / "backtests" / "out" / f"transformer_schema_compare_{stamp}").resolve()
+    target.mkdir(parents=True, exist_ok=True)
+    return target
+
+
+def _fit_single_schema(
+    *,
+    schema_name: str,
+    requested_features: list[str],
+    case_output_dir: Path,
+    db_name: str,
+    lookback: int,
+    horizons: list[int],
+    data_start_date: str,
+    train_end_date: str,
+    test_size: float,
+    split_seed: int,
+    global_seed: int,
+    epochs: int,
+    batch_size: int,
+    patience: int,
+    lr_patience: int,
+    lr_factor: float,
+    min_lr: float,
+    verbose: int,
+) -> TrainArtifacts:
+    _set_global_seed(global_seed)
+    tf.keras.backend.clear_session()
+
+    loader = DataLoader(db_name=db_name, lookback=lookback, horizons=horizons)
+    full_df = loader.load_data_from_db(start_date=data_start_date)
+    raw_df = full_df[full_df["date"] <= train_end_date].copy()
+    if raw_df.empty:
+        raise ValueError(f"[{schema_name}] no rows found in selected training range.")
+
+    X_ts, X_ticker, X_sector, y_class, _, info = loader.create_dataset(raw_df, feature_columns=requested_features)
+    if len(y_class) == 0:
+        raise ValueError(f"[{schema_name}] dataset is empty after preprocessing.")
+
+    (
+        X_ts_train,
+        X_ts_val,
+        X_tick_train,
+        X_tick_val,
+        X_sec_train,
+        X_sec_val,
+        y_train,
+        y_val,
+    ) = train_test_split(
+        X_ts,
+        X_ticker,
+        X_sector,
+        y_class,
+        test_size=test_size,
+        shuffle=True,
+        random_state=split_seed,
+    )
+
+    model = build_transformer_model(
+        input_shape=(X_ts.shape[1], X_ts.shape[2]),
+        n_tickers=info["n_tickers"],
+        n_sectors=info["n_sectors"],
+        n_outputs=len(info.get("horizons", horizons)),
+    )
+
+    model.compile(
+        optimizer=tf.keras.optimizers.Adam(learning_rate=1e-4),
+        loss="binary_crossentropy",
+        metrics=["accuracy"],
+    )
+
+    case_output_dir.mkdir(parents=True, exist_ok=True)
+    model_path = case_output_dir / "multi_horizon_model.keras"
+    scaler_path = case_output_dir / "multi_horizon_scaler.pkl"
+
+    callbacks = [
+        ModelCheckpoint(
+            filepath=str(model_path),
+            monitor="val_loss",
+            save_best_only=True,
+            verbose=1 if verbose > 0 else 0,
+        ),
+        EarlyStopping(
+            monitor="val_loss",
+            patience=patience,
+            restore_best_weights=True,
+        ),
+        ReduceLROnPlateau(
+            monitor="val_loss",
+            factor=lr_factor,
+            patience=lr_patience,
+            min_lr=min_lr,
+            verbose=1 if verbose > 0 else 0,
+        ),
+    ]
+
+    history = model.fit(
+        x=[X_ts_train, X_tick_train, X_sec_train],
+        y=y_train,
+        validation_data=([X_ts_val, X_tick_val, X_sec_val], y_val),
+        epochs=epochs,
+        batch_size=batch_size,
+        shuffle=True,
+        callbacks=callbacks,
+        verbose=verbose,
+    )
+
+    with scaler_path.open("wb") as handle:
+        pickle.dump(info["scaler"], handle)
+
+    history_df = pd.DataFrame(history.history)
+    history_df.insert(0, "epoch", np.arange(1, len(history_df) + 1))
+    history_csv = case_output_dir / "history.csv"
+    history_df.to_csv(history_csv, index=False)
+
+    val_loss_series = history_df["val_loss"] if "val_loss" in history_df.columns else pd.Series(dtype="float64")
+    if val_loss_series.empty:
+        best_val_loss = None
+        best_epoch = None
+        final_val_loss = None
+    else:
+        min_idx = int(val_loss_series.idxmin())
+        best_val_loss = _safe_float(val_loss_series.iloc[min_idx])
+        best_epoch = int(history_df.loc[min_idx, "epoch"])
+        final_val_loss = _safe_float(val_loss_series.iloc[-1])
+
+    summary = {
+        "schema_name": schema_name,
+        "requested_features": requested_features,
+        "effective_features": info.get("feature_names", []),
+        "n_features": int(info.get("n_features", 0)),
+        "n_samples": int(len(y_class)),
+        "lookback": int(lookback),
+        "horizons": [int(h) for h in info.get("horizons", horizons)],
+        "train_rows": int(len(raw_df)),
+        "train_date_min": str(raw_df["date"].min()),
+        "train_date_max": str(raw_df["date"].max()),
+        "epochs_ran": int(len(history_df)),
+        "best_val_loss": best_val_loss,
+        "best_epoch": best_epoch,
+        "final_val_loss": final_val_loss,
+        "history_csv": str(history_csv),
+        "model_path": str(model_path),
+        "scaler_path": str(scaler_path),
+    }
+
+    summary_json = case_output_dir / "summary.json"
+    summary_json.write_text(json.dumps(summary, indent=2, ensure_ascii=False), encoding="utf-8")
+
+    return TrainArtifacts(
+        schema_name=schema_name,
+        requested_features=list(requested_features),
+        effective_features=list(info.get("feature_names", [])),
+        n_features=int(info.get("n_features", 0)),
+        n_samples=int(len(y_class)),
+        epochs_ran=int(len(history_df)),
+        history_csv=str(history_csv),
+        summary_json=str(summary_json),
+        model_path=str(model_path),
+        scaler_path=str(scaler_path),
+        best_val_loss=best_val_loss,
+        best_epoch=best_epoch,
+        final_val_loss=final_val_loss,
+    )
+
+
+def _save_curve_plot(
+    *,
+    output_dir: Path,
+    dynamic_df: pd.DataFrame,
+    fixed_df: pd.DataFrame,
+) -> str | None:
+    try:
+        import matplotlib.pyplot as plt
+    except Exception:
+        return None
+
+    if "val_loss" not in dynamic_df.columns or "val_loss" not in fixed_df.columns:
+        return None
+
+    figure_path = output_dir / "val_loss_curve.png"
+    plt.figure(figsize=(10, 6))
+    plt.plot(dynamic_df["epoch"], dynamic_df["val_loss"], label="dynamic23")
+    plt.plot(fixed_df["epoch"], fixed_df["val_loss"], label="fixed17")
+    plt.xlabel("Epoch")
+    plt.ylabel("Validation Loss")
+    plt.title("Transformer Val Loss Curve: dynamic23 vs fixed17")
+    plt.grid(alpha=0.3)
+    plt.legend()
+    plt.tight_layout()
+    plt.savefig(figure_path, dpi=140)
+    plt.close()
+    return str(figure_path)
+
+
+def _write_comparison_report(
+    *,
+    output_dir: Path,
+    dynamic_artifacts: TrainArtifacts,
+    fixed_artifacts: TrainArtifacts,
+    curve_csv: Path,
+    curve_plot: str | None,
+) -> Path:
+    dynamic_best = dynamic_artifacts.best_val_loss
+    fixed_best = fixed_artifacts.best_val_loss
+    if dynamic_best is None or fixed_best is None:
+        best_gap = None
+    else:
+        best_gap = fixed_best - dynamic_best
+
+    comparison = {
+        "dynamic23": asdict(dynamic_artifacts),
+        "fixed17": asdict(fixed_artifacts),
+        "best_val_loss_gap_fixed17_minus_dynamic23": best_gap,
+        "val_curve_csv": str(curve_csv),
+        "val_curve_plot": curve_plot,
+    }
+    comparison_json = output_dir / "comparison_summary.json"
+    comparison_json.write_text(json.dumps(comparison, indent=2, ensure_ascii=False), encoding="utf-8")
+
+    lines = [
+        "# Transformer Feature Schema Retrain Comparison",
+        "",
+        f"- dynamic23 best val_loss: {dynamic_artifacts.best_val_loss}",
+        f"- fixed17 best val_loss: {fixed_artifacts.best_val_loss}",
+        f"- best val_loss gap (fixed17 - dynamic23): {best_gap}",
+        f"- dynamic23 epochs ran: {dynamic_artifacts.epochs_ran}",
+        f"- fixed17 epochs ran: {fixed_artifacts.epochs_ran}",
+        f"- dynamic23 best epoch: {dynamic_artifacts.best_epoch}",
+        f"- fixed17 best epoch: {fixed_artifacts.best_epoch}",
+        "",
+        "## Artifacts",
+        f"- curve csv: {curve_csv}",
+        f"- curve plot: {curve_plot if curve_plot else 'not generated (matplotlib unavailable or val_loss missing)'}",
+        f"- dynamic23 summary: {dynamic_artifacts.summary_json}",
+        f"- fixed17 summary: {fixed_artifacts.summary_json}",
+        f"- comparison json: {comparison_json}",
+    ]
+    report_path = output_dir / "comparison_report.md"
+    report_path.write_text("\n".join(lines), encoding="utf-8")
+    return report_path
+
+
+def build_parser() -> argparse.ArgumentParser:
+    parser = argparse.ArgumentParser(
+        description="Retrain transformer with pre-#307 dynamic23 vs current fixed17 and compare val curves.",
+    )
+    parser.add_argument("--db-name", default="db", help="Database connection profile name.")
+    parser.add_argument("--lookback", type=int, default=60, help="Sequence length.")
+    parser.add_argument("--horizons", default="1,3,5,7", help="Prediction horizons, comma separated.")
+    parser.add_argument("--data-start-date", default="2015-01-01", help="Data fetch start date (YYYY-MM-DD).")
+    parser.add_argument("--train-end-date", default="2023-12-31", help="Training cutoff date (YYYY-MM-DD).")
+    parser.add_argument("--epochs", type=int, default=50, help="Max epochs.")
+    parser.add_argument("--batch-size", type=int, default=32, help="Batch size.")
+    parser.add_argument("--test-size", type=float, default=0.2, help="Validation split ratio.")
+    parser.add_argument("--split-seed", type=int, default=42, help="Seed for train/val split.")
+    parser.add_argument("--global-seed", type=int, default=42, help="Global random seed.")
+    parser.add_argument("--patience", type=int, default=10, help="EarlyStopping patience.")
+    parser.add_argument("--lr-patience", type=int, default=5, help="ReduceLROnPlateau patience.")
+    parser.add_argument("--lr-factor", type=float, default=0.5, help="ReduceLROnPlateau factor.")
+    parser.add_argument("--min-lr", type=float, default=1e-6, help="Minimum learning rate.")
+    parser.add_argument("--verbose", type=int, default=2, choices=[0, 1, 2], help="Keras fit verbosity.")
+    parser.add_argument(
+        "--output-dir",
+        default=None,
+        help="Optional output directory. Default: AI/backtests/out/transformer_schema_compare_<timestamp>",
+    )
+    return parser
+
+
+def main() -> int:
+    parser = build_parser()
+    args = parser.parse_args()
+
+    output_dir = _prepare_output_dir(args.output_dir)
+    horizons = _parse_horizons(args.horizons)
+
+    run_config = {
+        "db_name": args.db_name,
+        "lookback": args.lookback,
+        "horizons": horizons,
+        "data_start_date": args.data_start_date,
+        "train_end_date": args.train_end_date,
+        "epochs": args.epochs,
+        "batch_size": args.batch_size,
+        "test_size": args.test_size,
+        "split_seed": args.split_seed,
+        "global_seed": args.global_seed,
+        "patience": args.patience,
+        "lr_patience": args.lr_patience,
+        "lr_factor": args.lr_factor,
+        "min_lr": args.min_lr,
+        "verbose": args.verbose,
+        "output_dir": str(output_dir),
+    }
+    (output_dir / "run_config.json").write_text(json.dumps(run_config, indent=2, ensure_ascii=False), encoding="utf-8")
+
+    print(f"[SchemaCompare] Output directory: {output_dir}")
+    print("[SchemaCompare] Training case 1/2: dynamic23 (pre-#307 schema)")
+    dynamic_artifacts = _fit_single_schema(
+        schema_name="dynamic23",
+        requested_features=PRE307_DYNAMIC_23,
+        case_output_dir=output_dir / "dynamic23",
+        db_name=args.db_name,
+        lookback=args.lookback,
+        horizons=horizons,
+        data_start_date=args.data_start_date,
+        train_end_date=args.train_end_date,
+        test_size=args.test_size,
+        split_seed=args.split_seed,
+        global_seed=args.global_seed,
+        epochs=args.epochs,
+        batch_size=args.batch_size,
+        patience=args.patience,
+        lr_patience=args.lr_patience,
+        lr_factor=args.lr_factor,
+        min_lr=args.min_lr,
+        verbose=args.verbose,
+    )
+
+    print("[SchemaCompare] Training case 2/2: fixed17 (current schema)")
+    fixed_artifacts = _fit_single_schema(
+        schema_name="fixed17",
+        requested_features=CURRENT_FIXED_17,
+        case_output_dir=output_dir / "fixed17",
+        db_name=args.db_name,
+        lookback=args.lookback,
+        horizons=horizons,
+        data_start_date=args.data_start_date,
+        train_end_date=args.train_end_date,
+        test_size=args.test_size,
+        split_seed=args.split_seed,
+        global_seed=args.global_seed,
+        epochs=args.epochs,
+        batch_size=args.batch_size,
+        patience=args.patience,
+        lr_patience=args.lr_patience,
+        lr_factor=args.lr_factor,
+        min_lr=args.min_lr,
+        verbose=args.verbose,
+    )
+
+    dynamic_df = pd.read_csv(dynamic_artifacts.history_csv)
+    fixed_df = pd.read_csv(fixed_artifacts.history_csv)
+
+    curve_df = pd.DataFrame({"epoch": np.arange(1, max(len(dynamic_df), len(fixed_df)) + 1)})
+    curve_df = curve_df.merge(
+        dynamic_df[["epoch", "val_loss"]].rename(columns={"val_loss": "dynamic23_val_loss"}),
+        on="epoch",
+        how="left",
+    )
+    curve_df = curve_df.merge(
+        fixed_df[["epoch", "val_loss"]].rename(columns={"val_loss": "fixed17_val_loss"}),
+        on="epoch",
+        how="left",
+    )
+    curve_csv = output_dir / "val_curve_comparison.csv"
+    curve_df.to_csv(curve_csv, index=False)
+
+    curve_plot = _save_curve_plot(output_dir=output_dir, dynamic_df=dynamic_df, fixed_df=fixed_df)
+    report_path = _write_comparison_report(
+        output_dir=output_dir,
+        dynamic_artifacts=dynamic_artifacts,
+        fixed_artifacts=fixed_artifacts,
+        curve_csv=curve_csv,
+        curve_plot=curve_plot,
+    )
+
+    print("[SchemaCompare] Completed.")
+    print(f"[SchemaCompare] Comparison report: {report_path}")
+    return 0
+
+
+if __name__ == "__main__":
+    raise SystemExit(main())
diff --git a/AI/backtests/out/transformer_refactor_change_analysis_2026-04-03.md b/AI/backtests/out/transformer_refactor_change_analysis_2026-04-03.md
new file mode 100644
index 00000000..a55e7e81
--- /dev/null
+++ b/AI/backtests/out/transformer_refactor_change_analysis_2026-04-03.md
@@ -0,0 +1,119 @@
+# Transformer 변경점 심층 분석 (기준: 2026-04-03)
+
+## 1) 분석 범위
+- 비교 기준: `20260314 #307 daily routine 코드 리펙토링` (merge commit `5d752d2`) 전후 + 이후 관련 커밋
+- 확인 파일:
+  - `AI/modules/signal/models/transformer/train.py`
+  - `AI/modules/signal/core/data_loader.py`
+  - `AI/modules/signal/models/transformer/wrapper.py`
+  - `AI/pipelines/components/model_manager.py`
+  - `AI/modules/signal/core/artifact_paths.py`
+  - `AI/modules/finder/screener.py`
+
+## 2) 핵심 타임라인 (Transformer 관점)
+- `6fd3b39` (`#307` 브랜치 내부):  
+  - `train.py` import 경로 변경  
+    - `PatchTST.architecture` -> `transformer.architecture`
+  - 기본 저장 경로를 `.../transformer/tests/`로 변경
+- `c9ed165` (`#307` 브랜치 내부):  
+  - `TRANSFORMER_TRAIN_FEATURES` 17개 고정 도입
+  - `create_dataset(..., feature_columns=TRANSFORMER_TRAIN_FEATURES)` 도입
+  - `DataLoader`에서 `add_multi_timeframe_features()` 실제 호출 시작
+- `867999e` (`#307` 브랜치 내부):  
+  - wrapper에 입력 길이 검증 추가 (`len(df) < seq_len` 방어)
+- `5d752d2` (main에 squash merge): 위 3개 변화가 사실상 반영
+- `96a4640`:
+  - config 구조화(`trading.py`, `trading.default.json`)
+- `ae48e58`:
+  - legacy h5 체크포인트의 input shape 추론 로직 추가
+- `92bc3b3`:
+  - artifact path 해석 로직(`artifact_paths.py`) 도입
+  - `AI_MODEL_WEIGHTS_DIR` 환경변수 기반 아티팩트 루트 전환 가능
+
+## 3) 피처 스키마 변화 (가장 큰 변화)
+
+### 이전(동적 후보, `#307` 직전 `5d752d2^`)
+`data_loader.py`의 `potential_features` 23개:
+- `log_return, open_ratio, high_ratio, low_ratio, vol_change`
+- `ma_5_ratio, ma_20_ratio, ma_60_ratio`
+- `rsi, macd_ratio, bb_position`
+- `us10y, yield_spread, vix_close, dxy_close, credit_spread_hy`
+- `nh_nl_index, ma200_pct`
+- `sentiment_score, risk_keyword_cnt`
+- `per, pbr, roe`
+
+### 현재(고정 17, `train.py`)
+- `log_return, open_ratio, high_ratio, low_ratio, vol_change`
+- `ma5_ratio, ma20_ratio, ma60_ratio`
+- `rsi, macd_ratio, bb_position`
+- `week_ma20_ratio, week_rsi, week_bb_pos, week_vol_change`
+- `month_ma12_ratio, month_rsi`
+
+### Set 비교
+- 공통 8개:
+  - `log_return, open_ratio, high_ratio, low_ratio, vol_change, rsi, macd_ratio, bb_position`
+- 이전 대비 제거 15개:
+  - `ma_5_ratio, ma_20_ratio, ma_60_ratio`
+  - `us10y, yield_spread, vix_close, dxy_close, credit_spread_hy`
+  - `nh_nl_index, ma200_pct`
+  - `sentiment_score, risk_keyword_cnt`
+  - `per, pbr, roe`
+- 현재에서 신규 9개:
+  - `ma5_ratio, ma20_ratio, ma60_ratio`
+  - `week_ma20_ratio, week_rsi, week_bb_pos, week_vol_change`
+  - `month_ma12_ratio, month_rsi`
+
+## 4) 중요한 정합성 포인트
+
+### (A) MA 피처 네이밍 불일치 이슈의 구조
+- `add_technical_indicators()`는 `ma{window}_ratio` (예: `ma5_ratio`) 생성
+- 과거 동적 후보에는 `ma_5_ratio` 형태가 들어가 있어, 동적 경로에서는 MA 3개가 빠질 여지가 있었음
+- `#307` 이후 고정 17로 `ma5_ratio`를 직접 지정하면서 이 불일치가 실질 해소됨
+
+### (B) 멀티타임프레임 피처의 “정의는 있었지만 사용은 안 됨” -> “사용 시작”
+- `add_multi_timeframe_features()` 함수 자체는 이전에도 존재
+- 다만 `DataLoader.create_dataset()`에서 실제 호출된 것은 `#307` 라인에서 시작
+- 즉, 주봉/월봉 6개 피처는 `#307` 이후 학습 입력에 본격 반영
+
+### (C) 아티팩트 경로/로드 방식 변화
+- `#307` 이후 + 후속 커밋에서 아래가 바뀜:
+  - 테스트/프로덕션 파일명 분기 (`*_test`, `*_prod`)
+  - 환경변수 기반 weights 루트 전환
+  - legacy `.keras`(실제 h5) 로드 폴백 강화
+- 따라서 “같은 코드라도 어떤 체크포인트를 읽었는지”가 결과에 크게 영향 가능
+
+## 5) 현재 실제 스케일러 상태 확인 (로컬 파일 기반)
+
+확인 파일:
+- `AI/data/weights/transformer/tests/multi_horizon_scaler_test.pkl`
+- `AI/data/weights/transformer/prod/multi_horizon_scaler_prod.pkl`
+
+공통 결과:
+- `n_features_in_ = 17`
+- `feature_names_in_`가 현재 고정 17과 동일
+
+관찰 포인트:
+- `vol_change`, `week_vol_change`의 max가 매우 큼 (긴 꼬리)
+  - test: `vol_change max ~15420`, `week_vol_change max ~25361`
+  - prod: `vol_change max ~15420`, `week_vol_change max ~120508`
+- MinMaxScaler 사용 시 극단치가 있으면 대부분 샘플이 좁은 구간으로 압축될 수 있음
+
+## 6) “val_loss=0.690 정체”에 대한 코드 관점 해석
+- BCE에서 `0.690~0.693` 고정은 보통 확률 0.5 근처(랜덤 수준) 신호
+- 이번 코드 이력에서 그럴 만한 강한 변화는 아래 3가지:
+  1. 입력 스키마가 사실상 재정의됨(23 동적 혼합 -> 17 고정 기술/멀티타임프레임)
+  2. MA 네이밍 정합성 정리로 실제 입력 컬럼 조합이 바뀜
+  3. 체크포인트/스케일러 경로 분기 변경으로 다른 아티팩트를 읽었을 가능성
+
+즉, “단순 리팩토링”이 아니라 **학습 입력 분포와 로딩 경로가 동시에 바뀐 리팩토링**에 가까움.
+
+## 7) 추가 확인 시도 결과
+- DB 기반 재현 실행은 현재 환경에서 실패:
+  - `localhost:15432` PostgreSQL 연결 거부
+- 따라서 샘플 수/실제 선택 컬럼/라벨 분포를 런타임으로 재현하지는 못했고, 코드/커밋/스케일러 파일 단서 중심으로 분석함.
+
+## 8) 스크리너 확장 관련 별도 관찰 (현재 코드)
+- `AI/config/trading.py`에 확장 필드(`include_tickers`, `exclude_tickers`, `sticky_slots`, 가중치 등)는 존재
+- `AI/modules/finder/screener.py`의 쿼리에는 아직 미반영
+- 따라서 “GEV 같은 특정 티커 보존/우선” 요구는 현재 config만으로는 완전 제어 불가
+
diff --git a/AI/backtests/out/transformer_schema_compare_smoke/run_config.json b/AI/backtests/out/transformer_schema_compare_smoke/run_config.json
new file mode 100644
index 00000000..d9a0fa5d
--- /dev/null
+++ b/AI/backtests/out/transformer_schema_compare_smoke/run_config.json
@@ -0,0 +1,23 @@
+{
+  "db_name": "db",
+  "lookback": 60,
+  "horizons": [
+    1,
+    3,
+    5,
+    7
+  ],
+  "data_start_date": "2015-01-01",
+  "train_end_date": "2023-12-31",
+  "epochs": 1,
+  "batch_size": 32,
+  "test_size": 0.2,
+  "split_seed": 42,
+  "global_seed": 42,
+  "patience": 10,
+  "lr_patience": 5,
+  "lr_factor": 0.5,
+  "min_lr": 1e-06,
+  "verbose": 2,
+  "output_dir": "C:\\Users\\jaebin\\Documents\\GitHub\\sisc-web\\AI\\backtests\\out\\transformer_schema_compare_smoke"
+}
\ No newline at end of file
diff --git a/AI/config/trading.default.json b/AI/config/trading.default.json
index ffe77ce2..cff0b134 100644
--- a/AI/config/trading.default.json
+++ b/AI/config/trading.default.json
@@ -1,6 +1,7 @@
 {
   "pipeline": {
     "db_name": "db",
+    "account_code": "chart_based_signal_model",
     "default_mode": "simulation",
     "enable_xai": true,
     "data_start_date": "2023-01-01",
diff --git a/AI/config/trading.py b/AI/config/trading.py
index a90723ab..16163a6e 100644
--- a/AI/config/trading.py
+++ b/AI/config/trading.py
@@ -26,6 +26,7 @@ class MacroFallbackConfig:
 @dataclass(frozen=True, slots=True)
 class PipelineConfig:
     db_name: str
+    account_code: str
     default_mode: str
     enable_xai: bool
     data_start_date: str
@@ -162,6 +163,7 @@ def _build_config(raw: dict[str, Any]) -> TradingConfig:
     config = TradingConfig(
         pipeline=PipelineConfig(
             db_name=raw["pipeline"]["db_name"],
+            account_code=str(raw["pipeline"].get("account_code", "chart_based_signal_model")).strip(),
             default_mode=raw["pipeline"]["default_mode"],
             enable_xai=raw["pipeline"]["enable_xai"],
             data_start_date=raw["pipeline"]["data_start_date"],
@@ -212,6 +214,8 @@ def _build_config(raw: dict[str, Any]) -> TradingConfig:
     )
     if config.pipeline.initial_capital <= 0:
         raise ValueError("pipeline.initial_capital must be greater than 0")
+    if not config.pipeline.account_code:
+        raise ValueError("pipeline.account_code must be a non-empty string")
     return config
 
 
diff --git a/AI/libs/database/repository.py b/AI/libs/database/repository.py
index 6dd31cb3..7c2acfb6 100644
--- a/AI/libs/database/repository.py
+++ b/AI/libs/database/repository.py
@@ -21,7 +21,7 @@ class PortfolioRepository:
     DB와의 상호작용(조회 및 저장)을 캡슐화하여 제공합니다.
     """
 
-    def __init__(self, db_name: str = "db"):
+    def __init__(self, db_name: str = "db", account_code: Optional[str] = None):
         """
         [초기화 메서드]
         객체 생성 시 사용할 데이터베이스의 이름을 설정합니다.
@@ -31,6 +31,9 @@ def __init__(self, db_name: str = "db"):
                            테스트 시에는 "test_db" 등으로 변경하여 주입(의존성 주입)할 수 있습니다.
         """
         self.db_name = db_name
+        normalized_account_code = (account_code or "").strip()
+        self.account_code = normalized_account_code if normalized_account_code else None
+        self._account_id_cache: Optional[int] = None
 
     def get_latest_total_asset(self, target_date: str, default_asset: float = 100_000_000) -> float:
         """
@@ -75,6 +78,57 @@ def _get_connection(self):
         """
         return get_db_conn(self.db_name)
 
+    def _resolve_account_id(self, conn=None) -> Optional[int]:
+        """
+        Resolve account_id from account_code and cache it for reuse.
+        """
+        if self.account_code is None:
+            return None
+
+        if self._account_id_cache is not None:
+            return self._account_id_cache
+
+        own_connection = conn is None
+        local_conn = conn or self._get_connection()
+        if local_conn is None:
+            raise RuntimeError(
+                f"[PortfolioRepository][Error] Failed to resolve account_id for account_code={self.account_code!r}: DB connection unavailable."
+            )
+
+        try:
+            with local_conn.cursor() as cursor:
+                cursor.execute(
+                    """
+                        SELECT id, is_active
+                        FROM public.account_names
+                        WHERE account_code = %s
+                        ORDER BY id ASC
+                    """,
+                    (self.account_code,),
+                )
+                rows = cursor.fetchall()
+        finally:
+            if own_connection and local_conn:
+                local_conn.close()
+
+        if not rows:
+            raise ValueError(
+                f"[PortfolioRepository][Error] account_code={self.account_code!r} not found in public.account_names."
+            )
+        if len(rows) > 1:
+            raise ValueError(
+                f"[PortfolioRepository][Error] duplicate rows found for account_code={self.account_code!r}."
+            )
+
+        account_id, is_active = rows[0]
+        if not bool(is_active):
+            raise ValueError(
+                f"[PortfolioRepository][Error] account_code={self.account_code!r} is inactive in public.account_names."
+            )
+
+        self._account_id_cache = int(account_id)
+        return self._account_id_cache
+
     def get_current_position(self, ticker: str, target_date: str = None, initial_cash: float = 10000) -> Dict[str, Any]:
         """
         [현재 포지션 조회] 
@@ -102,6 +156,10 @@ def get_current_position(self, ticker: str, target_date: str = None, initial_cas
             WHERE ticker = %s 
         """
         params = [ticker]
+        account_id = self._resolve_account_id(conn)
+        if account_id is not None:
+            query += " AND account_id = %s "
+            params.append(account_id)
         
         # target_date가 주어지면 그 날짜 '이하'의 체결내역만 필터링하여 미래 데이터를 차단합니다.
         if target_date:
@@ -182,19 +240,29 @@ def get_current_cash(self, target_date: str = None, initial_cash: float = 100000
 
         try:
             with conn.cursor() as cursor:
+                account_id = self._resolve_account_id(conn)
                 if target_date:
                     exec_cash_query = """
                         SELECT cash_after
                         FROM public.executions
                         WHERE fill_date <= %s
+                    """
+                    exec_cash_params: list[Any] = [target_date]
+                    if account_id is not None:
+                        exec_cash_query += " AND account_id = %s "
+                        exec_cash_params.append(account_id)
+                    exec_cash_query += """
                         ORDER BY fill_date DESC, created_at DESC, id DESC
                         LIMIT 1
                     """
-                    cursor.execute(exec_cash_query, (target_date,))
+                    cursor.execute(exec_cash_query, tuple(exec_cash_params))
                     exec_cash = cursor.fetchone()
                     if exec_cash and exec_cash[0] is not None:
                         return float(exec_cash[0])
 
+                if account_id is not None:
+                    return float(initial_cash)
+
                 summary_cash_query = """
                     SELECT cash
                     FROM public.portfolio_summary
@@ -224,10 +292,17 @@ def get_open_tickers(self, target_date: str) -> List[str]:
         if conn is None:
             return []
 
+        account_id = self._resolve_account_id(conn)
         query = """
             SELECT ticker
             FROM public.executions
             WHERE fill_date <= %s
+        """
+        query_params: list[Any] = [target_date]
+        if account_id is not None:
+            query += " AND account_id = %s "
+            query_params.append(account_id)
+        query += """
             GROUP BY ticker
             HAVING SUM(
                 CASE
@@ -241,7 +316,7 @@ def get_open_tickers(self, target_date: str) -> List[str]:
 
         try:
             with conn.cursor() as cursor:
-                cursor.execute(query, (target_date,))
+                cursor.execute(query, tuple(query_params))
                 rows = cursor.fetchall()
             return [str(row[0]) for row in rows]
         except Exception as e:
@@ -266,27 +341,46 @@ def reset_run_data(self, run_id: str, target_date: Optional[str] = None) -> None
 
         try:
             with conn.cursor() as cursor:
+                account_id = self._resolve_account_id(conn)
                 if target_date:
                     # Safety-first default: only clear current run artifacts.
                     # Global chain reset can remove unrelated simulations sharing the same DB.
                     allow_global_chain_reset = os.environ.get("AI_ALLOW_GLOBAL_CHAIN_RESET", "0") == "1"
                     if allow_global_chain_reset:
-                        cursor.execute(
-                            "DELETE FROM public.executions WHERE fill_date >= %s AND run_id LIKE 'daily_%%'",
-                            (target_date,),
-                        )
-                        cursor.execute(
-                            "DELETE FROM public.xai_reports WHERE date >= %s AND run_id LIKE 'daily_%%'",
-                            (target_date,),
-                        )
-                        cursor.execute("DELETE FROM public.portfolio_positions WHERE date >= %s", (target_date,))
-                        cursor.execute("DELETE FROM public.portfolio_summary WHERE date >= %s", (target_date,))
+                        if account_id is None:
+                            cursor.execute(
+                                "DELETE FROM public.executions WHERE fill_date >= %s AND run_id LIKE 'daily_%%'",
+                                (target_date,),
+                            )
+                            cursor.execute(
+                                "DELETE FROM public.xai_reports WHERE date >= %s AND run_id LIKE 'daily_%%'",
+                                (target_date,),
+                            )
+                            cursor.execute("DELETE FROM public.portfolio_positions WHERE date >= %s", (target_date,))
+                            cursor.execute("DELETE FROM public.portfolio_summary WHERE date >= %s", (target_date,))
+                        else:
+                            cursor.execute(
+                                "DELETE FROM public.executions WHERE fill_date >= %s AND run_id LIKE 'daily_%%' AND account_id = %s",
+                                (target_date, account_id),
+                            )
                     else:
+                        if account_id is None:
+                            cursor.execute("DELETE FROM public.executions WHERE run_id = %s", (run_id,))
+                            cursor.execute("DELETE FROM public.xai_reports WHERE run_id = %s", (run_id,))
+                        else:
+                            cursor.execute(
+                                "DELETE FROM public.executions WHERE run_id = %s AND account_id = %s",
+                                (run_id, account_id),
+                            )
+                else:
+                    if account_id is None:
                         cursor.execute("DELETE FROM public.executions WHERE run_id = %s", (run_id,))
                         cursor.execute("DELETE FROM public.xai_reports WHERE run_id = %s", (run_id,))
-                else:
-                    cursor.execute("DELETE FROM public.executions WHERE run_id = %s", (run_id,))
-                    cursor.execute("DELETE FROM public.xai_reports WHERE run_id = %s", (run_id,))
+                    else:
+                        cursor.execute(
+                            "DELETE FROM public.executions WHERE run_id = %s AND account_id = %s",
+                            (run_id, account_id),
+                        )
             conn.commit()
             if target_date:
                 if os.environ.get("AI_ALLOW_GLOBAL_CHAIN_RESET", "0") == "1":
@@ -362,6 +456,7 @@ def _normalize_run_id(value: Any) -> Optional[str]:
         cursor = conn.cursor()
 
         try:
+            resolved_account_id = self._resolve_account_id(conn)
             run_ids = sorted(
                 {
                     str(run_id).strip()
@@ -370,11 +465,17 @@ def _normalize_run_id(value: Any) -> Optional[str]:
                 }
             )
             if run_ids:
-                cursor.execute("DELETE FROM public.executions WHERE run_id = ANY(%s)", (run_ids,))
+                if resolved_account_id is None:
+                    cursor.execute("DELETE FROM public.executions WHERE run_id = ANY(%s)", (run_ids,))
+                else:
+                    cursor.execute(
+                        "DELETE FROM public.executions WHERE run_id = ANY(%s) AND account_id = %s",
+                        (run_ids, resolved_account_id),
+                    )
             # 다량의 데이터를 빠르게 넣기 위한 INSERT 구문 준비
             insert_query = """
                 INSERT INTO public.executions (
-                    run_id, xai_report_id, ticker, signal_date, signal_price, signal,
+                    run_id, account_id, xai_report_id, ticker, signal_date, signal_price, signal,
                     fill_date, fill_price, qty, side, value,
                     commission, cash_after, position_qty, avg_price,
                     pnl_realized, pnl_unrealized, created_at
@@ -393,6 +494,7 @@ def _normalize_run_id(value: Any) -> Optional[str]:
 
                 data_to_insert.append((
                     str(row["run_id"]),
+                    resolved_account_id,
                     xai_id,
                     str(row["ticker"]),
                     row["signal_date"],  
diff --git a/AI/pipelines/daily_routine.py b/AI/pipelines/daily_routine.py
index 91ccd24f..c92f19ca 100644
--- a/AI/pipelines/daily_routine.py
+++ b/AI/pipelines/daily_routine.py
@@ -84,7 +84,10 @@ def run_daily_pipeline(
     run_id = f"daily_{exec_date_str}"
     print(f"\n[{exec_date_str}] === AI Daily Portfolio Routine (Mode: {mode.upper()}) ===")
 
-    repo = repo if repo is not None else PortfolioRepository(db_name=trading_config.pipeline.db_name)
+    repo = repo if repo is not None else PortfolioRepository(
+        db_name=trading_config.pipeline.db_name,
+        account_code=trading_config.pipeline.account_code,
+    )
 
     if mode == "simulation" and hasattr(repo, "reset_run_data"):
         try:
diff --git a/schema.sql b/schema.sql
index c133403a..3e39d9b8 100644
--- a/schema.sql
+++ b/schema.sql
@@ -1,6 +1,9 @@
 ----------------------------------------------------------------------
 -- Schema: public
---  - 퀀트 트레이딩, 백테스트, XAI 분석 및 포트폴리오 관리 시스템의 핵심 데이터 모델
+-- 퀀트 트레이딩, 백테스트, XAI 분석 및 포트폴리오 관리 시스템의 핵심 데이터 모델
+-- AI 스키마 현황입니다. 주요 테이블과 인덱스가 포함되어 있으며, 향후 필요에 따라 추가/수정될 수 있습니다.
+-- 구조만 보여줄 뿐, 이 문서를 변경해도 db 자체는 변경되지 않습니다!
+-- 스키마 변경시 최신화 바랍니다.
 ----------------------------------------------------------------------
 
 -- 1. HDD 경로를 테이블스페이스로 등록
@@ -175,13 +178,49 @@ CREATE TABLE "xai_reports" (
 ) TABLESPACE ts_ai_hdd;
 
 ----------------------------------------------------------------------
--- 9. executions
+-- 9. account_names
+--    - 파이프라인/계정 식별자 메타데이터 관리
+--    - executions.account_id 외래키 참조 기준 테이블
+----------------------------------------------------------------------
+CREATE TABLE IF NOT EXISTS "account_names" (
+    "id" bigserial PRIMARY KEY,                  -- 계정 식별자 PK
+    "account_code" varchar(128) NOT NULL,       -- 파이프라인에서 참조하는 안정 키
+    "account_name" varchar(255) NOT NULL,       -- 표시용 계정 이름
+    "description" text,                          -- 선택 설명
+    "is_active" boolean DEFAULT true NOT NULL,  -- 활성 여부
+    "created_at" timestamp with time zone DEFAULT now() NOT NULL, -- 생성 시각
+    "updated_at" timestamp with time zone DEFAULT now() NOT NULL, -- 수정 시각
+    CONSTRAINT "uq_account_names_account_code" UNIQUE ("account_code")
+) TABLESPACE ts_ai_hdd;
+
+-- 기본 legacy 파이프라인 계정을 idempotent하게 등록
+INSERT INTO "account_names" (
+    "account_code",
+    "account_name",
+    "description",
+    "is_active"
+) VALUES (
+    'chart_based_signal_model',
+    '차트 기반 시그널 모델',
+    'Chart-based signal model account.',
+    true
+)
+ON CONFLICT ("account_code") DO UPDATE
+SET
+    "account_name" = EXCLUDED."account_name",
+    "description" = EXCLUDED."description",
+    "is_active" = true,
+    "updated_at" = now();
+
+----------------------------------------------------------------------
+-- 10. executions
 --    - 실제 거래 또는 백테스트 시뮬레이션에서 발생한 개별 체결 이력
 --    - 자산 추적 및 성과 평가를 위한 가장 세밀한 로그 데이터
 ----------------------------------------------------------------------
 CREATE TABLE "executions" (
     "id" bigserial PRIMARY KEY,        -- 체결 로그 ID
     "run_id" varchar(64),              -- 백테스트/실행 회차 ID
+    "account_id" bigint,               -- 체결 주체 계정 ID (account_names.id)
     "ticker" varchar(255) NOT NULL,    -- 종목 티커
     "signal_date" date NOT NULL,       -- 전략 신호 발생일
     "signal_price" numeric(38, 2),     -- 전략 신호 당시 가격
@@ -199,11 +238,12 @@ CREATE TABLE "executions" (
     "pnl_unrealized" numeric(38, 2) NOT NULL, -- 현재 시점 기준 미실현 손익
     "created_at" timestamp with time zone DEFAULT now() NOT NULL, -- DB 기록 시각
     "xai_report_id" bigint,            -- 연관된 XAI 리포트 참조 ID
+    CONSTRAINT "fk_executions_account_names" FOREIGN KEY ("account_id") REFERENCES "account_names"("id") ON DELETE RESTRICT,
     CONSTRAINT "fk_executions_xai_reports" FOREIGN KEY ("xai_report_id") REFERENCES "xai_reports"("id") ON DELETE SET NULL
 ) TABLESPACE ts_ai_hdd;
 
 ----------------------------------------------------------------------
--- 10. portfolio_summary
+-- 11. portfolio_summary
 --    - 전체 자산의 일별 성과 요약 (Equity Curve 생성용)
 ----------------------------------------------------------------------
 CREATE TABLE "portfolio_summary" (
@@ -219,7 +259,7 @@ CREATE TABLE "portfolio_summary" (
 ) TABLESPACE ts_ai_hdd;
 
 ----------------------------------------------------------------------
--- 11. portfolio_positions
+-- 12. portfolio_positions
 --      날짜별 종목 포지션 스냅샷을 저장
 --    - portfolio_summary(일자별 총자산)와 함께 일별 상태 재현/검증 가능
 ----------------------------------------------------------------------
@@ -237,7 +277,7 @@ CREATE TABLE "portfolio_positions" (
 ) TABLESPACE ts_ai_hdd;
 
 ----------------------------------------------------------------------
--- 12. event_calendar
+-- 13. event_calendar
 --    - 주요 경제 일정(FOMC, CPI, GDP) 및 기업 실적 발표일 저장
 --    - AI 모델의 'Event' 피처(D-Day 계산 등)를 위한 원천 데이터
 ----------------------------------------------------------------------
@@ -257,7 +297,7 @@ CREATE TABLE IF NOT EXISTS "event_calendar" (
 ) TABLESPACE ts_ai_hdd;
 
 ----------------------------------------------------------------------
--- 13. sector_returns
+-- 14. sector_returns
 --    - stock_info의 'sector'와 매핑되는 ETF의 일별 수익률 저장
 --    - Wide Format이 아닌 Long Format (Date, Sector) 구조
 ----------------------------------------------------------------------
@@ -280,6 +320,10 @@ CREATE INDEX IF NOT EXISTS "idx_crypto_price_data_ticker" ON "crypto_price_data"
 -- 2. executions: 특정 백테스트 회차의 체결 내역 모아보기
 CREATE INDEX IF NOT EXISTS "idx_executions_run_id" ON "executions" ("run_id") TABLESPACE ts_ai_hdd;
 
+-- 2-1. executions: 계정 단위 분리 조회/기간 조회
+CREATE INDEX IF NOT EXISTS "idx_executions_account_id" ON "executions" ("account_id") TABLESPACE ts_ai_hdd;
+CREATE INDEX IF NOT EXISTS "idx_executions_account_id_fill_date" ON "executions" ("account_id", "fill_date") TABLESPACE ts_ai_hdd;
+
 -- 3. executions: XAI 리포트 조인용 (외래키는 자동 인덱스 생성이 안 됨)
 CREATE INDEX IF NOT EXISTS "idx_executions_xai_report_id" ON "executions" ("xai_report_id") TABLESPACE ts_ai_hdd;
 
@@ -293,4 +337,4 @@ CREATE INDEX IF NOT EXISTS "idx_portfolio_positions_date_ticker" ON "portfolio_p
 CREATE INDEX IF NOT EXISTS "idx_xai_reports_run_id" ON "xai_reports" ("run_id") TABLESPACE ts_ai_hdd;
 
 -- 7. company_fundamentals: 종목별 재무제표 데이터 조회용 (날짜 기준)
-CREATE INDEX IF NOT EXISTS "idx_company_fundamentals_date" ON "company_fundamentals" ("date") TABLESPACE ts_ai_hdd;
\ No newline at end of file
+CREATE INDEX IF NOT EXISTS "idx_company_fundamentals_date" ON "company_fundamentals" ("date") TABLESPACE ts_ai_hdd;