diff --git a/ml-augmentation-toolkit_project/README.md b/ml-augmentation-toolkit_project/README.md
new file mode 100644
index 0000000..a610d44
--- /dev/null
+++ b/ml-augmentation-toolkit_project/README.md
@@ -0,0 +1,35 @@
+# alloyxai
+
+> **A modular machine learning pipeline for data augmentation and explainable modeling in superalloy design**
+> 面向高温合金设计的数据增强与可解释性建模一体化机器学习框架
+
+---
+
+## 🔬 Project Overview | 项目概述
+
+**`alloyxai`** is a research-oriented Python toolkit that integrates *data generation*, *imbalance handling*, and *model interpretability* into a unified machine learning pipeline, specifically designed for **superalloy composition optimization and microstructure-performance prediction**.
+
+该项目融合了多种数据增强手段(MCMC、WGAN-GP、SMOGN)与可解释性分析(SHAP),适用于**高温合金成分设计、相粗化行为建模及高温性能预测等典型材料科学问题**。
+
+---
+
+## 🧩 Core Modules | 核心模块
+
+| 模块名 | 描述 |
+|-------------------|------|
+| `MCMCSampler` | 基于贝叶斯推断的元素比例生成器(Dirichlet + TruncatedNormal) |
+| `WGANGPRegressor` | 面向回归问题的小样本数据生成器,集成条件判别与梯度惩罚机制 |
+| `SMOGNAugmentor` | 用于不平衡目标分布的回归型过采样(适合长尾、高偏态分布) |
+| `SHAPAnalyzer` | 提供主效应、交互项、蜂群图与依赖图等多层次模型解释能力 |
+
+---
+
+## 🚀 Example Workflow | 示例工作流
+
+```bash
+# 安装依赖
+pip install -r requirements.txt
+
+# 运行主流程(默认启用 MCMC + WGAN + SHAP)
+python pipeline.py
+```
diff --git a/ml-augmentation-toolkit_project/ml-augmentation-toolkit/__init__.py b/ml-augmentation-toolkit_project/ml-augmentation-toolkit/__init__.py
new file mode 100644
index 0000000..99a8091
--- /dev/null
+++ b/ml-augmentation-toolkit_project/ml-augmentation-toolkit/__init__.py
@@ -0,0 +1,2 @@
+
+
diff --git a/ml-augmentation-toolkit_project/ml-augmentation-toolkit/mcmc_sampler.py b/ml-augmentation-toolkit_project/ml-augmentation-toolkit/mcmc_sampler.py
new file mode 100644
index 0000000..dddf553
--- /dev/null
+++ b/ml-augmentation-toolkit_project/ml-augmentation-toolkit/mcmc_sampler.py
@@ -0,0 +1,160 @@
+import os
+import pandas as pd
+import numpy as np
+import pymc as pm
+import arviz as az
+import matplotlib.pyplot as plt
+import seaborn as sns + + +class MCMCSampler: + """ + 使用 PyMC 对高温合金元素组成与温度进行 MCMC 采样。 + + - 元素组成建模为 Dirichlet 分布(强约束:总和为100%) + - 温度建模为 Truncated Normal 分布 + """ + + def __init__(self, + data_path, + trace_save_path, + sample_save_path, + elements_cols=None, + t_col='T', + draws=4000, + tune=1000, + chains=4, + cores=4, + seed=42, + concentration=100): + """ + 初始化采样器 + + Parameters: + data_path (str): 原始CSV数据路径 + trace_save_path (str): 轨迹保存路径 + sample_save_path (str): 生成样本保存路径 + elements_cols (list): 元素列名(默认10种常见元素) + t_col (str): 温度列名 + draws (int): 每条链的采样步数 + tune (int): 调优步数 + chains (int): 链数 + cores (int): 并行核数 + seed (int): 随机种子 + concentration (float): Dirichlet浓度参数 + """ + self.data_path = data_path + self.trace_save_path = trace_save_path + self.sample_save_path = sample_save_path + self.elements_cols = elements_cols or ['Co', 'Al', 'W', 'Ta', 'Ti', 'Nb', 'Ni', 'Cr', 'V', 'Mo'] + self.t_col = t_col + self.draws = draws + self.tune = tune + self.chains = chains + self.cores = cores + self.seed = seed + self.concentration = concentration + self.EPSILON = 1e-6 + + def load_data(self): + """读取数据并检查列合法性""" + if not os.path.exists(self.data_path): + raise FileNotFoundError(f"找不到数据文件: {self.data_path}") + self.data = pd.read_csv(self.data_path) + + for col in self.elements_cols + [self.t_col]: + if col not in self.data.columns: + raise ValueError(f"缺失列: {col},请检查数据文件格式。") + + self.elements_data = self.data[self.elements_cols].replace(0, 1e-5) + self.t_data = self.data[self.t_col] + + def _compute_dirichlet_alpha(self): + """根据元素均值计算 Dirichlet 参数 α""" + mean_props = self.elements_data.mean(axis=0) / 100.0 + alpha = np.maximum(mean_props * self.concentration, self.EPSILON) + return alpha + + def build_model(self): + """构建 PyMC 模型并进行采样""" + alpha = self._compute_dirichlet_alpha() + t_mu, t_sigma = self.t_data.mean(), self.t_data.std() + t_min, t_max = self.t_data.min(), self.t_data.max() + + with pm.Model() as self.model: + proportions = pm.Dirichlet("proportions", 
a=alpha, shape=(len(self.elements_cols),)) + elements_generated = pm.Deterministic("elements_generated", proportions * 100) + t_prior = pm.TruncatedNormal("T_prior", mu=t_mu, sigma=t_sigma, + lower=t_min - 10, upper=t_max + 10) + + self.trace = pm.sample( + draws=self.draws, + tune=self.tune, + chains=self.chains, + cores=self.cores, + target_accept=0.95, + random_seed=self.seed, + return_inferencedata=True + ) + + def check_convergence(self): + """使用ArviZ进行收敛性诊断""" + summary = az.summary(self.trace, var_names=["proportions", "T_prior"]) + if summary["r_hat"].max() > 1.05: + print("⚠️ 警告:存在未收敛参数,建议增加采样步数或调整模型!") + return summary + + def save_trace(self): + """保存 MCMC 轨迹数据为 CSV""" + proportions_trace = self.trace.posterior['proportions'].stack(sample=("chain", "draw")).values.transpose(1, 0) + t_trace = self.trace.posterior['T_prior'].stack(sample=("chain", "draw")).values.flatten() + trace_df = pd.DataFrame(proportions_trace, columns=[f"proportions_{el}" for el in self.elements_cols]) + trace_df["T_prior"] = t_trace + + os.makedirs(os.path.dirname(self.trace_save_path), exist_ok=True) + trace_df.to_csv(self.trace_save_path, index=False) + + def extract_samples(self): + """提取生成的后验样本""" + posterior = self.trace.posterior + self.samples_df = pd.DataFrame({ + col: posterior['elements_generated'][..., i].values.flatten() + for i, col in enumerate(self.elements_cols) + }) + self.samples_df['T'] = posterior['T_prior'].values.flatten() + + def save_samples(self): + """保存后验样本""" + os.makedirs(os.path.dirname(self.sample_save_path), exist_ok=True) + self.samples_df.to_csv(self.sample_save_path, index=False) + + def plot_distributions(self, save_dir=None): + """原始与生成数据分布对比图(可选保存)""" + for col in self.elements_cols + ['T']: + plt.figure(figsize=(8, 4)) + sns.kdeplot(self.data[col], label="原始数据", fill=True) + sns.kdeplot(self.samples_df[col], label="生成数据", fill=True) + plt.title(f"{col} 分布对比") + plt.xlabel("值") + plt.ylabel("密度") + plt.legend() + plt.tight_layout() + if 
save_dir: + os.makedirs(save_dir, exist_ok=True) + plt.savefig(os.path.join(save_dir, f"{col}_kde.png")) + plt.show() + + def run(self, plot=True, save_plot_dir=None): + """执行完整 MCMC 流程""" + print("🔄 开始 MCMC 流程...") + self.load_data() + self.build_model() + self.check_convergence() + self.save_trace() + self.extract_samples() + self.save_samples() + if plot: + self.plot_distributions(save_dir=save_plot_dir) + print("✅ MCMC流程完成!") + return self.samples_df, self.trace + diff --git a/ml-augmentation-toolkit_project/ml-augmentation-toolkit/shap_analyzer.py b/ml-augmentation-toolkit_project/ml-augmentation-toolkit/shap_analyzer.py new file mode 100644 index 0000000..68055b1 --- /dev/null +++ b/ml-augmentation-toolkit_project/ml-augmentation-toolkit/shap_analyzer.py @@ -0,0 +1,129 @@ +import os +import pandas as pd +import numpy as np +import shap +import matplotlib.pyplot as plt +from xgboost import XGBRegressor +from sklearn.model_selection import cross_val_predict, KFold +from sklearn.metrics import r2_score + + +class SHAPAnalyzer: + """ + 使用XGBoost + SHAP进行特征重要性分析和交互作用分析。 + """ + + def __init__(self, target_col, feature_name_mapping=None, random_state=42): + self.target_col = target_col + self.feature_name_mapping = feature_name_mapping or {} + self.random_state = random_state + + def fit(self, train_data, test_data, model_params=None): + self.train_data = train_data + self.test_data = test_data + + self.X_train = self.train_data.drop(columns=[self.target_col]) + self.y_train = self.train_data[self.target_col] + self.X_test = self.test_data.drop(columns=[self.target_col], errors='ignore') + + self.features = self.X_train.columns.tolist() + self.feature_display_names = [self.feature_name_mapping.get(col, col) for col in self.features] + + self.model_params = model_params or { + 'colsample_bytree': 1.0, + 'gamma': 2.0, + 'learning_rate': 0.1, + 'max_depth': 10, + 'n_estimators': 50, + 'subsample': 0.7, + 'eval_metric': 'rmse', + 'n_jobs': -1, + 'random_state': 
self.random_state + } + + xgb_model = XGBRegressor(**self.model_params) + kf = KFold(n_splits=10, shuffle=True, random_state=self.random_state) + y_pred = cross_val_predict(xgb_model, self.X_train, self.y_train, cv=kf) + + self.r2_score_cv = r2_score(self.y_train, y_pred) + self.y_cv_pred = y_pred # 保存交叉验证预测 + print(f"Cross-validated R²: {self.r2_score_cv:.4f}") + + self.final_model = xgb_model.fit(self.X_train, self.y_train) + + self.explainer = shap.TreeExplainer(self.final_model, feature_perturbation='tree_path_dependent') + self.shap_values = self.explainer(self.X_test).values + self.shap_interaction_values = self.explainer.shap_interaction_values(self.X_test) + + def save_feature_importance(self, path): + xgb_importance = self.final_model.feature_importances_ + shap_importance = np.abs(self.shap_values).mean(axis=0) + + importance_df = pd.DataFrame({ + 'Feature': self.features, + 'DisplayName': self.feature_display_names, + 'XGBoost_Importance': xgb_importance, + 'SHAP_Importance': shap_importance + }).sort_values('SHAP_Importance', ascending=False) + + os.makedirs(os.path.dirname(path), exist_ok=True) + importance_df.to_csv(path, index=False, float_format="%.6f") + print(f"✅ 特征重要性保存到: {path}") + + def save_shap_values(self, path): + shap_df = pd.DataFrame(self.shap_values, columns=self.features) + os.makedirs(os.path.dirname(path), exist_ok=True) + shap_df.to_csv(path, index=False, float_format="%.6f") + print(f"✅ SHAP值保存到: {path}") + + def save_shap_summary_plot(self, path): + plt.figure(figsize=(10, 8)) + shap.summary_plot(self.shap_values, self.X_test, feature_names=self.feature_display_names, show=False) + plt.title("SHAP Summary Plot") + plt.tight_layout() + plt.savefig(path, dpi=300, bbox_inches='tight') + plt.close() + print(f"✅ SHAP蜂群图保存到: {path}") + + def save_interaction_heatmap(self, path): + plt.figure(figsize=(10, 8)) + shap.summary_plot(self.shap_interaction_values, self.X_test, plot_type="compact_dot", show=False) + plt.title("SHAP Interaction 
Heatmap") + plt.tight_layout() + plt.savefig(path, dpi=300, bbox_inches='tight') + plt.close() + print(f"✅ 交互热力图保存到: {path}") + + def save_interaction_strengths(self, path): + strength = np.mean(np.abs(self.shap_interaction_values), axis=0) + + interaction_records = [] + for i in range(len(self.features)): + for j in range(i+1, len(self.features)): + interaction_records.append({ + 'Feature_A': self.features[i], + 'Feature_B': self.features[j], + 'Interaction_Strength': strength[i, j] + }) + + interaction_df = pd.DataFrame(interaction_records).sort_values('Interaction_Strength', ascending=False) + os.makedirs(os.path.dirname(path), exist_ok=True) + interaction_df.to_csv(path, index=False, float_format="%.6f") + print(f"✅ 全局交互强度保存到: {path}") + + def plot_dependence(self, feature, interaction_feature=None, path=None): + shap.dependence_plot( + feature, + self.shap_values, + self.X_test, + interaction_index=interaction_feature, + show=False + ) + plt.title(f"{feature} Interaction with {interaction_feature}") + if path: + os.makedirs(os.path.dirname(path), exist_ok=True) + plt.savefig(path, dpi=300, bbox_inches='tight') + plt.close() + print(f"✅ 依赖图保存到: {path}") + else: + plt.show() diff --git a/ml-augmentation-toolkit_project/ml-augmentation-toolkit/smogn_augmentor.py b/ml-augmentation-toolkit_project/ml-augmentation-toolkit/smogn_augmentor.py new file mode 100644 index 0000000..2a001e3 --- /dev/null +++ b/ml-augmentation-toolkit_project/ml-augmentation-toolkit/smogn_augmentor.py @@ -0,0 +1,77 @@ +import os +import smogn +import pandas as pd +import matplotlib.pyplot as plt + + +class SMOGNAugmentor: + """ + 使用 SMOGN 对回归数据进行增强,解决目标变量分布不平衡问题。 + """ + + def __init__(self, target_col, samp_method="balance", save_path=None): + """ + 初始化增强器 + + 参数: + target_col (str): 目标变量名(如 'K', 'Vol') + samp_method (str): 采样方法,可选 'balance' 或 'extreme' + save_path (str): 增强后数据的保存路径(可选) + """ + self.target_col = target_col + self.samp_method = samp_method + self.save_path = save_path + 
self.original_df = None + self.enhanced_df = None + + def fit_transform(self, df): + """ + 对输入 DataFrame 执行 SMOGN 增强 + + 参数: + df (pd.DataFrame): 原始数据 + + 返回: + pd.DataFrame: 增强后的数据 + """ + self.original_df = df.copy() + self.enhanced_df = smogn.smoter( + data=df, + y=self.target_col, + samp_method=self.samp_method + ) + return self.enhanced_df + + def plot_distribution(self, bins=30): + """ + 可视化增强前后目标变量的分布对比图 + + 参数: + bins (int): 直方图分箱数 + """ + if self.original_df is None or self.enhanced_df is None: + raise ValueError("请先运行 fit_transform()") + + plt.figure(figsize=(8, 5)) + plt.hist(self.original_df[self.target_col], bins=bins, alpha=0.5, label="原始数据", edgecolor="black") + plt.hist(self.enhanced_df[self.target_col], bins=bins, alpha=0.5, label="SMOGN 增强数据", edgecolor="black") + plt.xlabel(self.target_col) + plt.ylabel("频数") + plt.title(f"SMOGN 增强前后 {self.target_col} 的分布对比") + plt.legend() + plt.tight_layout() + plt.show() + + def save(self, path=None): + """ + 保存增强后的数据为 CSV + + 参数: + path (str): 指定保存路径;如为空则使用初始化时的 save_path + """ + path = path or self.save_path + if path is None: + raise ValueError("未指定保存路径,请传入 path 或设置 save_path") + os.makedirs(os.path.dirname(path), exist_ok=True) + self.enhanced_df.to_csv(path, index=False) + print(f"✅ 增强后的数据已保存至:{path}") diff --git a/ml-augmentation-toolkit_project/ml-augmentation-toolkit/wgan_gp_generator.py b/ml-augmentation-toolkit_project/ml-augmentation-toolkit/wgan_gp_generator.py new file mode 100644 index 0000000..df9d327 --- /dev/null +++ b/ml-augmentation-toolkit_project/ml-augmentation-toolkit/wgan_gp_generator.py @@ -0,0 +1,187 @@ +import os +import torch +import torch.nn as nn +import torch.optim as optim +import numpy as np +import pandas as pd +import matplotlib.pyplot as plt + + +def seed_everything(seed=42): + """确保结果可复现""" + import random + random.seed(seed) + np.random.seed(seed) + torch.manual_seed(seed) + torch.cuda.manual_seed_all(seed) + torch.backends.cudnn.deterministic = True + 
torch.backends.cudnn.benchmark = False + + +class Generator(nn.Module): + def __init__(self, latent_dim, condition_dim, output_dim): + super().__init__() + self.model = nn.Sequential( + nn.Linear(latent_dim + condition_dim, 256), # 输入:噪声 + 条件变量 + nn.LeakyReLU(0.2), + nn.Linear(256, 512), + nn.LeakyReLU(0.2), + nn.Linear(512, 1024), + nn.LeakyReLU(0.2), + nn.Linear(1024, output_dim) # 输出:回归特征 + ) + + def forward(self, z, conditions): + input_combined = torch.cat((z, conditions), dim=1) + return self.model(input_combined) + + +class Discriminator(nn.Module): + def __init__(self, input_dim, condition_dim): + super().__init__() + self.model = nn.Sequential( + nn.Linear(input_dim + condition_dim, 512), + nn.LeakyReLU(0.2), + nn.Linear(512, 256), + nn.LeakyReLU(0.2), + nn.Linear(256, 1) # 输出:真实性分数 + ) + + def forward(self, x, conditions): + input_combined = torch.cat((x, conditions), dim=1) + return self.model(input_combined) + + +class WGANGPRegressor: + def __init__(self, latent_dim=11, lambda_gp=10, device=None): + self.latent_dim = latent_dim + self.lambda_gp = lambda_gp + self.device = device or ("cuda" if torch.cuda.is_available() else "cpu") + self.generator = None + self.discriminator = None + + def fit(self, X, conditions, n_epochs=3000, batch_size=32, n_critic=5, lr=1e-5, save_log_path=None): + """ + 训练 WGAN-GP 模型 + + 参数: + X: np.ndarray, shape=(n_samples, n_features),回归特征 + conditions: np.ndarray, shape=(n_samples, n_condition_features),条件变量 + """ + seed_everything() + + X = torch.tensor(X, dtype=torch.float32).to(self.device) + conditions = torch.tensor(conditions, dtype=torch.float32).to(self.device) + + self.output_dim = X.shape[1] + self.condition_dim = conditions.shape[1] + + self.generator = Generator(self.latent_dim, self.condition_dim, self.output_dim).to(self.device) + self.discriminator = Discriminator(self.output_dim, self.condition_dim).to(self.device) + + optimizer_G = optim.Adam(self.generator.parameters(), lr=lr, betas=(0.5, 0.9)) + optimizer_D 
= optim.Adam(self.discriminator.parameters(), lr=lr, betas=(0.5, 0.9)) + + self.g_losses = [] + self.d_losses = [] + + for epoch in range(n_epochs): + for _ in range(n_critic): + optimizer_D.zero_grad() + idx = torch.randint(0, X.shape[0], (batch_size,)) + real_x = X[idx] + real_c = conditions[idx] + + z = torch.randn(batch_size, self.latent_dim).to(self.device) + fake_x = self.generator(z, real_c).detach() + + d_real = self.discriminator(real_x, real_c) + d_fake = self.discriminator(fake_x, real_c) + gp = self._gradient_penalty(real_x, fake_x, real_c) + + d_loss = -torch.mean(d_real) + torch.mean(d_fake) + gp + d_loss.backward() + optimizer_D.step() + + # 训练生成器 + optimizer_G.zero_grad() + z = torch.randn(batch_size, self.latent_dim).to(self.device) + fake_x = self.generator(z, real_c) + d_fake = self.discriminator(fake_x, real_c) + g_loss = -torch.mean(d_fake) + g_loss.backward() + optimizer_G.step() + + self.d_losses.append(d_loss.item()) + self.g_losses.append(g_loss.item()) + + if epoch % 100 == 0: + print(f"[{epoch}/{n_epochs}] D_loss: {d_loss.item():.4f}, G_loss: {g_loss.item():.4f}") + + # 日志保存 + if save_log_path: + df_log = pd.DataFrame({'D_loss': self.d_losses, 'G_loss': self.g_losses}) + os.makedirs(os.path.dirname(save_log_path), exist_ok=True) + df_log.to_csv(save_log_path, index=False) + + def _gradient_penalty(self, real_x, fake_x, condition): + alpha = torch.rand(real_x.size(0), 1).to(self.device) + interpolated = (alpha * real_x + (1 - alpha) * fake_x).requires_grad_(True) + d_interpolated = self.discriminator(interpolated, condition) + gradients = torch.autograd.grad(outputs=d_interpolated, + inputs=interpolated, + grad_outputs=torch.ones_like(d_interpolated), + create_graph=True, retain_graph=True)[0] + grad_norm = gradients.view(gradients.size(0), -1).norm(2, dim=1) + return self.lambda_gp * ((grad_norm - 1) ** 2).mean() + + def generate(self, condition_array, n_samples=None, z=None): + """ + 生成模拟数据 + + 参数: + condition_array: np.ndarray, 条件变量数组 + 
n_samples: int, 要生成的样本数量(若 z 提供则可省略) + z: torch.Tensor, 自定义潜变量张量 + + 返回: + np.ndarray: 生成数据 + """ + self.generator.eval() + condition_array = np.array(condition_array) + + if z is None: + if n_samples is None: + n_samples = condition_array.shape[0] + z = torch.randn(n_samples, self.latent_dim).to(self.device) + else: + z = z.to(self.device) + n_samples = z.size(0) + + conditions = torch.tensor(condition_array, dtype=torch.float32).to(self.device) + if conditions.shape[0] != n_samples: + raise ValueError("生成样本数与条件变量数量不一致。") + + with torch.no_grad(): + fake_data = self.generator(z, conditions).cpu().numpy() + return fake_data + + def plot_loss(self): + plt.figure(figsize=(10, 5)) + plt.plot(self.d_losses, label="Discriminator Loss") + plt.plot(self.g_losses, label="Generator Loss") + plt.xlabel("Iterations") + plt.ylabel("Loss") + plt.legend() + plt.title("WGAN-GP Training Loss") + plt.tight_layout() + plt.show() + + def save_model(self, path_prefix): + os.makedirs(os.path.dirname(path_prefix), exist_ok=True) + torch.save(self.generator.state_dict(), path_prefix + "_G.pth") + torch.save(self.discriminator.state_dict(), path_prefix + "_D.pth") + + def load_model(self, path_prefix): + self.generator.load_state_dict(torch.load(path_prefix + "_G.pth", map_location=self.device)) + self.discriminator.load_state_dict(torch.load(path_prefix + "_D.pth", map_location=self.device)) diff --git a/ml-augmentation-toolkit_project/pipeline.py b/ml-augmentation-toolkit_project/pipeline.py new file mode 100644 index 0000000..b6a32be --- /dev/null +++ b/ml-augmentation-toolkit_project/pipeline.py @@ -0,0 +1,155 @@ +import os +import pandas as pd +from ml-augmentation-toolkit.mcmc_sampler import MCMCSampler +from ml-augmentation-toolkit.wgan_gp_generator import WGANGPRegressor +from ml-augmentation-toolkit.smogn_augmentor import SMOGNAugmentor +from ml-augmentation-toolkit.shap_analyzer import SHAPAnalyzer +from sklearn.preprocessing import StandardScaler + + +def main(config): + # Step 
0: 加载原始数据 + print("\n🔵 加载原始数据...") + original_df = pd.read_csv(config["original_data_path"]) + + # 初始化增强数据列表 + enhanced_datasets = [] + + # Step 1: MCMC采样(可选) + if config["use_mcmc"]: + print("\n🚀 Step 1: MCMC Sampling...") + mcmc_sampler = MCMCSampler( + data_path=config["original_data_path"], + trace_save_path=config["mcmc"]["trace_save_path"], + sample_save_path=config["mcmc"]["sample_save_path"], + draws=config["mcmc"]["draws"], + chains=config["mcmc"]["chains"], + cores=config["mcmc"]["cores"] + ) + mcmc_samples, _ = mcmc_sampler.run(plot=False) + enhanced_datasets.append(mcmc_samples) + print("✅ MCMC采样完成。") + + # Step 2: WGAN-GP生成(可选) + if config["use_wgan"]: + print("\n🚀 Step 2: WGAN-GP Generation...") + scaler_X = StandardScaler() + scaler_y = StandardScaler() + + X = original_df.drop(columns=[config["target_col"]]).values + y = original_df[config["target_col"]].values.reshape(-1, 1) + + X_scaled = scaler_X.fit_transform(X) + y_scaled = scaler_y.fit_transform(y) + + wgan_gp = WGANGPRegressor(latent_dim=config["wgan"]["latent_dim"]) + wgan_gp.fit(X_scaled, y_scaled, n_epochs=config["wgan"]["n_epochs"], batch_size=config["wgan"]["batch_size"]) + + generated_scaled = wgan_gp.generate(y_scaled, n_samples=config["wgan"]["n_generated_samples"]) + generated_X = scaler_X.inverse_transform(generated_scaled) + + generated_df = pd.DataFrame(generated_X, columns=original_df.columns.drop(config["target_col"])) + generated_df[config["target_col"]] = scaler_y.inverse_transform(y_scaled[:generated_df.shape[0]]).flatten() + + os.makedirs(os.path.dirname(config["wgan"]["save_path"]), exist_ok=True) + generated_df.to_csv(config["wgan"]["save_path"], index=False) + enhanced_datasets.append(generated_df) + print("✅ WGAN-GP生成完成。") + + # Step 3: SMOGN增强(可选) + if config["use_smogn"]: + print("\n🚀 Step 3: SMOGN Data Augmentation...") + smogn_augmentor = SMOGNAugmentor( + target_col=config["target_col"], + samp_method="balance", + save_path=config["smogn"]["save_path"] + ) + 
smogn_df = smogn_augmentor.fit_transform(original_df) + smogn_augmentor.save() + enhanced_datasets.append(smogn_df) + print("✅ SMOGN增强完成。") + + # Step 4: 整合所有增强数据 + print("\n🔵 整合增强数据...") + if enhanced_datasets: + all_data = pd.concat(enhanced_datasets, axis=0).drop_duplicates().reset_index(drop=True) + else: + print("⚠️ 未选择任何数据增强方法,仅使用原始数据。") + all_data = original_df.copy() + + print(f"总数据量: {all_data.shape[0]} 条") + + # Step 5: SHAP 可解释性分析 + print("\n🚀 Step 4: SHAP Analysis...") + shap_analyzer = SHAPAnalyzer( + target_col=config["target_col"], + feature_name_mapping=config.get("feature_name_mapping", {}), + random_state=42 + ) + test_data = pd.read_csv(config["shap"]["test_data_path"]) + + shap_analyzer.fit(train_data=all_data, test_data=test_data) + + shap_analyzer.save_feature_importance(config["shap"]["feature_importance_path"]) + shap_analyzer.save_shap_values(config["shap"]["shap_values_path"]) + shap_analyzer.save_shap_summary_plot(config["shap"]["shap_summary_plot_path"]) + shap_analyzer.save_interaction_heatmap(config["shap"]["interaction_heatmap_path"]) + shap_analyzer.save_interaction_strengths(config["shap"]["interaction_strength_path"]) + shap_analyzer.plot_dependence( + feature=config["shap"]["dependence_plot_feature"], + interaction_feature=config["shap"]["dependence_plot_interaction"], + path=config["shap"]["dependence_plot_path"] + ) + + print("\n🎯 Pipeline 完成!") + + +if __name__ == "__main__": + config = { + "original_data_path": "data/原始实验数据.csv", # 原始实验数据路径 + + "use_mcmc": True, # 是否启用 MCMC + "use_wgan": True, # 是否启用 WGAN + "use_smogn": False, # 是否启用 SMOGN + + "target_col": "Vol", + + "mcmc": { + "trace_save_path": "outputs/mcmc_trace.csv", + "sample_save_path": "outputs/mcmc_samples.csv", + "draws": 4000, + "chains": 4, + "cores": 8 + }, + + "wgan": { + "latent_dim": 11, + "n_epochs": 3000, + "batch_size": 64, + "n_generated_samples": 1000, + "save_path": "outputs/wgan_generated.csv" + }, + + "smogn": { + "save_path": 
"outputs/smogn_augmented.csv" + }, + + "shap": { + "test_data_path": "data/原始实验数据.csv", + "feature_importance_path": "outputs/shap_feature_importance.csv", + "shap_values_path": "outputs/shap_values.csv", + "shap_summary_plot_path": "outputs/shap_summary_plot.png", + "interaction_heatmap_path": "outputs/interaction_heatmap.png", + "interaction_strength_path": "outputs/global_interaction_strength.csv", + "dependence_plot_feature": "Ti", + "dependence_plot_interaction": "Ta", + "dependence_plot_path": "outputs/Ti_Ta_dependence_plot.png" + }, + + "feature_name_mapping": { + "Co": "Co", "Al": "Al", "W": "W", "Ta": "Ta", "Ti": "Ti", "Nb": "Nb", "Ni": "Ni", "Cr": "Cr", "V": "V", "Mo": "Mo", + "Tage": r"$T_{\mathrm{age}}$", "tage": r"$t_{\mathrm{age}}$" + } + } + + main(config) diff --git a/ml-augmentation-toolkit_project/requirements.txt b/ml-augmentation-toolkit_project/requirements.txt new file mode 100644 index 0000000..e69de29 diff --git a/ml-augmentation-toolkit_project/setup.py b/ml-augmentation-toolkit_project/setup.py new file mode 100644 index 0000000..349871e --- /dev/null +++ b/ml-augmentation-toolkit_project/setup.py @@ -0,0 +1,31 @@ +from setuptools import setup, find_packages + +setup( + name='alloyxai', + version='0.1.0', + description="An integrated machine learning pipeline for advanced data augmentation and model interpretability in high-temperature alloy research.", + author_email='linlinsun1010@163.com', + url='https://github.com/003sunshine/alloyxai', + packages=find_packages(), + include_package_data=True, + install_requires=[ + 'numpy>=1.20.0', + 'pandas>=1.3.0', + 'matplotlib>=3.4.0', + 'scikit-learn>=1.0.0', + 'xgboost>=1.5.0', + 'shap>=0.41.0', + 'smogn>=0.1.2', + 'pymc>=5.0.0', + 'arviz>=0.12.0', + 'torch>=1.9.0' + ], + classifiers=[ + "Programming Language :: Python :: 3", + "License :: OSI Approved :: MIT License", + "Operating System :: OS Independent", + "Intended Audience :: Science/Research", + "Topic :: Scientific/Engineering :: 
Artificial Intelligence", + ], + python_requires='>=3.8', +)