From 32b94c03f65241367c6b022adf0d5ba767efe1a2 Mon Sep 17 00:00:00 2001 From: Shivam Shandilya Date: Tue, 1 Jul 2025 19:35:16 +0000 Subject: [PATCH 1/2] added support for TACO-RL --- experiments/taco-rl/README.md | 369 +++++++++++++ .../taco-rl/configs/train_reinforce.yaml | 53 ++ experiments/taco-rl/metrics.py | 27 + experiments/taco-rl/train_reinforce.py | 276 ++++++++++ experiments/taco-rl/utils.py | 361 +++++++++++++ llmlingua/__init__.py | 3 +- llmlingua/taco-rl/README.md | 112 ++++ llmlingua/taco-rl/__init__.py | 7 + .../taco-rl/prompt_compressor_reinforce.py | 509 ++++++++++++++++++ 9 files changed, 1716 insertions(+), 1 deletion(-) create mode 100644 experiments/taco-rl/README.md create mode 100644 experiments/taco-rl/configs/train_reinforce.yaml create mode 100644 experiments/taco-rl/metrics.py create mode 100644 experiments/taco-rl/train_reinforce.py create mode 100644 experiments/taco-rl/utils.py create mode 100644 llmlingua/taco-rl/README.md create mode 100644 llmlingua/taco-rl/__init__.py create mode 100644 llmlingua/taco-rl/prompt_compressor_reinforce.py diff --git a/experiments/taco-rl/README.md b/experiments/taco-rl/README.md new file mode 100644 index 0000000..ca171f6 --- /dev/null +++ b/experiments/taco-rl/README.md @@ -0,0 +1,369 @@ +# TACO-RL Experiments: Training and Implementation Guide + +This directory contains the experimental implementation of TACO-RL (Task-Aware Prompt Compression Optimization with Reinforcement Learning) for fine-tuning LLMLingua models on new tasks. + +## Directory Structure + +``` +experiments/taco-rl/ +├── README.md # This file +├── train_reinforce.py # Main training script +├── utils.py # Utility functions and LLM querying +├── metrics.py # Evaluation metrics +├── configs/ # Configuration files +│ └── train_reinforce.yaml # Training configuration +└── logs/ # Training logs (created during training) +``` + +## Quick Start + +### 1. 
Prepare Your Data + +Create a JSON file with your training data in the following format: + +```json +{ + "0": { + "original_prompt": "Your original prompt text here...", + "gpt_output": "Expected GPT output for the prompt..." + }, + "1": { + "original_prompt": "Another prompt...", + "gpt_output": "Another expected output..." + } +} +``` + +**Data Format Requirements:** +- `original_prompt`: The input text that needs to be compressed +- `gpt_output`: The expected output from the teacher model (e.g., GPT-3.5) for the original prompt + +**Task-Specific Examples:** + +**Meeting Summarization:** +```json +{ + "0": { + "original_prompt": "Meeting transcript: John discussed Q1 results showing 15% growth in revenue. Sarah mentioned challenges with supply chain. Mike proposed new marketing strategy...", + "gpt_output": "Summary: Q1 results were positive with 15% revenue growth. Supply chain challenges were identified. New marketing strategy was proposed." + } +} +``` + +**Question Answering:** +```json +{ + "0": { + "original_prompt": "Context: The Eiffel Tower was built in 1889 by Gustave Eiffel for the World's Fair. It stands 324 meters tall and was originally intended to be temporary. Question: When was the Eiffel Tower built?", + "gpt_output": "The Eiffel Tower was built in 1889." + } +} +``` + +**Code Summarization:** +```json +{ + "0": { + "original_prompt": "def quicksort(arr): if len(arr) <= 1: return arr; pivot = arr[len(arr) // 2]; left = [x for x in arr if x < pivot]; middle = [x for x in arr if x == pivot]; right = [x for x in arr if x > pivot]; return quicksort(left) + middle + quicksort(right)", + "gpt_output": "This function implements the quicksort algorithm using a divide-and-conquer approach with pivot selection and recursive sorting." + } +} +``` + +### 2. 
Configure API Settings (Optional) + +If you're using Azure OpenAI services, you can configure your API settings in `utils.py`: + +```python +# In utils.py, update the DEFAULT_API_CONFIG with your settings: +DEFAULT_API_CONFIG = { + "scope": "https://cognitiveservices.azure.com/.default", + "client_id": "YOUR_CLIENT_ID_HERE", # Replace with your client ID + "api_details": { + "primary": { + "api_base": "YOUR_PRIMARY_API_BASE_HERE", # Replace with your primary API base + "api_version": "2024-02-01", + }, + "secondary": { + "api_base": "YOUR_SECONDARY_API_BASE_HERE", # Replace with your secondary API base + "api_version": "2024-02-01", + } + } +} +``` + +Alternatively, you can initialize with custom configuration in your training script: + +```python +from utils import initialize_api_config + +custom_config = { + "client_id": "your-client-id", + "api_details": { + "primary": {"api_base": "your-primary-endpoint"}, + "secondary": {"api_base": "your-secondary-endpoint"} + } +} +initialize_api_config(custom_config) +``` + +### 3. 
Configure Training Parameters + +Edit `configs/train_reinforce.yaml` to set your desired hyperparameters: + +```yaml +data: + train_file_path: "path/to/your/train_data.json" + +model: + model_load_path: "microsoft/llmlingua-2-xlm-roberta-large-meetingbank" + trained_model_output_dir_hf: "models/reinforce_trained_model" + +task: + prompt: "Summarize the provided text (which may be compressed).\n{transcript}\nSummary:" + gpt_model: "gpt-35-turbo" + max_tokens: 1500 + +hyperparams: + epochs: 4 + train_batch_size: 8 + policy_lr: 1e-5 + compression_rate: 0.5 + compression_relaxation_tokens: 30 + max_seq_len: 512 + entropy_coeff: 0.01 + + # Compression settings + target_token: -1 + use_context_level_filter: false + use_token_level_filter: true + target_context: -1 + context_level_compression_rate: 1.0 + context_level_target_token: -1 + force_tokens: "" + drop_consecutive: true + force_reserve_digit: false + +logging: + log_dir: "logs_new" + log_interval: 5 + save_interval: 100 + +device: + use_cuda: true + device_map: "auto" +``` + +### 4. Run Training + +```bash +cd LLMLingua/experiments/taco-rl +accelerate launch train_reinforce.py +``` + +## Configuration Parameters + +### Data Configuration +- `train_file_path`: Path to your training data JSON file + +### Model Configuration +- `model_load_path`: Pre-trained LLMLingua model to fine-tune +- `trained_model_output_dir_hf`: Directory to save the fine-tuned model + +### Task Configuration +- `prompt`: Template prompt for the teacher model (GPT-3.5). 
Use `{transcript}` as placeholder for the compressed text +- `gpt_model`: Teacher model name for reward computation +- `max_tokens`: Maximum tokens for teacher model responses + +**Task-Specific Prompt Examples:** + +**Meeting Summarization:** +```yaml +task: + prompt: "Summarize the provided meeting transcript (which may be compressed).\n{transcript}\nSummary:" +``` + +**Question Answering:** +```yaml +task: + prompt: "Answer the following question based on the provided context (which may be compressed).\n{transcript}\nAnswer:" +``` + +**Code Summarization:** +```yaml +task: + prompt: "Summarize the following code (which may be compressed).\n{transcript}\nSummary:" +``` + +### Training Hyperparameters +- `epochs`: Number of training epochs +- `train_batch_size`: Batch size for training +- `policy_lr`: Learning rate for policy optimization +- `compression_rate`: Target compression ratio (0.0-1.0) +- `compression_relaxation_tokens`: Tolerance for compression deviation +- `max_seq_len`: Maximum sequence length for tokenization +- `entropy_coeff`: Entropy regularization coefficient for exploration + +### Compression Settings +- `target_token`: Target number of tokens (-1 for rate-based compression) +- `use_context_level_filter`: Whether to use context-level filtering +- `use_token_level_filter`: Whether to use token-level filtering +- `target_context`: Target number of contexts (-1 for rate-based) +- `context_level_compression_rate`: Context-level compression rate +- `context_level_target_token`: Context-level target tokens +- `force_tokens`: Tokens to always preserve +- `drop_consecutive`: Whether to drop consecutive tokens +- `force_reserve_digit`: Whether to preserve digits + +### Logging Configuration +- `log_dir`: Directory for log files +- `log_interval`: How often to log metrics +- `save_interval`: How often to save model checkpoints + +### Device Configuration +- `use_cuda`: Whether to use CUDA if available +- `device_map`: Device mapping strategy + +## 
Training Process + +### 1. Data Loading +The training script uses the `GenericRLDataset` class to load training data: + +```python +class GenericRLDataset(Dataset): + def __init__(self, data_path): + self.data = json.load(open(data_path)) + + def __len__(self): + return len(self.data) + + def __getitem__(self, idx): + tokenized_dataset = tokenize_text(self.data[str(idx)]["original_prompt"]) + return { + "input_ids": tokenized_dataset["input_ids"], + "attention_mask": tokenized_dataset["attention_mask"], + "org_prompt": self.data[str(idx)]["original_prompt"], + "gpt_output": self.data[str(idx)]["gpt_output"] + } +``` + +### 2. Model Initialization +Creates a `PromptCompressorReinforce` instance with the specified pre-trained model: + +```python +compressor = PromptCompressorReinforce( + model_name=model_name, + model_config={}, + use_llmlingua2=True, + device_map=device_map, +) +``` + +### 3. REINFORCE Training Loop +For each batch: +1. Compresses prompts using the RL-enhanced compressor +2. Calculates rewards based on compression quality using the teacher model +3. Updates the policy network using REINFORCE algorithm +4. Logs training metrics and progress + +## Reward Function + +The reward function evaluates compression quality by: + +1. **Compression Quality**: BLEU score between summaries of original and compressed text +2. **Compression Ratio**: Penalty for not meeting target compression rate +3. **Compression Coefficient**: Penalty for deviating from target compression +4. **Entropy Regularization**: Encourages exploration during training + +### Reward Computation Details +The reward is positive when the compression is within the tolerance threshold. It is set to a negative value during over-compression and when it crosses the tolerance threshold. 
+ +```python +def calculate_rewards(gpt_output, model_compressed_text, compression_ratio): + if compression_ratio["compressed"] > 0.1*compression_ratio["original"]: + org_input_summary = gpt_output + model_compressed_summary = get_gpt_output(model_compressed_text) + comp_ratio = compression_ratio["original"] / compression_ratio["compressed"] + else: + comp_ratio = compression_ratio["original"] / compression_ratio["compressed"] + return {"comp_ratio": comp_ratio, "reward": -0.4} + + if org_input_summary is not None and model_compressed_summary is not None: + eval_scores = evaluate_sim2([model_compressed_summary], [org_input_summary]) + bleu = eval_scores["bleu"] + comp_coeff = compression_ratio["compressed"] - compression_rate*compression_ratio["original"] + + if abs(comp_coeff) > input_dict["compression_relaxation_tokens"]: + reward = -0.1 + else: + reward = bleu + + return {"comp_ratio": comp_ratio, "reward": reward} + else: + return {"comp_ratio": comp_ratio, "reward": 0.4} +``` +### Note: This function will need to be modified as per the task. For example, for the QA task, the *reward* should be F1 Score. + +## Output + +The training script produces: + +### 1. Fine-tuned Model +- Saved to the specified output directory +- Can be loaded and used with the main LLMLingua package + +### 2. Training Logs +- `reinforce_logs_updated.csv`: Policy loss and learning rate logs +- `rewards_logs_updated.csv`: Reward and compression ratio logs +- Console output with real-time training progress + +### 3. Log Format +``` +Train/Val, Epoch, Step, Policy Loss, LR +Train, 0, 0, 0.123, 1e-05 +Train, 0, 5, 0.098, 1e-05 +... +``` + +## Evaluation + +After training your TACO-RL model, you can evaluate its performance using the evaluation scripts and utilities available in the [LLMLingua2 experiments directory](../llmlingua2/evaluation/). 
+ + +## Integration with Main LLMLingua + +After training, you can use your fine-tuned model with the main LLMLingua package: + +```python +from llmlingua import PromptCompressor + +# Load your fine-tuned model +compressor = PromptCompressor( + model_name="path/to/your/fine_tuned_model", + use_llmlingua2=True +) + +# Use it for compression +compressed_prompt = compressor.compress_prompt_llmlingua2( + ["Your prompt here..."], + rate=0.5 +) +``` + + +## Notes + +- The training uses the REINFORCE algorithm for policy gradient optimization +- The compressor maintains the same interface as the original LLMLingua but provides additional RL information +- Make sure to adjust batch sizes and learning rates based on your hardware capabilities +- The teacher model (GPT-3.5) is used to evaluate compression quality and provide reward signals +- All hyperparameters are configurable through the YAML file for easy experimentation +- Different tasks may require different evaluation metrics for optimal performance + +## Debugging Tips + +1. **Check Logs**: Monitor the CSV log files for training progress +2. **Validate Data**: Ensure your training data format is correct +3. **Test Configuration**: Start with a small dataset to test your setup +4. **Monitor Rewards**: Watch for consistent reward improvements over time +5. 
**Check Compression**: Verify that compression ratios are within expected ranges \ No newline at end of file diff --git a/experiments/taco-rl/configs/train_reinforce.yaml b/experiments/taco-rl/configs/train_reinforce.yaml new file mode 100644 index 0000000..9b2d80a --- /dev/null +++ b/experiments/taco-rl/configs/train_reinforce.yaml @@ -0,0 +1,53 @@ +# Configuration file for REINFORCE training + +data: + train_file_path: + +model: + model_load_path: "microsoft/llmlingua-2-xlm-roberta-large-meetingbank" + trained_model_output_dir_hf: "models/reinforce_trained_model" + +task: + prompt: "Summarize the provided meeting transcript (which may be compressed).\n{transcript}\nSummary:" + gpt_model: "gpt-35-turbo" + max_tokens: 1500 + +hyperparams: + # Training parameters + epochs: 4 + train_batch_size: 8 + + # Learning rates + policy_lr: 1e-5 + + # Compression parameters + compression_rate: 0.5 + compression_relaxation_tokens: 30 + + # REINFORCE specific parameters + entropy_coeff: 0.01 # Entropy regularization coefficient + + # Model parameters + max_seq_len: 512 + + # Compression settings + target_token: -1 + use_context_level_filter: false + use_token_level_filter: true + target_context: -1 + context_level_compression_rate: 1.0 + context_level_target_token: -1 + force_tokens: "" + drop_consecutive: true + force_reserve_digit: false + +# Logging settings +logging: + log_dir: "logs_new" + log_interval: 5 + save_interval: 100 + +# Device settings +device: + use_cuda: true + device_map: "cpu" \ No newline at end of file diff --git a/experiments/taco-rl/metrics.py b/experiments/taco-rl/metrics.py new file mode 100644 index 0000000..5e53eed --- /dev/null +++ b/experiments/taco-rl/metrics.py @@ -0,0 +1,27 @@ +import evaluate + +def evaluate_sim2(pred_list, gt_list, truncate_pred=True, truncate_gt=False): + if truncate_pred: + pred_list_truncated = [] + for pred in pred_list: + pred = pred.lstrip("\n").split("\n")[0].strip() + pred_list_truncated.append(pred) + pred_list = 
pred_list_truncated + if truncate_gt: + gt_list_truncated = [] + for gt in gt_list: + gt = gt.lstrip("\n").split("\n")[0].strip() + gt_list_truncated.append(gt) + gt_list = gt_list_truncated + + + rouge = evaluate.load("rouge") + bleu = evaluate.load("bleu") + metrics = {} + rouge_scores = rouge.compute(predictions=pred_list, references=gt_list) + bleu_scores = bleu.compute(predictions=pred_list, references=gt_list) + metrics["rouge1"] = rouge_scores["rouge1"] + metrics["rougeL"] = rouge_scores["rougeL"] + metrics["bleu"] = bleu_scores["bleu"] + + return metrics \ No newline at end of file diff --git a/experiments/taco-rl/train_reinforce.py b/experiments/taco-rl/train_reinforce.py new file mode 100644 index 0000000..9b195f2 --- /dev/null +++ b/experiments/taco-rl/train_reinforce.py @@ -0,0 +1,276 @@ +import torch +import torch.optim as optim +import numpy as np +from torch.utils.data import DataLoader, Dataset +from accelerate import Accelerator +from transformers import BertModel, BertTokenizer, AutoTokenizer, AutoModelForTokenClassification +import torch.nn.functional as F +import random +import json +from tqdm.auto import tqdm +import torch.nn as nn +from torch.distributions import Categorical +from llmlingua.taco_rl import PromptCompressorReinforce +from metrics import evaluate_sim2 +from utils import query_llm +from accelerate import Accelerator +from accelerate import DistributedDataParallelKwargs as DDPK +from accelerate.utils import InitProcessGroupKwargs +from datetime import timedelta +from csv_logger import CsvLogger +import logging +import regex as re +import hydra +import gc +import os +import warnings +import math + + +warnings.filterwarnings("ignore") +os.environ["TOKENIZERS_PARALLELISM"] = "false" + +# turn off logging +logging.disable(logging.CRITICAL) + +# kwargs = DDPK(find_unused_parameters=True) +kwargs = InitProcessGroupKwargs(timeout=timedelta(seconds=3600)) +accelerator = Accelerator(kwargs_handlers=[kwargs]) + +input_dict={} 
+@hydra.main(config_path="configs", config_name="train_reinforce.yaml", version_base=None) +def run(cfg): + input_dict["train_data_path"] = cfg.data.train_file_path + input_dict["model_load_path_hf"] = cfg.model.model_load_path + input_dict["model_save_path_hf"] = cfg.model.trained_model_output_dir_hf + input_dict["num_epochs"] = cfg.hyperparams.epochs + input_dict["compression_rate"] = cfg.hyperparams.compression_rate + input_dict["policy_lr"] = cfg.hyperparams.policy_lr + input_dict["train_batch_size"] = cfg.hyperparams.train_batch_size + input_dict["compression_relaxation_tokens"] = cfg.hyperparams.compression_relaxation_tokens + input_dict["prompt"] = cfg.task.prompt + input_dict["gpt_model"] = cfg.task.gpt_model + input_dict["max_tokens"] = cfg.task.max_tokens + input_dict["target_token"] = cfg.hyperparams.target_token + input_dict["use_context_level_filter"] = cfg.hyperparams.use_context_level_filter + input_dict["use_token_level_filter"] = cfg.hyperparams.use_token_level_filter + input_dict["target_context"] = cfg.hyperparams.target_context + input_dict["context_level_compression_rate"] = cfg.hyperparams.context_level_compression_rate + input_dict["context_level_target_token"] = cfg.hyperparams.context_level_target_token + input_dict["force_tokens"] = cfg.hyperparams.force_tokens + input_dict["drop_consecutive"] = cfg.hyperparams.drop_consecutive + input_dict["force_reserve_digit"] = cfg.hyperparams.force_reserve_digit + input_dict["max_seq_len"] = cfg.hyperparams.max_seq_len + + # Logging settings + input_dict["log_dir"] = cfg.logging.log_dir + input_dict["log_interval"] = cfg.logging.log_interval + input_dict["save_interval"] = cfg.logging.save_interval + + # Device settings + input_dict["use_cuda"] = cfg.device.use_cuda + input_dict["device_map"] = cfg.device.device_map + + # REINFORCE parameters + input_dict["entropy_coeff"] = cfg.hyperparams.entropy_coeff + +run() + +# Create log directory if it doesn't exist +os.makedirs(input_dict["log_dir"], 
exist_ok=True) + +if accelerator.is_local_main_process: + delimiter = "," + level_names = ["Train", "Val"] + reward_level_names = ["TrainR", "ValR"] + csv_logger = CsvLogger(filename=f"{input_dict['log_dir']}/reinforce_logs_updated.csv",fmt = f'%(levelname)s{delimiter}%(message)s', add_level_names= level_names,header=["Train/Val", "Epoch", "Step", "Policy Loss", "LR"]) + rewards_logger = CsvLogger(filename=f"{input_dict['log_dir']}/rewards_logs_updated.csv",fmt = f'%(levelname)s{delimiter}%(message)s', add_level_names= reward_level_names,header=["Train/Val", "Epoch", "Step", "Rewards", "Comp Ratio"]) + +def get_model_output(input_prompt_list): + for i in range(len(input_prompt_list)): + input_prompt_list[i] = re.sub(r'\n', ' ', input_prompt_list[i]) + + compressed_output_dict = compressor.compress_prompt_llmlingua2( + input_prompt_list, + rate=compression_rate, + target_token=target_token, + use_context_level_filter=use_context_level_filter, + use_token_level_filter=use_token_level_filter, + target_context=target_context, + context_level_rate=context_level_compression_rate, + context_level_target_token=context_level_target_token, + force_tokens=force_tokens, + drop_consecutive=drop_consecutive, + force_reserve_digit=force_reserve_digit, + ) + + return compressed_output_dict['compressed_prompt'], compressed_output_dict['compressed_prompt_list'], compressed_output_dict['compression_ratios'], compressed_output_dict["actions"], compressed_output_dict["old_log_probs"], compressed_output_dict["old_logits"], compressed_output_dict["compressed_prompt_list_2"], compressed_output_dict["entropy"] + + +class REINFORCE: + def __init__(self, policy_network, policy_optimizer, policy_scheduler): + self.policy_network = policy_network + self.policy_optimizer = policy_optimizer + self.policy_scheduler = policy_scheduler + + def update(self, rewards_and_comp_ratios, old_log_probs, entropies, epoch_counter, steps): + rewards = [reward["reward"] for reward in rewards_and_comp_ratios] + 
comp_ratios = [reward["comp_ratio"] for reward in rewards_and_comp_ratios] + + rewards = torch.FloatTensor(rewards).to(accelerator.device) + rewards = rewards.unsqueeze(1).detach() + + comp_ratios = torch.FloatTensor(comp_ratios).to(accelerator.device) + comp_ratios = comp_ratios.unsqueeze(1).detach() + + entropies = torch.stack(entropies) + old_log_probs = torch.stack(old_log_probs) + + # Enhanced policy loss with entropy regularization + policy_loss = -torch.mean(old_log_probs * (rewards)) - input_dict["entropy_coeff"]*torch.mean(entropies) + + self.policy_optimizer.zero_grad() + + if steps % input_dict["log_interval"] == 0: + print(f"Avg. Entropy: {torch.mean(entropies)}") + + if accelerator.is_local_main_process: + csv_logger.Train([epoch_counter, steps, policy_loss.item(), self.policy_scheduler.get_last_lr()[0]]) + + accelerator.backward(policy_loss) + self.policy_optimizer.step() + self.policy_scheduler.step() + +def get_gpt_output(input_text): + query = prompt.format(transcript=input_text) + gpt_output = query_llm( + prompt=query, + model_name=input_dict["gpt_model"], + max_tokens=input_dict["max_tokens"], + ) + + return gpt_output + +def calculate_rewards(gpt_output, model_compressed_text, compression_ratio): + if compression_ratio["compressed"] > 0.1*compression_ratio["original"]: + org_input_summary = gpt_output + model_compressed_summary = get_gpt_output(model_compressed_text) + comp_ratio = compression_ratio["original"] / compression_ratio["compressed"] + else: + print(compression_ratio) + comp_ratio = compression_ratio["original"] / compression_ratio["compressed"] + return {"comp_ratio": comp_ratio, "reward": -0.4} + + if org_input_summary is not None and model_compressed_summary is not None: + eval_scores = evaluate_sim2([model_compressed_summary], [org_input_summary]) + bleu = eval_scores["bleu"] + comp_coeff = compression_ratio["compressed"] - compression_rate*compression_ratio["original"] + + if abs(comp_coeff) > 
input_dict["compression_relaxation_tokens"]: + reward = -0.1 + else: + reward = bleu + + return {"comp_ratio": comp_ratio, "reward": reward} + else: + return {"comp_ratio": comp_ratio, "reward": 0.4} + +def train(dataloader, num_epochs): + epoch_counter = 0 + steps = 0 + for epoch in range(num_epochs): + for batch_idx, batch in enumerate(tqdm(dataloader, total=len(dataloader)*(num_epochs-epoch))): + input_prompt_list = batch["org_prompt"] + gpt_output_list = batch["gpt_output"] + + compressed_prompt, compressed_prompt_list, compression_ratio_list, actions, old_log_probs, _, compressed_prompt_list_2, entropies = get_model_output(input_prompt_list) + + rewards_and_comp_ratios = [calculate_rewards(gpt_output_list[i], compressed_prompt_list_2[i], compression_ratio_list[i]) for i in range(len(gpt_output_list))] + rewards = [reward["reward"] for reward in rewards_and_comp_ratios] + comp_ratios = [reward["comp_ratio"] for reward in rewards_and_comp_ratios] + + # update the policy network + reinforce_agent.update(rewards_and_comp_ratios, old_log_probs, entropies, epoch_counter, steps) + + # Enhanced logging with more metrics + if batch_idx % input_dict["log_interval"] == 0: + print(f"Epoch: {epoch}, Batch: {batch_idx}, Avg. Batch Rewards: {sum(rewards)/len(rewards)}, Avg. 
Comp Ratio: {sum(comp_ratios)/len(comp_ratios)}") + if accelerator.is_local_main_process: + rewards_logger.TrainR([epoch_counter, batch_idx//input_dict["log_interval"], sum(rewards)/len(rewards), sum(comp_ratios)/len(comp_ratios)]) + + steps += 1 + + epoch_counter += 1 + +def tokenize_text(text): + tokenize_text = tokenizer(text, max_length=input_dict["max_seq_len"], padding="max_length", return_tensors="pt", truncation=True) + return {k: v[0] for k, v in tokenize_text.items()} + +class GenericRLDataset(Dataset): + def __init__(self, data_path): + self.data = json.load(open(data_path)) + # self.data = dict(list(self.data.items())[:100]) + + def __len__(self): + return len(self.data) + + def __getitem__(self, idx): + tokenized_dataset = tokenize_text(self.data[str(idx)]["original_prompt"]) + return {"input_ids": tokenized_dataset["input_ids"], "attention_mask": tokenized_dataset["attention_mask"], "org_prompt": self.data[str(idx)]["original_prompt"], "gpt_output": self.data[str(idx)]["gpt_output"]} + + +# Example usage +device = torch.device("cuda" if torch.cuda.is_available() and input_dict["use_cuda"] else "cpu") + +input_data_path = input_dict["train_data_path"] +model_name = input_dict["model_load_path_hf"] + +compression_rate = input_dict["compression_rate"] +target_token = input_dict["target_token"] +use_context_level_filter = input_dict["use_context_level_filter"] +use_token_level_filter = input_dict["use_token_level_filter"] +target_context = input_dict["target_context"] +context_level_compression_rate = input_dict["context_level_compression_rate"] +context_level_target_token = input_dict["context_level_target_token"] +force_tokens = input_dict["force_tokens"] +drop_consecutive = input_dict["drop_consecutive"] +force_reserve_digit = input_dict["force_reserve_digit"] + +# Use the new PromptCompressorReinforce class +compressor = PromptCompressorReinforce( + model_name=model_name, + model_config={}, + use_llmlingua2=True, + device_map=input_dict["device_map"], 
+) + +tokenizer = compressor.tokenizer + +prompt = input_dict["prompt"] + +policy_optimizer = optim.AdamW(compressor.model.parameters(), lr=input_dict["policy_lr"]) + +dataset = GenericRLDataset(input_data_path) +dataloader = DataLoader(dataset, batch_size=input_dict["train_batch_size"], shuffle=True) + +num_epochs = input_dict["num_epochs"] + +policy_scheduler = optim.lr_scheduler.CosineAnnealingLR(policy_optimizer, T_max=num_epochs*len(dataloader)) + +compressor.model, policy_optimizer, policy_scheduler, dataloader = accelerator.prepare( + compressor.model, policy_optimizer, policy_scheduler, dataloader +) + +policy_net = compressor.model + +reinforce_agent = REINFORCE(policy_net, policy_optimizer, policy_scheduler) + +train(dataloader, num_epochs=num_epochs) + +# save the model +unwrapped_model = accelerator.unwrap_model(policy_net) +unwrapped_model.save_pretrained( + input_dict["model_save_path_hf"], + is_main_process=accelerator.is_main_process, + save_function=accelerator.save, +) \ No newline at end of file diff --git a/experiments/taco-rl/utils.py b/experiments/taco-rl/utils.py new file mode 100644 index 0000000..353e979 --- /dev/null +++ b/experiments/taco-rl/utils.py @@ -0,0 +1,361 @@ +import json +import os +import random +import re +import string +import time + +import numpy as np +import torch +import yaml +from torch.utils.data import Dataset +from azure.identity import ManagedIdentityCredential, get_bearer_token_provider +from openai import AzureOpenAI + + +# Default API configuration - can be overridden by user +DEFAULT_API_CONFIG = { + "scope": "https://cognitiveservices.azure.com/.default", + "client_id": "YOUR_CLIENT_ID_HERE", # Replace with your client ID + "api_details": { + "primary": { + "api_base": "YOUR_PRIMARY_API_BASE_HERE", # Replace with your primary API base + "api_version": "2024-02-01", + }, + "secondary": { + "api_base": "YOUR_SECONDARY_API_BASE_HERE", # Replace with your secondary API base + "api_version": "2024-02-01", + } + } +} 
+ +# Global variables for API configuration +api_config = DEFAULT_API_CONFIG.copy() +token_provider = None +client = None +api_base = None + +def initialize_api_config(custom_config=None): + """Initialize API configuration with custom settings or defaults""" + global api_config, token_provider, client, api_base + + if custom_config: + api_config.update(custom_config) + + # Initialize token provider + token_provider = get_bearer_token_provider( + ManagedIdentityCredential(client_id=api_config["client_id"]), + api_config["scope"] + ) + + # Initialize client with default endpoint + client = AzureOpenAI( + api_version=api_config["api_details"]["primary"]["api_version"], + azure_endpoint=api_config["api_details"]["primary"]["api_base"], + azure_ad_token_provider=token_provider + ) + + api_base = api_config["api_details"]["primary"]["api_base"] + +# Initialize with default config +initialize_api_config() + +def query_llm( + prompt, + model_name, + max_tokens, + **kwargs, +): + SLEEP_TIME_FAILED = 1 + global api_base, client, api_config + + request = { + "temperature": kwargs["temperature"] if "temperature" in kwargs else 0.0, + "top_p": kwargs["top_p"] if "top_p" in kwargs else 1.0, + "seed": kwargs["seed"] if "seed" in kwargs else 42, + "max_tokens": max_tokens, + "n": 1, + "stream": False, + } + request["messages"] = [ + {"role": "system", "content": "You are a helpful assistant."}, + {"role": "user", "content": prompt}, + ] + + answer = None + response = None + while answer is None: + try: + response = client.chat.completions.create(model=model_name, **request) + answer = response.choices[0].message.content + + except Exception as e: + print(f"error: {e}, response: {response}") + answer = None + if "content management" in str(e): + print(f"error: {e}, response: {response}") + print("returning None") + break + elif "content" in str(e): + print(f"error: {e}, response: {response}") + print("returning None") + break + elif "filtered" in str(response): + print(f"error: 
{e}, response: {response}") + print("returning None") + break + elif "repetitive patterns" in str(e): + print(f"error: {e}, response: {response}") + print("returning None") + break + elif "exceeded token rate limit" in str(e) or "exceeded call rate limit" in str(e) or "rate limit" in str(e): + # change api details + # print(f"error: {e}, response: {response}") + print("changing api details") + if api_base == api_config["api_details"]["primary"]["api_base"]: + api_base = api_config["api_details"]["secondary"]["api_base"] + client = AzureOpenAI( + api_version=api_config["api_details"]["secondary"]["api_version"], + azure_endpoint=api_config["api_details"]["secondary"]["api_base"], + azure_ad_token_provider=token_provider + ) + else: + api_base = api_config["api_details"]["primary"]["api_base"] + client = AzureOpenAI( + api_version=api_config["api_details"]["primary"]["api_version"], + azure_endpoint=api_config["api_details"]["primary"]["api_base"], + azure_ad_token_provider=token_provider + ) + time.sleep(SLEEP_TIME_FAILED) + # sleep(SLEEP_TIME_SUCCESS) + return answer + + +class TokenClfDataset(Dataset): + def __init__( + self, + texts, + max_len=512, + tokenizer=None, + model_name="bert-base-multilingual-cased", + ): + self.len = len(texts) + self.texts = texts + self.tokenizer = tokenizer + self.max_len = max_len + self.model_name = model_name + if "bert-base-multilingual-cased" in model_name: + self.cls_token = "[CLS]" + self.sep_token = "[SEP]" + self.unk_token = "[UNK]" + self.pad_token = "[PAD]" + self.mask_token = "[MASK]" + elif "xlm-roberta-large" in model_name: + self.bos_token = "" + self.eos_token = "" + self.sep_token = "" + self.cls_token = "" + self.unk_token = "" + self.pad_token = "" + self.mask_token = "" + else: + raise NotImplementedError() + + def __getitem__(self, index): + text = self.texts[index] + tokenized_text = self.tokenizer.tokenize(text) + + tokenized_text = ( + [self.cls_token] + tokenized_text + [self.sep_token] + ) # add special 
tokens

        if len(tokenized_text) > self.max_len:
            # Truncate to the model's maximum sequence length.
            tokenized_text = tokenized_text[: self.max_len]
        else:
            # Right-pad with pad tokens up to max_len.
            tokenized_text = tokenized_text + [
                self.pad_token for _ in range(self.max_len - len(tokenized_text))
            ]

        # Attention mask: 1 for real tokens, 0 for padding.
        attn_mask = [1 if tok != self.pad_token else 0 for tok in tokenized_text]

        ids = self.tokenizer.convert_tokens_to_ids(tokenized_text)

        return {
            "ids": torch.tensor(ids, dtype=torch.long),
            "mask": torch.tensor(attn_mask, dtype=torch.long),
        }

    def __len__(self):
        return self.len


def seed_everything(seed: int):
    """Seed every RNG source (python, hash, numpy, torch) for reproducibility."""
    random.seed(seed)
    os.environ["PYTHONHASHSEED"] = str(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed(seed)
    # Force deterministic cuDNN kernels; disables autotuning.
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = False


def is_begin_of_new_word(token, model_name, force_tokens, token_map):
    """Return True if `token` starts a new word for the given tokenizer family.

    Force tokens and mapped added tokens are always treated as word starts.
    """
    if "bert-base-multilingual-cased" in model_name \
        or "tinybert" in model_name.lower() \
        or "mobilebert" in model_name.lower():
        # NOTE(review): lstrip("##") strips a *character set*, not the "##"
        # prefix — a token made only of '#' would be emptied; presumably no
        # such tokens occur with these vocabularies. Confirm if in doubt.
        if token.lstrip("##") in force_tokens or token.lstrip("##") in set(
            token_map.values()
        ):
            return True
        # WordPiece marks continuation pieces with a leading "##".
        return not token.startswith("##")
    elif "xlm-roberta-large" in model_name:
        if (
            token in string.punctuation
            or token in force_tokens
            or token in set(token_map.values())
        ):
            return True
        # SentencePiece marks word-initial pieces with a leading "▁".
        return token.startswith("▁")
    else:
        raise NotImplementedError()


def replace_added_token(token, token_map):
    """Map temporary added tokens in `token` back to their original strings."""
    for ori_token, new_token in token_map.items():
        token = token.replace(new_token, ori_token)
    return token


def get_pure_token(token, model_name):
    """Strip the tokenizer-specific subword prefix ("##" or "▁") from `token`."""
    if "bert-base-multilingual-cased" in model_name \
        or "tinybert" in model_name.lower() \
        or "mobilebert" in model_name.lower():
        return token.lstrip("##")
    elif "xlm-roberta-large" in model_name:
        return token.lstrip("▁")
    else:
        raise NotImplementedError()


def process_structured_json_data(json_data, json_config):
    """Flatten structured JSON into per-key context strings for compression.

    `json_config` may be a dict or a path to a yaml file holding per-key
    rate/compress/value_type settings; keys must match `json_data` exactly.
    """
    if isinstance(json_config, str):
        with open(json_config, "r") as file:
            json_config = 
yaml.safe_load(file) + elif not isinstance(json_config, dict): + raise ValueError( + "Invalid json config file. It should be a dictionary or a path to a yaml file." + ) + assert set(json_data.keys()) == set( + json_config.keys() + ), "Keys in json data and json config file do not match." + context = ["{"] + forced_context_ids = [0] + for i, (k, v) in enumerate(json_data.items()): + if not json_config[k]["pair_remove"]: + forced_context_ids.append(i + 1) + rate, compress, value_type = ( + json_config[k]["rate"], + json_config[k]["compress"], + json_config[k]["value_type"], + ) + if not compress: + rate = 1 + context.append(precess_jsonKVpair(k, v, value_type, rate)) + context[-1] = context[-1][:-14] + "" + context.append("}") + forced_context_ids.append(len(json_data) + 1) + + return context, forced_context_ids + + +def precess_jsonKVpair(k, v, value_type, rate): + if rate == 1: + return ( + "" + + f"{json.dumps({k:v})[1:-1]}, " + + "" + ) + if value_type == "str" or value_type == "string": + v = str(v) + new_v = ( + f"" + + v + + "" + ) + return ( + "" + + f"{json.dumps({k:new_v})[1:-1]}, " + + "" + ) + elif value_type in ["int", "float", "integer", "number"]: + if value_type in ["int", "integer"]: + v = int(v) + if value_type in ["float", "number"]: + v = float(v) + return ( + "" + + f'"{k}": {v}, ' + ) + elif value_type == "bool" or value_type == "boolean": + if v in ["True", "true", "TRUE", True]: + v = "true" + elif v in ["False", "false", "FALSE", False]: + v = "false" + else: + raise ValueError(f"Invalid boolean value: {v}") + new_v = ( + f"" + + v + + "" + ) + return ( + "" + + f"{json.dumps({k:new_v})[1:-1]}, " + + "" + ) + elif value_type == "list" or value_type == "List": + return ( + "" + + f'"{k}": {process_sequence_data(rate, "[", "]", v)}' + ) + elif value_type == "dict" or value_type == "dictionary": + return ( + "" + + f'"{k}": {process_sequence_data(rate, "[", "]", v, is_dict=True)}' + ) + elif value_type == "set": + raise ValueError(f"Invalid 
value type: {value_type}") + # return '' + f'"{k}": {process_sequence_data(rate, "{", "}", v)}' + elif value_type == "tuple": + return ( + "" + + f'"{k}": {process_sequence_data(rate, "(", ")", v)}' + ) + else: + raise ValueError(f"Invalid value type: {value_type}") + + +def process_sequence_data(rate, start, end, sequence, is_dict=False): + res = f'{start}"' + n = len(sequence) + if not is_dict: + for i, item in enumerate(sequence): + item = str(item) + res += f"{item}" + if i != n - 1: + res += '", "' + else: + for i, (k, v) in enumerate(sequence.items()): + item = f"{k}: {v}" + item.replace('"', "'") + res += f"{item}" + if i != n - 1: + res += '", "' + res += f'"{end}, ' + return res + + +def remove_consecutive_commas(text): + text = re.sub(r",\s*", ",", text) + text = re.sub(r",+", ",", text) + return text \ No newline at end of file diff --git a/llmlingua/__init__.py b/llmlingua/__init__.py index d750210..ead3b84 100644 --- a/llmlingua/__init__.py +++ b/llmlingua/__init__.py @@ -3,6 +3,7 @@ # flake8: noqa from .prompt_compressor import PromptCompressor +from .taco_rl import PromptCompressorReinforce from .version import VERSION as __version__ -__all__ = ["PromptCompressor"] +__all__ = ["PromptCompressor", "PromptCompressorReinforce"] diff --git a/llmlingua/taco-rl/README.md b/llmlingua/taco-rl/README.md new file mode 100644 index 0000000..e52916b --- /dev/null +++ b/llmlingua/taco-rl/README.md @@ -0,0 +1,112 @@ +# TACO-RL: Task-Aware Prompt Compression Optimization with Reinforcement Learning + +This submodule provides reinforcement learning capabilities for fine-tuning LLMLingua models on new tasks using reward signals from language models like GPT-3.5. + +## Overview + +TACO-RL (Task-Aware Prompt Compression Optimization with Reinforcement Learning) extends the LLMLingua framework by adding reinforcement learning capabilities that allow you to fine-tune pre-trained LLMLingua models on new tasks. 
The approach addresses the limitations of existing prompt compression techniques by leveraging task-specific reward signals to guide the learning process. + +### Key Innovation + +Unlike traditional prompt compression methods that either rely on sub-optimal metrics like information entropy or treat compression as a task-agnostic token classification problem, TACO-RL: + +1. **Leverages Bidirectional Context**: Uses existing Transformer encoder-based token classification models for low latency +2. **Task-Specific Optimization**: Guides learning with task-specific reward signals using lightweight REINFORCE algorithm +3. **Performance Improvement**: Achieves 8%-189% improvement across diverse tasks while maintaining compression rates and latency requirements + +### Research Foundation + +Based on the paper "TACO-RL: Task Aware Prompt Compression Optimization with Reinforcement Learning" ([arXiv:2409.13035](https://arxiv.org/pdf/2409.13035)), this implementation addresses two key research questions: + +- **Q1**: How to design a prompt compression model that effectively leverages bidirectional context while providing low inference latency? +- **Q2**: How to efficiently train a model with proper guidance from task-specific reward signals while minimizing computational cost? + +The solution builds on LLMLingua-2's task-agnostic encoder-based transformer model and enhances it with task-specific reward signals using on-policy REINFORCE algorithm. + +## How TACO-RL Works + +### Architecture Overview + +1. **Pre-trained Base Model**: Start with a pre-trained LLMLingua model (e.g., `microsoft/llmlingua-2-xlm-roberta-large-meetingbank`) +2. **Task-Specific Data**: Prepare task-specific data with original prompts and expected outputs +3. **Reward Signal Generation**: Use a teacher model (e.g., GPT-3.5) to evaluate compression quality by comparing outputs from original vs. compressed prompts +4. 
**REINFORCE Training**: Fine-tune the model using policy gradients based on task-specific reward signals +5. **Task-Optimized Model**: Get a model optimized for your specific task while maintaining low inference latency + +### Training Process + +During the model alignment process: +1. Generate task output from both original and compressed prompts +2. Compute task-specific reward signal using divergence between outputs +3. Update the base encoder model using on-policy REINFORCE algorithm +4. The reward signals guide the model to preserve task-relevant information during compression + +## Task-Specific Metrics and Evaluation + +TACO-RL has been extensively evaluated on three diverse and challenging tasks with different evaluation metrics: + +### 1. Text Summarization (MeetingBank Dataset) + +**Metrics Considered:** +- **BLEU**: Measures n-gram overlap between generated and reference summaries +- **ROUGE-1**: Unigram overlap for content coverage +- **ROUGE-2**: Bigram overlap for fluency assessment +- **ROUGE-L**: Longest common subsequence for structure preservation +- **BERTScore F1**: Semantic similarity using BERT embeddings + +### 2. Question Answering (SQuAD Dataset) + +**Metrics Considered:** +- **Exact Match (EM)**: Percentage of answers that exactly match the ground truth +- **F1 Score**: Harmonic mean of precision and recall for answer span prediction +- **BLEU**: For answer generation quality +- **ROUGE**: For answer completeness and relevance + +### 3. Code Summarization (CodeSearchNet Dataset) + +**Metrics Considered:** +- **BLEU**: For code summary quality +- **ROUGE-1/2/L**: For summary completeness and relevance +- **BERTScore F1**: For semantic similarity of code descriptions +- **CodeBLEU**: Specialized metric for code-to-text generation + +## Key Features + +The `PromptCompressorReinforce` class extends the base `PromptCompressor` with: + +1. **Action Tracking**: Tracks actions taken during compression for RL training +2. 
**Log Probability Storage**: Stores log probabilities for policy gradient computation +3. **Entropy Calculation**: Computes entropy for exploration regularization +4. **RL Information Methods**: Provides methods to access and clear RL-specific data +5. **Configurable Training**: All hyperparameters configurable via YAML files +6. **Multi-Metric Support**: Configurable evaluation metrics for different tasks + + +## Integration with Main LLMLingua + +After training, you can use your fine-tuned model with the main LLMLingua package: + +```python +from llmlingua import PromptCompressor + +# Load your fine-tuned model +compressor = PromptCompressor( + model_name="path/to/your/fine_tuned_model", + use_llmlingua2=True +) + +# Use it for compression +compressed_prompt = compressor.compress_prompt_llmlingua2( + ["Your prompt here..."], + rate=0.5 +) +``` + +## Notes + +- The training uses the REINFORCE algorithm for policy gradient optimization +- The compressor maintains the same interface as the original LLMLingua but provides additional RL information +- Make sure to adjust batch sizes and learning rates based on your hardware capabilities +- The teacher model (GPT-3.5) is used to evaluate compression quality and provide reward signals +- All hyperparameters are configurable through the YAML file for easy experimentation +- Different tasks may require different evaluation metrics for optimal performance diff --git a/llmlingua/taco-rl/__init__.py b/llmlingua/taco-rl/__init__.py new file mode 100644 index 0000000..6a41be2 --- /dev/null +++ b/llmlingua/taco-rl/__init__.py @@ -0,0 +1,7 @@ +# Copyright (c) 2023 Microsoft +# Licensed under The MIT License [see LICENSE for details] + +# flake8: noqa +from .prompt_compressor_reinforce import PromptCompressorReinforce + +__all__ = ["PromptCompressorReinforce"] diff --git a/llmlingua/taco-rl/prompt_compressor_reinforce.py b/llmlingua/taco-rl/prompt_compressor_reinforce.py new file mode 100644 index 0000000..0ccd00f --- /dev/null 
+++ b/llmlingua/taco-rl/prompt_compressor_reinforce.py @@ -0,0 +1,509 @@ +# Copyright (c) 2023-2025 Microsoft +# Licensed under The MIT License [see LICENSE for details] + +import re +import copy +import numpy as np +import torch +import torch.nn.functional as F +from torch.distributions import Categorical +from torch.utils.data import DataLoader +from typing import List +from llmlingua.prompt_compressor import PromptCompressor +from utils import TokenClfDataset, get_pure_token, is_begin_of_new_word, replace_added_token + +class PromptCompressorReinforce(PromptCompressor): + """ + PromptCompressorReinforce extends PromptCompressor with reinforcement learning capabilities. + + This class overrides the compression methods to track actions, log probabilities, and entropy + for reinforcement learning training. It maintains the same interface as PromptCompressor + but provides additional RL-specific outputs. + """ + + def __init__(self, *args, **kwargs): + super().__init__(*args, **kwargs) + self.actions = [] + self.old_log_probs = [] + self.old_logits = None + self.entropy = [] + self.compressed_prompt_list_2 = [] + self.compression_ratios = [] + + def compress_prompt_llmlingua2( + self, + context: List[str], + rate: float = 0.5, + target_token: int = -1, + use_context_level_filter: bool = False, + use_token_level_filter: bool = True, + target_context: int = -1, + context_level_rate: float = 1.0, + context_level_target_token: int = -1, + force_context_ids: List[int] = [], + return_word_label: bool = False, + word_sep: str = "\t\t|\t\t", + label_sep: str = " ", + token_to_word: str = "mean", + force_tokens: List[str] = [], + force_reserve_digit: bool = False, + drop_consecutive: bool = False, + chunk_end_tokens: List[str] = [".", "\n"], + ): + """ + Override the llmlingua2 compression method to track RL-specific information. 
+ """ + assert len(force_tokens) <= self.max_force_token + token_map = {} + for i, t in enumerate(force_tokens): + if len(self.tokenizer.tokenize(t)) != 1: + token_map[t] = self.added_tokens[i] + chunk_end_tokens = copy.deepcopy(chunk_end_tokens) + for c in chunk_end_tokens: + if c in token_map: + chunk_end_tokens.append(token_map[c]) + chunk_end_tokens = set(chunk_end_tokens) + + if type(context) == str: + context = [context] + context = copy.deepcopy(context) + + if len(context) == 1 and use_context_level_filter: + use_context_level_filter = False + + n_original_token = 0 + context_chunked = [] + for i in range(len(context)): + n_original_token += self.get_token_length( + context[i], use_oai_tokenizer=True + ) + for ori_token, new_token in token_map.items(): + context[i] = context[i].replace(ori_token, new_token) + context_chunked.append( + self.__chunk_context(context[i], chunk_end_tokens=chunk_end_tokens) + ) + + org_context = None + if use_context_level_filter: + # want use_context_level_filter but do not specify any parameters in context level? 
+ # we will set context_level_rate = (rate + 1.0) / 2 if specify rate or target_token * 2 if specify target_token + if ( + target_context <= 0 + and context_level_rate >= 1.0 + and context_level_target_token <= 0 + ): + if target_token < 0 and rate < 1.0: + context_level_rate = ( + (rate + 1.0) / 2 if use_token_level_filter else rate + ) + if target_token >= 0: + context_level_target_token = ( + target_token * 2 if use_token_level_filter else target_token + ) + + if target_context >= 0: + context_level_rate = min(target_context / len(context), 1.0) + if context_level_target_token >= 0: + context_level_rate = min( + context_level_target_token / n_original_token, 1.0 + ) + + context_probs, context_words = self.__get_context_prob( + context_chunked, + token_to_word=token_to_word, + force_tokens=force_tokens, + token_map=token_map, + force_reserve_digit=force_reserve_digit, + ) + + threshold = np.percentile( + context_probs, int(100 * (1 - context_level_rate)) + ) + + reserved_context = [] + context_label = [False] * len(context_probs) + for i, p in enumerate(context_probs): + if p >= threshold or ( + force_context_ids is not None and i in force_context_ids + ): + reserved_context.append(context_chunked[i]) + context_label[i] = True + n_reserved_token = 0 + for chunks in reserved_context: + for c in chunks: + n_reserved_token += self.get_token_length(c, use_oai_tokenizer=True) + if target_token >= 0: + rate = min(target_token / n_reserved_token, 1.0) + + org_context = copy.deepcopy(reserved_context) + + if use_token_level_filter: + compressed_context, word_list, word_label_list, actions, old_log_probs, old_logits, compressed_prompt_list_2, entropy = self.__compress( + reserved_context, + reduce_rate=max(0, 1 - rate), + token_to_word=token_to_word, + force_tokens=force_tokens, + token_map=token_map, + force_reserve_digit=force_reserve_digit, + drop_consecutive=drop_consecutive, + ) + else: + compressed_context, word_list, word_label_list, actions, old_log_probs, 
old_logits, compressed_prompt_list_2, entropy = self.__compress( + reserved_context, + reduce_rate=0, + token_to_word=token_to_word, + force_tokens=force_tokens, + token_map=token_map, + force_reserve_digit=force_reserve_digit, + drop_consecutive=drop_consecutive, + ) + + n_compressed_token = 0 + compression_ratios=[] + for c,o in zip(compressed_prompt_list_2,org_context): + org_token_length = self.get_token_length(o, use_oai_tokenizer=True) + compressed_token_length = self.get_token_length(c, use_oai_tokenizer=True) + fin_dict = {"original":org_token_length, "compressed":compressed_token_length, "original_context":o, "compressed_context":c} + compression_ratios.append(fin_dict) + for c in compressed_context: + n_compressed_token += self.get_token_length(c, use_oai_tokenizer=True) + saving = (n_original_token - n_compressed_token) * 0.06 / 1000 + ratio = ( + 1 if n_compressed_token == 0 else n_original_token / n_compressed_token + ) + res = { + "compressed_prompt": "\n\n".join(compressed_context), + "compressed_prompt_list": compressed_context, + "compression_ratios":compression_ratios, + "compressed_prompt_list_2":compressed_prompt_list_2, + "entropy": entropy, + "origin_tokens": n_original_token, + "compressed_tokens": n_compressed_token, + "ratio": f"{ratio:.1f}x", + "rate": f"{1 / ratio * 100:.1f}%", + "saving": f", Saving ${saving:.1f} in GPT-4.", + "actions": actions, + "old_log_probs": old_log_probs, + "old_logits": old_logits, + } + if return_word_label: + words = [] + labels = [] + j = 0 + for i in range(len(context)): + if context_label[i]: + words.extend(word_list[j]) + labels.extend(word_label_list[j]) + j += 1 + else: + words.extend(context_words[i]) + labels.extend([0] * len(context_words[i])) + word_label_lines = word_sep.join( + [f"{word}{label_sep}{label}" for word, label in zip(words, labels)] + ) + res["fn_labeled_original_prompt"] = word_label_lines + return res + + if target_token > 0: + rate = min(target_token / n_original_token, 1.0) + + 
org_context = copy.deepcopy(context_chunked) + + if use_token_level_filter: + compressed_context, word_list, word_label_list, actions, old_log_probs, old_logits, compressed_prompt_list_2, entropy = self.__compress( + context_chunked, + reduce_rate=max(0, 1 - rate), + token_to_word=token_to_word, + force_tokens=force_tokens, + token_map=token_map, + force_reserve_digit=force_reserve_digit, + drop_consecutive=drop_consecutive, + ) + else: + compressed_context, word_list, word_label_list, actions, old_log_probs, old_logits, compressed_prompt_list_2, entropy = self.__compress( + context_chunked, + reduce_rate=0, + token_to_word=token_to_word, + force_tokens=force_tokens, + token_map=token_map, + force_reserve_digit=force_reserve_digit, + drop_consecutive=drop_consecutive, + ) + + n_compressed_token = 0 + compression_ratios=[] + for c,o in zip(compressed_prompt_list_2, org_context): + org_token_length = self.get_token_length(o[0], use_oai_tokenizer=True) + compressed_token_length = self.get_token_length(c, use_oai_tokenizer=True) + fin_dict = {"original":org_token_length, "compressed":compressed_token_length, "original_context":o[0], "compressed_context":c} + compression_ratios.append(fin_dict) + for c in compressed_context: + n_compressed_token += self.get_token_length(c, use_oai_tokenizer=True) + saving = (n_original_token - n_compressed_token) * 0.06 / 1000 + ratio = 1 if n_compressed_token == 0 else n_original_token / n_compressed_token + res = { + "compressed_prompt": "\n\n".join(compressed_context), + "compressed_prompt_list": compressed_context, + "compression_ratios":compression_ratios, + "compressed_prompt_list_2":compressed_prompt_list_2, + "entropy": entropy, + "origin_tokens": n_original_token, + "compressed_tokens": n_compressed_token, + "ratio": f"{ratio:.1f}x", + "rate": f"{1 / ratio * 100:.1f}%", + "saving": f", Saving ${saving:.1f} in GPT-4.", + "actions": actions, + "old_log_probs": old_log_probs, + "old_logits": old_logits, + } + if return_word_label: 
+ words = [] + labels = [] + for w_list, l_list in zip(word_list, word_label_list): + words.extend(w_list) + labels.extend(l_list) + + word_label_lines = word_sep.join( + [f"{word}{label_sep}{label}" for word, label in zip(words, labels)] + ) + res["fn_labeled_original_prompt"] = word_label_lines + return res + + def __chunk_context(self, origin_text, chunk_end_tokens): + # leave 2 token for CLS and SEP + max_len = self.max_seq_len - 2 + origin_list = [] + origin_tokens = self.tokenizer.tokenize(origin_text) + n = len(origin_tokens) + st = 0 + while st < n: + if st + max_len > n - 1: + chunk = self.tokenizer.convert_tokens_to_string(origin_tokens[st:n]) + origin_list.append(chunk) + break + else: + ed = st + max_len + for j in range(0, ed - st): + if origin_tokens[ed - j] in chunk_end_tokens: + ed = ed - j + break + chunk = self.tokenizer.convert_tokens_to_string( + origin_tokens[st : ed + 1] + ) + origin_list.append(chunk) + st = ed + 1 + return origin_list + + def __merge_token_to_word_for_actions( + self, tokens, token_actions, force_tokens, token_map, force_reserve_digit + ): + """ + Merge tokens to words while tracking actions for RL training. 
+ """ + words = [] + word_actions = [] + word_actions_no_force = [] + + for token, action in zip(tokens, token_actions): + if token in self.special_tokens: + continue + # add a new word + elif is_begin_of_new_word(token, self.model_name, force_tokens, token_map): + pure_token = get_pure_token(token, self.model_name) + action_no_force = action + if pure_token in force_tokens or pure_token in set(token_map.values()): + action = 1 + token = replace_added_token(token, token_map) + words.append(token) + word_actions.append( + [ + 1 + if force_reserve_digit and bool(re.search(r"\d", token)) + else action + ] + ) + word_actions_no_force.append([action_no_force]) + # concatenate with previous token + else: + pure_token = get_pure_token(token, self.model_name) + words[-1] += pure_token + word_actions[-1].append( + 1 + if force_reserve_digit and bool(re.search(r"\d", token)) + else action + ) + word_actions_no_force[-1].append(action) + + return words, word_actions, word_actions_no_force + + def __token_action_to_word_action(self, token_actions, convert_mode="all"): + """ + Convert token actions to word actions for RL training. + """ + if convert_mode == "all": + word_action = [all(p) for p in token_actions] + elif convert_mode == "any": + word_action = [any(p) for p in token_actions] + else: + raise NotImplementedError() + + return word_action + + def __compress( + self, + context_list: list, + reduce_rate: float = 0.5, + token_to_word: str = "mean", + force_tokens: List[str] = [], + token_map: dict = {}, + force_reserve_digit: bool = False, + drop_consecutive: bool = False, + ): + """ + Override the compression method to track RL-specific information. 
+ """ + def split_string_to_words(input_string): + pattern = r'\b\w+\b|[<>=/!@#$%^&*()?":{}|\\`~;_+-]' + result = re.findall(pattern, input_string) + return result + + if reduce_rate <= 0: + words, word_labels = [], [] + for i in range(len(context_list)): + chunk_list = context_list[i] + chunk_words = [] + chunk_word_labels = [] + for j in range(len(chunk_list)): + # replace to original token + for ori_token, new_token in token_map.items(): + chunk_list[j] = chunk_list[j].replace(new_token, ori_token) + ws = split_string_to_words(chunk_list[j]) + chunk_words.extend(ws) + chunk_word_labels.extend([1 for _ in range(len(ws))]) + context_list[i] = "".join(chunk_list) + words.append(chunk_words) + word_labels.append(chunk_word_labels) + return context_list, words, word_labels + + chunk_list = [] + for chunks in context_list: + for c in chunks: + chunk_list.append(c) + + dataset = TokenClfDataset( + chunk_list, tokenizer=self.tokenizer, max_len=self.max_seq_len + ) + dataloader = DataLoader( + dataset, batch_size=self.max_batch_size, shuffle=False, drop_last=False + ) + + compressed_chunk_list = [] + word_list = [] + word_label_list = [] + actions, old_log_probs, old_logits, entropy = [], [], None, [] + compressed_prompt_list_2 = [] + + for batch in dataloader: + ids = batch["ids"].type(torch.long).to(self.model.device) + mask = batch["mask"].type(torch.long).to(self.model.device) == 1 + + outputs = self.model(input_ids=ids, attention_mask=mask) + + loss, logits = outputs.loss, outputs.logits + probs = F.softmax(logits, dim=-1) + + old_probs = probs + for logits_, old_probs_ in zip(logits, old_probs): + dist = Categorical(probs=old_probs_) + action = dist.sample() + log_prob = dist.log_prob(action) + actions.append(action) + old_log_probs.append(log_prob) + entropy.append(dist.entropy()) + + # Create compressed prompt list from actions + for j in range(ids.shape[0]): + chunk_actions = actions[j] + chunk_ids = ids[j] + chunk_mask = mask[j] + active_actions = 
torch.masked_select(chunk_actions, chunk_mask) + active_ids = torch.masked_select(chunk_ids, chunk_mask) + + tokens = self.tokenizer.convert_ids_to_tokens( + active_ids.squeeze().tolist() + ) + token_actions = [action for action in active_actions.cpu().detach().numpy()] + + words, valid_token_actions, _ = self.__merge_token_to_word_for_actions( + tokens=tokens, + token_actions=token_actions, + force_tokens=force_tokens, + token_map=token_map, + force_reserve_digit=force_reserve_digit, + ) + word_actions = self.__token_action_to_word_action( + valid_token_actions, convert_mode="all" + ) + + if drop_consecutive: + is_token_between = False + prev = None + for i, (word, word_action) in enumerate(zip(words, word_actions)): + if word in force_tokens: + if is_token_between: + is_token_between = False + elif not is_token_between and word == prev: + word_actions[i] = 0 + prev = word + else: + is_token_between |= word_action + + keep_words = [] + for word, word_action in zip(words, word_actions): + if word_action: + keep_words.append(word) + keep_str = self.tokenizer.convert_tokens_to_string(keep_words) + compressed_prompt_list_2.append(keep_str) + + compressed_context_list = [] + original_word_list = [] + original_word_label_list = [] + prev_idx = 0 + for chunk_list in context_list: + n_chunk = len(chunk_list) + compressed_context_list.append( + "".join(compressed_prompt_list_2[prev_idx : prev_idx + n_chunk]) + ) + original_word_list.append([]) + original_word_label_list.append([]) + prev_idx = prev_idx + n_chunk + + return compressed_context_list, original_word_list, original_word_label_list, actions, old_log_probs, old_logits, compressed_prompt_list_2, entropy + + def get_rl_info(self): + """ + Get reinforcement learning information for training. + + Returns: + dict: Dictionary containing RL-specific information including actions, log probabilities, and entropy. 
+ """ + return { + "actions": self.actions, + "old_log_probs": self.old_log_probs, + "old_logits": self.old_logits, + "entropy": self.entropy, + "compressed_prompt_list_2": self.compressed_prompt_list_2, + "compression_ratios": self.compression_ratios + } + + def clear_rl_info(self): + """ + Clear stored reinforcement learning information. + """ + self.actions = [] + self.old_log_probs = [] + self.old_logits = None + self.entropy = [] + self.compressed_prompt_list_2 = [] + self.compression_ratios = [] + \ No newline at end of file From 76b4b13704012b45dbf6fc3adb340055d9faa570 Mon Sep 17 00:00:00 2001 From: Shivam Shandilya Date: Tue, 1 Jul 2025 19:51:14 +0000 Subject: [PATCH 2/2] added dependencies for TACO-RL --- experiments/taco-rl/README.md | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/experiments/taco-rl/README.md b/experiments/taco-rl/README.md index ca171f6..6266ef5 100644 --- a/experiments/taco-rl/README.md +++ b/experiments/taco-rl/README.md @@ -15,6 +15,22 @@ experiments/taco-rl/ └── logs/ # Training logs (created during training) ``` +## Requirements + +### Core Dependencies +- llmlingua + +### Additional Dependencies +Install the following packages for TACO-RL experiments: + +```bash +pip install openai evaluate csv_logger hydra-core rouge_score +``` + +### API Access +- OpenAI API access (for GPT-3.5 teacher model) +- Azure OpenAI API access (if using Azure services) + ## Quick Start ### 1. Prepare Your Data