diff --git a/DOCUMENT.md b/DOCUMENT.md index 1ff0b7b..e0a7c5a 100644 --- a/DOCUMENT.md +++ b/DOCUMENT.md @@ -278,7 +278,7 @@ llm_lingua = PromptCompressor( - **model_name** (str): Name of the small language model from Huggingface, use "microsoft/llmlingua-2-xlm-roberta-large-meetingbank" or "microsoft/llmlingua-2-bert-base-multilingual-cased-meetingbank" for LLMLingua-2. Defaults to "NousResearch/Llama-2-7b-hf". - **device_map** (str): The computing environment. Options include 'cuda', 'cpu', 'mps', 'balanced', 'balanced_low_0', 'auto'. Default is 'cuda'. -- **model_config** (dict, optional): Configuration for the Huggingface model. Defaults to {}. +- **model_config** (dict, optional): Configuration for the Huggingface model. Defaults to {}. Supports `trust_remote_code` (defaults to `False` for security; see [Security Considerations](#security-considerations)). - **open_api_config** (dict, optional): Configuration for OpenAI Embedding in coarse-level prompt compression. Defaults to {}. - **use_llmlingua2** (bool, optional): Whether to use llmlingua-2 for prompt compression. Defaults is False. @@ -417,3 +417,28 @@ recovered_response = llm_lingua.recover( #### Response - **recovered_response** (str): The recovered response, integrating the original prompt's context. + +## Security Considerations + +### `trust_remote_code` + +By default, LLMLingua sets `trust_remote_code=False` when loading models from the Hugging Face Hub. This prevents the automatic execution of arbitrary Python code shipped within a model repository, which could be exploited in a supply-chain attack. + +If you are using a model that requires custom code (e.g., certain Jina embedding models), you can explicitly opt in by passing `trust_remote_code=True` in `model_config`: + +```python +llm_lingua = PromptCompressor( + model_name="your-model-name", + model_config={"trust_remote_code": True}, +) +``` + +> **⚠️ Warning:** Only enable `trust_remote_code` for models you trust. 
A compromised or malicious model repository could execute arbitrary code on your machine when this option is enabled. + +### `torch.load` and `weights_only` + +The experiment scripts under `experiments/llmlingua2/` use `torch.load` with `weights_only=True` to prevent arbitrary code execution via Python pickle deserialization. If you are loading your own `.pt` files in custom training or data pipelines, ensure you also use `weights_only=True` unless you fully trust the source of the file: + +```python +data = torch.load(path, weights_only=True) +``` diff --git a/README.md b/README.md index 1ca17bc..f5009ef 100644 --- a/README.md +++ b/README.md @@ -182,6 +182,8 @@ llm_lingua = PromptCompressor("microsoft/phi-2") llm_lingua = PromptCompressor("TheBloke/Llama-2-7b-Chat-GPTQ", model_config={"revision": "main"}) ``` +> **🔒 Security Note:** `trust_remote_code` now defaults to `False`. If your model requires custom remote code, opt in explicitly via `model_config={"trust_remote_code": True}`. See [Security Considerations](./DOCUMENT.md#security-considerations) for details. 
+ To try **LongLLMLingua** in your scenarios, you can use ```python diff --git a/experiments/llmlingua2/data_collection/filter.py b/experiments/llmlingua2/data_collection/filter.py index 5b31417..e0cad0b 100644 --- a/experiments/llmlingua2/data_collection/filter.py +++ b/experiments/llmlingua2/data_collection/filter.py @@ -20,7 +20,7 @@ ) args = parser.parse_args() -res_pt = torch.load(args.load_path) +res_pt = torch.load(args.load_path, weights_only=True) ## filtering variation_rate_list = res_pt["variation_rate"] diff --git a/experiments/llmlingua2/model_training/train_roberta.py b/experiments/llmlingua2/model_training/train_roberta.py index a5ae084..4591a80 100644 --- a/experiments/llmlingua2/model_training/train_roberta.py +++ b/experiments/llmlingua2/model_training/train_roberta.py @@ -165,7 +165,7 @@ def test(model, eval_dataloader): device = "cuda" if cuda.is_available() else "cpu" -data = torch.load(args.data_path) +data = torch.load(args.data_path, weights_only=True) tokenizer = AutoTokenizer.from_pretrained(args.model_name) model = AutoModelForTokenClassification.from_pretrained( diff --git a/llmlingua/prompt_compressor.py b/llmlingua/prompt_compressor.py index 84e390e..6751a3b 100644 --- a/llmlingua/prompt_compressor.py +++ b/llmlingua/prompt_compressor.py @@ -118,7 +118,7 @@ def init_llmlingua2( def load_model( self, model_name: str, device_map: str = "cuda", model_config: dict = {} ): - trust_remote_code = model_config.get("trust_remote_code", True) + trust_remote_code = model_config.get("trust_remote_code", False) if "trust_remote_code" not in model_config: model_config["trust_remote_code"] = trust_remote_code config = AutoConfig.from_pretrained(model_name, **model_config) @@ -1987,7 +1987,7 @@ def cos_sim(a, b): if self.retrieval_model is None or self.retrieval_model_name != rank_method: model = ( AutoModel.from_pretrained( - "jinaai/jina-embeddings-v2-base-en", trust_remote_code=True + # Deliberate opt-in for this library-pinned model: jina-embeddings-v2 ships custom modeling code and fails to load with trust_remote_code=False. "jinaai/jina-embeddings-v2-base-en", trust_remote_code=True )
.eval() .to(self.device)