Commit 88bcf03 (2 parents: e0e9d99 + 3e96997)

Merge pull request #241 from microsoft/copilot/hot-bug-fix

fix: address supply-chain RCE via trust_remote_code and torch.load

5 files changed: 32 additions, 5 deletions

DOCUMENT.md (26 additions, 1 deletion)
````diff
@@ -278,7 +278,7 @@ llm_lingua = PromptCompressor(
 
 - **model_name** (str): Name of the small language model from Huggingface, use "microsoft/llmlingua-2-xlm-roberta-large-meetingbank" or "microsoft/llmlingua-2-bert-base-multilingual-cased-meetingbank" for LLMLingua-2. Defaults to "NousResearch/Llama-2-7b-hf".
 - **device_map** (str): The computing environment. Options include 'cuda', 'cpu', 'mps', 'balanced', 'balanced_low_0', 'auto'. Default is 'cuda'.
-- **model_config** (dict, optional): Configuration for the Huggingface model. Defaults to {}.
+- **model_config** (dict, optional): Configuration for the Huggingface model. Defaults to {}. Supports `trust_remote_code` (defaults to `False` for security; see [Security Considerations](#security-considerations)).
 - **open_api_config** (dict, optional): Configuration for OpenAI Embedding in coarse-level prompt compression. Defaults to {}.
 - **use_llmlingua2** (bool, optional): Whether to use llmlingua-2 for prompt compression. Defaults is False.
 
@@ -417,3 +417,28 @@ recovered_response = llm_lingua.recover(
 #### Response
 
 - **recovered_response** (str): The recovered response, integrating the original prompt's context.
+
+## Security Considerations
+
+### `trust_remote_code`
+
+By default, LLMLingua sets `trust_remote_code=False` when loading models from the Hugging Face Hub. This prevents the automatic execution of arbitrary Python code shipped within a model repository, which could be exploited in a supply-chain attack.
+
+If you are using a model that requires custom code (e.g., certain Jina embedding models), you can explicitly opt in by passing `trust_remote_code=True` in `model_config`:
+
+```python
+llm_lingua = PromptCompressor(
+    model_name="your-model-name",
+    model_config={"trust_remote_code": True},
+)
+```
+
+> **⚠️ Warning:** Only enable `trust_remote_code` for models you trust. A compromised or malicious model repository could execute arbitrary code on your machine when this option is enabled.
+
+### `torch.load` and `weights_only`
+
+The experiment scripts under `experiments/llmlingua2/` use `torch.load` with `weights_only=True` to prevent arbitrary code execution via Python pickle deserialization. If you are loading your own `.pt` files in custom training or data pipelines, ensure you also use `weights_only=True` unless you fully trust the source of the file:
+
+```python
+data = torch.load(path, weights_only=True)
+```
````
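For readers unfamiliar with why an unrestricted `torch.load` is dangerous: it deserializes with Python's pickle protocol, which can invoke arbitrary callables during loading. A minimal stdlib sketch of the mechanism, with no torch involved (the `Exploit` class and `record` function are purely illustrative stand-ins for a malicious payload):

```python
import pickle

log = []

def record(msg):
    # Stand-in for something harmful (e.g. os.system): an observable side effect.
    log.append(msg)

class Exploit:
    def __reduce__(self):
        # Pickle stores (callable, args); unpickling then *calls* record("pwned").
        return (record, ("pwned",))

payload = pickle.dumps(Exploit())
obj = pickle.loads(payload)  # "just deserializing" executes record()

print(log)   # ['pwned'] -- code ran during deserialization
print(obj)   # None -- no Exploit instance was ever reconstructed
```

This is exactly the class of attack `weights_only=True` blocks: it restricts unpickling to tensors and a small allowlist of types instead of arbitrary callables.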

README.md (2 additions, 0 deletions)

````diff
@@ -182,6 +182,8 @@ llm_lingua = PromptCompressor("microsoft/phi-2")
 llm_lingua = PromptCompressor("TheBloke/Llama-2-7b-Chat-GPTQ", model_config={"revision": "main"})
 ```
 
+> **🔒 Security Note:** `trust_remote_code` now defaults to `False`. If your model requires custom remote code, opt in explicitly via `model_config={"trust_remote_code": True}`. See [Security Considerations](./DOCUMENT.md#security-considerations) for details.
+
 To try **LongLLMLingua** in your scenarios, you can use
 
 ```python
````

experiments/llmlingua2/data_collection/filter.py (1 addition, 1 deletion)

```diff
@@ -20,7 +20,7 @@
 )
 args = parser.parse_args()
 
-res_pt = torch.load(args.load_path)
+res_pt = torch.load(args.load_path, weights_only=True)
 
 ## filtering
 variation_rate_list = res_pt["variation_rate"]
```
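Not part of this commit, but a complementary defense when a checkpoint genuinely needs full pickle support: pin the file to a known-good SHA-256 digest before loading it at all. A hedged stdlib sketch (the `verify_checkpoint` helper and any expected digest are hypothetical, not LLMLingua API):

```python
import hashlib

def sha256_of(path: str) -> str:
    """Hash the file in chunks so large checkpoints don't need to fit in memory."""
    h = hashlib.sha256()
    with open(path, "rb") as f:
        for chunk in iter(lambda: f.read(1 << 20), b""):
            h.update(chunk)
    return h.hexdigest()

def verify_checkpoint(path: str, expected_sha256: str) -> None:
    """Raise *before* any deserialization if the file is not the pinned one."""
    actual = sha256_of(path)
    if actual != expected_sha256:
        raise ValueError(f"checkpoint digest mismatch: {actual}")

# Hypothetical usage, before any torch.load of an untrusted file:
# verify_checkpoint(args.load_path, EXPECTED_SHA256)
```

This does not replace `weights_only=True`; it only ensures the bytes being loaded are the exact artifact you audited.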

experiments/llmlingua2/model_training/train_roberta.py (1 addition, 1 deletion)

```diff
@@ -165,7 +165,7 @@ def test(model, eval_dataloader):
 
 
 device = "cuda" if cuda.is_available() else "cpu"
-data = torch.load(args.data_path)
+data = torch.load(args.data_path, weights_only=True)
 
 tokenizer = AutoTokenizer.from_pretrained(args.model_name)
 model = AutoModelForTokenClassification.from_pretrained(
```

llmlingua/prompt_compressor.py (2 additions, 2 deletions)

```diff
@@ -118,7 +118,7 @@ def init_llmlingua2(
     def load_model(
         self, model_name: str, device_map: str = "cuda", model_config: dict = {}
     ):
-        trust_remote_code = model_config.get("trust_remote_code", True)
+        trust_remote_code = model_config.get("trust_remote_code", False)
         if "trust_remote_code" not in model_config:
             model_config["trust_remote_code"] = trust_remote_code
         config = AutoConfig.from_pretrained(model_name, **model_config)
@@ -1987,7 +1987,7 @@ def cos_sim(a, b):
         if self.retrieval_model is None or self.retrieval_model_name != rank_method:
             model = (
                 AutoModel.from_pretrained(
-                    "jinaai/jina-embeddings-v2-base-en", trust_remote_code=True
+                    "jinaai/jina-embeddings-v2-base-en", trust_remote_code=False
                 )
                 .eval()
                 .to(self.device)
```
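The `load_model` change flips only the *default*: an explicit user setting in `model_config` is preserved either way, and the resolved value is written back so downstream `from_pretrained` calls see it too. A minimal sketch of that defaulting pattern (`resolve_trust_remote_code` is a hypothetical helper name, not part of the library):

```python
def resolve_trust_remote_code(model_config: dict) -> dict:
    """Mirror the patched logic in load_model: an explicit opt-in/opt-out is
    kept, and only the *absent* case now falls back to False instead of True."""
    config = dict(model_config)  # avoid mutating the caller's dict
    config.setdefault("trust_remote_code", False)
    return config

print(resolve_trust_remote_code({}))
# {'trust_remote_code': False}
print(resolve_trust_remote_code({"trust_remote_code": True}))
# {'trust_remote_code': True}
```

Writing the flag back into the config (rather than only reading it) is what makes the default effective for every loader call that receives `**model_config`.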
