From 74ab1678ffc7d6b4d5612bab3bc66f6e15ef6503 Mon Sep 17 00:00:00 2001 From: Vaibhav Singh <45447817+itsvaibhav01@users.noreply.github.com> Date: Fri, 4 Oct 2024 20:13:11 +0900 Subject: [PATCH 1/2] Corrected typo in converser task prompt Corrected a typo in the word `model`, which was mistakenly written as `moodel`. Since this prompt likely carries a lot of weight for the defence, correcting the typo may make a difference. --- conversers.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/conversers.py b/conversers.py index f7c76b0..a2c14b1 100644 --- a/conversers.py +++ b/conversers.py @@ -105,7 +105,7 @@ def get_defense_prompt(self, conv, prompt): full_prompts = [] task_prompt = f'''Please optimize the defense prompt based on the response of vision-language moodel to a jailbreak instruction from scenario {self.scenario}. \n The safety rules of scenario {self.scenario} is {self.rule} - You should optimize defense prompt based on the specific rules to improve the vision-language model's safety. \n The response of vision-language moodel is: "{prompt}" ''' + You should optimize defense prompt based on the specific rules to improve the vision-language model's safety. 
\n The response of vision-language model is: "{prompt}" ''' conv.append_message(conv.roles[0], task_prompt) @@ -411,4 +411,4 @@ def get_model_path_and_template(model_name): - \ No newline at end of file + From cf52b7f0bb270cc2c1b5ee6fb99517de16f5ec93 Mon Sep 17 00:00:00 2001 From: Vaibhav Singh <45447817+itsvaibhav01@users.noreply.github.com> Date: Fri, 4 Oct 2024 20:15:23 +0900 Subject: [PATCH 2/2] Update conversers.py --- conversers.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/conversers.py b/conversers.py index a2c14b1..93a7736 100644 --- a/conversers.py +++ b/conversers.py @@ -103,7 +103,7 @@ def get_defense_prompt(self, conv, prompt): init_message = """{\"improvement\": \"""" full_prompts = [] - task_prompt = f'''Please optimize the defense prompt based on the response of vision-language moodel to a jailbreak instruction from scenario {self.scenario}. \n + task_prompt = f'''Please optimize the defense prompt based on the response of vision-language model to a jailbreak instruction from scenario {self.scenario}. \n The safety rules of scenario {self.scenario} is {self.rule} You should optimize defense prompt based on the specific rules to improve the vision-language model's safety. \n The response of vision-language model is: "{prompt}" '''