diff --git a/data/persona_prompt_template.txt b/data/persona_prompt_template.txt index 2851d30b..ba07b84e 100644 --- a/data/persona_prompt_template.txt +++ b/data/persona_prompt_template.txt @@ -25,7 +25,10 @@ Below is the context: """ - If the chatbot is repetitive point that out to the chatbot and/or show frustration # Other instructions -- You start the conversation +- You start the conversation +- When asked to provide multiple responses, generate 5 diverse possible responses with probability scores (0.0-1.0) representing how likely each response is based on the persona +- Sample responses from different parts of the probability distribution, including less likely but contextually appropriate options +- Each response should vary in tone, directness, or emotional expression while staying in character - Don't repeat the same sentences, make the same point multiple times or rephrase the same question - Don't repeat or rephrase your sample prompt over and over - Don't ask for scripts including but not limited to email templates diff --git a/generate.py b/generate.py index 94aa94df..61a97145 100644 --- a/generate.py +++ b/generate.py @@ -23,6 +23,7 @@ async def main( run_id: Optional[str] = None, max_concurrent: Optional[int] = None, max_total_words: Optional[int] = None, + multiple_responses: bool = False, ) -> List[Dict[str, Any]]: """ Generate conversations and return results. @@ -37,9 +38,13 @@ async def main( runs_per_prompt: Number of runs per prompt persona_names: List of persona names to use. If None, uses all personas. verbose: Whether to print status messages - folder_name: Custom folder name for saving conversations. If None, uses default format. + folder_name: Custom folder name for saving conversations. + If None, uses default format. max_total_words: Optional maximum total words across all responses - max_concurrent: Maximum number of concurrent conversations. If None, runs all conversations concurrently. 
+ max_concurrent: Maximum number of concurrent conversations. + If None, runs all conversations concurrently. + multiple_responses: If True, generate multiple responses with scores + and select highest-scored one Returns: List of conversation results @@ -67,6 +72,7 @@ async def main( print(f" - Run ID: {run_id}") print(f" - Max concurrent: {max_concurrent}") print(f" - Max total words: {max_total_words}") + print(f" - Multiple responses: {multiple_responses}") # Generate default folder name if not provided if folder_name is None: @@ -97,6 +103,7 @@ async def main( run_id=run_id, max_concurrent=max_concurrent, max_total_words=max_total_words, + multiple_responses=multiple_responses, ) # Run conversations @@ -187,6 +194,17 @@ async def main( type=int, ) + parser.add_argument( + "--multiple-responses", + "-m", + help=( + "Enable multiple response generation with scoring. " + "Generates 5 diverse responses and selects the highest-scored one." + ), + action="store_true", + default=False, + ) + args = parser.parse_args() persona_model_config = { @@ -222,5 +240,6 @@ async def main( folder_name=args.folder_name, max_concurrent=args.max_concurrent, max_total_words=args.max_total_words, + multiple_responses=args.multiple_responses, ) ) diff --git a/generate_conversations/conversation_simulator.py b/generate_conversations/conversation_simulator.py index 1113d435..3a70edb8 100644 --- a/generate_conversations/conversation_simulator.py +++ b/generate_conversations/conversation_simulator.py @@ -1,9 +1,29 @@ -from typing import Any, Dict, List, Optional, Set +from typing import Any, Dict, List, Optional, Set, Tuple + +from pydantic import BaseModel from llm_clients import LLMInterface from utils.conversation_utils import save_conversation_to_file +class ScoredResponse(BaseModel): + """A single response with its probability score.""" + + text: str + probability: float + + +class ResponseWithScores(BaseModel): + """Model for multiple responses with confidence scores. 
+ + Note: Uses nested Pydantic model instead of List[Tuple[str, float]] + because OpenAI's structured output API doesn't support tuple types in + JSON schema. Tuples must be converted to objects with named fields. + """ + + responses: List[ScoredResponse] + + class ConversationSimulator: """Simulates a conversation between two LLM instances.""" @@ -15,16 +35,6 @@ def __init__(self, persona: LLMInterface, agent: LLMInterface): # Define termination signals that indicate persona wants to end the conversation self.termination_signals: Set[str] = set() - # "goodbye", "bye", "farewell", "talk to you later", - # "ttyl", - # "end conversation", "conversation over", "that's all", - # "nothing more to discuss", - # "i'm done", "let's end here", - # "conversation complete", "wrapping up", "final thoughts", - # "concluding", "to conclude", - # "in conclusion" - # } - def _should_terminate_conversation( self, response: str, speaker: LLMInterface ) -> bool: @@ -44,13 +54,7 @@ def _should_terminate_conversation( return True # Check for common ending patterns - ending_patterns = [ - # "it was nice", - # "pleasure talking", - # "great conversation", - # "good chat", - # "until next time" - ] + ending_patterns = [] for pattern in ending_patterns: if pattern in response_lower: @@ -63,6 +67,7 @@ async def start_conversation( max_turns: int, initial_message: Optional[str] = None, max_total_words: Optional[int] = None, + multiple_responses: bool = False, ) -> List[Dict[str, Any]]: """ Start a conversation between the two LLMs with early stopping support. @@ -72,7 +77,8 @@ async def start_conversation( initial_message: Optional initial message (for the first speaker) to start the conversation. By default, first speaker is persona. max_total_words: Optional maximum total words across all responses - + multiple_responses: If True, generate multiple responses with scores + and select the highest-scored one. Requires JudgeLLM support. 
Returns: List of conversation turns with speaker and message @@ -90,20 +96,63 @@ async def start_conversation( # Record start time for this turn # Generate response - response = await current_speaker.generate_response(current_message) + response: str + score: Optional[float] + all_responses: Optional[List[Tuple[str, float]]] + if multiple_responses and hasattr( + current_speaker, "generate_structured_response" + ): + # Generate multiple responses with scores + # Add instruction to generate multiple responses + multi_response_message = ( + f"{current_message}\n\n" + "Please provide 5 diverse possible responses as a persona would, " + "each with a probability score (0.0-1.0) indicating how likely " + "that response is based on the persona's characteristics." + ) + structured_response = ( + await current_speaker.generate_structured_response( + multi_response_message, ResponseWithScores + ) + ) + + # Select the response with the highest score + best_response = max( + structured_response.responses, key=lambda x: x.probability + ) + response = best_response.text + score = best_response.probability + # Store all responses in metadata for transparency + all_responses = [ + (r.text, r.probability) for r in structured_response.responses + ] + else: + # Generate single response (default behavior) + # Note: Despite interface definition, implementations return str + response = await current_speaker.generate_response(current_message) # type: ignore[assignment] + score = None + all_responses = None + + # response is mostly a text string total_words += len(response.split()) + # Record this turn - self.conversation_history.append( - { - "turn": turn + 1, - "speaker": current_speaker.get_name(), - "input": current_message or "", - "response": response, - "early_termination": False, - "logging": current_speaker.get_last_response_metadata(), - } - ) + turn_data = { + "turn": turn + 1, + "speaker": current_speaker.get_name(), + "input": current_message or "", + "response": response, + 
"early_termination": False, + "logging": current_speaker.get_last_response_metadata(), + } + + # Add multiple response metadata if available + if multiple_responses and all_responses is not None: + turn_data["selected_score"] = score + turn_data["all_responses"] = all_responses + + self.conversation_history.append(turn_data) # Check if persona wants to end the conversation if self._should_terminate_conversation(response, current_speaker): diff --git a/generate_conversations/runner.py b/generate_conversations/runner.py index 6d993229..bb99aa71 100644 --- a/generate_conversations/runner.py +++ b/generate_conversations/runner.py @@ -33,6 +33,7 @@ def __init__( folder_name: str = "conversations", max_concurrent: Optional[int] = None, max_total_words: Optional[int] = None, + multiple_responses: bool = False, ): self.persona_model_config = persona_model_config self.agent_model_config = agent_model_config @@ -45,6 +46,7 @@ def __init__( # Default: None - run all conversations concurrently self.max_concurrent = max_concurrent self.max_total_words = max_total_words + self.multiple_responses = multiple_responses self.AGENT_SYSTEM_PROMPT = self.agent_model_config.get( "system_prompt", "You are a helpful AI assistant." @@ -66,8 +68,6 @@ async def run_single_conversation( # Generate filename base using persona name, model, and run number tag = uuid.uuid4().hex[:6] - # TODO: consider removing timestamp - timestamp = datetime.now().strftime("%Y%m%d_%H%M%S_%f")[:-3] # TODO: should this be inside the LLM class? 
model_short = ( model_name.replace("claude-3-", "c3-") @@ -111,6 +111,7 @@ async def run_single_conversation( initial_message=None, max_turns=max_turns, max_total_words=self.max_total_words, + multiple_responses=self.multiple_responses, ) # Log each conversation turn @@ -164,7 +165,9 @@ async def run_conversations( ) -> List[Dict[str, Any]]: """Run multiple conversations concurrently.""" # Load prompts from CSV based on persona names - personas = load_prompts_from_csv(persona_names) + personas = load_prompts_from_csv( + persona_names, multiple_responses=self.multiple_responses + ) # Load agent configuration (fixed, shared across all conversations) agent = LLMFactory.create_llm( @@ -209,7 +212,8 @@ async def run_with_limit(task): return await task print( - f"Running {len(tasks)} conversations with max concurrency: {self.max_concurrent}" + f"Running {len(tasks)} conversations with max concurrency: " + f"{self.max_concurrent}" ) results = await asyncio.gather(*[run_with_limit(task) for task in tasks]) else: diff --git a/generate_conversations/utils.py b/generate_conversations/utils.py index 186d9666..018df5b0 100644 --- a/generate_conversations/utils.py +++ b/generate_conversations/utils.py @@ -11,6 +11,7 @@ def load_prompts_from_csv( name_list: Optional[List[str]] = None, prompt_path="data/personas.tsv", prompt_template_path="data/persona_prompt_template.txt", + multiple_responses: bool = False, ) -> List[dict[str, str]]: """Load prompts from personas.csv file and return them as a list. @@ -18,6 +19,8 @@ def load_prompts_from_csv( name_list: Optional list of names to filter by. If None, returns all prompts. 
prompt_path: Path to the CSV file containing persona data prompt_template_path: Path to the template file for formatting prompts + multiple_responses: If True, include instructions for generating + multiple responses """ csv_path = Path(prompt_path) @@ -33,6 +36,22 @@ def load_prompts_from_csv( with open(template_path, "r", encoding="utf-8") as template_file: template = template_file.read() + # Remove multiple response instructions if not needed + if not multiple_responses: + lines = template.split("\n") + filtered_lines = [] + skip_next = 0 + for line in lines: + # Skip the three lines about multiple responses + if "When asked to provide multiple responses" in line: + skip_next = 2  # Skip this line and the next 2 + continue + if skip_next > 0: + skip_next -= 1 + continue + filtered_lines.append(line) + template = "\n".join(filtered_lines) + data = [] with open(csv_path, "r", encoding="utf-8") as f: reader = csv.DictReader(f, delimiter="\t") diff --git a/llm_clients/claude_llm.py b/llm_clients/claude_llm.py index 8f1efac6..82370229 100644 --- a/llm_clients/claude_llm.py +++ b/llm_clients/claude_llm.py @@ -115,7 +115,7 @@ async def generate_response(self, message: Optional[str] = None) -> str: # Store raw metadata self.last_response_metadata["raw_metadata"] = dict(metadata) - return response.content + return response.content  # type: ignore[return-value] except Exception as e: # Store error metadata self.last_response_metadata = { diff --git a/llm_clients/config.py b/llm_clients/config.py index a66b1687..5d5196f4 100644 --- a/llm_clients/config.py +++ b/llm_clients/config.py @@ -49,6 +49,16 @@ class Config: "max_tokens": 1000, }, "gemini-pro": {"provider": "google", "temperature": 0.7, "max_tokens": 1000}, + "gemini-3-pro-preview": { + "provider": "google", + "temperature": 0.7, + "max_tokens": 1000, + }, + "gemini-2.5-flash": { + "provider": "google", + "temperature": 0.7, + "max_tokens": 1000, + }, "llama2:7b": { "provider": "ollama", "temperature": 0.7, diff
--git a/llm_clients/gemini_llm.py b/llm_clients/gemini_llm.py index ac113cad..982a1f1a 100644 --- a/llm_clients/gemini_llm.py +++ b/llm_clients/gemini_llm.py @@ -124,7 +124,7 @@ async def generate_response(self, message: Optional[str] = None) -> str: # Store raw metadata self.last_response_metadata["raw_metadata"] = dict(metadata) - return response.content + return response.content # type: ignore[return-value] except Exception as e: # Store error metadata self.last_response_metadata = { @@ -141,10 +141,10 @@ def get_last_response_metadata(self) -> Dict[str, Any]: """Get metadata from the last response.""" return self.last_response_metadata.copy() - async def generate_structured_response( + async def _generate_structured_via_json_parsing( self, message: Optional[str], response_model: Type[T] ) -> T: - """Generate a structured response using Pydantic model. + """Fallback method for Gemini 3 that parses JSON from text response. Args: message: The prompt message @@ -153,50 +153,184 @@ async def generate_structured_response( Returns: Instance of the response_model with structured data """ - messages = [] + import json + import re - if self.system_prompt: - messages.append(SystemMessage(content=self.system_prompt)) + # Add JSON formatting instruction to the message + json_message = ( + f"{message}\n\n" + f"IMPORTANT: Respond with ONLY valid JSON matching this schema:\n" + f"{response_model.model_json_schema()}\n\n" + f"Do not include any text before or after the JSON." 
+ ) - messages.append(HumanMessage(content=message)) + # Use normal text generation + text_response = await self.generate_response(json_message) + # Try to extract JSON from the response try: - # Create a structured LLM using with_structured_output - structured_llm = self.llm.with_structured_output(response_model) + # First, try to parse the whole response as JSON + parsed_data = json.loads(text_response) + except json.JSONDecodeError: + # If that fails, try to find JSON in code blocks + json_match = re.search( + r"```(?:json)?\s*(\{.*?\})\s*```", text_response, re.DOTALL + ) + if json_match: + parsed_data = json.loads(json_match.group(1)) + else: + # Try to find any JSON object in the text + json_match = re.search(r"\{.*\}", text_response, re.DOTALL) + if json_match: + parsed_data = json.loads(json_match.group(0)) + else: + raise ValueError( + f"Could not extract valid JSON from Gemini response. " + f"Response: {text_response[:500]}" + ) + + # Convert to Pydantic model + return response_model(**parsed_data) - start_time = time.time() - response = await structured_llm.ainvoke(messages) - end_time = time.time() + async def generate_structured_response( + self, message: Optional[str], response_model: Type[T], max_retries: int = 3 + ) -> T: + """Generate a structured response using Pydantic model. 
- # Store basic metadata for structured responses - self.last_response_metadata = { - "response_id": None, - "model": self.model_name, - "provider": "gemini", - "timestamp": datetime.now().isoformat(), - "response_time_seconds": round(end_time - start_time, 3), - "usage": {}, - "structured_output": True, - } + Args: + message: The prompt message + response_model: Pydantic model class to structure the response + max_retries: Maximum number of retries for None responses (default: 3) - # Ensure response is the correct type - if not isinstance(response, response_model): - raise ValueError( - f"Response is not an instance of {response_model.__name__}" - ) + Returns: + Instance of the response_model with structured data - return response # type: ignore[return-value] - except Exception as e: - # Store error metadata - self.last_response_metadata = { - "response_id": None, - "model": self.model_name, - "provider": "gemini", - "timestamp": datetime.now().isoformat(), - "error": str(e), - "usage": {}, - } - raise RuntimeError(f"Error generating structured response: {str(e)}") from e + Note: + Gemini 2.x models work reliably with structured output. + Gemini 3.x models have issues with LangChain's structured output + and will fall back to JSON text parsing. 
+ See: https://github.com/langchain-ai/langchain-google/issues/1207 + """ + # Check if this is a Gemini 3.x model + is_gemini_3 = "gemini-3" in self.model_name.lower() + + if is_gemini_3: + # Gemini 3 has issues with structured output, use JSON parsing fallback + return await self._generate_structured_via_json_parsing( + message, response_model + ) + + # Gemini 2.x and earlier use normal structured output path + messages = [] + + if self.system_prompt: + messages.append(SystemMessage(content=self.system_prompt)) + + messages.append(HumanMessage(content=message)) + + import asyncio + + last_error = None + for attempt in range(max_retries): + try: + # Create a structured LLM using with_structured_output + # Note: Keeping function_calling as default since json_schema + # may not be available in all langchain-google-genai versions + structured_llm = self.llm.with_structured_output(response_model) + + start_time = time.time() + response = await structured_llm.ainvoke(messages) + end_time = time.time() + + # Store basic metadata for structured responses + self.last_response_metadata = { + "response_id": None, + "model": self.model_name, + "provider": "gemini", + "timestamp": datetime.now().isoformat(), + "response_time_seconds": round(end_time - start_time, 3), + "usage": {}, + "structured_output": True, + "retry_attempt": attempt + 1, + } + + # Handle None response (MALFORMED_FUNCTION_CALL from Gemini) + if response is None: + error_msg = ( + f"Gemini returned None (attempt {attempt + 1}/{max_retries}). " + f"This is a known issue with Gemini's function calling. " + ) + if attempt < max_retries - 1: + # Wait before retrying (exponential backoff) + wait_time = 2**attempt + print( + f"WARNING: {error_msg}Waiting {wait_time}s before retry..." + ) + await asyncio.sleep(wait_time) + continue + else: + raise ValueError( + f"{error_msg}Max retries exceeded. " + f"Message: {message[:200] if message else 'None'}..." 
+ ) + + # Ensure response is the correct type + # LangChain's Gemini integration may return dict instead of Pydantic + if isinstance(response, dict): + try: + response = response_model(**response) + except Exception as conv_error: + model_name = response_model.__name__ + raise ValueError( + f"Failed to convert dict to {model_name}: " + f"{conv_error}. Response: {response}" + ) from conv_error + elif not isinstance(response, response_model): + model_name = response_model.__name__ + response_type = type(response) + raise ValueError( + f"Response is not an instance of {model_name}, " + f"got {response_type}" + ) + + return response # type: ignore[return-value] + + except ValueError as e: + # If it's a None response error and we have retries left, continue + if "returned None" in str(e) and attempt < max_retries - 1: + last_error = e + continue + # Otherwise, re-raise + raise + except Exception as e: + # For other exceptions, store error and re-raise + self.last_response_metadata = { + "response_id": None, + "model": self.model_name, + "provider": "gemini", + "timestamp": datetime.now().isoformat(), + "error": str(e), + "usage": {}, + "retry_attempt": attempt + 1, + } + raise RuntimeError( + f"Error generating structured response: {str(e)}" + ) from e + + # If we exhausted all retries + self.last_response_metadata = { + "response_id": None, + "model": self.model_name, + "provider": "gemini", + "timestamp": datetime.now().isoformat(), + "error": str(last_error), + "usage": {}, + "retry_attempts": max_retries, + } + raise RuntimeError( + f"Error generating structured response after {max_retries} retries: " + f"{str(last_error)}" + ) from last_error def set_system_prompt(self, system_prompt: str) -> None: """Set or update the system prompt.""" diff --git a/llm_clients/llama_llm.py b/llm_clients/llama_llm.py index 2dd4596c..7510ba3f 100644 --- a/llm_clients/llama_llm.py +++ b/llm_clients/llama_llm.py @@ -1,4 +1,4 @@ -from typing import Optional +from typing import Any, 
Dict, Optional from langchain_community.llms import Ollama @@ -40,6 +40,9 @@ def __init__( llm_params.update(kwargs) self.llm = Ollama(**llm_params) + # Store metadata from last response + self.last_response_metadata: Dict[str, Any] = {} + async def generate_response(self, message: Optional[str] = None) -> str: """Generate a response to the given message asynchronously.""" try: @@ -53,10 +56,29 @@ async def generate_response(self, message: Optional[str] = None) -> str: # Ollama doesn't have native async support in langchain-community # So we'll use the synchronous version response = self.llm.invoke(full_message) + + # Store basic metadata + self.last_response_metadata = { + "model": self.model_name, + "provider": "llama", + "usage": {}, + } + return response except Exception as e: + # Store error metadata + self.last_response_metadata = { + "model": self.model_name, + "provider": "llama", + "error": str(e), + "usage": {}, + } return f"Error generating response: {str(e)}" + def get_last_response_metadata(self) -> Dict[str, Any]: + """Get metadata from the last response.""" + return self.last_response_metadata.copy() + def set_system_prompt(self, system_prompt: str) -> None: """Set or update the system prompt.""" self.system_prompt = system_prompt diff --git a/llm_clients/llm_interface.py b/llm_clients/llm_interface.py index 6b360291..89b6f08e 100644 --- a/llm_clients/llm_interface.py +++ b/llm_clients/llm_interface.py @@ -1,5 +1,5 @@ from abc import ABC, abstractmethod -from typing import Any, Dict, Optional, Tuple, Type, TypeVar +from typing import Any, Dict, Optional, Type, TypeVar from pydantic import BaseModel @@ -18,13 +18,22 @@ def __init__(self, name: str, system_prompt: Optional[str] = None): self.system_prompt = system_prompt or "" @abstractmethod - async def generate_response( - self, message: Optional[str] = None - ) -> Tuple[str, Dict[str, Any]]: + async def generate_response(self, message: Optional[str] = None) -> str: """Generate a response to the 
given message asynchronously. Returns: - Tuple of (response_text, metadata_dict) + Response text as a string. Use get_last_response_metadata() to access + metadata from the last response. + """ + pass + + @abstractmethod + def get_last_response_metadata(self) -> Dict[str, Any]: + """Get metadata from the last response. + + Returns: + Dictionary containing metadata such as model name, provider, + timestamp, token usage, and other provider-specific information. """ pass diff --git a/llm_clients/openai_llm.py b/llm_clients/openai_llm.py index 0151f183..cfc88fcc 100644 --- a/llm_clients/openai_llm.py +++ b/llm_clients/openai_llm.py @@ -136,7 +136,7 @@ async def generate_response(self, message: Optional[str] = None) -> str: # Store raw usage_metadata self.last_response_metadata["raw_usage_metadata"] = dict(usage_meta) - return response.content + return response.content # type: ignore[return-value] except Exception as e: # Store error metadata self.last_response_metadata = { diff --git a/scripts/run_combinations.sh b/scripts/run_combinations.sh index 17404a1e..2ef99eed 100755 --- a/scripts/run_combinations.sh +++ b/scripts/run_combinations.sh @@ -5,28 +5,34 @@ SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" cd "$SCRIPT_DIR/.." 
# Parameters -RUNS=1 +RUNS=5 TURNS=20 MAX_WORDS=4000 users=( - "claude-opus-4-1-20250805" - "gpt-5 -uep max_completion_tokens=5000" - "gemini-3-pro-preview" + # "claude-opus-4-5-20251101" + "claude-sonnet-4-5-20250929" + # "gpt-4o" + # "gpt-5.2-2025-12-11 -uep max_completion_tokens=5000" + # "gemini-3-pro-preview" + # "gemini-2.5-flash" ) providers=( + # "claude-opus-4-5-20251101" + # "claude-sonnet-4-5-20250929" "gpt-4o" - "gpt-5 -pep max_completion_tokens=5000" - "claude-opus-4-1-20250805" - "gemini-3-pro-preview" + # "gpt-5.2-2025-12-11 -pep max_completion_tokens=5000" + # "claude-opus-4-5-20251101" + # "gemini-3-pro-preview" + # "gemini-2.5-flash" ) for user in "${users[@]}"; do for provider in "${providers[@]}"; do echo "Running with user: $user, provider: $provider" - echo "python3 generate.py -u $user -p $provider -r $RUNS -t $TURNS -w $MAX_WORDS" - python3 generate.py -u $user -p $provider -r $RUNS -t $TURNS -w $MAX_WORDS + echo "python3 generate.py -u $user -p $provider -r $RUNS -t $TURNS -m" + python3 generate.py -u $user -p $provider -r $RUNS -t $TURNS -m done done diff --git a/tests/mocks/mock_llm.py b/tests/mocks/mock_llm.py index 068a8491..d3031db0 100644 --- a/tests/mocks/mock_llm.py +++ b/tests/mocks/mock_llm.py @@ -1,4 +1,4 @@ -from typing import Any, Dict, List, Optional, Type, TypeVar, Union +from typing import Any, Dict, List, Optional, Type, TypeVar from llm_clients.llm_interface import JudgeLLM diff --git a/tests/test_question_navigator.py b/tests/test_question_navigator.py index becd1e96..84e41c31 100644 --- a/tests/test_question_navigator.py +++ b/tests/test_question_navigator.py @@ -65,7 +65,8 @@ class TestNotRelevantGoto: def test_not_relevant_parsing(self, navigator): """Test that NOT_RELEVANT>>ID is parsed correctly""" - # Question 13 should have "Denies suicidal thoughts" answer with NOT_RELEVANT>>25 + # Question 13 should have "Denies suicidal thoughts" answer + # with NOT_RELEVANT>>25 q13_data = navigator.get_question_data("13") assert
q13_data is not None