From 2ecc10550b1e4757c22c30209159844b2b0a2f6e Mon Sep 17 00:00:00 2001
From: Yaphet Kebede
Date: Mon, 15 Dec 2025 10:37:42 -0500
Subject: [PATCH 1/2] out of context issues fix

---
 src/chains/qvkg_chain.py | 70 ++++++++++++++++++++++++++++++++++++++--
 1 file changed, 68 insertions(+), 2 deletions(-)

diff --git a/src/chains/qvkg_chain.py b/src/chains/qvkg_chain.py
index 5201520..3575b57 100644
--- a/src/chains/qvkg_chain.py
+++ b/src/chains/qvkg_chain.py
@@ -25,7 +25,8 @@ def __init__(self, config, default_k=10):
         self.config = config
         self.kg_chain = KGChain(config=config)
         self.qv_chain = QuestionLookupChain(config=config)
-        self.llm = LLMFactory.get_llm(config=config)
+        self.raw_llm = LLMFactory.get_raw_llm(config=config)
+        self.llm = self.raw_llm | LLMFactory.strip_thought
         self.default_lookup_k = default_k
         self.langfuse_client = Langfuse(secret_key=config.LANGFUSE_SECRET_KEY,
                                         public_key=config.LANGFUSE_PUBLIC_KEY,
@@ -112,7 +113,7 @@ def as_generative_chain(self, lookup_parameters=None):
                 "context": x["context"],
                 "chat_history": x["chat_history"],
                 "user_persona": x["user_persona"]
-            }) | self.COMBINED_ANSWER_PROMPT | self.llm.with_config(name="answer_generation") | StrOutputParser(),
+            }) | RunnableLambda(self._limit_context_tokens) | self.COMBINED_ANSWER_PROMPT | self.llm.with_config(name="answer_generation") | StrOutputParser(),
             "extra": RunnableLambda(lambda x: {"knowledge_graph": x["kg_extra"].get("knowledge_graph", {})}),
             "prompt": RunnableLambda(lambda x: {
                 "input": x["input"],
@@ -149,10 +150,75 @@ def _create_answer_generation_prompt(self, prompt_name):
             MessagesPlaceholder(variable_name="chat_history"),
             ("user", "{input}")
         ])
+
     def _get_raw_from_langfuse(self, prompt_name: str) -> str:
         """Gets raw string form of prompts in langfuse"""
         return self.langfuse_client.get_prompt(prompt_name).prompt
 
+    def _limit_context_tokens(self, inputs: Dict) -> Dict:
+        """
+        Checks the token count of the prompt constructed from inputs.
+        If it exceeds the limit, iteratively removes variables from studies, then studies themselves until it fits.
+        """
+        MAX_TOKENS = 16000  # Leave some buffer for response
+
+        # We need to format the messages to count tokens accurately
+        def get_token_count(current_inputs):
+            messages = self.COMBINED_ANSWER_PROMPT.format_messages(**current_inputs)
+            try:
+                return self.raw_llm.get_num_tokens_from_messages(messages)
+            except Exception:
+                # Fallback if the LLM doesn't support token counting
+                # Rough estimate: 4 chars per token
+                return sum(len(m.content) for m in messages) / 4
+
+        if get_token_count(inputs) <= MAX_TOKENS:
+            return inputs
+
+        # Parse context
+        try:
+            context_str = inputs["context"]
+            # combine_xml_outputs returns ...
+            root = ET.fromstring(context_str)
+
+            # Loop until we are under the limit
+            while get_token_count(inputs) > MAX_TOKENS:
+                studies = root.findall('study')
+                if not studies:
+                    break
+
+                # Check if we have any variables to remove
+                # We want to find the study with the most variables
+                study_with_most_vars = None
+                max_vars = 0
+
+                for study in studies:
+                    variables_container = study.find('variables')
+                    if variables_container is not None:
+                        vars_in_study = variables_container.findall('variable')
+                        if len(vars_in_study) > max_vars:
+                            max_vars = len(vars_in_study)
+                            study_with_most_vars = study
+
+                # If we found a study with variables (and count > 0), remove one
+                if study_with_most_vars is not None and max_vars > 0:
+                    variables_container = study_with_most_vars.find('variables')
+                    vars_in_study = variables_container.findall('variable')
+                    # Remove the last variable
+                    variables_container.remove(vars_in_study[-1])
+                else:
+                    # No variables left in any study, remove the last study
+                    root.remove(studies[-1])
+
+                # Reconstruct context
+                new_context = ET.tostring(root, encoding='unicode')
+                inputs["context"] = new_context
+
+        except ET.ParseError:
+            # If we can't parse, we can't intelligently reduce.
+            pass
+
+        return inputs
 
 
 if __name__ == "__main__":

From 37fe5acd8634d4b7fab16e0997bc9bdb96dfd18d Mon Sep 17 00:00:00 2001
From: Yaphet Kebede
Date: Mon, 15 Dec 2025 11:00:30 -0500
Subject: [PATCH 2/2] pin qdrant

---
 requirements.txt | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/requirements.txt b/requirements.txt
index a36d729..b526954 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -4,7 +4,7 @@ langchain-community==0.3.18
 httpx==0.27.2
 fastapi==0.115.12
 requests
-qdrant-client
+qdrant-client==1.14.2
 langserve[all]
 pydantic
 langchainhub