From 1e8adfa060681c8ba3123eb219c23615ba034bea Mon Sep 17 00:00:00 2001 From: Abdullah Dawud-Sulaiman Date: Wed, 3 Dec 2025 11:01:39 -0500 Subject: [PATCH 1/2] Add VQA module (MicroTraitLLM_VQA folder) --- MicroTraitLLM_VQA | 1 + 1 file changed, 1 insertion(+) create mode 160000 MicroTraitLLM_VQA diff --git a/MicroTraitLLM_VQA b/MicroTraitLLM_VQA new file mode 160000 index 0000000..0478e55 --- /dev/null +++ b/MicroTraitLLM_VQA @@ -0,0 +1 @@ +Subproject commit 0478e55286f7a054ed0be5e0a898011901a923b2 From 2a1358c4d1fecd371133660097759dd20b191890 Mon Sep 17 00:00:00 2001 From: Abdullah Dawud-Sulaiman Date: Wed, 3 Dec 2025 11:08:58 -0500 Subject: [PATCH 2/2] Add VQA module as regular folder --- MicroTraitLLM_VQA | 1 - MicroTraitLLM_VQA/.gitignore | 6 + MicroTraitLLM_VQA/LICENSE | 21 + MicroTraitLLM_VQA/README.md | 3 + MicroTraitLLM_VQA/call_api.py | 26 + MicroTraitLLM_VQA/citations.py | 51 ++ .../compile_supplement_generation.py | 127 ++++ MicroTraitLLM_VQA/figure_ingest.py | 147 ++++ MicroTraitLLM_VQA/id_convert.py | 32 + MicroTraitLLM_VQA/main.py | 127 ++++ MicroTraitLLM_VQA/metadata.py | 36 + MicroTraitLLM_VQA/pmc_text_api.py | 67 ++ MicroTraitLLM_VQA/pool_summary.py | 136 ++++ MicroTraitLLM_VQA/pubmed_central_search.py | 156 ++++ MicroTraitLLM_VQA/read_api_keys.py | 12 + MicroTraitLLM_VQA/requirements.txt | Bin 0 -> 3210 bytes MicroTraitLLM_VQA/summ_supp.py | 165 +++++ MicroTraitLLM_VQA/templates/index.html | 673 ++++++++++++++++++ MicroTraitLLM_VQA/vqa.py | 248 +++++++ 19 files changed, 2033 insertions(+), 1 deletion(-) delete mode 160000 MicroTraitLLM_VQA create mode 100644 MicroTraitLLM_VQA/.gitignore create mode 100644 MicroTraitLLM_VQA/LICENSE create mode 100644 MicroTraitLLM_VQA/README.md create mode 100644 MicroTraitLLM_VQA/call_api.py create mode 100644 MicroTraitLLM_VQA/citations.py create mode 100644 MicroTraitLLM_VQA/compile_supplement_generation.py create mode 100644 MicroTraitLLM_VQA/figure_ingest.py create mode 100644 MicroTraitLLM_VQA/id_convert.py 
create mode 100644 MicroTraitLLM_VQA/main.py create mode 100644 MicroTraitLLM_VQA/metadata.py create mode 100644 MicroTraitLLM_VQA/pmc_text_api.py create mode 100644 MicroTraitLLM_VQA/pool_summary.py create mode 100644 MicroTraitLLM_VQA/pubmed_central_search.py create mode 100644 MicroTraitLLM_VQA/read_api_keys.py create mode 100644 MicroTraitLLM_VQA/requirements.txt create mode 100644 MicroTraitLLM_VQA/summ_supp.py create mode 100644 MicroTraitLLM_VQA/templates/index.html create mode 100644 MicroTraitLLM_VQA/vqa.py diff --git a/MicroTraitLLM_VQA b/MicroTraitLLM_VQA deleted file mode 160000 index 0478e55..0000000 --- a/MicroTraitLLM_VQA +++ /dev/null @@ -1 +0,0 @@ -Subproject commit 0478e55286f7a054ed0be5e0a898011901a923b2 diff --git a/MicroTraitLLM_VQA/.gitignore b/MicroTraitLLM_VQA/.gitignore new file mode 100644 index 0000000..35fa8bd --- /dev/null +++ b/MicroTraitLLM_VQA/.gitignore @@ -0,0 +1,6 @@ +__pycache__/ +*.pyc +*.pyo +*.pyd +.DS_Store +apikeys.txt diff --git a/MicroTraitLLM_VQA/LICENSE b/MicroTraitLLM_VQA/LICENSE new file mode 100644 index 0000000..664a8ac --- /dev/null +++ b/MicroTraitLLM_VQA/LICENSE @@ -0,0 +1,21 @@ +MIT License + +Copyright (c) 2025 grogers772 + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. diff --git a/MicroTraitLLM_VQA/README.md b/MicroTraitLLM_VQA/README.md new file mode 100644 index 0000000..0a3d587 --- /dev/null +++ b/MicroTraitLLM_VQA/README.md @@ -0,0 +1,3 @@ +Rapid advances in Large Language Models (LLMs) and broad potential applications in biological research make them a compelling point of investigation. Key challenges remain, including the tendency of LLMs to hallucinate if they are not provided with domain-specific information, thereby potentially misinforming users; the small number of LLM applications directed to prokaryotes despite significant advances in microbiome studies, especially when compared to the many human-specific and eukaryotic domain-specific LLM applications that have recently been released; and the inability of current microbe-specific LLM tools to provide sufficiently comprehensive, accurate, and timely answers with proper citations. + +We present MicroTraitLLM, a retrieval-augmented generation (RAG) LLM which utilizes zero-shot and single-shot prompting to give specific, citation-based answers for researchers. Its connection to the live-updating PubMed Central Open Access article database allows the tool to remain up-to-date on scientific knowledge. MicroTraitLLM flexibly allows the user to customize their experience by selecting their choice of LLMs. The tool is also able to generate accurate citations in various formats. We present empirical results demonstrating that MicroTraitLLM provides both improvements in relevant literature search and informative responses as judged by microbial experts, while not increasing latency time compared to popular commercial LLMs.
def call_api(url):
    """Fetch *url* with an HTTP GET and return the raw response body.

    Returns the response text on success, or None when the request fails
    (connection error, timeout, or a non-2xx status).  The previous
    ``except ValueError`` branch for "JSON parsing" was unreachable —
    this function never parses JSON — and has been removed.
    """
    # Imported lazily so the pure citation helpers below can be used
    # without the requests package installed.
    import requests

    try:
        # 60s timeout so a stalled NCBI endpoint cannot hang the caller.
        response = requests.get(url, timeout=60)
        # Surface 4xx/5xx the same way as transport errors.
        response.raise_for_status()
        return response.text
    except requests.exceptions.RequestException as e:
        print(f"Error fetching data from {url}: {e}")
        return None


def _split_author(author):
    """Split a 'surname:X;given-names:Y' author entry into ('X', 'Y')."""
    surname_part, given_part = author.split(';')
    return surname_part.split(':')[1], given_part.split(':')[1]


def APA_citation(info):
    """Generate an APA-style reference from an article-info dict.

    *info* is the dict produced by extract_info() in pmc_text_api.py.
    The final author is detected by position (not by value equality, which
    previously misfired when two authors shared the same name string).
    """
    authorlist = ""
    last = len(info['names']) - 1
    for i, author in enumerate(info['names']):
        surname, given_names = _split_author(author)
        initial = given_names[0]  # APA uses first initials only
        if i == last:
            authorlist += "& " + surname + ", " + initial
        else:
            authorlist += surname + ", " + initial + "., "

    # publication_date looks like "YYYY Mon D"; the year is the first token.
    pub_year = info['publication_date'].split(' ')[0]

    return (f"{authorlist}. ({pub_year}). {info['title']}. {info['journal']}, "
            f"{info['volume']}({info['issue']}), {info['first_page']}. {info['doi']}")


def MLA_citation(info):
    """Generate an MLA-style reference ('First Author et al.') from *info*."""
    surname, given_name = _split_author(info['names'][0])
    main_author = f"{surname}, {given_name}"

    # "YYYY Mon D" -> "D Mon. YYYY"
    date_parts = info['publication_date'].split(' ')
    pub_date = f"{date_parts[2]} {date_parts[1]}. {date_parts[0]}"

    return (f'{main_author} et al. "{info["title"]}." {info["journal"]} vol. '
            f'{info["volume"]}, {info["first_page"]}. {pub_date}, doi:{info["doi"]}')


def NLM_citation(info):
    """Generate an NLM/PubMed-style reference from *info*."""
    authors = []
    for author in info['names']:
        surname, given_names = _split_author(author)
        authors.append(surname + " " + given_names[0])
    authorlist = ", ".join(authors)

    return (f"{authorlist}. {info['title']}. {info['journal']}. "
            f"{info['publication_date']};{info['volume']}:{info['first_page']}. "
            f"doi: {info['doi']}. PMID: {info['pmid']}; PMCID: {info['pmcid']}.")


# Copyright Sep 2025 Glen Rogers.
# Subject to MIT license.
from openai import OpenAI  # duplicate import removed
import os
from ollama import chat
import inspect


def generate_summary(papers, question, model, citation_format, citations, temperature, api_key):
    """Produce the final "summary of summaries" answer to the user question.

    papers          -- list of per-article summaries (each carries a grade and citation)
    question        -- the user's question
    model           -- routes to OpenAI ("ChatGPT-4o-mini"), Groq
                       ("llama-3.3-70b-versatile"), or any locally served Ollama model
    citation_format -- "APA", "MLA", or "NLM"; interpolated into the prompt
    citations       -- pre-formatted reference list appended to the answer
    temperature     -- sampling temperature passed through to the model
    api_key         -- fallback key used when the matching env var is unset
    """
    # Define the context and example for the LLM
    Context = f"""You are an expert in microbial metagenomics and microbial traits. You are tasked with answering the question provided by the user. All information required to answer the question will be provided to you in a large text format as a list. Each entry in the list will contain an article summary, a grade for how well the article answers the user question, and the citation for the article in {citation_format} format. \n
    Your answer should be a detailed paragraph consisting of 5-10 sentences answering the user question. Additionally, when writing your response, every source you use should be cited in an in-text format within your response in {citation_format} format. You should never have a references section at the end of your response. \n
    If you cannot cite the given information in {citation_format} format, please instead list the article title. \n
    You should prioritize using the articles with a higher grade over the ones with a lower grade. \n
    You must answer the user question to the best of your ability without using any other information besides the information provided to you. \n"""

    EX1 = 'Question: What are some bacterial strains associated with ear infections?\n'

    A1 = """Ear infections, also known as otitis media, can be caused by various bacterial strains. Some common bacterial strains associated with ear infections include Streptococcus pneumoniae, Haemophilus influenzae, and Moraxella catarrhalis (Schilder et al. 2016). These bacteria are often found in the upper respiratory tract and can migrate to the middle ear, leading to infection. Streptococcus pneumoniae is one of the most common bacterial pathogens causing ear infections, particularly in children. Haemophilus influenzae is another significant contributor to ear infections, especially in cases where the pneumococcal vaccine has been effective in reducing Streptococcus pneumoniae infections (Kaur et al 2017). Moraxella catarrhalis is also known to be involved in ear infections, particularly in cases of chronic otitis media. Understanding the bacterial strains associated with ear infections is crucial for appropriate diagnosis and treatment strategies.\n
    """

    all_messages = [
        {"role": "system", "content": Context},
        {"role": "user", "content": f"An example question is {EX1}"},
        {"role": "assistant", "content": f"An example output is: {A1}"},
        {"role": "user", "content": f"The information to answer the given question is: {papers}"},
        {"role": "user", "content": question}
    ]

    # Route to the requested backend; environment variables take priority
    # over the api_key argument.
    if model == "ChatGPT-4o-mini":
        client = OpenAI(api_key=os.environ.get("OPENAI_API_KEY", api_key))
        response = client.chat.completions.create(
            model="gpt-4o-mini",
            messages=all_messages,
            temperature=temperature,
        )
        final_response = response.choices[0].message.content
    elif model == "llama-3.3-70b-versatile":
        # Groq exposes an OpenAI-compatible endpoint.
        client = OpenAI(base_url="https://api.groq.com/openai/v1",
                        api_key=os.environ.get("GROQ_API_KEY", api_key))
        response = client.chat.completions.create(
            model=model,
            messages=all_messages,
            temperature=temperature)
        final_response = response.choices[0].message.content
    else:
        # Any other model name is assumed to be served locally by Ollama.
        response = chat(
            model=model,
            messages=all_messages,
            options={'temperature': temperature})
        final_response = response.message.content

    # HACK: inspect the call stack to detect whether the Flask "ask" route
    # is the caller and choose its newline convention.  Fragile — an explicit
    # parameter would be safer; kept for compatibility with existing callers.
    stack = inspect.stack()
    call_fun = stack[-2].function
    if call_fun == "ask":
        final_response = final_response + f"\n\nReferences:\n{citations}"
    else:
        final_response = final_response + f"\n\nReferences: \n{citations}"
    return final_response


def generate_supplement(final_response, model, temperature, api_key):
    """Extract taxonomy/protein/gene terms mentioned in *final_response*.

    Returns the raw model output: a string expected to contain Python-style
    lists, e.g. ``[["Taxonomy", ...], ["Gene", ...]]``.  The message list is
    built once and shared by all three backends (it was previously
    triplicated inline).
    """
    # Define the context and example for the LLM
    context = """Your job is to read the provided paragraph, and note the species, genes, or proteins referenced inside the paragraph.
    If they are, you are to list each within its own respective Python list. The following order for the lists should always be used: Taxonomy, then Protein, then Gene.
    The genus, species, and subspecies for one organism should constitute one term in the list.
    If only the genus is listed, exclude it from the list. The first term in each list must always be the term you are looking for, which in this case is Taxonomy, Gene, or Protein respectively.
    If you do not identify any organisms, genes, or proteins, instead return "None" for the category list. Never abbreviate the organism name, gene, or protein name.
    If an abbreviated species name is provided, omit it from the output list you are creating. Ignore any cases with a prime symbol in them, such as 'aph(3')-Ia'.
    """
    EX3 = """In Escherichia coli (E. coli), several genes are commonly associated with antibiotic resistance,
    reflecting the bacterium's ability to evade the effects of various antibiotics.
    Notably, the **blaCTX-M**, **blaTEM**, and **blaSHV** genes encode for extended-spectrum
    beta-lactamases (ESBLs), which confer resistance to a wide range of beta-lactam antibiotics
    (Ahmad, Joji, & Shahid, 2022). Additionally, the **mcr-1** gene is significant for providing
    resistance to colistin, a last-resort antibiotic for treating multidrug-resistant infections
    (Nasrollahian, Graham, & Halaji, 2024). Other important resistance genes include **aac(3)-Ib-cr**,
    which is linked to aminoglycoside resistance, and **qnr** genes that protect against fluoroquinolones
    by encoding proteins that shield target enzymes from antibiotic action (Nasrollahian et al., 2024).
    Furthermore, the **sul1**, **sul2**, and **sul3** genes are associated with sulfonamide resistance,
    while **tetA** and **tetB** are linked to tetracycline resistance (Ribeiro et al., 2023). The presence
    of these genes highlights the genetic diversity and complexity of antibiotic resistance mechanisms in
    E. coli, emphasizing the need for ongoing surveillance and management strategies to combat this public
    health challenge (Silva et al., 2024).
    """
    A3 = """[["Taxonomy", "Escherichia coli"],["Gene", "blaCTX-M", "blaTEM", "blaSHV","mcr-1","aac(3)-Ib-cr","qnr","sul1", "sul2","sul3","tetA","tetB"]]"""

    all_messages = [
        {"role": "system", "content": context},
        {"role": "user", "content": EX3},
        {"role": "assistant", "content": A3},
        {"role": "user", "content": f"The paragraph is: {final_response}"},
    ]

    # Same backend routing convention as generate_summary().
    if model == "ChatGPT-4o-mini":
        client = OpenAI(api_key=os.environ.get("OPENAI_API_KEY", api_key))
        supplement = client.chat.completions.create(
            model="gpt-4o-mini",
            messages=all_messages,
            temperature=temperature,
        )
        termlist = supplement.choices[0].message.content
    elif model == "llama-3.3-70b-versatile":
        client = OpenAI(base_url="https://api.groq.com/openai/v1",
                        api_key=os.environ.get("GROQ_API_KEY", api_key))
        supplement = client.chat.completions.create(
            model=model,
            messages=all_messages,
            temperature=temperature)
        termlist = supplement.choices[0].message.content
    else:
        supplement = chat(
            model=model,
            messages=all_messages,
            options={'temperature': temperature})
        termlist = supplement.message.content
    return termlist


# Copyright Sep 2025 Glen Rogers.
# Subject to MIT license.
" + "Return a JSON object with fields: " + "`caption` (string), " + "`ocr_text` (string of all readable text), and " + "`chart_data` (an array of rows; each row an object with keys like " + "axis_labels, series, values, units, etc.). " + "If something is missing, use an empty string or empty array." + ), + }, + { + "role": "user", + "content": [ + { + "type": "text", + "text": "Analyze this figure and extract caption, OCR text, and structured chart/table data as JSON.", + }, + {"type": "image_url", "image_url": {"url": data_url}}, + ], + }, + ], + temperature=0.0, + ) + + content = resp.choices[0].message.content + try: + metadata = json.loads(content) + except json.JSONDecodeError: + # Fallback: wrap raw content + metadata = { + "caption": content, + "ocr_text": "", + "chart_data": [], + } + + # Ensure keys exist + metadata.setdefault("caption", "") + metadata.setdefault("ocr_text", "") + metadata.setdefault("chart_data", []) + + return metadata + + +def _embed_text(text: str) -> list: + """ + Get a text embedding for caption + OCR text. + Used for the text index (and as a stand-in for an image embedding for now). + """ + if not _api_key_openai: + raise RuntimeError("API_KEY_OPENAI missing in apikeys.txt") + + resp = client.embeddings.create( + model="text-embedding-3-small", + input=[text], + ) + return resp.data[0].embedding + + +def ingest_figure(image_bytes: bytes, paper_id: str | None = None) -> Dict[str, Any]: + """ + Main ingestion entry point. + + 1) Run vision model to get metadata (caption, ocr_text, chart_data). + 2) Compute embeddings. + 3) Save original image to disk. + 4) Append a record to figure_index.jsonl. + + Returns the stored record, including a generated figure_id. 
+ """ + # Step 1: metadata from vision model + metadata = _extract_metadata_from_image(image_bytes) + + # Step 2: embeddings + text_for_embedding = (metadata.get("caption", "") + "\n" + + metadata.get("ocr_text", "")).strip() + text_embedding = _embed_text(text_for_embedding) if text_for_embedding else [] + + # For now, we'll just reuse text_embedding for image_index as well. + image_embedding = text_embedding + + # Step 3: save original image + figure_id = str(uuid.uuid4()) + image_filename = f"{figure_id}.png" + image_path = os.path.join(FIGURE_DIR, image_filename) + with open(image_path, "wb") as f: + f.write(image_bytes) + + # Step 4: build record + record: Dict[str, Any] = { + "figure_id": figure_id, + "paper_id": paper_id, + "caption": metadata.get("caption", ""), + "ocr_text": metadata.get("ocr_text", ""), + "chart_data": metadata.get("chart_data", []), + "text_embedding": text_embedding, + "image_embedding": image_embedding, + "image_path": image_path, + } + + # Append to JSONL index + with open(FIGURE_INDEX_PATH, "a", encoding="utf-8") as f: + f.write(json.dumps(record) + "\n") + + return record diff --git a/MicroTraitLLM_VQA/id_convert.py b/MicroTraitLLM_VQA/id_convert.py new file mode 100644 index 0000000..637941f --- /dev/null +++ b/MicroTraitLLM_VQA/id_convert.py @@ -0,0 +1,32 @@ +import json +import call_api + +def process_ids(urls): + results = [] + for url in urls: + data = call_api.call_api(url) + data_json = json.loads(data) + # Initialize the variable to store the result + pmid = None + for record in data_json.get('records', []): + if 'pmid' in record: + pmid= record['pmid'] + # append the result + results.append(pmid) + + return results + +def generate_urls(terms): + urls = [] + for term in terms: + # Convert the term to a string + term_str = str(term) + base_url = 'https://www.ncbi.nlm.nih.gov/research/bionlp/RESTful/pubmed.cgi/BioC_json/{id}/unicode' + # Format the URL with the term + url = base_url.format(id=term_str) + # Append the URL to 
import json


def process_ids(urls):
    """Resolve each BioC converter URL to a PMID.

    Fetches every URL via call_api and scans the returned records for a
    'pmid' field.  Appends None for URLs that fail to fetch or contain no
    PMID, so the result list always lines up with *urls*.  (Previously a
    failed fetch returned None from call_api and json.loads(None) raised
    a TypeError.)
    """
    # Imported lazily so generate_urls() remains usable without the
    # call_api module's requests dependency.
    import call_api

    results = []
    for url in urls:
        data = call_api.call_api(url)
        pmid = None
        if data is not None:
            data_json = json.loads(data)
            for record in data_json.get('records', []):
                if 'pmid' in record:
                    pmid = record['pmid']
        results.append(pmid)

    return results


def generate_urls(terms):
    """Build a BioC-JSON converter URL for each ID in *terms*.

    IDs may be ints or strings; each is stringified before substitution.
    """
    base_url = 'https://www.ncbi.nlm.nih.gov/research/bionlp/RESTful/pubmed.cgi/BioC_json/{id}/unicode'
    return [base_url.format(id=str(term)) for term in terms]


# Copyright Sep 2025 Glen Rogers.
# Subject to MIT license.
#!/usr/bin/env python
"""Flask entry point wiring the search/summarize pipeline and the VQA route."""
import pubmed_central_search as pmc
import compile_supplement_generation as csg
from summ_supp import summ_supp
import pool_summary
from flask import Flask, render_template, request, jsonify
from read_api_keys import load_api_keys
from vqa import run_vqa  # VQA helper

app = Flask(__name__)

# Default settings; mutated in place by /update_settings.
# NOTE(review): updates are not validated or locked — arbitrary keys can be
# injected and concurrent requests may interleave; acceptable for a local
# single-user tool, revisit before multi-user deployment.
settings = {
    "num_articles": 8,
    "model_type": "llama-3.3-70b-versatile",
    "temperature": 0.0,
    "citation_format": "APA",
}


@app.route("/")
def home():
    """Serve the single-page UI with the current settings."""
    return render_template("index.html", settings=settings)


@app.route("/update_settings", methods=["POST"])
def update_settings():
    """Merge posted JSON into the global settings dict."""
    global settings
    data = request.get_json()
    print("Incoming settings:", data)
    if data:
        settings.update(data)
        print("Updated settings:", settings)
        return jsonify({"status": "success", "settings": settings})
    return jsonify({"status": "error", "message": "No data received"}), 400


@app.route("/get_settings", methods=["GET"])
def get_settings():
    """Return the current settings as JSON."""
    return jsonify(settings)


@app.route("/ask", methods=["POST"])
def ask():
    """Full text pipeline: search PMC, summarize articles, compose the answer.

    Returns JSON with the composed response plus taxonomy/protein/gene
    supplement tables.
    """
    question = request.json.get("question")
    article_number = settings["num_articles"]
    model = settings["model_type"]
    citation_format = settings["citation_format"]
    temperature = settings["temperature"]

    if not question:
        # 400 added for consistency with the /vqa route's error handling.
        return jsonify({"error": "No question provided"}), 400

    article_number = int(article_number)
    temperature = float(temperature)

    # Pick the key that matches the selected backend.
    api_keys = load_api_keys("apikeys.txt")
    api_key_openai = api_keys.get("API_KEY_OPENAI")
    api_key_groq = api_keys.get("API_KEY_GROQ")

    if model == "ChatGPT-4o-mini":
        api_key = api_key_openai
    elif model == "llama-3.3-70b-versatile":
        api_key = api_key_groq
    else:
        api_key = None  # local Ollama models need no key

    first_url, search_term = pmc.question_formation(
        question, model, temperature, api_key
    )
    idlist = pmc.idlist_confirm(
        first_url, question, article_number, model, temperature, api_key, search_term
    )
    print("confirmed url")

    urls = pmc.url_format(idlist)

    print("generating summaries")
    summaries, citations = pool_summary.spawn(
        question, urls, model, citation_format, temperature, api_key
    )
    print("generating response")
    response = csg.generate_summary(
        summaries, question, model, citation_format, citations, temperature, api_key
    )

    supplement = csg.generate_supplement(response, model, temperature, api_key)

    tax_resp, prot_resp, gene_resp = summ_supp(supplement)

    return jsonify(
        response=response,
        taxonomy=tax_resp,
        protein=prot_resp,
        gene=gene_resp,
    )


@app.route("/vqa", methods=["POST"])
def vqa_endpoint():
    """Visual question answering over an uploaded figure.

    Expects multipart/form-data with an "image" file and a "question" field.
    """
    if "image" not in request.files:
        return jsonify({"error": "No image uploaded"}), 400

    image_file = request.files["image"]
    question = request.form.get("question", "").strip()

    if not question:
        return jsonify({"error": "No question provided"}), 400

    # vqa.run_vqa handles reading the bytes and the OpenAI -> Groq fallback.
    answer = run_vqa(image_file, question)
    return jsonify({"answer": answer})


# Copyright Sep 2025 Glen Rogers.
# Subject to MIT license.

if __name__ == "__main__":
    app.run(debug=True)
from bs4 import BeautifulSoup
import re
import requests

def parse_pmc_metadata(url):
    """Scrape citation metadata from a PMC article page.

    Returns (journal, publication_date, volume, issue, first_page, pages, doi).
    Any field whose <meta> tag is absent comes back as None; `pages` is a list
    of page-range strings (e.g. '12-34') when the citation footer shows one,
    otherwise None.

    Raises Exception when the page cannot be fetched.
    """
    # Set a browser-like user-agent; PMC blocks the default requests UA.
    headers = {"User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64)"}
    response = requests.get(url, headers=headers)
    if not response.ok:
        raise Exception(f"Failed to fetch URL: {url}")

    soup = BeautifulSoup(response.content, "html.parser")

    def get_meta(name):
        # Return the meta tag's content, or None when the tag is absent.
        # (Previously indexing a missing tag raised TypeError.)
        tag = soup.find("meta", attrs={"name": name})
        return tag["content"] if tag else None

    journal = get_meta("citation_journal_title")
    publication_date = get_meta("citation_publication_date")
    volume = get_meta("citation_volume")
    citation_issue = get_meta("citation_issue")
    first_page = get_meta("citation_firstpage")
    doi = get_meta("citation_doi")

    # Multi-page articles show a page range (e.g. "12–34") in the citation
    # footer.  `pages` was previously left unbound (NameError at return) when
    # the footer text was empty, and the lookup crashed (AttributeError) when
    # the section was missing entirely; both paths now yield None.
    pages = None
    mult_page = soup.find(
        "section", attrs={"class": "pmc-layout__citation font-secondary font-xs"}
    )
    if mult_page is not None and mult_page.text:
        pattern = r"\b\d{1,2}[–-]\d{1,3}\b"
        pages = re.findall(pattern, mult_page.text)

    return journal, publication_date, volume, citation_issue, first_page, pages, doi

# Copyright Sep 2025 Glen Rogers.
# Subject to MIT license.
import json


def find_text(data):
    """Collect every 'text' field from a BioC JSON structure.

    Recursively walks dicts and lists, skipping any node whose 'infons'
    marks it as a reference list, table, or figure (section_type REF,
    TABLE, or GRAPH).  Returns the collected strings in document order.
    """
    results = []

    def recursive_search(obj):
        if isinstance(obj, dict):
            # Skip reference/table/figure sections wholesale.
            if 'infons' in obj and obj['infons'].get('section_type') in ['REF', 'TABLE', 'GRAPH']:
                return
            for key, value in obj.items():
                if key == 'text':
                    results.append(value)
                else:
                    recursive_search(value)
        elif isinstance(obj, list):
            for item in obj:
                recursive_search(item)

    recursive_search(data)
    return results


def extract_info(data):
    """Extract citation metadata for the first document in a BioC response.

    Only the first document and its first passage are consulted (the title
    and author infons live there).  Page/journal details are fetched from
    the article's PMC page.  Returns a dict of citation fields, or None
    when the response has no documents or passages.
    """
    documents = data.get('documents', [])
    if not documents:
        return None

    document = documents[0]  # only the first document is used
    passages = document.get('passages', [])
    if not passages:
        return None

    passage = passages[0]  # title text + author infons live here
    infons = passage.get('infons', {})

    title = passage.get('text')
    document_id = document['id']
    int_url = "https://www.ncbi.nlm.nih.gov/pmc/articles/" + document_id + "/"

    # Imported lazily so find_text() stays usable without the metadata
    # module's bs4/requests dependencies.  (The unused
    # `from call_api import call_api` import was removed.)
    import metadata
    journal, publication_date, volume, issue, first_page, pages, doi = \
        metadata.parse_pmc_metadata(int_url)

    # Authors are stored under infons keys 'name_0', 'name_1', ...
    names = [infons[key] for key in infons if key.startswith('name_')]

    return {
        'title': title,
        'names': names,
        'journal': journal,
        'publication_date': publication_date,
        'volume': volume,
        'issue': issue,
        'first_page': first_page,
        'pages': pages,
        'doi': doi,
    }


# Copyright Sep 2025 Glen Rogers.
# Subject to MIT license.
import os
import json
import call_api
from pmc_text_api import extract_info, find_text
from openai import OpenAI
from ollama import chat
from citations import APA_citation, MLA_citation, NLM_citation
from concurrent.futures import ProcessPoolExecutor, as_completed, TimeoutError
import inspect

# NOTE(review): spawn() (the ProcessPoolExecutor fan-out over summary())
# is defined below this function in the original file; it is truncated in
# the reviewed chunk and is not reproduced here.

def summary(url, user_question_prompt, model, citation_format, temperature, **kwargs):
    """Summarize one article URL in the context of the user question.

    Fetches the BioC JSON for *url*, extracts body text and citation
    metadata, and asks the requested model for a graded summary.  Always
    returns a (summary_text, citation) tuple: on an unusable article or any
    error a sentinel pair is returned.  (Previously the unusable-article
    branch set the sentinel strings but fell through returning None, and
    the except branch returned None — both crashed callers that unpack two
    values.)

    kwargs: api_key -- fallback key when the matching env var is unset.
    """
    # Define the system context and examples
    Context = f"""You are an expert in researching microbial metagenomics and microbial traits; however, you have no prior knowledge of any information or topic involving microbes.
    You are tasked with summarizing a provided article in the context of a given question from the user. All information required to answer the question, as well as the metadata for the article, will be provided to you in a large text format.\n
    Your answer must be 5-8 sentences answering the user question with detailed insights based on what you read in the article. Additionally, within your response, you must provide in-text citations from using the metadata provided to you. The in-text citations must be in {citation_format} format. You must never have a references section at the end of your response.\n
    You must also provide a grade after your summary. The grade should be on a scale of 0-100, with the grade corresponding to how much the article assisted in answering the user question.\n
    You must answer the user question to the best of your ability without using any other information besides the information provided to you.\n"""

    EX1 = 'Question: What are some bacterial strains associated with ear infections?\n'

    A1 = """Ear infections, also known as otitis media, can be caused by various bacterial strains. Some common bacterial strains associated with ear infections include Streptococcus pneumoniae, Haemophilus influenzae, and Moraxella catarrhalis (Schilder et al. 2016). These bacteria are often found in the upper respiratory tract and can migrate to the middle ear, leading to infection. Streptococcus pneumoniae is one of the most common bacterial pathogens causing ear infections, particularly in children. Moraxella catarrhalis is also known to be involved in ear infections, particularly in cases of chronic otitis media. Understanding the bacterial strains associated with ear infections is crucial for appropriate diagnosis and treatment strategies.\n

    **Grade**: 90/100
    """

    FALLBACK = ("This article cannot be used",
                "There is no citation for this article because it cannot be used")

    api_key = kwargs.get('api_key', None)
    try:
        string = call_api.call_api(url)
        if string and not string.startswith('[Error] : No result'):
            # The API wraps the JSON payload in one extra character on each
            # side; strip them before parsing.
            api_call = json.loads(string[1:-1])
            info = extract_info(api_call)
            results = find_text(api_call)

            # Build the citation in the requested format.
            if citation_format == "APA":
                citation = APA_citation(info)
            elif citation_format == "MLA":
                citation = MLA_citation(info)
            elif citation_format == "NLM":
                citation = NLM_citation(info)
            else:
                citation = ""

            all_messages = [
                {"role": "system", "content": Context},
                {"role": "user", "content": EX1},
                {"role": "assistant", "content": A1},
                {"role": "user", "content": f"The paper to summarize is: {results}"},
                {"role": "user", "content": f"The metadata to make the in-text citations in {citation_format} citation is {citation}"},
                {"role": "user", "content": f"The user question is: {user_question_prompt}"}
            ]

            # Route to the requested backend; env vars win over api_key.
            if model == "ChatGPT-4o-mini":
                client = OpenAI(api_key=os.environ.get("OPENAI_API_KEY", api_key))
                response = client.chat.completions.create(
                    model="gpt-4o-mini",
                    messages=all_messages,
                    temperature=temperature,
                )
                summary_text = response.choices[0].message.content
            elif model == "llama-3.3-70b-versatile":
                client = OpenAI(base_url="https://api.groq.com/openai/v1",
                                api_key=os.environ.get("GROQ_API_KEY", api_key))
                response = client.chat.completions.create(
                    model=model,
                    messages=all_messages,
                    temperature=temperature,
                )
                summary_text = response.choices[0].message.content
            else:
                response = chat(
                    model=model,
                    messages=all_messages,
                    options={'temperature': temperature, 'num_predict': 4096})
                summary_text = response.message.content

            summary_text = summary_text + f" Reference: {citation}"
            return summary_text, citation

        # No usable record for this URL: return the sentinel pair so callers
        # that unpack two values keep working.
        return FALLBACK
    except Exception as e:
        print(f"There was an error: {e}")
        return FALLBACK
futures = [executor.submit(summary, *args[:5], **args[5]) for args in items] + else: + items = [ + (url, user_question_prompt, model, citation_format, temperature) + for url in urls + ] + with ProcessPoolExecutor(max_workers=max_workers) as executor: + futures = [executor.submit(summary, *args) for args in items] + + + for future in as_completed(futures, timeout=120): # total timeout for all + try: + result = future.result(timeout=30) # per-task timeout + print("Process successful!") + summary_text, citation = result + papers.append(summary_text) + stack = inspect.stack() + call_fun = stack[-2].function + print(call_fun) + if call_fun == "ask": + citations += citation + "
" + else: + citations += citation + "\n" + except TimeoutError: + print("Task timed out.") + except Exception as e: + print(f"Error: {e}") + + return papers, citations + +# Copyright Sep 2025 Glen Rogers. +# Subject to MIT license. \ No newline at end of file diff --git a/MicroTraitLLM_VQA/pubmed_central_search.py b/MicroTraitLLM_VQA/pubmed_central_search.py new file mode 100644 index 0000000..397f19a --- /dev/null +++ b/MicroTraitLLM_VQA/pubmed_central_search.py @@ -0,0 +1,156 @@ +import os +import re +import json +import call_api +from openai import OpenAI +from ollama import chat + +def question_formation(prompt,model,temperature,api_key): + # Function to create the NCBI E-Utilities API URL based on user question + # Run the context window through LLM + # Present initial context window to LLM + Context = """You are tasked with generating a single search term filter for NCBI API URLs based on a user question. + The search term filter consists of various important key words that are spaced apart by plus signs. An example question is: + 'What bacterial strains can degrade TNT?'. The appropriate filter term for the example question is 'bacterial+strains+degrade+TNT'. + Your answer must only contain the filter term. Do not describe any chain of thought or logical processing that you used to reach your answer. + """ + # Create response to the system. System indicates initial context window, User is the user, and assistant is the model. 
+ if model == "ChatGPT-4o-mini": + client = OpenAI(api_key=os.environ.get("OPENAI_API_KEY", api_key)) + response = client.chat.completions.create( + model="gpt-4o-mini", + messages=[ + {"role": "system", "content": Context}, + {"role": "user", "content": prompt}, + ], + temperature=temperature, + ) + # Extract the search term from the model's response using regex + re_search_term = re.search(r'([\w+]+\+[\w+]+(?:\+[\w+]+)*)', response.choices[0].message.content) + if re_search_term is not None: + search_term = re_search_term.group() + link = f'https://eutils.ncbi.nlm.nih.gov/entrez/eutils/esearch.fcgi?db=pmc&retmode=json&retmax=20&sort=relevance&term={search_term}' + else: + search_term = None + link = None + elif model == "llama-3.3-70b-versatile": + client = OpenAI(base_url = "https://api.groq.com/openai/v1", api_key=os.environ.get("GROQ_API_KEY", api_key)) + response = client.chat.completions.create( + model=model, + messages=[ + {"role": "system", "content": Context}, + {"role": "user", "content": f'The question to use for this instance is: {prompt}'}, + ], + temperature=temperature) + # Extract the search term from the model's response using regex + re_search_term = re.search(r'([\w+]+\+[\w+]+(?:\+[\w+]+)*)', response.choices[0].message.content) + if re_search_term is not None: + search_term = re_search_term.group() + link = f'https://eutils.ncbi.nlm.nih.gov/entrez/eutils/esearch.fcgi?db=pmc&retmode=json&retmax=20&sort=relevance&term={search_term}' + else: + search_term = None + link = None + else: + response = chat( + model=model, + messages=[ + {"role": "system", "content": Context}, + {"role": "user", "content": f'The question to use for this instance is: {prompt}'}, + ], + options = {'temperature': temperature}) + # Extract the search term from the model's response using regex + re_search_term = re.search(r'([\w+]+\+[\w+]+(?:\+[\w+]+)*)', response.message.content) + if re_search_term is not None: + search_term = re_search_term.group() + link = 
f'https://eutils.ncbi.nlm.nih.gov/entrez/eutils/esearch.fcgi?db=pmc&retmode=json&retmax=20&sort=relevance&term={search_term}' + else: + search_term = None + link = None + return link, search_term + +def idlist_confirm(inp,prompt,article_number,model,temperature,api_key,search_term): + # Function to confirm that the API URL returns results, if not, re-run the question formation + # Run the context window through LLM + Context = """You are tasked with generating a single search term filter for NCBI API URLs based on a user question. + The search term filter consists of various important key words that are spaced apart by plus signs. An example question is: + 'What bacterial strains can degrade TNT?'. The appropriate filter term for the example question is 'bacterial+strains+degrade+TNT'. Your answer must only contain the filter term. Do not describe any chain of thought or logical processing in your answer that you used to reach your answer. + """ + + # Create response to the system. System indicates initial context window, User is the user, and assistant is the model. + if inp is None: + inv_link = inp + data = None + else: + data = call_api.call_api(inp) + data = dict(json.loads(data)) + inv_link = inp + + b = data.get('esearchresult',{}).get('count') + + if b is None: + b = 0 + # If no results, re-run the search term creation and API call + while data is None or int(b) == 0: + # Call appropriate model to create new search term + if model == "ChatGPT-4o-mini": + client = OpenAI(api_key=os.environ.get("OPENAI_API_KEY", api_key)) + response = client.chat.completions.create( + model="gpt-4o-mini", + messages=[ + {"role": "system", "content": Context}, + {"role": "user", "content": f"I asked this question previously:{prompt}. I got the following search term but it did not provide me with the right information: {search_term}. Please create a different search term to try. 
The search term should be your only response."}, + ], + temperature=temperature, + ) + re_search_term = re.search(r'([\w+]+\+[\w+]+(?:\+[\w+]+)*)', response.choices[0].message.content) + if re_search_term is not None: + search_term = re_search_term.group() + link = f'https://eutils.ncbi.nlm.nih.gov/entrez/eutils/esearch.fcgi?db=pmc&retmode=json&retmax=20&sort=relevance&term={search_term}' + data = call_api.call_api(link) + data = json.loads(data) + b = data.get('esearchresult',{}).get('count') + else: + search_term = inp + data = None + + else: + print("Creating new search term") + client = OpenAI(base_url = "https://api.groq.com/openai/v1",api_key=os.environ.get("GROQ_API_KEY", api_key)) + response = client.chat.completions.create( + model=model, + messages=[ + {"role": "system", "content": Context}, + {"role": "user", "content": f"I asked this question previously:{prompt}. I got the following link but it did not provide me with the right information: {search_term}. Please create a different search term to try. The search term should be your only response."}, + ], + temperature=temperature) + re_search_term = re.search(r'([\w+]+\+[\w+]+(?:\+[\w+]+)*)', response.choices[0].message.content) + if re_search_term is not None: + search_term = re_search_term.group() + link = f'https://eutils.ncbi.nlm.nih.gov/entrez/eutils/esearch.fcgi?db=pmc&retmode=json&retmax=20&sort=relevance&term={search_term}' + data = call_api.call_api(link) + data = json.loads(data) + b = data.get('esearchresult',{}).get('count') + else: + search_term = inp + data = None + + + + idlist = data['esearchresult']['idlist'][0:int(article_number)] + return idlist + +def url_format(idlist): + # Function to format the URLs for the BioC JSON API based on a list of PMC IDs + # BioC links come from Comeau DC, Wei CH, Dogan RI, and Lu Z. 
PMC text mining subset in BioC: about 3 million full text articles and growing, Bioinformatics, 2019 + base_url = 'https://www.ncbi.nlm.nih.gov/research/bionlp/RESTful/pmcoa.cgi/BioC_json/{id}/unicode' + urls = [] + for NCBIid in idlist: + PMCid = 'PMC'+NCBIid + url = base_url.format(id=PMCid) + urls.append(url) + return urls + + + +# Copyright Sep 2025 Glen Rogers. +# Subject to MIT license. \ No newline at end of file diff --git a/MicroTraitLLM_VQA/read_api_keys.py b/MicroTraitLLM_VQA/read_api_keys.py new file mode 100644 index 0000000..12d713c --- /dev/null +++ b/MicroTraitLLM_VQA/read_api_keys.py @@ -0,0 +1,12 @@ +def load_api_keys(file_path): + keys = {} + with open(file_path, 'r') as f: + for line in f: + if '=' in line: + key, value = line.strip().split('=', 1) + keys[key] = value + return keys + + +# Copyright Sep 2025 Glen Rogers. +# Subject to MIT license. \ No newline at end of file diff --git a/MicroTraitLLM_VQA/requirements.txt b/MicroTraitLLM_VQA/requirements.txt new file mode 100644 index 0000000000000000000000000000000000000000..93aacb618b22efb8ba7a8a293b7a6041fb456346 GIT binary patch literal 3210 zcmZXXOK%%T5QOI(AU{QhmwGt|A9Bedh=Bk(1UU&5AL2^na?RaDlz)7Z`nuShr3HeP zt?5T~b#>49zklcDu?%Hfu1a6t%4dCMssGnFhvcNK} z+OjVjo%a!ErC2LPp385m_!;-^oyVw%eVjX|W8R)}6LlzUc`9AR%FH*4^YLWU>1?kk z#3NLRtzq3nX$wtPEhrV75Ua}mfELIx^w)xIxO(^g-UL7ZfX z>h7X0EIMVQ_JgXo#z0oNTIht1Vf7TTdLgyt&B-p6w$SfE1RI&HnIKvU1zK>8R)9O4 zomtA4?yeLETeQ$cEZ0@-TIJcEvw-(g)U=e(AWszXzEkG*C_c8)39zOWb-Pci@{h7_ z#Y&wi^aWO{pDyAHzsWur8Znz}{GwB8b#Hu{XX~g7jqdfyDRg}Pv?kV31-r;;XW$rH~xYJmC7fSs$|2q&B1c=n%#a;?FQczV)@ z33(E$Y^9YH%Bug^MV z7giN|GT52Vis^YaHoDGsFyqhVRp_hYlM5_7GcSt$sxKR3E1hH(;?TMKP~jILJE-&Q zlt1+Mwd{xmPH!r(FqQC%ipJ=$F=54aF~2)m*m4swF6VQn99wzAX)Ekw*q}nb=A+tp zbJ8C)10vAe$De2yRGm&o)rcBiPCW+^)fo|L94ER4tNH7M2E28X%qBQG@nxKI>4O=p zH=Fmwt+2rL?I|7{Gey(=VA#+%zQ zu;@aQmpZ%-u6kt#;a+^<$&9(f)@j~UY%?~zRRHHOYBqay!grAodV{3W!H$-g1?QN( z%fZ$Kr*99d-gjYUcsorMI>{{oOCYgE$m7Yg&4cXfK5#U4PK`)6>7ncPY)(-pBJYLd 
zzRoZiM{&Z2zlz+L-y?TNwWtB^BQY%ptx9#esp^1Tf&A@6=(Bg~U|bZ(>e z{v-Q?qE7n(K9uGP235_5fxk|c^sP=eEO?I-qrI3;-vd@Uw=yqIynW|7OI0ebJ@Y_r z-*rYh$b;7p_hP+w@yytMUlD=Z)pGzl-j}@XF87AtU-pJ6dQ1+#glcs;4X6UUgKG4i zB)WU)v#-+FE?9<#sBqLKUF<{2BMd!p4&F3i9T3guR?;7bE2>S1u>lCC=31 + + + + MicroTraitLLM + + + +
+
+

+ MicroTraitLLM +

+

Literature-grounded answers for microbial traits, with automatic taxonomy / protein / gene extraction.

+
+ +
+ +
+ +
+

Research question

+ e.g. What microbial traits are associated with sulfate reduction in marine sediments? + +
+ +
+ +
+ + +
+ +
+ + +
+ + +
+

+ Figure VQA + Beta +

+

+ Upload a figure from a paper and ask a question about it + (e.g. “What does panel B show?”). +

+ +
+ +
+ + + +
+ + +
+ +
+ +
+
+ + + +
+
# ---- patch continues: MicroTraitLLM_VQA/vqa.py ----

import base64
import json
from dataclasses import dataclass
from typing import Optional

from openai import OpenAI, RateLimitError
from groq import Groq
from read_api_keys import load_api_keys

# -------------------------------------------------
# Load API keys and create clients
# -------------------------------------------------

# FIX: apikeys.txt is gitignored, so a fresh checkout used to crash this
# module at import time with FileNotFoundError; fall back to "no clients"
# mode instead (run_vqa already reports missing providers gracefully).
try:
    api_keys = load_api_keys("apikeys.txt")
except OSError:
    api_keys = {}
api_key_openai = api_keys.get("API_KEY_OPENAI")
api_key_groq = api_keys.get("API_KEY_GROQ")

client_openai: Optional[OpenAI] = OpenAI(api_key=api_key_openai) if api_key_openai else None
client_groq: Optional[Groq] = Groq(api_key=api_key_groq) if api_key_groq else None


# -------------------------------------------------
# Figure metadata "ingestion" stub
# -------------------------------------------------

@dataclass
class FigureMetadata:
    # Short natural-language summary of what the figure shows
    caption: str
    # Visible text (labels, axis titles) pulled from the image
    ocr_text: str
    # Brief description of trends / key values in charts or tables
    chart_data: str


def _guess_mime(image_bytes: bytes) -> str:
    """Best-effort MIME sniffing from magic bytes; defaults to image/png."""
    if image_bytes.startswith(b"\xff\xd8\xff"):
        return "image/jpeg"
    if image_bytes.startswith(b"GIF8"):
        return "image/gif"
    if image_bytes.startswith(b"RIFF") and image_bytes[8:12] == b"WEBP":
        return "image/webp"
    return "image/png"


def ingest_figure(data_url: str) -> Optional[FigureMetadata]:
    """
    Lightweight ingestion step:
    - Uses OpenAI vision to extract a caption, OCR-style text, and chart/table info.
    - Returns a FigureMetadata object, or None if ingestion fails.
    """
    if not client_openai:
        # No OpenAI key -> skip ingestion
        return None

    try:
        resp = client_openai.chat.completions.create(
            model="gpt-4o-mini",
            messages=[
                {
                    "role": "system",
                    "content": (
                        "You are a tool that extracts structured metadata from scientific figures. "
                        "Return ONLY valid JSON with keys 'caption', 'ocr_text', and 'chart_data'. "
                        "Keep each field concise but informative."
                    ),
                },
                {
                    "role": "user",
                    "content": [
                        {
                            "type": "text",
                            "text": (
                                "Look at this figure and extract:\n"
                                "1) A short caption summarizing what it shows.\n"
                                "2) OCR-like text: any labels, axis titles, or visible text.\n"
                                "3) Chart/table data: a brief description of trends, ranges, or key values.\n\n"
                                "Respond ONLY as JSON."
                            ),
                        },
                        {"type": "image_url", "image_url": {"url": data_url}},
                    ],
                },
            ],
            temperature=0.0,
        )

        raw = resp.choices[0].message.content or ""

        try:
            meta = json.loads(raw)
            return FigureMetadata(
                caption=(meta.get("caption") or "").strip(),
                ocr_text=(meta.get("ocr_text") or "").strip(),
                chart_data=(meta.get("chart_data") or "").strip(),
            )
        except json.JSONDecodeError:
            # If the model doesn't obey JSON instructions, treat entire content as a caption.
            return FigureMetadata(
                caption=raw.strip(),
                ocr_text="",
                chart_data="",
            )

    except RateLimitError:
        # If OpenAI is rate-limited, just skip ingestion instead of crashing.
        return None
    except Exception:
        # Any other ingestion error -> return None (VQA can still run on raw image)
        return None


# -------------------------------------------------
# Main VQA function with OpenAI -> Groq fallback
# -------------------------------------------------

def run_vqa(image_file, question: str) -> str:
    """
    High-level VQA helper.

    Parameters
    ----------
    image_file : werkzeug.datastructures.FileStorage
        The uploaded image from Flask (request.files["image"])
    question : str
        User's question about the figure

    Returns
    -------
    str
        Either an answer string or a readable error message.
    """
    if not question:
        return "No question provided."

    # Read raw bytes from the uploaded image
    image_bytes = image_file.read()
    if not image_bytes:
        return "Uploaded image is empty or could not be read."

    # Encode to base64 so we can send it as a data URL (for OpenAI vision).
    # FIX: the data URL previously hard-coded image/png for every upload;
    # sniff the real type so JPEG/GIF/WebP figures are labelled correctly.
    encoded = base64.b64encode(image_bytes).decode("utf-8")
    data_url = f"data:{_guess_mime(image_bytes)};base64,{encoded}"

    # 1) Ingestion step: try to extract metadata from the figure (caption, OCR, chart data)
    metadata = ingest_figure(data_url)

    meta_text_block = ""
    if metadata:
        bits = []
        if metadata.caption:
            bits.append(f"Caption: {metadata.caption}")
        if metadata.ocr_text:
            bits.append(f"OCR-like text: {metadata.ocr_text}")
        if metadata.chart_data:
            bits.append(f"Chart / table data: {metadata.chart_data}")
        if bits:
            meta_text_block = "Here is pre-extracted metadata about the figure:\n" + "\n".join(bits)

    openai_error_msg = None
    groq_error_msg = None

    # 2) Primary path: OpenAI multimodal VQA (uses raw image + metadata)
    if client_openai:
        try:
            messages = [
                {
                    "role": "system",
                    "content": (
                        "You are a helpful assistant that explains scientific figures, "
                        "especially in microbiology / microbial traits. "
                        "Use both the raw image and any provided metadata. "
                        "Be concise but clear, and focus on what the figure shows."
                    ),
                },
            ]

            user_content = []
            if meta_text_block:
                user_content.append({"type": "text", "text": meta_text_block + "\n\n"})
            user_content.append({"type": "text", "text": question})
            user_content.append({"type": "image_url", "image_url": {"url": data_url}})

            messages.append({"role": "user", "content": user_content})

            resp = client_openai.chat.completions.create(
                model="gpt-4o-mini",
                messages=messages,
                temperature=0.0,
            )

            answer = resp.choices[0].message.content
            if answer:
                return answer

            openai_error_msg = "Empty response from OpenAI VQA."

        except RateLimitError as e:
            openai_error_msg = f"OpenAI rate/credit limit error: {e}"
        except Exception as e:
            openai_error_msg = f"OpenAI VQA error: {e}"

    # 3) Fallback: Groq text-only VQA using the metadata (no direct image access)
    #
    # This still demonstrates the 'image-aware path' from the architecture:
    # image -> ingestion -> metadata -> answerer
    if client_groq and metadata:
        try:
            groq_prompt_parts = [
                "You answer questions about a scientific figure.",
                "You DO NOT see the original image, only pre-extracted metadata.",
                "",
                "Figure metadata:",
                f"- Caption: {metadata.caption or '(none)'}",
                f"- OCR-like text: {metadata.ocr_text or '(none)'}",
                f"- Chart / table data: {metadata.chart_data or '(none)'}",
                "",
                f"Question: {question}",
                "",
                "Answer based only on this metadata. If the metadata is insufficient, say so explicitly."
            ]

            groq_messages = [
                {
                    "role": "system",
                    "content": (
                        "You are a careful model that answers questions about scientific figures "
                        "using only structured metadata (caption, OCR text, extracted chart data). "
                        "Do NOT hallucinate details that are not supported by the metadata."
                    ),
                },
                {"role": "user", "content": "\n".join(groq_prompt_parts)},
            ]

            resp_groq = client_groq.chat.completions.create(
                model="llama-3.3-70b-versatile",
                messages=groq_messages,
                temperature=0.0,
            )

            g_answer = resp_groq.choices[0].message.content
            if g_answer:
                return (
                    g_answer
                    + "\n\n[Note: This fallback answer was generated from extracted metadata only; "
                    + "the Groq model did not see raw image pixels.]"
                )

            groq_error_msg = "Empty response from Groq (metadata-only) VQA."

        except Exception as e:
            groq_error_msg = f"Groq VQA error: {e}"

    # 4) If everything fails, return a combined error string
    error_lines = ["VQA failed with all providers."]
    if openai_error_msg:
        error_lines.append(openai_error_msg)
    if groq_error_msg:
        error_lines.append(groq_error_msg)

    return "\n".join(error_lines)