From f17aeb136c88786ba530ea99e8589b92d9944892 Mon Sep 17 00:00:00 2001 From: Michael Freeman Date: Tue, 10 Mar 2026 10:44:09 -0500 Subject: [PATCH] wip: non-actor topical/activity questions can now derive topic_terms and use the shared hybrid retriever --- .../threadr/lib/threadr/ml/constrained_qa.ex | 28 +++++++--- .../threadr/test/support/ml_test_support.ex | 3 ++ .../threadr/control_plane/bot_qa_test.exs | 3 +- .../test/threadr/ml/constrained_qa_test.exs | 52 +++++++++++++++++++ 4 files changed, 77 insertions(+), 9 deletions(-) diff --git a/elixir/threadr/lib/threadr/ml/constrained_qa.ex b/elixir/threadr/lib/threadr/ml/constrained_qa.ex index 8707b7e..3974545 100644 --- a/elixir/threadr/lib/threadr/ml/constrained_qa.ex +++ b/elixir/threadr/lib/threadr/ml/constrained_qa.ex @@ -47,6 +47,7 @@ defmodule Threadr.ML.ConstrainedQA do "to", "want", "what", + "was", "when", "where", "which", @@ -205,16 +206,19 @@ defmodule Threadr.ML.ConstrainedQA do ), {:ok, payload} <- parse_payload(result.content), "constrained_qa" <- Map.get(payload, "route") do + literal_terms = normalize_literal_terms(Map.get(payload, "literal_terms", [])) + focus = normalize_focus(Map.get(payload, "focus")) + {:ok, %{ actors: normalize_refs(Map.get(payload, "actors", [])), counterpart_actors: normalize_refs(Map.get(payload, "counterpart_actors", [])), - literal_terms: normalize_literal_terms(Map.get(payload, "literal_terms", [])), - topic_terms: [], + literal_terms: literal_terms, + topic_terms: infer_topic_terms(question, literal_terms, focus), literal_match: normalize_literal_match(Map.get(payload, "literal_match")), time_scope: normalize_time_scope(Map.get(payload, "time_scope")), scope_current_channel: Map.get(payload, "scope_current_channel") == true, - focus: normalize_focus(Map.get(payload, "focus")) + focus: focus }} else "fallback" -> {:error, :fallback} @@ -229,18 +233,18 @@ defmodule Threadr.ML.ConstrainedQA do if actors == [] do {:error, :fallback} else - literal_terms = heuristic_literal_terms(question, actors) + topic_terms = heuristic_topic_terms(question, actors) {:ok, %{ actors: actors, counterpart_actors: [], literal_terms: [], - topic_terms: literal_terms, + topic_terms: topic_terms, literal_match: "all", time_scope: infer_time_scope(question), scope_current_channel: Keyword.get(opts, :requester_channel_name) != nil, - focus: heuristic_focus(literal_terms), + focus: heuristic_focus(topic_terms), requester_channel_name: Keyword.get(opts, :requester_channel_name), pair_required: false }} @@ -299,7 +303,7 @@ defmodule Threadr.ML.ConstrainedQA do {:ok, matches, query_metadata(constraints, "literal_term_messages")} end - constraints.actors != [] and Map.get(constraints, :topic_terms, []) != [] -> + Map.get(constraints, :topic_terms, []) != [] -> case fetch_hybrid_topic_matches(tenant_schema, constraints, opts) do [] -> matches = fetch_direct_matches(tenant_schema, constraints, opts) @@ -883,7 +887,7 @@ defmodule Threadr.ML.ConstrainedQA do defp normalize_literal_terms(_value), do: [] - defp heuristic_literal_terms(question, actors) do + defp heuristic_topic_terms(question, actors) do actor_terms = actors |> Enum.flat_map(fn actor -> [actor.handle, actor.display_name, actor.external_id] end) @@ -908,6 +912,14 @@ defmodule Threadr.ML.ConstrainedQA do |> Enum.take(4) end + defp infer_topic_terms(_question, [_ | _], _focus), do: [] + + defp infer_topic_terms(question, [], focus) when focus in ["topics", "activity"] do + heuristic_topic_terms(question, []) + end + + defp infer_topic_terms(_question, [], _focus), do: [] + defp heuristic_focus([]), do: "activity" defp heuristic_focus(_terms), do: "topics" diff --git a/elixir/threadr/test/support/ml_test_support.ex b/elixir/threadr/test/support/ml_test_support.ex index e74e5d2..9e80c5f 100644 --- a/elixir/threadr/test/support/ml_test_support.ex +++ b/elixir/threadr/test/support/ml_test_support.ex @@ -156,6 +156,9 @@ defmodule Threadr.TestConstraintGenerationProvider do String.contains?(prompt, "who has mentioned 1488?") -> ~s({"route":"constrained_qa","actors":[],"counterpart_actors":[],"literal_terms":["1488"],"literal_match":"all","time_scope":"none","scope_current_channel":true,"focus":"activity"}) + String.contains?(prompt, "who was first up today?") -> + ~s({"route":"constrained_qa","actors":[],"counterpart_actors":[],"literal_terms":[],"literal_match":"all","time_scope":"today","scope_current_channel":true,"focus":"activity"}) + String.contains?(prompt, "what did fysty talk about today with leku?") -> ~s({"route":"constrained_qa","actors":["fysty"],"counterpart_actors":["leku"],"time_scope":"today","scope_current_channel":false,"focus":"topics"}) diff --git a/elixir/threadr/test/threadr/control_plane/bot_qa_test.exs b/elixir/threadr/test/threadr/control_plane/bot_qa_test.exs index b09249a..e4267ff 100644 --- a/elixir/threadr/test/threadr/control_plane/bot_qa_test.exs +++ b/elixir/threadr/test/threadr/control_plane/bot_qa_test.exs @@ -196,8 +196,9 @@ defmodule Threadr.ControlPlane.BotQATest do ) assert result.mode == :constrained_qa - assert result.query.retrieval == "filtered_messages" + assert result.query.retrieval == "hybrid_topic_messages" assert result.query.actor_handles == ["farmr"] + assert result.query.topic_terms == ["farmr"] assert result.answer.content =~ "what did farmr talk about today?" end diff --git a/elixir/threadr/test/threadr/ml/constrained_qa_test.exs b/elixir/threadr/test/threadr/ml/constrained_qa_test.exs index 2ace667..a3df475 100644 --- a/elixir/threadr/test/threadr/ml/constrained_qa_test.exs +++ b/elixir/threadr/test/threadr/ml/constrained_qa_test.exs @@ -467,6 +467,58 @@ defmodule Threadr.ML.ConstrainedQATest do assert result.context =~ "umom" end + test "answers channel-scoped topical activity questions from older same-day evidence" do + tenant = create_tenant!("Constrained QA Channel Topic Activity") + thanew = create_actor!(tenant.schema_name, "THANEW") + larsinio = create_actor!(tenant.schema_name, "larsinio") + channel = create_channel!(tenant.schema_name, "#!chases") + now = DateTime.utc_now() |> DateTime.truncate(:second) + + create_message!( + tenant.schema_name, + thanew.id, + channel.id, + "first up", + "first-up-1", + now + ) + + create_message!( + tenant.schema_name, + larsinio.id, + channel.id, + "says u", + "first-up-2", + DateTime.add(now, 60, :second) + ) + + for index <- 1..10 do + create_message!( + tenant.schema_name, + larsinio.id, + channel.id, + "later chatter #{index}", + "later-#{index}", + DateTime.add(now, 600 + index, :second) + ) + end + + assert {:ok, result} = + ConstrainedQA.answer_question( + tenant.subject_name, + "who was first up today?", + requester_channel_name: "#!chases", + generation_provider: Threadr.TestConstraintGenerationProvider, + generation_model: "test-chat" + ) + + assert result.query.retrieval == "hybrid_topic_messages" + assert result.query.topic_terms == ["first", "up"] + assert result.query.channel_name == "#!chases" + assert result.context =~ "first up" + refute result.context =~ "later chatter 10" + end + defp create_tenant!(prefix) do suffix = System.unique_integer([:positive])