From 9be9f8e4820f927ae382a703914dd4b5da2a0645 Mon Sep 17 00:00:00 2001
From: Gaurav Vaidya <gaurav@ggvaidya.com>
Date: Thu, 9 Oct 2025 11:14:20 -0400
Subject: [PATCH 01/12] Replace log boost with stepped boost for
 clique_identifier_count.

Closes #205.
---
 api/server.py | 9 ++++++++-
 1 file changed, 8 insertions(+), 1 deletion(-)

diff --git a/api/server.py b/api/server.py
index 3c3ec88a..e5e42ecd 100755
--- a/api/server.py
+++ b/api/server.py
@@ -448,7 +448,14 @@ async def lookup(string: str,
                 "boost": [
                     # The boost is multiplied with score -- calculating the log() reduces how quickly this increases
                     # the score for increasing clique identifier counts.
-                    "log(sum(clique_identifier_count, 1))"
+                    # "log(sum(clique_identifier_count, 1))"
+                    #
+                    # However, this approach causes very large clique_identifier_count entries (like diphenhydramine, cic=1332)
+                    # to be returned when we don't have an otherwise good match. So instead we make it stepwise:
+                    #   - If clique_identifier_count > 1, we boost by 2x
+                    "if(gt(clique_identifier_count,1),2,1)",
+                    #   - If clique_identifier_count > 5, we boost by a further 2x
+                    "if(gt(clique_identifier_count,5),2,1)",
                 ],
             },
         },

From b24cd4d2fd60250ab252a1062c2ac14734ae3535 Mon Sep 17 00:00:00 2001
From: Gaurav Vaidya <gaurav@ggvaidya.com>
Date: Thu, 9 Oct 2025 22:53:17 -0400
Subject: [PATCH 02/12] Raised clique_identifier_count boost thresholds to 10 and 20.

---
 api/server.py | 10 ++++++----
 1 file changed, 6 insertions(+), 4 deletions(-)

diff --git a/api/server.py b/api/server.py
index 5915b425..9e427ddb 100755
--- a/api/server.py
+++ b/api/server.py
@@ -452,10 +452,12 @@ async def lookup(string: str,
                     #
                     # However, this approach causes very large clique_identifier_count entries (like diphenhydramine, cic=1332)
                     # to be returned when we don't have an otherwise good match. So instead we make it stepwise:
-                    #   - If clique_identifier_count > 1, we boost by 2x
-                    "if(gt(clique_identifier_count,1),2,1)",
-                    #   - If clique_identifier_count > 5, we boost by a further 2x
-                    "if(gt(clique_identifier_count,5),2,1)",
+                    #   - If clique_identifier_count == 1, we reduce the boost by 0.5x
+                    "if(eq(clique_identifier_count, 1), 1, 0.5)",
+                    #   - If clique_identifier_count > 10, we boost by a further 2x
+                    "if(gt(clique_identifier_count, 10), 2, 1)",
+                    #   - If clique_identifier_count > 20, we boost by a further 2x
+                    "if(gt(clique_identifier_count, 20), 2, 1)",
                 ],
             },
         },

From 953ffb0ef8c0b325a61069f2360bf808c637e93e Mon Sep 17 00:00:00 2001
From: Gaurav Vaidya <gaurav@renci.org>
Date: Mon, 10 Nov 2025 16:53:13 -0500
Subject: [PATCH 03/12] Tried to boost model organisms using bq.

---
 api/server.py | 14 +++++++++++---
 1 file changed, 11 insertions(+), 3 deletions(-)

diff --git a/api/server.py b/api/server.py
index 9e427ddb..b275a669 100755
--- a/api/server.py
+++ b/api/server.py
@@ -443,10 +443,18 @@ async def lookup(string: str,
                 # pf = phrase fields, i.e. how should we boost these fields if they contain the entire search phrase.
                 # https://solr.apache.org/guide/solr/latest/query-guide/dismax-query-parser.html#pf-phrase-fields-parameter
                 "pf": "preferred_name_exactish^300 names_exactish^200 preferred_name^30 names^20",
-                # Boosts
-                "bq": [],
+                "bq": [
+                    # Boost queries are run on the matching documents, and provide an ADDITIVE score to matching documents.
+                    # We'll use this to slightly boost model organisms. This shouldn't change their relative order except
+                    # against each other.
+                    "taxa:'NCBITaxon:9606'^5",      # Humans (Homo sapiens): this will +10 to any document that relates to humans.
+                    "taxa:'NCBITaxon:10090'^4",     # Mouse (Mus musculus)
+                    "taxa:'NCBITaxon:10116'^3",     # Rat (Rattus norvegicus)
+                    "taxa:'NCBITaxon:7955'^2",      # Zebrafish (Danio rerio)
+                    "taxa:'NCBITaxon:6239'^1",      # C. elegans
+                ],
                 "boost": [
-                    # The boost is multiplied with score -- calculating the log() reduces how quickly this increases
+                    # Boosts are MULTIPLIED with score -- calculating the log() reduces how quickly this increases
                     # the score for increasing clique identifier counts.
                     # "log(sum(clique_identifier_count, 1))"
                     #

From cd848edc3e05d2ffb349d836e0135053a8b0abb6 Mon Sep 17 00:00:00 2001
From: Gaurav Vaidya <gaurav@renci.org>
Date: Mon, 10 Nov 2025 17:11:25 -0500
Subject: [PATCH 04/12] Increase bqs so they have a real effect on the results.

---
 api/server.py | 13 ++++++-------
 1 file changed, 6 insertions(+), 7 deletions(-)

diff --git a/api/server.py b/api/server.py
index b275a669..7abf4590 100755
--- a/api/server.py
+++ b/api/server.py
@@ -445,13 +445,12 @@ async def lookup(string: str,
                 "pf": "preferred_name_exactish^300 names_exactish^200 preferred_name^30 names^20",
                 "bq": [
                     # Boost queries are run on the matching documents, and provide an ADDITIVE score to matching documents.
-                    # We'll use this to slightly boost model organisms. This shouldn't change their relative order except
-                    # against each other.
-                    "taxa:'NCBITaxon:9606'^5",      # Humans (Homo sapiens): this will +10 to any document that relates to humans.
-                    "taxa:'NCBITaxon:10090'^4",     # Mouse (Mus musculus)
-                    "taxa:'NCBITaxon:10116'^3",     # Rat (Rattus norvegicus)
-                    "taxa:'NCBITaxon:7955'^2",      # Zebrafish (Danio rerio)
-                    "taxa:'NCBITaxon:6239'^1",      # C. elegans
+                    # We'll use this to slightly boost model organisms.
+                    'taxa:"NCBITaxon:9606"^200',      # Humans (Homo sapiens): this will +10 to any document that relates to humans.
+                    'taxa:"NCBITaxon:10090"^100',     # Mouse (Mus musculus)
+                    'taxa:"NCBITaxon:10116"^80',      # Rat (Rattus norvegicus)
+                    'taxa:"NCBITaxon:7955"^60',       # Zebrafish (Danio rerio)
+                    'taxa:"NCBITaxon:6239"^40',       # C. elegans
                 ],
                 "boost": [
                     # Boosts are MULTIPLIED with score -- calculating the log() reduces how quickly this increases

From 092cd4ed61da21ff4e88ea70ebfaea52791e64bb Mon Sep 17 00:00:00 2001
From: Gaurav Vaidya <gaurav@renci.org>
Date: Mon, 10 Nov 2025 17:22:50 -0500
Subject: [PATCH 05/12] Changed bqs into boosts so they grow in proportion with
 the score.

Also increased boost for single identifier cliques.
---
 api/server.py | 22 +++++++++++-----------
 1 file changed, 11 insertions(+), 11 deletions(-)

diff --git a/api/server.py b/api/server.py
index 7abf4590..3c50cdea 100755
--- a/api/server.py
+++ b/api/server.py
@@ -443,15 +443,7 @@ async def lookup(string: str,
                 # pf = phrase fields, i.e. how should we boost these fields if they contain the entire search phrase.
                 # https://solr.apache.org/guide/solr/latest/query-guide/dismax-query-parser.html#pf-phrase-fields-parameter
                 "pf": "preferred_name_exactish^300 names_exactish^200 preferred_name^30 names^20",
-                "bq": [
-                    # Boost queries are run on the matching documents, and provide an ADDITIVE score to matching documents.
-                    # We'll use this to slightly boost model organisms.
-                    'taxa:"NCBITaxon:9606"^200',      # Humans (Homo sapiens): this will +10 to any document that relates to humans.
-                    'taxa:"NCBITaxon:10090"^100',     # Mouse (Mus musculus)
-                    'taxa:"NCBITaxon:10116"^80',      # Rat (Rattus norvegicus)
-                    'taxa:"NCBITaxon:7955"^60',       # Zebrafish (Danio rerio)
-                    'taxa:"NCBITaxon:6239"^40',       # C. elegans
-                ],
+                "bq": [],
                 "boost": [
                     # Boosts are MULTIPLIED with score -- calculating the log() reduces how quickly this increases
                     # the score for increasing clique identifier counts.
@@ -459,12 +451,20 @@ async def lookup(string: str,
                     #
                     # However, this approach causes very large clique_identifier_count entries (like diphenhydramine, cic=1332)
                     # to be returned when we don't have an otherwise good match. So instead we make it stepwise:
-                    #   - If clique_identifier_count == 1, we reduce the boost by 0.5x
-                    "if(eq(clique_identifier_count, 1), 1, 0.5)",
+                    #   - If clique_identifier_count == 1, we reduce the boost by 0.7x
+                    "if(eq(clique_identifier_count, 1), 1, 0.7)",
                     #   - If clique_identifier_count > 10, we boost by a further 2x
                     "if(gt(clique_identifier_count, 10), 2, 1)",
                     #   - If clique_identifier_count > 20, we boost by a further 2x
                     "if(gt(clique_identifier_count, 20), 2, 1)",
+                    # Slightly boost model organisms: humans, mice, rats, zebrafish and C. elegans
+                    '''sum(1,
+                        product(termfreq(taxa,"NCBITaxon:9606"),2),
+                        product(termfreq(taxa,"NCBITaxon:10090"),1.5),
+                        product(termfreq(taxa,"NCBITaxon:10116"),1.4),
+                        product(termfreq(taxa,"NCBITaxon:7955"),1.3),
+                        product(termfreq(taxa,"NCBITaxon:6239"),1.2)
+                    )'''
                 ],
             },
         },

From 71a96927e9dbebe4dbda48d795d8b9904480db79 Mon Sep 17 00:00:00 2001
From: Gaurav Vaidya <gaurav@renci.org>
Date: Mon, 10 Nov 2025 17:30:46 -0500
Subject: [PATCH 06/12] Removed boosts on pf, tweaked single identifier clique
 boosts.

---
 api/server.py | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/api/server.py b/api/server.py
index 3c50cdea..261809aa 100755
--- a/api/server.py
+++ b/api/server.py
@@ -437,12 +437,12 @@ async def lookup(string: str,
         "query": {
             "edismax": {
                 "query": query,
-                # qf = query fields, i.e. how should we boost these fields if they contain the same fields as the input.
+                # qf = query fields, i.e. how should we boost these fields if they contain the query terms.
                 # https://solr.apache.org/guide/solr/latest/query-guide/dismax-query-parser.html#qf-query-fields-parameter
                 "qf": "preferred_name_exactish^250 names_exactish^100 preferred_name^25 names^10",
-                # pf = phrase fields, i.e. how should we boost these fields if they contain the entire search phrase.
+                # pf = phrase fields, i.e. how should we boost these fields if they contain query terms close together.
                 # https://solr.apache.org/guide/solr/latest/query-guide/dismax-query-parser.html#pf-phrase-fields-parameter
-                "pf": "preferred_name_exactish^300 names_exactish^200 preferred_name^30 names^20",
+                "pf": "preferred_name_exactish names_exactish preferred_name names",
                 "bq": [],
                 "boost": [
                     # Boosts are MULTIPLIED with score -- calculating the log() reduces how quickly this increases
@@ -452,7 +452,7 @@ async def lookup(string: str,
                     # However, this approach causes very large clique_identifier_count entries (like diphenhydramine, cic=1332)
                     # to be returned when we don't have an otherwise good match. So instead we make it stepwise:
                     #   - If clique_identifier_count == 1, we reduce the boost by 0.7x
-                    "if(eq(clique_identifier_count, 1), 1, 0.7)",
+                    "if(eq(clique_identifier_count, 1), 0.7, 1)",
                     #   - If clique_identifier_count > 10, we boost by a further 2x
                     "if(gt(clique_identifier_count, 10), 2, 1)",
                     #   - If clique_identifier_count > 20, we boost by a further 2x

From c224b6809451bbeff9ffbca60d9074df52a5a290 Mon Sep 17 00:00:00 2001
From: Gaurav Vaidya <gaurav@renci.org>
Date: Mon, 8 Dec 2025 15:32:24 -0500
Subject: [PATCH 07/12] Got rid of pf entirely, tweaked boosts.

---
 api/server.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/api/server.py b/api/server.py
index d32bff8f..0d95cf50 100755
--- a/api/server.py
+++ b/api/server.py
@@ -450,7 +450,7 @@ async def lookup(string: str,
                 "qf": "preferred_name_exactish^250 names_exactish^100 preferred_name^25 names^10",
                 # pf = phrase fields, i.e. how should we boost these fields if they contain query terms close together.
                 # https://solr.apache.org/guide/solr/latest/query-guide/dismax-query-parser.html#pf-phrase-fields-parameter
-                "pf": "preferred_name_exactish names_exactish preferred_name names",
+                # "pf": "preferred_name_exactish names_exactish preferred_name names",
                 "bq": [],
                 "boost": [
                     # Boosts are MULTIPLIED with score -- calculating the log() reduces how quickly this increases
@@ -463,8 +463,8 @@ async def lookup(string: str,
                     "if(eq(clique_identifier_count, 1), 0.7, 1)",
                     #   - If clique_identifier_count > 10, we boost by a further 2x
                     "if(gt(clique_identifier_count, 10), 2, 1)",
-                    #   - If clique_identifier_count > 20, we boost by a further 2x
-                    "if(gt(clique_identifier_count, 20), 2, 1)",
+                    #   - If clique_identifier_count > 20, we boost by a further 3x
+                    "if(gt(clique_identifier_count, 20), 3, 1)",
                     # Slightly boost model organisms: humans, mice, rats, zebrafish and C. elegans
                     '''sum(1,
                         product(termfreq(taxa,"NCBITaxon:9606"),2),

From 6ea0e9b7144f659e26e53faddf56b6c841a00b13 Mon Sep 17 00:00:00 2001
From: Gaurav Vaidya <gaurav@renci.org>
Date: Mon, 8 Dec 2025 16:09:31 -0500
Subject: [PATCH 08/12] Re-enabled pf and shifted qf weights toward exactish fields.

---
 api/server.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/api/server.py b/api/server.py
index 0d95cf50..907b31d3 100755
--- a/api/server.py
+++ b/api/server.py
@@ -447,15 +447,15 @@ async def lookup(string: str,
                 "query": query,
                 # qf = query fields, i.e. how should we boost these fields if they contain the query terms.
                 # https://solr.apache.org/guide/solr/latest/query-guide/dismax-query-parser.html#qf-query-fields-parameter
-                "qf": "preferred_name_exactish^250 names_exactish^100 preferred_name^25 names^10",
+                "qf": "preferred_name_exactish^300 names_exactish^200 preferred_name^10 names^5",
                 # pf = phrase fields, i.e. how should we boost these fields if they contain query terms close together.
                 # https://solr.apache.org/guide/solr/latest/query-guide/dismax-query-parser.html#pf-phrase-fields-parameter
-                # "pf": "preferred_name_exactish names_exactish preferred_name names",
+                "pf": "preferred_name_exactish^300 names_exactish^200 preferred_name^10 names^5",
                 "bq": [],
                 "boost": [
                     # Boosts are MULTIPLIED with score -- calculating the log() reduces how quickly this increases
                     # the score for increasing clique identifier counts.
-                    # "log(sum(clique_identifier_count, 1))"
+                    # "max(log(sum(clique_identifier_count, 1)), 4)",
                     #
                     # However, this approach causes very large clique_identifier_count entries (like diphenhydramine, cic=1332)
                     # to be returned when we don't have an otherwise good match. So instead we make it stepwise:

From 8b4998a8f508da89fd10814a892ca8e0c45616b1 Mon Sep 17 00:00:00 2001
From: Gaurav Vaidya <gaurav@renci.org>
Date: Mon, 8 Dec 2025 16:11:30 -0500
Subject: [PATCH 09/12] Reduced phrase boosts.

---
 api/server.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/api/server.py b/api/server.py
index 907b31d3..5115daf5 100755
--- a/api/server.py
+++ b/api/server.py
@@ -450,7 +450,7 @@ async def lookup(string: str,
                 "qf": "preferred_name_exactish^300 names_exactish^200 preferred_name^10 names^5",
                 # pf = phrase fields, i.e. how should we boost these fields if they contain query terms close together.
                 # https://solr.apache.org/guide/solr/latest/query-guide/dismax-query-parser.html#pf-phrase-fields-parameter
-                "pf": "preferred_name_exactish^300 names_exactish^200 preferred_name^10 names^5",
+                "pf": "preferred_name_exactish^30 names_exactish^20 preferred_name^10 names^5",
                 "bq": [],
                 "boost": [
                     # Boosts are MULTIPLIED with score -- calculating the log() reduces how quickly this increases

From 0e0760112e44cfb4b70bc73ccbda661940c44225 Mon Sep 17 00:00:00 2001
From: Gaurav Vaidya <gaurav@renci.org>
Date: Fri, 12 Dec 2025 17:35:34 -0500
Subject: [PATCH 10/12] Retuned qf/pf weights and raised model organism boosts.

---
 api/server.py | 14 +++++++-------
 1 file changed, 7 insertions(+), 7 deletions(-)

diff --git a/api/server.py b/api/server.py
index 5115daf5..e7fbe90c 100755
--- a/api/server.py
+++ b/api/server.py
@@ -447,10 +447,10 @@ async def lookup(string: str,
                 "query": query,
                 # qf = query fields, i.e. how should we boost these fields if they contain the query terms.
                 # https://solr.apache.org/guide/solr/latest/query-guide/dismax-query-parser.html#qf-query-fields-parameter
-                "qf": "preferred_name_exactish^300 names_exactish^200 preferred_name^10 names^5",
+                "qf": "preferred_name_exactish^400 names_exactish^300 preferred_name^4 names^2",
                 # pf = phrase fields, i.e. how should we boost these fields if they contain query terms close together.
                 # https://solr.apache.org/guide/solr/latest/query-guide/dismax-query-parser.html#pf-phrase-fields-parameter
-                "pf": "preferred_name_exactish^30 names_exactish^20 preferred_name^10 names^5",
+                "pf": "preferred_name_exactish^20 names_exactish^10 preferred_name^4 names^2",
                 "bq": [],
                 "boost": [
                     # Boosts are MULTIPLIED with score -- calculating the log() reduces how quickly this increases
@@ -467,11 +467,11 @@ async def lookup(string: str,
                     "if(gt(clique_identifier_count, 20), 3, 1)",
                     # Slightly boost model organisms: humans, mice, rats, zebrafish and C. elegans
                     '''sum(1,
-                        product(termfreq(taxa,"NCBITaxon:9606"),2),
-                        product(termfreq(taxa,"NCBITaxon:10090"),1.5),
-                        product(termfreq(taxa,"NCBITaxon:10116"),1.4),
-                        product(termfreq(taxa,"NCBITaxon:7955"),1.3),
-                        product(termfreq(taxa,"NCBITaxon:6239"),1.2)
+                        product(termfreq(taxa,"NCBITaxon:9606"),10),
+                        product(termfreq(taxa,"NCBITaxon:10090"),5),
+                        product(termfreq(taxa,"NCBITaxon:10116"),4),
+                        product(termfreq(taxa,"NCBITaxon:7955"),3),
+                        product(termfreq(taxa,"NCBITaxon:6239"),2)
                     )'''
                 ],
             },

From 90e65d2ae88a3d9dff249de2845b78d4230a0025 Mon Sep 17 00:00:00 2001
From: Gaurav Vaidya <gaurav@renci.org>
Date: Fri, 12 Dec 2025 17:57:36 -0500
Subject: [PATCH 11/12] Merged clique count steps, boosted non-taxon-specific docs.

---
 api/server.py | 18 +++++++++---------
 1 file changed, 9 insertions(+), 9 deletions(-)

diff --git a/api/server.py b/api/server.py
index e7fbe90c..c6b12b88 100755
--- a/api/server.py
+++ b/api/server.py
@@ -447,7 +447,7 @@ async def lookup(string: str,
                 "query": query,
                 # qf = query fields, i.e. how should we boost these fields if they contain the query terms.
                 # https://solr.apache.org/guide/solr/latest/query-guide/dismax-query-parser.html#qf-query-fields-parameter
-                "qf": "preferred_name_exactish^400 names_exactish^300 preferred_name^4 names^2",
+                "qf": "preferred_name_exactish^500 names_exactish^400 preferred_name^4 names^2",
                 # pf = phrase fields, i.e. how should we boost these fields if they contain query terms close together.
                 # https://solr.apache.org/guide/solr/latest/query-guide/dismax-query-parser.html#pf-phrase-fields-parameter
                 "pf": "preferred_name_exactish^20 names_exactish^10 preferred_name^4 names^2",
@@ -460,18 +460,18 @@ async def lookup(string: str,
                     # However, this approach causes very large clique_identifier_count entries (like diphenhydramine, cic=1332)
                     # to be returned when we don't have an otherwise good match. So instead we make it stepwise:
                     #   - If clique_identifier_count == 1, we reduce the boost by 0.7x
-                    "if(eq(clique_identifier_count, 1), 0.7, 1)",
+                    # "if(eq(clique_identifier_count, 1), 0.7, 1)",
                     #   - If clique_identifier_count > 10, we boost by a further 2x
-                    "if(gt(clique_identifier_count, 10), 2, 1)",
+                    "if(gt(clique_identifier_count, 20), 5, if(gt(clique_identifier_count, 10), 3, 1))",
                     #   - If clique_identifier_count > 20, we boost by a further 3x
-                    "if(gt(clique_identifier_count, 20), 3, 1)",
                     # Slightly boost model organisms: humans, mice, rats, zebrafish and C. elegans
                     '''sum(1,
-                        product(termfreq(taxa,"NCBITaxon:9606"),10),
-                        product(termfreq(taxa,"NCBITaxon:10090"),5),
-                        product(termfreq(taxa,"NCBITaxon:10116"),4),
-                        product(termfreq(taxa,"NCBITaxon:7955"),3),
-                        product(termfreq(taxa,"NCBITaxon:6239"),2)
+                        if(not(taxon_specific), 100, 0),
+                        product(termfreq(taxa,"NCBITaxon:9606"),100),
+                        product(termfreq(taxa,"NCBITaxon:10090"),40),
+                        product(termfreq(taxa,"NCBITaxon:10116"),30),
+                        product(termfreq(taxa,"NCBITaxon:7955"),20),
+                        product(termfreq(taxa,"NCBITaxon:6239"),10)
                     )'''
                 ],
             },

From 70e683657534a50304a79af6b6278b8c06445956 Mon Sep 17 00:00:00 2001
From: Gaurav Vaidya <gaurav@renci.org>
Date: Thu, 18 Dec 2025 17:20:43 -0500
Subject: [PATCH 12/12] Added on:push triggers for testing.

---
 .github/workflows/release-name-resolution.yml | 1 +
 .github/workflows/release-nameres-loading.yml | 1 +
 2 files changed, 2 insertions(+)

diff --git a/.github/workflows/release-name-resolution.yml b/.github/workflows/release-name-resolution.yml
index e5773778..1b349b27 100644
--- a/.github/workflows/release-name-resolution.yml
+++ b/.github/workflows/release-name-resolution.yml
@@ -1,6 +1,7 @@
 name: 'Release a new version of NameResolution to Github Packages'
 
 on:
+    push:
     release:
         types: [published]
 
diff --git a/.github/workflows/release-nameres-loading.yml b/.github/workflows/release-nameres-loading.yml
index cc7e3753..0c94e7be 100644
--- a/.github/workflows/release-nameres-loading.yml
+++ b/.github/workflows/release-nameres-loading.yml
@@ -1,6 +1,7 @@
 name: 'Release a new version of NameResolution Data Loading to Github Packages'
 
 on:
+    push:
     release:
         types: [published]