NCATSTranslator · gaurav · Oct 9, 2025 · Oct 9, 2025 · Oct 10, 2025 · Nov 10, 2025
diff --git a/.github/workflows/release-name-resolution.yml b/.github/workflows/release-name-resolution.yml
@@ -1,6 +1,7 @@
 name: 'Release a new version of NameResolution to Github Packages'
 
 on:
+    push:
     release:
         types: [published]
 

diff --git a/.github/workflows/release-nameres-loading.yml b/.github/workflows/release-nameres-loading.yml
@@ -1,6 +1,7 @@
 name: 'Release a new version of NameResolution Data Loading to Github Packages'
 
 on:
+    push:
     release:
         types: [published]
 

diff --git a/api/server.py b/api/server.py
@@ -455,18 +455,34 @@ async def lookup(string: str,
         "query": {
             "edismax": {
                 "query": query,
-                # qf = query fields, i.e. how should we boost these fields if they contain the same fields as the input.
+                # qf = query fields, i.e. how should we boost these fields if they contain the query terms.
                 # https://solr.apache.org/guide/solr/latest/query-guide/dismax-query-parser.html#qf-query-fields-parameter
-                "qf": "preferred_name_exactish^250 names_exactish^100 preferred_name^25 names^10",
-                # pf = phrase fields, i.e. how should we boost these fields if they contain the entire search phrase.
+                "qf": "preferred_name_exactish^500 names_exactish^400 preferred_name^4 names^2",
+                # pf = phrase fields, i.e. how should we boost these fields if they contain query terms close together.
                 # https://solr.apache.org/guide/solr/latest/query-guide/dismax-query-parser.html#pf-phrase-fields-parameter
-                "pf": "preferred_name_exactish^300 names_exactish^200 preferred_name^30 names^20",
-                # Boosts
+                "pf": "preferred_name_exactish^20 names_exactish^10 preferred_name^4 names^2",
                 "bq": [],
                 "boost": [
-                    # The boost is multiplied with score -- calculating the log() reduces how quickly this increases
+                    # Boosts are MULTIPLIED with score -- calculating the log() reduces how quickly this increases
                     # the score for increasing clique identifier counts.
-                    "log(sum(clique_identifier_count, 1))"
+                    # "max(log(sum(clique_identifier_count, 1)), 4)",
+                    #
+                    # However, this approach causes very large clique_identifier_count entries (like diphenhydramine, cic=1332)
+                    # to be returned when we don't have an otherwise good match. So instead we make it stepwise:
+                    #   - If clique_identifier_count == 1, we reduce the boost by 0.7x
+                    # "if(eq(clique_identifier_count, 1), 0.7, 1)",
+                    #   - If clique_identifier_count > 10, we boost by a further 2x
+                    "if(gt(clique_identifier_count, 20), 5, if(gt(clique_identifier_count, 10), 3, 1))",
+                    #   - If clique_identifier_count > 20, we boost by a further 3x
+                    # Slightly boost model organisms: humans, mice, rats, zebrafish and C. elegans
+                    '''sum(1,
+                        if(not(taxon_specific), 100, 0),
+                        product(termfreq(taxa,"NCBITaxon:9606"),100),
+                        product(termfreq(taxa,"NCBITaxon:10090"),40),
+                        product(termfreq(taxa,"NCBITaxon:10116"),30),
+                        product(termfreq(taxa,"NCBITaxon:7955"),20),
+                        product(termfreq(taxa,"NCBITaxon:6239"),10)
+                    )'''
                 ],
             },
         },