Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions .github/workflows/release-name-resolution.yml
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
name: 'Release a new version of NameResolution to Github Packages'

on:
push:
release:
types: [published]

Expand Down
1 change: 1 addition & 0 deletions .github/workflows/release-nameres-loading.yml
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
name: 'Release a new version of NameResolution Data Loading to Github Packages'

on:
push:
release:
types: [published]

Expand Down
30 changes: 23 additions & 7 deletions api/server.py
Original file line number Diff line number Diff line change
Expand Up @@ -455,18 +455,34 @@ async def lookup(string: str,
"query": {
"edismax": {
"query": query,
# qf = query fields, i.e. how should we boost these fields if they contain the same fields as the input.
# qf = query fields, i.e. how should we boost these fields if they contain the query terms.
# https://solr.apache.org/guide/solr/latest/query-guide/dismax-query-parser.html#qf-query-fields-parameter
"qf": "preferred_name_exactish^250 names_exactish^100 preferred_name^25 names^10",
# pf = phrase fields, i.e. how should we boost these fields if they contain the entire search phrase.
"qf": "preferred_name_exactish^500 names_exactish^400 preferred_name^4 names^2",
# pf = phrase fields, i.e. how should we boost these fields if they contain query terms close together.
# https://solr.apache.org/guide/solr/latest/query-guide/dismax-query-parser.html#pf-phrase-fields-parameter
"pf": "preferred_name_exactish^300 names_exactish^200 preferred_name^30 names^20",
# Boosts
"pf": "preferred_name_exactish^20 names_exactish^10 preferred_name^4 names^2",
"bq": [],
"boost": [
# The boost is multiplied with score -- calculating the log() reduces how quickly this increases
# Boosts are MULTIPLIED with score -- calculating the log() reduces how quickly this increases
# the score for increasing clique identifier counts.
"log(sum(clique_identifier_count, 1))"
# "max(log(sum(clique_identifier_count, 1)), 4)",
#
# However, this approach causes very large clique_identifier_count entries (like diphenhydramine, cic=1332)
# to be returned when we don't have an otherwise good match. So instead we make it stepwise:
# - If clique_identifier_count == 1, we reduce the boost by 0.7x
# "if(eq(clique_identifier_count, 1), 0.7, 1)",
# - If clique_identifier_count > 10, we boost by a further 2x
"if(gt(clique_identifier_count, 20), 5, if(gt(clique_identifier_count, 10), 3, 1))",
# - If clique_identifier_count > 20, we boost by a further 3x
# Slightly boost model organisms: humans, mice, rats, zebrafish and C. elegans
'''sum(1,
if(not(taxon_specific), 100, 0),
product(termfreq(taxa,"NCBITaxon:9606"),100),
product(termfreq(taxa,"NCBITaxon:10090"),40),
product(termfreq(taxa,"NCBITaxon:10116"),30),
product(termfreq(taxa,"NCBITaxon:7955"),20),
product(termfreq(taxa,"NCBITaxon:6239"),10)
)'''
],
},
},
Expand Down
Loading