From 574f0c9624992695d7e093f38e62a716d7af0c3a Mon Sep 17 00:00:00 2001 From: Olin Blodgett Date: Fri, 10 Apr 2026 04:15:52 -0400 Subject: [PATCH 1/2] fix: move speciesOrder logic to base Indexer and fix non-self key values The buildSpeciesOrder method was setting non-self species keys to each species' own phylogenetic order from the lookup map. This meant all non-self documents shared the same speciesOrder value for a given focus taxon key, causing species to interleave on gene page disease/expression tables. Now all non-self keys are set to the subject species' phylogenetic order, giving each species a unique value when sorted by speciesOrder.{taxonId}. Also consolidates the duplicated SpeciesInterface proxy, species lookup, and buildSpeciesOrder from both DiseaseAnnotationCurationIndexer and GeneExpressionAnnotationIndexer into the shared Indexer base class with lazy initialization on first use. --- .../indexer/TestSpeciesOrder.java | 5 +- .../indexer/indexers/Indexer.java | 55 +++++++++++++++++++ .../DiseaseAnnotationCurationIndexer.java | 29 ---------- .../GeneExpressionAnnotationIndexer.java | 28 ---------- 4 files changed, 58 insertions(+), 59 deletions(-) diff --git a/agr_indexer/src/main/java/org/alliancegenome/indexer/TestSpeciesOrder.java b/agr_indexer/src/main/java/org/alliancegenome/indexer/TestSpeciesOrder.java index b314193a2..8dd34fc07 100644 --- a/agr_indexer/src/main/java/org/alliancegenome/indexer/TestSpeciesOrder.java +++ b/agr_indexer/src/main/java/org/alliancegenome/indexer/TestSpeciesOrder.java @@ -57,8 +57,9 @@ public static void main(String[] args) throws Exception { private static HashMap buildSpeciesOrder(Map speciesOrderLookup, String taxonCurie) { HashMap order = new HashMap<>(); String subjectTaxonIdPart = taxonCurie.replace("NCBITaxon:", ""); - for (Map.Entry entry : speciesOrderLookup.entrySet()) { - order.put(entry.getKey(), entry.getValue()); + Integer subjectOrder = speciesOrderLookup.getOrDefault(subjectTaxonIdPart, 0); + for (String key : 
speciesOrderLookup.keySet()) { + order.put(key, subjectOrder); } order.put(subjectTaxonIdPart, 0); return order; diff --git a/agr_indexer/src/main/java/org/alliancegenome/indexer/indexers/Indexer.java b/agr_indexer/src/main/java/org/alliancegenome/indexer/indexers/Indexer.java index 2133061ef..dab10eea3 100644 --- a/agr_indexer/src/main/java/org/alliancegenome/indexer/indexers/Indexer.java +++ b/agr_indexer/src/main/java/org/alliancegenome/indexer/indexers/Indexer.java @@ -11,12 +11,18 @@ import java.util.concurrent.LinkedBlockingDeque; import java.util.concurrent.TimeUnit; +import org.alliancegenome.core.config.ConfigHelper; import org.alliancegenome.core.util.StatsCollector; import org.alliancegenome.curation_api.model.document.es.ESDocument; +import org.alliancegenome.curation_api.model.entities.Species; +import org.alliancegenome.es.rest.RestConfig; import org.alliancegenome.es.util.EsClientFactory; import org.alliancegenome.es.util.ProcessDisplayHelper; import org.alliancegenome.exceptional.client.ExceptionCatcher; import org.alliancegenome.indexer.config.IndexerConfig; +import org.alliancegenome.indexer.indexers.curation.interfaces.SpeciesInterface; + +import si.mazi.rescu.RestProxyFactory; import org.elasticsearch.action.DocWriteRequest; import org.elasticsearch.action.bulk.BackoffPolicy; import org.elasticsearch.action.bulk.BulkProcessor; @@ -43,6 +49,10 @@ public abstract class Indexer extends Thread { public static String indexName; protected IndexerConfig indexerConfig; private RestHighLevelClient searchClient; + + private final SpeciesInterface speciesApi = RestProxyFactory.createProxy(SpeciesInterface.class, ConfigHelper.getCurationApiUrl(), RestConfig.config); + // taxonIdPart (e.g. 
"9606") -> phylogeneticOrder + protected Map speciesOrderLookup; protected Runtime runtime = Runtime.getRuntime(); protected DecimalFormat df = new DecimalFormat("#"); protected ObjectMapper om = new ObjectMapper(); @@ -230,6 +240,51 @@ protected List> partition(List list, int size) { return parts; } + protected void loadSpeciesOrderLookup() { + speciesOrderLookup = new HashMap<>(); + List allSpecies = speciesApi.findForPublic(0, 100, "FieldsOnly", new HashMap<>()).getResults(); + for (Species species : allSpecies) { + if (species.getTaxon() != null && species.getPhylogeneticOrder() != null) { + String taxonIdPart = species.getTaxon().getCurie().replace("NCBITaxon:", ""); + speciesOrderLookup.put(taxonIdPart, species.getPhylogeneticOrder()); + } + } + log.info("Loaded " + speciesOrderLookup.size() + " species for speciesOrder lookup"); + } + + /** + * Builds a speciesOrder map for an ES document based on its subject's taxon. + * + * The returned map has one entry per species, keyed by NCBI taxon ID part (e.g. "9606"). + * The subject's own species is set to 0; all other species are set to the subject's + * phylogenetic order value. This allows the API to sort by speciesOrder.{taxonId} + * and get the focus species first (0), with all other species sorted by their own + * phylogenetic position — because each species' documents carry that species' own + * phylogenetic order as the non-self value. + * + * Example for a Rat document (phylogeneticOrder=20): + * { "9606": 20, "10116": 0, "10090": 20, "7955": 20, ... } + * + * Example for a Human document (phylogeneticOrder=10): + * { "9606": 0, "10116": 10, "10090": 10, "7955": 10, ... } + * + * When the API sorts by speciesOrder.10090 (mouse gene page), documents sort as: + * Mouse=0, Human=10, Rat=20, Zebrafish=40, ... 
(each species has a unique value) + */ + protected HashMap buildSpeciesOrder(String taxonCurie) { + if (speciesOrderLookup == null) { + loadSpeciesOrderLookup(); + } + HashMap order = new HashMap<>(); + String subjectTaxonIdPart = taxonCurie.replace("NCBITaxon:", ""); + Integer subjectOrder = speciesOrderLookup.getOrDefault(subjectTaxonIdPart, 0); + for (String key : speciesOrderLookup.keySet()) { + order.put(key, subjectOrder); + } + order.put(subjectTaxonIdPart, 0); + return order; + } + protected abstract void index(ProcessDisplayHelper display); protected abstract void startSingleThread(LinkedBlockingDeque queue); diff --git a/agr_indexer/src/main/java/org/alliancegenome/indexer/indexers/curation/DiseaseAnnotationCurationIndexer.java b/agr_indexer/src/main/java/org/alliancegenome/indexer/indexers/curation/DiseaseAnnotationCurationIndexer.java index 2a816be25..5112eeee3 100644 --- a/agr_indexer/src/main/java/org/alliancegenome/indexer/indexers/curation/DiseaseAnnotationCurationIndexer.java +++ b/agr_indexer/src/main/java/org/alliancegenome/indexer/indexers/curation/DiseaseAnnotationCurationIndexer.java @@ -20,7 +20,6 @@ import org.alliancegenome.api.entity.DiseaseAnnotationDocument; import org.alliancegenome.api.entity.GeneDiseaseAnnotationDocument; import org.alliancegenome.core.helpers.DiseaseAnnotationHelper; -import org.alliancegenome.core.config.ConfigHelper; import org.alliancegenome.curation_api.model.entities.AGMDiseaseAnnotation; import org.alliancegenome.curation_api.model.entities.AffectedGenomicModel; import org.alliancegenome.curation_api.model.entities.Allele; @@ -32,7 +31,6 @@ import org.alliancegenome.curation_api.model.entities.Gene; import org.alliancegenome.curation_api.model.entities.GeneDiseaseAnnotation; import org.alliancegenome.curation_api.model.entities.Reference; -import org.alliancegenome.curation_api.model.entities.Species; import org.alliancegenome.curation_api.model.entities.VocabularyTerm; import 
org.alliancegenome.curation_api.model.entities.base.SubmittedObject; import org.alliancegenome.curation_api.model.entities.ontology.DOTerm; @@ -42,13 +40,11 @@ import org.alliancegenome.es.util.ProcessDisplayHelper; import org.alliancegenome.indexer.config.IndexerConfig; import org.alliancegenome.indexer.indexers.Indexer; -import org.alliancegenome.indexer.indexers.curation.interfaces.SpeciesInterface; import org.alliancegenome.indexer.indexers.curation.service.AGMDiseaseAnnotationService; import org.alliancegenome.indexer.indexers.curation.service.AlleleDiseaseAnnotationService; import org.alliancegenome.indexer.indexers.curation.service.GeneDiseaseAnnotationService; import org.alliancegenome.indexer.indexers.curation.service.VocabularyTermService; -import si.mazi.rescu.RestProxyFactory; import org.alliancegenome.neo4j.repository.DiseaseRepository; import org.apache.commons.collections4.CollectionUtils; import org.apache.commons.lang3.tuple.Pair; @@ -60,16 +56,11 @@ @Slf4j public class DiseaseAnnotationCurationIndexer extends Indexer { - private final SpeciesInterface speciesApi = RestProxyFactory.createProxy(SpeciesInterface.class, ConfigHelper.getCurationApiUrl(), RestConfig.config); - private GeneDiseaseAnnotationService geneService; private AlleleDiseaseAnnotationService alleleService; private AGMDiseaseAnnotationService agmService; private VocabularyTermService vocabTermService; - // taxonIdPart (e.g. 
"9606") -> phylogeneticOrder - private Map speciesOrderLookup; - private Map> closureMap; private Map>> geneMap = new HashMap<>(); private Map>> generatedImplicatedGeneMap = new HashMap<>(); @@ -104,16 +95,6 @@ protected void index(ProcessDisplayHelper display) { DiseaseRepository diseaseRepository = new DiseaseRepository(); closureMap = diseaseRepository.getDOClosureChildMapping(); - speciesOrderLookup = new HashMap<>(); - List allSpecies = speciesApi.findForPublic(0, 100, "FieldsOnly", new HashMap<>()).getResults(); - for (Species species : allSpecies) { - if (species.getTaxon() != null && species.getPhylogeneticOrder() != null) { - String taxonIdPart = species.getTaxon().getCurie().replace("NCBITaxon:", ""); - speciesOrderLookup.put(taxonIdPart, species.getPhylogeneticOrder()); - } - } - log.info("Loaded " + speciesOrderLookup.size() + " species for speciesOrder lookup"); - indexGenes(); indexAlleles(); indexAGMs(); @@ -390,16 +371,6 @@ private List createAGMDiseaseAnnotationDocuments() return ret; } - private HashMap buildSpeciesOrder(String taxonCurie) { - HashMap order = new HashMap<>(); - String subjectTaxonIdPart = taxonCurie.replace("NCBITaxon:", ""); - for (Map.Entry entry : speciesOrderLookup.entrySet()) { - order.put(entry.getKey(), entry.getValue()); - } - order.put(subjectTaxonIdPart, 0); - return order; - } - private void populateBaseDiseaseAnnotationDocument(BiologicalEntity biologicalEntity, DiseaseAnnotation da, DiseaseAnnotationDocument dad) { if (dad.getCountId() == null) { dad.setCountId(uniqueAnnotationCounter.getAndIncrement()); diff --git a/agr_indexer/src/main/java/org/alliancegenome/indexer/indexers/curation/GeneExpressionAnnotationIndexer.java b/agr_indexer/src/main/java/org/alliancegenome/indexer/indexers/curation/GeneExpressionAnnotationIndexer.java index ebd99032d..10326adb2 100644 --- a/agr_indexer/src/main/java/org/alliancegenome/indexer/indexers/curation/GeneExpressionAnnotationIndexer.java +++ 
b/agr_indexer/src/main/java/org/alliancegenome/indexer/indexers/curation/GeneExpressionAnnotationIndexer.java @@ -1,21 +1,17 @@ package org.alliancegenome.indexer.indexers.curation; -import java.util.HashMap; import java.util.List; -import java.util.Map; import java.util.concurrent.LinkedBlockingDeque; import org.alliancegenome.core.config.ConfigHelper; import org.alliancegenome.curation_api.interfaces.document.GeneExpressionDocumentInterface; import org.alliancegenome.curation_api.model.document.es.GeneExpressionDocument; -import org.alliancegenome.curation_api.model.entities.Species; import org.alliancegenome.curation_api.response.SearchResponse; import org.alliancegenome.es.rest.RestConfig; import org.alliancegenome.es.util.ProcessDisplayHelper; import org.alliancegenome.exceptional.client.ExceptionCatcher; import org.alliancegenome.indexer.config.IndexerConfig; import org.alliancegenome.indexer.indexers.Indexer; -import org.alliancegenome.indexer.indexers.curation.interfaces.SpeciesInterface; import org.apache.commons.collections.CollectionUtils; import com.fasterxml.jackson.databind.ObjectMapper; @@ -27,13 +23,9 @@ public class GeneExpressionAnnotationIndexer extends Indexer { private final GeneExpressionDocumentInterface geneExpressionApi = RestProxyFactory.createProxy(GeneExpressionDocumentInterface.class, ConfigHelper.getCurationApiUrl(), RestConfig.config); - private final SpeciesInterface speciesApi = RestProxyFactory.createProxy(SpeciesInterface.class, ConfigHelper.getCurationApiUrl(), RestConfig.config); private List> idBatches; - // taxonIdPart (e.g. 
"9606") -> phylogeneticOrder - private Map speciesOrderLookup; - public GeneExpressionAnnotationIndexer(IndexerConfig indexerConfig) { super(indexerConfig); } @@ -41,16 +33,6 @@ public GeneExpressionAnnotationIndexer(IndexerConfig indexerConfig) { @Override protected void index(ProcessDisplayHelper display) { try { - speciesOrderLookup = new HashMap<>(); - List allSpecies = speciesApi.findForPublic(0, 100, "FieldsOnly", new HashMap<>()).getResults(); - for (Species species : allSpecies) { - if (species.getTaxon() != null && species.getPhylogeneticOrder() != null) { - String taxonIdPart = species.getTaxon().getCurie().replace("NCBITaxon:", ""); - speciesOrderLookup.put(taxonIdPart, species.getPhylogeneticOrder()); - } - } - log.info("Loaded {} species for speciesOrder lookup", speciesOrderLookup.size()); - log.info("Fetching all gene IDs..."); SearchResponse idsResponse = geneExpressionApi.getGeneIds(); @@ -103,16 +85,6 @@ protected void startSingleThread(LinkedBlockingDeque queue) { } } - private HashMap buildSpeciesOrder(String taxonCurie) { - HashMap order = new HashMap<>(); - String subjectTaxonIdPart = taxonCurie.replace("NCBITaxon:", ""); - for (Map.Entry entry : speciesOrderLookup.entrySet()) { - order.put(entry.getKey(), entry.getValue()); - } - order.put(subjectTaxonIdPart, 0); - return order; - } - @Override protected ObjectMapper customizeObjectMapper(ObjectMapper objectMapper) { return RestConfig.config.getJacksonObjectMapperFactory().createObjectMapper(); From 803da9a229f819be454787f91222c98f0d54059b Mon Sep 17 00:00:00 2001 From: Olin Blodgett Date: Fri, 10 Apr 2026 04:38:36 -0400 Subject: [PATCH 2/2] fix: set species fullName on variant search result documents The Species object created in VariantSummaryConverter only had abbreviation set. VariantSearchResultConverter calls getSpecies().getFullName() to populate the species field on variant_search_result documents, which returned null because fullName was never set. 
This caused the species facet to show no values in the variant search category. --- .../core/variant/converters/VariantSummaryConverter.java | 1 + 1 file changed, 1 insertion(+) diff --git a/agr_java_core/src/main/java/org/alliancegenome/core/variant/converters/VariantSummaryConverter.java b/agr_java_core/src/main/java/org/alliancegenome/core/variant/converters/VariantSummaryConverter.java index c9ee5d451..64638626d 100644 --- a/agr_java_core/src/main/java/org/alliancegenome/core/variant/converters/VariantSummaryConverter.java +++ b/agr_java_core/src/main/java/org/alliancegenome/core/variant/converters/VariantSummaryConverter.java @@ -117,6 +117,7 @@ public List convertContextToDocument(VariantContext ctx, taxon.setName(speciesType.getName()); taxon.setCurie(speciesType.getTaxonID()); Species species = new Species(); + species.setFullName(speciesType.getName()); species.setAbbreviation(speciesType.getAbbreviation()); taxon.setSpecies(species); }