diff --git a/agr_indexer/src/main/java/org/alliancegenome/indexer/TestSpeciesOrder.java b/agr_indexer/src/main/java/org/alliancegenome/indexer/TestSpeciesOrder.java index b314193a2..8dd34fc07 100644 --- a/agr_indexer/src/main/java/org/alliancegenome/indexer/TestSpeciesOrder.java +++ b/agr_indexer/src/main/java/org/alliancegenome/indexer/TestSpeciesOrder.java @@ -57,8 +57,9 @@ public static void main(String[] args) throws Exception { private static HashMap buildSpeciesOrder(Map speciesOrderLookup, String taxonCurie) { HashMap order = new HashMap<>(); String subjectTaxonIdPart = taxonCurie.replace("NCBITaxon:", ""); - for (Map.Entry entry : speciesOrderLookup.entrySet()) { - order.put(entry.getKey(), entry.getValue()); + Integer subjectOrder = speciesOrderLookup.getOrDefault(subjectTaxonIdPart, 0); + for (String key : speciesOrderLookup.keySet()) { + order.put(key, subjectOrder); } order.put(subjectTaxonIdPart, 0); return order; diff --git a/agr_indexer/src/main/java/org/alliancegenome/indexer/indexers/Indexer.java b/agr_indexer/src/main/java/org/alliancegenome/indexer/indexers/Indexer.java index 2133061ef..dab10eea3 100644 --- a/agr_indexer/src/main/java/org/alliancegenome/indexer/indexers/Indexer.java +++ b/agr_indexer/src/main/java/org/alliancegenome/indexer/indexers/Indexer.java @@ -11,12 +11,18 @@ import java.util.concurrent.LinkedBlockingDeque; import java.util.concurrent.TimeUnit; +import org.alliancegenome.core.config.ConfigHelper; import org.alliancegenome.core.util.StatsCollector; import org.alliancegenome.curation_api.model.document.es.ESDocument; +import org.alliancegenome.curation_api.model.entities.Species; +import org.alliancegenome.es.rest.RestConfig; import org.alliancegenome.es.util.EsClientFactory; import org.alliancegenome.es.util.ProcessDisplayHelper; import org.alliancegenome.exceptional.client.ExceptionCatcher; import org.alliancegenome.indexer.config.IndexerConfig; +import 
org.alliancegenome.indexer.indexers.curation.interfaces.SpeciesInterface; + +import si.mazi.rescu.RestProxyFactory; import org.elasticsearch.action.DocWriteRequest; import org.elasticsearch.action.bulk.BackoffPolicy; import org.elasticsearch.action.bulk.BulkProcessor; @@ -43,6 +49,10 @@ public abstract class Indexer extends Thread { public static String indexName; protected IndexerConfig indexerConfig; private RestHighLevelClient searchClient; + + private final SpeciesInterface speciesApi = RestProxyFactory.createProxy(SpeciesInterface.class, ConfigHelper.getCurationApiUrl(), RestConfig.config); + // taxonIdPart (e.g. "9606") -> phylogeneticOrder + protected Map<String, Integer> speciesOrderLookup; protected Runtime runtime = Runtime.getRuntime(); protected DecimalFormat df = new DecimalFormat("#"); protected ObjectMapper om = new ObjectMapper(); @@ -230,6 +240,70 @@ protected List> partition(List list, int size) { return parts; } + /** + * Fetches species from the curation API and rebuilds the speciesOrder lookup + * (taxon ID part, e.g. "9606", to phylogenetic order). Species lacking a taxon + * or a phylogenetic order are skipped. + * + * The map is filled locally and published to the field only once complete, so a + * failed fetch leaves the field as it was instead of leaving an empty, non-null + * lookup that buildSpeciesOrder would accept without retrying. + * + * NOTE(review): findForPublic is called with (0, 100) - presumably page 0 with a + * limit of 100; confirm the species count cannot exceed one page. + */ + protected void loadSpeciesOrderLookup() { + Map<String, Integer> lookup = new HashMap<>(); + List<Species> allSpecies = speciesApi.findForPublic(0, 100, "FieldsOnly", new HashMap<>()).getResults(); + for (Species species : allSpecies) { + if (species.getTaxon() != null && species.getPhylogeneticOrder() != null) { + String taxonIdPart = species.getTaxon().getCurie().replace("NCBITaxon:", ""); + lookup.put(taxonIdPart, species.getPhylogeneticOrder()); + } + } + speciesOrderLookup = lookup; + log.info("Loaded " + lookup.size() + " species for speciesOrder lookup"); + } + + /** + * Builds a speciesOrder map for an ES document based on its subject's taxon. + * + * The returned map has one entry per species, keyed by NCBI taxon ID part (e.g. "9606"). + * The subject's own species is set to 0; all other species are set to the subject's + * phylogenetic order value. This allows the API to sort by speciesOrder.{focusTaxonId}
+ * and get the focus species first (0), with all other species sorted by their own + * phylogenetic position — because each species' documents carry that species' own + * phylogenetic order as the non-self value. + * + * Example for a Rat document (phylogeneticOrder=20): + * { "9606": 20, "10116": 0, "10090": 20, "7955": 20, ... } + * + * Example for a Human document (phylogeneticOrder=10): + * { "9606": 0, "10116": 10, "10090": 10, "7955": 10, ... } + * + * When the API sorts by speciesOrder.10090 (mouse gene page), documents sort as: + * Mouse=0, Human=10, Rat=20, Zebrafish=40, ... (each species has a unique value) + * + * If the subject's taxon is missing from the lookup, 0 is used as its order, so + * every entry in the returned map is 0. + * + * @param taxonCurie subject taxon curie, e.g. "NCBITaxon:9606"; must not be null + * @return map from taxon ID part to sort value for this document + */ + protected HashMap<String, Integer> buildSpeciesOrder(String taxonCurie) { + if (speciesOrderLookup == null) { + loadSpeciesOrderLookup(); + } + HashMap<String, Integer> order = new HashMap<>(); + String subjectTaxonIdPart = taxonCurie.replace("NCBITaxon:", ""); + Integer subjectOrder = speciesOrderLookup.getOrDefault(subjectTaxonIdPart, 0); + for (String key : speciesOrderLookup.keySet()) { + order.put(key, subjectOrder); + } + order.put(subjectTaxonIdPart, 0); + return order; + } + protected abstract void index(ProcessDisplayHelper display); protected abstract void startSingleThread(LinkedBlockingDeque queue); diff --git a/agr_indexer/src/main/java/org/alliancegenome/indexer/indexers/curation/DiseaseAnnotationCurationIndexer.java b/agr_indexer/src/main/java/org/alliancegenome/indexer/indexers/curation/DiseaseAnnotationCurationIndexer.java index 2a816be25..5112eeee3 100644 --- a/agr_indexer/src/main/java/org/alliancegenome/indexer/indexers/curation/DiseaseAnnotationCurationIndexer.java +++ b/agr_indexer/src/main/java/org/alliancegenome/indexer/indexers/curation/DiseaseAnnotationCurationIndexer.java @@ -20,7 +20,6 @@ import org.alliancegenome.api.entity.DiseaseAnnotationDocument; import org.alliancegenome.api.entity.GeneDiseaseAnnotationDocument; import org.alliancegenome.core.helpers.DiseaseAnnotationHelper; -import org.alliancegenome.core.config.ConfigHelper; import 
org.alliancegenome.curation_api.model.entities.AGMDiseaseAnnotation; import org.alliancegenome.curation_api.model.entities.AffectedGenomicModel; import org.alliancegenome.curation_api.model.entities.Allele; @@ -32,7 +31,6 @@ import org.alliancegenome.curation_api.model.entities.Gene; import org.alliancegenome.curation_api.model.entities.GeneDiseaseAnnotation; import org.alliancegenome.curation_api.model.entities.Reference; -import org.alliancegenome.curation_api.model.entities.Species; import org.alliancegenome.curation_api.model.entities.VocabularyTerm; import org.alliancegenome.curation_api.model.entities.base.SubmittedObject; import org.alliancegenome.curation_api.model.entities.ontology.DOTerm; @@ -42,13 +40,11 @@ import org.alliancegenome.es.util.ProcessDisplayHelper; import org.alliancegenome.indexer.config.IndexerConfig; import org.alliancegenome.indexer.indexers.Indexer; -import org.alliancegenome.indexer.indexers.curation.interfaces.SpeciesInterface; import org.alliancegenome.indexer.indexers.curation.service.AGMDiseaseAnnotationService; import org.alliancegenome.indexer.indexers.curation.service.AlleleDiseaseAnnotationService; import org.alliancegenome.indexer.indexers.curation.service.GeneDiseaseAnnotationService; import org.alliancegenome.indexer.indexers.curation.service.VocabularyTermService; -import si.mazi.rescu.RestProxyFactory; import org.alliancegenome.neo4j.repository.DiseaseRepository; import org.apache.commons.collections4.CollectionUtils; import org.apache.commons.lang3.tuple.Pair; @@ -60,16 +56,11 @@ @Slf4j public class DiseaseAnnotationCurationIndexer extends Indexer { - private final SpeciesInterface speciesApi = RestProxyFactory.createProxy(SpeciesInterface.class, ConfigHelper.getCurationApiUrl(), RestConfig.config); - private GeneDiseaseAnnotationService geneService; private AlleleDiseaseAnnotationService alleleService; private AGMDiseaseAnnotationService agmService; private VocabularyTermService vocabTermService; - // taxonIdPart (e.g. 
"9606") -> phylogeneticOrder - private Map speciesOrderLookup; - private Map> closureMap; private Map>> geneMap = new HashMap<>(); private Map>> generatedImplicatedGeneMap = new HashMap<>(); @@ -104,16 +95,6 @@ protected void index(ProcessDisplayHelper display) { DiseaseRepository diseaseRepository = new DiseaseRepository(); closureMap = diseaseRepository.getDOClosureChildMapping(); - speciesOrderLookup = new HashMap<>(); - List allSpecies = speciesApi.findForPublic(0, 100, "FieldsOnly", new HashMap<>()).getResults(); - for (Species species : allSpecies) { - if (species.getTaxon() != null && species.getPhylogeneticOrder() != null) { - String taxonIdPart = species.getTaxon().getCurie().replace("NCBITaxon:", ""); - speciesOrderLookup.put(taxonIdPart, species.getPhylogeneticOrder()); - } - } - log.info("Loaded " + speciesOrderLookup.size() + " species for speciesOrder lookup"); - indexGenes(); indexAlleles(); indexAGMs(); @@ -390,16 +371,6 @@ private List createAGMDiseaseAnnotationDocuments() return ret; } - private HashMap buildSpeciesOrder(String taxonCurie) { - HashMap order = new HashMap<>(); - String subjectTaxonIdPart = taxonCurie.replace("NCBITaxon:", ""); - for (Map.Entry entry : speciesOrderLookup.entrySet()) { - order.put(entry.getKey(), entry.getValue()); - } - order.put(subjectTaxonIdPart, 0); - return order; - } - private void populateBaseDiseaseAnnotationDocument(BiologicalEntity biologicalEntity, DiseaseAnnotation da, DiseaseAnnotationDocument dad) { if (dad.getCountId() == null) { dad.setCountId(uniqueAnnotationCounter.getAndIncrement()); diff --git a/agr_indexer/src/main/java/org/alliancegenome/indexer/indexers/curation/GeneExpressionAnnotationIndexer.java b/agr_indexer/src/main/java/org/alliancegenome/indexer/indexers/curation/GeneExpressionAnnotationIndexer.java index ebd99032d..10326adb2 100644 --- a/agr_indexer/src/main/java/org/alliancegenome/indexer/indexers/curation/GeneExpressionAnnotationIndexer.java +++ 
b/agr_indexer/src/main/java/org/alliancegenome/indexer/indexers/curation/GeneExpressionAnnotationIndexer.java @@ -1,21 +1,17 @@ package org.alliancegenome.indexer.indexers.curation; -import java.util.HashMap; import java.util.List; -import java.util.Map; import java.util.concurrent.LinkedBlockingDeque; import org.alliancegenome.core.config.ConfigHelper; import org.alliancegenome.curation_api.interfaces.document.GeneExpressionDocumentInterface; import org.alliancegenome.curation_api.model.document.es.GeneExpressionDocument; -import org.alliancegenome.curation_api.model.entities.Species; import org.alliancegenome.curation_api.response.SearchResponse; import org.alliancegenome.es.rest.RestConfig; import org.alliancegenome.es.util.ProcessDisplayHelper; import org.alliancegenome.exceptional.client.ExceptionCatcher; import org.alliancegenome.indexer.config.IndexerConfig; import org.alliancegenome.indexer.indexers.Indexer; -import org.alliancegenome.indexer.indexers.curation.interfaces.SpeciesInterface; import org.apache.commons.collections.CollectionUtils; import com.fasterxml.jackson.databind.ObjectMapper; @@ -27,13 +23,9 @@ public class GeneExpressionAnnotationIndexer extends Indexer { private final GeneExpressionDocumentInterface geneExpressionApi = RestProxyFactory.createProxy(GeneExpressionDocumentInterface.class, ConfigHelper.getCurationApiUrl(), RestConfig.config); - private final SpeciesInterface speciesApi = RestProxyFactory.createProxy(SpeciesInterface.class, ConfigHelper.getCurationApiUrl(), RestConfig.config); private List> idBatches; - // taxonIdPart (e.g. 
"9606") -> phylogeneticOrder - private Map speciesOrderLookup; - public GeneExpressionAnnotationIndexer(IndexerConfig indexerConfig) { super(indexerConfig); } @@ -41,16 +33,6 @@ public GeneExpressionAnnotationIndexer(IndexerConfig indexerConfig) { @Override protected void index(ProcessDisplayHelper display) { try { - speciesOrderLookup = new HashMap<>(); - List allSpecies = speciesApi.findForPublic(0, 100, "FieldsOnly", new HashMap<>()).getResults(); - for (Species species : allSpecies) { - if (species.getTaxon() != null && species.getPhylogeneticOrder() != null) { - String taxonIdPart = species.getTaxon().getCurie().replace("NCBITaxon:", ""); - speciesOrderLookup.put(taxonIdPart, species.getPhylogeneticOrder()); - } - } - log.info("Loaded {} species for speciesOrder lookup", speciesOrderLookup.size()); - log.info("Fetching all gene IDs..."); SearchResponse idsResponse = geneExpressionApi.getGeneIds(); @@ -103,16 +85,6 @@ protected void startSingleThread(LinkedBlockingDeque queue) { } } - private HashMap buildSpeciesOrder(String taxonCurie) { - HashMap order = new HashMap<>(); - String subjectTaxonIdPart = taxonCurie.replace("NCBITaxon:", ""); - for (Map.Entry entry : speciesOrderLookup.entrySet()) { - order.put(entry.getKey(), entry.getValue()); - } - order.put(subjectTaxonIdPart, 0); - return order; - } - @Override protected ObjectMapper customizeObjectMapper(ObjectMapper objectMapper) { return RestConfig.config.getJacksonObjectMapperFactory().createObjectMapper(); diff --git a/agr_java_core/src/main/java/org/alliancegenome/core/variant/converters/VariantSummaryConverter.java b/agr_java_core/src/main/java/org/alliancegenome/core/variant/converters/VariantSummaryConverter.java index c9ee5d451..64638626d 100644 --- a/agr_java_core/src/main/java/org/alliancegenome/core/variant/converters/VariantSummaryConverter.java +++ b/agr_java_core/src/main/java/org/alliancegenome/core/variant/converters/VariantSummaryConverter.java @@ -117,6 +117,7 @@ public List 
convertContextToDocument(VariantContext ctx, taxon.setName(speciesType.getName()); taxon.setCurie(speciesType.getTaxonID()); Species species = new Species(); + species.setFullName(speciesType.getName()); species.setAbbreviation(speciesType.getAbbreviation()); taxon.setSpecies(species); }