Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -57,8 +57,9 @@ public static void main(String[] args) throws Exception {
private static HashMap<String, Integer> buildSpeciesOrder(Map<String, Integer> speciesOrderLookup, String taxonCurie) {
HashMap<String, Integer> order = new HashMap<>();
String subjectTaxonIdPart = taxonCurie.replace("NCBITaxon:", "");
for (Map.Entry<String, Integer> entry : speciesOrderLookup.entrySet()) {
order.put(entry.getKey(), entry.getValue());
Integer subjectOrder = speciesOrderLookup.getOrDefault(subjectTaxonIdPart, 0);
for (String key : speciesOrderLookup.keySet()) {
order.put(key, subjectOrder);
}
order.put(subjectTaxonIdPart, 0);
return order;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -11,12 +11,18 @@
import java.util.concurrent.LinkedBlockingDeque;
import java.util.concurrent.TimeUnit;

import org.alliancegenome.core.config.ConfigHelper;
import org.alliancegenome.core.util.StatsCollector;
import org.alliancegenome.curation_api.model.document.es.ESDocument;
import org.alliancegenome.curation_api.model.entities.Species;
import org.alliancegenome.es.rest.RestConfig;
import org.alliancegenome.es.util.EsClientFactory;
import org.alliancegenome.es.util.ProcessDisplayHelper;
import org.alliancegenome.exceptional.client.ExceptionCatcher;
import org.alliancegenome.indexer.config.IndexerConfig;
import org.alliancegenome.indexer.indexers.curation.interfaces.SpeciesInterface;

import si.mazi.rescu.RestProxyFactory;
import org.elasticsearch.action.DocWriteRequest;
import org.elasticsearch.action.bulk.BackoffPolicy;
import org.elasticsearch.action.bulk.BulkProcessor;
Expand All @@ -43,6 +49,10 @@ public abstract class Indexer extends Thread {
public static String indexName;
protected IndexerConfig indexerConfig;
private RestHighLevelClient searchClient;

private final SpeciesInterface speciesApi = RestProxyFactory.createProxy(SpeciesInterface.class, ConfigHelper.getCurationApiUrl(), RestConfig.config);
// taxonIdPart (e.g. "9606") -> phylogeneticOrder
protected Map<String, Integer> speciesOrderLookup;
protected Runtime runtime = Runtime.getRuntime();
protected DecimalFormat df = new DecimalFormat("#");
protected ObjectMapper om = new ObjectMapper();
Expand Down Expand Up @@ -230,6 +240,51 @@ protected <T> List<List<T>> partition(List<T> list, int size) {
return parts;
}

/**
 * Fetches species from the curation API and rebuilds {@code speciesOrderLookup},
 * mapping each taxon ID part (the curie with the "NCBITaxon:" prefix removed,
 * e.g. "9606") to that species' phylogenetic order.
 *
 * Species without a taxon, a taxon curie, or a phylogenetic order are skipped.
 * The map is filled in a local variable and assigned to the field only once it is
 * complete, so code that checks the field for null never sees a half-populated
 * map, and a failed fetch leaves the field as it was instead of empty.
 */
protected void loadSpeciesOrderLookup() {
    Map<String, Integer> lookup = new HashMap<>();
    // NOTE(review): the arguments look like (page 0, limit 100) - confirm that 100
    // stays above the number of species the API can return.
    List<Species> allSpecies = speciesApi.findForPublic(0, 100, "FieldsOnly", new HashMap<>()).getResults();
    if (allSpecies != null) {
        for (Species species : allSpecies) {
            if (species.getTaxon() == null || species.getPhylogeneticOrder() == null) {
                continue;
            }
            String curie = species.getTaxon().getCurie();
            if (curie == null) {
                continue;
            }
            lookup.put(curie.replace("NCBITaxon:", ""), species.getPhylogeneticOrder());
        }
    }
    // Publish only after the map is fully built.
    speciesOrderLookup = lookup;
    log.info("Loaded " + lookup.size() + " species for speciesOrder lookup");
}

/**
 * Creates the speciesOrder map stored on an ES document, derived from the taxon of
 * the document's subject.
 *
 * The result holds one entry for every species in {@code speciesOrderLookup}, keyed
 * by NCBI taxon ID part (e.g. "9606"). The entry for the subject's own species is 0;
 * every other entry carries the subject's phylogenetic order (0 when the subject's
 * taxon is not in the lookup). Sorting by speciesOrder.<focusTaxonId> therefore puts
 * the focus species first (0) and ranks documents of every other species by that
 * species' own phylogenetic position.
 *
 * Rat document (phylogeneticOrder=20):
 * { "9606": 20, "10116": 0, "10090": 20, "7955": 20, ... }
 *
 * Human document (phylogeneticOrder=10):
 * { "9606": 0, "10116": 10, "10090": 10, "7955": 10, ... }
 *
 * Sorting by speciesOrder.10090 (mouse gene page) then yields:
 * Mouse=0, Human=10, Rat=20, Zebrafish=40, ... (a distinct value per species)
 *
 * The lookup is loaded on first use if it has not been loaded yet.
 */
protected HashMap<String, Integer> buildSpeciesOrder(String taxonCurie) {
    HashMap<String, Integer> result = new HashMap<>();
    if (speciesOrderLookup == null) {
        loadSpeciesOrderLookup();
    }
    String focusTaxonId = taxonCurie.replace("NCBITaxon:", "");
    Integer focusRank = speciesOrderLookup.getOrDefault(focusTaxonId, 0);
    // Every known species starts out with the subject's rank ...
    speciesOrderLookup.keySet().forEach(taxonId -> result.put(taxonId, focusRank));
    // ... and the subject's own species is pinned to 0.
    result.put(focusTaxonId, 0);
    return result;
}

protected abstract void index(ProcessDisplayHelper display);

protected abstract void startSingleThread(LinkedBlockingDeque<String> queue);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,6 @@
import org.alliancegenome.api.entity.DiseaseAnnotationDocument;
import org.alliancegenome.api.entity.GeneDiseaseAnnotationDocument;
import org.alliancegenome.core.helpers.DiseaseAnnotationHelper;
import org.alliancegenome.core.config.ConfigHelper;
import org.alliancegenome.curation_api.model.entities.AGMDiseaseAnnotation;
import org.alliancegenome.curation_api.model.entities.AffectedGenomicModel;
import org.alliancegenome.curation_api.model.entities.Allele;
Expand All @@ -32,7 +31,6 @@
import org.alliancegenome.curation_api.model.entities.Gene;
import org.alliancegenome.curation_api.model.entities.GeneDiseaseAnnotation;
import org.alliancegenome.curation_api.model.entities.Reference;
import org.alliancegenome.curation_api.model.entities.Species;
import org.alliancegenome.curation_api.model.entities.VocabularyTerm;
import org.alliancegenome.curation_api.model.entities.base.SubmittedObject;
import org.alliancegenome.curation_api.model.entities.ontology.DOTerm;
Expand All @@ -42,13 +40,11 @@
import org.alliancegenome.es.util.ProcessDisplayHelper;
import org.alliancegenome.indexer.config.IndexerConfig;
import org.alliancegenome.indexer.indexers.Indexer;
import org.alliancegenome.indexer.indexers.curation.interfaces.SpeciesInterface;
import org.alliancegenome.indexer.indexers.curation.service.AGMDiseaseAnnotationService;
import org.alliancegenome.indexer.indexers.curation.service.AlleleDiseaseAnnotationService;
import org.alliancegenome.indexer.indexers.curation.service.GeneDiseaseAnnotationService;
import org.alliancegenome.indexer.indexers.curation.service.VocabularyTermService;

import si.mazi.rescu.RestProxyFactory;
import org.alliancegenome.neo4j.repository.DiseaseRepository;
import org.apache.commons.collections4.CollectionUtils;
import org.apache.commons.lang3.tuple.Pair;
Expand All @@ -60,16 +56,11 @@
@Slf4j
public class DiseaseAnnotationCurationIndexer extends Indexer {

private final SpeciesInterface speciesApi = RestProxyFactory.createProxy(SpeciesInterface.class, ConfigHelper.getCurationApiUrl(), RestConfig.config);

private GeneDiseaseAnnotationService geneService;
private AlleleDiseaseAnnotationService alleleService;
private AGMDiseaseAnnotationService agmService;
private VocabularyTermService vocabTermService;

// taxonIdPart (e.g. "9606") -> phylogeneticOrder
private Map<String, Integer> speciesOrderLookup;

private Map<String, Set<String>> closureMap;
private Map<String, Pair<Gene, ArrayList<DiseaseAnnotation>>> geneMap = new HashMap<>();
private Map<String, Pair<Gene, ArrayList<DiseaseAnnotation>>> generatedImplicatedGeneMap = new HashMap<>();
Expand Down Expand Up @@ -104,16 +95,6 @@ protected void index(ProcessDisplayHelper display) {
DiseaseRepository diseaseRepository = new DiseaseRepository();
closureMap = diseaseRepository.getDOClosureChildMapping();

speciesOrderLookup = new HashMap<>();
List<Species> allSpecies = speciesApi.findForPublic(0, 100, "FieldsOnly", new HashMap<>()).getResults();
for (Species species : allSpecies) {
if (species.getTaxon() != null && species.getPhylogeneticOrder() != null) {
String taxonIdPart = species.getTaxon().getCurie().replace("NCBITaxon:", "");
speciesOrderLookup.put(taxonIdPart, species.getPhylogeneticOrder());
}
}
log.info("Loaded " + speciesOrderLookup.size() + " species for speciesOrder lookup");

indexGenes();
indexAlleles();
indexAGMs();
Expand Down Expand Up @@ -390,16 +371,6 @@ private List<AGMDiseaseAnnotationDocument> createAGMDiseaseAnnotationDocuments()
return ret;
}

/**
 * Returns a copy of {@code speciesOrderLookup} in which the entry for the given
 * taxon (curie with the "NCBITaxon:" prefix removed) is set to 0.
 */
private HashMap<String, Integer> buildSpeciesOrder(String taxonCurie) {
    String focusTaxonId = taxonCurie.replace("NCBITaxon:", "");
    // Copy every species' own phylogenetic order, then pin the subject to 0.
    HashMap<String, Integer> result = new HashMap<>(speciesOrderLookup);
    result.put(focusTaxonId, 0);
    return result;
}

private void populateBaseDiseaseAnnotationDocument(BiologicalEntity biologicalEntity, DiseaseAnnotation da, DiseaseAnnotationDocument dad) {
if (dad.getCountId() == null) {
dad.setCountId(uniqueAnnotationCounter.getAndIncrement());
Expand Down
Original file line number Diff line number Diff line change
@@ -1,21 +1,17 @@
package org.alliancegenome.indexer.indexers.curation;

import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.concurrent.LinkedBlockingDeque;

import org.alliancegenome.core.config.ConfigHelper;
import org.alliancegenome.curation_api.interfaces.document.GeneExpressionDocumentInterface;
import org.alliancegenome.curation_api.model.document.es.GeneExpressionDocument;
import org.alliancegenome.curation_api.model.entities.Species;
import org.alliancegenome.curation_api.response.SearchResponse;
import org.alliancegenome.es.rest.RestConfig;
import org.alliancegenome.es.util.ProcessDisplayHelper;
import org.alliancegenome.exceptional.client.ExceptionCatcher;
import org.alliancegenome.indexer.config.IndexerConfig;
import org.alliancegenome.indexer.indexers.Indexer;
import org.alliancegenome.indexer.indexers.curation.interfaces.SpeciesInterface;
import org.apache.commons.collections.CollectionUtils;

import com.fasterxml.jackson.databind.ObjectMapper;
Expand All @@ -27,30 +23,16 @@
public class GeneExpressionAnnotationIndexer extends Indexer {

private final GeneExpressionDocumentInterface geneExpressionApi = RestProxyFactory.createProxy(GeneExpressionDocumentInterface.class, ConfigHelper.getCurationApiUrl(), RestConfig.config);
private final SpeciesInterface speciesApi = RestProxyFactory.createProxy(SpeciesInterface.class, ConfigHelper.getCurationApiUrl(), RestConfig.config);

private List<List<String>> idBatches;

// taxonIdPart (e.g. "9606") -> phylogeneticOrder
private Map<String, Integer> speciesOrderLookup;

public GeneExpressionAnnotationIndexer(IndexerConfig indexerConfig) {
super(indexerConfig);
}

@Override
protected void index(ProcessDisplayHelper display) {
try {
speciesOrderLookup = new HashMap<>();
List<Species> allSpecies = speciesApi.findForPublic(0, 100, "FieldsOnly", new HashMap<>()).getResults();
for (Species species : allSpecies) {
if (species.getTaxon() != null && species.getPhylogeneticOrder() != null) {
String taxonIdPart = species.getTaxon().getCurie().replace("NCBITaxon:", "");
speciesOrderLookup.put(taxonIdPart, species.getPhylogeneticOrder());
}
}
log.info("Loaded {} species for speciesOrder lookup", speciesOrderLookup.size());

log.info("Fetching all gene IDs...");
SearchResponse<String> idsResponse = geneExpressionApi.getGeneIds();

Expand Down Expand Up @@ -103,16 +85,6 @@ protected void startSingleThread(LinkedBlockingDeque<String> queue) {
}
}

/**
 * Produces the per-document speciesOrder map: all entries of
 * {@code speciesOrderLookup} are carried over unchanged, except that the subject's
 * taxon ID part (curie minus "NCBITaxon:") maps to 0.
 */
private HashMap<String, Integer> buildSpeciesOrder(String taxonCurie) {
    String subjectKey = taxonCurie.replace("NCBITaxon:", "");
    HashMap<String, Integer> speciesOrder = new HashMap<>();
    speciesOrder.putAll(speciesOrderLookup);
    // The subject's own species always sorts first.
    speciesOrder.put(subjectKey, 0);
    return speciesOrder;
}

@Override
protected ObjectMapper customizeObjectMapper(ObjectMapper objectMapper) {
return RestConfig.config.getJacksonObjectMapperFactory().createObjectMapper();
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -117,6 +117,7 @@ public List<VariantSummaryDocument> convertContextToDocument(VariantContext ctx,
taxon.setName(speciesType.getName());
taxon.setCurie(speciesType.getTaxonID());
Species species = new Species();
species.setFullName(speciesType.getName());
species.setAbbreviation(speciesType.getAbbreviation());
taxon.setSpecies(species);
}
Expand Down
Loading