Skip to content

Commit 3a50833

Browse files
authored
Merge pull request #1561 from alliance-genome/SCRUM-5933
SCRUM-5933 fix speciesOrder for gene page species sorting
2 parents 93707e1 + 803da9a commit 3a50833

File tree

5 files changed

+59
-59
lines changed

5 files changed

+59
-59
lines changed

agr_indexer/src/main/java/org/alliancegenome/indexer/TestSpeciesOrder.java

Lines changed: 3 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -57,8 +57,9 @@ public static void main(String[] args) throws Exception {
5757
private static HashMap<String, Integer> buildSpeciesOrder(Map<String, Integer> speciesOrderLookup, String taxonCurie) {
5858
HashMap<String, Integer> order = new HashMap<>();
5959
String subjectTaxonIdPart = taxonCurie.replace("NCBITaxon:", "");
60-
for (Map.Entry<String, Integer> entry : speciesOrderLookup.entrySet()) {
61-
order.put(entry.getKey(), entry.getValue());
60+
Integer subjectOrder = speciesOrderLookup.getOrDefault(subjectTaxonIdPart, 0);
61+
for (String key : speciesOrderLookup.keySet()) {
62+
order.put(key, subjectOrder);
6263
}
6364
order.put(subjectTaxonIdPart, 0);
6465
return order;

agr_indexer/src/main/java/org/alliancegenome/indexer/indexers/Indexer.java

Lines changed: 55 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -11,12 +11,18 @@
1111
import java.util.concurrent.LinkedBlockingDeque;
1212
import java.util.concurrent.TimeUnit;
1313

14+
import org.alliancegenome.core.config.ConfigHelper;
1415
import org.alliancegenome.core.util.StatsCollector;
1516
import org.alliancegenome.curation_api.model.document.es.ESDocument;
17+
import org.alliancegenome.curation_api.model.entities.Species;
18+
import org.alliancegenome.es.rest.RestConfig;
1619
import org.alliancegenome.es.util.EsClientFactory;
1720
import org.alliancegenome.es.util.ProcessDisplayHelper;
1821
import org.alliancegenome.exceptional.client.ExceptionCatcher;
1922
import org.alliancegenome.indexer.config.IndexerConfig;
23+
import org.alliancegenome.indexer.indexers.curation.interfaces.SpeciesInterface;
24+
25+
import si.mazi.rescu.RestProxyFactory;
2026
import org.elasticsearch.action.DocWriteRequest;
2127
import org.elasticsearch.action.bulk.BackoffPolicy;
2228
import org.elasticsearch.action.bulk.BulkProcessor;
@@ -43,6 +49,10 @@ public abstract class Indexer extends Thread {
4349
public static String indexName;
4450
protected IndexerConfig indexerConfig;
4551
private RestHighLevelClient searchClient;
52+
53+
private final SpeciesInterface speciesApi = RestProxyFactory.createProxy(SpeciesInterface.class, ConfigHelper.getCurationApiUrl(), RestConfig.config);
54+
// taxonIdPart (e.g. "9606") -> phylogeneticOrder
55+
protected Map<String, Integer> speciesOrderLookup;
4656
protected Runtime runtime = Runtime.getRuntime();
4757
protected DecimalFormat df = new DecimalFormat("#");
4858
protected ObjectMapper om = new ObjectMapper();
@@ -230,6 +240,51 @@ protected <T> List<List<T>> partition(List<T> list, int size) {
230240
return parts;
231241
}
232242

243+
protected void loadSpeciesOrderLookup() {
244+
speciesOrderLookup = new HashMap<>();
245+
List<Species> allSpecies = speciesApi.findForPublic(0, 100, "FieldsOnly", new HashMap<>()).getResults();
246+
for (Species species : allSpecies) {
247+
if (species.getTaxon() != null && species.getPhylogeneticOrder() != null) {
248+
String taxonIdPart = species.getTaxon().getCurie().replace("NCBITaxon:", "");
249+
speciesOrderLookup.put(taxonIdPart, species.getPhylogeneticOrder());
250+
}
251+
}
252+
log.info("Loaded " + speciesOrderLookup.size() + " species for speciesOrder lookup");
253+
}
254+
255+
/**
256+
* Builds a speciesOrder map for an ES document based on its subject's taxon.
257+
*
258+
* The returned map has one entry per species, keyed by NCBI taxon ID part (e.g. "9606").
259+
* The subject's own species is set to 0; all other species are set to the subject's
260+
* phylogenetic order value. This allows the API to sort by speciesOrder.<focusTaxonId>
261+
* and get the focus species first (0), with all other species sorted by their own
262+
* phylogenetic position — because each species' documents carry that species' own
263+
* phylogenetic order as the non-self value.
264+
*
265+
* Example for a Rat document (phylogeneticOrder=20):
266+
* { "9606": 20, "10116": 0, "10090": 20, "7955": 20, ... }
267+
*
268+
* Example for a Human document (phylogeneticOrder=10):
269+
* { "9606": 0, "10116": 10, "10090": 10, "7955": 10, ... }
270+
*
271+
* When the API sorts by speciesOrder.10090 (mouse gene page), documents sort as:
272+
* Mouse=0, Human=10, Rat=20, Zebrafish=40, ... (each species has a unique value)
273+
*/
274+
protected HashMap<String, Integer> buildSpeciesOrder(String taxonCurie) {
275+
if (speciesOrderLookup == null) {
276+
loadSpeciesOrderLookup();
277+
}
278+
HashMap<String, Integer> order = new HashMap<>();
279+
String subjectTaxonIdPart = taxonCurie.replace("NCBITaxon:", "");
280+
Integer subjectOrder = speciesOrderLookup.getOrDefault(subjectTaxonIdPart, 0);
281+
for (String key : speciesOrderLookup.keySet()) {
282+
order.put(key, subjectOrder);
283+
}
284+
order.put(subjectTaxonIdPart, 0);
285+
return order;
286+
}
287+
233288
protected abstract void index(ProcessDisplayHelper display);
234289

235290
protected abstract void startSingleThread(LinkedBlockingDeque<String> queue);

agr_indexer/src/main/java/org/alliancegenome/indexer/indexers/curation/DiseaseAnnotationCurationIndexer.java

Lines changed: 0 additions & 29 deletions
Original file line number | Diff line number | Diff line change
@@ -20,7 +20,6 @@
2020
import org.alliancegenome.api.entity.DiseaseAnnotationDocument;
2121
import org.alliancegenome.api.entity.GeneDiseaseAnnotationDocument;
2222
import org.alliancegenome.core.helpers.DiseaseAnnotationHelper;
23-
import org.alliancegenome.core.config.ConfigHelper;
2423
import org.alliancegenome.curation_api.model.entities.AGMDiseaseAnnotation;
2524
import org.alliancegenome.curation_api.model.entities.AffectedGenomicModel;
2625
import org.alliancegenome.curation_api.model.entities.Allele;
@@ -32,7 +31,6 @@
3231
import org.alliancegenome.curation_api.model.entities.Gene;
3332
import org.alliancegenome.curation_api.model.entities.GeneDiseaseAnnotation;
3433
import org.alliancegenome.curation_api.model.entities.Reference;
35-
import org.alliancegenome.curation_api.model.entities.Species;
3634
import org.alliancegenome.curation_api.model.entities.VocabularyTerm;
3735
import org.alliancegenome.curation_api.model.entities.base.SubmittedObject;
3836
import org.alliancegenome.curation_api.model.entities.ontology.DOTerm;
@@ -42,13 +40,11 @@
4240
import org.alliancegenome.es.util.ProcessDisplayHelper;
4341
import org.alliancegenome.indexer.config.IndexerConfig;
4442
import org.alliancegenome.indexer.indexers.Indexer;
45-
import org.alliancegenome.indexer.indexers.curation.interfaces.SpeciesInterface;
4643
import org.alliancegenome.indexer.indexers.curation.service.AGMDiseaseAnnotationService;
4744
import org.alliancegenome.indexer.indexers.curation.service.AlleleDiseaseAnnotationService;
4845
import org.alliancegenome.indexer.indexers.curation.service.GeneDiseaseAnnotationService;
4946
import org.alliancegenome.indexer.indexers.curation.service.VocabularyTermService;
5047

51-
import si.mazi.rescu.RestProxyFactory;
5248
import org.alliancegenome.neo4j.repository.DiseaseRepository;
5349
import org.apache.commons.collections4.CollectionUtils;
5450
import org.apache.commons.lang3.tuple.Pair;
@@ -60,16 +56,11 @@
6056
@Slf4j
6157
public class DiseaseAnnotationCurationIndexer extends Indexer {
6258

63-
private final SpeciesInterface speciesApi = RestProxyFactory.createProxy(SpeciesInterface.class, ConfigHelper.getCurationApiUrl(), RestConfig.config);
64-
6559
private GeneDiseaseAnnotationService geneService;
6660
private AlleleDiseaseAnnotationService alleleService;
6761
private AGMDiseaseAnnotationService agmService;
6862
private VocabularyTermService vocabTermService;
6963

70-
// taxonIdPart (e.g. "9606") -> phylogeneticOrder
71-
private Map<String, Integer> speciesOrderLookup;
72-
7364
private Map<String, Set<String>> closureMap;
7465
private Map<String, Pair<Gene, ArrayList<DiseaseAnnotation>>> geneMap = new HashMap<>();
7566
private Map<String, Pair<Gene, ArrayList<DiseaseAnnotation>>> generatedImplicatedGeneMap = new HashMap<>();
@@ -104,16 +95,6 @@ protected void index(ProcessDisplayHelper display) {
10495
DiseaseRepository diseaseRepository = new DiseaseRepository();
10596
closureMap = diseaseRepository.getDOClosureChildMapping();
10697

107-
speciesOrderLookup = new HashMap<>();
108-
List<Species> allSpecies = speciesApi.findForPublic(0, 100, "FieldsOnly", new HashMap<>()).getResults();
109-
for (Species species : allSpecies) {
110-
if (species.getTaxon() != null && species.getPhylogeneticOrder() != null) {
111-
String taxonIdPart = species.getTaxon().getCurie().replace("NCBITaxon:", "");
112-
speciesOrderLookup.put(taxonIdPart, species.getPhylogeneticOrder());
113-
}
114-
}
115-
log.info("Loaded " + speciesOrderLookup.size() + " species for speciesOrder lookup");
116-
11798
indexGenes();
11899
indexAlleles();
119100
indexAGMs();
@@ -390,16 +371,6 @@ private List<AGMDiseaseAnnotationDocument> createAGMDiseaseAnnotationDocuments()
390371
return ret;
391372
}
392373

393-
private HashMap<String, Integer> buildSpeciesOrder(String taxonCurie) {
394-
HashMap<String, Integer> order = new HashMap<>();
395-
String subjectTaxonIdPart = taxonCurie.replace("NCBITaxon:", "");
396-
for (Map.Entry<String, Integer> entry : speciesOrderLookup.entrySet()) {
397-
order.put(entry.getKey(), entry.getValue());
398-
}
399-
order.put(subjectTaxonIdPart, 0);
400-
return order;
401-
}
402-
403374
private void populateBaseDiseaseAnnotationDocument(BiologicalEntity biologicalEntity, DiseaseAnnotation da, DiseaseAnnotationDocument dad) {
404375
if (dad.getCountId() == null) {
405376
dad.setCountId(uniqueAnnotationCounter.getAndIncrement());

agr_indexer/src/main/java/org/alliancegenome/indexer/indexers/curation/GeneExpressionAnnotationIndexer.java

Lines changed: 0 additions & 28 deletions
Original file line number | Diff line number | Diff line change
@@ -1,21 +1,17 @@
11
package org.alliancegenome.indexer.indexers.curation;
22

3-
import java.util.HashMap;
43
import java.util.List;
5-
import java.util.Map;
64
import java.util.concurrent.LinkedBlockingDeque;
75

86
import org.alliancegenome.core.config.ConfigHelper;
97
import org.alliancegenome.curation_api.interfaces.document.GeneExpressionDocumentInterface;
108
import org.alliancegenome.curation_api.model.document.es.GeneExpressionDocument;
11-
import org.alliancegenome.curation_api.model.entities.Species;
129
import org.alliancegenome.curation_api.response.SearchResponse;
1310
import org.alliancegenome.es.rest.RestConfig;
1411
import org.alliancegenome.es.util.ProcessDisplayHelper;
1512
import org.alliancegenome.exceptional.client.ExceptionCatcher;
1613
import org.alliancegenome.indexer.config.IndexerConfig;
1714
import org.alliancegenome.indexer.indexers.Indexer;
18-
import org.alliancegenome.indexer.indexers.curation.interfaces.SpeciesInterface;
1915
import org.apache.commons.collections.CollectionUtils;
2016

2117
import com.fasterxml.jackson.databind.ObjectMapper;
@@ -27,30 +23,16 @@
2723
public class GeneExpressionAnnotationIndexer extends Indexer {
2824

2925
private final GeneExpressionDocumentInterface geneExpressionApi = RestProxyFactory.createProxy(GeneExpressionDocumentInterface.class, ConfigHelper.getCurationApiUrl(), RestConfig.config);
30-
private final SpeciesInterface speciesApi = RestProxyFactory.createProxy(SpeciesInterface.class, ConfigHelper.getCurationApiUrl(), RestConfig.config);
3126

3227
private List<List<String>> idBatches;
3328

34-
// taxonIdPart (e.g. "9606") -> phylogeneticOrder
35-
private Map<String, Integer> speciesOrderLookup;
36-
3729
public GeneExpressionAnnotationIndexer(IndexerConfig indexerConfig) {
3830
super(indexerConfig);
3931
}
4032

4133
@Override
4234
protected void index(ProcessDisplayHelper display) {
4335
try {
44-
speciesOrderLookup = new HashMap<>();
45-
List<Species> allSpecies = speciesApi.findForPublic(0, 100, "FieldsOnly", new HashMap<>()).getResults();
46-
for (Species species : allSpecies) {
47-
if (species.getTaxon() != null && species.getPhylogeneticOrder() != null) {
48-
String taxonIdPart = species.getTaxon().getCurie().replace("NCBITaxon:", "");
49-
speciesOrderLookup.put(taxonIdPart, species.getPhylogeneticOrder());
50-
}
51-
}
52-
log.info("Loaded {} species for speciesOrder lookup", speciesOrderLookup.size());
53-
5436
log.info("Fetching all gene IDs...");
5537
SearchResponse<String> idsResponse = geneExpressionApi.getGeneIds();
5638

@@ -103,16 +85,6 @@ protected void startSingleThread(LinkedBlockingDeque<String> queue) {
10385
}
10486
}
10587

106-
private HashMap<String, Integer> buildSpeciesOrder(String taxonCurie) {
107-
HashMap<String, Integer> order = new HashMap<>();
108-
String subjectTaxonIdPart = taxonCurie.replace("NCBITaxon:", "");
109-
for (Map.Entry<String, Integer> entry : speciesOrderLookup.entrySet()) {
110-
order.put(entry.getKey(), entry.getValue());
111-
}
112-
order.put(subjectTaxonIdPart, 0);
113-
return order;
114-
}
115-
11688
@Override
11789
protected ObjectMapper customizeObjectMapper(ObjectMapper objectMapper) {
11890
return RestConfig.config.getJacksonObjectMapperFactory().createObjectMapper();

agr_java_core/src/main/java/org/alliancegenome/core/variant/converters/VariantSummaryConverter.java

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -117,6 +117,7 @@ public List<VariantSummaryDocument> convertContextToDocument(VariantContext ctx,
117117
taxon.setName(speciesType.getName());
118118
taxon.setCurie(speciesType.getTaxonID());
119119
Species species = new Species();
120+
species.setFullName(speciesType.getName());
120121
species.setAbbreviation(speciesType.getAbbreviation());
121122
taxon.setSpecies(species);
122123
}

0 commit comments

Comments
 (0)