Skip to content

Commit 304de9b

Browse files
committed
refactor: move speciesOrder lookup and builder into base Indexer class
Consolidate the duplicated SpeciesInterface proxy, speciesOrderLookup map, loadSpeciesOrderLookup(), and buildSpeciesOrder() from both DiseaseAnnotationCurationIndexer and GeneExpressionAnnotationIndexer into the shared Indexer base class.
1 parent a69ece8 commit 304de9b

File tree

3 files changed

+35
-57
lines changed

3 files changed

+35
-57
lines changed

agr_indexer/src/main/java/org/alliancegenome/indexer/indexers/Indexer.java

Lines changed: 33 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -11,12 +11,18 @@
1111
import java.util.concurrent.LinkedBlockingDeque;
1212
import java.util.concurrent.TimeUnit;
1313

14+
import org.alliancegenome.core.config.ConfigHelper;
1415
import org.alliancegenome.core.util.StatsCollector;
1516
import org.alliancegenome.curation_api.model.document.es.ESDocument;
17+
import org.alliancegenome.curation_api.model.entities.Species;
18+
import org.alliancegenome.es.rest.RestConfig;
1619
import org.alliancegenome.es.util.EsClientFactory;
1720
import org.alliancegenome.es.util.ProcessDisplayHelper;
1821
import org.alliancegenome.exceptional.client.ExceptionCatcher;
1922
import org.alliancegenome.indexer.config.IndexerConfig;
23+
import org.alliancegenome.indexer.indexers.curation.interfaces.SpeciesInterface;
24+
25+
import si.mazi.rescu.RestProxyFactory;
2026
import org.elasticsearch.action.DocWriteRequest;
2127
import org.elasticsearch.action.bulk.BackoffPolicy;
2228
import org.elasticsearch.action.bulk.BulkProcessor;
@@ -43,6 +49,10 @@ public abstract class Indexer extends Thread {
4349
public static String indexName;
4450
protected IndexerConfig indexerConfig;
4551
private RestHighLevelClient searchClient;
52+
53+
private final SpeciesInterface speciesApi = RestProxyFactory.createProxy(SpeciesInterface.class, ConfigHelper.getCurationApiUrl(), RestConfig.config);
54+
// taxonIdPart (e.g. "9606") -> phylogeneticOrder
55+
protected Map<String, Integer> speciesOrderLookup;
4656
protected Runtime runtime = Runtime.getRuntime();
4757
protected DecimalFormat df = new DecimalFormat("#");
4858
protected ObjectMapper om = new ObjectMapper();
@@ -230,6 +240,29 @@ protected <T> List<List<T>> partition(List<T> list, int size) {
230240
return parts;
231241
}
232242

243+
protected void loadSpeciesOrderLookup() {
244+
speciesOrderLookup = new HashMap<>();
245+
List<Species> allSpecies = speciesApi.findForPublic(0, 100, "FieldsOnly", new HashMap<>()).getResults();
246+
for (Species species : allSpecies) {
247+
if (species.getTaxon() != null && species.getPhylogeneticOrder() != null) {
248+
String taxonIdPart = species.getTaxon().getCurie().replace("NCBITaxon:", "");
249+
speciesOrderLookup.put(taxonIdPart, species.getPhylogeneticOrder());
250+
}
251+
}
252+
log.info("Loaded " + speciesOrderLookup.size() + " species for speciesOrder lookup");
253+
}
254+
255+
protected HashMap<String, Integer> buildSpeciesOrder(String taxonCurie) {
256+
HashMap<String, Integer> order = new HashMap<>();
257+
String subjectTaxonIdPart = taxonCurie.replace("NCBITaxon:", "");
258+
Integer subjectOrder = speciesOrderLookup.getOrDefault(subjectTaxonIdPart, 0);
259+
for (String key : speciesOrderLookup.keySet()) {
260+
order.put(key, subjectOrder);
261+
}
262+
order.put(subjectTaxonIdPart, 0);
263+
return order;
264+
}
265+
233266
protected abstract void index(ProcessDisplayHelper display);
234267

235268
protected abstract void startSingleThread(LinkedBlockingDeque<String> queue);

agr_indexer/src/main/java/org/alliancegenome/indexer/indexers/curation/DiseaseAnnotationCurationIndexer.java

Lines changed: 1 addition & 29 deletions
Original file line numberDiff line numberDiff line change
@@ -20,7 +20,6 @@
2020
import org.alliancegenome.api.entity.DiseaseAnnotationDocument;
2121
import org.alliancegenome.api.entity.GeneDiseaseAnnotationDocument;
2222
import org.alliancegenome.core.helpers.DiseaseAnnotationHelper;
23-
import org.alliancegenome.core.config.ConfigHelper;
2423
import org.alliancegenome.curation_api.model.entities.AGMDiseaseAnnotation;
2524
import org.alliancegenome.curation_api.model.entities.AffectedGenomicModel;
2625
import org.alliancegenome.curation_api.model.entities.Allele;
@@ -32,7 +31,6 @@
3231
import org.alliancegenome.curation_api.model.entities.Gene;
3332
import org.alliancegenome.curation_api.model.entities.GeneDiseaseAnnotation;
3433
import org.alliancegenome.curation_api.model.entities.Reference;
35-
import org.alliancegenome.curation_api.model.entities.Species;
3634
import org.alliancegenome.curation_api.model.entities.VocabularyTerm;
3735
import org.alliancegenome.curation_api.model.entities.base.SubmittedObject;
3836
import org.alliancegenome.curation_api.model.entities.ontology.DOTerm;
@@ -42,13 +40,11 @@
4240
import org.alliancegenome.es.util.ProcessDisplayHelper;
4341
import org.alliancegenome.indexer.config.IndexerConfig;
4442
import org.alliancegenome.indexer.indexers.Indexer;
45-
import org.alliancegenome.indexer.indexers.curation.interfaces.SpeciesInterface;
4643
import org.alliancegenome.indexer.indexers.curation.service.AGMDiseaseAnnotationService;
4744
import org.alliancegenome.indexer.indexers.curation.service.AlleleDiseaseAnnotationService;
4845
import org.alliancegenome.indexer.indexers.curation.service.GeneDiseaseAnnotationService;
4946
import org.alliancegenome.indexer.indexers.curation.service.VocabularyTermService;
5047

51-
import si.mazi.rescu.RestProxyFactory;
5248
import org.alliancegenome.neo4j.repository.DiseaseRepository;
5349
import org.apache.commons.collections4.CollectionUtils;
5450
import org.apache.commons.lang3.tuple.Pair;
@@ -60,16 +56,11 @@
6056
@Slf4j
6157
public class DiseaseAnnotationCurationIndexer extends Indexer {
6258

63-
private final SpeciesInterface speciesApi = RestProxyFactory.createProxy(SpeciesInterface.class, ConfigHelper.getCurationApiUrl(), RestConfig.config);
64-
6559
private GeneDiseaseAnnotationService geneService;
6660
private AlleleDiseaseAnnotationService alleleService;
6761
private AGMDiseaseAnnotationService agmService;
6862
private VocabularyTermService vocabTermService;
6963

70-
// taxonIdPart (e.g. "9606") -> phylogeneticOrder
71-
private Map<String, Integer> speciesOrderLookup;
72-
7364
private Map<String, Set<String>> closureMap;
7465
private Map<String, Pair<Gene, ArrayList<DiseaseAnnotation>>> geneMap = new HashMap<>();
7566
private Map<String, Pair<Gene, ArrayList<DiseaseAnnotation>>> generatedImplicatedGeneMap = new HashMap<>();
@@ -104,15 +95,7 @@ protected void index(ProcessDisplayHelper display) {
10495
DiseaseRepository diseaseRepository = new DiseaseRepository();
10596
closureMap = diseaseRepository.getDOClosureChildMapping();
10697

107-
speciesOrderLookup = new HashMap<>();
108-
List<Species> allSpecies = speciesApi.findForPublic(0, 100, "FieldsOnly", new HashMap<>()).getResults();
109-
for (Species species : allSpecies) {
110-
if (species.getTaxon() != null && species.getPhylogeneticOrder() != null) {
111-
String taxonIdPart = species.getTaxon().getCurie().replace("NCBITaxon:", "");
112-
speciesOrderLookup.put(taxonIdPart, species.getPhylogeneticOrder());
113-
}
114-
}
115-
log.info("Loaded " + speciesOrderLookup.size() + " species for speciesOrder lookup");
98+
loadSpeciesOrderLookup();
11699

117100
indexGenes();
118101
indexAlleles();
@@ -390,17 +373,6 @@ private List<AGMDiseaseAnnotationDocument> createAGMDiseaseAnnotationDocuments()
390373
return ret;
391374
}
392375

393-
private HashMap<String, Integer> buildSpeciesOrder(String taxonCurie) {
394-
HashMap<String, Integer> order = new HashMap<>();
395-
String subjectTaxonIdPart = taxonCurie.replace("NCBITaxon:", "");
396-
Integer subjectOrder = speciesOrderLookup.getOrDefault(subjectTaxonIdPart, 0);
397-
for (String key : speciesOrderLookup.keySet()) {
398-
order.put(key, subjectOrder);
399-
}
400-
order.put(subjectTaxonIdPart, 0);
401-
return order;
402-
}
403-
404376
private void populateBaseDiseaseAnnotationDocument(BiologicalEntity biologicalEntity, DiseaseAnnotation da, DiseaseAnnotationDocument dad) {
405377
if (dad.getCountId() == null) {
406378
dad.setCountId(uniqueAnnotationCounter.getAndIncrement());

agr_indexer/src/main/java/org/alliancegenome/indexer/indexers/curation/GeneExpressionAnnotationIndexer.java

Lines changed: 1 addition & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -1,21 +1,17 @@
11
package org.alliancegenome.indexer.indexers.curation;
22

3-
import java.util.HashMap;
43
import java.util.List;
5-
import java.util.Map;
64
import java.util.concurrent.LinkedBlockingDeque;
75

86
import org.alliancegenome.core.config.ConfigHelper;
97
import org.alliancegenome.curation_api.interfaces.document.GeneExpressionDocumentInterface;
108
import org.alliancegenome.curation_api.model.document.es.GeneExpressionDocument;
11-
import org.alliancegenome.curation_api.model.entities.Species;
129
import org.alliancegenome.curation_api.response.SearchResponse;
1310
import org.alliancegenome.es.rest.RestConfig;
1411
import org.alliancegenome.es.util.ProcessDisplayHelper;
1512
import org.alliancegenome.exceptional.client.ExceptionCatcher;
1613
import org.alliancegenome.indexer.config.IndexerConfig;
1714
import org.alliancegenome.indexer.indexers.Indexer;
18-
import org.alliancegenome.indexer.indexers.curation.interfaces.SpeciesInterface;
1915
import org.apache.commons.collections.CollectionUtils;
2016

2117
import com.fasterxml.jackson.databind.ObjectMapper;
@@ -27,29 +23,17 @@
2723
public class GeneExpressionAnnotationIndexer extends Indexer {
2824

2925
private final GeneExpressionDocumentInterface geneExpressionApi = RestProxyFactory.createProxy(GeneExpressionDocumentInterface.class, ConfigHelper.getCurationApiUrl(), RestConfig.config);
30-
private final SpeciesInterface speciesApi = RestProxyFactory.createProxy(SpeciesInterface.class, ConfigHelper.getCurationApiUrl(), RestConfig.config);
3126

3227
private List<List<String>> idBatches;
3328

34-
// taxonIdPart (e.g. "9606") -> phylogeneticOrder
35-
private Map<String, Integer> speciesOrderLookup;
36-
3729
public GeneExpressionAnnotationIndexer(IndexerConfig indexerConfig) {
3830
super(indexerConfig);
3931
}
4032

4133
@Override
4234
protected void index(ProcessDisplayHelper display) {
4335
try {
44-
speciesOrderLookup = new HashMap<>();
45-
List<Species> allSpecies = speciesApi.findForPublic(0, 100, "FieldsOnly", new HashMap<>()).getResults();
46-
for (Species species : allSpecies) {
47-
if (species.getTaxon() != null && species.getPhylogeneticOrder() != null) {
48-
String taxonIdPart = species.getTaxon().getCurie().replace("NCBITaxon:", "");
49-
speciesOrderLookup.put(taxonIdPart, species.getPhylogeneticOrder());
50-
}
51-
}
52-
log.info("Loaded {} species for speciesOrder lookup", speciesOrderLookup.size());
36+
loadSpeciesOrderLookup();
5337

5438
log.info("Fetching all gene IDs...");
5539
SearchResponse<String> idsResponse = geneExpressionApi.getGeneIds();
@@ -103,17 +87,6 @@ protected void startSingleThread(LinkedBlockingDeque<String> queue) {
10387
}
10488
}
10589

106-
private HashMap<String, Integer> buildSpeciesOrder(String taxonCurie) {
107-
HashMap<String, Integer> order = new HashMap<>();
108-
String subjectTaxonIdPart = taxonCurie.replace("NCBITaxon:", "");
109-
Integer subjectOrder = speciesOrderLookup.getOrDefault(subjectTaxonIdPart, 0);
110-
for (String key : speciesOrderLookup.keySet()) {
111-
order.put(key, subjectOrder);
112-
}
113-
order.put(subjectTaxonIdPart, 0);
114-
return order;
115-
}
116-
11790
@Override
11891
protected ObjectMapper customizeObjectMapper(ObjectMapper objectMapper) {
11992
return RestConfig.config.getJacksonObjectMapperFactory().createObjectMapper();

0 commit comments

Comments
 (0)