Skip to content

Commit fa60120

Browse files
committed
refactor: remove Neo4j ID set filtering from curation indexers
The curation API is now the authoritative data source. The Neo4j ID cross-reference filters were a legacy gate from when Neo4j was the source of truth. Existing obsolete/internal checks remain as the data quality filter.

Removed Neo4j filtering from:
- GeneToGeneOrthologyIndexer (objectGene filter)
- GeneMolecularInteractionService (interacting genes filter)
- GeneGeneticInteractionService (genes + alleles filter)
- GenePhenotypeAnnotationService (subject gene filter)
- AllelePhenotypeAnnotationService (subject allele filter)
- AGMPhenotypeAnnotationService (subject AGM filter)
- SiteMapAccessionCurationIndexer (retainAll intersection)
- BaseService (all Neo4j repository usage and ID set methods)
- BaseInteractionService (hasInteractingGenesInNeo method)
1 parent 3a50833 commit fa60120

File tree

9 files changed

+20
-168
lines changed

9 files changed

+20
-168
lines changed

agr_indexer/src/main/java/org/alliancegenome/indexer/indexers/curation/GeneToGeneOrthologyIndexer.java

Lines changed: 1 addition & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,5 @@
11
package org.alliancegenome.indexer.indexers.curation;
22

3-
import java.util.ArrayList;
43
import java.util.HashSet;
54
import java.util.List;
65
import java.util.Map;
@@ -18,7 +17,6 @@
1817
import org.alliancegenome.exceptional.client.ExceptionCatcher;
1918
import org.alliancegenome.indexer.config.IndexerConfig;
2019
import org.alliancegenome.indexer.indexers.Indexer;
21-
import org.alliancegenome.indexer.indexers.curation.service.BaseService;
2220
import org.apache.commons.collections4.CollectionUtils;
2321

2422
import com.fasterxml.jackson.databind.ObjectMapper;
@@ -33,7 +31,6 @@ public class GeneToGeneOrthologyIndexer extends Indexer {
3331
private final GeneExpressionAnnotationCrudInterface geneExpressionApi = RestProxyFactory.createProxy(GeneExpressionAnnotationCrudInterface.class, ConfigHelper.getCurationApiUrl(), RestConfig.config);
3432
private final GeneDiseaseAnnotationCrudInterface geneDiseaseApi = RestProxyFactory.createProxy(GeneDiseaseAnnotationCrudInterface.class, ConfigHelper.getCurationApiUrl(), RestConfig.config);
3533

36-
private Set<String> allNeoGeneIDs;
3734
private Set<String> geneExpressionSet;
3835
private Set<String> geneAnnotationSet;
3936

@@ -45,8 +42,6 @@ public GeneToGeneOrthologyIndexer(IndexerConfig config) {
4542

4643
@Override
4744
public void index(ProcessDisplayHelper display) {
48-
BaseService baseService = new BaseService();
49-
allNeoGeneIDs = baseService.getAllNeoGeneIDs();
5045
geneExpressionSet = new HashSet<>(geneExpressionApi.annotatedGeneList().getEntities());
5146
geneAnnotationSet = new HashSet<>(geneDiseaseApi.annotatedGeneList().getEntities());
5247

@@ -99,8 +94,7 @@ protected void startSingleThread(LinkedBlockingDeque<String> queue) {
9994
}
10095
}
10196

102-
List<GeneToGeneOrthologyDocument> filteredResults = filterValidResults(results);
103-
indexDocuments(filteredResults);
97+
indexDocuments(results);
10498
} catch (Exception e) {
10599
log.error("Error while indexing...", e);
106100
ExceptionCatcher.report(e);
@@ -115,14 +109,4 @@ protected ObjectMapper customizeObjectMapper(ObjectMapper objectMapper) {
115109
return RestConfig.config.getJacksonObjectMapperFactory().createObjectMapper();
116110
}
117111

118-
private List<GeneToGeneOrthologyDocument> filterValidResults(List<GeneToGeneOrthologyDocument> docs) {
119-
List<GeneToGeneOrthologyDocument> result = new ArrayList<>();
120-
for (GeneToGeneOrthologyDocument doc : docs) {
121-
String curie = doc.getGeneToGeneOrthologyGenerated().getObjectGene().getIdentifier();
122-
if (allNeoGeneIDs.contains(curie)) {
123-
result.add(doc);
124-
}
125-
}
126-
return result;
127-
}
128112
}

agr_indexer/src/main/java/org/alliancegenome/indexer/indexers/curation/SiteMapAccessionCurationIndexer.java

Lines changed: 0 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,6 @@
1212
import org.alliancegenome.exceptional.client.ExceptionCatcher;
1313
import org.alliancegenome.indexer.config.IndexerConfig;
1414
import org.alliancegenome.indexer.indexers.Indexer;
15-
import org.alliancegenome.indexer.indexers.curation.service.BaseService;
1615
import org.alliancegenome.indexer.indexers.document.SiteMapIdDocument;
1716

1817
import si.mazi.rescu.RestProxyFactory;
@@ -33,10 +32,6 @@ protected void index(ProcessDisplayHelper display) {
3332

3433
Map<String, List<String>> map = document.getIdsByType();
3534

36-
BaseService base = new BaseService();
37-
38-
map.get("allele").retainAll(base.getAllNeoAlleleIDs());
39-
4035
List<List<String>> alleleIdLists = partition(map.get("allele"), 15000);
4136

4237
for (int i = 0; i < alleleIdLists.size(); i++) {
@@ -49,8 +44,6 @@ protected void index(ProcessDisplayHelper display) {
4944
indexDocument(doc);
5045
}
5146

52-
map.get("gene").retainAll(base.getAllNeoGeneIDs());
53-
5447
List<List<String>> geneIdLists = partition(map.get("gene"), 15000);
5548

5649
for (int i = 0; i < geneIdLists.size(); i++) {

agr_indexer/src/main/java/org/alliancegenome/indexer/indexers/curation/service/AGMPhenotypeAnnotationService.java

Lines changed: 1 addition & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -109,9 +109,7 @@ public void run() {
109109

110110
SearchResponse<AGMPhenotypeAnnotation> response = agmApi.findForPublic(page, bufferSize, params);
111111
for (AGMPhenotypeAnnotation pa : response.getResults()) {
112-
if (isValidNeoEntity(getAllNeoModelIDs(), pa.getPhenotypeAnnotationSubject().getIdentifier())) {
113-
fullList.offer(pa);
114-
}
112+
fullList.offer(pa);
115113
display.progressProcess();
116114
}
117115

agr_indexer/src/main/java/org/alliancegenome/indexer/indexers/curation/service/AllelePhenotypeAnnotationService.java

Lines changed: 1 addition & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -107,9 +107,7 @@ public void run() {
107107

108108
SearchResponse<AllelePhenotypeAnnotation> response = alleleApi.findForPublic(page, bufferSize, params);
109109
for (AllelePhenotypeAnnotation pa : response.getResults()) {
110-
if (isValidNeoEntity(getAllNeoAlleleIDs(), pa.getPhenotypeAnnotationSubject().getIdentifier())) {
111-
fullList.offer(pa);
112-
}
110+
fullList.offer(pa);
113111
display.progressProcess();
114112
}
115113

agr_indexer/src/main/java/org/alliancegenome/indexer/indexers/curation/service/BaseInteractionService.java

Lines changed: 0 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -38,20 +38,6 @@ protected <E extends GeneInteraction> boolean hasNoObsoletedOrInternalEntities(E
3838
return hasNoExcludedEntities(entitiesToBeValidated);
3939
}
4040

41-
protected <E extends GeneInteraction> boolean hasInteractingGenesInNeo(E interaction) {
42-
if (interaction.getGeneAssociationSubject() != null) {
43-
if (!isValidNeoEntity(getAllNeoGeneIDs(), interaction.getGeneAssociationSubject().getIdentifier())) {
44-
return false;
45-
}
46-
}
47-
if (interaction.getGeneGeneAssociationObject() != null) {
48-
if (!isValidNeoEntity(getAllNeoGeneIDs(), interaction.getGeneGeneAssociationObject().getIdentifier())) {
49-
return false;
50-
}
51-
}
52-
return true;
53-
}
54-
5541
protected <E extends GeneInteraction> E reverseInteraction(E forwardInteraction, E reverseInteraction) {
5642
if (forwardInteraction.getGeneAssociationSubject() == null || forwardInteraction.getGeneGeneAssociationObject() == null) {
5743
return null;
agr_indexer/src/main/java/org/alliancegenome/indexer/indexers/curation/service/BaseService.java

Lines changed: 0 additions & 85 deletions
Original file line numberDiff line numberDiff line change
@@ -1,98 +1,17 @@
11
package org.alliancegenome.indexer.indexers.curation.service;
22

33
import java.io.File;
4-
import java.util.ArrayList;
5-
import java.util.HashSet;
64
import java.util.List;
75
import java.util.concurrent.atomic.AtomicBoolean;
86

97
import org.alliancegenome.curation_api.model.entities.base.AuditedObject;
10-
import org.alliancegenome.neo4j.repository.AlleleRepository;
11-
import org.alliancegenome.neo4j.repository.GeneRepository;
12-
import org.alliancegenome.neo4j.repository.VariantRepository;
13-
import org.apache.commons.collections4.CollectionUtils;
148

159
import lombok.extern.log4j.Log4j2;
1610
import net.nilosplace.process_display.util.ObjectFileStorage;
1711

1812
@Log4j2
1913
public class BaseService {
2014

21-
private static HashSet<String> allNeoAlleleIDs;
22-
private static HashSet<String> allNeoGeneIDs;
23-
private static HashSet<String> allNeoModelIDs;
24-
private static HashSet<String> allNeoVariantIDs;
25-
26-
public HashSet<String> getAllNeoAlleleIDs() {
27-
if (allNeoAlleleIDs == null) {
28-
String alleleIdsFileName = "allele_ids.gz";
29-
List<String> alleleList = readFromCache(alleleIdsFileName, List.class);
30-
31-
if (CollectionUtils.isNotEmpty(alleleList)) {
32-
allNeoAlleleIDs = new HashSet<>(alleleList);
33-
} else {
34-
AlleleRepository alleleRepository = new AlleleRepository();
35-
allNeoAlleleIDs = new HashSet<>(alleleRepository.getAllAlleleIDs());
36-
alleleRepository.close();
37-
writeToCache(alleleIdsFileName, new ArrayList<>(allNeoAlleleIDs));
38-
}
39-
}
40-
return allNeoAlleleIDs;
41-
}
42-
43-
public HashSet<String> getAllNeoGeneIDs() {
44-
45-
if (allNeoGeneIDs == null) {
46-
String geneIdsFileName = "gene_ids.gz";
47-
List<String> geneList = readFromCache(geneIdsFileName, List.class);
48-
49-
if (CollectionUtils.isNotEmpty(geneList)) {
50-
allNeoGeneIDs = new HashSet<>(geneList);
51-
} else {
52-
GeneRepository geneRepository = new GeneRepository();
53-
allNeoGeneIDs = new HashSet<>(geneRepository.getAllGeneKeys());
54-
geneRepository.close();
55-
writeToCache(geneIdsFileName, new ArrayList<>(allNeoGeneIDs));
56-
}
57-
}
58-
59-
return allNeoGeneIDs;
60-
}
61-
62-
public HashSet<String> getAllNeoModelIDs() {
63-
if (allNeoModelIDs == null) {
64-
String modelIdsFileName = "model_ids.gz";
65-
List<String> modelList = readFromCache(modelIdsFileName, List.class);
66-
67-
if (CollectionUtils.isNotEmpty(modelList)) {
68-
allNeoModelIDs = new HashSet<>(modelList);
69-
} else {
70-
AlleleRepository alleleRepository = new AlleleRepository();
71-
allNeoModelIDs = new HashSet<>(alleleRepository.getAllModelKeys());
72-
alleleRepository.close();
73-
writeToCache(modelIdsFileName, new ArrayList<>(allNeoModelIDs));
74-
}
75-
}
76-
return allNeoModelIDs;
77-
}
78-
79-
public HashSet<String> getAllNeoVariantIDs() {
80-
if (allNeoVariantIDs == null) {
81-
String variantIdsFileName = "variant_ids.gz";
82-
List<String> variantList = readFromCache(variantIdsFileName, List.class);
83-
84-
if (CollectionUtils.isNotEmpty(variantList)) {
85-
allNeoVariantIDs = new HashSet<>(variantList);
86-
} else {
87-
VariantRepository variantRepository = new VariantRepository();
88-
allNeoVariantIDs = new HashSet<>(variantRepository.getAllVariantKeys());
89-
variantRepository.close();
90-
writeToCache(variantIdsFileName, new ArrayList<>(allNeoVariantIDs));
91-
}
92-
}
93-
return allNeoVariantIDs;
94-
}
95-
9615
protected <E> E readFromCache(String fileName, Class<E> clazz) {
9716
try {
9817
ObjectFileStorage<E> storage = new ObjectFileStorage<>();
@@ -128,8 +47,4 @@ protected boolean hasNoExcludedEntities(List<AuditedObject> entitiesToBeValidate
12847
return hasNoExcludedEntities.get();
12948
}
13049

131-
protected static boolean isValidNeoEntity(HashSet<String> neoEntityIds, String curie) {
132-
return neoEntityIds.contains(curie);
133-
}
134-
13550
}

agr_indexer/src/main/java/org/alliancegenome/indexer/indexers/curation/service/GeneGeneticInteractionService.java

Lines changed: 8 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -13,40 +13,22 @@ public List<GeneGeneticInteraction> getFilteredAndReversedInteractions(List<Gene
1313
List<GeneGeneticInteraction> validInteractions = new ArrayList<>();
1414

1515
for (GeneGeneticInteraction interaction: forwardInteractions) {
16-
if (hasPerturbatingAllelesInNeo(interaction)) {
17-
if (hasInteractingGenesInNeo(interaction)) {
18-
if (hasNoObsoletedOrInternalEntities(interaction)) {
19-
validInteractions.add(interaction);
20-
try {
21-
GeneGeneticInteraction reverseInteraction = generateReverseInteraction(interaction);
22-
if (reverseInteraction != null) {
23-
validInteractions.add(reverseInteraction);
24-
}
25-
} catch (IOException e) {
26-
e.printStackTrace();
27-
}
16+
if (hasNoObsoletedOrInternalEntities(interaction)) {
17+
validInteractions.add(interaction);
18+
try {
19+
GeneGeneticInteraction reverseInteraction = generateReverseInteraction(interaction);
20+
if (reverseInteraction != null) {
21+
validInteractions.add(reverseInteraction);
2822
}
23+
} catch (IOException e) {
24+
e.printStackTrace();
2925
}
3026
}
3127
}
3228

3329
return validInteractions;
3430
}
3531

36-
private boolean hasPerturbatingAllelesInNeo(GeneGeneticInteraction interaction) {
37-
if (interaction.getInteractorAGeneticPerturbation() != null) {
38-
if (!isValidNeoEntity(getAllNeoAlleleIDs(), interaction.getInteractorAGeneticPerturbation().getIdentifier())) {
39-
return false;
40-
}
41-
}
42-
if (interaction.getInteractorBGeneticPerturbation() != null) {
43-
if (!isValidNeoEntity(getAllNeoAlleleIDs(), interaction.getInteractorBGeneticPerturbation().getIdentifier())) {
44-
return false;
45-
}
46-
}
47-
return true;
48-
}
49-
5032
private GeneGeneticInteraction generateReverseInteraction(GeneGeneticInteraction forwardInteraction) throws IOException {
5133
GeneGeneticInteraction newInteraction = mapper.readValue(mapper.writeValueAsString(forwardInteraction), GeneGeneticInteraction.class);
5234
newInteraction.setInteractorAGeneticPerturbation(forwardInteraction.getInteractorBGeneticPerturbation());

agr_indexer/src/main/java/org/alliancegenome/indexer/indexers/curation/service/GeneMolecularInteractionService.java

Lines changed: 8 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -13,17 +13,15 @@ public List<GeneMolecularInteraction> getFilteredAndReversedInteractions(List<Ge
1313
List<GeneMolecularInteraction> validInteractions = new ArrayList<>();
1414

1515
for (GeneMolecularInteraction interaction: forwardInteractions) {
16-
if (hasInteractingGenesInNeo(interaction)) {
17-
if (hasNoObsoletedOrInternalEntities(interaction)) {
18-
validInteractions.add(interaction);
19-
try {
20-
GeneMolecularInteraction reverseInteraction = generateReverseInteraction(interaction);
21-
if (reverseInteraction != null) {
22-
validInteractions.add(reverseInteraction);
23-
}
24-
} catch (IOException e) {
25-
e.printStackTrace();
16+
if (hasNoObsoletedOrInternalEntities(interaction)) {
17+
validInteractions.add(interaction);
18+
try {
19+
GeneMolecularInteraction reverseInteraction = generateReverseInteraction(interaction);
20+
if (reverseInteraction != null) {
21+
validInteractions.add(reverseInteraction);
2622
}
23+
} catch (IOException e) {
24+
e.printStackTrace();
2725
}
2826
}
2927
}

agr_indexer/src/main/java/org/alliancegenome/indexer/indexers/curation/service/GenePhenotypeAnnotationService.java

Lines changed: 1 addition & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -108,9 +108,7 @@ public void run() {
108108

109109
SearchResponse<GenePhenotypeAnnotation> response = geneApi.findForPublic(page, bufferSize, "PhenotypeAnnotationView", params);
110110
for (GenePhenotypeAnnotation pa : response.getResults()) {
111-
if (isValidNeoEntity(getAllNeoGeneIDs(), pa.getPhenotypeAnnotationSubject().getIdentifier())) {
112-
fullList.offer(pa);
113-
}
111+
fullList.offer(pa);
114112
display.progressProcess();
115113
}
116114

0 commit comments

Comments
 (0)