Skip to content

Commit 05b717e

Browse files
authored
Merge pull request #1551 from alliance-genome/variant_indexer_improvements
Variant indexer performance improvements
2 parents a3e05b1 + c78705f commit 05b717e

3 files changed

Lines changed: 83 additions & 71 deletions

File tree

agr_java_core/src/main/java/org/alliancegenome/es/index/site/schema/Mapping.java

Lines changed: 61 additions & 53 deletions
Original file line number | Diff line number | Diff line change
@@ -81,14 +81,14 @@ protected void buildSharedSearchableDocumentMappings() throws IOException {
8181
//new FieldBuilder(builder, "id", "keyword");
8282
//new FieldBuilder(builder, "molecularFunction", "text").keyword().build();
8383
//new FieldBuilder(builder, "references.crossReferences", "nested").build();
84-
//new FieldBuilder(builder, "searchSymbol", "text").analyzer("symbols").autocomplete().keyword().keywordAutocomplete().sort().build();
85-
//new FieldBuilder(builder, "displayText", "text").keyword().sort().build();
84+
//new FieldBuilder(builder, "searchSymbol", "text").analyzer("symbols").autocomplete().keyword().keywordAutocomplete().sortSmartAlpha().build();
85+
//new FieldBuilder(builder, "displayText", "text").keyword().sortSmartAlpha().build();
8686
//new FieldBuilder(builder, "stage", "text").keyword().build();
8787
//new FieldBuilder(builder, "systematicName", "text").analyzer("symbols").build();
8888
//new FieldBuilder(builder, "taxonId", "keyword").build();
8989

9090
// Allele Only Fields
91-
new FieldBuilder(builder, "alterationType", "text").keyword().sort().build(); // Allele
91+
new FieldBuilder(builder, "alterationType", "text").keyword().sortSmartAlpha().build(); // Allele
9292
new FieldBuilder(builder, "constructs", "text").keyword().classicText().build(); // allele
9393
new FieldBuilder(builder, "constructExpressedComponent", "text").keyword().build(); // allele
9494
new FieldBuilder(builder, "constructKnockdownComponent", "text").keyword().build(); // allele
@@ -132,25 +132,25 @@ protected void buildSharedSearchableDocumentMappings() throws IOException {
132132
new FieldBuilder(builder, "diseasesAgrSlim", "text").keyword().build(); // gene, allele, model
133133
new FieldBuilder(builder, "diseasesWithParents", "text").keyword().build(); // gene, allele, model
134134
new FieldBuilder(builder, "phenotypeStatements", "text").keyword().build(); // gene, allele, model
135-
new FieldBuilder(builder, "phenotypeStatement", "text").keyword().sort().build(); // phenotype annotation
135+
new FieldBuilder(builder, "phenotypeStatement", "text").keyword().sortSmartAlpha().build(); // phenotype annotation
136136

137-
new FieldBuilder(builder, "object.name", "text").keyword().sort().build(); // gene_disease_annotation, allele_disease_annotation, agm_disease_annotation
138-
new FieldBuilder(builder, "object.curie", "text").keyword().sort().build(); // gene_disease_annotation, allele_disease_annotation, agm_disease_annotation
139-
new FieldBuilder(builder, "subject.primaryExternalId", "text").keyword().sort().build(); // gene_disease_annotation, allele_disease_annotation, agm_disease_annotation
140-
new FieldBuilder(builder, "subject.taxon.species.fullName", "text").keyword().sort().build(); // gene_disease_annotation, allele_disease_annotation, agm_disease_annotation
137+
new FieldBuilder(builder, "object.name", "text").keyword().sortSmartAlpha().build(); // gene_disease_annotation, allele_disease_annotation, agm_disease_annotation
138+
new FieldBuilder(builder, "object.curie", "text").keyword().sortSmartAlpha().build(); // gene_disease_annotation, allele_disease_annotation, agm_disease_annotation
139+
new FieldBuilder(builder, "subject.primaryExternalId", "text").keyword().sortSmartAlpha().build(); // gene_disease_annotation, allele_disease_annotation, agm_disease_annotation
140+
new FieldBuilder(builder, "subject.taxon.species.fullName", "text").keyword().sortSmartAlpha().build(); // gene_disease_annotation, allele_disease_annotation, agm_disease_annotation
141141

142142
new FieldBuilder(builder, "anatomicalExpressionSlim", "text").keyword().build(); // gene, dataset
143143
new FieldBuilder(builder, "whereExpressed", "text").keyword().build(); // gene, dataset
144144

145-
new FieldBuilder(builder, "associatedSpecies", "text").keyword().synonym().sort().build(); // go, disease
146-
new FieldBuilder(builder, AffectedGenomicModel.HAS_DISEASE_AND_PHENOTYPE_ANNOTATIONS.getFieldName(), "text").keyword().sort().build(); // associated phenotypes for model objects
147-
new FieldBuilder(builder, AffectedGenomicModel.HAS_DISEASE_ANNOTATIONS.getFieldName(), "text").keyword().sort().build(); // associated phenotypes for model objects
148-
new FieldBuilder(builder, AffectedGenomicModel.HAS_PHENOTYPE_ANNOTATIONS.getFieldName(), "text").keyword().sort().build(); // associated phenotypes for model objects
149-
new FieldBuilder(builder, "model.agmFullName.displayText", "text").keyword().sort().build(); //
150-
new FieldBuilder(builder, "model.agmFullName.formatText", "text").keyword().sort().build(); //
151-
new FieldBuilder(builder, "alleleDocument.allele.alleleSymbol.formatText", "text").keyword().sort().build(); //
152-
new FieldBuilder(builder, "alleleDocument.allele.taxon.species.fullName", "text").keyword().sort().build(); //
153-
new FieldBuilder(builder, "alleleDocument.phylogeneticSortingIndex", "long").keyword().sort().build(); //
145+
new FieldBuilder(builder, "associatedSpecies", "text").keyword().synonym().sortSmartAlpha().build(); // go, disease
146+
new FieldBuilder(builder, AffectedGenomicModel.HAS_DISEASE_AND_PHENOTYPE_ANNOTATIONS.getFieldName(), "text").keyword().sortSmartAlpha().build(); // associated phenotypes for model objects
147+
new FieldBuilder(builder, AffectedGenomicModel.HAS_DISEASE_ANNOTATIONS.getFieldName(), "text").keyword().sortSmartAlpha().build(); // associated phenotypes for model objects
148+
new FieldBuilder(builder, AffectedGenomicModel.HAS_PHENOTYPE_ANNOTATIONS.getFieldName(), "text").keyword().sortSmartAlpha().build(); // associated phenotypes for model objects
149+
new FieldBuilder(builder, "model.agmFullName.displayText", "text").keyword().sortSmartAlpha().build(); //
150+
new FieldBuilder(builder, "model.agmFullName.formatText", "text").keyword().sortSmartAlpha().build(); //
151+
new FieldBuilder(builder, "alleleDocument.allele.alleleSymbol.formatText", "text").keyword().sortSmartAlpha().build(); //
152+
new FieldBuilder(builder, "alleleDocument.allele.taxon.species.fullName", "text").keyword().sortSmartAlpha().build(); //
153+
new FieldBuilder(builder, "alleleDocument.phylogeneticSortingIndex", "long").keyword().sortSmartAlpha().build(); //
154154
new FieldBuilder(builder, "definition", "text").standardText().build(); // go, disease
155155

156156
new FieldBuilder(builder, "models", "text").keyword().autocomplete().build(); // gene, disease
@@ -175,32 +175,32 @@ protected void buildSharedSearchableDocumentMappings() throws IOException {
175175
new FieldBuilder(builder, "nameKey", "text").analyzer("symbols").autocomplete().keyword().keywordAutocomplete().htmlSmoosh().standardBigrams().build(); // allele, gene, model, dataset, disease, go_search_result
176176

177177

178-
new FieldBuilder(builder, "subject.alleleSymbol.displayText", "text").keyword().sort().build(); // allele_disease_annotation
179-
new FieldBuilder(builder, "subject.geneSymbol.displayText", "text").keyword().sort().build(); // gene_disease_annotation
180-
new FieldBuilder(builder, "subject.name", "text").keyword().sort().build(); // agm_disease_annotation
178+
new FieldBuilder(builder, "subject.alleleSymbol.displayText", "text").keyword().sortSmartAlpha().build(); // allele_disease_annotation
179+
new FieldBuilder(builder, "subject.geneSymbol.displayText", "text").keyword().sortSmartAlpha().build(); // gene_disease_annotation
180+
new FieldBuilder(builder, "subject.name", "text").keyword().sortSmartAlpha().build(); // agm_disease_annotation
181181

182182

183183
new FieldBuilder(builder, "popularity", "double").build(); // gene, model, dataset, disease
184184
new FieldBuilder(builder, "primaryKey", "keyword").build(); // allele, gene, model, go, dataset, disease
185-
new FieldBuilder(builder, "symbol", "text").analyzer("symbols").autocomplete().htmlSmoosh().keyword().keywordAutocomplete().sort().build(); // allele, gene
185+
new FieldBuilder(builder, "symbol", "text").analyzer("symbols").autocomplete().htmlSmoosh().keyword().keywordAutocomplete().sortSmartAlpha().build(); // allele, gene
186186

187187

188-
new FieldBuilder(builder, "species", "text").keyword().synonym().sort().build(); // allele, gene, model, dataset
188+
new FieldBuilder(builder, "species", "text").keyword().synonym().sortSmartAlpha().build(); // allele, gene, model, dataset
189189
new FieldBuilder(builder, "synonyms", "text").analyzer("symbols").autocomplete().keyword().keywordAutocomplete().htmlSmoosh().standardBigrams().build(); // gene, go, disease, model
190190

191191
// GeneMolecularInteractionDocument: dynamic false (~12 indexed fields)
192192
builder.startObject("geneMolecularInteraction");
193193
builder.field("type", "object");
194194
builder.field("dynamic", false);
195195
builder.endObject();
196-
new FieldBuilder(builder, "geneMolecularInteraction.geneGeneAssociationObject.geneSymbol.displayText", "text").keyword().sort().build();
197-
new FieldBuilder(builder, "geneMolecularInteraction.geneGeneAssociationObject.taxon.name", "text").keyword().sort().build();
198-
new FieldBuilder(builder, "geneMolecularInteraction.geneGeneAssociationObject.taxon.species.fullName", "text").keyword().sort().build();
196+
new FieldBuilder(builder, "geneMolecularInteraction.geneGeneAssociationObject.geneSymbol.displayText", "text").keyword().sortSmartAlpha().build();
197+
new FieldBuilder(builder, "geneMolecularInteraction.geneGeneAssociationObject.taxon.name", "text").keyword().sortSmartAlpha().build();
198+
new FieldBuilder(builder, "geneMolecularInteraction.geneGeneAssociationObject.taxon.species.fullName", "text").keyword().sortSmartAlpha().build();
199199
new FieldBuilder(builder, "geneMolecularInteraction.geneAssociationSubject.curie", "text").keyword().build();
200200
new FieldBuilder(builder, "geneMolecularInteraction.geneAssociationSubject.primaryExternalId", "text").keyword().build();
201-
new FieldBuilder(builder, "geneMolecularInteraction.interactorAType.name", "text").keyword().sort().build();
202-
new FieldBuilder(builder, "geneMolecularInteraction.interactorBType.name", "text").keyword().sort().build();
203-
new FieldBuilder(builder, "geneMolecularInteraction.detectionMethod.name", "text").keyword().sort().build();
201+
new FieldBuilder(builder, "geneMolecularInteraction.interactorAType.name", "text").keyword().sortSmartAlpha().build();
202+
new FieldBuilder(builder, "geneMolecularInteraction.interactorBType.name", "text").keyword().sortSmartAlpha().build();
203+
new FieldBuilder(builder, "geneMolecularInteraction.detectionMethod.name", "text").keyword().sortSmartAlpha().build();
204204
new FieldBuilder(builder, "geneMolecularInteraction.evidence.referenceID", "text").keyword().build();
205205
new FieldBuilder(builder, "geneMolecularInteraction.interactionId", "text").keyword().build();
206206
new FieldBuilder(builder, "geneMolecularInteraction.aggregationDatabase.name", "text").keyword().build();
@@ -212,9 +212,9 @@ protected void buildSharedSearchableDocumentMappings() throws IOException {
212212
builder.field("type", "object");
213213
builder.field("dynamic", false);
214214
builder.endObject();
215-
new FieldBuilder(builder, "geneGeneticInteraction.geneGeneAssociationObject.geneSymbol.displayText", "text").keyword().sort().build();
216-
new FieldBuilder(builder, "geneGeneticInteraction.geneGeneAssociationObject.taxon.name", "text").keyword().sort().build();
217-
new FieldBuilder(builder, "geneGeneticInteraction.geneGeneAssociationObject.taxon.species.fullName", "text").keyword().sort().build();
215+
new FieldBuilder(builder, "geneGeneticInteraction.geneGeneAssociationObject.geneSymbol.displayText", "text").keyword().sortSmartAlpha().build();
216+
new FieldBuilder(builder, "geneGeneticInteraction.geneGeneAssociationObject.taxon.name", "text").keyword().sortSmartAlpha().build();
217+
new FieldBuilder(builder, "geneGeneticInteraction.geneGeneAssociationObject.taxon.species.fullName", "text").keyword().sortSmartAlpha().build();
218218
new FieldBuilder(builder, "geneGeneticInteraction.geneAssociationSubject.curie", "text").keyword().build();
219219
new FieldBuilder(builder, "geneGeneticInteraction.geneAssociationSubject.primaryExternalId", "text").keyword().build();
220220
new FieldBuilder(builder, "geneGeneticInteraction.interactorARole.name", "text").keyword().build();
@@ -256,13 +256,13 @@ protected void buildSharedSearchableDocumentMappings() throws IOException {
256256
builder.field("type", "object");
257257
builder.field("dynamic", false);
258258
builder.endObject();
259-
new FieldBuilder(builder, "geneExpressionAnnotation.expressionAnnotationSubject.geneSymbol.displayText", "text").keyword().sort().build();
259+
new FieldBuilder(builder, "geneExpressionAnnotation.expressionAnnotationSubject.geneSymbol.displayText", "text").keyword().sortSmartAlpha().build();
260260
new FieldBuilder(builder, "geneExpressionAnnotation.expressionAnnotationSubject.primaryExternalId", "text").keyword().build();
261-
new FieldBuilder(builder, "geneExpressionAnnotation.expressionAnnotationSubject.taxon.name", "text").keyword().sort().build();
262-
new FieldBuilder(builder, "geneExpressionAnnotation.expressionAnnotationSubject.taxon.species.fullName", "text").keyword().sort().build();
263-
new FieldBuilder(builder, "geneExpressionAnnotation.whereExpressedStatement", "text").keyword().sort().build();
264-
new FieldBuilder(builder, "geneExpressionAnnotation.whenExpressedStageName", "text").keyword().sort().build();
265-
new FieldBuilder(builder, "geneExpressionAnnotation.expressionAssayUsed.name", "text").keyword().sort().build();
261+
new FieldBuilder(builder, "geneExpressionAnnotation.expressionAnnotationSubject.taxon.name", "text").keyword().sortSmartAlpha().build();
262+
new FieldBuilder(builder, "geneExpressionAnnotation.expressionAnnotationSubject.taxon.species.fullName", "text").keyword().sortSmartAlpha().build();
263+
new FieldBuilder(builder, "geneExpressionAnnotation.whereExpressedStatement", "text").keyword().sortSmartAlpha().build();
264+
new FieldBuilder(builder, "geneExpressionAnnotation.whenExpressedStageName", "text").keyword().sortSmartAlpha().build();
265+
new FieldBuilder(builder, "geneExpressionAnnotation.expressionAssayUsed.name", "text").keyword().sortSmartAlpha().build();
266266
new FieldBuilder(builder, "geneExpressionAnnotation.crossReferences.referencedCurie", "text").keyword().build();
267267

268268
// AlleleSummaryDocument: dynamic false on deep curation API objects
@@ -273,10 +273,10 @@ protected void buildSharedSearchableDocumentMappings() throws IOException {
273273
new FieldBuilder(builder, "primaryExternalId", "text").keyword().build();
274274
builder.endObject();
275275
builder.endObject();
276-
new FieldBuilder(builder, "allele.alleleSynonyms.displayText", "text").keyword().sort().build();
276+
new FieldBuilder(builder, "allele.alleleSynonyms.displayText", "text").keyword().sortSmartAlpha().build();
277277
new FieldBuilder(builder, "hasDisease", "boolean").build();
278278
new FieldBuilder(builder, "hasPhenotype", "boolean").build();
279-
new FieldBuilder(builder, "alterationTypeSortOrder", "integer").keyword().sort().build();
279+
new FieldBuilder(builder, "alterationTypeSortOrder", "integer").keyword().sortSmartAlpha().build();
280280

281281
builder.startObject("alleleOfGene");
282282
builder.field("dynamic", false);
@@ -290,9 +290,9 @@ protected void buildSharedSearchableDocumentMappings() throws IOException {
290290
builder.field("type", "object");
291291
builder.field("dynamic", false);
292292
builder.endObject();
293-
new FieldBuilder(builder, "variants.variantType.name", "text").keyword().sort().build();
294-
new FieldBuilder(builder, "variants.curatedVariantGenomicLocations.predictedVariantConsequences.vepConsequences.name", "text").keyword().sort().build();
295-
new FieldBuilder(builder, "variants.curatedVariantGenomicLocations.hgvs", "text").keyword().sort().build();
293+
new FieldBuilder(builder, "variants.variantType.name", "text").keyword().sortSmartAlpha().build();
294+
new FieldBuilder(builder, "variants.curatedVariantGenomicLocations.predictedVariantConsequences.vepConsequences.name", "text").keyword().sortSmartAlpha().build();
295+
new FieldBuilder(builder, "variants.curatedVariantGenomicLocations.hgvs", "text").keyword().sortSmartAlpha().build();
296296
builder.startObject("crossReference");
297297
builder.field("type", "object");
298298
builder.field("dynamic", false);
@@ -305,8 +305,8 @@ protected void buildSharedSearchableDocumentMappings() throws IOException {
305305
builder.startObject("properties");
306306
builder.endObject();
307307
builder.endObject();
308-
new FieldBuilder(builder, "variant.variantType.name", "text").keyword().sort().build();
309-
new FieldBuilder(builder, "variant.curatedVariantGenomicLocations.hgvs", "text").keyword().sort().build();
308+
new FieldBuilder(builder, "variant.variantType.name", "text").keyword().sortSmartAlpha().build();
309+
new FieldBuilder(builder, "variant.curatedVariantGenomicLocations.hgvs", "text").keyword().sortSmartAlpha().build();
310310
new FieldBuilder(builder, "variant.curatedVariantGenomicLocations.start", "integer").build();
311311
new FieldBuilder(builder, "variant.curatedVariantGenomicLocations.end", "integer").build();
312312
new FieldBuilder(builder, "variant.curatedVariantGenomicLocations.variantGenomicLocationAssociationObject.name", "text").keyword().build();
@@ -322,11 +322,11 @@ protected void buildSharedSearchableDocumentMappings() throws IOException {
322322
new FieldBuilder(builder, "intronExonLocation", "text").keyword().build();
323323
builder.endObject();
324324
builder.endObject();
325-
new FieldBuilder(builder, "consequence.vepImpact.name", "text").keyword().sort().build();
326-
new FieldBuilder(builder, "consequence.vepConsequences.name", "text").keyword().sort().build();
325+
new FieldBuilder(builder, "consequence.vepImpact.name", "text").keyword().sortSmartAlpha().build();
326+
new FieldBuilder(builder, "consequence.vepConsequences.name", "text").keyword().sortSmartAlpha().build();
327327
new FieldBuilder(builder, "consequence.siftPrediction.name", "text").keyword().build();
328328
new FieldBuilder(builder, "consequence.polyphenPrediction.name", "text").keyword().build();
329-
new FieldBuilder(builder, "consequence.variantTranscript.name", "text").keyword().sort().build();
329+
new FieldBuilder(builder, "consequence.variantTranscript.name", "text").keyword().sortSmartAlpha().build();
330330
new FieldBuilder(builder, "consequence.variantTranscript.transcriptType.name", "text").keyword().build();
331331
new FieldBuilder(builder, "consequence.variantTranscript.transcriptGeneAssociations.transcriptGeneAssociationObject.geneSymbol.displayText", "text").keyword().build();
332332

@@ -344,7 +344,8 @@ public static class FieldBuilder {
344344
boolean keyword;
345345
boolean keywordAutocomplete;
346346
boolean letterText;
347-
boolean sort;
347+
boolean sortSmartAlpha;
348+
boolean sortLowercase;
348349
boolean standardBigrams;
349350
boolean standardText;
350351
boolean symbol;
@@ -392,8 +393,13 @@ public FieldBuilder letterText() {
392393
return this;
393394
}
394395

395-
public FieldBuilder sort() {
396-
this.sort = true;
396+
public FieldBuilder sortSmartAlpha() {
397+
this.sortSmartAlpha = true;
398+
return this;
399+
}
400+
401+
public FieldBuilder sortLowercase() {
402+
this.sortLowercase = true;
397403
return this;
398404
}
399405

@@ -458,7 +464,7 @@ public void build() throws IOException {
458464
if (analyzer != null) {
459465
builder.field("analyzer", analyzer);
460466
}
461-
if (symbol || autocomplete || keyword || keywordAutocomplete || synonym || sort || standardText) {
467+
if (symbol || autocomplete || keyword || keywordAutocomplete || synonym || sortSmartAlpha || sortLowercase || standardText) {
462468
builder.startObject("fields");
463469
if (keyword) {
464470
buildProperty("keyword", "keyword");
@@ -481,10 +487,12 @@ public void build() throws IOException {
481487
if (synonym) {
482488
buildProperty("synonyms", "text", "generic_synonym", "autocomplete_search", null);
483489
}
484-
if (sort) {
485-
//buildProperty("sort", "keyword", null, null, "lowercase");
490+
if (sortSmartAlpha) {
486491
buildProperty("sort", "keyword", null, null, "smart_alpha_sort");
487492
}
493+
if (sortLowercase) {
494+
buildProperty("sort", "keyword", null, null, "lowercase");
495+
}
488496
if (htmlSmoosh) {
489497
buildProperty("htmlSmoosh", "text", "html_smoosh");
490498
}

0 commit comments

Comments
 (0)