From c78705f44d8890793139c8dc8c95b50b5c343c68 Mon Sep 17 00:00:00 2001 From: Olin Blodgett Date: Fri, 3 Apr 2026 13:24:12 -0400 Subject: [PATCH] feat: variant indexer performance improvements - Add sortSmartAlpha() and sortLowercase() to FieldBuilder, replacing the single sort() method. sortSmartAlpha uses the regex-based smart_alpha_sort normalizer (zero-pad numbers for natural sort). sortLowercase uses the simple lowercase normalizer (no regex). - Switch 12 of 17 variant index sort fields to sortLowercase where field values are pure text (vepImpact, siftPrediction, polyphenPrediction, transcriptType, variantType, vepConsequences, alterationTypeSortOrder, associatedPhenotype, diseaseTerms, alleleSynonyms; variantType and vepConsequences are each mapped twice, giving 12). Only 5 fields with meaningful numbers (symbol, hgvs in two mappings, variantTranscript.name, geneSymbol) remain on sortSmartAlpha. This eliminates ~7 billion regex evaluations during variant indexing (600M+ docs). The smart_alpha_sort regex (PatternReplaceCharFilter) was consuming 95% of ES write thread CPU per hot thread analysis. - Add catch-all Exception handler in RoutedBulkIndexer to log uncaught RuntimeExceptions that silently kill BP threads, then System.exit(-1) to fail fast. Investigation found 7 of 32 MGD BP threads dying silently during indexing with no stack trace. - All site_index sort() calls renamed to sortSmartAlpha() (no behavior change). 
--- .../es/index/site/schema/Mapping.java | 114 ++++++++++-------- .../es/index/site/schema/VariantMapping.java | 34 +++--- .../es/managers/RoutedBulkIndexer.java | 6 +- 3 files changed, 83 insertions(+), 71 deletions(-) diff --git a/agr_java_core/src/main/java/org/alliancegenome/es/index/site/schema/Mapping.java b/agr_java_core/src/main/java/org/alliancegenome/es/index/site/schema/Mapping.java index 5100c061a..38f82fc12 100644 --- a/agr_java_core/src/main/java/org/alliancegenome/es/index/site/schema/Mapping.java +++ b/agr_java_core/src/main/java/org/alliancegenome/es/index/site/schema/Mapping.java @@ -81,14 +81,14 @@ protected void buildSharedSearchableDocumentMappings() throws IOException { //new FieldBuilder(builder, "id", "keyword"); //new FieldBuilder(builder, "molecularFunction", "text").keyword().build(); //new FieldBuilder(builder, "references.crossReferences", "nested").build(); - //new FieldBuilder(builder, "searchSymbol", "text").analyzer("symbols").autocomplete().keyword().keywordAutocomplete().sort().build(); - //new FieldBuilder(builder, "displayText", "text").keyword().sort().build(); + //new FieldBuilder(builder, "searchSymbol", "text").analyzer("symbols").autocomplete().keyword().keywordAutocomplete().sortSmartAlpha().build(); + //new FieldBuilder(builder, "displayText", "text").keyword().sortSmartAlpha().build(); //new FieldBuilder(builder, "stage", "text").keyword().build(); //new FieldBuilder(builder, "systematicName", "text").analyzer("symbols").build(); //new FieldBuilder(builder, "taxonId", "keyword").build(); // Allele Only Fields - new FieldBuilder(builder, "alterationType", "text").keyword().sort().build(); // Allele + new FieldBuilder(builder, "alterationType", "text").keyword().sortSmartAlpha().build(); // Allele new FieldBuilder(builder, "constructs", "text").keyword().classicText().build(); // allele new FieldBuilder(builder, "constructExpressedComponent", "text").keyword().build(); // allele new FieldBuilder(builder, 
"constructKnockdownComponent", "text").keyword().build(); // allele @@ -132,25 +132,25 @@ protected void buildSharedSearchableDocumentMappings() throws IOException { new FieldBuilder(builder, "diseasesAgrSlim", "text").keyword().build(); // gene, allele, model new FieldBuilder(builder, "diseasesWithParents", "text").keyword().build(); // gene, allele, model new FieldBuilder(builder, "phenotypeStatements", "text").keyword().build(); // gene, allele, model - new FieldBuilder(builder, "phenotypeStatement", "text").keyword().sort().build(); // phenotype annotation + new FieldBuilder(builder, "phenotypeStatement", "text").keyword().sortSmartAlpha().build(); // phenotype annotation - new FieldBuilder(builder, "object.name", "text").keyword().sort().build(); // gene_disease_annotation, allele_disease_annotation, agm_disease_annotation - new FieldBuilder(builder, "object.curie", "text").keyword().sort().build(); // gene_disease_annotation, allele_disease_annotation, agm_disease_annotation - new FieldBuilder(builder, "subject.primaryExternalId", "text").keyword().sort().build(); // gene_disease_annotation, allele_disease_annotation, agm_disease_annotation - new FieldBuilder(builder, "subject.taxon.species.fullName", "text").keyword().sort().build(); // gene_disease_annotation, allele_disease_annotation, agm_disease_annotation + new FieldBuilder(builder, "object.name", "text").keyword().sortSmartAlpha().build(); // gene_disease_annotation, allele_disease_annotation, agm_disease_annotation + new FieldBuilder(builder, "object.curie", "text").keyword().sortSmartAlpha().build(); // gene_disease_annotation, allele_disease_annotation, agm_disease_annotation + new FieldBuilder(builder, "subject.primaryExternalId", "text").keyword().sortSmartAlpha().build(); // gene_disease_annotation, allele_disease_annotation, agm_disease_annotation + new FieldBuilder(builder, "subject.taxon.species.fullName", "text").keyword().sortSmartAlpha().build(); // gene_disease_annotation, 
allele_disease_annotation, agm_disease_annotation new FieldBuilder(builder, "anatomicalExpressionSlim", "text").keyword().build(); // gene, dataset new FieldBuilder(builder, "whereExpressed", "text").keyword().build(); // gene, dataset - new FieldBuilder(builder, "associatedSpecies", "text").keyword().synonym().sort().build(); // go, disease - new FieldBuilder(builder, AffectedGenomicModel.HAS_DISEASE_AND_PHENOTYPE_ANNOTATIONS.getFieldName(), "text").keyword().sort().build(); // associated phenotypes for model objects - new FieldBuilder(builder, AffectedGenomicModel.HAS_DISEASE_ANNOTATIONS.getFieldName(), "text").keyword().sort().build(); // associated phenotypes for model objects - new FieldBuilder(builder, AffectedGenomicModel.HAS_PHENOTYPE_ANNOTATIONS.getFieldName(), "text").keyword().sort().build(); // associated phenotypes for model objects - new FieldBuilder(builder, "model.agmFullName.displayText", "text").keyword().sort().build(); // - new FieldBuilder(builder, "model.agmFullName.formatText", "text").keyword().sort().build(); // - new FieldBuilder(builder, "alleleDocument.allele.alleleSymbol.formatText", "text").keyword().sort().build(); // - new FieldBuilder(builder, "alleleDocument.allele.taxon.species.fullName", "text").keyword().sort().build(); // - new FieldBuilder(builder, "alleleDocument.phylogeneticSortingIndex", "long").keyword().sort().build(); // + new FieldBuilder(builder, "associatedSpecies", "text").keyword().synonym().sortSmartAlpha().build(); // go, disease + new FieldBuilder(builder, AffectedGenomicModel.HAS_DISEASE_AND_PHENOTYPE_ANNOTATIONS.getFieldName(), "text").keyword().sortSmartAlpha().build(); // associated phenotypes for model objects + new FieldBuilder(builder, AffectedGenomicModel.HAS_DISEASE_ANNOTATIONS.getFieldName(), "text").keyword().sortSmartAlpha().build(); // associated phenotypes for model objects + new FieldBuilder(builder, AffectedGenomicModel.HAS_PHENOTYPE_ANNOTATIONS.getFieldName(), 
"text").keyword().sortSmartAlpha().build(); // associated phenotypes for model objects + new FieldBuilder(builder, "model.agmFullName.displayText", "text").keyword().sortSmartAlpha().build(); // + new FieldBuilder(builder, "model.agmFullName.formatText", "text").keyword().sortSmartAlpha().build(); // + new FieldBuilder(builder, "alleleDocument.allele.alleleSymbol.formatText", "text").keyword().sortSmartAlpha().build(); // + new FieldBuilder(builder, "alleleDocument.allele.taxon.species.fullName", "text").keyword().sortSmartAlpha().build(); // + new FieldBuilder(builder, "alleleDocument.phylogeneticSortingIndex", "long").keyword().sortSmartAlpha().build(); // new FieldBuilder(builder, "definition", "text").standardText().build(); // go, disease new FieldBuilder(builder, "models", "text").keyword().autocomplete().build(); // gene, disease @@ -175,17 +175,17 @@ protected void buildSharedSearchableDocumentMappings() throws IOException { new FieldBuilder(builder, "nameKey", "text").analyzer("symbols").autocomplete().keyword().keywordAutocomplete().htmlSmoosh().standardBigrams().build(); // allele, gene, model, dataset, disease, go_search_result - new FieldBuilder(builder, "subject.alleleSymbol.displayText", "text").keyword().sort().build(); // allele_disease_annotation - new FieldBuilder(builder, "subject.geneSymbol.displayText", "text").keyword().sort().build(); // gene_disease_annotation - new FieldBuilder(builder, "subject.name", "text").keyword().sort().build(); // agm_disease_annotation + new FieldBuilder(builder, "subject.alleleSymbol.displayText", "text").keyword().sortSmartAlpha().build(); // allele_disease_annotation + new FieldBuilder(builder, "subject.geneSymbol.displayText", "text").keyword().sortSmartAlpha().build(); // gene_disease_annotation + new FieldBuilder(builder, "subject.name", "text").keyword().sortSmartAlpha().build(); // agm_disease_annotation new FieldBuilder(builder, "popularity", "double").build(); // gene, model, dataset, disease new 
FieldBuilder(builder, "primaryKey", "keyword").build(); // allele, gene, model, go, dataset, disease - new FieldBuilder(builder, "symbol", "text").analyzer("symbols").autocomplete().htmlSmoosh().keyword().keywordAutocomplete().sort().build(); // allele, gene + new FieldBuilder(builder, "symbol", "text").analyzer("symbols").autocomplete().htmlSmoosh().keyword().keywordAutocomplete().sortSmartAlpha().build(); // allele, gene - new FieldBuilder(builder, "species", "text").keyword().synonym().sort().build(); // allele, gene, model, dataset + new FieldBuilder(builder, "species", "text").keyword().synonym().sortSmartAlpha().build(); // allele, gene, model, dataset new FieldBuilder(builder, "synonyms", "text").analyzer("symbols").autocomplete().keyword().keywordAutocomplete().htmlSmoosh().standardBigrams().build(); // gene, go, disease, model // GeneMolecularInteractionDocument: dynamic false (~12 indexed fields) @@ -193,14 +193,14 @@ protected void buildSharedSearchableDocumentMappings() throws IOException { builder.field("type", "object"); builder.field("dynamic", false); builder.endObject(); - new FieldBuilder(builder, "geneMolecularInteraction.geneGeneAssociationObject.geneSymbol.displayText", "text").keyword().sort().build(); - new FieldBuilder(builder, "geneMolecularInteraction.geneGeneAssociationObject.taxon.name", "text").keyword().sort().build(); - new FieldBuilder(builder, "geneMolecularInteraction.geneGeneAssociationObject.taxon.species.fullName", "text").keyword().sort().build(); + new FieldBuilder(builder, "geneMolecularInteraction.geneGeneAssociationObject.geneSymbol.displayText", "text").keyword().sortSmartAlpha().build(); + new FieldBuilder(builder, "geneMolecularInteraction.geneGeneAssociationObject.taxon.name", "text").keyword().sortSmartAlpha().build(); + new FieldBuilder(builder, "geneMolecularInteraction.geneGeneAssociationObject.taxon.species.fullName", "text").keyword().sortSmartAlpha().build(); new FieldBuilder(builder, 
"geneMolecularInteraction.geneAssociationSubject.curie", "text").keyword().build(); new FieldBuilder(builder, "geneMolecularInteraction.geneAssociationSubject.primaryExternalId", "text").keyword().build(); - new FieldBuilder(builder, "geneMolecularInteraction.interactorAType.name", "text").keyword().sort().build(); - new FieldBuilder(builder, "geneMolecularInteraction.interactorBType.name", "text").keyword().sort().build(); - new FieldBuilder(builder, "geneMolecularInteraction.detectionMethod.name", "text").keyword().sort().build(); + new FieldBuilder(builder, "geneMolecularInteraction.interactorAType.name", "text").keyword().sortSmartAlpha().build(); + new FieldBuilder(builder, "geneMolecularInteraction.interactorBType.name", "text").keyword().sortSmartAlpha().build(); + new FieldBuilder(builder, "geneMolecularInteraction.detectionMethod.name", "text").keyword().sortSmartAlpha().build(); new FieldBuilder(builder, "geneMolecularInteraction.evidence.referenceID", "text").keyword().build(); new FieldBuilder(builder, "geneMolecularInteraction.interactionId", "text").keyword().build(); new FieldBuilder(builder, "geneMolecularInteraction.aggregationDatabase.name", "text").keyword().build(); @@ -212,9 +212,9 @@ protected void buildSharedSearchableDocumentMappings() throws IOException { builder.field("type", "object"); builder.field("dynamic", false); builder.endObject(); - new FieldBuilder(builder, "geneGeneticInteraction.geneGeneAssociationObject.geneSymbol.displayText", "text").keyword().sort().build(); - new FieldBuilder(builder, "geneGeneticInteraction.geneGeneAssociationObject.taxon.name", "text").keyword().sort().build(); - new FieldBuilder(builder, "geneGeneticInteraction.geneGeneAssociationObject.taxon.species.fullName", "text").keyword().sort().build(); + new FieldBuilder(builder, "geneGeneticInteraction.geneGeneAssociationObject.geneSymbol.displayText", "text").keyword().sortSmartAlpha().build(); + new FieldBuilder(builder, 
"geneGeneticInteraction.geneGeneAssociationObject.taxon.name", "text").keyword().sortSmartAlpha().build(); + new FieldBuilder(builder, "geneGeneticInteraction.geneGeneAssociationObject.taxon.species.fullName", "text").keyword().sortSmartAlpha().build(); new FieldBuilder(builder, "geneGeneticInteraction.geneAssociationSubject.curie", "text").keyword().build(); new FieldBuilder(builder, "geneGeneticInteraction.geneAssociationSubject.primaryExternalId", "text").keyword().build(); new FieldBuilder(builder, "geneGeneticInteraction.interactorARole.name", "text").keyword().build(); @@ -256,13 +256,13 @@ protected void buildSharedSearchableDocumentMappings() throws IOException { builder.field("type", "object"); builder.field("dynamic", false); builder.endObject(); - new FieldBuilder(builder, "geneExpressionAnnotation.expressionAnnotationSubject.geneSymbol.displayText", "text").keyword().sort().build(); + new FieldBuilder(builder, "geneExpressionAnnotation.expressionAnnotationSubject.geneSymbol.displayText", "text").keyword().sortSmartAlpha().build(); new FieldBuilder(builder, "geneExpressionAnnotation.expressionAnnotationSubject.primaryExternalId", "text").keyword().build(); - new FieldBuilder(builder, "geneExpressionAnnotation.expressionAnnotationSubject.taxon.name", "text").keyword().sort().build(); - new FieldBuilder(builder, "geneExpressionAnnotation.expressionAnnotationSubject.taxon.species.fullName", "text").keyword().sort().build(); - new FieldBuilder(builder, "geneExpressionAnnotation.whereExpressedStatement", "text").keyword().sort().build(); - new FieldBuilder(builder, "geneExpressionAnnotation.whenExpressedStageName", "text").keyword().sort().build(); - new FieldBuilder(builder, "geneExpressionAnnotation.expressionAssayUsed.name", "text").keyword().sort().build(); + new FieldBuilder(builder, "geneExpressionAnnotation.expressionAnnotationSubject.taxon.name", "text").keyword().sortSmartAlpha().build(); + new FieldBuilder(builder, 
"geneExpressionAnnotation.expressionAnnotationSubject.taxon.species.fullName", "text").keyword().sortSmartAlpha().build(); + new FieldBuilder(builder, "geneExpressionAnnotation.whereExpressedStatement", "text").keyword().sortSmartAlpha().build(); + new FieldBuilder(builder, "geneExpressionAnnotation.whenExpressedStageName", "text").keyword().sortSmartAlpha().build(); + new FieldBuilder(builder, "geneExpressionAnnotation.expressionAssayUsed.name", "text").keyword().sortSmartAlpha().build(); new FieldBuilder(builder, "geneExpressionAnnotation.crossReferences.referencedCurie", "text").keyword().build(); // AlleleSummaryDocument: dynamic false on deep curation API objects @@ -273,10 +273,10 @@ protected void buildSharedSearchableDocumentMappings() throws IOException { new FieldBuilder(builder, "primaryExternalId", "text").keyword().build(); builder.endObject(); builder.endObject(); - new FieldBuilder(builder, "allele.alleleSynonyms.displayText", "text").keyword().sort().build(); + new FieldBuilder(builder, "allele.alleleSynonyms.displayText", "text").keyword().sortSmartAlpha().build(); new FieldBuilder(builder, "hasDisease", "boolean").build(); new FieldBuilder(builder, "hasPhenotype", "boolean").build(); - new FieldBuilder(builder, "alterationTypeSortOrder", "integer").keyword().sort().build(); + new FieldBuilder(builder, "alterationTypeSortOrder", "integer").keyword().sortSmartAlpha().build(); builder.startObject("alleleOfGene"); builder.field("dynamic", false); @@ -290,9 +290,9 @@ protected void buildSharedSearchableDocumentMappings() throws IOException { builder.field("type", "object"); builder.field("dynamic", false); builder.endObject(); - new FieldBuilder(builder, "variants.variantType.name", "text").keyword().sort().build(); - new FieldBuilder(builder, "variants.curatedVariantGenomicLocations.predictedVariantConsequences.vepConsequences.name", "text").keyword().sort().build(); - new FieldBuilder(builder, "variants.curatedVariantGenomicLocations.hgvs", 
"text").keyword().sort().build(); + new FieldBuilder(builder, "variants.variantType.name", "text").keyword().sortSmartAlpha().build(); + new FieldBuilder(builder, "variants.curatedVariantGenomicLocations.predictedVariantConsequences.vepConsequences.name", "text").keyword().sortSmartAlpha().build(); + new FieldBuilder(builder, "variants.curatedVariantGenomicLocations.hgvs", "text").keyword().sortSmartAlpha().build(); builder.startObject("crossReference"); builder.field("type", "object"); builder.field("dynamic", false); @@ -305,8 +305,8 @@ protected void buildSharedSearchableDocumentMappings() throws IOException { builder.startObject("properties"); builder.endObject(); builder.endObject(); - new FieldBuilder(builder, "variant.variantType.name", "text").keyword().sort().build(); - new FieldBuilder(builder, "variant.curatedVariantGenomicLocations.hgvs", "text").keyword().sort().build(); + new FieldBuilder(builder, "variant.variantType.name", "text").keyword().sortSmartAlpha().build(); + new FieldBuilder(builder, "variant.curatedVariantGenomicLocations.hgvs", "text").keyword().sortSmartAlpha().build(); new FieldBuilder(builder, "variant.curatedVariantGenomicLocations.start", "integer").build(); new FieldBuilder(builder, "variant.curatedVariantGenomicLocations.end", "integer").build(); new FieldBuilder(builder, "variant.curatedVariantGenomicLocations.variantGenomicLocationAssociationObject.name", "text").keyword().build(); @@ -322,11 +322,11 @@ protected void buildSharedSearchableDocumentMappings() throws IOException { new FieldBuilder(builder, "intronExonLocation", "text").keyword().build(); builder.endObject(); builder.endObject(); - new FieldBuilder(builder, "consequence.vepImpact.name", "text").keyword().sort().build(); - new FieldBuilder(builder, "consequence.vepConsequences.name", "text").keyword().sort().build(); + new FieldBuilder(builder, "consequence.vepImpact.name", "text").keyword().sortSmartAlpha().build(); + new FieldBuilder(builder, 
"consequence.vepConsequences.name", "text").keyword().sortSmartAlpha().build(); new FieldBuilder(builder, "consequence.siftPrediction.name", "text").keyword().build(); new FieldBuilder(builder, "consequence.polyphenPrediction.name", "text").keyword().build(); - new FieldBuilder(builder, "consequence.variantTranscript.name", "text").keyword().sort().build(); + new FieldBuilder(builder, "consequence.variantTranscript.name", "text").keyword().sortSmartAlpha().build(); new FieldBuilder(builder, "consequence.variantTranscript.transcriptType.name", "text").keyword().build(); new FieldBuilder(builder, "consequence.variantTranscript.transcriptGeneAssociations.transcriptGeneAssociationObject.geneSymbol.displayText", "text").keyword().build(); @@ -344,7 +344,8 @@ public static class FieldBuilder { boolean keyword; boolean keywordAutocomplete; boolean letterText; - boolean sort; + boolean sortSmartAlpha; + boolean sortLowercase; boolean standardBigrams; boolean standardText; boolean symbol; @@ -392,8 +393,13 @@ public FieldBuilder letterText() { return this; } - public FieldBuilder sort() { - this.sort = true; + public FieldBuilder sortSmartAlpha() { + this.sortSmartAlpha = true; + return this; + } + + public FieldBuilder sortLowercase() { + this.sortLowercase = true; return this; } @@ -458,7 +464,7 @@ public void build() throws IOException { if (analyzer != null) { builder.field("analyzer", analyzer); } - if (symbol || autocomplete || keyword || keywordAutocomplete || synonym || sort || standardText) { + if (symbol || autocomplete || keyword || keywordAutocomplete || synonym || sortSmartAlpha || sortLowercase || standardText) { builder.startObject("fields"); if (keyword) { buildProperty("keyword", "keyword"); @@ -481,10 +487,12 @@ public void build() throws IOException { if (synonym) { buildProperty("synonyms", "text", "generic_synonym", "autocomplete_search", null); } - if (sort) { - //buildProperty("sort", "keyword", null, null, "lowercase"); + if (sortSmartAlpha) { 
buildProperty("sort", "keyword", null, null, "smart_alpha_sort"); } + if (sortLowercase) { + buildProperty("sort", "keyword", null, null, "lowercase"); + } if (htmlSmoosh) { buildProperty("htmlSmoosh", "text", "html_smoosh"); } diff --git a/agr_java_core/src/main/java/org/alliancegenome/es/index/site/schema/VariantMapping.java b/agr_java_core/src/main/java/org/alliancegenome/es/index/site/schema/VariantMapping.java index 5d5c8c28d..2ea64066f 100644 --- a/agr_java_core/src/main/java/org/alliancegenome/es/index/site/schema/VariantMapping.java +++ b/agr_java_core/src/main/java/org/alliancegenome/es/index/site/schema/VariantMapping.java @@ -19,12 +19,12 @@ public void buildMapping() { new FieldBuilder(builder, "alterationType", "text").keyword().build(); new FieldBuilder(builder, "genes", "text").keyword().build(); new FieldBuilder(builder, "geneIds", "keyword").build(); - new FieldBuilder(builder, "associatedPhenotype", "text").keyword().sort().build(); - new FieldBuilder(builder, "diseaseTerms.name", "text").keyword().sort().build(); + new FieldBuilder(builder, "associatedPhenotype", "text").keyword().sortLowercase().build(); + new FieldBuilder(builder, "diseaseTerms.name", "text").keyword().sortLowercase().build(); new FieldBuilder(builder, "hasDisease", "boolean").build(); new FieldBuilder(builder, "hasPhenotype", "boolean").build(); - new FieldBuilder(builder, "alterationTypeSortOrder", "integer").sort().build(); - new FieldBuilder(builder, "symbol", "text").analyzer("symbols").autocomplete().htmlSmoosh().keyword().keywordAutocomplete().sort().build(); // allele, gene + new FieldBuilder(builder, "alterationTypeSortOrder", "integer").sortLowercase().build(); + new FieldBuilder(builder, "symbol", "text").analyzer("symbols").autocomplete().htmlSmoosh().keyword().keywordAutocomplete().sortSmartAlpha().build(); // allele, gene // allele: dynamic false prevents indexing the deep curation API Allele tree // Only map fields actually queried in ES @@ -33,7 +33,7 @@ public 
void buildMapping() { builder.startObject("properties"); // VariantSummaryDocument: queried via MatchQuery on allele.primaryExternalId new FieldBuilder(builder, "primaryExternalId", "text").keyword().build(); - new FieldBuilder(builder, "alleleSynonyms.displayText", "text").keyword().sort().build(); + new FieldBuilder(builder, "alleleSynonyms.displayText", "text").keyword().sortLowercase().build(); builder.endObject(); builder.endObject(); @@ -41,9 +41,9 @@ public void buildMapping() { builder.startObject("variants"); builder.field("dynamic", false); builder.startObject("properties"); - new FieldBuilder(builder, "variantType.name", "text").keyword().sort().build(); - new FieldBuilder(builder, "curatedVariantGenomicLocations.predictedVariantConsequences.vepConsequences.name", "text").keyword().sort().build(); - new FieldBuilder(builder, "curatedVariantGenomicLocations.hgvs", "text").keyword().sort().build(); + new FieldBuilder(builder, "variantType.name", "text").keyword().sortLowercase().build(); + new FieldBuilder(builder, "curatedVariantGenomicLocations.predictedVariantConsequences.vepConsequences.name", "text").keyword().sortLowercase().build(); + new FieldBuilder(builder, "curatedVariantGenomicLocations.hgvs", "text").keyword().sortSmartAlpha().build(); builder.endObject(); builder.endObject(); @@ -52,8 +52,8 @@ public void buildMapping() { builder.startObject("variant"); builder.field("dynamic", false); builder.startObject("properties"); - new FieldBuilder(builder, "variantType.name", "text").keyword().sort().build(); - new FieldBuilder(builder, "curatedVariantGenomicLocations.hgvs", "text").keyword().sort().build(); + new FieldBuilder(builder, "variantType.name", "text").keyword().sortLowercase().build(); + new FieldBuilder(builder, "curatedVariantGenomicLocations.hgvs", "text").keyword().sortSmartAlpha().build(); new FieldBuilder(builder, "curatedVariantGenomicLocations.start", "integer").build(); new FieldBuilder(builder, 
"curatedVariantGenomicLocations.end", "integer").build(); new FieldBuilder(builder, "curatedVariantGenomicLocations.variantGenomicLocationAssociationObject.name", "text").keyword().build(); @@ -64,15 +64,15 @@ public void buildMapping() { builder.startObject("consequence"); builder.field("dynamic", false); builder.startObject("properties"); - new FieldBuilder(builder, "variantTranscript.name", "text").keyword().sort().build(); - new FieldBuilder(builder, "variantTranscript.transcriptType.name", "text").keyword().sort().build(); - new FieldBuilder(builder, "variantTranscript.transcriptGeneAssociations.transcriptGeneAssociationObject.geneSymbol.displayText", "text").keyword().sort().build(); + new FieldBuilder(builder, "variantTranscript.name", "text").keyword().sortSmartAlpha().build(); + new FieldBuilder(builder, "variantTranscript.transcriptType.name", "text").keyword().sortLowercase().build(); + new FieldBuilder(builder, "variantTranscript.transcriptGeneAssociations.transcriptGeneAssociationObject.geneSymbol.displayText", "text").keyword().sortSmartAlpha().build(); new FieldBuilder(builder, "intronExonLocation", "text").keyword().build(); - new FieldBuilder(builder, "vepConsequences.name", "text").keyword().sort().build(); - new FieldBuilder(builder, "vepImpact.name", "text").keyword().sort().build(); - new FieldBuilder(builder, "siftPrediction.name", "text").keyword().sort().build(); + new FieldBuilder(builder, "vepConsequences.name", "text").keyword().sortLowercase().build(); + new FieldBuilder(builder, "vepImpact.name", "text").keyword().sortLowercase().build(); + new FieldBuilder(builder, "siftPrediction.name", "text").keyword().sortLowercase().build(); new FieldBuilder(builder, "siftScore", "float").build(); - new FieldBuilder(builder, "polyphenPrediction.name", "text").keyword().sort().build(); + new FieldBuilder(builder, "polyphenPrediction.name", "text").keyword().sortLowercase().build(); new FieldBuilder(builder, "polyphenScore", "float").build(); 
builder.endObject(); builder.endObject(); diff --git a/agr_variant_indexer/src/main/java/org/alliancegenome/indexer/variant/es/managers/RoutedBulkIndexer.java b/agr_variant_indexer/src/main/java/org/alliancegenome/indexer/variant/es/managers/RoutedBulkIndexer.java index 8b3631854..9941a7780 100644 --- a/agr_variant_indexer/src/main/java/org/alliancegenome/indexer/variant/es/managers/RoutedBulkIndexer.java +++ b/agr_variant_indexer/src/main/java/org/alliancegenome/indexer/variant/es/managers/RoutedBulkIndexer.java @@ -84,7 +84,7 @@ public void run() { for (byte[] smileDoc : docs) { int docBytes = smileDoc.length; - + if (gatherStats) { docStats.addValue(docBytes); } @@ -122,6 +122,10 @@ public void run() { submitBatch(pendingDocs); } + } catch (Exception e) { + log.error(label + " Unexpected exception in indexer thread, exiting", e); + e.printStackTrace(); + System.exit(-1); } finally { if (gatherStats) { ph.finishProcess();