From d3a8459a1c5190c99345f128c61289feb5900e33 Mon Sep 17 00:00:00 2001 From: Samuel Greene Date: Thu, 25 Apr 2024 17:52:33 -0700 Subject: [PATCH 1/3] move off query_string --- .../src/example-types/filters/tagsQuery.js | 122 +++++++----------- 1 file changed, 44 insertions(+), 78 deletions(-) diff --git a/packages/provider-elasticsearch/src/example-types/filters/tagsQuery.js b/packages/provider-elasticsearch/src/example-types/filters/tagsQuery.js index 29360294b..72fcd95f4 100644 --- a/packages/provider-elasticsearch/src/example-types/filters/tagsQuery.js +++ b/packages/provider-elasticsearch/src/example-types/filters/tagsQuery.js @@ -2,9 +2,7 @@ import _ from 'lodash/fp.js' import F from 'futil' import { Permutation } from 'js-combinatorics' import { stripLegacySubFields } from '../../utils/fields.js' -import { sanitizeTagInputs } from 'contexture-util/keywordGenerations.js' -import { queryStringCharacterBlacklist } from 'contexture-util/exampleTypes/tagsQuery.js' -import escapeStringRegexp from 'escape-string-regexp' +import { sanitizeTagInputs } from '../../utils/keywordGenerations.js' let maxTagCount = 100 @@ -17,79 +15,53 @@ let wordPermutations = _.flow( _.map(_.join(' ')) ) -/* - * Quote phrases and set edit distance. - * See: https://www.elastic.co/guide/en/elasticsearch/reference/current/query-dsl-query-string-query.html#_fuzziness - */ -let addQuotesAndDistance = _.curry((tag, text) => { - // Multiple words - if (_.includes(' ', text)) { - return F.quote(text) + (tag.distance ? `~${tag.distance}` : '') - } - // Single word. - // Note: ~1 for misspellings allows for the insertion, deletion or substitution of a single character, or transposition of two adjacent characters. - return text + (tag.misspellings ? '~1' : '') -}) - -let replaceRegexp = new RegExp( - `[${escapeStringRegexp(queryStringCharacterBlacklist)}]`, - 'g' -) - -let replaceReservedChars = _.flow( - _.toString, - _.replace(replaceRegexp, ' '), - // These characters are not stripped out by our analyzers but they are - // `query_string` reserved characters so we need to escape them. - _.replace(/([&+\-=:/])/g, '\\$1') -) - -let tagToQueryString = (tag) => { - let _tag = replaceReservedChars(tag.word) - - if (tag.distance === 'unlimited') { - return F.parens(_tag.replace(/\s+/g, ' AND ')) - } else if (!tag.distance && tag.anyOrder) { - return _.flow( - wordPermutations, - _.map(addQuotesAndDistance(tag)), - _.join(' OR '), - F.parens - )(_tag) - } else { - return addQuotesAndDistance(tag, _tag) - } -} - -let joinTags = _.curry((join, tags) => { - if (!tags.length) return '' - - let separator = { all: ' AND ', any: ' OR ' }[join] || ' OR ' - let joinedTags = tags.join(separator) - - if (join === 'none') return `NOT (${joinedTags})` - return joinedTags -}) - -let limitResultsToCertainTags = _.find('onlyShowTheseResults') - -let tagsToQueryString = (tags, join) => - _.flow( - F.when(limitResultsToCertainTags, _.filter('onlyShowTheseResults')), - F.compactMap(tagToQueryString), - joinTags(join) - )(tags) - let hasValue = _.get('tags.length') -let filter = ({ tags, join, field, exact }) => ({ - query_string: { - query: tagsToQueryString(tags, join), - default_operator: 'AND', - default_field: stripLegacySubFields(field) + (exact ? '.exact' : ''), - ...(exact && { analyzer: 'exact' }), +let filterIfFind = (f) => F.when(_.find(f), _.filter(f)) +let joinMap = { all: 'must', any: 'should', none: 'must_not' } +let combinator = (join, filters) => ({ + bool: { + [joinMap[join]]: filters, + ...(join === 'any' && { minimum_should_match: 1 }), }, }) +let tagMapper = (field, exact) => (tag) => { + let queryType = _.includes(' ', tag.text) ? 'match_phrase' : 'match' + let queryField = stripLegacySubFields(field) + (exact ? '.exact' : '') + + let body = { + query: tag.text, + ...(tag.misspellings && { fuzziness: 1 }), // consider switching to 'AUTO' + ...(tag.distance && tag.distance !== 'unlimited' && { slop: tag.distance }), + ...(exact && { analyzer: 'exact' }), + } + if (tag.distance === 'unlimited') + body.query = tag.text.replace(/\s+/g, ' AND ') + + if (!tag.distance && tag.anyOrder) { + return combinator( + 'any', + _.map( + (query) => ({ + match_phrase: { + [queryField]: { query, ...(exact && { analyzer: 'exact' }) }, + }, + }), + wordPermutations(tag.text) + ) + ) + } + + return { [queryType]: { [queryField]: body } } +} +let filter = ({ tags, join = 'any', field, exact }) => + combinator( + join, + _.flow( + filterIfFind('onlyShowTheseResults'), + _.map(tagMapper(field, exact)) + )(tags) + ) let buildResultQuery = ( node, @@ -162,12 +134,6 @@ let validContext = (node) => { export default ({ generateKeywords } = {}) => ({ wordPermutations, - limitResultsToCertainTags, - addQuotesAndDistance, - replaceReservedChars, - tagToQueryString, - joinTags, - tagsToQueryString, hasValue, filter, validContext, From 0887a1d3f9c75b1027e09332b3109d48c4dfdb02 Mon Sep 17 00:00:00 2001 From: Samuel Greene Date: Thu, 25 Apr 2024 17:56:32 -0700 Subject: [PATCH 2/3] fix ci error Signed-off-by: Samuel Greene --- .../src/example-types/filters/tagsQuery.js | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/packages/provider-elasticsearch/src/example-types/filters/tagsQuery.js b/packages/provider-elasticsearch/src/example-types/filters/tagsQuery.js index 72fcd95f4..159f34388 100644 --- a/packages/provider-elasticsearch/src/example-types/filters/tagsQuery.js +++ b/packages/provider-elasticsearch/src/example-types/filters/tagsQuery.js @@ -2,7 +2,7 @@ import _ from 'lodash/fp.js' import F from 'futil' import { Permutation } from 'js-combinatorics' import { stripLegacySubFields } from '../../utils/fields.js' -import { sanitizeTagInputs } from '../../utils/keywordGenerations.js' +import { sanitizeTagInputs } from 'contexture-util/keywordGenerations.js' let maxTagCount = 100 From dee29ab7dc21618f5a5bfab6fe22015dd9efd27f Mon Sep 17 00:00:00 2001 From: Samuel Greene Date: Thu, 25 Apr 2024 17:58:43 -0700 Subject: [PATCH 3/3] fix typo Signed-off-by: Samuel Greene --- .../src/example-types/filters/tagsQuery.js | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/packages/provider-elasticsearch/src/example-types/filters/tagsQuery.js b/packages/provider-elasticsearch/src/example-types/filters/tagsQuery.js index 159f34388..39dd09abb 100644 --- a/packages/provider-elasticsearch/src/example-types/filters/tagsQuery.js +++ b/packages/provider-elasticsearch/src/example-types/filters/tagsQuery.js @@ -26,17 +26,17 @@ let combinator = (join, filters) => ({ }, }) let tagMapper = (field, exact) => (tag) => { - let queryType = _.includes(' ', tag.text) ? 'match_phrase' : 'match' + let queryType = _.includes(' ', tag.word) ? 'match_phrase' : 'match' let queryField = stripLegacySubFields(field) + (exact ? '.exact' : '') let body = { - query: tag.text, + query: tag.word, ...(tag.misspellings && { fuzziness: 1 }), // consider switching to 'AUTO' ...(tag.distance && tag.distance !== 'unlimited' && { slop: tag.distance }), ...(exact && { analyzer: 'exact' }), } if (tag.distance === 'unlimited') - body.query = tag.text.replace(/\s+/g, ' AND ') + body.query = tag.word.replace(/\s+/g, ' AND ') if (!tag.distance && tag.anyOrder) { return combinator( @@ -47,7 +47,7 @@ let tagMapper = (field, exact) => (tag) => { [queryField]: { query, ...(exact && { analyzer: 'exact' }) }, }, }), - wordPermutations(tag.text) + wordPermutations(tag.word) ) ) }