diff --git a/Gemfile b/Gemfile index 1afbacea1..6d9913212 100644 --- a/Gemfile +++ b/Gemfile @@ -3,7 +3,7 @@ source "https://rubygems.org" gem "activesupport" gem "aws-sdk-s3" gem "bootsnap", require: false -gem "elasticsearch", "~> 6" # We need a 6.x release to interface with Elasticsearch 6 +gem "elasticsearch", "~> 7.10.0", "< 7.11" # We need a pre-7.11 release to interface with Elasticsearch 7 gem "gds-api-adapters" gem "google-analytics-data-v1beta" gem "google-api-client" diff --git a/Gemfile.lock b/Gemfile.lock index feea68cf1..cfe08f9fb 100644 --- a/Gemfile.lock +++ b/Gemfile.lock @@ -82,12 +82,12 @@ GEM docile (1.4.0) domain_name (0.6.20240107) drb (2.2.3) - elasticsearch (6.8.3) - elasticsearch-api (= 6.8.3) - elasticsearch-transport (= 6.8.3) - elasticsearch-api (6.8.3) + elasticsearch (7.10.1) + elasticsearch-api (= 7.10.1) + elasticsearch-transport (= 7.10.1) + elasticsearch-api (7.10.1) multi_json - elasticsearch-transport (6.8.3) + elasticsearch-transport (7.10.1) faraday (~> 1) multi_json erb (6.0.2) @@ -116,7 +116,7 @@ GEM faraday-net_http_persistent (1.2.0) faraday-patron (1.0.0) faraday-rack (1.0.0) - faraday-retry (1.0.3) + faraday-retry (1.0.4) ffi (1.15.5) find_a_port (1.0.1) gapic-common (1.2.0) @@ -761,7 +761,7 @@ DEPENDENCIES bootsnap bunny-mock climate_control - elasticsearch (~> 6) + elasticsearch (~> 7.10.0, < 7.11) gds-api-adapters google-analytics-data-v1beta google-api-client diff --git a/config/schema/elasticsearch_schema.yml b/config/schema/elasticsearch_schema.yml index bd0e4b829..d7fb35b5f 100644 --- a/config/schema/elasticsearch_schema.yml +++ b/config/schema/elasticsearch_schema.yml @@ -14,19 +14,19 @@ index: default: type: custom tokenizer: standard - filter: [standard, asciifolding, lowercase, stop, stemmer_override, stemmer_english] + filter: [asciifolding, lowercase, stop, stemmer_override, stemmer_english] char_filter: [normalize_quotes, strip_quotes] # Analyzer used at query time for old-style shingle matching. 
shingled_query_analyzer: type: custom tokenizer: standard - filter: [standard, asciifolding, lowercase, stemmer_override, stemmer_english, bigrams] + filter: [asciifolding, lowercase, stemmer_override, stemmer_english, bigrams] with_shingles: type: custom tokenizer: standard - filter: [standard, asciifolding, lowercase, stemmer_override, stemmer_english, bigrams] + filter: [asciifolding, lowercase, stemmer_override, stemmer_english, bigrams] char_filter: [normalize_quotes, strip_quotes] # This analyzer does not filter out these stopwords: @@ -38,7 +38,7 @@ index: searchable_text: type: custom tokenizer: standard - filter: [standard, asciifolding, lowercase, stemmer_override, stemmer_english] + filter: [asciifolding, lowercase, stemmer_override, stemmer_english] char_filter: [normalize_quotes, strip_quotes] # Analyzer used at index time for the .synonym variants of searchable @@ -46,7 +46,7 @@ index: with_index_synonyms: type: custom tokenizer: standard - filter: [standard, asciifolding, lowercase, index_synonym, stop, stemmer_override, stemmer_english] + filter: [asciifolding, lowercase, index_synonym, stop, stemmer_override, stemmer_english] char_filter: [normalize_quotes, strip_quotes] # Analyzer used at search time for the .synonym variants of searchable @@ -54,7 +54,7 @@ index: with_search_synonyms: type: custom tokenizer: standard - filter: [standard, asciifolding, lowercase, search_synonym, stop, stemmer_override, stemmer_english] + filter: [asciifolding, lowercase, search_synonym, stop, stemmer_override, stemmer_english] char_filter: [normalize_quotes, strip_quotes] # An analyzer for doing "exact" word matching (but stripping wrapping whitespace, and case insensitive). 
@@ -68,14 +68,14 @@ index: best_bet_stemmed_match: type: custom tokenizer: standard - filter: [standard, asciifolding, lowercase, stemmer_override, stemmer_english] + filter: [asciifolding, lowercase, stemmer_override, stemmer_english] char_filter: [normalize_quotes, strip_quotes] # Analyzer used to process text supplied to the field for use in spelling correction. spelling_analyzer: type: custom tokenizer: standard - filter: [standard, asciifolding, lowercase, shingle] + filter: [asciifolding, lowercase, shingle] char_filter: [normalize_quotes, strip_quotes] # Analyzer used to process text fields for use for sorting. diff --git a/lib/indexer/compare_enumerator.rb b/lib/indexer/compare_enumerator.rb index e4f4b2aa6..c5381348a 100644 --- a/lib/indexer/compare_enumerator.rb +++ b/lib/indexer/compare_enumerator.rb @@ -3,11 +3,11 @@ class CompareEnumerator < Enumerator NO_VALUE = :__no_value_found__ BATCH_SIZE = 250 DEFAULT_QUERY = { match_all: {} }.freeze - # sort by the document_type then the _uid, which is "type#id" - + # sort by the document_type then the _id - # sorting on the id directly is not possible, and the type will # always be "generic-document", which is why we first need to sort # by document_type. 
- DEFAULT_SORT = %i[document_type _uid].freeze + DEFAULT_SORT = %i[document_type _id].freeze def initialize(left_index_name, right_index_name, cluster = Clusters.default_cluster, search_body = {}, options = {}) @cluster = cluster diff --git a/lib/search/query_components/booster.rb b/lib/search/query_components/booster.rb index 3701e029f..983934db9 100644 --- a/lib/search/query_components/booster.rb +++ b/lib/search/query_components/booster.rb @@ -12,7 +12,7 @@ def wrap(core_query) score_mode: :multiply, query: { bool: { - should: [core_query], + should: [core_query].flatten, }, }, functions: boost_filters, diff --git a/lib/search/query_components/popularity.rb b/lib/search/query_components/popularity.rb index 568b80bf2..457038831 100644 --- a/lib/search/query_components/popularity.rb +++ b/lib/search/query_components/popularity.rb @@ -19,7 +19,7 @@ def default_popularity_boost(boosted_query) script_score: { script: { lang: "painless", - source: "doc['popularity'].value + #{POPULARITY_OFFSET}", + source: "doc['popularity'].size() == 0 ? 
#{POPULARITY_OFFSET} : doc['popularity'].value + #{POPULARITY_OFFSET}", }, }, }, diff --git a/lib/tasks/export.rake b/lib/tasks/export.rake index e60b6f041..881d31c99 100644 --- a/lib/tasks/export.rake +++ b/lib/tasks/export.rake @@ -10,7 +10,7 @@ namespace :export do .transform_values { |v| [v] } search_params = SearchConfig.parse_parameters(params) query = search_params.search_config.generate_query_for_params(search_params) - query[:sort] = %i[document_type _uid] + query[:sort] = %i[document_type _id] fields = search_params.return_fields.uniq base_uri = search_params.search_config.base_uri diff --git a/spec/integration/scroll_enumerator_spec.rb b/spec/integration/scroll_enumerator_spec.rb index 50cca3174..350920a85 100644 --- a/spec/integration/scroll_enumerator_spec.rb +++ b/spec/integration/scroll_enumerator_spec.rb @@ -24,7 +24,7 @@ results = ScrollEnumerator.new( client:, index_names: "govuk_test", - search_body: { query: { match_all: {} }, sort: [{ _uid: { order: "asc" } }] }, + search_body: { query: { match_all: {} }, sort: [{ _id: { order: "asc" } }] }, batch_size: 4, ) { |d| d } diff --git a/spec/unit/elasticsearch_index_spec.rb b/spec/unit/elasticsearch_index_spec.rb index 28079cb56..f17335571 100644 --- a/spec/unit/elasticsearch_index_spec.rb +++ b/spec/unit/elasticsearch_index_spec.rb @@ -84,7 +84,7 @@ end it "can be searched" do - stub_get = stub_request(:get, "http://example.com:9200/government_test/generic-document/_search").with( + stub_get = stub_request(:post, "http://example.com:9200/government_test/generic-document/_search").with( body: %r{"query":"keyword search"}, ).to_return( body: '{"hits":{"hits":[]}}', @@ -110,7 +110,7 @@ it "can fetch documents by format" do search_pattern = "http://example.com:9200/government_test/_search?scroll=1m&search_type=query_then_fetch&size=500&version=true" - stub_request(:get, search_pattern).with( + stub_request(:post, search_pattern).with( body: { query: { term: { format: "organisation" } }, _source: { 
includes: %w[title link] }, sort: %w[_doc] }, ).to_return( body: { _scroll_id: "abcdefgh", hits: { total: 10, hits: [] } }.to_json, @@ -135,7 +135,7 @@ it "can fetch documents by format with certain fields" do search_pattern = "http://example.com:9200/government_test/_search?scroll=1m&search_type=query_then_fetch&size=500&version=true" - stub_request(:get, search_pattern).with( + stub_request(:post, search_pattern).with( body: "{\"query\":{\"term\":{\"format\":\"organisation\"}},\"_source\":{\"includes\":[\"title\",\"link\"]},\"sort\":[\"_doc\"]}", ).to_return( body: { _scroll_id: "abcdefgh", hits: { total: 10, hits: [] } }.to_json, @@ -160,7 +160,7 @@ it "can count the documents without retrieving them all" do search_pattern = "http://example.com:9200/government_test/_search?scroll=1m&search_type=query_then_fetch&size=50&version=true" - stub_request(:get, search_pattern).with( + stub_request(:post, search_pattern).with( body: { query: expected_all_documents_query, sort: %w[_doc] }.to_json, ).to_return( body: { _scroll_id: "abcdefgh", hits: { total: 100 } }.to_json, @@ -172,7 +172,7 @@ it "can retrieve all documents" do search_uri = "http://example.com:9200/government_test/_search?scroll=1m&search_type=query_then_fetch&size=50&version=true" - stub_request(:get, search_uri).with( + stub_request(:post, search_uri).with( body: { query: expected_all_documents_query, sort: %w[_doc] }.to_json, ).to_return( body: { _scroll_id: "abcdefgh", hits: { total: 100, hits: [] } }.to_json, @@ -202,7 +202,7 @@ allow(described_class).to receive(:scroll_batch_size).and_return(2) - stub_request(:get, search_uri).with( + stub_request(:post, search_uri).with( body: { query: expected_all_documents_query, sort: %w[_doc] }.to_json, ).to_return( body: { _scroll_id: "abcdefgh", hits: { total: 3, hits: [] } }.to_json, @@ -259,7 +259,7 @@ private def scroll_uri(scroll_id) - "http://example.com:9200/_search/scroll?scroll=1m&scroll_id=#{scroll_id}" + 
"http://example.com:9200/_search/scroll/#{scroll_id}?scroll=1m" end def scroll_response_body(scroll_id, total_results, results) @@ -277,7 +277,7 @@ def build_government_index def stub_popularity_index_requests(paths, popularity, total_pages = 10, total_requested = total_pages, paths_to_return = paths) # stub the request for total results - stub_request(:get, "http://example.com:9200/page-traffic_test/generic-document/_search") + stub_request(:post, "http://example.com:9200/page-traffic_test/generic-document/_search") .with(body: { "query" => { "match_all" => {} }, "size" => 0 }.to_json) .to_return( body: { "hits" => { "total" => total_pages } }.to_json, @@ -312,7 +312,7 @@ def stub_popularity_index_requests(paths, popularity, total_pages = 10, total_re }, } - stub_request(:get, "http://example.com:9200/page-traffic_test/generic-document/_search") + stub_request(:post, "http://example.com:9200/page-traffic_test/generic-document/_search") .with(body: expected_query.to_json) .to_return( body: response.to_json, diff --git a/spec/unit/index_spec.rb b/spec/unit/index_spec.rb index 9e1242129..60c91f7f4 100644 --- a/spec/unit/index_spec.rb +++ b/spec/unit/index_spec.rb @@ -16,7 +16,7 @@ }, }, } - stub = stub_request(:put, %r{#{base_uri}/govuk-abc/_mapping/generic-document}) + stub = stub_request(:put, %r{#{base_uri}/govuk-abc/generic-document/_mappings}) .with(body: mappings["generic-document"]) .to_return({ status: 200, @@ -28,7 +28,7 @@ "type" => "illegal_argument_exception", "reason" => "invalid mapping", } }.to_json - failing_stub = stub_request(:put, %r{#{base_uri}/govuk-abc/_mapping/failing-document}) + failing_stub = stub_request(:put, %r{#{base_uri}/govuk-abc/failing-document/_mappings}) .with(body: mappings["failing-document"]) .to_return({ status: 400, diff --git a/spec/unit/time_based_index_cleanup_spec.rb b/spec/unit/time_based_index_cleanup_spec.rb index b8aab8275..fc6089ced 100644 --- a/spec/unit/time_based_index_cleanup_spec.rb +++ 
b/spec/unit/time_based_index_cleanup_spec.rb @@ -152,7 +152,7 @@ }, } - stub_request(:get, %r{#{base_uri}/test(.*?)/_search}) + stub_request(:post, %r{#{base_uri}/test(.*?)/_search}) .with( body: expected_timed_delete_body, ).to_return(