From a37caa07fef4f69e53b3d522983d0b53e0e8bab3 Mon Sep 17 00:00:00 2001
From: koetsier <jos.koetsier@digital.cabinet-office.gov.uk>
Date: Tue, 24 Mar 2026 11:54:40 +0000
Subject: [PATCH 1/8] Bump the Elasticsearch client gem to version 7.10.x.
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

We’re planning an upcoming upgrade of our cluster to 7.10, so this
change allows us to adopt the newer client early and ensure the codebase
remains compatible.
---
 Gemfile      |  2 +-
 Gemfile.lock | 14 +++++++-------
 2 files changed, 8 insertions(+), 8 deletions(-)

diff --git a/Gemfile b/Gemfile
index 1afbacea1..6d9913212 100644
--- a/Gemfile
+++ b/Gemfile
@@ -3,7 +3,7 @@ source "https://rubygems.org"
 gem "activesupport"
 gem "aws-sdk-s3"
 gem "bootsnap", require: false
-gem "elasticsearch", "~> 6" # We need a 6.x release to interface with Elasticsearch 6
+gem "elasticsearch", "~> 7.10.0", "< 7.11" # Pin the client to 7.10.x ahead of the cluster upgrade to Elasticsearch 7.10
 gem "gds-api-adapters"
 gem "google-analytics-data-v1beta"
 gem "google-api-client"
diff --git a/Gemfile.lock b/Gemfile.lock
index feea68cf1..cfe08f9fb 100644
--- a/Gemfile.lock
+++ b/Gemfile.lock
@@ -82,12 +82,12 @@ GEM
     docile (1.4.0)
     domain_name (0.6.20240107)
     drb (2.2.3)
-    elasticsearch (6.8.3)
-      elasticsearch-api (= 6.8.3)
-      elasticsearch-transport (= 6.8.3)
-    elasticsearch-api (6.8.3)
+    elasticsearch (7.10.1)
+      elasticsearch-api (= 7.10.1)
+      elasticsearch-transport (= 7.10.1)
+    elasticsearch-api (7.10.1)
       multi_json
-    elasticsearch-transport (6.8.3)
+    elasticsearch-transport (7.10.1)
       faraday (~> 1)
       multi_json
     erb (6.0.2)
@@ -116,7 +116,7 @@ GEM
     faraday-net_http_persistent (1.2.0)
     faraday-patron (1.0.0)
     faraday-rack (1.0.0)
-    faraday-retry (1.0.3)
+    faraday-retry (1.0.4)
     ffi (1.15.5)
     find_a_port (1.0.1)
     gapic-common (1.2.0)
@@ -761,7 +761,7 @@ DEPENDENCIES
   bootsnap
   bunny-mock
   climate_control
-  elasticsearch (~> 6)
+  elasticsearch (~> 7.10.0, < 7.11)
   gds-api-adapters
   google-analytics-data-v1beta
   google-api-client

From a10247318b77d3051347f6c21b77cfb3006e64b8 Mon Sep 17 00:00:00 2001
From: koetsier <jos.koetsier@digital.cabinet-office.gov.uk>
Date: Tue, 24 Mar 2026 11:55:45 +0000
Subject: [PATCH 2/8] Remove the standard token filter.

The standard token filter is deprecated because it has no effect on the
token stream, and it has been removed in Elasticsearch 7.

https://stackoverflow.com/questions/76108163/getting-error-the-standard-token-filter-has-been-removed-when-running-a-que
---
 config/schema/elasticsearch_schema.yml | 16 ++++++++--------
 1 file changed, 8 insertions(+), 8 deletions(-)

diff --git a/config/schema/elasticsearch_schema.yml b/config/schema/elasticsearch_schema.yml
index bd0e4b829..d7fb35b5f 100644
--- a/config/schema/elasticsearch_schema.yml
+++ b/config/schema/elasticsearch_schema.yml
@@ -14,19 +14,19 @@ index:
         default:
           type: custom
           tokenizer: standard
-          filter: [standard, asciifolding, lowercase, stop, stemmer_override, stemmer_english]
+          filter: [asciifolding, lowercase, stop, stemmer_override, stemmer_english]
           char_filter: [normalize_quotes, strip_quotes]
 
         # Analyzer used at query time for old-style shingle matching.
         shingled_query_analyzer:
           type: custom
           tokenizer: standard
-          filter: [standard, asciifolding, lowercase, stemmer_override, stemmer_english, bigrams]
+          filter: [asciifolding, lowercase, stemmer_override, stemmer_english, bigrams]
 
         with_shingles:
           type: custom
           tokenizer: standard
-          filter: [standard, asciifolding, lowercase, stemmer_override, stemmer_english, bigrams]
+          filter: [asciifolding, lowercase, stemmer_override, stemmer_english, bigrams]
           char_filter: [normalize_quotes, strip_quotes]
 
         # This analyzer does not filter out these stopwords:
@@ -38,7 +38,7 @@ index:
         searchable_text:
           type: custom
           tokenizer: standard
-          filter: [standard, asciifolding, lowercase, stemmer_override, stemmer_english]
+          filter: [asciifolding, lowercase, stemmer_override, stemmer_english]
           char_filter: [normalize_quotes, strip_quotes]
 
         # Analyzer used at index time for the .synonym variants of searchable
@@ -46,7 +46,7 @@ index:
         with_index_synonyms:
           type: custom
           tokenizer: standard
-          filter: [standard, asciifolding, lowercase, index_synonym, stop, stemmer_override, stemmer_english]
+          filter: [asciifolding, lowercase, index_synonym, stop, stemmer_override, stemmer_english]
           char_filter: [normalize_quotes, strip_quotes]
 
         # Analyzer used at search time for the .synonym variants of searchable
@@ -54,7 +54,7 @@ index:
         with_search_synonyms:
           type: custom
           tokenizer: standard
-          filter: [standard, asciifolding, lowercase, search_synonym, stop, stemmer_override, stemmer_english]
+          filter: [asciifolding, lowercase, search_synonym, stop, stemmer_override, stemmer_english]
           char_filter: [normalize_quotes, strip_quotes]
 
         # An analyzer for doing "exact" word matching (but stripping wrapping whitespace, and case insensitive).
@@ -68,14 +68,14 @@ index:
         best_bet_stemmed_match:
           type: custom
           tokenizer: standard
-          filter: [standard, asciifolding, lowercase, stemmer_override, stemmer_english]
+          filter: [asciifolding, lowercase, stemmer_override, stemmer_english]
           char_filter: [normalize_quotes, strip_quotes]
 
         # Analyzer used to process text supplied to the field for use in spelling correction.
         spelling_analyzer:
           type: custom
           tokenizer: standard
-          filter: [standard, asciifolding, lowercase, shingle]
+          filter: [asciifolding, lowercase, shingle]
           char_filter: [normalize_quotes, strip_quotes]
 
         # Analyzer used to process text fields for use for sorting.

From e44e4262210313814b358e86ac1a6013f536a887 Mon Sep 17 00:00:00 2001
From: koetsier <jos.koetsier@digital.cabinet-office.gov.uk>
Date: Tue, 24 Mar 2026 14:29:43 +0000
Subject: [PATCH 3/8] Expect POST requests when performing searches with the
 Elasticsearch 7.10 client.

The Elasticsearch client gem has been updated and now issues POST
requests for search operations instead of GET.
---
 spec/unit/elasticsearch_index_spec.rb      | 16 ++++++++--------
 spec/unit/time_based_index_cleanup_spec.rb |  2 +-
 2 files changed, 9 insertions(+), 9 deletions(-)

diff --git a/spec/unit/elasticsearch_index_spec.rb b/spec/unit/elasticsearch_index_spec.rb
index 28079cb56..d53657e9b 100644
--- a/spec/unit/elasticsearch_index_spec.rb
+++ b/spec/unit/elasticsearch_index_spec.rb
@@ -84,7 +84,7 @@
   end
 
   it "can be searched" do
-    stub_get = stub_request(:get, "http://example.com:9200/government_test/generic-document/_search").with(
+    stub_get = stub_request(:post, "http://example.com:9200/government_test/generic-document/_search").with(
       body: %r{"query":"keyword search"},
     ).to_return(
       body: '{"hits":{"hits":[]}}',
@@ -110,7 +110,7 @@
 
   it "can fetch documents by format" do
     search_pattern = "http://example.com:9200/government_test/_search?scroll=1m&search_type=query_then_fetch&size=500&version=true"
-    stub_request(:get, search_pattern).with(
+    stub_request(:post, search_pattern).with(
       body: { query: { term: { format: "organisation" } }, _source: { includes: %w[title link] }, sort: %w[_doc] },
     ).to_return(
       body: { _scroll_id: "abcdefgh", hits: { total: 10, hits: [] } }.to_json,
@@ -135,7 +135,7 @@
   it "can fetch documents by format with certain fields" do
     search_pattern = "http://example.com:9200/government_test/_search?scroll=1m&search_type=query_then_fetch&size=500&version=true"
 
-    stub_request(:get, search_pattern).with(
+    stub_request(:post, search_pattern).with(
       body: "{\"query\":{\"term\":{\"format\":\"organisation\"}},\"_source\":{\"includes\":[\"title\",\"link\"]},\"sort\":[\"_doc\"]}",
     ).to_return(
       body: { _scroll_id: "abcdefgh", hits: { total: 10, hits: [] } }.to_json,
@@ -160,7 +160,7 @@
 
   it "can count the documents without retrieving them all" do
     search_pattern = "http://example.com:9200/government_test/_search?scroll=1m&search_type=query_then_fetch&size=50&version=true"
-    stub_request(:get, search_pattern).with(
+    stub_request(:post, search_pattern).with(
       body: { query: expected_all_documents_query, sort: %w[_doc] }.to_json,
     ).to_return(
       body: { _scroll_id: "abcdefgh", hits: { total: 100 } }.to_json,
@@ -172,7 +172,7 @@
   it "can retrieve all documents" do
     search_uri = "http://example.com:9200/government_test/_search?scroll=1m&search_type=query_then_fetch&size=50&version=true"
 
-    stub_request(:get, search_uri).with(
+    stub_request(:post, search_uri).with(
       body: { query: expected_all_documents_query, sort: %w[_doc] }.to_json,
     ).to_return(
       body: { _scroll_id: "abcdefgh", hits: { total: 100, hits: [] } }.to_json,
@@ -202,7 +202,7 @@
 
     allow(described_class).to receive(:scroll_batch_size).and_return(2)
 
-    stub_request(:get, search_uri).with(
+    stub_request(:post, search_uri).with(
       body: { query: expected_all_documents_query, sort: %w[_doc] }.to_json,
     ).to_return(
       body: { _scroll_id: "abcdefgh", hits: { total: 3, hits: [] } }.to_json,
@@ -277,7 +277,7 @@ def build_government_index
 
   def stub_popularity_index_requests(paths, popularity, total_pages = 10, total_requested = total_pages, paths_to_return = paths)
     # stub the request for total results
-    stub_request(:get, "http://example.com:9200/page-traffic_test/generic-document/_search")
+    stub_request(:post, "http://example.com:9200/page-traffic_test/generic-document/_search")
       .with(body: { "query" => { "match_all" => {} }, "size" => 0 }.to_json)
       .to_return(
         body: { "hits" => { "total" => total_pages } }.to_json,
@@ -312,7 +312,7 @@ def stub_popularity_index_requests(paths, popularity, total_pages = 10, total_re
       },
     }
 
-    stub_request(:get, "http://example.com:9200/page-traffic_test/generic-document/_search")
+    stub_request(:post, "http://example.com:9200/page-traffic_test/generic-document/_search")
       .with(body: expected_query.to_json)
       .to_return(
         body: response.to_json,
diff --git a/spec/unit/time_based_index_cleanup_spec.rb b/spec/unit/time_based_index_cleanup_spec.rb
index b8aab8275..fc6089ced 100644
--- a/spec/unit/time_based_index_cleanup_spec.rb
+++ b/spec/unit/time_based_index_cleanup_spec.rb
@@ -152,7 +152,7 @@
       },
     }
 
-    stub_request(:get, %r{#{base_uri}/test(.*?)/_search})
+    stub_request(:post, %r{#{base_uri}/test(.*?)/_search})
       .with(
         body: expected_timed_delete_body,
       ).to_return(

From 1f9231097db6d383af6e224f2b413efaee01fd2c Mon Sep 17 00:00:00 2001
From: koetsier <jos.koetsier@digital.cabinet-office.gov.uk>
Date: Tue, 24 Mar 2026 14:33:10 +0000
Subject: [PATCH 4/8] Update expectations for mapping calls made to the
 Elasticsearch client.

When updating a mapping, the Elasticsearch client may now use either
of the following endpoint formats:

$HOSTNAME/{index}/_mapping/{type}
$HOSTNAME/{index}/{type}/_mappings

Both URL patterns are supported by Elasticsearch 6.8.
---
 spec/unit/index_spec.rb | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/spec/unit/index_spec.rb b/spec/unit/index_spec.rb
index 9e1242129..60c91f7f4 100644
--- a/spec/unit/index_spec.rb
+++ b/spec/unit/index_spec.rb
@@ -16,7 +16,7 @@
         },
       },
     }
-    stub = stub_request(:put, %r{#{base_uri}/govuk-abc/_mapping/generic-document})
+    stub = stub_request(:put, %r{#{base_uri}/govuk-abc/generic-document/_mappings})
       .with(body: mappings["generic-document"])
       .to_return({
         status: 200,
@@ -28,7 +28,7 @@
       "type" => "illegal_argument_exception",
       "reason" => "invalid mapping",
     } }.to_json
-    failing_stub = stub_request(:put, %r{#{base_uri}/govuk-abc/_mapping/failing-document})
+    failing_stub = stub_request(:put, %r{#{base_uri}/govuk-abc/failing-document/_mappings})
       .with(body: mappings["failing-document"])
       .to_return({
         status: 400,

From 15dc31332b7e31276ce9482092f177a369583bfd Mon Sep 17 00:00:00 2001
From: koetsier <jos.koetsier@digital.cabinet-office.gov.uk>
Date: Tue, 24 Mar 2026 14:40:02 +0000
Subject: [PATCH 5/8] Update expectations for scroll search calls made to the
 Elasticsearch client.

When performing a scroll search, the Elasticsearch client may now use
either of the following endpoint formats:

$HOSTNAME/_search/scroll/{scroll_id}?scroll=1m
$HOSTNAME/_search/scroll?scroll=1m&scroll_id={scroll_id}

Both URL patterns are supported by Elasticsearch 6.8.
---
 spec/unit/elasticsearch_index_spec.rb | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/spec/unit/elasticsearch_index_spec.rb b/spec/unit/elasticsearch_index_spec.rb
index d53657e9b..f17335571 100644
--- a/spec/unit/elasticsearch_index_spec.rb
+++ b/spec/unit/elasticsearch_index_spec.rb
@@ -259,7 +259,7 @@
 private
 
   def scroll_uri(scroll_id)
-    "http://example.com:9200/_search/scroll?scroll=1m&scroll_id=#{scroll_id}"
+    "http://example.com:9200/_search/scroll/#{scroll_id}?scroll=1m"
   end
 
   def scroll_response_body(scroll_id, total_results, results)

From ef6f24c7adbc680fbd58378e3a99fd9b004339e7 Mon Sep 17 00:00:00 2001
From: koetsier <jos.koetsier@digital.cabinet-office.gov.uk>
Date: Tue, 24 Mar 2026 15:08:49 +0000
Subject: [PATCH 6/8] Replace _uid references with _id

Although _uid is still present in Elasticsearch 6.8, it is deprecated. All usages have
been updated to _id to ensure forward compatibility with future Elasticsearch versions.

Reference:
https://www.elastic.co/guide/en/elasticsearch/reference/6.8/mapping-uid-field.html
---
 lib/indexer/compare_enumerator.rb          | 4 ++--
 lib/tasks/export.rake                      | 2 +-
 spec/integration/scroll_enumerator_spec.rb | 2 +-
 3 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/lib/indexer/compare_enumerator.rb b/lib/indexer/compare_enumerator.rb
index e4f4b2aa6..c5381348a 100644
--- a/lib/indexer/compare_enumerator.rb
+++ b/lib/indexer/compare_enumerator.rb
@@ -3,11 +3,11 @@ class CompareEnumerator < Enumerator
     NO_VALUE = :__no_value_found__
     BATCH_SIZE = 250
     DEFAULT_QUERY = { match_all: {} }.freeze
-    # sort by the document_type then the _uid, which is "type#id" -
+    # sort by the document_type then the _id (formerly _uid, "type#id") -
     # sorting on the id directly is not possible, and the type will
     # always be "generic-document", which is why we first need to sort
     # by document_type.
-    DEFAULT_SORT = %i[document_type _uid].freeze
+    DEFAULT_SORT = %i[document_type _id].freeze
 
     def initialize(left_index_name, right_index_name, cluster = Clusters.default_cluster, search_body = {}, options = {})
       @cluster = cluster
diff --git a/lib/tasks/export.rake b/lib/tasks/export.rake
index e60b6f041..881d31c99 100644
--- a/lib/tasks/export.rake
+++ b/lib/tasks/export.rake
@@ -10,7 +10,7 @@ namespace :export do
                .transform_values { |v| [v] }
     search_params = SearchConfig.parse_parameters(params)
     query = search_params.search_config.generate_query_for_params(search_params)
-    query[:sort] = %i[document_type _uid]
+    query[:sort] = %i[document_type _id]
     fields = search_params.return_fields.uniq
     base_uri = search_params.search_config.base_uri
 
diff --git a/spec/integration/scroll_enumerator_spec.rb b/spec/integration/scroll_enumerator_spec.rb
index 50cca3174..350920a85 100644
--- a/spec/integration/scroll_enumerator_spec.rb
+++ b/spec/integration/scroll_enumerator_spec.rb
@@ -24,7 +24,7 @@
     results = ScrollEnumerator.new(
       client:,
       index_names: "govuk_test",
-      search_body: { query: { match_all: {} }, sort: [{ _uid: { order: "asc" } }] },
+      search_body: { query: { match_all: {} }, sort: [{ _id: { order: "asc" } }] },
       batch_size: 4,
     ) { |d| d }
 

From 07c76afd011938277004442828cd417335794257 Mon Sep 17 00:00:00 2001
From: koetsier <jos.koetsier@digital.cabinet-office.gov.uk>
Date: Tue, 24 Mar 2026 21:01:34 +0000
Subject: [PATCH 7/8] Adjust popularity scoring to handle missing values

Use a fallback when doc['popularity'] is empty by applying
POPULARITY_OFFSET directly; otherwise add the offset to the existing
value. This is necessary because Elasticsearch 7+ requires the
property to exist if accessed, so the script would otherwise fail when
popularity data is missing.
---
 lib/search/query_components/popularity.rb | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/lib/search/query_components/popularity.rb b/lib/search/query_components/popularity.rb
index 568b80bf2..457038831 100644
--- a/lib/search/query_components/popularity.rb
+++ b/lib/search/query_components/popularity.rb
@@ -19,7 +19,7 @@ def default_popularity_boost(boosted_query)
           script_score: {
             script: {
               lang: "painless",
-              source: "doc['popularity'].value + #{POPULARITY_OFFSET}",
+              source: "doc['popularity'].size() == 0 ? #{POPULARITY_OFFSET} : doc['popularity'].value + #{POPULARITY_OFFSET}",
             },
           },
         },

From 8c926f4a488818cc545981d1d418e41f9dbb76dd Mon Sep 17 00:00:00 2001
From: koetsier <jos.koetsier@digital.cabinet-office.gov.uk>
Date: Tue, 24 Mar 2026 21:16:35 +0000
Subject: [PATCH 8/8] Flatten 'should' query in booster

Elasticsearch 7+ does not allow nested arrays in a 'should' clause, so
flatten the list in case core_query is itself an array.
---
 lib/search/query_components/booster.rb | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/lib/search/query_components/booster.rb b/lib/search/query_components/booster.rb
index 3701e029f..983934db9 100644
--- a/lib/search/query_components/booster.rb
+++ b/lib/search/query_components/booster.rb
@@ -12,7 +12,7 @@ def wrap(core_query)
           score_mode: :multiply,
           query: {
             bool: {
-              should: [core_query],
+              should: [core_query].flatten,
             },
           },
           functions: boost_filters,