From a909a474566d05e1b1e0babc2ad4bc059aeffe40 Mon Sep 17 00:00:00 2001 From: William Dewey Date: Thu, 19 May 2022 13:04:06 -0500 Subject: [PATCH 01/62] upgrade Puma --- Gemfile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Gemfile b/Gemfile index 1df5d39..22060bc 100644 --- a/Gemfile +++ b/Gemfile @@ -11,7 +11,7 @@ gem 'rails', '~> 6.1.7' # Use sqlite3 as the database for Active Record gem 'sqlite3' # Use Puma as the app server -gem 'puma', '~> 3.7' +gem 'puma', '~> 4.3' # Build JSON APIs with ease. Read more: https://github.com/rails/jbuilder # gem 'jbuilder', '~> 2.5' # Use Redis adapter to run Action Cable in production From 43bb2ba5c4d6e27247d01b4d2ae42bc796f1f9d3 Mon Sep 17 00:00:00 2001 From: William Dewey Date: Thu, 19 May 2022 13:13:40 -0500 Subject: [PATCH 02/62] add specific version of puma to avoid security warnings --- Gemfile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Gemfile b/Gemfile index 22060bc..2d4b71e 100644 --- a/Gemfile +++ b/Gemfile @@ -11,7 +11,7 @@ gem 'rails', '~> 6.1.7' # Use sqlite3 as the database for Active Record gem 'sqlite3' # Use Puma as the app server -gem 'puma', '~> 4.3' +gem 'puma', '>= 4.3.12' # Build JSON APIs with ease. Read more: https://github.com/rails/jbuilder # gem 'jbuilder', '~> 2.5' # Use Redis adapter to run Action Cable in production From cd38c467b0a3373814207f1c8c7c416f0d8033e3 Mon Sep 17 00:00:00 2001 From: William Dewey Date: Wed, 25 May 2022 10:50:33 -0500 Subject: [PATCH 03/62] update to later version of puma --- Gemfile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Gemfile b/Gemfile index 2d4b71e..9c639fc 100644 --- a/Gemfile +++ b/Gemfile @@ -11,7 +11,7 @@ gem 'rails', '~> 6.1.7' # Use sqlite3 as the database for Active Record gem 'sqlite3' # Use Puma as the app server -gem 'puma', '>= 4.3.12' +gem 'puma', '>= 5.6' # Build JSON APIs with ease. 
Read more: https://github.com/rails/jbuilder # gem 'jbuilder', '~> 2.5' # Use Redis adapter to run Action Cable in production From 866fae1ffe193de40158cee2efed6e484781cc8b Mon Sep 17 00:00:00 2001 From: William Dewey Date: Wed, 25 May 2022 10:51:14 -0500 Subject: [PATCH 04/62] another round of gem updates --- Gemfile.lock | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Gemfile.lock b/Gemfile.lock index 07ee9b2..5bc4909 100644 --- a/Gemfile.lock +++ b/Gemfile.lock @@ -73,7 +73,7 @@ GEM globalid (1.0.0) activesupport (>= 5.0) http-accept (1.7.0) - http-cookie (1.0.4) + http-cookie (1.0.5) domain_name (~> 0.5) i18n (1.12.0) concurrent-ruby (~> 1.0) From 03b0030b6e0dd115be620ab409734fb747e7a8d3 Mon Sep 17 00:00:00 2001 From: William Dewey Date: Tue, 24 May 2022 13:40:03 -0500 Subject: [PATCH 05/62] add facet for matching nested facet --- app/services/search_item_req.rb | 39 +++++++++++++++++++++++++++++++++ 1 file changed, 39 insertions(+) diff --git a/app/services/search_item_req.rb b/app/services/search_item_req.rb index e9260c0..64c683b 100644 --- a/app/services/search_item_req.rb +++ b/app/services/search_item_req.rb @@ -104,6 +104,45 @@ def facets "order" => { f_type => dir }, } } + elsif f.include?("[") + #or nest it inside the next one? 
+ #this will be the same + facet = f.split("[") + path = facet.split(".").first + condition = f[/(?<=\[).+?(?=\])/] + subject = condition.split(".").first + predicate = condition.split(".").last + aggs[f] = { + "nested" => { + "path" => path + }, + "aggs" => { + "query" => { + "term" => { + subject => predicate + } + }, + "aggs" => { + f => { + "terms" => { + "field" => facet, + "order" => { type => dir }, + "size" => size + }, + "aggs" => { + "top_matches" => { + "top_hits" => { + "_source" => { + "includes" => [ f ] + }, + "size" => 1 + } + } + } + } + } + } + } # if nested, has extra syntax elsif f.include?(".") path = f.split(".").first From 7ce8727ac62777cc54ed87a03ab9fc737d1e9f81 Mon Sep 17 00:00:00 2001 From: William Dewey Date: Tue, 24 May 2022 14:31:49 -0500 Subject: [PATCH 06/62] add filter for matching nested facet --- app/services/search_item_req.rb | 37 ++++++++++++++++++++++++++++----- 1 file changed, 32 insertions(+), 5 deletions(-) diff --git a/app/services/search_item_req.rb b/app/services/search_item_req.rb index 64c683b..c58036d 100644 --- a/app/services/search_item_req.rb +++ b/app/services/search_item_req.rb @@ -104,9 +104,8 @@ def facets "order" => { f_type => dir }, } } + #nested facet, matching on another nested facet elsif f.include?("[") - #or nest it inside the next one? 
- #this will be the same facet = f.split("[") path = facet.split(".").first condition = f[/(?<=\[).+?(?=\])/] @@ -143,7 +142,7 @@ def facets } } } - # if nested, has extra syntax + # ordinary nested facet elsif f.include?(".") path = f.split(".").first aggs[f] = { @@ -200,8 +199,36 @@ def filters # (type 2 will only be used for dates) filters = fields.map {|f| f.split(@@filter_separator, 3) } filters.each do |filter| - # NESTED FIELD FILTER - if filter[0].include?(".") + # NESTED matching + if filter[0].include?("[") + facet = f.split("[") + path = facet.split(".").first + condition = f[/(?<=\[).+?(?=\])/] + subject = condition.split(".").first + predicate = condition.split(".").last + # this is a nested field and must be treated differently + nested = { + "nested" => { + + "path" => path, + "query" => { + "bool" => { + "must" => { + "term" => { + # "person.name" => "oliver wendell holmes" + # Remove CR's added by hidden input field values with returns + facet => filter[1].gsub(/\r/, "") + # "person.role" => "judge" + subject => predicate + } + } + } + } + } + } + filter_list << nested + #ordinary nested facet + elsif filter[0].include?(".") path = filter[0].split(".").first # this is a nested field and must be treated differently nested = { From 5073460db4d1eebe41947e2628c78bdc9fdae09c Mon Sep 17 00:00:00 2001 From: William Dewey Date: Tue, 24 May 2022 17:26:08 -0500 Subject: [PATCH 07/62] change split character, add missing comma --- app/services/search_item_req.rb | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/app/services/search_item_req.rb b/app/services/search_item_req.rb index c58036d..f79894e 100644 --- a/app/services/search_item_req.rb +++ b/app/services/search_item_req.rb @@ -109,8 +109,8 @@ def facets facet = f.split("[") path = facet.split(".").first condition = f[/(?<=\[).+?(?=\])/] - subject = condition.split(".").first - predicate = condition.split(".").last + subject = condition.split("|").first + predicate = 
condition.split("|").last aggs[f] = { "nested" => { "path" => path @@ -204,8 +204,8 @@ def filters facet = f.split("[") path = facet.split(".").first condition = f[/(?<=\[).+?(?=\])/] - subject = condition.split(".").first - predicate = condition.split(".").last + subject = condition.split("|").first + predicate = condition.split("|").last # this is a nested field and must be treated differently nested = { "nested" => { @@ -217,7 +217,7 @@ def filters "term" => { # "person.name" => "oliver wendell holmes" # Remove CR's added by hidden input field values with returns - facet => filter[1].gsub(/\r/, "") + facet => filter[1].gsub(/\r/, ""), # "person.role" => "judge" subject => predicate } From a4a0c719f94456ce844a2b3da468529ad94e35d9 Mon Sep 17 00:00:00 2001 From: William Dewey Date: Thu, 26 May 2022 11:24:05 -0500 Subject: [PATCH 08/62] parse the array for matching nested fields --- app/services/search_item_req.rb | 26 ++++++++++++++++---------- 1 file changed, 16 insertions(+), 10 deletions(-) diff --git a/app/services/search_item_req.rb b/app/services/search_item_req.rb index f79894e..d63b755 100644 --- a/app/services/search_item_req.rb +++ b/app/services/search_item_req.rb @@ -84,7 +84,6 @@ def facets Array.wrap(@params["facet"]).each do |f| # histograms use a different ordering terminology than normal aggs f_type = type == "_term" ? 
"_key" : "_count" - if f.include?("date") || f[/_d$/] # NOTE: if nested fields will ever have dates we will # need to refactor this to be available to both @@ -105,13 +104,18 @@ def facets } } #nested facet, matching on another nested facet + elsif f.include?("[") - facet = f.split("[") - path = facet.split(".").first - condition = f[/(?<=\[).+?(?=\])/] + # will be an array including the original, and an alternate aggregation name + options = JSON.parse(f) + agg_name = options[1] + original = options[2] + facet = original.split("[") + path = original.split(".").first + condition = original[/(?<=\[).+?(?=\])/] subject = condition.split("|").first predicate = condition.split("|").last - aggs[f] = { + aggs[agg_name] = { "nested" => { "path" => path }, @@ -122,7 +126,7 @@ def facets } }, "aggs" => { - f => { + agg_name => { "terms" => { "field" => facet, "order" => { type => dir }, @@ -132,7 +136,7 @@ def facets "top_matches" => { "top_hits" => { "_source" => { - "includes" => [ f ] + "includes" => [ agg_name ] }, "size" => 1 } @@ -201,9 +205,11 @@ def filters filters.each do |filter| # NESTED matching if filter[0].include?("[") - facet = f.split("[") - path = facet.split(".").first - condition = f[/(?<=\[).+?(?=\])/] + options = JSON.parse(f) + original = options[2] + facet = original.split("[") + path = original.split(".").first + condition = original[/(?<=\[).+?(?=\])/] subject = condition.split("|").first predicate = condition.split("|").last # this is a nested field and must be treated differently From eb5dd1b5382031cea3a506e77da6870e4b8572af Mon Sep 17 00:00:00 2001 From: William Dewey Date: Thu, 26 May 2022 16:21:47 -0500 Subject: [PATCH 09/62] change how compound facet name is parsed --- app/services/search_item_req.rb | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/app/services/search_item_req.rb b/app/services/search_item_req.rb index d63b755..e8231a0 100644 --- a/app/services/search_item_req.rb +++ 
b/app/services/search_item_req.rb @@ -108,10 +108,10 @@ def facets elsif f.include?("[") # will be an array including the original, and an alternate aggregation name options = JSON.parse(f) + original = options[0] agg_name = options[1] - original = options[2] - facet = original.split("[") - path = original.split(".").first + facet = original.split("[")[0] + path = facet.split(".").first condition = original[/(?<=\[).+?(?=\])/] subject = condition.split("|").first predicate = condition.split("|").last @@ -206,9 +206,9 @@ def filters # NESTED matching if filter[0].include?("[") options = JSON.parse(f) - original = options[2] - facet = original.split("[") - path = original.split(".").first + original = options[1] + facet = original.split("[")[0] + path = facet.split(".").first condition = original[/(?<=\[).+?(?=\])/] subject = condition.split("|").first predicate = condition.split("|").last From 896177bf2adc18ccd06eb5223d858700bba33774 Mon Sep 17 00:00:00 2001 From: William Dewey Date: Thu, 26 May 2022 16:22:44 -0500 Subject: [PATCH 10/62] use facet name as agg name this is not the most semantically correct, but it doesn't break anything and orchid seems to expect it --- app/services/search_item_req.rb | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/app/services/search_item_req.rb b/app/services/search_item_req.rb index e8231a0..c30c9ea 100644 --- a/app/services/search_item_req.rb +++ b/app/services/search_item_req.rb @@ -115,7 +115,7 @@ def facets condition = original[/(?<=\[).+?(?=\])/] subject = condition.split("|").first predicate = condition.split("|").last - aggs[agg_name] = { + aggs[f] = { "nested" => { "path" => path }, @@ -126,7 +126,7 @@ def facets } }, "aggs" => { - agg_name => { + f => { "terms" => { "field" => facet, "order" => { type => dir }, From 06be739d43018bb8069f266dabd5f2520d0d6371 Mon Sep 17 00:00:00 2001 From: William Dewey Date: Thu, 26 May 2022 16:23:20 -0500 Subject: [PATCH 11/62] change query to filter this is necessary 
to make this sort of query work --- app/services/search_item_req.rb | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/app/services/search_item_req.rb b/app/services/search_item_req.rb index c30c9ea..0981253 100644 --- a/app/services/search_item_req.rb +++ b/app/services/search_item_req.rb @@ -120,7 +120,7 @@ def facets "path" => path }, "aggs" => { - "query" => { + "filter" => { "term" => { subject => predicate } From 89e50635ebe3ea0a3ab7cdbd1ddba41ecd099d5a Mon Sep 17 00:00:00 2001 From: William Dewey Date: Fri, 27 May 2022 12:31:31 -0500 Subject: [PATCH 12/62] fix nested filter aggregation so it doesn't cause 400 error --- app/services/search_item_req.rb | 44 +++++++++++++++++---------------- 1 file changed, 23 insertions(+), 21 deletions(-) diff --git a/app/services/search_item_req.rb b/app/services/search_item_req.rb index 0981253..a3aa58d 100644 --- a/app/services/search_item_req.rb +++ b/app/services/search_item_req.rb @@ -115,30 +115,32 @@ def facets condition = original[/(?<=\[).+?(?=\])/] subject = condition.split("|").first predicate = condition.split("|").last - aggs[f] = { + aggs[agg_name] = { "nested" => { "path" => path }, "aggs" => { - "filter" => { - "term" => { - subject => predicate - } - }, - "aggs" => { - f => { - "terms" => { - "field" => facet, - "order" => { type => dir }, - "size" => size - }, - "aggs" => { - "top_matches" => { - "top_hits" => { - "_source" => { - "includes" => [ agg_name ] - }, - "size" => 1 + agg_name => { + "filter" => { + "term" => { + subject => predicate + } + }, + "aggs" => { + agg_name => { + "terms" => { + "field" => facet, + "order" => { type => dir }, + "size" => size + }, + "aggs" => { + "top_matches" => { + "top_hits" => { + "_source" => { + "includes" => [ agg_name ] + }, + "size" => 1 + } } } } @@ -220,7 +222,7 @@ def filters "query" => { "bool" => { "must" => { - "term" => { + "terms" => { # "person.name" => "oliver wendell holmes" # Remove CR's added by hidden input field values with returns 
facet => filter[1].gsub(/\r/, ""), From 5c43688fd0233dc6df7b093308693c369f124068 Mon Sep 17 00:00:00 2001 From: William Dewey Date: Tue, 31 May 2022 12:14:30 -0500 Subject: [PATCH 13/62] check for deeper nesting of buckets --- app/services/search_item_res.rb | 18 +++++++++++++++--- 1 file changed, 15 insertions(+), 3 deletions(-) diff --git a/app/services/search_item_res.rb b/app/services/search_item_res.rb index a82a199..04843d9 100644 --- a/app/services/search_item_res.rb +++ b/app/services/search_item_res.rb @@ -18,7 +18,6 @@ def build_response # strip out only the fields for the item response items = combine_highlights facets = reformat_facets - { "code" => 200, "count" => count, @@ -89,8 +88,7 @@ def reformat_facets facets = {} raw_facets.each do |field, info| facets[field] = {} - # nested fields do not have buckets at this level of response structure - buckets = info.key?("buckets") ? info["buckets"] : info.dig(field, "buckets") + buckets = get_buckets(info, field) if buckets buckets.each { |b| format_bucket_value(facets, field, b) } else @@ -110,4 +108,18 @@ def remove_nonword_chars(term) transliterated.gsub(/<\/?(?:em|strong|u)>|\W/, "").downcase end + def get_buckets(info, field) + buckets = nil + # ordinary facet + if info.key?("buckets") + buckets = info["buckets"] + # nested facet + elsif info.dig(field, "buckets") + buckets = info.dig(field, "buckets") + # filtered facet + else + buckets = info.dig(field, field, "buckets") + end + buckets + end end From 2c8b64a2359105aec57a846537d1030fc3af728d Mon Sep 17 00:00:00 2001 From: William Dewey Date: Wed, 1 Jun 2022 09:35:50 -0500 Subject: [PATCH 14/62] Change separator --- app/services/search_item_req.rb | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/app/services/search_item_req.rb b/app/services/search_item_req.rb index a3aa58d..e647119 100644 --- a/app/services/search_item_req.rb +++ b/app/services/search_item_req.rb @@ -113,8 +113,8 @@ def facets facet = original.split("[")[0] 
path = facet.split(".").first condition = original[/(?<=\[).+?(?=\])/] - subject = condition.split("|").first - predicate = condition.split("|").last + subject = condition.split("#").first + predicate = condition.split("#").last aggs[agg_name] = { "nested" => { "path" => path @@ -212,8 +212,8 @@ def filters facet = original.split("[")[0] path = facet.split(".").first condition = original[/(?<=\[).+?(?=\])/] - subject = condition.split("|").first - predicate = condition.split("|").last + subject = condition.split("#").first + predicate = condition.split("#").last # this is a nested field and must be treated differently nested = { "nested" => { From b2026a34738c57b81fa5738eb014e1fc46dec123 Mon Sep 17 00:00:00 2001 From: William Dewey Date: Wed, 1 Jun 2022 09:36:21 -0500 Subject: [PATCH 15/62] Fix parsing and query for filter matching --- app/services/search_item_req.rb | 12 ++++-------- 1 file changed, 4 insertions(+), 8 deletions(-) diff --git a/app/services/search_item_req.rb b/app/services/search_item_req.rb index e647119..2c39b5b 100644 --- a/app/services/search_item_req.rb +++ b/app/services/search_item_req.rb @@ -205,10 +205,9 @@ def filters # (type 2 will only be used for dates) filters = fields.map {|f| f.split(@@filter_separator, 3) } filters.each do |filter| - # NESTED matching + # filter aggregation with nesting if filter[0].include?("[") - options = JSON.parse(f) - original = options[1] + original = filter[0] facet = original.split("[")[0] path = facet.split(".").first condition = original[/(?<=\[).+?(?=\])/] @@ -217,17 +216,14 @@ def filters # this is a nested field and must be treated differently nested = { "nested" => { - "path" => path, "query" => { "bool" => { "must" => { - "terms" => { + "term" => { # "person.name" => "oliver wendell holmes" # Remove CR's added by hidden input field values with returns - facet => filter[1].gsub(/\r/, ""), - # "person.role" => "judge" - subject => predicate + facet => filter[1].gsub(/\r/, "") } } } From 
f44188c75882a18bc9e8e6272e886e2432d7841f Mon Sep 17 00:00:00 2001 From: William Dewey Date: Thu, 2 Jun 2022 15:04:58 -0500 Subject: [PATCH 16/62] rewrite filtered aggregation to be either nested or not --- app/services/search_item_req.rb | 71 +++++++++++++++++++-------------- 1 file changed, 42 insertions(+), 29 deletions(-) diff --git a/app/services/search_item_req.rb b/app/services/search_item_req.rb index 2c39b5b..81bd0ae 100644 --- a/app/services/search_item_req.rb +++ b/app/services/search_item_req.rb @@ -107,48 +107,61 @@ def facets elsif f.include?("[") # will be an array including the original, and an alternate aggregation name + + options = JSON.parse(f) original = options[0] agg_name = options[1] facet = original.split("[")[0] - path = facet.split(".").first + # may or may not be nested + nested = facet.include?(".") + if nested + path = facet.split(".").first + end condition = original[/(?<=\[).+?(?=\])/] subject = condition.split("#").first predicate = condition.split("#").last - aggs[agg_name] = { - "nested" => { - "path" => path - }, - "aggs" => { - agg_name => { - "filter" => { - "term" => { - subject => predicate - } - }, - "aggs" => { - agg_name => { - "terms" => { - "field" => facet, - "order" => { type => dir }, - "size" => size - }, - "aggs" => { - "top_matches" => { - "top_hits" => { - "_source" => { - "includes" => [ agg_name ] - }, - "size" => 1 - } + aggregation = { + # common to nested and non-nested + "filter" => { + "term" => { + subject => predicate + } + }, + "aggs" => { + agg_name => { + "terms" => { + "field" => facet, + "order" => { type => dir }, + "size" => size + }, + "aggs" => { + "top_matches" => { + "top_hits" => { + "_source" => { + "includes" => [ agg_name ] + }, + "size" => 1 } } } } } } - } - # ordinary nested facet + #interpolate above hash into nested query + if nested + aggs[agg_name] = { + "nested" => { + "path" => path + }, + "aggs" => { + agg_name => aggregation + } + } + else + #otherwise it is the whole query + 
aggs[agg_name] = aggregation + end elsif f.include?(".") path = f.split(".").first aggs[f] = { From d81c070384b9f0fb728184de23de4bbbd6a15ef3 Mon Sep 17 00:00:00 2001 From: William Dewey Date: Thu, 2 Jun 2022 16:05:44 -0500 Subject: [PATCH 17/62] filtering on a single item can either be nested or not --- app/services/search_item_req.rb | 35 +++++++++++++++++++-------------- 1 file changed, 20 insertions(+), 15 deletions(-) diff --git a/app/services/search_item_req.rb b/app/services/search_item_req.rb index 81bd0ae..cd51f8f 100644 --- a/app/services/search_item_req.rb +++ b/app/services/search_item_req.rb @@ -222,28 +222,33 @@ def filters if filter[0].include?("[") original = filter[0] facet = original.split("[")[0] - path = facet.split(".").first + nested = facet.include?(".") + if nested + path = facet.split(".").first + end condition = original[/(?<=\[).+?(?=\])/] subject = condition.split("#").first predicate = condition.split("#").last - # this is a nested field and must be treated differently - nested = { - "nested" => { - "path" => path, - "query" => { - "bool" => { - "must" => { - "term" => { - # "person.name" => "oliver wendell holmes" - # Remove CR's added by hidden input field values with returns - facet => filter[1].gsub(/\r/, "") - } + query = { + "term" => { + # "person.name" => "oliver wendell holmes" + # Remove CR's added by hidden input field values with returns + facet => filter[1].gsub(/\r/, "") + } + } + if nested + query = { + "nested" => { + "path" => path, + "query" => { + "bool" => { + "must" => query } } } } - } - filter_list << nested + end + filter_list << query #ordinary nested facet elsif filter[0].include?(".") path = filter[0].split(".").first From 322504d30a067ac3159440983424271ef1ec1bfb Mon Sep 17 00:00:00 2001 From: William Dewey Date: Mon, 26 Sep 2022 10:15:19 -0500 Subject: [PATCH 18/62] update config for server --- .ruby-gemset | 2 +- config/environments/development.rb | 1 + 2 files changed, 2 insertions(+), 1 deletion(-) diff 
--git a/.ruby-gemset b/.ruby-gemset index eedd89b..fcf5595 100644 --- a/.ruby-gemset +++ b/.ruby-gemset @@ -1 +1 @@ -api +api-v2 diff --git a/config/environments/development.rb b/config/environments/development.rb index fc3ea89..b41968b 100644 --- a/config/environments/development.rb +++ b/config/environments/development.rb @@ -73,4 +73,5 @@ # CDRH CONFIGURATION config.hosts << "cdrhdev1.unl.edu" + config.hosts << "whitman-dev.unl.edu" end From 15c3c90477504b6b65d8bb38d88e5760678c8f52 Mon Sep 17 00:00:00 2001 From: William Dewey Date: Wed, 19 Oct 2022 12:03:51 -0500 Subject: [PATCH 19/62] revise query to match both the facet and the filter --- app/services/search_item_req.rb | 18 +++++++++++------- 1 file changed, 11 insertions(+), 7 deletions(-) diff --git a/app/services/search_item_req.rb b/app/services/search_item_req.rb index cd51f8f..b34a75a 100644 --- a/app/services/search_item_req.rb +++ b/app/services/search_item_req.rb @@ -229,12 +229,13 @@ def filters condition = original[/(?<=\[).+?(?=\])/] subject = condition.split("#").first predicate = condition.split("#").last - query = { - "term" => { - # "person.name" => "oliver wendell holmes" - # Remove CR's added by hidden input field values with returns - facet => filter[1].gsub(/\r/, "") - } + term_match = { + # "person.name" => "oliver wendell holmes" + # Remove CR's added by hidden input field values with returns + facet => filter[1].gsub(/\r/, "") + } + term_filter = { + subject => predicate } if nested query = { @@ -242,7 +243,10 @@ def filters "path" => path, "query" => { "bool" => { - "must" => query + "must" => [ + { "match" => term_filter }, + { "match" => term_match } + ] } } } From 5977f2b0f4d7afc65849159032ed7be5ae8a468b Mon Sep 17 00:00:00 2001 From: William Dewey Date: Thu, 20 Oct 2022 09:57:32 -0500 Subject: [PATCH 20/62] use reverse nested agg for correct item count --- app/services/search_item_req.rb | 19 +++++++++++++------ 1 file changed, 13 insertions(+), 6 deletions(-) diff --git 
a/app/services/search_item_req.rb b/app/services/search_item_req.rb index b34a75a..e298836 100644 --- a/app/services/search_item_req.rb +++ b/app/services/search_item_req.rb @@ -51,6 +51,8 @@ def build_request # add bool to request body req["query"]["bool"] = bool + # uncomment below line to log ES query for debugging + # puts req.to_json() return req end @@ -136,12 +138,17 @@ def facets "size" => size }, "aggs" => { - "top_matches" => { - "top_hits" => { - "_source" => { - "includes" => [ agg_name ] - }, - "size" => 1 + "field_to_item" => { + "reverse_nested" => {}, + "aggs" => { + "top_matches" => { + "top_hits" => { + "_source" => { + "includes" => [ agg_name ] + }, + "size" => 1 + } + } } } } From b08f408d233388a4fdb4ae4a4a062f0aa3f555c2 Mon Sep 17 00:00:00 2001 From: William Dewey Date: Thu, 20 Oct 2022 10:01:28 -0500 Subject: [PATCH 21/62] used doc_count from reverse nested if it exists --- app/services/search_item_res.rb | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/app/services/search_item_res.rb b/app/services/search_item_res.rb index 04843d9..05210e7 100644 --- a/app/services/search_item_res.rb +++ b/app/services/search_item_res.rb @@ -65,7 +65,7 @@ def format_bucket_value(facets, field, bucket) # dates return in wonktastic ways, so grab key_as_string instead of gibberish number # but otherwise just grab the key if key_as_string unavailable key = bucket.key?("key_as_string") ? bucket["key_as_string"] : bucket["key"] - val = bucket["doc_count"] + val = bucket.key?("field_to_item") ? bucket["field_to_item"]["doc_count"] : bucket["doc_count"] source = key # top_matches is a top_hits aggregation which returns a list of terms # which were used for the facet. 
From f399f9a818f52ac3ce3a137d7b4ff05316d75634 Mon Sep 17 00:00:00 2001 From: William Dewey Date: Fri, 21 Oct 2022 13:23:57 -0500 Subject: [PATCH 22/62] change key for new elasticsearch version --- app/services/search_item_req.rb | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/app/services/search_item_req.rb b/app/services/search_item_req.rb index e298836..497c84a 100644 --- a/app/services/search_item_req.rb +++ b/app/services/search_item_req.rb @@ -99,7 +99,7 @@ def facets aggs[f] = { "date_histogram" => { "field" => field, - "interval" => interval, + "calendar_interval" => interval, "format" => formatted, "min_doc_count" => 1, "order" => { f_type => dir }, From 72abce3cf4c15c436e4caa40e51d9c2a5ca39979 Mon Sep 17 00:00:00 2001 From: William Dewey Date: Mon, 24 Oct 2022 12:51:22 -0500 Subject: [PATCH 23/62] change order query to avoid deprecated '_term' --- app/services/search_item_req.rb | 10 +++++----- test/services/search_item_req_test.rb | 8 ++++---- 2 files changed, 9 insertions(+), 9 deletions(-) diff --git a/app/services/search_item_req.rb b/app/services/search_item_req.rb index 497c84a..56e6453 100644 --- a/app/services/search_item_req.rb +++ b/app/services/search_item_req.rb @@ -74,7 +74,7 @@ def facets dir = "desc" if @params["facet_sort"].present? sort_type, sort_dir = @params["facet_sort"].split(@@filter_separator) - type = "_term" if sort_type == "term" + type = "term" if sort_type == "term" dir = sort_dir if sort_dir == "asc" end @@ -85,7 +85,7 @@ def facets aggs = {} Array.wrap(@params["facet"]).each do |f| # histograms use a different ordering terminology than normal aggs - f_type = type == "_term" ? "_key" : "_count" + f_type = (type == "term") ? 
"_key" : "_count" if f.include?("date") || f[/_d$/] # NOTE: if nested fields will ever have dates we will # need to refactor this to be available to both @@ -134,7 +134,7 @@ def facets agg_name => { "terms" => { "field" => facet, - "order" => { type => dir }, + "order" => {f_type => dir}, "size" => size }, "aggs" => { @@ -179,7 +179,7 @@ def facets f => { "terms" => { "field" => f, - "order" => { type => dir }, + "order" => {f_type => dir}, "size" => size }, "aggs" => { @@ -199,7 +199,7 @@ def facets aggs[f] = { "terms" => { "field" => f, - "order" => { type => dir }, + "order" => { f_type => dir }, "size" => size }, "aggs" => { diff --git a/test/services/search_item_req_test.rb b/test/services/search_item_req_test.rb index 29bf323..c4d8197 100644 --- a/test/services/search_item_req_test.rb +++ b/test/services/search_item_req_test.rb @@ -44,7 +44,7 @@ def test_facets "facet" => [ "title", "subcategory" ] }).facets assert_equal( - {"title"=>{"terms"=>{"field"=>"title", "order"=>{"_term"=>"asc"}, "size"=>10}, "aggs"=>{"top_matches"=>{"top_hits"=>{"_source"=>{"includes"=>["title"]}, "size"=>1}}}}, "subcategory"=>{"terms"=>{"field"=>"subcategory", "order"=>{"_term"=>"asc"}, "size"=>10}, "aggs"=>{"top_matches"=>{"top_hits"=>{"_source"=>{"includes"=>["subcategory"]}, "size"=>1}}}}}, + {"title"=>{"terms"=>{"field"=>"title", "order"=>"asc", "size"=>10}, "aggs"=>{"top_matches"=>{"top_hits"=>{"_source"=>{"includes"=>["title"]}, "size"=>1}}}}, "subcategory"=>{"terms"=>{"field"=>"subcategory", "order"=>"asc", "size"=>10}, "aggs"=>{"top_matches"=>{"top_hits"=>{"_source"=>{"includes"=>["subcategory"]}, "size"=>1}}}}}, facets ) @@ -69,7 +69,7 @@ def test_facets "facet" => [ "creator.name" ] }).facets assert_equal( - {"creator.name"=>{"nested"=>{"path"=>"creator"}, "aggs"=>{"creator.name"=>{"terms"=>{"field"=>"creator.name", "order"=>{"_term"=>"desc"}, "size"=>20}, "aggs"=>{"top_matches"=>{"top_hits"=>{"_source"=>{"includes"=>["creator.name"]}, "size"=>1}}}}}}}, + 
{"creator.name"=>{"nested"=>{"path"=>"creator"}, "aggs"=>{"creator.name"=>{"terms"=>{"field"=>"creator.name", "order"=>"desc", "size"=>20}, "aggs"=>{"top_matches"=>{"top_hits"=>{"_source"=>{"includes"=>["creator.name"]}, "size"=>1}}}}}}}, facets ) @@ -83,14 +83,14 @@ def test_facets # sort term order specified facets = SearchItemReq.new({ "facet" => ["title", "format"], "facet_sort" => "term|desc" }).facets assert_equal( - {"title"=>{"terms"=>{"field"=>"title", "order"=>{"_term"=>"desc"}, "size"=>20}, "aggs"=>{"top_matches"=>{"top_hits"=>{"_source"=>{"includes"=>["title"]}, "size"=>1}}}}, "format"=>{"terms"=>{"field"=>"format", "order"=>{"_term"=>"desc"}, "size"=>20}, "aggs"=>{"top_matches"=>{"top_hits"=>{"_source"=>{"includes"=>["format"]}, "size"=>1}}}}}, + {"title"=>{"terms"=>{"field"=>"title", "order"=>"desc", "size"=>20}, "aggs"=>{"top_matches"=>{"top_hits"=>{"_source"=>{"includes"=>["title"]}, "size"=>1}}}}, "format"=>{"terms"=>{"field"=>"format", "order"=>"desc", "size"=>20}, "aggs"=>{"top_matches"=>{"top_hits"=>{"_source"=>{"includes"=>["format"]}, "size"=>1}}}}}, facets ) # sort term no order specified facets = SearchItemReq.new({ "facet" => ["title", "format"], "facet_sort" => "term" }).facets assert_equal( - {"title"=>{"terms"=>{"field"=>"title", "order"=>{"_term"=>"desc"}, "size"=>20}, "aggs"=>{"top_matches"=>{"top_hits"=>{"_source"=>{"includes"=>["title"]}, "size"=>1}}}}, "format"=>{"terms"=>{"field"=>"format", "order"=>{"_term"=>"desc"}, "size"=>20}, "aggs"=>{"top_matches"=>{"top_hits"=>{"_source"=>{"includes"=>["format"]}, "size"=>1}}}}}, + {"title"=>{"terms"=>{"field"=>"title", "order"=>"desc", "size"=>20}, "aggs"=>{"top_matches"=>{"top_hits"=>{"_source"=>{"includes"=>["title"]}, "size"=>1}}}}, "format"=>{"terms"=>{"field"=>"format", "order"=>"desc", "size"=>20}, "aggs"=>{"top_matches"=>{"top_hits"=>{"_source"=>{"includes"=>["format"]}, "size"=>1}}}}}, facets ) From db643190ba8a71a4775aa9e23208e66852c1f329 Mon Sep 17 00:00:00 2001 From: William 
Dewey Date: Wed, 26 Oct 2022 09:06:39 -0500 Subject: [PATCH 24/62] gitignore master key --- .gitignore | 2 ++ 1 file changed, 2 insertions(+) diff --git a/.gitignore b/.gitignore index 76fb9c4..a3bd71e 100644 --- a/.gitignore +++ b/.gitignore @@ -48,3 +48,5 @@ bower.json .byebug_history .DS_Store + +/config/master.key From 59835ed8c7ec5d404f0bba710284b3c310ba3e63 Mon Sep 17 00:00:00 2001 From: William Dewey Date: Wed, 26 Oct 2022 09:26:15 -0500 Subject: [PATCH 25/62] add basic auth to elasticsearch requests --- app/controllers/application_controller.rb | 3 ++- app/services/search_service.rb | 3 ++- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/app/controllers/application_controller.rb b/app/controllers/application_controller.rb index 15ff0d8..e97e006 100644 --- a/app/controllers/application_controller.rb +++ b/app/controllers/application_controller.rb @@ -3,7 +3,8 @@ class ApplicationController < ActionController::API def post_search(json, error_method=method(:display_error)) - res = RestClient.post("#{ES_URI}/_search", json.to_json, { "content-type" => "json" }) + auth_hash = { "Authorization" => "Basic #{Base64::encode64("#{ES_USER}:#{ES_PASSWORD}")}" } + res = RestClient.post("#{ES_URI}/_search", json.to_json, auth_hash.merge({ "content-type" => "json" })) raise return JSON.parse(res.body) rescue => e diff --git a/app/services/search_service.rb b/app/services/search_service.rb index dbd8877..319b97b 100644 --- a/app/services/search_service.rb +++ b/app/services/search_service.rb @@ -11,7 +11,8 @@ def initialize(url, params={}, user_req) end def post(url_ending, json) - res = RestClient.post("#{@url}/#{url_ending}", json.to_json, { "content-type" => "json" } ) + auth_hash = { "Authorization" => "Basic #{Base64::encode64("#{Rails.application.credentials.elasticsearch[:user]}:#{Rails.application.credentials.elasticsearch[:password]}")}" } + res = RestClient.post("#{@url}/#{url_ending}", json.to_json, auth_hash.merge({ "content-type" => "json" } )) 
JSON.parse(res.body) rescue => e e From 419bc8b7d377533cdb65dda31e5d65a3a1ba25b5 Mon Sep 17 00:00:00 2001 From: William Dewey Date: Fri, 28 Oct 2022 11:58:55 -0500 Subject: [PATCH 26/62] raise number of results per facet --- config/config.example.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/config/config.example.yml b/config/config.example.yml index a06b13c..c0ce5d4 100644 --- a/config/config.example.yml +++ b/config/config.example.yml @@ -18,7 +18,7 @@ default: &default # highlight fragment number hl_num: 3 # number of results returned per "page" - num: 20 + num: 5000 # default sorting field and direction (field|asc) sort_fl: "identifier|asc" # starting document in results list (0 == 1st result) From bd92c4996eb318119a58c73a287b9631cd2fc85b Mon Sep 17 00:00:00 2001 From: William Dewey Date: Mon, 31 Oct 2022 11:40:32 -0500 Subject: [PATCH 27/62] use facet_limit instead of facet_num to match Orchid --- app/services/search_item_req.rb | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/app/services/search_item_req.rb b/app/services/search_item_req.rb index 56e6453..749af2f 100644 --- a/app/services/search_item_req.rb +++ b/app/services/search_item_req.rb @@ -79,8 +79,7 @@ def facets end # FACET_SETTINGS["start"] - size = SETTINGS["num"] - size = @params["facet_num"].blank? ? SETTINGS["num"] : @params["facet_num"] + size = @params["facet_limit"].blank? ? 
SETTINGS["num"] : @params["facet_limit"] aggs = {} Array.wrap(@params["facet"]).each do |f| From f0653a9e82eda87bf02024865df89351188a7732 Mon Sep 17 00:00:00 2001 From: William Dewey Date: Mon, 31 Oct 2022 11:40:50 -0500 Subject: [PATCH 28/62] revert, will set in Orchid --- config/config.example.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/config/config.example.yml b/config/config.example.yml index c0ce5d4..a06b13c 100644 --- a/config/config.example.yml +++ b/config/config.example.yml @@ -18,7 +18,7 @@ default: &default # highlight fragment number hl_num: 3 # number of results returned per "page" - num: 5000 + num: 20 # default sorting field and direction (field|asc) sort_fl: "identifier|asc" # starting document in results list (0 == 1st result) From 19f789f61708633dbc32944b77ab9aef77744545 Mon Sep 17 00:00:00 2001 From: William Dewey Date: Tue, 1 Nov 2022 15:59:45 -0500 Subject: [PATCH 29/62] change facet_num to facet_limit --- test/services/search_item_req_test.rb | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/test/services/search_item_req_test.rb b/test/services/search_item_req_test.rb index c4d8197..a5b3eab 100644 --- a/test/services/search_item_req_test.rb +++ b/test/services/search_item_req_test.rb @@ -39,7 +39,7 @@ def test_facets # normal with pagination overrides, multiple facets facets = SearchItemReq.new({ - "facet_num" => 10, + "facet_limit" => 10, "facet_sort" => "term|asc", "facet" => [ "title", "subcategory" ] }).facets @@ -50,7 +50,7 @@ def test_facets # should be blank if there are no facets provided facets = SearchItemReq.new({ - "facet_num" => 1, + "facet_limit" => 1, "facet_sort" => "nonterm|asc", "facet" => [] }).facets From e922ed7272af82429896c68a093f49e01c884cd3 Mon Sep 17 00:00:00 2001 From: William Dewey Date: Thu, 10 Nov 2022 09:56:24 -0600 Subject: [PATCH 30/62] update nested facets documentation --- docs/README.md | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/docs/README.md 
b/docs/README.md index 8aa9ad6..cd2fc90 100644 --- a/docs/README.md +++ b/docs/README.md @@ -50,6 +50,12 @@ __Nested fields__ facet[]=creator.name facet[]=creator.name&facet[]=creator.role ``` +you can also match on another nested field with the new API schema +`facet[]=nested_field.keyword_field1[nested_field.keyword_field2#value]` +``` +facet[]=person.name[person.role#judge] +``` +the above will select all names of persons, where the role of that person is "judge". __Date ranges__ (currently supports days or years) From 5eb6476fee81dd168d5cd90bdae8917cf8be200e Mon Sep 17 00:00:00 2001 From: William Dewey Date: Thu, 10 Nov 2022 13:55:56 -0600 Subject: [PATCH 31/62] add links to more detailed documentation --- CHANGELOG.md | 48 ++++++++++++++++++++++++++++++++++++++++++++---- 1 file changed, 44 insertions(+), 4 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 03ef8d2..e91ee79 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -27,84 +27,125 @@ Markdown Spec](https://github.github.com/gfm/). ### Contributors --> + ## [2.0.0] - new nested bucket aggregation/query functionality for Habeas release -[Unreleased]: https://github.com/CDRH/api/compare/v1.0.4...dev + +[unreleased]: https://github.com/CDRH/api/compare/v1.0.4...dev ### Added + - "api_version" added to all response "res" objects - support for elasticsearch 8.5 - user/password basic authentication with ES 8.5, when querying the index or posting from Datura - better support for nested fields - support for nested bucket aggregations, matching a nested value on another nested value. `person.name[person.role#judge]` will return all names of persons where role="judge". 
- "api_version" added to all response "res" objects + +### Changed + +- upgraded to Rails 6.1.7 and Ruby 3 +- changes reflect new api schemas in Datura, which make heavy use of nested fields - Added support for aggregating buckets by normalized keyword and returning the "top_hits" first document result for a non-normalized display - Changes response format of `facets` key - + From: + ``` "facets": { "WILLA CATHER": 10, "Willa Cather": 50 } ``` + To: + ``` "facets": { "willa cather": { "num" : 60, source: "Willa Cather" } } ``` + Not only is the response format itself different, but there may be fewer facets returned since normalized values which match are combined + ### Changed + - upgraded to Rails 6.1.7 and Ruby 3 - changes reflect new api schemas in Datura, which make heavy use of nested fields ### Migration + - in Datura repos config `private.yml` api to `"api_version": "2.0"` to take advantage of new bucket aggregation functionality (or `"api_version": "1.0"` for legacy repos that have not been updated for the new schema). Please note that a running API index can only use one ES index at a time, and each ES index is restricted to one version of the schema. - Use Elasticsearch 8.5 or later - If you are using ES with security enabled, you must configure credentials with Rails in the API repo. See https://guides.rubyonrails.org/v6.1/security.html. Configure the VSCode editor. Run `EDITOR="code --wait" rails credentials:edit` and add + ``` elasticsearch: user: username password: ***** ``` + to the secrets file and then close the window to save. Do not commit `config/master.key` (it should be in `gitignore`) + - Orchid apps that connect to the API should use `facet_limit` instead of `facet_num` in options. 
- Add nested facets as described above, if desired +### Migration + +- in Datura repos config `private.yml` api to `"api_version": "2.0"` to take advantage of new bucket aggregation functionality (or `"api_version": "1.0"` for legacy repos that have not been updated for the new schema). Please note that a running API index can only use one ES index at a time, and each ES index is restricted to one version of the schema. See new schema (2.0) documentation [here](https://github.com/CDRH/datura/docs/schema_v2.md) +- Use Elasticsearch 8.5 or later. See [dev docs instructions](https://github.com/CDRH/cdrh_dev_docs/blob/update_elasticsearch_documentation/publishing/2_basic_requirements.md#downloading-elasticsearch). +- If you are using ES with security enabled, you must configure credentials with Rails in the API repo. See https://guides.rubyonrails.org/v6.1/security.html. Configure the VSCode editor. Run `EDITOR="code --wait" rails credentials:edit` and add + +``` +elasticsearch: + user: username + password: ***** +``` + +to the secrets file and then close the window to save. Do not commit `config/master.key` (it should be in `gitignore`) + +- Orchid apps that connect to the API should use `facet_limit` instead of `facet_num` in options. +- Add nested facets as described above, if desired. 
## [v1.0.4](https://github.com/CDRH/api/compare/v1.0....v1.0.4) - Updates & license ### Changed + - Updated Ruby version, gems (which addresses mimemagic dependency problem), and -license added + license added ### Added + - Documentation on facets and highlighting ## [v1.0.3](https://github.com/CDRH/api/compare/v1.0.2...v1.0.3) - gem updates ### Changed + - updates to rails and other gems ## [v1.0.2](https://github.com/CDRH/api/compare/v1.0.1...v1.0.2) - escapes and sorting ### Fixed + - question mark and asterisk behavior in queries - order of expected, actual in tests - sort behavior for relevancy ### Added + - support for multivalued and nested field sorting - documentation moved back into apium from henbit location in order to version it with software ### Changed + - ruby, rails, and other gem versions ## [v1.0.1](https://github.com/CDRH/api/compare/v1.00...v1.0.1) - version 1.0.1 ### Changed + - ruby, rails, and other gem versions - version moved to initializer @@ -113,4 +154,3 @@ license added ### Contributors - Jessica Dussault (jduss4) - From 4e2638e9407e6f86c5f2dc880eed24f6ce4a7c2b Mon Sep 17 00:00:00 2001 From: William Dewey Date: Thu, 10 Nov 2022 14:24:33 -0600 Subject: [PATCH 32/62] clarify --- CHANGELOG.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index e91ee79..fc011ba 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -76,8 +76,8 @@ Markdown Spec](https://github.github.com/gfm/). ### Migration -- in Datura repos config `private.yml` api to `"api_version": "2.0"` to take advantage of new bucket aggregation functionality (or `"api_version": "1.0"` for legacy repos that have not been updated for the new schema). Please note that a running API index can only use one ES index at a time, and each ES index is restricted to one version of the schema. 
-- Use Elasticsearch 8.5 or later +- in the config files of your Datura repos, (`private.yml` or `public.yml`, set the api to `"api_version": "2.0"` to take advantage of new bucket aggregation functionality (or `"api_version": "1.0"` for legacy repos that have not been updated for the new schema). Please note that a running API index can only use one ES index at a time, and each ES index is restricted to one version of the schema. See new schema (2.0) documentation [here](https://github.com/CDRH/datura/docs/schema_v2.md). +- Use Elasticsearch 8.5 or later. See [dev docs instructions](https://github.com/CDRH/cdrh_dev_docs/blob/update_elasticsearch_documentation/publishing/2_basic_requirements.md#downloading-elasticsearch). - If you are using ES with security enabled, you must configure credentials with Rails in the API repo. See https://guides.rubyonrails.org/v6.1/security.html. Configure the VSCode editor. Run `EDITOR="code --wait" rails credentials:edit` and add ``` From 90281ed9ae7a697e0f963aed3d6d309174c60e99 Mon Sep 17 00:00:00 2001 From: wkdewey Date: Mon, 19 Dec 2022 09:26:38 -0600 Subject: [PATCH 33/62] update gems --- Gemfile.lock | 32 +++++++++++++++++++++----------- 1 file changed, 21 insertions(+), 11 deletions(-) diff --git a/Gemfile.lock b/Gemfile.lock index 5bc4909..bb5cd5f 100644 --- a/Gemfile.lock +++ b/Gemfile.lock @@ -60,12 +60,13 @@ GEM minitest (>= 5.1) tzinfo (~> 2.0) zeitwerk (~> 2.3) - bootsnap (1.13.0) + bootsnap (1.15.0) msgpack (~> 1.2) builder (3.2.4) byebug (11.1.3) concurrent-ruby (1.1.10) crass (1.0.6) + date (3.3.2) domain_name (0.5.20190701) unf (>= 0.0.5, < 1.0.0) erubi (1.11.0) @@ -80,11 +81,14 @@ GEM listen (3.0.8) rb-fsevent (~> 0.9, >= 0.9.4) rb-inotify (~> 0.9, >= 0.9.7) - loofah (2.19.0) + loofah (2.19.1) crass (~> 1.0.2) nokogiri (>= 1.5.9) - mail (2.7.1) + mail (2.8.0) mini_mime (>= 0.1.1) + net-imap + net-pop + net-smtp marcel (1.0.2) method_source (1.0.0) mime-types (3.4.1) @@ -93,17 +97,22 @@ GEM mini_mime (1.1.2) 
minitest (5.16.3) msgpack (1.6.0) - net-protocol (0.1.3) + net-imap (0.3.2) + date + net-protocol + net-pop (0.1.2) + net-protocol + net-protocol (0.2.1) timeout net-smtp (0.3.3) net-protocol netrc (0.11.0) nio4r (2.5.8) - nokogiri (1.13.9-x86_64-darwin) + nokogiri (1.13.10-x86_64-darwin) racc (~> 1.4) puma (6.0.0) nio4r (~> 2.0) - racc (1.6.0) + racc (1.6.1) rack (2.2.4) rack-test (2.0.2) rack (>= 1.3) @@ -125,8 +134,8 @@ GEM rails-dom-testing (2.0.3) activesupport (>= 4.2.0) nokogiri (>= 1.6) - rails-html-sanitizer (1.4.3) - loofah (~> 2.3) + rails-html-sanitizer (1.4.4) + loofah (~> 2.19, >= 2.19.1) railties (6.1.7) actionpack (= 6.1.7) activesupport (= 6.1.7) @@ -153,9 +162,9 @@ GEM actionpack (>= 5.2) activesupport (>= 5.2) sprockets (>= 3.0.0) - sqlite3 (1.5.3-x86_64-darwin) + sqlite3 (1.5.4-x86_64-darwin) thor (1.2.1) - timeout (0.3.0) + timeout (0.3.1) tzinfo (2.0.5) concurrent-ruby (~> 1.0) unf (0.1.4) @@ -168,6 +177,7 @@ GEM PLATFORMS x86_64-darwin-21 + x86_64-darwin-22 DEPENDENCIES bootsnap @@ -183,4 +193,4 @@ DEPENDENCIES tzinfo-data BUNDLED WITH - 2.3.7 + 2.3.26 From e52ba03307d2497fdca809af96d91aba30111690 Mon Sep 17 00:00:00 2001 From: William Dewey Date: Thu, 25 May 2023 09:15:54 -0500 Subject: [PATCH 34/62] use reverse nested on simple nested aggregations fixes #141 --- app/services/search_item_req.rb | 17 +++++++++++------ 1 file changed, 11 insertions(+), 6 deletions(-) diff --git a/app/services/search_item_req.rb b/app/services/search_item_req.rb index 749af2f..b76f5d3 100644 --- a/app/services/search_item_req.rb +++ b/app/services/search_item_req.rb @@ -182,12 +182,17 @@ def facets "size" => size }, "aggs" => { - "top_matches" => { - "top_hits" => { - "_source" => { - "includes" => [ f ] - }, - "size" => 1 + "field_to_item" => { + "reverse_nested" => {}, + "aggs" => { + "top_matches" => { + "top_hits" => { + "_source" => { + "includes" => [ agg_name ] + }, + "size" => 1 + } + } } } } From 6e92258bb41af2eca94af68673d8fb5997627d8f Mon Sep 17 
00:00:00 2001 From: William Dewey Date: Fri, 26 May 2023 08:13:17 -0500 Subject: [PATCH 35/62] fix elasticsearch errors --- app/services/search_item_req.rb | 2 +- app/services/search_item_res.rb | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/app/services/search_item_req.rb b/app/services/search_item_req.rb index b76f5d3..1475e87 100644 --- a/app/services/search_item_req.rb +++ b/app/services/search_item_req.rb @@ -188,7 +188,7 @@ def facets "top_matches" => { "top_hits" => { "_source" => { - "includes" => [ agg_name ] + "includes" => [ f ] }, "size" => 1 } diff --git a/app/services/search_item_res.rb b/app/services/search_item_res.rb index 05210e7..49b9c92 100644 --- a/app/services/search_item_res.rb +++ b/app/services/search_item_res.rb @@ -78,7 +78,7 @@ def format_bucket_value(facets, field, bucket) end facets[field][key] = { "num" => val, - "source" => source + "source" => source.to_s } end From f8cc0648ab949d998ec7e029a64d00da70b96d27 Mon Sep 17 00:00:00 2001 From: William Dewey Date: Fri, 21 Jul 2023 14:05:26 -0500 Subject: [PATCH 36/62] titleize bucket values because ES automatically lowercases them --- app/services/search_item_res.rb | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/app/services/search_item_res.rb b/app/services/search_item_res.rb index 49b9c92..741e736 100644 --- a/app/services/search_item_res.rb +++ b/app/services/search_item_res.rb @@ -49,9 +49,9 @@ def find_source_from_top_hits(top_hits, field, key) if hit.class == Array # I don't love this, because we will have to match exactly the logic # that got us the key to get this to work - match_index = hit - .map { |s| remove_nonword_chars(s) } - .index(remove_nonword_chars(key)) + match_index = hit + .map { |s| remove_nonword_chars(s) } + .index(remove_nonword_chars(key)) # if nothing matches the original key, return the entire source hit # should return a string, regardless return match_index ? 
hit[match_index] : hit.join(" ") @@ -64,7 +64,7 @@ def find_source_from_top_hits(top_hits, field, key) def format_bucket_value(facets, field, bucket) # dates return in wonktastic ways, so grab key_as_string instead of gibberish number # but otherwise just grab the key if key_as_string unavailable - key = bucket.key?("key_as_string") ? bucket["key_as_string"] : bucket["key"] + key = bucket.key?("key_as_string") ? bucket["key_as_string"].titleize : bucket["key"].titleize val = bucket.key?("field_to_item") ? bucket["field_to_item"]["doc_count"] : bucket["doc_count"] source = key # top_matches is a top_hits aggregation which returns a list of terms From 013df32d19c603a899ba13e220f3fb1f995c56f3 Mon Sep 17 00:00:00 2001 From: William Dewey Date: Mon, 15 May 2023 10:56:09 -0500 Subject: [PATCH 37/62] display total search hits (above 10,000) fixes #139 --- app/services/search_item_req.rb | 1 + 1 file changed, 1 insertion(+) diff --git a/app/services/search_item_req.rb b/app/services/search_item_req.rb index 1475e87..f8b44da 100644 --- a/app/services/search_item_req.rb +++ b/app/services/search_item_req.rb @@ -17,6 +17,7 @@ def build_request start = @params["start"].blank? ? 
SETTINGS["start"] : @params["start"] req = { + "track_total_hits": true, "aggs" => {}, "from" => start, "highlight" => {}, From f72c307ebdddc336edb9a405019ab2bc5ae11dd5 Mon Sep 17 00:00:00 2001 From: William Dewey Date: Tue, 6 Aug 2024 16:50:06 -0500 Subject: [PATCH 38/62] fix display of nested field facets Need to adjust for new Elasticsearch hash formatting when finding the non-normalized facet source name fixes #145 --- app/services/search_item_res.rb | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/app/services/search_item_res.rb b/app/services/search_item_res.rb index 741e736..0ff691d 100644 --- a/app/services/search_item_res.rb +++ b/app/services/search_item_res.rb @@ -41,17 +41,18 @@ def combine_highlights def find_source_from_top_hits(top_hits, field, key) # elasticsearch stores nested source results without the "path" + parent = field.split(".").first nested_child = field.split(".").last - hit = top_hits.first.dig("_source", nested_child) + hit = top_hits.first.dig("_source", parent).map { |i| i[nested_child] }.compact # if this is a multivalued field (for example: works or places), # ALL of the values come back as the source, but we only want # the single value from which the key was derived if hit.class == Array # I don't love this, because we will have to match exactly the logic # that got us the key to get this to work - match_index = hit - .map { |s| remove_nonword_chars(s) } - .index(remove_nonword_chars(key)) + match_index = hit + .map { |s| remove_nonword_chars(s) } + .index(remove_nonword_chars(key)) # if nothing matches the original key, return the entire source hit # should return a string, regardless return match_index ? 
hit[match_index] : hit.join(" ") @@ -72,7 +73,8 @@ def format_bucket_value(facets, field, bucket) # Example: "Willa Cather" and "WILLA CATHER" # Those terms will both have been normalized as "willa cather" but # we will want to display one of the non-normalized terms instead - top_hits = bucket.dig("top_matches", "hits", "hits") + + top_hits = bucket.dig("field_to_item", "top_matches", "hits", "hits") if top_hits source = find_source_from_top_hits(top_hits, field, key) end From f11a5d787b8149d5bfe67e8cdd13bbf7ca7b9309 Mon Sep 17 00:00:00 2001 From: William Dewey Date: Tue, 20 Aug 2024 09:45:35 -0500 Subject: [PATCH 39/62] handle un-normalization of values in case of array rather than string values --- app/services/search_item_res.rb | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/app/services/search_item_res.rb b/app/services/search_item_res.rb index 0ff691d..82eddae 100644 --- a/app/services/search_item_res.rb +++ b/app/services/search_item_res.rb @@ -43,11 +43,12 @@ def find_source_from_top_hits(top_hits, field, key) # elasticsearch stores nested source results without the "path" parent = field.split(".").first nested_child = field.split(".").last - hit = top_hits.first.dig("_source", parent).map { |i| i[nested_child] }.compact + hit = top_hits.first.dig("_source", parent) # if this is a multivalued field (for example: works or places), # ALL of the values come back as the source, but we only want # the single value from which the key was derived if hit.class == Array + hit = hit.map { |i| i[nested_child] }.compact # I don't love this, because we will have to match exactly the logic # that got us the key to get this to work match_index = hit @@ -104,6 +105,14 @@ def reformat_facets end def remove_nonword_chars(term) + #in case of nested arrays, etc. 
+ if term.class == Array + new_term = [] + term.each do |ele| + new_term << remove_nonword_chars(ele) + end + return new_term + end # transliterate to ascii (ø -> o) transliterated = I18n.transliterate(term) # remove html tags like em, u, and strong, then strip remaining non-alpha characters From 3319d19727b76c550eb26810b12528d5b05ba8e2 Mon Sep 17 00:00:00 2001 From: William Dewey Date: Thu, 22 Aug 2024 15:24:32 -0500 Subject: [PATCH 40/62] change method for unnormalization of values to fix bugs --- app/services/search_item_res.rb | 22 +++++++++++++++------- 1 file changed, 15 insertions(+), 7 deletions(-) diff --git a/app/services/search_item_res.rb b/app/services/search_item_res.rb index 82eddae..b463400 100644 --- a/app/services/search_item_res.rb +++ b/app/services/search_item_res.rb @@ -56,7 +56,18 @@ def find_source_from_top_hits(top_hits, field, key) .index(remove_nonword_chars(key)) # if nothing matches the original key, return the entire source hit # should return a string, regardless - return match_index ? hit[match_index] : hit.join(" ") + if match_index + #matching item may be an array + if hit[match_index].class == Array + return hit[match_index][0] + else + #just return the match + return hit[match_index] + end + else + # if there is an array of values but no match, just return the key + return key + end else # it must be single-valued and therefore we are good to go return hit @@ -105,13 +116,10 @@ def reformat_facets end def remove_nonword_chars(term) - #in case of nested arrays, etc. 
+ if term.class == Array - new_term = [] - term.each do |ele| - new_term << remove_nonword_chars(ele) - end - return new_term + #ensure that term is a string value, not an array + term = term[0] end # transliterate to ascii (ø -> o) transliterated = I18n.transliterate(term) From 2f7c08501a3add9e44016898bcb5d4e164a424b3 Mon Sep 17 00:00:00 2001 From: William Dewey Date: Fri, 23 Aug 2024 10:57:34 -0500 Subject: [PATCH 41/62] update changelog and improve formatting --- CHANGELOG.md | 32 ++++++++++++++++++++++---------- 1 file changed, 22 insertions(+), 10 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index fc011ba..1653afa 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -36,17 +36,27 @@ Markdown Spec](https://github.github.com/gfm/). - "api_version" added to all response "res" objects - support for elasticsearch 8.5 -- user/password basic authentication with ES 8.5, when querying the index or posting from Datura +- user/password basic authentication with ES 8.5, when querying the index or + posting from Datura - better support for nested fields -- support for nested bucket aggregations, matching a nested value on another nested value. `person.name[person.role#judge]` will return all names of persons where role="judge". +- support for nested bucket aggregations, matching a nested value on another + nested value. + `person.name[person.role#judge]` will return all names of persons where + role="judge". - "api_version" added to all response "res" objects +- updated documentation for new features +- "net-smtp" gem +- `track total hits` option added to ES queries, to return counts of search + results higher than 10000 ### Changed - upgraded to Rails 6.1.7 and Ruby 3 - changes reflect new api schemas in Datura, which make heavy use of nested fields - Added support for aggregating buckets by normalized keyword and returning - the "top_hits" first document result for a non-normalized display + the "top_hits" first document result for a non-normalized display. 
internal logic + has been changed because of nested fields, this may cause subtle differences in + how facet labels are displayed - Changes response format of `facets` key From: @@ -67,16 +77,18 @@ Markdown Spec](https://github.github.com/gfm/). ``` Not only is the response format itself different, but there may be fewer - facets returned since normalized values which match are combined - -### Changed - -- upgraded to Rails 6.1.7 and Ruby 3 -- changes reflect new api schemas in Datura, which make heavy use of nested fields + facets returned since matching normalized values are combined +- gemset changed to `api-v2` ### Migration -- in the config files of your Datura repos, (`private.yml` or `public.yml`, set the api to `"api_version": "2.0"` to take advantage of new bucket aggregation functionality (or `"api_version": "1.0"` for legacy repos that have not been updated for the new schema). Please note that a running API index can only use one ES index at a time, and each ES index is restricted to one version of the schema. See new schema (2.0) documentation [here](https://github.com/CDRH/datura/docs/schema_v2.md). +- in the config files of your Datura repos, (`private.yml` or `public.yml`, set + the api to `"api_version": "2.0"` to take advantage of new bucket aggregation + functionality (or `"api_version": "1.0"` for legacy repos that have not been + updated for the new schema). Please note that a running API index can only use + one ES index at a time, and each ES index is restricted to one version of the + schema. See new schema (2.0) documentation + [here](https://github.com/CDRH/datura/docs/schema_v2.md). - Use Elasticsearch 8.5 or later. See [dev docs instructions](https://github.com/CDRH/cdrh_dev_docs/blob/update_elasticsearch_documentation/publishing/2_basic_requirements.md#downloading-elasticsearch). - If you are using ES with security enabled, you must configure credentials with Rails in the API repo. See https://guides.rubyonrails.org/v6.1/security.html. 
Configure the VSCode editor. Run `EDITOR="code --wait" rails credentials:edit` and add From e02ffee3d3ff3e44872d34c7ab4def0639abf5a9 Mon Sep 17 00:00:00 2001 From: William Dewey Date: Fri, 23 Aug 2024 10:59:03 -0500 Subject: [PATCH 42/62] fix YAML loading for new version of Ruby --- Gemfile | 2 +- Gemfile.lock | 213 +++++++++++++++++----------------- config/initializers/config.rb | 2 +- 3 files changed, 110 insertions(+), 107 deletions(-) diff --git a/Gemfile b/Gemfile index 9c639fc..dc16d9b 100644 --- a/Gemfile +++ b/Gemfile @@ -9,7 +9,7 @@ end # Bundle edge Rails instead: gem 'rails', github: 'rails/rails' gem 'rails', '~> 6.1.7' # Use sqlite3 as the database for Active Record -gem 'sqlite3' +gem 'sqlite3', "~> 1.4" # Use Puma as the app server gem 'puma', '>= 5.6' # Build JSON APIs with ease. Read more: https://github.com/rails/jbuilder diff --git a/Gemfile.lock b/Gemfile.lock index bb5cd5f..316a2fd 100644 --- a/Gemfile.lock +++ b/Gemfile.lock @@ -1,150 +1,154 @@ GEM remote: https://rubygems.org/ specs: - actioncable (6.1.7) - actionpack (= 6.1.7) - activesupport (= 6.1.7) + actioncable (6.1.7.8) + actionpack (= 6.1.7.8) + activesupport (= 6.1.7.8) nio4r (~> 2.0) websocket-driver (>= 0.6.1) - actionmailbox (6.1.7) - actionpack (= 6.1.7) - activejob (= 6.1.7) - activerecord (= 6.1.7) - activestorage (= 6.1.7) - activesupport (= 6.1.7) + actionmailbox (6.1.7.8) + actionpack (= 6.1.7.8) + activejob (= 6.1.7.8) + activerecord (= 6.1.7.8) + activestorage (= 6.1.7.8) + activesupport (= 6.1.7.8) mail (>= 2.7.1) - actionmailer (6.1.7) - actionpack (= 6.1.7) - actionview (= 6.1.7) - activejob (= 6.1.7) - activesupport (= 6.1.7) + actionmailer (6.1.7.8) + actionpack (= 6.1.7.8) + actionview (= 6.1.7.8) + activejob (= 6.1.7.8) + activesupport (= 6.1.7.8) mail (~> 2.5, >= 2.5.4) rails-dom-testing (~> 2.0) - actionpack (6.1.7) - actionview (= 6.1.7) - activesupport (= 6.1.7) + actionpack (6.1.7.8) + actionview (= 6.1.7.8) + activesupport (= 6.1.7.8) rack (~> 2.0, >= 2.0.9) 
rack-test (>= 0.6.3) rails-dom-testing (~> 2.0) rails-html-sanitizer (~> 1.0, >= 1.2.0) - actiontext (6.1.7) - actionpack (= 6.1.7) - activerecord (= 6.1.7) - activestorage (= 6.1.7) - activesupport (= 6.1.7) + actiontext (6.1.7.8) + actionpack (= 6.1.7.8) + activerecord (= 6.1.7.8) + activestorage (= 6.1.7.8) + activesupport (= 6.1.7.8) nokogiri (>= 1.8.5) - actionview (6.1.7) - activesupport (= 6.1.7) + actionview (6.1.7.8) + activesupport (= 6.1.7.8) builder (~> 3.1) erubi (~> 1.4) rails-dom-testing (~> 2.0) rails-html-sanitizer (~> 1.1, >= 1.2.0) - activejob (6.1.7) - activesupport (= 6.1.7) + activejob (6.1.7.8) + activesupport (= 6.1.7.8) globalid (>= 0.3.6) - activemodel (6.1.7) - activesupport (= 6.1.7) - activerecord (6.1.7) - activemodel (= 6.1.7) - activesupport (= 6.1.7) - activestorage (6.1.7) - actionpack (= 6.1.7) - activejob (= 6.1.7) - activerecord (= 6.1.7) - activesupport (= 6.1.7) + activemodel (6.1.7.8) + activesupport (= 6.1.7.8) + activerecord (6.1.7.8) + activemodel (= 6.1.7.8) + activesupport (= 6.1.7.8) + activestorage (6.1.7.8) + actionpack (= 6.1.7.8) + activejob (= 6.1.7.8) + activerecord (= 6.1.7.8) + activesupport (= 6.1.7.8) marcel (~> 1.0) mini_mime (>= 1.1.0) - activesupport (6.1.7) + activesupport (6.1.7.8) concurrent-ruby (~> 1.0, >= 1.0.2) i18n (>= 1.6, < 2) minitest (>= 5.1) tzinfo (~> 2.0) zeitwerk (~> 2.3) - bootsnap (1.15.0) + bootsnap (1.18.3) msgpack (~> 1.2) - builder (3.2.4) + builder (3.3.0) byebug (11.1.3) - concurrent-ruby (1.1.10) + concurrent-ruby (1.3.3) crass (1.0.6) - date (3.3.2) - domain_name (0.5.20190701) - unf (>= 0.0.5, < 1.0.0) - erubi (1.11.0) - ffi (1.15.5) - globalid (1.0.0) - activesupport (>= 5.0) + date (3.3.4) + domain_name (0.6.20240107) + erubi (1.13.0) + ffi (1.17.0-arm64-darwin) + ffi (1.17.0-x86_64-darwin) + globalid (1.2.1) + activesupport (>= 6.1) http-accept (1.7.0) - http-cookie (1.0.5) + http-cookie (1.0.6) domain_name (~> 0.5) - i18n (1.12.0) + i18n (1.14.5) concurrent-ruby (~> 1.0) 
listen (3.0.8) rb-fsevent (~> 0.9, >= 0.9.4) rb-inotify (~> 0.9, >= 0.9.7) - loofah (2.19.1) + loofah (2.22.0) crass (~> 1.0.2) - nokogiri (>= 1.5.9) - mail (2.8.0) + nokogiri (>= 1.12.0) + mail (2.8.1) mini_mime (>= 0.1.1) net-imap net-pop net-smtp - marcel (1.0.2) - method_source (1.0.0) - mime-types (3.4.1) + marcel (1.0.4) + method_source (1.1.0) + mime-types (3.5.2) mime-types-data (~> 3.2015) - mime-types-data (3.2022.0105) - mini_mime (1.1.2) - minitest (5.16.3) - msgpack (1.6.0) - net-imap (0.3.2) + mime-types-data (3.2024.0702) + mini_mime (1.1.5) + minitest (5.24.1) + msgpack (1.7.2) + net-imap (0.4.14) date net-protocol net-pop (0.1.2) net-protocol - net-protocol (0.2.1) + net-protocol (0.2.2) timeout - net-smtp (0.3.3) + net-smtp (0.5.0) net-protocol netrc (0.11.0) - nio4r (2.5.8) - nokogiri (1.13.10-x86_64-darwin) + nio4r (2.7.3) + nokogiri (1.16.6-arm64-darwin) racc (~> 1.4) - puma (6.0.0) + nokogiri (1.16.6-x86_64-darwin) + racc (~> 1.4) + puma (6.4.2) nio4r (~> 2.0) - racc (1.6.1) - rack (2.2.4) - rack-test (2.0.2) + racc (1.8.0) + rack (2.2.9) + rack-test (2.1.0) rack (>= 1.3) - rails (6.1.7) - actioncable (= 6.1.7) - actionmailbox (= 6.1.7) - actionmailer (= 6.1.7) - actionpack (= 6.1.7) - actiontext (= 6.1.7) - actionview (= 6.1.7) - activejob (= 6.1.7) - activemodel (= 6.1.7) - activerecord (= 6.1.7) - activestorage (= 6.1.7) - activesupport (= 6.1.7) + rails (6.1.7.8) + actioncable (= 6.1.7.8) + actionmailbox (= 6.1.7.8) + actionmailer (= 6.1.7.8) + actionpack (= 6.1.7.8) + actiontext (= 6.1.7.8) + actionview (= 6.1.7.8) + activejob (= 6.1.7.8) + activemodel (= 6.1.7.8) + activerecord (= 6.1.7.8) + activestorage (= 6.1.7.8) + activesupport (= 6.1.7.8) bundler (>= 1.15.0) - railties (= 6.1.7) + railties (= 6.1.7.8) sprockets-rails (>= 2.0.0) - rails-dom-testing (2.0.3) - activesupport (>= 4.2.0) + rails-dom-testing (2.2.0) + activesupport (>= 5.0.0) + minitest nokogiri (>= 1.6) - rails-html-sanitizer (1.4.4) - loofah (~> 2.19, >= 2.19.1) - 
railties (6.1.7) - actionpack (= 6.1.7) - activesupport (= 6.1.7) + rails-html-sanitizer (1.6.0) + loofah (~> 2.21) + nokogiri (~> 1.14) + railties (6.1.7.8) + actionpack (= 6.1.7.8) + activesupport (= 6.1.7.8) method_source rake (>= 12.2) thor (~> 1.0) - rake (13.0.6) + rake (13.2.1) rb-fsevent (0.11.2) - rb-inotify (0.10.1) + rb-inotify (0.11.1) ffi (~> 1.0) rest-client (2.1.0) http-accept (>= 1.7.0, < 2.0) @@ -155,27 +159,26 @@ GEM spring-watcher-listen (2.0.1) listen (>= 2.7, < 4.0) spring (>= 1.2, < 3.0) - sprockets (4.1.1) + sprockets (4.2.1) concurrent-ruby (~> 1.0) - rack (> 1, < 3) - sprockets-rails (3.4.2) - actionpack (>= 5.2) - activesupport (>= 5.2) + rack (>= 2.2.4, < 4) + sprockets-rails (3.5.1) + actionpack (>= 6.1) + activesupport (>= 6.1) sprockets (>= 3.0.0) - sqlite3 (1.5.4-x86_64-darwin) - thor (1.2.1) - timeout (0.3.1) - tzinfo (2.0.5) + sqlite3 (1.7.3-arm64-darwin) + sqlite3 (1.7.3-x86_64-darwin) + thor (1.3.1) + timeout (0.4.1) + tzinfo (2.0.6) concurrent-ruby (~> 1.0) - unf (0.1.4) - unf_ext - unf_ext (0.0.8.2) - websocket-driver (0.7.5) + websocket-driver (0.7.6) websocket-extensions (>= 0.1.0) websocket-extensions (0.1.5) - zeitwerk (2.6.6) + zeitwerk (2.6.16) PLATFORMS + arm64-darwin-23 x86_64-darwin-21 x86_64-darwin-22 @@ -189,7 +192,7 @@ DEPENDENCIES rest-client (>= 2.1.0.rc1, < 2.2) spring spring-watcher-listen (~> 2.0.0) - sqlite3 + sqlite3 (~> 1.4) tzinfo-data BUNDLED WITH diff --git a/config/initializers/config.rb b/config/initializers/config.rb index f38f1e0..d397138 100644 --- a/config/initializers/config.rb +++ b/config/initializers/config.rb @@ -1,5 +1,5 @@ config_path = Rails.root.join("config", "config.yml") -config = YAML.load_file(config_path)[Rails.env] +config = YAML.load_file(config_path, aliases: true)[Rails.env] ES_URI = "#{config['es_path']}/#{config['es_index']}" METADATA = config["metadata"] From 5717c9b5e30c596e853a0a65b51462b1a7335477 Mon Sep 17 00:00:00 2001 From: wdewey2 Date: Thu, 26 Sep 2024 14:09:53 -0500 
Subject: [PATCH 43/62] update Ruby and gems for new server --- .ruby-version | 2 +- Gemfile | 2 +- Gemfile.lock | 29 +++++++++++++++-------------- 3 files changed, 17 insertions(+), 16 deletions(-) diff --git a/.ruby-version b/.ruby-version index 7bde84d..2457623 100644 --- a/.ruby-version +++ b/.ruby-version @@ -1 +1 @@ -ruby-3.1.2 +ruby-3.1.6 diff --git a/Gemfile b/Gemfile index dc16d9b..67192d1 100644 --- a/Gemfile +++ b/Gemfile @@ -9,7 +9,7 @@ end # Bundle edge Rails instead: gem 'rails', github: 'rails/rails' gem 'rails', '~> 6.1.7' # Use sqlite3 as the database for Active Record -gem 'sqlite3', "~> 1.4" +gem 'sqlite3', '~> 1.4' # Use Puma as the app server gem 'puma', '>= 5.6' # Build JSON APIs with ease. Read more: https://github.com/rails/jbuilder diff --git a/Gemfile.lock b/Gemfile.lock index 316a2fd..9bc9a63 100644 --- a/Gemfile.lock +++ b/Gemfile.lock @@ -64,13 +64,13 @@ GEM msgpack (~> 1.2) builder (3.3.0) byebug (11.1.3) - concurrent-ruby (1.3.3) + concurrent-ruby (1.3.1) crass (1.0.6) date (3.3.4) domain_name (0.6.20240107) - erubi (1.13.0) - ffi (1.17.0-arm64-darwin) + erubi (1.12.0) ffi (1.17.0-x86_64-darwin) + ffi (1.17.0-x86_64-linux-gnu) globalid (1.2.1) activesupport (>= 6.1) http-accept (1.7.0) @@ -93,11 +93,11 @@ GEM method_source (1.1.0) mime-types (3.5.2) mime-types-data (~> 3.2015) - mime-types-data (3.2024.0702) + mime-types-data (3.2024.0507) mini_mime (1.1.5) - minitest (5.24.1) + minitest (5.23.1) msgpack (1.7.2) - net-imap (0.4.14) + net-imap (0.4.12) date net-protocol net-pop (0.1.2) @@ -108,9 +108,9 @@ GEM net-protocol netrc (0.11.0) nio4r (2.7.3) - nokogiri (1.16.6-arm64-darwin) + nokogiri (1.16.5-x86_64-darwin) racc (~> 1.4) - nokogiri (1.16.6-x86_64-darwin) + nokogiri (1.16.5-x86_64-linux) racc (~> 1.4) puma (6.4.2) nio4r (~> 2.0) @@ -162,12 +162,12 @@ GEM sprockets (4.2.1) concurrent-ruby (~> 1.0) rack (>= 2.2.4, < 4) - sprockets-rails (3.5.1) - actionpack (>= 6.1) - activesupport (>= 6.1) + sprockets-rails (3.4.2) + actionpack 
(>= 5.2) + activesupport (>= 5.2) sprockets (>= 3.0.0) - sqlite3 (1.7.3-arm64-darwin) sqlite3 (1.7.3-x86_64-darwin) + sqlite3 (1.7.3-x86_64-linux) thor (1.3.1) timeout (0.4.1) tzinfo (2.0.6) @@ -175,12 +175,13 @@ GEM websocket-driver (0.7.6) websocket-extensions (>= 0.1.0) websocket-extensions (0.1.5) - zeitwerk (2.6.16) + zeitwerk (2.6.15) PLATFORMS arm64-darwin-23 x86_64-darwin-21 x86_64-darwin-22 + x86_64-linux DEPENDENCIES bootsnap @@ -196,4 +197,4 @@ DEPENDENCIES tzinfo-data BUNDLED WITH - 2.3.26 + 2.3.27 From 1097fb1e6ecff324976899b40b5df9fc30aec07e Mon Sep 17 00:00:00 2001 From: William Dewey Date: Mon, 4 Nov 2024 11:55:21 -0600 Subject: [PATCH 44/62] handle nested field facets in case of single hash rather than array --- app/services/search_item_res.rb | 3 +++ 1 file changed, 3 insertions(+) diff --git a/app/services/search_item_res.rb b/app/services/search_item_res.rb index b463400..d0f733d 100644 --- a/app/services/search_item_res.rb +++ b/app/services/search_item_res.rb @@ -47,6 +47,9 @@ def find_source_from_top_hits(top_hits, field, key) # if this is a multivalued field (for example: works or places), # ALL of the values come back as the source, but we only want # the single value from which the key was derived + if hit.class == Hash + hit = [hit] + end if hit.class == Array hit = hit.map { |i| i[nested_child] }.compact # I don't love this, because we will have to match exactly the logic From 88ab346f8443ee31ca67f88c473335ec9c1e1dea Mon Sep 17 00:00:00 2001 From: William Dewey Date: Mon, 11 Nov 2024 11:47:15 -0600 Subject: [PATCH 45/62] make sure facet key is returned if no source --- app/services/search_item_res.rb | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/app/services/search_item_res.rb b/app/services/search_item_res.rb index d0f733d..004f278 100644 --- a/app/services/search_item_res.rb +++ b/app/services/search_item_res.rb @@ -50,7 +50,9 @@ def find_source_from_top_hits(top_hits, field, key) if hit.class == Hash hit = 
[hit] end - if hit.class == Array + if !hit + return key + elsif hit.class == Array hit = hit.map { |i| i[nested_child] }.compact # I don't love this, because we will have to match exactly the logic # that got us the key to get this to work @@ -88,7 +90,6 @@ def format_bucket_value(facets, field, bucket) # Example: "Willa Cather" and "WILLA CATHER" # Those terms will both have been normalized as "willa cather" but # we will want to display one of the non-normalized terms instead - top_hits = bucket.dig("field_to_item", "top_matches", "hits", "hits") if top_hits source = find_source_from_top_hits(top_hits, field, key) From 1239facf8417bc6186d836ac3aa0d6b856f19b9e Mon Sep 17 00:00:00 2001 From: William Dewey Date: Tue, 26 Nov 2024 11:55:29 -0600 Subject: [PATCH 46/62] fix de-normalization for non-nested fields fixes #148 fixes https://github.com/CDRH/african_poetics/issues/401 --- app/services/search_item_res.rb | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/app/services/search_item_res.rb b/app/services/search_item_res.rb index 004f278..b8a70f0 100644 --- a/app/services/search_item_res.rb +++ b/app/services/search_item_res.rb @@ -42,7 +42,9 @@ def combine_highlights def find_source_from_top_hits(top_hits, field, key) # elasticsearch stores nested source results without the "path" parent = field.split(".").first - nested_child = field.split(".").last + if field.include?(".") + nested_child = field.split(".").last + end hit = top_hits.first.dig("_source", parent) # if this is a multivalued field (for example: works or places), # ALL of the values come back as the source, but we only want @@ -53,7 +55,9 @@ def find_source_from_top_hits(top_hits, field, key) if !hit return key elsif hit.class == Array - hit = hit.map { |i| i[nested_child] }.compact + if nested_child + hit = hit.map { |i| i[nested_child] }.compact + end # I don't love this, because we will have to match exactly the logic # that got us the key to get this to work match_index 
= hit @@ -90,7 +94,7 @@ def format_bucket_value(facets, field, bucket) # Example: "Willa Cather" and "WILLA CATHER" # Those terms will both have been normalized as "willa cather" but # we will want to display one of the non-normalized terms instead - top_hits = bucket.dig("field_to_item", "top_matches", "hits", "hits") + top_hits = bucket.key?("field_to_item") ? bucket.dig("field_to_item", "top_matches", "hits", "hits") : bucket.dig("top_matches", "hits", "hits") if top_hits source = find_source_from_top_hits(top_hits, field, key) end From 39cf19f9fc14687173d63cab04853e3a9928febd Mon Sep 17 00:00:00 2001 From: William Dewey Date: Wed, 4 Dec 2024 16:51:05 -0600 Subject: [PATCH 47/62] sort facets properly by count when nested fixes https://github.com/CDRH/family_letters/issues/179 --- app/services/search_item_req.rb | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/app/services/search_item_req.rb b/app/services/search_item_req.rb index f8b44da..8ee5a47 100644 --- a/app/services/search_item_req.rb +++ b/app/services/search_item_req.rb @@ -114,6 +114,7 @@ def facets options = JSON.parse(f) original = options[0] agg_name = options[1] + facet = original.split("[")[0] # may or may not be nested nested = facet.include?(".") @@ -123,6 +124,10 @@ def facets condition = original[/(?<=\[).+?(?=\])/] subject = condition.split("#").first predicate = condition.split("#").last + if f_type == "_count" + #make sure sort is on the acutal count of documents + f_type = "field_to_item" + end aggregation = { # common to nested and non-nested "filter" => { @@ -171,6 +176,10 @@ def facets end elsif f.include?(".") path = f.split(".").first + if f_type == "_count" + #make sure sort is on the acutal count of documents + f_type = "field_to_item" + end aggs[f] = { "nested" => { "path" => path From ae7f0a8bbdc729a2bd07daa9914a174e68cef6c3 Mon Sep 17 00:00:00 2001 From: William Dewey Date: Mon, 9 Dec 2024 10:47:09 -0600 Subject: [PATCH 48/62] fix ES query for nested aggregations so that 
_source finds right item fixes https://github.com/Willa-Cather-Archive/willa-cather-archive/issues/371 --- app/services/search_item_req.rb | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/app/services/search_item_req.rb b/app/services/search_item_req.rb index 8ee5a47..7de29c6 100644 --- a/app/services/search_item_req.rb +++ b/app/services/search_item_req.rb @@ -149,7 +149,7 @@ def facets "top_matches" => { "top_hits" => { "_source" => { - "includes" => [ agg_name ] + "includes" => [ facet ] }, "size" => 1 } From f5b61b4303a1ee5f2ff6af7b68c4bbe9352a938c Mon Sep 17 00:00:00 2001 From: William Dewey Date: Mon, 16 Dec 2024 14:30:37 -0600 Subject: [PATCH 49/62] don't try to match up non-string values stopgap fix for https://github.com/CDRH/api/issues/151 --- app/services/search_item_res.rb | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) diff --git a/app/services/search_item_res.rb b/app/services/search_item_res.rb index b8a70f0..5bb328b 100644 --- a/app/services/search_item_res.rb +++ b/app/services/search_item_res.rb @@ -41,6 +41,7 @@ def combine_highlights def find_source_from_top_hits(top_hits, field, key) # elasticsearch stores nested source results without the "path" + parent = field.split(".").first if field.include?(".") nested_child = field.split(".").last @@ -124,15 +125,18 @@ def reformat_facets end def remove_nonword_chars(term) - + if term.class == Array #ensure that term is a string value, not an array term = term[0] end - # transliterate to ascii (ø -> o) - transliterated = I18n.transliterate(term) - # remove html tags like em, u, and strong, then strip remaining non-alpha characters - transliterated.gsub(/<\/?(?:em|strong|u)>|\W/, "").downcase + if term.class == String + # it should not be a hash, but this is a failsafe + # transliterate to ascii (ø -> o) + transliterated = I18n.transliterate(term) + # remove html tags like em, u, and strong, then strip remaining non-alpha characters + 
transliterated.gsub(/<\/?(?:em|strong|u)>|\W/, "").downcase + end end def get_buckets(info, field) From 9318336806138125ba00c9b4bc5cc68d8402e346 Mon Sep 17 00:00:00 2001 From: William Dewey Date: Mon, 16 Dec 2024 14:34:31 -0600 Subject: [PATCH 50/62] add a comment for future bugfixing --- app/services/search_item_res.rb | 1 + 1 file changed, 1 insertion(+) diff --git a/app/services/search_item_res.rb b/app/services/search_item_res.rb index 5bb328b..b711c5d 100644 --- a/app/services/search_item_res.rb +++ b/app/services/search_item_res.rb @@ -57,6 +57,7 @@ def find_source_from_top_hits(top_hits, field, key) return key elsif hit.class == Array if nested_child + #TODO solve bug where this returns a hash value instead of an array hit = hit.map { |i| i[nested_child] }.compact end # I don't love this, because we will have to match exactly the logic From f0b8817e5167ed960282a5f188cdacc2cab19eb4 Mon Sep 17 00:00:00 2001 From: William Dewey Date: Mon, 14 Jul 2025 16:44:17 -0500 Subject: [PATCH 51/62] handle any response error from RestClient fixes #152 --- app/services/search_service.rb | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/app/services/search_service.rb b/app/services/search_service.rb index 319b97b..19c3aa7 100644 --- a/app/services/search_service.rb +++ b/app/services/search_service.rb @@ -58,7 +58,7 @@ def search_item(id) raw_res = post("_search", req) if raw_res.class == RuntimeError on_error(raw_res, req) - elsif raw_res.class == RestClient::BadRequest + elsif raw_res.class == RestClient::ExceptionWithResponse on_error(JSON.parse(raw_res.response), req) else res = build_item_response(raw_res) @@ -71,7 +71,7 @@ def search_items raw_res = post("_search", req) if raw_res.class == RuntimeError on_error(raw_res.inspect, req) - elsif raw_res.class == RestClient::BadRequest + elsif raw_res.class == RestClient::ExceptionWithResponse on_error(JSON.parse(raw_res.response), req) else res = build_item_response(raw_res) From 
d6a704ebfce774df6e4db0df2022c8c1133b3ca3 Mon Sep 17 00:00:00 2001 From: Greg Tunink Date: Fri, 19 Dec 2025 20:17:59 -0600 Subject: [PATCH 52/62] Bump Ruby version to 3.1.7 --- .ruby-version | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.ruby-version b/.ruby-version index 2457623..434c481 100644 --- a/.ruby-version +++ b/.ruby-version @@ -1 +1 @@ -ruby-3.1.6 +ruby-3.1.7 From 46d3b0db4ab229f56211afd5841db4944cc5e541 Mon Sep 17 00:00:00 2001 From: Greg Tunink Date: Fri, 19 Dec 2025 21:03:17 -0600 Subject: [PATCH 53/62] Remove unused code in ApplicationController --- app/controllers/application_controller.rb | 34 ----------------------- 1 file changed, 34 deletions(-) diff --git a/app/controllers/application_controller.rb b/app/controllers/application_controller.rb index e97e006..4ac8823 100644 --- a/app/controllers/application_controller.rb +++ b/app/controllers/application_controller.rb @@ -1,36 +1,2 @@ -require 'rest-client' - class ApplicationController < ActionController::API - - def post_search(json, error_method=method(:display_error)) - auth_hash = { "Authorization" => "Basic #{Base64::encode64("#{ES_USER}:#{ES_PASSWORD}")}" } - res = RestClient.post("#{ES_URI}/_search", json.to_json, auth_hash.merge({ "content-type" => "json" })) - raise - return JSON.parse(res.body) - rescue => e - error_method.call(e, json) - return nil - end - - # I am so pleased that this works - # as a default error handler - def display_error(error, req_body) - render(status: 500, json: JSON.pretty_generate({ - "res" => { - "code" => 500, - "api_version" => Api::Application::VERSION, - "message" => "TODO", - "info" => { - "documentation" => "TODO", - "error" => error.inspect, - "suggestion" => "TODO" - } - }, - "req" => { - "query_string" => request.fullpath, - "query_obj" => req_body - } - })) and return - end - end From b004587c12e2c66c53b164728e94b06544686d47 Mon Sep 17 00:00:00 2001 From: Greg Tunink Date: Fri, 19 Dec 2025 22:21:38 -0600 Subject: [PATCH 54/62] 
Reformat post code; use auth if creds present - Make auth code more readable with less string interpolation and separate multiple args for methods across lines - Check if elasticsearch credentials are present to determine whether to construct additional auth header and merge with json header --- app/services/search_service.rb | 25 +++++++++++++++++++++++-- 1 file changed, 23 insertions(+), 2 deletions(-) diff --git a/app/services/search_service.rb b/app/services/search_service.rb index 19c3aa7..d17cc68 100644 --- a/app/services/search_service.rb +++ b/app/services/search_service.rb @@ -11,8 +11,29 @@ def initialize(url, params={}, user_req) end def post(url_ending, json) - auth_hash = { "Authorization" => "Basic #{Base64::encode64("#{Rails.application.credentials.elasticsearch[:user]}:#{Rails.application.credentials.elasticsearch[:password]}")}" } - res = RestClient.post("#{@url}/#{url_ending}", json.to_json, auth_hash.merge({ "content-type" => "json" } )) + # Add Basic Authentication header if credentials present + if Rails.application.credentials.elasticsearch.present? && + Rails.application.credentials.elasticsearch[:user].present? && + Rails.application.credentials.elasticsearch[:password].present? 
+ auth_hash = { + "Authorization" => "Basic " + + Base64::encode64( + Rails.application.credentials.elasticsearch[:user] + + ":" + Rails.application.credentials.elasticsearch[:password] + ) + } + res = RestClient.post( + @url + "/" + url_ending, + json.to_json, + auth_hash.merge({ "content-type" => "json" }) + ) + else + res = RestClient.post( + @url + "/" + url_ending, + json.to_json, + {"content-type" => "json"} + ) + end JSON.parse(res.body) rescue => e e From a0ed47a0d7820e328e83df4cc471ceb5975f8454 Mon Sep 17 00:00:00 2001 From: Greg Tunink Date: Fri, 19 Dec 2025 23:03:53 -0600 Subject: [PATCH 55/62] Update Gemfile and gems - Compare and match against a fresh `rails new --api` app - Remove net-smtp gem from Gemfile and Changelog - Remove extra platforms --- CHANGELOG.md | 1 - Gemfile | 35 +++++---- Gemfile.lock | 198 ++++++++++++++++++++++++++------------------------- 3 files changed, 119 insertions(+), 115 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 1653afa..6cdada1 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -45,7 +45,6 @@ Markdown Spec](https://github.github.com/gfm/). role="judge". - "api_version" added to all response "res" objects - updated documentation for new features -- "net-smtp" gem - `track total hits` option added to ES queries, to return counts of search results higher than 10000 diff --git a/Gemfile b/Gemfile index 67192d1..1f15323 100644 --- a/Gemfile +++ b/Gemfile @@ -1,48 +1,47 @@ source 'https://rubygems.org' - -git_source(:github) do |repo_name| - repo_name = "#{repo_name}/#{repo_name}" unless repo_name.include?("/") - "https://github.com/#{repo_name}.git" -end - +git_source(:github) { |repo| "https://github.com/#{repo}.git" } # Bundle edge Rails instead: gem 'rails', github: 'rails/rails' gem 'rails', '~> 6.1.7' # Use sqlite3 as the database for Active Record gem 'sqlite3', '~> 1.4' # Use Puma as the app server -gem 'puma', '>= 5.6' +gem 'puma', '>= 5.0' # Build JSON APIs with ease. 
Read more: https://github.com/rails/jbuilder -# gem 'jbuilder', '~> 2.5' +# gem 'jbuilder', '~> 2.7' # Use Redis adapter to run Action Cable in production -# gem 'redis', '~> 3.0' +# gem 'redis', '~> 4.0' # Use ActiveModel has_secure_password # gem 'bcrypt', '~> 3.1.7' -# Use Capistrano for deployment -# gem 'capistrano-rails', group: :development +# Use ActiveStorage variant +# gem 'image_processing', '~> 1.2' + +# Reduces boot times through caching; required in config/boot.rb +gem 'bootsnap', '>= 1.4.4', require: false # Use Rack CORS for handling Cross-Origin Resource Sharing (CORS), making cross-origin AJAX possible # gem 'rack-cors' -gem 'bootsnap', require: false -gem 'net-smtp' - group :development, :test do # Call 'byebug' anywhere in the code to stop execution and get a debugger console - gem 'byebug', platform: :mri + gem 'byebug', platforms: [:mri, :mingw, :x64_mingw] end group :development do - gem 'listen', '>= 3.0.5', '< 3.2' + gem 'listen', '~> 3.3' # Spring speeds up development by keeping your application running in the background. 
Read more: https://github.com/rails/spring gem 'spring' - gem 'spring-watcher-listen', '~> 2.0.0' end # Windows does not include zoneinfo files, so bundle the tzinfo-data gem gem 'tzinfo-data', platforms: [:mingw, :mswin, :x64_mingw, :jruby] +# Additions to Rails defaults + +# Note: Above list different from other Rails apps +# because this app was created with --api option + # using rest-client because I've had far more luck than the # stlib net/http -gem 'rest-client', '>= 2.1.0.rc1', '< 2.2' +gem 'rest-client', '~> 2.1' diff --git a/Gemfile.lock b/Gemfile.lock index 9bc9a63..04a05ce 100644 --- a/Gemfile.lock +++ b/Gemfile.lock @@ -1,152 +1,159 @@ GEM remote: https://rubygems.org/ specs: - actioncable (6.1.7.8) - actionpack (= 6.1.7.8) - activesupport (= 6.1.7.8) + actioncable (6.1.7.10) + actionpack (= 6.1.7.10) + activesupport (= 6.1.7.10) nio4r (~> 2.0) websocket-driver (>= 0.6.1) - actionmailbox (6.1.7.8) - actionpack (= 6.1.7.8) - activejob (= 6.1.7.8) - activerecord (= 6.1.7.8) - activestorage (= 6.1.7.8) - activesupport (= 6.1.7.8) + actionmailbox (6.1.7.10) + actionpack (= 6.1.7.10) + activejob (= 6.1.7.10) + activerecord (= 6.1.7.10) + activestorage (= 6.1.7.10) + activesupport (= 6.1.7.10) mail (>= 2.7.1) - actionmailer (6.1.7.8) - actionpack (= 6.1.7.8) - actionview (= 6.1.7.8) - activejob (= 6.1.7.8) - activesupport (= 6.1.7.8) + actionmailer (6.1.7.10) + actionpack (= 6.1.7.10) + actionview (= 6.1.7.10) + activejob (= 6.1.7.10) + activesupport (= 6.1.7.10) mail (~> 2.5, >= 2.5.4) rails-dom-testing (~> 2.0) - actionpack (6.1.7.8) - actionview (= 6.1.7.8) - activesupport (= 6.1.7.8) + actionpack (6.1.7.10) + actionview (= 6.1.7.10) + activesupport (= 6.1.7.10) rack (~> 2.0, >= 2.0.9) rack-test (>= 0.6.3) rails-dom-testing (~> 2.0) rails-html-sanitizer (~> 1.0, >= 1.2.0) - actiontext (6.1.7.8) - actionpack (= 6.1.7.8) - activerecord (= 6.1.7.8) - activestorage (= 6.1.7.8) - activesupport (= 6.1.7.8) + actiontext (6.1.7.10) + actionpack (= 6.1.7.10) + 
activerecord (= 6.1.7.10) + activestorage (= 6.1.7.10) + activesupport (= 6.1.7.10) nokogiri (>= 1.8.5) - actionview (6.1.7.8) - activesupport (= 6.1.7.8) + actionview (6.1.7.10) + activesupport (= 6.1.7.10) builder (~> 3.1) erubi (~> 1.4) rails-dom-testing (~> 2.0) rails-html-sanitizer (~> 1.1, >= 1.2.0) - activejob (6.1.7.8) - activesupport (= 6.1.7.8) + activejob (6.1.7.10) + activesupport (= 6.1.7.10) globalid (>= 0.3.6) - activemodel (6.1.7.8) - activesupport (= 6.1.7.8) - activerecord (6.1.7.8) - activemodel (= 6.1.7.8) - activesupport (= 6.1.7.8) - activestorage (6.1.7.8) - actionpack (= 6.1.7.8) - activejob (= 6.1.7.8) - activerecord (= 6.1.7.8) - activesupport (= 6.1.7.8) + activemodel (6.1.7.10) + activesupport (= 6.1.7.10) + activerecord (6.1.7.10) + activemodel (= 6.1.7.10) + activesupport (= 6.1.7.10) + activestorage (6.1.7.10) + actionpack (= 6.1.7.10) + activejob (= 6.1.7.10) + activerecord (= 6.1.7.10) + activesupport (= 6.1.7.10) marcel (~> 1.0) mini_mime (>= 1.1.0) - activesupport (6.1.7.8) + activesupport (6.1.7.10) concurrent-ruby (~> 1.0, >= 1.0.2) i18n (>= 1.6, < 2) minitest (>= 5.1) tzinfo (~> 2.0) zeitwerk (~> 2.3) - bootsnap (1.18.3) + base64 (0.3.0) + bootsnap (1.19.0) msgpack (~> 1.2) builder (3.3.0) - byebug (11.1.3) - concurrent-ruby (1.3.1) + byebug (12.0.0) + concurrent-ruby (1.3.6) crass (1.0.6) - date (3.3.4) + date (3.5.1) domain_name (0.6.20240107) - erubi (1.12.0) - ffi (1.17.0-x86_64-darwin) - ffi (1.17.0-x86_64-linux-gnu) - globalid (1.2.1) + erubi (1.13.1) + ffi (1.17.2-arm64-darwin) + ffi (1.17.2-x86_64-darwin) + ffi (1.17.2-x86_64-linux-gnu) + globalid (1.3.0) activesupport (>= 6.1) http-accept (1.7.0) - http-cookie (1.0.6) + http-cookie (1.1.0) domain_name (~> 0.5) - i18n (1.14.5) + i18n (1.14.7) concurrent-ruby (~> 1.0) listen (3.0.8) rb-fsevent (~> 0.9, >= 0.9.4) rb-inotify (~> 0.9, >= 0.9.7) - loofah (2.22.0) + logger (1.7.0) + loofah (2.25.0) crass (~> 1.0.2) nokogiri (>= 1.12.0) - mail (2.8.1) + mail (2.9.0) + logger 
mini_mime (>= 0.1.1) net-imap net-pop net-smtp - marcel (1.0.4) + marcel (1.1.0) method_source (1.1.0) - mime-types (3.5.2) - mime-types-data (~> 3.2015) - mime-types-data (3.2024.0507) + mime-types (3.7.0) + logger + mime-types-data (~> 3.2025, >= 3.2025.0507) + mime-types-data (3.2025.0924) mini_mime (1.1.5) - minitest (5.23.1) - msgpack (1.7.2) - net-imap (0.4.12) + minitest (5.27.0) + msgpack (1.8.0) + net-imap (0.5.13) date net-protocol net-pop (0.1.2) net-protocol net-protocol (0.2.2) timeout - net-smtp (0.5.0) + net-smtp (0.5.1) net-protocol netrc (0.11.0) - nio4r (2.7.3) - nokogiri (1.16.5-x86_64-darwin) + nio4r (2.7.5) + nokogiri (1.18.10-arm64-darwin) racc (~> 1.4) - nokogiri (1.16.5-x86_64-linux) + nokogiri (1.18.10-x86_64-darwin) racc (~> 1.4) - puma (6.4.2) + nokogiri (1.18.10-x86_64-linux-gnu) + racc (~> 1.4) + puma (7.1.0) nio4r (~> 2.0) - racc (1.8.0) - rack (2.2.9) - rack-test (2.1.0) + racc (1.8.1) + rack (2.2.21) + rack-test (2.2.0) rack (>= 1.3) - rails (6.1.7.8) - actioncable (= 6.1.7.8) - actionmailbox (= 6.1.7.8) - actionmailer (= 6.1.7.8) - actionpack (= 6.1.7.8) - actiontext (= 6.1.7.8) - actionview (= 6.1.7.8) - activejob (= 6.1.7.8) - activemodel (= 6.1.7.8) - activerecord (= 6.1.7.8) - activestorage (= 6.1.7.8) - activesupport (= 6.1.7.8) + rails (6.1.7.10) + actioncable (= 6.1.7.10) + actionmailbox (= 6.1.7.10) + actionmailer (= 6.1.7.10) + actionpack (= 6.1.7.10) + actiontext (= 6.1.7.10) + actionview (= 6.1.7.10) + activejob (= 6.1.7.10) + activemodel (= 6.1.7.10) + activerecord (= 6.1.7.10) + activestorage (= 6.1.7.10) + activesupport (= 6.1.7.10) bundler (>= 1.15.0) - railties (= 6.1.7.8) + railties (= 6.1.7.10) sprockets-rails (>= 2.0.0) - rails-dom-testing (2.2.0) + rails-dom-testing (2.3.0) activesupport (>= 5.0.0) minitest nokogiri (>= 1.6) - rails-html-sanitizer (1.6.0) + rails-html-sanitizer (1.6.2) loofah (~> 2.21) - nokogiri (~> 1.14) - railties (6.1.7.8) - actionpack (= 6.1.7.8) - activesupport (= 6.1.7.8) + nokogiri (>= 
1.15.7, != 1.16.7, != 1.16.6, != 1.16.5, != 1.16.4, != 1.16.3, != 1.16.2, != 1.16.1, != 1.16.0.rc1, != 1.16.0) + railties (6.1.7.10) + actionpack (= 6.1.7.10) + activesupport (= 6.1.7.10) method_source rake (>= 12.2) thor (~> 1.0) - rake (13.2.1) + rake (13.3.1) rb-fsevent (0.11.2) rb-inotify (0.11.1) ffi (~> 1.0) @@ -159,35 +166,34 @@ GEM spring-watcher-listen (2.0.1) listen (>= 2.7, < 4.0) spring (>= 1.2, < 3.0) - sprockets (4.2.1) + sprockets (4.2.2) concurrent-ruby (~> 1.0) + logger rack (>= 2.2.4, < 4) - sprockets-rails (3.4.2) - actionpack (>= 5.2) - activesupport (>= 5.2) + sprockets-rails (3.5.2) + actionpack (>= 6.1) + activesupport (>= 6.1) sprockets (>= 3.0.0) + sqlite3 (1.7.3-arm64-darwin) sqlite3 (1.7.3-x86_64-darwin) sqlite3 (1.7.3-x86_64-linux) - thor (1.3.1) - timeout (0.4.1) + thor (1.4.0) + timeout (0.6.0) tzinfo (2.0.6) concurrent-ruby (~> 1.0) - websocket-driver (0.7.6) + websocket-driver (0.8.0) + base64 websocket-extensions (>= 0.1.0) websocket-extensions (0.1.5) - zeitwerk (2.6.15) + zeitwerk (2.6.18) PLATFORMS - arm64-darwin-23 - x86_64-darwin-21 - x86_64-darwin-22 x86_64-linux DEPENDENCIES bootsnap byebug listen (>= 3.0.5, < 3.2) - net-smtp puma (>= 5.6) rails (~> 6.1.7) rest-client (>= 2.1.0.rc1, < 2.2) From c42e264f4aed3d94026315fb1f505aa4c22d8d85 Mon Sep 17 00:00:00 2001 From: Greg Tunink Date: Fri, 19 Dec 2025 23:29:16 -0600 Subject: [PATCH 56/62] Set defaults to 5.2, enable new 6.1 defaults Remove old new_framework_defaults initializers for 5.0 and 5.2 --- config/application.rb | 2 +- config/initializers/new_framework_defaults.rb | 15 -------- .../new_framework_defaults_5_2.rb | 35 ------------------- .../new_framework_defaults_6_1.rb | 32 ++++++++--------- 4 files changed, 17 insertions(+), 67 deletions(-) delete mode 100644 config/initializers/new_framework_defaults.rb delete mode 100644 config/initializers/new_framework_defaults_5_2.rb diff --git a/config/application.rb b/config/application.rb index 03f5085..db0b615 100644 --- 
a/config/application.rb +++ b/config/application.rb @@ -22,7 +22,7 @@ module Api class Application < Rails::Application # Initialize configuration defaults for originally generated Rails version. - config.load_defaults 5.0 + config.load_defaults 5.2 # Configuration for the application, engines, and railties goes here. # diff --git a/config/initializers/new_framework_defaults.rb b/config/initializers/new_framework_defaults.rb deleted file mode 100644 index e943ba9..0000000 --- a/config/initializers/new_framework_defaults.rb +++ /dev/null @@ -1,15 +0,0 @@ -# Be sure to restart your server when you modify this file. -# -# This file contains migration options to ease your Rails 5.0 upgrade. -# -# Read the Guide for Upgrading Ruby on Rails for more info on each option. - -# Make Ruby 2.4 preserve the timezone of the receiver when calling `to_time`. -# Previous versions had false. -ActiveSupport.to_time_preserves_timezone = true - -# Require `belongs_to` associations by default. Previous versions had false. -Rails.application.config.active_record.belongs_to_required_by_default = true - -# Configure SSL options to enable HSTS with subdomains. Previous versions had false. -#Rails.application.config.ssl_options = { hsts: { subdomains: true } } diff --git a/config/initializers/new_framework_defaults_5_2.rb b/config/initializers/new_framework_defaults_5_2.rb deleted file mode 100644 index 421e5a2..0000000 --- a/config/initializers/new_framework_defaults_5_2.rb +++ /dev/null @@ -1,35 +0,0 @@ -# Be sure to restart your server when you modify this file. -# -# This file contains migration options to ease your Rails 5.2 upgrade. -# -# Once upgraded flip defaults one by one to migrate to the new default. -# -# Read the Guide for Upgrading Ruby on Rails for more info on each option. - -# Make Active Record use stable #cache_key alongside new #cache_version method. -# This is needed for recyclable cache keys. 
-# Rails.application.config.active_record.cache_versioning = true - -# Use AES-256-GCM authenticated encryption for encrypted cookies. -# Also, embed cookie expiry in signed or encrypted cookies for increased security. -# -# This option is not backwards compatible with earlier Rails versions. -# It's best enabled when your entire app is migrated and stable on 5.2. -# -# Existing cookies will be converted on read then written with the new scheme. -# Rails.application.config.action_dispatch.use_authenticated_cookie_encryption = true - -# Use AES-256-GCM authenticated encryption as default cipher for encrypting messages -# instead of AES-256-CBC, when use_authenticated_message_encryption is set to true. -# Rails.application.config.active_support.use_authenticated_message_encryption = true - -# Add default protection from forgery to ActionController::Base instead of in -# ApplicationController. -# Rails.application.config.action_controller.default_protect_from_forgery = true - -# Store boolean values are in sqlite3 databases as 1 and 0 instead of 't' and -# 'f' after migrating old data. -# Rails.application.config.active_record.sqlite3.represent_boolean_as_integer = true - -# Use SHA-1 instead of MD5 to generate non-sensitive digests, such as the ETag header. -# Rails.application.config.active_support.use_sha1_digests = true diff --git a/config/initializers/new_framework_defaults_6_1.rb b/config/initializers/new_framework_defaults_6_1.rb index 9526b83..89165aa 100644 --- a/config/initializers/new_framework_defaults_6_1.rb +++ b/config/initializers/new_framework_defaults_6_1.rb @@ -7,61 +7,61 @@ # Read the Guide for Upgrading Ruby on Rails for more info on each option. # Support for inversing belongs_to -> has_many Active Record associations. -# Rails.application.config.active_record.has_many_inversing = true +Rails.application.config.active_record.has_many_inversing = true # Track Active Storage variants in the database. 
-# Rails.application.config.active_storage.track_variants = true +Rails.application.config.active_storage.track_variants = true # Apply random variation to the delay when retrying failed jobs. -# Rails.application.config.active_job.retry_jitter = 0.15 +Rails.application.config.active_job.retry_jitter = 0.15 # Stop executing `after_enqueue`/`after_perform` callbacks if # `before_enqueue`/`before_perform` respectively halts with `throw :abort`. -# Rails.application.config.active_job.skip_after_callbacks_if_terminated = true +Rails.application.config.active_job.skip_after_callbacks_if_terminated = true # Specify cookies SameSite protection level: either :none, :lax, or :strict. # # This change is not backwards compatible with earlier Rails versions. # It's best enabled when your entire app is migrated and stable on 6.1. -# Rails.application.config.action_dispatch.cookies_same_site_protection = :lax +Rails.application.config.action_dispatch.cookies_same_site_protection = :lax # Generate CSRF tokens that are encoded in URL-safe Base64. # # This change is not backwards compatible with earlier Rails versions. # It's best enabled when your entire app is migrated and stable on 6.1. -# Rails.application.config.action_controller.urlsafe_csrf_tokens = true +Rails.application.config.action_controller.urlsafe_csrf_tokens = true # Specify whether `ActiveSupport::TimeZone.utc_to_local` returns a time with an # UTC offset or a UTC time. -# ActiveSupport.utc_to_local_returns_utc_offset_times = true +ActiveSupport.utc_to_local_returns_utc_offset_times = true # Change the default HTTP status code to `308` when redirecting non-GET/HEAD # requests to HTTPS in `ActionDispatch::SSL` middleware. -# Rails.application.config.action_dispatch.ssl_default_redirect_status = 308 +Rails.application.config.action_dispatch.ssl_default_redirect_status = 308 # Use new connection handling API. For most applications this won't have any # effect. 
For applications using multiple databases, this new API provides # support for granular connection swapping. -# Rails.application.config.active_record.legacy_connection_handling = false +Rails.application.config.active_record.legacy_connection_handling = false # Make `form_with` generate non-remote forms by default. -# Rails.application.config.action_view.form_with_generates_remote_forms = false +Rails.application.config.action_view.form_with_generates_remote_forms = false # Set the default queue name for the analysis job to the queue adapter default. -# Rails.application.config.active_storage.queues.analysis = nil +Rails.application.config.active_storage.queues.analysis = nil # Set the default queue name for the purge job to the queue adapter default. -# Rails.application.config.active_storage.queues.purge = nil +Rails.application.config.active_storage.queues.purge = nil # Set the default queue name for the incineration job to the queue adapter default. -# Rails.application.config.action_mailbox.queues.incineration = nil +Rails.application.config.action_mailbox.queues.incineration = nil # Set the default queue name for the routing job to the queue adapter default. -# Rails.application.config.action_mailbox.queues.routing = nil +Rails.application.config.action_mailbox.queues.routing = nil # Set the default queue name for the mail deliver job to the queue adapter default. -# Rails.application.config.action_mailer.deliver_later_queue_name = nil +Rails.application.config.action_mailer.deliver_later_queue_name = nil # Generate a `Link` header that gives a hint to modern browsers about # preloading assets when using `javascript_include_tag` and `stylesheet_link_tag`. 
-# Rails.application.config.action_view.preload_links_header = true +Rails.application.config.action_view.preload_links_header = true From e4e16f8ca7334cf26c9eeea7d2a16d66c23a6a12 Mon Sep 17 00:00:00 2001 From: Greg Tunink Date: Fri, 19 Dec 2025 23:44:35 -0600 Subject: [PATCH 57/62] Fix logger, listen gem broken from last update MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Update of Gemfile required logger be added to boot.rb now Listen gem version was causing Rails app boot to fail. Hadn't run bundle update again after removing the extra platforms, so suspect this was the problem we'd seen before with those 🤔 --- Gemfile.lock | 28 ++++++++-------------------- config/boot.rb | 1 + 2 files changed, 9 insertions(+), 20 deletions(-) diff --git a/Gemfile.lock b/Gemfile.lock index 04a05ce..db17c93 100644 --- a/Gemfile.lock +++ b/Gemfile.lock @@ -70,8 +70,6 @@ GEM date (3.5.1) domain_name (0.6.20240107) erubi (1.13.1) - ffi (1.17.2-arm64-darwin) - ffi (1.17.2-x86_64-darwin) ffi (1.17.2-x86_64-linux-gnu) globalid (1.3.0) activesupport (>= 6.1) @@ -80,9 +78,9 @@ GEM domain_name (~> 0.5) i18n (1.14.7) concurrent-ruby (~> 1.0) - listen (3.0.8) - rb-fsevent (~> 0.9, >= 0.9.4) - rb-inotify (~> 0.9, >= 0.9.7) + listen (3.9.0) + rb-fsevent (~> 0.10, >= 0.10.3) + rb-inotify (~> 0.9, >= 0.9.10) logger (1.7.0) loofah (2.25.0) crass (~> 1.0.2) @@ -113,10 +111,6 @@ GEM net-protocol netrc (0.11.0) nio4r (2.7.5) - nokogiri (1.18.10-arm64-darwin) - racc (~> 1.4) - nokogiri (1.18.10-x86_64-darwin) - racc (~> 1.4) nokogiri (1.18.10-x86_64-linux-gnu) racc (~> 1.4) puma (7.1.0) @@ -162,10 +156,7 @@ GEM http-cookie (>= 1.0.2, < 2.0) mime-types (>= 1.16, < 4.0) netrc (~> 0.8) - spring (2.1.1) - spring-watcher-listen (2.0.1) - listen (>= 2.7, < 4.0) - spring (>= 1.2, < 3.0) + spring (4.4.0) sprockets (4.2.2) concurrent-ruby (~> 1.0) logger @@ -174,8 +165,6 @@ GEM actionpack (>= 6.1) activesupport (>= 6.1) sprockets (>= 3.0.0) - sqlite3 
(1.7.3-arm64-darwin) - sqlite3 (1.7.3-x86_64-darwin) sqlite3 (1.7.3-x86_64-linux) thor (1.4.0) timeout (0.6.0) @@ -191,14 +180,13 @@ PLATFORMS x86_64-linux DEPENDENCIES - bootsnap + bootsnap (>= 1.4.4) byebug - listen (>= 3.0.5, < 3.2) - puma (>= 5.6) + listen (~> 3.3) + puma (>= 5.0) rails (~> 6.1.7) - rest-client (>= 2.1.0.rc1, < 2.2) + rest-client (~> 2.1) spring - spring-watcher-listen (~> 2.0.0) sqlite3 (~> 1.4) tzinfo-data diff --git a/config/boot.rb b/config/boot.rb index 3cda23b..676662a 100644 --- a/config/boot.rb +++ b/config/boot.rb @@ -2,3 +2,4 @@ require "bundler/setup" # Set up gems listed in the Gemfile. require "bootsnap/setup" # Speed up boot time by caching expensive operations. +require "logger" From 8e6bdca86ebb18f6e671c6aa364ea66be93439c2 Mon Sep 17 00:00:00 2001 From: Greg Tunink Date: Sat, 20 Dec 2025 00:13:24 -0600 Subject: [PATCH 58/62] Add standard Rails dev and prod env Note production is modified to keep non-public production domains out of source control --- config/environments/development.rb | 8 ++------ config/environments/production.rb | 24 +++++++++++++++++------- 2 files changed, 19 insertions(+), 13 deletions(-) diff --git a/config/environments/development.rb b/config/environments/development.rb index b41968b..1b2b7ec 100644 --- a/config/environments/development.rb +++ b/config/environments/development.rb @@ -61,7 +61,7 @@ # routes, locales, etc. This feature depends on the listen gem. 
config.file_watcher = ActiveSupport::EventedFileUpdateChecker - # LOCAL + # LOCAL CHANGES # Custom dev env logger to empty log more frequently config.logger = ActiveSupport::TaggedLogging.new( ActiveSupport::Logger.new(File.join(Rails.root.to_s, "log", "development.log"), @@ -69,9 +69,5 @@ 1, 32 * 1024 * 1024 ) ) - - # CDRH CONFIGURATION - - config.hosts << "cdrhdev1.unl.edu" - config.hosts << "whitman-dev.unl.edu" + config.hosts << ENV.fetch("RAILS_DEV_HOST") { "localhost" } end diff --git a/config/environments/production.rb b/config/environments/production.rb index 50bdaf0..e98aaed 100644 --- a/config/environments/production.rb +++ b/config/environments/production.rb @@ -111,11 +111,21 @@ # config.active_record.database_resolver = ActiveRecord::Middleware::DatabaseSelector::Resolver # config.active_record.database_resolver_context = ActiveRecord::Middleware::DatabaseSelector::Resolver::Session - # CDRH CONFIGURATION - - # Force all access to the app over SSL, use Strict-Transport-Security, and use secure cookies. - config.force_ssl = true - # Handle STS here instead of Apache, or Rails duplicates header contents - # Also unset cache-control header in HTTPS vhost for same reason - config.ssl_options = { hsts: { preload: true } } + # LOCAL CHANGES + # Secure HTTPS config + # Can be toggled off with env var for local production env testing + config.force_ssl = ENV['RAILS_PROD_NOSSL'].blank? 
+ # HSTS here instead of Apache, otherwise Rails duplicates header contents + # Also unset cache-control header in Apache HTTPS vhost for same reason + config.ssl_options = { + hsts: { expires: 31536000, preload: true, subdomains: true } + } + # In case we ever need to disable HSTS on a public site, switch to this + # config.ssl_options = { hsts: false } + + # Reduce log bloat + config.log_level = :warn + + # Allow access via non-public domain + config.hosts << ENV.fetch("RAILS_PROD_HOST") { "localhost" } end From 30145d1d813b61e84c50a0f881f7bf7ad9a15037 Mon Sep 17 00:00:00 2001 From: Greg Tunink Date: Sat, 20 Dec 2025 00:15:22 -0600 Subject: [PATCH 59/62] Augment Elasticsearch request error handling Add back RestClient::BadRequest exception so both display errors from Elasticsearch in API response rather than returning an opaque Rails error page --- app/services/search_service.rb | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/app/services/search_service.rb b/app/services/search_service.rb index d17cc68..78fc536 100644 --- a/app/services/search_service.rb +++ b/app/services/search_service.rb @@ -79,7 +79,8 @@ def search_item(id) raw_res = post("_search", req) if raw_res.class == RuntimeError on_error(raw_res, req) - elsif raw_res.class == RestClient::ExceptionWithResponse + elsif raw_res.class == RestClient::ExceptionWithResponse || + raw_res.class == RestClient::BadRequest on_error(JSON.parse(raw_res.response), req) else res = build_item_response(raw_res) @@ -92,7 +93,8 @@ def search_items raw_res = post("_search", req) if raw_res.class == RuntimeError on_error(raw_res.inspect, req) - elsif raw_res.class == RestClient::ExceptionWithResponse + elsif raw_res.class == RestClient::ExceptionWithResponse || + raw_res.class == RestClient::BadRequest on_error(JSON.parse(raw_res.response), req) else res = build_item_response(raw_res) From afaf758ea806dc302ed53fcd91d93836435b3766 Mon Sep 17 00:00:00 2001 From: Greg Tunink Date: Sat, 20 Dec 2025 
00:55:21 -0600 Subject: [PATCH 60/62] search_item_req.rb reformatting, refactoring - Fix typo in comment - Reformat other comments so there is a space between # and text - Refactor `if nested` code together and bypass extraneous variable when value only used once after - Likewise for path variable - Fix indentation of aggregation hash --- app/services/search_item_req.rb | 83 ++++++++++++++------------------- 1 file changed, 36 insertions(+), 47 deletions(-) diff --git a/app/services/search_item_req.rb b/app/services/search_item_req.rb index 7de29c6..b271c3e 100644 --- a/app/services/search_item_req.rb +++ b/app/services/search_item_req.rb @@ -105,7 +105,7 @@ def facets "order" => { f_type => dir }, } } - #nested facet, matching on another nested facet + # nested facet, matching on another nested facet elsif f.include?("[") # will be an array including the original, and an alternate aggregation name @@ -116,43 +116,37 @@ def facets agg_name = options[1] facet = original.split("[")[0] - # may or may not be nested - nested = facet.include?(".") - if nested - path = facet.split(".").first - end condition = original[/(?<=\[).+?(?=\])/] subject = condition.split("#").first predicate = condition.split("#").last if f_type == "_count" - #make sure sort is on the acutal count of documents + # make sure sort is on the actual count of documents f_type = "field_to_item" end aggregation = { - # common to nested and non-nested - "filter" => { - "term" => { - subject => predicate - } - }, - "aggs" => { - agg_name => { - "terms" => { - "field" => facet, - "order" => {f_type => dir}, - "size" => size - }, - "aggs" => { - "field_to_item" => { - "reverse_nested" => {}, - "aggs" => { - "top_matches" => { - "top_hits" => { - "_source" => { - "includes" => [ facet ] - }, - "size" => 1 - } + # common to nested and non-nested + "filter" => { + "term" => { + subject => predicate + } + }, + "aggs" => { + agg_name => { + "terms" => { + "field" => facet, + "order" => {f_type => dir}, + "size" 
=> size + }, + "aggs" => { + "field_to_item" => { + "reverse_nested" => {}, + "aggs" => { + "top_matches" => { + "top_hits" => { + "_source" => { + "includes" => [ facet ] + }, + "size" => 1 } } } @@ -160,29 +154,29 @@ def facets } } } - #interpolate above hash into nested query - if nested + } + # interpolate above hash into nested query + if facet.include?(".") aggs[agg_name] = { "nested" => { - "path" => path + "path" => facet.split(".").first }, "aggs" => { agg_name => aggregation } } else - #otherwise it is the whole query + # otherwise it is the whole query aggs[agg_name] = aggregation end elsif f.include?(".") - path = f.split(".").first if f_type == "_count" - #make sure sort is on the acutal count of documents + # make sure sort is on the acutal count of documents f_type = "field_to_item" end aggs[f] = { "nested" => { - "path" => path + "path" => f.split(".").first }, "aggs" => { f => { @@ -243,10 +237,6 @@ def filters if filter[0].include?("[") original = filter[0] facet = original.split("[")[0] - nested = facet.include?(".") - if nested - path = facet.split(".").first - end condition = original[/(?<=\[).+?(?=\])/] subject = condition.split("#").first predicate = condition.split("#").last @@ -258,10 +248,10 @@ def filters term_filter = { subject => predicate } - if nested + if facet.include?(".") query = { "nested" => { - "path" => path, + "path" => facet.split(".").first, "query" => { "bool" => { "must" => [ @@ -274,13 +264,12 @@ def filters } end filter_list << query - #ordinary nested facet + # ordinary nested facet elsif filter[0].include?(".") - path = filter[0].split(".").first # this is a nested field and must be treated differently nested = { "nested" => { - "path" => path, + "path" => filter[0].split(".").first, "query" => { "term" => { # Remove CR's added by hidden input field values with returns From 3d4842b03061d206739bc3d9fa8986afbf547f31 Mon Sep 17 00:00:00 2001 From: Greg Tunink Date: Sat, 20 Dec 2025 01:14:00 -0600 Subject: [PATCH 61/62] 
Refactor search_item_res.rb - Remove extraneous returns - Remove extraneous use of buckets variable when direct returns work --- app/services/search_item_res.rb | 18 ++++++++---------- 1 file changed, 8 insertions(+), 10 deletions(-) diff --git a/app/services/search_item_res.rb b/app/services/search_item_res.rb index b711c5d..104bd9a 100644 --- a/app/services/search_item_res.rb +++ b/app/services/search_item_res.rb @@ -54,7 +54,7 @@ def find_source_from_top_hits(top_hits, field, key) hit = [hit] end if !hit - return key + key elsif hit.class == Array if nested_child #TODO solve bug where this returns a hash value instead of an array @@ -70,18 +70,18 @@ def find_source_from_top_hits(top_hits, field, key) if match_index #matching item may be an array if hit[match_index].class == Array - return hit[match_index][0] + hit[match_index][0] else #just return the match - return hit[match_index] + hit[match_index] end else # if there is an array of values but no match, just return the key - return key + key end else # it must be single-valued and therefore we are good to go - return hit + hit end end @@ -141,17 +141,15 @@ def remove_nonword_chars(term) end def get_buckets(info, field) - buckets = nil # ordinary facet if info.key?("buckets") - buckets = info["buckets"] + info["buckets"] # nested facet elsif info.dig(field, "buckets") - buckets = info.dig(field, "buckets") + info.dig(field, "buckets") # filtered facet else - buckets = info.dig(field, field, "buckets") + info.dig(field, field, "buckets") end - buckets end end From aac16c04f961cd4fe63b13554639c05e5e9db8b8 Mon Sep 17 00:00:00 2001 From: Greg Tunink Date: Sat, 20 Dec 2025 01:41:03 -0600 Subject: [PATCH 62/62] Update version, config example; review Changelog - Version to 2.0.0 - Config example license and docs links updated. 
Old year removed from `api_update` example - Changelog review - Set version for release and compare url to `v2.0.0` - Change explicit code references to `code` rather than "quotes" - Capitalize first words, remove trailing periods - Remove duplicate entries (`Migration` section, `api_version` added to `res`) - Drop Rails and Ruby entry as v1.0.5 for v1 already upgraded to them - Move text for list item including json snippets to before the snippets as Markdown doesn't connect the indented text after with prior list items - Add json syntax highlighting to json snippets - Reorder some of the major change items to the top of sections - Add v1.0.5 release that was forgotten with that release --- CHANGELOG.md | 88 ++++++++++++++-------------------- config/config.example.yml | 6 +-- config/initializers/version.rb | 2 +- 3 files changed, 40 insertions(+), 56 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 6cdada1..682ba26 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -28,39 +28,35 @@ Markdown Spec](https://github.github.com/gfm/). ### Contributors --> -## [2.0.0] - new nested bucket aggregation/query functionality for Habeas release - -[unreleased]: https://github.com/CDRH/api/compare/v1.0.4...dev +## [v2.0.0] - Nested bucket aggregation/query functionality +[v2.0.0]: https://github.com/CDRH/api/compare/v1.0.4...v2.0.0 ### Added -- "api_version" added to all response "res" objects -- support for elasticsearch 8.5 -- user/password basic authentication with ES 8.5, when querying the index or - posting from Datura -- better support for nested fields -- support for nested bucket aggregations, matching a nested value on another - nested value. - `person.name[person.role#judge]` will return all names of persons where - role="judge". 
-- "api_version" added to all response "res" objects -- updated documentation for new features +- `api_version` added to all response `res` objects +- Support for Elasticsearch 8.5+ +- User/password basic authentication support when credentials present +- Better support for nested fields +- Support for nested bucket aggregations, matching a nested value on another + nested value. For example, `person.name[person.role#judge]` will return + all names of persons where `role="judge"` +- Updated documentation for new features - `track total hits` option added to ES queries, to return counts of search results higher than 10000 ### Changed -- upgraded to Rails 6.1.7 and Ruby 3 -- changes reflect new api schemas in Datura, which make heavy use of nested fields +- Gemset changed to `api-v2` +- Changes reflect new api schemas in Datura, which make heavy use of nested fields - Added support for aggregating buckets by normalized keyword and returning - the "top_hits" first document result for a non-normalized display. internal logic - has been changed because of nested fields, this may cause subtle differences in - how facet labels are displayed -- Changes response format of `facets` key + the `top_hits` first document result for a non-normalized display. Internal logic has been changed because of nested fields, this may cause subtle differences in how facet labels are displayed +- Changes response format of `facets` key. Not only is the response format + itself different, but there may be fewer facets returned since matching + normalized values are combined From: - ``` + ```json "facets": { "WILLA CATHER": 10, "Willa Cather": 50 @@ -69,44 +65,30 @@ Markdown Spec](https://github.github.com/gfm/). 
To: - ``` + ```json "facets": { "willa cather": { "num" : 60, source: "Willa Cather" } } ``` - Not only is the response format itself different, but there may be fewer - facets returned since matching normalized values are combined -- gemset changed to `api-v2` - ### Migration -- in the config files of your Datura repos, (`private.yml` or `public.yml`, set +- Add nested facets as described above, if desired +- Orchid apps that connect to the API should use `facet_limit` instead of `facet_num` in options +- In the config files of your Datura repos, (`private.yml` or `public.yml`, set the api to `"api_version": "2.0"` to take advantage of new bucket aggregation functionality (or `"api_version": "1.0"` for legacy repos that have not been updated for the new schema). Please note that a running API index can only use one ES index at a time, and each ES index is restricted to one version of the - schema. See new schema (2.0) documentation - [here](https://github.com/CDRH/datura/docs/schema_v2.md). -- Use Elasticsearch 8.5 or later. See [dev docs instructions](https://github.com/CDRH/cdrh_dev_docs/blob/update_elasticsearch_documentation/publishing/2_basic_requirements.md#downloading-elasticsearch). -- If you are using ES with security enabled, you must configure credentials with Rails in the API repo. See https://guides.rubyonrails.org/v6.1/security.html. Configure the VSCode editor. Run `EDITOR="code --wait" rails credentials:edit` and add - -``` -elasticsearch: - user: username - password: ***** -``` - -to the secrets file and then close the window to save. Do not commit `config/master.key` (it should be in `gitignore`) - -- Orchid apps that connect to the API should use `facet_limit` instead of `facet_num` in options. 
-- Add nested facets as described above, if desired - -### Migration - -- in Datura repos config `private.yml` api to `"api_version": "2.0"` to take advantage of new bucket aggregation functionality (or `"api_version": "1.0"` for legacy repos that have not been updated for the new schema). Please note that a running API index can only use one ES index at a time, and each ES index is restricted to one version of the schema. See new schema (2.0) documentation [here](https://github.com/CDRH/datura/docs/schema_v2.md) -- Use Elasticsearch 8.5 or later. See [dev docs instructions](https://github.com/CDRH/cdrh_dev_docs/blob/update_elasticsearch_documentation/publishing/2_basic_requirements.md#downloading-elasticsearch). -- If you are using ES with security enabled, you must configure credentials with Rails in the API repo. See https://guides.rubyonrails.org/v6.1/security.html. Configure the VSCode editor. Run `EDITOR="code --wait" rails credentials:edit` and add + schema. See [new schema (2.0) + documentation](https://github.com/CDRH/datura/docs/schema_v2.md). +- Connect to Elasticsearch 8.5 or later +- If you are using ES with security enabled, you must configure credentials + with Rails in the API repo. See + https://guides.rubyonrails.org/v6.1/security.html. To configure with VSCode + editor run `EDITOR="code --wait" rails credentials:edit` and add to the + secrets file and then close the window to save. + Do not commit `config/master.key` (it should be in `.gitignore`) ``` elasticsearch: @@ -114,12 +96,14 @@ elasticsearch: password: ***** ``` -to the secrets file and then close the window to save. Do not commit `config/master.key` (it should be in `gitignore`) +## [v1.0.5] - API v1 on Ruby 3.1.6, Rails 6.1.7 +[v1.0.5]: https://github.com/CDRH/api/compare/v1.0.4...v1.0.5 -- Orchid apps that connect to the API should use `facet_limit` instead of `facet_num` in options. -- Add nested facets as described above, if desired. 
+### Changed +- Ruby 3.1.6 +- Rails 6.1.7 -## [v1.0.4](https://github.com/CDRH/api/compare/v1.0....v1.0.4) - Updates & license +## [v1.0.4](https://github.com/CDRH/api/compare/v1.0.3...v1.0.4) - Updates & license ### Changed diff --git a/config/config.example.yml b/config/config.example.yml index a06b13c..bd03bdc 100644 --- a/config/config.example.yml +++ b/config/config.example.yml @@ -1,11 +1,11 @@ default: &default metadata: # api metadata / description - api_updated: "TODO 2017" + api_updated: "TODO" contact: "cdrhdev@unl.edu" description: "API to access all public Center for Digital Research in the Humanities resources" - documentation: "https://cdrhapi.unl.edu/docs" - license: "TODO" + documentation: "https://github.com/CDRH/api/tree/main/docs" + license: "MIT License" terms_of_service: "TODO" settings: diff --git a/config/initializers/version.rb b/config/initializers/version.rb index b17ba8e..3144cc8 100644 --- a/config/initializers/version.rb +++ b/config/initializers/version.rb @@ -1,5 +1,5 @@ module Api class Application < Rails::Application - VERSION = "1.0.4" + VERSION = "2.0.0" end end