From 0d042a1eb4bba24c873dd87afa0a672df653c9af Mon Sep 17 00:00:00 2001 From: Vincent Emonet Date: Tue, 25 Oct 2016 15:41:18 +0200 Subject: [PATCH 001/106] add comment --- lib/goo/sparql/queries.rb | 1 + 1 file changed, 1 insertion(+) diff --git a/lib/goo/sparql/queries.rb b/lib/goo/sparql/queries.rb index 7e6adb35..b8d282ab 100644 --- a/lib/goo/sparql/queries.rb +++ b/lib/goo/sparql/queries.rb @@ -724,6 +724,7 @@ def self.model_load_sliced(*options) unless models_by_id[id].class.handler?(v) unless object.nil? && !models_by_id[id].instance_variable_get("@#{v.to_s}").nil? if v != :id + # TODO: handle multilingual values here # if multiple language values are included for a given property, set the # corresponding model attribute to the English language value - NCBO-1662 if sol[v].kind_of?(RDF::Literal) From ae67841e4f60e56c1bd867ce0a2738e0739b0ceb Mon Sep 17 00:00:00 2001 From: Vincent Emonet Date: Fri, 9 Jun 2017 12:29:31 +0200 Subject: [PATCH 002/106] add Goo.main_languages, an array to define the main_languages of the Portal --- lib/goo.rb | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/lib/goo.rb b/lib/goo.rb index 43a1e408..3fe28dc7 100644 --- a/lib/goo.rb +++ b/lib/goo.rb @@ -23,6 +23,8 @@ module Goo @@resource_options = Set.new([:persistent]).freeze + @@main_languages = ["en","eng"] + @@configure_flag = false @@sparql_backends = {} @@model_by_name = {} @@ -87,6 +89,14 @@ def self.test_reset redis_cache: @@redis_client }) end + def self.main_languages + @@main_languages + end + + def self.main_languages=(value) + @@main_languages = value + end + def self.use_cache=(value) @@use_cache = value set_sparql_cache From 18d5ecad231e0fa609d86da5ae0de26c372b7c93 Mon Sep 17 00:00:00 2001 From: Vincent Emonet Date: Fri, 9 Jun 2017 12:54:44 +0200 Subject: [PATCH 003/106] Use 'Goo.main_languages' to get a property value. 
now when retrieving a value only the object with a language included in main_languages will be taken (value with no lang defined are taken too). Be careful if an ontology contains several prefLabel with lang defined here --- lib/goo.rb | 1 + lib/goo/sparql/queries.rb | 14 ++++++++++---- 2 files changed, 11 insertions(+), 4 deletions(-) diff --git a/lib/goo.rb b/lib/goo.rb index 3fe28dc7..5929ab70 100644 --- a/lib/goo.rb +++ b/lib/goo.rb @@ -23,6 +23,7 @@ module Goo @@resource_options = Set.new([:persistent]).freeze + # Define the languages from which the properties values will be taken (be careful if prefLabel with different lang, only one will be taken) @@main_languages = ["en","eng"] @@configure_flag = false diff --git a/lib/goo/sparql/queries.rb b/lib/goo/sparql/queries.rb index c775b015..78488886 100644 --- a/lib/goo/sparql/queries.rb +++ b/lib/goo/sparql/queries.rb @@ -582,7 +582,6 @@ def self.model_load_sliced(*options) end expand_equivalent_predicates(select,equivalent_predicates) - var_set_hash = {} select.each_solution do |sol| next if sol[:some_type] && klass.type_uri(collection) != sol[:some_type] @@ -725,14 +724,21 @@ def self.model_load_sliced(*options) unless models_by_id[id].class.handler?(v) unless object.nil? && !models_by_id[id].instance_variable_get("@#{v.to_s}").nil? if v != :id - # TODO: handle multilingual values here # if multiple language values are included for a given property, set the # corresponding model attribute to the English language value - NCBO-1662 if sol[v].kind_of?(RDF::Literal) key = "#{v}#__#{id.to_s}" - models_by_id[id].send("#{v}=", object, on_load: true) unless var_set_hash[key] lang = sol[v].language - var_set_hash[key] = true if lang == :EN || lang == :en + + #models_by_id[id].send("#{v}=", object, on_load: true) unless var_set_hash[key] + #var_set_hash[key] = true if lang == :EN || lang == :en + + # We add the value only if it's language is in the main languages or if lang is nil + if (Goo.main_languages.nil?) 
+ models_by_id[id].send("#{v}=", object, on_load: true) + elsif (lang.nil? || Goo.main_languages.include?(lang.to_s.downcase) || Goo.main_languages.include?(lang.to_s.upcase)) + models_by_id[id].send("#{v}=", object, on_load: true) + end else models_by_id[id].send("#{v}=", object, on_load: true) end From fa40b50851eee2b118d769a0590ed26f6ace89e6 Mon Sep 17 00:00:00 2001 From: Vincent Emonet Date: Fri, 9 Jun 2017 13:45:37 +0200 Subject: [PATCH 004/106] add main_lang and accepted_lang instead of just main_languages. The main_lang is the main lang used to extract prefLabels. Accepted lang is used to extract all others properties --- lib/goo.rb | 19 ++++++++++++++----- lib/goo/sparql/queries.rb | 23 +++++++++++++++++++++-- 2 files changed, 35 insertions(+), 7 deletions(-) diff --git a/lib/goo.rb b/lib/goo.rb index 5929ab70..ec0e9148 100644 --- a/lib/goo.rb +++ b/lib/goo.rb @@ -24,7 +24,8 @@ module Goo @@resource_options = Set.new([:persistent]).freeze # Define the languages from which the properties values will be taken (be careful if prefLabel with different lang, only one will be taken) - @@main_languages = ["en","eng"] + @@main_lang = "en" + @@accepted_lang = ["en","eng"] @@configure_flag = false @@sparql_backends = {} @@ -90,12 +91,20 @@ def self.test_reset redis_cache: @@redis_client }) end - def self.main_languages - @@main_languages + def self.main_lang + @@main_lang end - def self.main_languages=(value) - @@main_languages = value + def self.main_lang=(value) + @@main_lang = value + end + + def self.accepted_lang + @@accepted_lang + end + + def self.accepted_lang=(value) + @@accepted_lang = value end def self.use_cache=(value) diff --git a/lib/goo/sparql/queries.rb b/lib/goo/sparql/queries.rb index 78488886..f9257f28 100644 --- a/lib/goo/sparql/queries.rb +++ b/lib/goo/sparql/queries.rb @@ -582,6 +582,8 @@ def self.model_load_sliced(*options) end expand_equivalent_predicates(select,equivalent_predicates) + main_lang_hash = {} + accepted_lang_hash = {} 
select.each_solution do |sol| next if sol[:some_type] && klass.type_uri(collection) != sol[:some_type] @@ -734,9 +736,26 @@ def self.model_load_sliced(*options) #var_set_hash[key] = true if lang == :EN || lang == :en # We add the value only if it's language is in the main languages or if lang is nil - if (Goo.main_languages.nil?) + if (Goo.accepted_lang.nil? || Goo.main_lang.nil?) models_by_id[id].send("#{v}=", object, on_load: true) - elsif (lang.nil? || Goo.main_languages.include?(lang.to_s.downcase) || Goo.main_languages.include?(lang.to_s.upcase)) + + elsif (v.to_s.eql?("prefLabel")) + # Special treatment for prefLabel where we want to extract the main_lang first, then accepted lang if no main. + # Then anything if no main or accepted found + if lang.to_s.downcase.eql?(Goo.main_lang) + models_by_id[id].send("#{v}=", object, on_load: true) + main_lang_hash[key] = true + end + if !main_lang_hash[key] + if Goo.accepted_lang.include?(lang.to_s.downcase) + models_by_id[id].send("#{v}=", object, on_load: true) + accepted_lang_hash[key] = true + elsif !accepted_lang_hash[key] + models_by_id[id].send("#{v}=", object, on_load: true) + end + end + + elsif (lang.nil? 
|| Goo.accepted_lang.include?(lang.to_s.downcase)) models_by_id[id].send("#{v}=", object, on_load: true) end else From 172f7867aa9775866eda2ef43f9015f8611971f9 Mon Sep 17 00:00:00 2001 From: Vincent Emonet Date: Fri, 9 Jun 2017 14:02:40 +0200 Subject: [PATCH 005/106] remove accepted_lang, now only used main_lang --- lib/goo.rb | 11 +---------- lib/goo/sparql/queries.rb | 22 ++++++++-------------- 2 files changed, 9 insertions(+), 24 deletions(-) diff --git a/lib/goo.rb b/lib/goo.rb index ec0e9148..16bf3352 100644 --- a/lib/goo.rb +++ b/lib/goo.rb @@ -24,8 +24,7 @@ module Goo @@resource_options = Set.new([:persistent]).freeze # Define the languages from which the properties values will be taken (be careful if prefLabel with different lang, only one will be taken) - @@main_lang = "en" - @@accepted_lang = ["en","eng"] + @@main_lang = ["en","eng"] @@configure_flag = false @@sparql_backends = {} @@ -99,14 +98,6 @@ def self.main_lang=(value) @@main_lang = value end - def self.accepted_lang - @@accepted_lang - end - - def self.accepted_lang=(value) - @@accepted_lang = value - end - def self.use_cache=(value) @@use_cache = value set_sparql_cache diff --git a/lib/goo/sparql/queries.rb b/lib/goo/sparql/queries.rb index f9257f28..2da5583d 100644 --- a/lib/goo/sparql/queries.rb +++ b/lib/goo/sparql/queries.rb @@ -583,7 +583,6 @@ def self.model_load_sliced(*options) expand_equivalent_predicates(select,equivalent_predicates) main_lang_hash = {} - accepted_lang_hash = {} select.each_solution do |sol| next if sol[:some_type] && klass.type_uri(collection) != sol[:some_type] @@ -736,26 +735,21 @@ def self.model_load_sliced(*options) #var_set_hash[key] = true if lang == :EN || lang == :en # We add the value only if it's language is in the main languages or if lang is nil - if (Goo.accepted_lang.nil? || Goo.main_lang.nil?) + if Goo.main_lang.nil? 
models_by_id[id].send("#{v}=", object, on_load: true) elsif (v.to_s.eql?("prefLabel")) - # Special treatment for prefLabel where we want to extract the main_lang first, then accepted lang if no main. - # Then anything if no main or accepted found - if lang.to_s.downcase.eql?(Goo.main_lang) - models_by_id[id].send("#{v}=", object, on_load: true) - main_lang_hash[key] = true - end + # Special treatment for prefLabel where we want to extract the main_lang first, or anything else if !main_lang_hash[key] - if Goo.accepted_lang.include?(lang.to_s.downcase) - models_by_id[id].send("#{v}=", object, on_load: true) - accepted_lang_hash[key] = true - elsif !accepted_lang_hash[key] - models_by_id[id].send("#{v}=", object, on_load: true) + + models_by_id[id].send("#{v}=", object, on_load: true) + if Goo.main_lang.include?(lang.to_s.downcase) + # If prefLabel from the main_lang found we stop looking for prefLabel + main_lang_hash[key] = true end end - elsif (lang.nil? || Goo.accepted_lang.include?(lang.to_s.downcase)) + elsif (lang.nil? || Goo.main_lang.include?(lang.to_s.downcase)) models_by_id[id].send("#{v}=", object, on_load: true) end else From 157082420fab1fb9b575771325b6a22af9df3a13 Mon Sep 17 00:00:00 2001 From: Vincent Emonet Date: Mon, 12 Jun 2017 18:18:38 +0200 Subject: [PATCH 006/106] add comment on how solr docs are generated --- lib/goo/search/search.rb | 1 + 1 file changed, 1 insertion(+) diff --git a/lib/goo/search/search.rb b/lib/goo/search/search.rb index 742a5995..56a13cc8 100644 --- a/lib/goo/search/search.rb +++ b/lib/goo/search/search.rb @@ -29,6 +29,7 @@ def get_indexable_object() #in solr doc[:resource_id] = doc[:id].to_s doc[:id] = get_index_id.to_s + # id: clsUri_ONTO-ACRO_submissionNumber. 
i.e.: http://lod.nal.usda.gov/nalt/5260_NALT_4 doc end From 2e547094cb226df96f3e7d95c64c55fcd78e192c Mon Sep 17 00:00:00 2001 From: Vincent Emonet Date: Tue, 13 Jun 2017 03:02:03 +0200 Subject: [PATCH 007/106] now only index labels with the language included in main_lang --- lib/goo/base/resource.rb | 32 ++++++++++++++++++++++++-------- 1 file changed, 24 insertions(+), 8 deletions(-) diff --git a/lib/goo/base/resource.rb b/lib/goo/base/resource.rb index c12f6203..d102323c 100644 --- a/lib/goo/base/resource.rb +++ b/lib/goo/base/resource.rb @@ -228,6 +228,7 @@ def graph return col ? col.id : nil end + # Retrieve unmapped attribute from an instance (i.e.: a Class) def self.map_attributes(inst,equivalent_predicates=nil) if (inst.kind_of?(Goo::Base::Resource) && inst.unmapped.nil?) || (!inst.respond_to?(:unmapped) && inst[:unmapped].nil?) @@ -267,7 +268,27 @@ def self.map_attributes(inst,equivalent_predicates=nil) else object = unmapped_string_keys[attr_uri] end - object = object.map { |o| o.is_a?(RDF::URI) ? o : o.object } + + #binding.pry if inst.id.to_s.eql?("http://lirmm.fr/2015/resource/AGROOE_c_03") + # Now include only literal that have language in the main_langs or nil + # Old way: object = object.map { |o| o.is_a?(RDF::URI) ? o : o.object } + + object = object.map { |o| if o.is_a?(RDF::URI) + o + else + if o.respond_to?("language") + # Include only literal that have language in the main_langs or nil + if o.language.nil? + o.object + elsif Goo.main_lang.include?(o.language.to_s.downcase) + o.object + end + else + o.object + end + end } + object = object.compact + if klass.range(attr) object = object.map { |o| o.is_a?(RDF::URI) ? klass.range_object(attr,o) : o } @@ -281,13 +302,8 @@ def self.map_attributes(inst,equivalent_predicates=nil) inst.send("#{attr}=",object, on_load: true) end else - inst.send("#{attr}=", - list_attrs.include?(attr) ? 
[] : nil, on_load: true) - if inst.id.to_s == "http://purl.obolibrary.org/obo/IAO_0000415" - if attr == :definition - # binding.pry - end - end + inst.send("#{attr}=", list_attrs.include?(attr) ? [] : nil, on_load: true) + #binding.pry if inst.id.to_s.eql?("http://lirmm.fr/2015/resource/AGROOE_c_03") end end From f31367938720c5dbf7b45165ce579ff9e87bafc0 Mon Sep 17 00:00:00 2001 From: Vincent Emonet Date: Tue, 13 Jun 2017 11:34:22 +0200 Subject: [PATCH 008/106] now take prefLabel with a lang that is in main_lang in priority, then nil if not available --- lib/goo/base/resource.rb | 25 ++++++++++++++++++++----- 1 file changed, 20 insertions(+), 5 deletions(-) diff --git a/lib/goo/base/resource.rb b/lib/goo/base/resource.rb index d102323c..6b52315a 100644 --- a/lib/goo/base/resource.rb +++ b/lib/goo/base/resource.rb @@ -269,17 +269,24 @@ def self.map_attributes(inst,equivalent_predicates=nil) object = unmapped_string_keys[attr_uri] end - #binding.pry if inst.id.to_s.eql?("http://lirmm.fr/2015/resource/AGROOE_c_03") + #binding.pry if inst.id.to_s.eql?("http://lirmm.fr/2015/resource/AGROOE_c_03") && attr.to_s.eql?("prefLabel") # Now include only literal that have language in the main_langs or nil # Old way: object = object.map { |o| o.is_a?(RDF::URI) ? o : o.object } - + prefLabelNilLang = [] object = object.map { |o| if o.is_a?(RDF::URI) o else if o.respond_to?("language") # Include only literal that have language in the main_langs or nil if o.language.nil? - o.object + if attr.to_s.eql?("prefLabel") + # For prefLabel we want to take a value with a defined lang in priority + # And one with nil lang if not available + prefLabelNilLang << o.object + nil + else + o.object + end elsif Goo.main_lang.include?(o.language.to_s.downcase) o.object end @@ -294,7 +301,16 @@ def self.map_attributes(inst,equivalent_predicates=nil) o.is_a?(RDF::URI) ? 
klass.range_object(attr,o) : o } end unless list_attrs.include?(attr) - object = object.first + if attr.to_s.eql?("prefLabel") + if object.empty? + # If no value with a lang within main_lang for prefLabel, we take the nil lang + object = prefLabelNilLang.first + else + object = object.first + end + else + object = object.first + end end if inst.respond_to?(:klass) inst[attr] = object @@ -303,7 +319,6 @@ def self.map_attributes(inst,equivalent_predicates=nil) end else inst.send("#{attr}=", list_attrs.include?(attr) ? [] : nil, on_load: true) - #binding.pry if inst.id.to_s.eql?("http://lirmm.fr/2015/resource/AGROOE_c_03") end end From 4d32d7ed5d825829f58faef31cd41960f056c443 Mon Sep 17 00:00:00 2001 From: Vincent Emonet Date: Tue, 13 Jun 2017 16:01:36 +0200 Subject: [PATCH 009/106] retrieve label from bad langs --- lib/goo/base/resource.rb | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/lib/goo/base/resource.rb b/lib/goo/base/resource.rb index 6b52315a..24a05e71 100644 --- a/lib/goo/base/resource.rb +++ b/lib/goo/base/resource.rb @@ -271,8 +271,9 @@ def self.map_attributes(inst,equivalent_predicates=nil) #binding.pry if inst.id.to_s.eql?("http://lirmm.fr/2015/resource/AGROOE_c_03") && attr.to_s.eql?("prefLabel") # Now include only literal that have language in the main_langs or nil - # Old way: object = object.map { |o| o.is_a?(RDF::URI) ? o : o.object } + # Olw way: object = object.map { |o| o.is_a?(RDF::URI) ? o : o.object } prefLabelNilLang = [] + attrBadLang = [] object = object.map { |o| if o.is_a?(RDF::URI) o else @@ -289,12 +290,18 @@ def self.map_attributes(inst,equivalent_predicates=nil) end elsif Goo.main_lang.include?(o.language.to_s.downcase) o.object + else + attrBadLang << o.object end else o.object end end } object = object.compact + if object.nil? || object.empty? 
+ # If no label have been found in the main_langs, then we take from the not accepted lang + object = attrBadLang.compact + end if klass.range(attr) object = object.map { |o| From 07340fc906ca7612fe03660b0a75be01c3177ebe Mon Sep 17 00:00:00 2001 From: Vincent Emonet Date: Tue, 13 Jun 2017 17:37:37 +0200 Subject: [PATCH 010/106] fix how we retrieve labels when no labels frfrom one of the main_lang --- lib/goo/base/resource.rb | 12 +++++++----- lib/goo/base/settings/settings.rb | 8 ++++---- lib/goo/search/search.rb | 4 +++- 3 files changed, 14 insertions(+), 10 deletions(-) diff --git a/lib/goo/base/resource.rb b/lib/goo/base/resource.rb index 24a05e71..a8281d00 100644 --- a/lib/goo/base/resource.rb +++ b/lib/goo/base/resource.rb @@ -292,16 +292,13 @@ def self.map_attributes(inst,equivalent_predicates=nil) o.object else attrBadLang << o.object + nil end else o.object end end } object = object.compact - if object.nil? || object.empty? - # If no label have been found in the main_langs, then we take from the not accepted lang - object = attrBadLang.compact - end if klass.range(attr) object = object.map { |o| @@ -311,7 +308,11 @@ def self.map_attributes(inst,equivalent_predicates=nil) if attr.to_s.eql?("prefLabel") if object.empty? 
# If no value with a lang within main_lang for prefLabel, we take the nil lang - object = prefLabelNilLang.first + if prefLabelNilLang.length > 0 + object = prefLabelNilLang.first + else + object = attrBadLang.compact.first + end else object = object.first end @@ -319,6 +320,7 @@ def self.map_attributes(inst,equivalent_predicates=nil) object = object.first end end + if inst.respond_to?(:klass) inst[attr] = object else diff --git a/lib/goo/base/settings/settings.rb b/lib/goo/base/settings/settings.rb index 2a274454..ce3e9a21 100644 --- a/lib/goo/base/settings/settings.rb +++ b/lib/goo/base/settings/settings.rb @@ -232,15 +232,13 @@ def shape_attribute(attr) raise ArgumentError, "Method based attributes cannot be set" end if self.class.inverse?(attr) && !(args && args.last.instance_of?(Hash) && args.last[:on_load]) - raise ArgumentError, - "`#{attr}` is an inverse attribute. Values cannot be assigned." + raise ArgumentError, "`#{attr}` is an inverse attribute. Values cannot be assigned." end @loaded_attributes.add(attr) value = args[0] unless args.last.instance_of?(Hash) and args.last[:on_load] if self.persistent? and self.class.name_with == attr - raise ArgumentError, - "`#{attr}` attribute is used to name this resource and cannot be modified." + raise ArgumentError, "`#{attr}` attribute is used to name this resource and cannot be modified." end prev = self.instance_variable_get("@#{attr}") if !prev.nil? and !@modified_attributes.include?(attr) @@ -266,9 +264,11 @@ def shape_attribute(attr) @loaded_attributes << attr return value end + if (not @persistent) or @loaded_attributes.include?(attr) return self.instance_variable_get("@#{attr}") else + # TODO: bug here when no labels from one of the main_lang available... (when it is called by ontologies_linked_data ontologies_submission) raise Goo::Base::AttributeNotLoaded, "Attribute `#{attr}` is not loaded for #{self.id}. Loaded attributes: #{@loaded_attributes.inspect}." 
end end diff --git a/lib/goo/search/search.rb b/lib/goo/search/search.rb index 56a13cc8..06f81c0d 100644 --- a/lib/goo/search/search.rb +++ b/lib/goo/search/search.rb @@ -24,9 +24,11 @@ def get_index_id() self.class.model_settings[:search_options][:index_id].call(self) end + # Get the doc that will be indexed in solr def get_indexable_object() + # To make the code less readable the guys that wrote it managed to hide the real function called by this line + # It is "get_index_doc" in ontologies_linked_data Class.rb doc = self.class.model_settings[:search_options][:document].call(self) - #in solr doc[:resource_id] = doc[:id].to_s doc[:id] = get_index_id.to_s # id: clsUri_ONTO-ACRO_submissionNumber. i.e.: http://lod.nal.usda.gov/nalt/5260_NALT_4 From 773a0f3408d21d350463e92258655c6406e21c43 Mon Sep 17 00:00:00 2001 From: Vincent Emonet Date: Tue, 13 Jun 2017 20:42:27 +0200 Subject: [PATCH 011/106] avoid attributeNotLoaded error for empty labels now --- lib/goo/sparql/queries.rb | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/lib/goo/sparql/queries.rb b/lib/goo/sparql/queries.rb index 2da5583d..72962273 100644 --- a/lib/goo/sparql/queries.rb +++ b/lib/goo/sparql/queries.rb @@ -584,7 +584,9 @@ def self.model_load_sliced(*options) expand_equivalent_predicates(select,equivalent_predicates) main_lang_hash = {} + select.each_solution do |sol| + attr_to_load_if_empty = [] next if sol[:some_type] && klass.type_uri(collection) != sol[:some_type] if count return sol[:count_var].object @@ -748,9 +750,10 @@ def self.model_load_sliced(*options) main_lang_hash[key] = true end end - elsif (lang.nil? 
|| Goo.main_lang.include?(lang.to_s.downcase)) models_by_id[id].send("#{v}=", object, on_load: true) + else + attr_to_load_if_empty << v end else models_by_id[id].send("#{v}=", object, on_load: true) @@ -760,6 +763,12 @@ def self.model_load_sliced(*options) end end end + attr_to_load_if_empty.each do |empty_attr| + # To avoid bug where the attr is not loaded (because the data model is really bad) + if !models_by_id[id].loaded_attributes.include?(empty_attr.to_sym) + models_by_id[id].send("#{empty_attr}=", nil, on_load: true) + end + end end return models_by_id if bnode_extraction From a17bc26053b2cd15f05bbeb5a01648403b78f94f Mon Sep 17 00:00:00 2001 From: Vincent Emonet Date: Tue, 13 Jun 2017 20:45:00 +0200 Subject: [PATCH 012/106] stop trying to get bad lang values for indexing --- lib/goo/base/resource.rb | 4 ---- 1 file changed, 4 deletions(-) diff --git a/lib/goo/base/resource.rb b/lib/goo/base/resource.rb index a8281d00..34d2feb1 100644 --- a/lib/goo/base/resource.rb +++ b/lib/goo/base/resource.rb @@ -273,7 +273,6 @@ def self.map_attributes(inst,equivalent_predicates=nil) # Now include only literal that have language in the main_langs or nil # Olw way: object = object.map { |o| o.is_a?(RDF::URI) ? 
o : o.object } prefLabelNilLang = [] - attrBadLang = [] object = object.map { |o| if o.is_a?(RDF::URI) o else @@ -291,7 +290,6 @@ def self.map_attributes(inst,equivalent_predicates=nil) elsif Goo.main_lang.include?(o.language.to_s.downcase) o.object else - attrBadLang << o.object nil end else @@ -310,8 +308,6 @@ def self.map_attributes(inst,equivalent_predicates=nil) # If no value with a lang within main_lang for prefLabel, we take the nil lang if prefLabelNilLang.length > 0 object = prefLabelNilLang.first - else - object = attrBadLang.compact.first end else object = object.first From 9a2fe3146f660e5a9eae7e8cddc46fe059fc4253 Mon Sep 17 00:00:00 2001 From: Vincent Emonet Date: Tue, 13 Jun 2017 20:49:53 +0200 Subject: [PATCH 013/106] return array instead of nil when no value for attr --- lib/goo/sparql/queries.rb | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/lib/goo/sparql/queries.rb b/lib/goo/sparql/queries.rb index 72962273..4d94d090 100644 --- a/lib/goo/sparql/queries.rb +++ b/lib/goo/sparql/queries.rb @@ -764,9 +764,9 @@ def self.model_load_sliced(*options) end end attr_to_load_if_empty.each do |empty_attr| - # To avoid bug where the attr is not loaded (because the data model is really bad) + # To avoid bug where the attr is not loaded, we return an empty array (because the data model is really bad) if !models_by_id[id].loaded_attributes.include?(empty_attr.to_sym) - models_by_id[id].send("#{empty_attr}=", nil, on_load: true) + models_by_id[id].send("#{empty_attr}=", [], on_load: true) end end end From 711b8d90bdc4351dcaf8ebde97cae87b828a5079 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?J=C3=A9r=C3=B4me=20LAMARQUE?= Date: Tue, 16 Jun 2020 22:10:16 +0000 Subject: [PATCH 014/106] Gemfile.lock update --- Gemfile.lock | 13 ++++++------- 1 file changed, 6 insertions(+), 7 deletions(-) diff --git a/Gemfile.lock b/Gemfile.lock index b508c486..4281e29a 100644 --- a/Gemfile.lock +++ b/Gemfile.lock @@ -33,7 +33,7 @@ GEM tzinfo (~> 0.3.37) addressable 
(2.3.5) builder (3.2.4) - coderay (1.1.2) + coderay (1.1.3) concurrent-ruby (1.1.6) cube-ruby (0.0.3) daemons (1.3.1) @@ -54,7 +54,7 @@ GEM method_source (1.0.0) mime-types (3.3.1) mime-types-data (~> 3.2015) - mime-types-data (3.2019.1009) + mime-types-data (3.2020.0512) minitest (4.7.5) multi_json (1.14.1) multipart-post (2.1.1) @@ -62,10 +62,10 @@ GEM ruby2_keywords (~> 0.0.1) net-http-persistent (2.9.4) netrc (0.11.0) - pry (0.13.0) + pry (0.13.1) coderay (~> 1.1) method_source (~> 1.0) - rack (2.2.2) + rack (2.2.3) rack-accept (0.4.5) rack (>= 0.4) rack-post-body-to-params (0.1.8) @@ -75,7 +75,7 @@ GEM rake (13.0.1) rdf (1.0.8) addressable (>= 2.2) - redis (4.1.3) + redis (4.2.1) rest-client (2.1.0) http-accept (>= 1.7.0, < 2.0) http-cookie (>= 1.0.2, < 2.0) @@ -101,7 +101,7 @@ GEM rack (>= 1, < 3) thread_safe (0.3.6) tilt (2.0.10) - tzinfo (0.3.56) + tzinfo (0.3.57) unf (0.1.4) unf_ext unf_ext (0.0.7.7) @@ -110,7 +110,6 @@ GEM PLATFORMS ruby - x86_64-darwin-16 DEPENDENCIES activesupport From 894aba0b10a96a20c07202a634035bf2ff0cd519 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?J=C3=A9r=C3=B4me=20Lamarque?= Date: Fri, 26 Jun 2020 09:31:25 +0200 Subject: [PATCH 015/106] Fixing merge issue with NCBO master (goo/search/search.rb) --- lib/goo/search/search.rb | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/lib/goo/search/search.rb b/lib/goo/search/search.rb index 730f21de..1dc72ea9 100644 --- a/lib/goo/search/search.rb +++ b/lib/goo/search/search.rb @@ -82,6 +82,17 @@ def unindexByQuery(query, connection_name=:main) Goo.search_connection(connection_name).delete_by_query(query) end + # Get the doc that will be indexed in solr + def get_indexable_object() + # To make the code less readable the guys that wrote it managed to hide the real function called by this line + # It is "get_index_doc" in ontologies_linked_data Class.rb + doc = self.class.model_settings[:search_options][:document].call(self) + doc[:resource_id] = doc[:id].to_s + doc[:id] = 
get_index_id.to_s + # id: clsUri_ONTO-ACRO_submissionNumber. i.e.: http://lod.nal.usda.gov/nalt/5260_NALT_4 + doc + end + def indexCommit(attrs=nil, connection_name=:main) Goo.search_connection(connection_name).commit(:commit_attributes => attrs || {}) end From 1f5c0e3fd2233b939d79a66198d23f0a3592cf57 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?J=C3=A9r=C3=B4me=20LAMARQUE?= Date: Fri, 26 Jun 2020 07:36:03 +0000 Subject: [PATCH 016/106] Gemfile.lock update --- Gemfile.lock | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Gemfile.lock b/Gemfile.lock index 4281e29a..b73b61a1 100644 --- a/Gemfile.lock +++ b/Gemfile.lock @@ -1,6 +1,6 @@ GIT remote: https://github.com/ncbo/sparql-client.git - revision: 0e7f983ec590bf3cac4369366a0bffb80bd9adb2 + revision: 298f9c9a8d2637650a62f8102679aa30084bf878 branch: master specs: sparql-client (1.0.1) From bf42c5c632cfc6475b38fc4e2694d2b8e65d32e4 Mon Sep 17 00:00:00 2001 From: OntoPortal Bot Date: Fri, 3 Jul 2020 08:43:53 +0000 Subject: [PATCH 017/106] Gemfile.lock update --- Gemfile.lock | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Gemfile.lock b/Gemfile.lock index b73b61a1..26f4ee99 100644 --- a/Gemfile.lock +++ b/Gemfile.lock @@ -48,7 +48,7 @@ GEM domain_name (~> 0.5) i18n (0.9.5) concurrent-ruby (~> 1.0) - json_pure (2.3.0) + json_pure (2.3.1) macaddr (1.7.2) systemu (~> 2.6.5) method_source (1.0.0) From 09e8627a5a894f43eb4c9fd336a999da4a3015c3 Mon Sep 17 00:00:00 2001 From: OntoPortal Bot Date: Fri, 10 Jul 2020 09:32:18 +0200 Subject: [PATCH 018/106] [ontoportal-bot] Gemfile.lock update --- Gemfile.lock | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Gemfile.lock b/Gemfile.lock index 26f4ee99..b57e8564 100644 --- a/Gemfile.lock +++ b/Gemfile.lock @@ -56,7 +56,7 @@ GEM mime-types-data (~> 3.2015) mime-types-data (3.2020.0512) minitest (4.7.5) - multi_json (1.14.1) + multi_json (1.15.0) multipart-post (2.1.1) mustermann (1.1.1) ruby2_keywords (~> 0.0.1) From 
34aa8878748aca41f94b65fd3ce28629d54f2d92 Mon Sep 17 00:00:00 2001 From: OntoPortal Bot Date: Thu, 16 Jul 2020 11:44:55 +0200 Subject: [PATCH 019/106] [ontoportal-bot] Gemfile.lock update --- Gemfile.lock | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Gemfile.lock b/Gemfile.lock index b57e8564..b393b128 100644 --- a/Gemfile.lock +++ b/Gemfile.lock @@ -1,6 +1,6 @@ GIT remote: https://github.com/ncbo/sparql-client.git - revision: 298f9c9a8d2637650a62f8102679aa30084bf878 + revision: 87bc0e8976519cdcdef06169556677467a10fcfc branch: master specs: sparql-client (1.0.1) From 819d8122422fe16c3934468ab1658863883ed629 Mon Sep 17 00:00:00 2001 From: OntoPortal Bot Date: Tue, 25 Aug 2020 07:30:27 +0000 Subject: [PATCH 020/106] [ontoportal-bot] Gemfile.lock update --- Gemfile.lock | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Gemfile.lock b/Gemfile.lock index b393b128..7401eb5b 100644 --- a/Gemfile.lock +++ b/Gemfile.lock @@ -34,7 +34,7 @@ GEM addressable (2.3.5) builder (3.2.4) coderay (1.1.3) - concurrent-ruby (1.1.6) + concurrent-ruby (1.1.7) cube-ruby (0.0.3) daemons (1.3.1) docile (1.3.2) @@ -85,7 +85,7 @@ GEM builder (>= 2.1.2) faraday (>= 0.9.0) ruby2_keywords (0.0.2) - simplecov (0.18.5) + simplecov (0.19.0) docile (~> 1.1) simplecov-html (~> 0.11) simplecov-html (0.12.2) From 5d4b78df14266a1c2ee86887fbd342e72e8eb1a5 Mon Sep 17 00:00:00 2001 From: OntoPortal Bot Date: Thu, 10 Sep 2020 13:34:10 +0000 Subject: [PATCH 021/106] [ontoportal-bot] Gemfile.lock update --- Gemfile.lock | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Gemfile.lock b/Gemfile.lock index 400f4099..e69c63fa 100644 --- a/Gemfile.lock +++ b/Gemfile.lock @@ -75,7 +75,7 @@ GEM rake (13.0.1) rdf (1.0.8) addressable (>= 2.2) - redis (4.2.1) + redis (4.2.2) rest-client (2.1.0) http-accept (>= 1.7.0, < 2.0) http-cookie (>= 1.0.2, < 2.0) From 7dee2100dc26273158ddc6f4c4bbecd9d64d903d Mon Sep 17 00:00:00 2001 From: OntoPortal Bot Date: Thu, 1 Oct 2020 
10:58:23 +0200 Subject: [PATCH 022/106] [ontoportal-bot] Gemfile.lock update --- Gemfile.lock | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Gemfile.lock b/Gemfile.lock index e69c63fa..0dd2c26c 100644 --- a/Gemfile.lock +++ b/Gemfile.lock @@ -88,7 +88,7 @@ GEM simplecov (0.19.0) docile (~> 1.1) simplecov-html (~> 0.11) - simplecov-html (0.12.2) + simplecov-html (0.12.3) sinatra (2.1.0) mustermann (~> 1.0) rack (~> 2.2) From ba3edcdd1e2f6ee630b3cca29e08cc209229186e Mon Sep 17 00:00:00 2001 From: OntoPortal Bot Date: Fri, 30 Oct 2020 18:39:31 +0100 Subject: [PATCH 023/106] [ontoportal-bot] Gemfile.lock update --- Gemfile.lock | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/Gemfile.lock b/Gemfile.lock index 0dd2c26c..fdaa5132 100644 --- a/Gemfile.lock +++ b/Gemfile.lock @@ -41,8 +41,9 @@ GEM domain_name (0.5.20190701) unf (>= 0.0.5, < 1.0.0) eventmachine (1.2.7) - faraday (1.0.1) + faraday (1.1.0) multipart-post (>= 1.2, < 3) + ruby2_keywords http-accept (1.7.0) http-cookie (1.0.3) domain_name (~> 0.5) @@ -85,7 +86,7 @@ GEM builder (>= 2.1.2) faraday (>= 0.9.0) ruby2_keywords (0.0.2) - simplecov (0.19.0) + simplecov (0.19.1) docile (~> 1.1) simplecov-html (~> 0.11) simplecov-html (0.12.3) From 89a8f665f70644989adf70cd132bf0f38ef3414d Mon Sep 17 00:00:00 2001 From: Syphax Bouazzouni Date: Tue, 28 Dec 2021 16:54:30 +0100 Subject: [PATCH 024/106] fix #4 ; add regex filter --- lib/goo/base/filter.rb | 22 +++++++++++++--------- lib/goo/sparql/queries.rb | 25 ++++++++++++++++--------- 2 files changed, 29 insertions(+), 18 deletions(-) diff --git a/lib/goo/base/filter.rb b/lib/goo/base/filter.rb index 66f2095d..50fa58ec 100644 --- a/lib/goo/base/filter.rb +++ b/lib/goo/base/filter.rb @@ -11,50 +11,54 @@ def initialize(pattern) end def >(value) - @filter_tree << FILTER_TUPLE.new(:>,value) + @filter_tree << FILTER_TUPLE.new(:>, value) self end def <(value) - @filter_tree << FILTER_TUPLE.new(:<,value) + @filter_tree << 
FILTER_TUPLE.new(:<, value) self end def <=(value) - @filter_tree << FILTER_TUPLE.new(:<=,value) + @filter_tree << FILTER_TUPLE.new(:<=, value) self end def >=(value) - @filter_tree << FILTER_TUPLE.new(:>=,value) + @filter_tree << FILTER_TUPLE.new(:>=, value) self end def or(value) - @filter_tree << FILTER_TUPLE.new(:or,value) + @filter_tree << FILTER_TUPLE.new(:or, value) self end def ==(value) - @filter_tree << FILTER_TUPLE.new(:==,value) + @filter_tree << FILTER_TUPLE.new(:==, value) self end def and(value) - @filter_tree << FILTER_TUPLE.new(:and,value) + @filter_tree << FILTER_TUPLE.new(:and, value) self end def unbound - @filter_tree << FILTER_TUPLE.new(:unbound,nil) + @filter_tree << FILTER_TUPLE.new(:unbound, nil) self end def bound - @filter_tree << FILTER_TUPLE.new(:bound,nil) + @filter_tree << FILTER_TUPLE.new(:bound, nil) self end + def regex(value) + @filter_tree << FILTER_TUPLE.new(:regex, value) + self + end end end end diff --git a/lib/goo/sparql/queries.rb b/lib/goo/sparql/queries.rb index 8ed102dd..7cc04642 100644 --- a/lib/goo/sparql/queries.rb +++ b/lib/goo/sparql/queries.rb @@ -131,22 +131,29 @@ def self.query_filter_sparql(klass,filter,filter_patterns,filter_graphs, end filter_var = inspected_patterns[filter_pattern_match] if !filter_operation.value.instance_of?(Goo::Filter) - unless filter_operation.operator == :unbound || filter_operation.operator == :bound + case filter_operation.operator + when :unbound + filter_operations << "!BOUND(?#{filter_var.to_s})" + return :optional + + when :bound + filter_operations << "BOUND(?#{filter_var.to_s})" + return :optional + when :regex + if filter_operation.value.is_a?(String) + filter_operations << "REGEX(?#{filter_var.to_s} , \"#{filter_operation.value.to_s}\")" + end + + else value = RDF::Literal.new(filter_operation.value) if filter_operation.value.is_a? 
String value = RDF::Literal.new(filter_operation.value, :datatype => RDF::XSD.string) end filter_operations << ( "?#{filter_var.to_s} #{sparql_op_string(filter_operation.operator)} " + - " #{value.to_ntriples}") - else - if filter_operation.operator == :unbound - filter_operations << "!BOUND(?#{filter_var.to_s})" - else - filter_operations << "BOUND(?#{filter_var.to_s})" - end - return :optional + " #{value.to_ntriples}") end + else filter_operations << "#{sparql_op_string(filter_operation.operator)}" query_filter_sparql(klass,filter_operation.value,filter_patterns, From 694aa19dc46941d01535f07996320307f74eb84d Mon Sep 17 00:00:00 2001 From: Syphax Bouazzouni Date: Tue, 15 Mar 2022 18:13:23 +0100 Subject: [PATCH 025/106] add main_languages configuration variable --- lib/goo.rb | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/lib/goo.rb b/lib/goo.rb index 1867c7d6..fedae62e 100644 --- a/lib/goo.rb +++ b/lib/goo.rb @@ -25,6 +25,8 @@ module Goo @@resource_options = Set.new([:persistent]).freeze + # Define the languages from which the properties values will be taken + @@main_languages = %w[en] @@configure_flag = false @@sparql_backends = {} @@model_by_name = {} @@ -42,6 +44,19 @@ module Goo @@slice_loading_size = 500 + + def self.main_languages + @@main_languages + end + def self.main_languages=(lang) + @@main_languages = lang + end + + def self.language_includes(lang) + lang_str = lang.to_s + main_languages.index { |l| lang_str.downcase.eql?(l) || lang_str.upcase.eql?(l)} + end + def self.add_namespace(shortcut, namespace,default=false) if !(namespace.instance_of? 
RDF::Vocabulary) raise ArgumentError, "Namespace must be a RDF::Vocabulary object" From eec4d6066655ba4657dab173cb6d0765e8ce6e45 Mon Sep 17 00:00:00 2001 From: Syphax Bouazzouni Date: Tue, 15 Mar 2022 18:15:19 +0100 Subject: [PATCH 026/106] add LanguageFilter class to filter an attribute values by the languages --- lib/goo/sparql/mixins/solution_lang_filter.rb | 83 +++++++++++++++++++ lib/goo/sparql/sparql.rb | 1 + 2 files changed, 84 insertions(+) create mode 100644 lib/goo/sparql/mixins/solution_lang_filter.rb diff --git a/lib/goo/sparql/mixins/solution_lang_filter.rb b/lib/goo/sparql/mixins/solution_lang_filter.rb new file mode 100644 index 00000000..af02b44a --- /dev/null +++ b/lib/goo/sparql/mixins/solution_lang_filter.rb @@ -0,0 +1,83 @@ +module Goo + module SPARQL + module Solution + class LanguageFilter + + def initialize + @other_languages_values = {} + end + + def other_languages_values + @other_languages_values + end + + def main_lang_filter(id, attr, old_values, new_value) + index, value = lang_index old_values, new_value + save_other_lang_val(id, attr, index, new_value) unless index.eql? :no_lang + [index, value] + end + + def fill_models_with_other_languages(models_by_id, list_attributes) + @other_languages_values.each do |id, languages_values| + languages_values.each do |attr, index_values| + model_attribute_val = models_by_id[id].instance_variable_get("@#{attr.to_s}") + values = languages_values_to_set(index_values, model_attribute_val) + + if !values.nil? && list_attributes.include?(attr) + models_by_id[id].send("#{attr.to_s}=", values || [], on_load: true) + elsif !values.nil? + models_by_id[id].send("#{attr.to_s}=", values.first || nil, on_load: true) + end + end + end + end + + def languages_values_to_set(language_values, no_lang_values) + + values = nil + matched_lang, not_matched_lang = matched_languages(language_values, no_lang_values) + if !matched_lang.empty? 
+ main_lang = Array(matched_lang[:'0']) + Array(matched_lang[:no_lang]) + secondary_languages = matched_lang.select { |key| key != :'0' && key != :no_lang }.sort.map { |x| x[1] }.flatten + values = main_lang + secondary_languages + elsif !not_matched_lang.empty? + values = not_matched_lang + end + values + end + + private + + def lang_index(object, new_value) + lang = new_value.language + if lang.nil? + [:no_lang, object] + else + index = Goo.language_includes(lang) + index = index ? index.to_s.to_sym : :not_matched + [index, new_value] + end + end + + def save_other_lang_val(id, attr, index, value) + @other_languages_values[id] ||= {} + @other_languages_values[id][attr] ||= {} + @other_languages_values[id][attr][index] ||= [] + + unless @other_languages_values[id][attr][index].include?(value.to_s) + @other_languages_values[id][attr][index] += Array(value.to_s) + end + end + + + + def matched_languages(index_values, model_attribute_val) + not_matched_lang = index_values[:not_matched] + matched_lang = index_values.reject { |key| key == :not_matched } + matched_lang[:no_lang] = Array(model_attribute_val) unless model_attribute_val.nil? 
+ [matched_lang, not_matched_lang] + end + end + end + end +end diff --git a/lib/goo/sparql/sparql.rb b/lib/goo/sparql/sparql.rb index dfd3d0a6..6fa1d582 100644 --- a/lib/goo/sparql/sparql.rb +++ b/lib/goo/sparql/sparql.rb @@ -1,6 +1,7 @@ require "sparql/client" require_relative "mixins/query_pattern" +require_relative "mixins/solution_lang_filter" require_relative "query_builder" require_relative "solutions_mapper" require_relative "client" From 31b0bb051c1bf52e8fea19bd77a3dd073d0f436f Mon Sep 17 00:00:00 2001 From: Syphax Bouazzouni Date: Tue, 15 Mar 2022 18:15:44 +0100 Subject: [PATCH 027/106] use LanguageFilter in the solution mapper --- lib/goo/sparql/solutions_mapper.rb | 47 +++++++++++++++--------------- 1 file changed, 23 insertions(+), 24 deletions(-) diff --git a/lib/goo/sparql/solutions_mapper.rb b/lib/goo/sparql/solutions_mapper.rb index 386b3f18..209bf316 100644 --- a/lib/goo/sparql/solutions_mapper.rb +++ b/lib/goo/sparql/solutions_mapper.rb @@ -19,6 +19,7 @@ def initialize(aggregate_projections, bnode_extraction, embed_struct, @variables = variables @options = options + end def map_each_solutions(select) @@ -31,9 +32,9 @@ def map_each_solutions(select) found = Set.new objects_new = {} - var_set_hash = {} list_attributes = Set.new(klass.attributes(:list)) all_attributes = Set.new(klass.attributes(:all)) + @lang_filter = Goo::SPARQL::Solution::LanguageFilter.new select.each_solution do |sol| next if sol[:some_type] && klass.type_uri(collection) != sol[:some_type] @@ -83,11 +84,13 @@ def map_each_solutions(select) object = object_to_array(id, @klass_struct, @models_by_id, object, v) if list_attributes.include?(v) - model_map_attributes_values(id, var_set_hash, @models_by_id, object, sol, v) unless object.nil? 
+ model_map_attributes_values(id, object, sol, v) end end + @lang_filter.fill_models_with_other_languages(@models_by_id, list_attributes) return @models_by_id if @bnode_extraction + model_set_collection_attributes(collection, klass, @models_by_id, objects_new) #remove from models_by_id elements that were not touched @@ -119,7 +122,6 @@ def model_set_unmapped(models_by_id, sol) end end - def create_struct(bnode_extraction, klass, models_by_id, sol, variables) list_attributes = Set.new(klass.attributes(:list)) struct = klass.range(bnode_extraction).new @@ -144,6 +146,7 @@ def create_class_model(id, klass, klass_struct) klass_model.klass = klass if klass_struct klass_model end + def models_unmapped_to_array(models_by_id) models_by_id.each do |idm, m| m.unmmaped_to_array @@ -215,36 +218,32 @@ def model_set_collection_attributes(collection, klass, models_by_id, objects_new def get_collection_value(collection, klass) collection_value = nil if klass.collection_opts.instance_of?(Symbol) - if collection.is_a?(Array) && (collection.length == 1) - collection_value = collection.first - end - if collection.respond_to? :id - collection_value = collection - end + collection_value = collection.first if collection.is_a?(Array) && (collection.length == 1) + collection_value = collection if collection.respond_to? :id end collection_value end - def model_map_attributes_values(id, var_set_hash, models_by_id, object, sol, v) - if models_by_id[id].respond_to?(:klass) - models_by_id[id][v] = object if models_by_id[id][v].nil? + def model_map_attributes_values(id, object, sol, v) + + if @models_by_id[id].respond_to?(:klass) + @models_by_id[id][v] = object if @models_by_id[id][v].nil? else - model_attribute_val = models_by_id[id].instance_variable_get("@#{v.to_s}") - if (!models_by_id[id].class.handler?(v) || model_attribute_val.nil?) 
&& v != :id + model_attribute_val = @models_by_id[id].instance_variable_get("@#{v.to_s}") + if (!@models_by_id[id].class.handler?(v) || model_attribute_val.nil?) && v != :id # if multiple language values are included for a given property, set the # corresponding model attribute to the English language value - NCBO-1662 if sol[v].kind_of?(RDF::Literal) - key = "#{v}#__#{id.to_s}" - models_by_id[id].send("#{v}=", object, on_load: true) unless var_set_hash[key] - lang = sol[v].language - var_set_hash[key] = true if lang == :EN || lang == :en - else - models_by_id[id].send("#{v}=", object, on_load: true) + index, value = @lang_filter.main_lang_filter id, v, object, sol[v] + @models_by_id[id].send("#{v}=", value, on_load: true) if index.eql? :no_lang + elsif model_attribute_val.nil? + @models_by_id[id].send("#{v}=", object, on_load: true) end end end end + def object_to_array(id, klass_struct, models_by_id, object, v) pre = klass_struct ? models_by_id[id][v] : models_by_id[id].instance_variable_get("@#{v}") @@ -301,11 +300,11 @@ def get_pre_val(id, models_by_id, object, v, read_only) if models_by_id[id] && ((models_by_id[id].respond_to?(:klass) && models_by_id[id]) || models_by_id[id].loaded_attributes.include?(v)) - if !read_only - pre_val = models_by_id[id].instance_variable_get("@#{v}") + pre_val = if !read_only + models_by_id[id].instance_variable_get("@#{v}") else - pre_val = models_by_id[id][v] - end + models_by_id[id][v] + end pre_val = pre_val.select { |x| x.id == object }.first if pre_val.is_a?(Array) end From 55a931367c2ec1dd6c8c07e51db8d01bc8e0a298 Mon Sep 17 00:00:00 2001 From: Syphax Bouazzouni Date: Tue, 15 Mar 2022 18:18:30 +0100 Subject: [PATCH 028/106] use the LanguageFilter in map_attributes --- lib/goo/base/resource.rb | 206 ++++++++++++++++++--------------------- 1 file changed, 96 insertions(+), 110 deletions(-) diff --git a/lib/goo/base/resource.rb b/lib/goo/base/resource.rb index c12f6203..34504069 100644 --- a/lib/goo/base/resource.rb +++ 
b/lib/goo/base/resource.rb @@ -42,9 +42,7 @@ def valid? self.class.attributes.each do |attr| inst_value = self.instance_variable_get("@#{attr}") attr_errors = Goo::Validators::Enforce.enforce(self,attr,inst_value) - unless attr_errors.nil? - validation_errors[attr] = attr_errors - end + validation_errors[attr] = attr_errors unless attr_errors.nil? end if !@persistent && validation_errors.length == 0 @@ -70,18 +68,14 @@ def valid? end def id=(new_id) - if !@id.nil? and @persistent - raise ArgumentError, "The id of a persistent object cannot be changed." - end + raise ArgumentError, "The id of a persistent object cannot be changed." if !@id.nil? and @persistent raise ArgumentError, "ID must be an RDF::URI" unless new_id.kind_of?(RDF::URI) @id = new_id end def id if @id.nil? - if self.class.name_with == :id - raise IDGenerationError, ":id must be set if configured in name_with" - end + raise IDGenerationError, ":id must be set if configured in name_with" if self.class.name_with == :id custom_name = self.class.name_with if custom_name.instance_of?(Symbol) @id = id_from_attribute() @@ -153,9 +147,7 @@ def unmmaped_to_array def delete(*args) if self.kind_of?(Goo::Base::Enum) - unless args[0] && args[0][:init_enum] - raise ArgumentError, "Enums cannot be deleted" - end + raise ArgumentError, "Enums cannot be deleted" unless args[0] && args[0][:init_enum] end raise ArgumentError, "This object is not persistent and cannot be deleted" if !@persistent @@ -163,9 +155,7 @@ def delete(*args) if !fully_loaded? missing = missing_load_attributes options_load = { models: [ self ], klass: self.class, :include => missing } - if self.class.collection_opts - options_load[:collection] = self.collection - end + options_load[:collection] = self.collection if self.class.collection_opts Goo::SPARQL::Queries.model_load(options_load) end @@ -181,9 +171,7 @@ def delete(*args) end @persistent = false @modified = true - if self.class.inmutable? 
&& self.class.inm_instances - self.class.load_inmutable_instances - end + self.class.load_inmutable_instances if self.class.inmutable? && self.class.inm_instances return nil end @@ -191,15 +179,11 @@ def bring(*opts) opts.each do |k| if k.kind_of?(Hash) k.each do |k2,v| - if self.class.handler?(k2) - raise ArgumentError, "Unable to bring a method based attr #{k2}" - end + raise ArgumentError, "Unable to bring a method based attr #{k2}" if self.class.handler?(k2) self.instance_variable_set("@#{k2}",nil) end else - if self.class.handler?(k) - raise ArgumentError, "Unable to bring a method based attr #{k}" - end + raise ArgumentError, "Unable to bring a method based attr #{k}" if self.class.handler?(k) self.instance_variable_set("@#{k}",nil) end end @@ -214,9 +198,7 @@ def bring(*opts) def graph opts = self.class.collection_opts - if opts.nil? - return self.class.uri_type - end + return self.class.uri_type if opts.nil? col = collection if col.is_a?Array if col.length == 1 @@ -228,79 +210,14 @@ def graph return col ? col.id : nil end - def self.map_attributes(inst,equivalent_predicates=nil) - if (inst.kind_of?(Goo::Base::Resource) && inst.unmapped.nil?) || - (!inst.respond_to?(:unmapped) && inst[:unmapped].nil?) - raise ArgumentError, "Resource.map_attributes only works for :unmapped instances" - end - klass = inst.respond_to?(:klass) ? inst[:klass] : inst.class - unmapped = inst.respond_to?(:klass) ? 
inst[:unmapped] : inst.unmapped - list_attrs = klass.attributes(:list) - unmapped_string_keys = Hash.new - unmapped.each do |k,v| - unmapped_string_keys[k.to_s] = v - end - klass.attributes.each do |attr| - next if inst.class.collection?(attr) #collection is already there - next unless inst.respond_to?(attr) - attr_uri = klass.attribute_uri(attr,inst.collection).to_s - if unmapped_string_keys.include?(attr_uri.to_s) || - (equivalent_predicates && equivalent_predicates.include?(attr_uri)) - object = nil - if !unmapped_string_keys.include?(attr_uri) - equivalent_predicates[attr_uri].each do |eq_attr| - if object.nil? and !unmapped_string_keys[eq_attr].nil? - object = unmapped_string_keys[eq_attr].dup - else - if object.is_a?Array - if !unmapped_string_keys[eq_attr].nil? - object.concat(unmapped_string_keys[eq_attr]) - end - end - end - end - if object.nil? - inst.send("#{attr}=", - list_attrs.include?(attr) ? [] : nil, on_load: true) - next - end - else - object = unmapped_string_keys[attr_uri] - end - object = object.map { |o| o.is_a?(RDF::URI) ? o : o.object } - if klass.range(attr) - object = object.map { |o| - o.is_a?(RDF::URI) ? klass.range_object(attr,o) : o } - end - unless list_attrs.include?(attr) - object = object.first - end - if inst.respond_to?(:klass) - inst[attr] = object - else - inst.send("#{attr}=",object, on_load: true) - end - else - inst.send("#{attr}=", - list_attrs.include?(attr) ? [] : nil, on_load: true) - if inst.id.to_s == "http://purl.obolibrary.org/obo/IAO_0000415" - if attr == :definition - # binding.pry - end - end - end - end - end def collection opts = self.class.collection_opts if opts.instance_of?(Symbol) if self.class.attributes.include?(opts) value = self.send("#{opts}") - if value.nil? - raise ArgumentError, "Collection `#{opts}` is nil" - end + raise ArgumentError, "Collection `#{opts}` is nil" if value.nil? 
return value else raise ArgumentError, "Collection `#{opts}` is not an attribute" @@ -315,9 +232,7 @@ def add_aggregate(attribute,aggregate,value) def save(*opts) if self.kind_of?(Goo::Base::Enum) - unless opts[0] && opts[0][:init_enum] - raise ArgumentError, "Enums can only be created on initialization" - end + raise ArgumentError, "Enums can only be created on initialization" unless opts[0] && opts[0][:init_enum] end batch_file = nil if opts && opts.length > 0 @@ -327,9 +242,7 @@ def save(*opts) end if !batch_file - if not modified? - return self - end + return self if not modified? raise Goo::Base::NotValidException, "Object is not valid. Check errors." unless valid? end @@ -368,9 +281,7 @@ def save(*opts) @modified_attributes = Set.new @persistent = true - if self.class.inmutable? && self.class.inm_instances - self.class.load_inmutable_instances - end + self.class.load_inmutable_instances if self.class.inmutable? && self.class.inm_instances return self end @@ -408,9 +319,7 @@ def to_hash end end @unmapped.each do |attr,values| - unless all_attr_uris.include?(attr) - attr_hash[attr] = values.map { |v| v.to_s } - end + attr_hash[attr] = values.map { |v| v.to_s } unless all_attr_uris.include?(attr) end end attr_hash[:id] = @id @@ -430,13 +339,90 @@ def self.range_object(attr,id) return range_object end - def self.find(id, *options) - if !id.instance_of?(RDF::URI) && self.name_with == :id - id = RDF::URI.new(id) + + + def self.map_attributes(inst,equivalent_predicates=nil) + if (inst.kind_of?(Goo::Base::Resource) && inst.unmapped.nil?) || + (!inst.respond_to?(:unmapped) && inst[:unmapped].nil?) + raise ArgumentError, "Resource.map_attributes only works for :unmapped instances" end - unless id.instance_of?(RDF::URI) - id = id_from_unique_attribute(name_with(),id) + klass = inst.respond_to?(:klass) ? inst[:klass] : inst.class + unmapped = inst.respond_to?(:klass) ? 
inst[:unmapped] : inst.unmapped + list_attrs = klass.attributes(:list) + unmapped_string_keys = Hash.new + unmapped.each do |k,v| + unmapped_string_keys[k.to_s] = v + end + klass.attributes.each do |attr| + next if inst.class.collection?(attr) #collection is already there + next unless inst.respond_to?(attr) + attr_uri = klass.attribute_uri(attr,inst.collection).to_s + if unmapped_string_keys.include?(attr_uri.to_s) || + (equivalent_predicates && equivalent_predicates.include?(attr_uri)) + object = nil + if !unmapped_string_keys.include?(attr_uri) + equivalent_predicates[attr_uri].each do |eq_attr| + if object.nil? and !unmapped_string_keys[eq_attr].nil? + object = unmapped_string_keys[eq_attr].dup + else + if object.is_a?Array + object.concat(unmapped_string_keys[eq_attr]) if !unmapped_string_keys[eq_attr].nil? + end + end + end + if object.nil? + inst.send("#{attr}=", list_attrs.include?(attr) ? [] : nil, on_load: true) + next + end + else + object = unmapped_string_keys[attr_uri] + end + + lang_filter = Goo::SPARQL::Solution::LanguageFilter.new + + object = object.map do |o| + if o.is_a?(RDF::URI) + o + else + literal = o + index, lang_val = lang_filter.main_lang_filter inst.id.to_s, attr, literal, literal + lang_val.to_s if index.eql? :no_lang + end + end + + object = object.compact + + other_languages_values = lang_filter.other_languages_values + other_languages_values = other_languages_values[inst.id.to_s][attr] unless other_languages_values.empty? + unless other_languages_values.nil? + object = lang_filter.languages_values_to_set(other_languages_values, object) + end + + if klass.range(attr) + object = object.map { |o| + o.is_a?(RDF::URI) ? klass.range_object(attr,o) : o } + end + object = object.first unless list_attrs.include?(attr) + if inst.respond_to?(:klass) + inst[attr] = object + else + inst.send("#{attr}=",object, on_load: true) + end + else + inst.send("#{attr}=", + list_attrs.include?(attr) ? 
[] : nil, on_load: true) + if inst.id.to_s == "http://purl.obolibrary.org/obo/IAO_0000415" + if attr == :definition + # binding.pry + end + end + end + end + end + def self.find(id, *options) + id = RDF::URI.new(id) if !id.instance_of?(RDF::URI) && self.name_with == :id + id = id_from_unique_attribute(name_with(),id) unless id.instance_of?(RDF::URI) if self.inmutable? && self.inm_instances && self.inm_instances[id] w = Goo::Base::Where.new(self) w.instance_variable_set("@result", [self.inm_instances[id]]) From 1108250e41ff37f65ad7659d8aa81ab251b93c8b Mon Sep 17 00:00:00 2001 From: Syphax Bouazzouni Date: Wed, 16 Mar 2022 08:45:39 +0100 Subject: [PATCH 029/106] add include variable to the solution mapper arguments --- lib/goo/sparql/loader.rb | 7 ++++--- lib/goo/sparql/solutions_mapper.rb | 14 ++++++++------ 2 files changed, 12 insertions(+), 9 deletions(-) diff --git a/lib/goo/sparql/loader.rb b/lib/goo/sparql/loader.rb index d877d47d..a542c820 100644 --- a/lib/goo/sparql/loader.rb +++ b/lib/goo/sparql/loader.rb @@ -93,7 +93,8 @@ def self.model_load_sliced(*options) expand_equivalent_predicates(select, equivalent_predicates) solution_mapper = Goo::SPARQL::SolutionMapper.new aggregate_projections, bnode_extraction, embed_struct, incl_embed, klass_struct, models_by_id, - predicates_map, unmapped, variables, options + predicates_map, unmapped, variables, incl, options + solution_mapper.map_each_solutions(select) end @@ -147,10 +148,10 @@ def self.get_predicate_map(predicates) predicates_map = {} uniq_p.each do |p| i = 0 - key = ("var_" + p.last_part + i.to_s).to_sym + key = ("var_#{p.last_part}#{i.to_s}").to_sym while predicates_map.include?(key) i += 1 - key = ("var_" + p.last_part + i.to_s).to_sym + key = ("var_#{p.last_part}#{i.to_s}").to_sym break if i > 10 end predicates_map[key] = p diff --git a/lib/goo/sparql/solutions_mapper.rb b/lib/goo/sparql/solutions_mapper.rb index 209bf316..57303f4d 100644 --- a/lib/goo/sparql/solutions_mapper.rb +++ 
b/lib/goo/sparql/solutions_mapper.rb @@ -6,7 +6,7 @@ class SolutionMapper def initialize(aggregate_projections, bnode_extraction, embed_struct, incl_embed, klass_struct, models_by_id, - predicates_map, unmapped, variables, options) + predicates_map, unmapped, variables, incl, options) @aggregate_projections = aggregate_projections @bnode_extraction = bnode_extraction @@ -17,6 +17,7 @@ def initialize(aggregate_projections, bnode_extraction, embed_struct, @predicates_map = predicates_map @unmapped = unmapped @variables = variables + @incl = incl @options = options @@ -28,7 +29,7 @@ def map_each_solutions(select) klass = @options[:klass] read_only = @options[:read_only] collection = @options[:collection] - incl = @options[:include] + found = Set.new objects_new = {} @@ -72,7 +73,7 @@ def map_each_solutions(select) object = sol[v] || nil #bnodes - if object.kind_of?(RDF::Node) && object.anonymous? && incl.include?(v) + if object.kind_of?(RDF::Node) && object.anonymous? && @incl.include?(v) initialize_object(id, klass, object, objects_new, v) next end @@ -101,6 +102,7 @@ def map_each_solutions(select) #next level of embed attributes include_embed_attributes(collection, @incl_embed, klass, objects_new) if @incl_embed && !@incl_embed.empty? + #bnodes bnodes = objects_new.select { |id, obj| id.is_a?(RDF::Node) && id.anonymous? } include_bnodes(bnodes, collection, klass, @models_by_id) unless bnodes.empty? @@ -182,7 +184,7 @@ def include_embed_attributes(collection, incl_embed, klass, objects_new) }.values unless range_objs.empty? range_objs.uniq! - attr_range.where().models(range_objs).in(collection).include(*next_attrs).all + attr_range.where.models(range_objs).in(collection).include(*next_attrs).all end end end @@ -225,7 +227,7 @@ def get_collection_value(collection, klass) end def model_map_attributes_values(id, object, sol, v) - + #binding.pry if v.eql? :programs if @models_by_id[id].respond_to?(:klass) @models_by_id[id][v] = object if @models_by_id[id][v].nil? 
else @@ -236,7 +238,7 @@ def model_map_attributes_values(id, object, sol, v) if sol[v].kind_of?(RDF::Literal) index, value = @lang_filter.main_lang_filter id, v, object, sol[v] @models_by_id[id].send("#{v}=", value, on_load: true) if index.eql? :no_lang - elsif model_attribute_val.nil? + elsif model_attribute_val.nil? || !object.nil? @models_by_id[id].send("#{v}=", object, on_load: true) end end From 7c9aa7260d828e74d780d967d8947eeb49d7015b Mon Sep 17 00:00:00 2001 From: Syphax Bouazzouni Date: Wed, 16 Mar 2022 14:26:37 +0100 Subject: [PATCH 030/106] change the language filter to take only one language --- lib/goo.rb | 2 ++ lib/goo/sparql/mixins/solution_lang_filter.rb | 8 ++++++-- 2 files changed, 8 insertions(+), 2 deletions(-) diff --git a/lib/goo.rb b/lib/goo.rb index fedae62e..cf7267ce 100644 --- a/lib/goo.rb +++ b/lib/goo.rb @@ -26,7 +26,9 @@ module Goo @@resource_options = Set.new([:persistent]).freeze # Define the languages from which the properties values will be taken + # It choose the first language that match otherwise return all the values @@main_languages = %w[en] + @@configure_flag = false @@sparql_backends = {} @@model_by_name = {} diff --git a/lib/goo/sparql/mixins/solution_lang_filter.rb b/lib/goo/sparql/mixins/solution_lang_filter.rb index af02b44a..f2bcc4ae 100644 --- a/lib/goo/sparql/mixins/solution_lang_filter.rb +++ b/lib/goo/sparql/mixins/solution_lang_filter.rb @@ -38,8 +38,12 @@ def languages_values_to_set(language_values, no_lang_values) matched_lang, not_matched_lang = matched_languages(language_values, no_lang_values) if !matched_lang.empty? main_lang = Array(matched_lang[:'0']) + Array(matched_lang[:no_lang]) - secondary_languages = matched_lang.select { |key| key != :'0' && key != :no_lang }.sort.map { |x| x[1] }.flatten - values = main_lang + secondary_languages + if main_lang.empty? 
+ secondary_languages = matched_lang.select { |key| key != :'0' && key != :no_lang }.sort.map { |x| x[1] } + values = secondary_languages.first + else + values = main_lang + end elsif !not_matched_lang.empty? values = not_matched_lang end From ba27011fd2b093ff04d522477010d146602d0b62 Mon Sep 17 00:00:00 2001 From: Syphax Bouazzouni Date: Fri, 18 Mar 2022 18:45:17 +0100 Subject: [PATCH 031/106] add the condition of nil? or empty? before adding the no_lang --- lib/goo/sparql/mixins/solution_lang_filter.rb | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/lib/goo/sparql/mixins/solution_lang_filter.rb b/lib/goo/sparql/mixins/solution_lang_filter.rb index f2bcc4ae..9f0a5568 100644 --- a/lib/goo/sparql/mixins/solution_lang_filter.rb +++ b/lib/goo/sparql/mixins/solution_lang_filter.rb @@ -78,7 +78,9 @@ def save_other_lang_val(id, attr, index, value) def matched_languages(index_values, model_attribute_val) not_matched_lang = index_values[:not_matched] matched_lang = index_values.reject { |key| key == :not_matched } - matched_lang[:no_lang] = Array(model_attribute_val) unless model_attribute_val.nil? + unless model_attribute_val.nil? || model_attribute_val.empty? + matched_lang[:no_lang] = Array(model_attribute_val) + end [matched_lang, not_matched_lang] end end From c7106bd6bb6d602b1aff30447ee34691f5f9984d Mon Sep 17 00:00:00 2001 From: Syphax Bouazzouni Date: Mon, 16 May 2022 10:54:16 +0200 Subject: [PATCH 032/106] uniquify the language values --- lib/goo/sparql/mixins/solution_lang_filter.rb | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/goo/sparql/mixins/solution_lang_filter.rb b/lib/goo/sparql/mixins/solution_lang_filter.rb index 9f0a5568..fb4d116b 100644 --- a/lib/goo/sparql/mixins/solution_lang_filter.rb +++ b/lib/goo/sparql/mixins/solution_lang_filter.rb @@ -47,7 +47,7 @@ def languages_values_to_set(language_values, no_lang_values) elsif !not_matched_lang.empty? 
values = not_matched_lang end - values + values&.uniq end private From 835aefa458186485eef60d3845315c478a95ff23 Mon Sep 17 00:00:00 2001 From: Syphax Bouazzouni Date: Wed, 20 Jul 2022 15:36:05 +0200 Subject: [PATCH 033/106] re-implement the lang filter for the query builder --- Gemfile | 2 +- Gemfile.lock | 4 +- lib/goo/base/resource.rb | 2 +- lib/goo/sparql/mixins/solution_lang_filter.rb | 38 ++++++++++++------- lib/goo/sparql/solutions_mapper.rb | 34 ++++++++--------- 5 files changed, 45 insertions(+), 35 deletions(-) diff --git a/Gemfile b/Gemfile index 2ca6a3b7..30167e35 100644 --- a/Gemfile +++ b/Gemfile @@ -18,4 +18,4 @@ group :profiling do gem 'thin' end -gem 'sparql-client', github: 'ncbo/sparql-client', branch: 'master' +gem 'sparql-client', github: 'ontoportal-lirmm/sparql-client', branch: 'master' diff --git a/Gemfile.lock b/Gemfile.lock index 93e4d6e1..8480f287 100644 --- a/Gemfile.lock +++ b/Gemfile.lock @@ -1,6 +1,6 @@ GIT - remote: https://github.com/ncbo/sparql-client.git - revision: fb4a89b420f8eb6dda5190a126b6c62e32c4c0c9 + remote: https://github.com/ontoportal-lirmm/sparql-client.git + revision: aed51baf4106fd0f3d0e3f9238f0aad9406aa3f0 branch: master specs: sparql-client (1.0.1) diff --git a/lib/goo/base/resource.rb b/lib/goo/base/resource.rb index 34504069..13c4b61a 100644 --- a/lib/goo/base/resource.rb +++ b/lib/goo/base/resource.rb @@ -385,7 +385,7 @@ def self.map_attributes(inst,equivalent_predicates=nil) o else literal = o - index, lang_val = lang_filter.main_lang_filter inst.id.to_s, attr, literal, literal + index, lang_val = lang_filter.main_lang_filter inst.id.to_s, attr, literal lang_val.to_s if index.eql? 
:no_lang end end diff --git a/lib/goo/sparql/mixins/solution_lang_filter.rb b/lib/goo/sparql/mixins/solution_lang_filter.rb index fb4d116b..efb33943 100644 --- a/lib/goo/sparql/mixins/solution_lang_filter.rb +++ b/lib/goo/sparql/mixins/solution_lang_filter.rb @@ -7,13 +7,11 @@ def initialize @other_languages_values = {} end - def other_languages_values - @other_languages_values - end + attr_reader :other_languages_values - def main_lang_filter(id, attr, old_values, new_value) - index, value = lang_index old_values, new_value - save_other_lang_val(id, attr, index, new_value) unless index.eql? :no_lang + def main_lang_filter(id, attr, value) + index, value = lang_index value + save_other_lang_val(id, attr, index, value) unless index.nil? ||index.eql?(:no_lang) [index, value] end @@ -22,11 +20,22 @@ def fill_models_with_other_languages(models_by_id, list_attributes) languages_values.each do |attr, index_values| model_attribute_val = models_by_id[id].instance_variable_get("@#{attr.to_s}") values = languages_values_to_set(index_values, model_attribute_val) - + m = models_by_id[id] + value = nil + is_struct = m.respond_to?(:klass) if !values.nil? && list_attributes.include?(attr) - models_by_id[id].send("#{attr.to_s}=", values || [], on_load: true) + value = values || [] + elsif !values.nil? - models_by_id[id].send("#{attr.to_s}=", values.first || nil, on_load: true) + value = values.first || nil + end + + if value + if is_struct + m[attr] = value + else + m.send("#{attr}=", value, on_load: true) + end end end end @@ -52,14 +61,17 @@ def languages_values_to_set(language_values, no_lang_values) private - def lang_index(object, new_value) - lang = new_value.language + def lang_index(object) + return [nil, object] unless object.is_a?(RDF::Literal) + + lang = object.language + if lang.nil? [:no_lang, object] else index = Goo.language_includes(lang) index = index ? 
index.to_s.to_sym : :not_matched - [index, new_value] + [index, object] end end @@ -72,8 +84,6 @@ def save_other_lang_val(id, attr, index, value) @other_languages_values[id][attr][index] += Array(value.to_s) end end - - def matched_languages(index_values, model_attribute_val) not_matched_lang = index_values[:not_matched] diff --git a/lib/goo/sparql/solutions_mapper.rb b/lib/goo/sparql/solutions_mapper.rb index e990beef..585f3ca7 100644 --- a/lib/goo/sparql/solutions_mapper.rb +++ b/lib/goo/sparql/solutions_mapper.rb @@ -6,7 +6,7 @@ class SolutionMapper def initialize(aggregate_projections, bnode_extraction, embed_struct, incl_embed, klass_struct, models_by_id, - properties_to_include, unmapped, variables,ids, options) + properties_to_include, unmapped, variables, ids, options) @aggregate_projections = aggregate_projections @bnode_extraction = bnode_extraction @@ -72,11 +72,13 @@ def map_each_solutions(select) next end - object, objects_new = get_value_object(id, objects_new, object, list_attributes, v) - add_object_to_model(id, object, v, var_set_hash) + # if multiple language values are included for a given property, set the + # corresponding model attribute to the English language value - NCBO-1662 + language, object = get_object_language(id, object, predicate) + object, objects_new = get_value_object(id, objects_new, object, list_attributes, predicate) + add_object_to_model(id, object, predicate, language) end @lang_filter.fill_models_with_other_languages(@models_by_id, list_attributes) - init_unloaded_attributes(found, list_attributes) return @models_by_id if @bnode_extraction @@ -91,7 +93,6 @@ def map_each_solutions(select) #next level of embed attributes include_embed_attributes(@incl_embed, objects_new) if @incl_embed && !@incl_embed.empty? - #bnodes blank_nodes = objects_new.select { |id, obj| id.is_a?(RDF::Node) && id.anonymous? } include_bnodes(blank_nodes, @models_by_id) unless blank_nodes.empty? 
@@ -103,6 +104,10 @@ def map_each_solutions(select) private + def get_object_language(id, object, predicate) + @lang_filter.main_lang_filter id, predicate, object + end + def init_unloaded_attributes(found, list_attributes) return if @incl.nil? @@ -167,25 +172,20 @@ def get_value_object(id, objects_new, object, list_attributes, predicate) object.uniq! end end - [object,objects_new] + [object, objects_new] end - def add_object_to_model(id, object, predicate, var_set_hash) + def add_object_to_model(id, object, predicate, lang) if @models_by_id[id].respond_to?(:klass) @models_by_id[id][predicate] = object unless object.nil? && !@models_by_id[id][predicate].nil? elsif !@models_by_id[id].class.handler?(predicate) && - !(object.nil? && !@models_by_id[id].instance_variable_get("@#{predicate}").nil?) && - predicate != :id - # if multiple language values are included for a given property, set the - # corresponding model attribute to the English language value - NCBO-1662 - if object.is_a?(RDF::Literal) - key = "#{predicate}#__#{id}" - @models_by_id[id].send("#{predicate}=", object, on_load: true) unless var_set_hash[key] - lang = object.language - var_set_hash[key] = true if %i[EN en].include?(lang) - else + !(object.nil? && !@models_by_id[id].instance_variable_get("@#{predicate}").nil?) 
&& + predicate != :id + + if (lang&.eql?(:no_lang)) || !lang @models_by_id[id].send("#{predicate}=", object, on_load: true) end + end end From d1d34dcf455491f3e6413a60e8c4edd977830cf8 Mon Sep 17 00:00:00 2001 From: Syphax Bouazzouni Date: Wed, 20 Jul 2022 15:36:05 +0200 Subject: [PATCH 034/106] re-implement the lang filter for the query builder --- Gemfile | 2 +- Gemfile.lock | 4 +- lib/goo/base/resource.rb | 2 +- lib/goo/sparql/mixins/solution_lang_filter.rb | 38 ++++++++++------ lib/goo/sparql/solutions_mapper.rb | 44 +++++++++---------- 5 files changed, 50 insertions(+), 40 deletions(-) diff --git a/Gemfile b/Gemfile index 2ca6a3b7..30167e35 100644 --- a/Gemfile +++ b/Gemfile @@ -18,4 +18,4 @@ group :profiling do gem 'thin' end -gem 'sparql-client', github: 'ncbo/sparql-client', branch: 'master' +gem 'sparql-client', github: 'ontoportal-lirmm/sparql-client', branch: 'master' diff --git a/Gemfile.lock b/Gemfile.lock index 93e4d6e1..8480f287 100644 --- a/Gemfile.lock +++ b/Gemfile.lock @@ -1,6 +1,6 @@ GIT - remote: https://github.com/ncbo/sparql-client.git - revision: fb4a89b420f8eb6dda5190a126b6c62e32c4c0c9 + remote: https://github.com/ontoportal-lirmm/sparql-client.git + revision: aed51baf4106fd0f3d0e3f9238f0aad9406aa3f0 branch: master specs: sparql-client (1.0.1) diff --git a/lib/goo/base/resource.rb b/lib/goo/base/resource.rb index 34504069..13c4b61a 100644 --- a/lib/goo/base/resource.rb +++ b/lib/goo/base/resource.rb @@ -385,7 +385,7 @@ def self.map_attributes(inst,equivalent_predicates=nil) o else literal = o - index, lang_val = lang_filter.main_lang_filter inst.id.to_s, attr, literal, literal + index, lang_val = lang_filter.main_lang_filter inst.id.to_s, attr, literal lang_val.to_s if index.eql? 
:no_lang end end diff --git a/lib/goo/sparql/mixins/solution_lang_filter.rb b/lib/goo/sparql/mixins/solution_lang_filter.rb index fb4d116b..efb33943 100644 --- a/lib/goo/sparql/mixins/solution_lang_filter.rb +++ b/lib/goo/sparql/mixins/solution_lang_filter.rb @@ -7,13 +7,11 @@ def initialize @other_languages_values = {} end - def other_languages_values - @other_languages_values - end + attr_reader :other_languages_values - def main_lang_filter(id, attr, old_values, new_value) - index, value = lang_index old_values, new_value - save_other_lang_val(id, attr, index, new_value) unless index.eql? :no_lang + def main_lang_filter(id, attr, value) + index, value = lang_index value + save_other_lang_val(id, attr, index, value) unless index.nil? ||index.eql?(:no_lang) [index, value] end @@ -22,11 +20,22 @@ def fill_models_with_other_languages(models_by_id, list_attributes) languages_values.each do |attr, index_values| model_attribute_val = models_by_id[id].instance_variable_get("@#{attr.to_s}") values = languages_values_to_set(index_values, model_attribute_val) - + m = models_by_id[id] + value = nil + is_struct = m.respond_to?(:klass) if !values.nil? && list_attributes.include?(attr) - models_by_id[id].send("#{attr.to_s}=", values || [], on_load: true) + value = values || [] + elsif !values.nil? - models_by_id[id].send("#{attr.to_s}=", values.first || nil, on_load: true) + value = values.first || nil + end + + if value + if is_struct + m[attr] = value + else + m.send("#{attr}=", value, on_load: true) + end end end end @@ -52,14 +61,17 @@ def languages_values_to_set(language_values, no_lang_values) private - def lang_index(object, new_value) - lang = new_value.language + def lang_index(object) + return [nil, object] unless object.is_a?(RDF::Literal) + + lang = object.language + if lang.nil? [:no_lang, object] else index = Goo.language_includes(lang) index = index ? 
index.to_s.to_sym : :not_matched - [index, new_value] + [index, object] end end @@ -72,8 +84,6 @@ def save_other_lang_val(id, attr, index, value) @other_languages_values[id][attr][index] += Array(value.to_s) end end - - def matched_languages(index_values, model_attribute_val) not_matched_lang = index_values[:not_matched] diff --git a/lib/goo/sparql/solutions_mapper.rb b/lib/goo/sparql/solutions_mapper.rb index 9dfc6ba4..86a23b58 100644 --- a/lib/goo/sparql/solutions_mapper.rb +++ b/lib/goo/sparql/solutions_mapper.rb @@ -6,7 +6,7 @@ class SolutionMapper def initialize(aggregate_projections, bnode_extraction, embed_struct, incl_embed, klass_struct, models_by_id, - properties_to_include, unmapped, variables,ids, options) + properties_to_include, unmapped, variables, ids, options) @aggregate_projections = aggregate_projections @bnode_extraction = bnode_extraction @@ -35,7 +35,7 @@ def map_each_solutions(select) var_set_hash = {} list_attributes = Set.new(@klass.attributes(:list)) all_attributes = Set.new(@klass.attributes(:all)) - + @lang_filter = Goo::SPARQL::Solution::LanguageFilter.new select.each_solution do |sol| next if sol[:some_type] && @klass.type_uri(@collection) != sol[:some_type] @@ -61,23 +61,25 @@ def map_each_solutions(select) next end - v = sol[:attributeProperty].to_s.to_sym + predicate = sol[:attributeProperty].to_s.to_sym - next if v.nil? || !all_attributes.include?(v) + next if predicate.nil? 
|| !all_attributes.include?(predicate) object = sol[:attributeObject] #bnodes - if bnode_id?(object, v) - objects_new = bnode_id_tuple(id, object, objects_new, v) + if bnode_id?(object, predicate) + objects_new = bnode_id_tuple(id, object, objects_new, predicate) next end - object, objects_new = get_value_object(id, objects_new, object, list_attributes, v) - add_object_to_model(id, object, v, var_set_hash) + # if multiple language values are included for a given property, set the + # corresponding model attribute to the English language value - NCBO-1662 + language, object = get_object_language(id, object, predicate) + object, objects_new = get_value_object(id, objects_new, object, list_attributes, predicate) + add_object_to_model(id, object, predicate, language) end @lang_filter.fill_models_with_other_languages(@models_by_id, list_attributes) - init_unloaded_attributes(found, list_attributes) return @models_by_id if @bnode_extraction @@ -92,7 +94,6 @@ def map_each_solutions(select) #next level of embed attributes include_embed_attributes(@incl_embed, objects_new) if @incl_embed && !@incl_embed.empty? - #bnodes blank_nodes = objects_new.select { |id, obj| id.is_a?(RDF::Node) && id.anonymous? } include_bnodes(blank_nodes, @models_by_id) unless blank_nodes.empty? @@ -104,6 +105,10 @@ def map_each_solutions(select) private + def get_object_language(id, object, predicate) + @lang_filter.main_lang_filter id, predicate, object + end + def init_unloaded_attributes(found, list_attributes) return if @incl.nil? @@ -168,25 +173,20 @@ def get_value_object(id, objects_new, object, list_attributes, predicate) object.uniq! end end - [object,objects_new] + [object, objects_new] end - def add_object_to_model(id, object, predicate, var_set_hash) + def add_object_to_model(id, object, predicate, lang) if @models_by_id[id].respond_to?(:klass) @models_by_id[id][predicate] = object unless object.nil? && !@models_by_id[id][predicate].nil? 
elsif !@models_by_id[id].class.handler?(predicate) && - !(object.nil? && !@models_by_id[id].instance_variable_get("@#{predicate}").nil?) && - predicate != :id - # if multiple language values are included for a given property, set the - # corresponding model attribute to the English language value - NCBO-1662 - if object.is_a?(RDF::Literal) - key = "#{predicate}#__#{id}" - @models_by_id[id].send("#{predicate}=", object, on_load: true) unless var_set_hash[key] - lang = object.language - var_set_hash[key] = true if %i[EN en].include?(lang) - else + !(object.nil? && !@models_by_id[id].instance_variable_get("@#{predicate}").nil?) && + predicate != :id + + if (lang&.eql?(:no_lang)) || !lang @models_by_id[id].send("#{predicate}=", object, on_load: true) end + end end From 8c84c9ed0a050c592093853d0739e7c4f14f25df Mon Sep 17 00:00:00 2001 From: Syphax Bouazzouni Date: Wed, 23 Nov 2022 09:34:58 +0100 Subject: [PATCH 035/106] force attribute values with lang to be string --- lib/goo/sparql/mixins/solution_lang_filter.rb | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/lib/goo/sparql/mixins/solution_lang_filter.rb b/lib/goo/sparql/mixins/solution_lang_filter.rb index fb4d116b..6c0205f0 100644 --- a/lib/goo/sparql/mixins/solution_lang_filter.rb +++ b/lib/goo/sparql/mixins/solution_lang_filter.rb @@ -78,8 +78,8 @@ def save_other_lang_val(id, attr, index, value) def matched_languages(index_values, model_attribute_val) not_matched_lang = index_values[:not_matched] matched_lang = index_values.reject { |key| key == :not_matched } - unless model_attribute_val.nil? || model_attribute_val.empty? - matched_lang[:no_lang] = Array(model_attribute_val) + unless model_attribute_val.nil? || model_attribute_val.to_s.empty? 
+ matched_lang[:no_lang] = Array(model_attribute_val.to_s) end [matched_lang, not_matched_lang] end From 34a9fdfed3b79e5b796af154ff7407bf63d3411f Mon Sep 17 00:00:00 2001 From: Syphax Bouazzouni Date: Wed, 23 Nov 2022 09:34:58 +0100 Subject: [PATCH 036/106] force attribute values with lang to be Array --- lib/goo/sparql/mixins/solution_lang_filter.rb | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/goo/sparql/mixins/solution_lang_filter.rb b/lib/goo/sparql/mixins/solution_lang_filter.rb index fb4d116b..4fb12292 100644 --- a/lib/goo/sparql/mixins/solution_lang_filter.rb +++ b/lib/goo/sparql/mixins/solution_lang_filter.rb @@ -78,7 +78,7 @@ def save_other_lang_val(id, attr, index, value) def matched_languages(index_values, model_attribute_val) not_matched_lang = index_values[:not_matched] matched_lang = index_values.reject { |key| key == :not_matched } - unless model_attribute_val.nil? || model_attribute_val.empty? + unless model_attribute_val.nil? || Array(model_attribute_val).empty? 
matched_lang[:no_lang] = Array(model_attribute_val) end [matched_lang, not_matched_lang] From ba4f9101985d92fc0b316121744801fcbad40b15 Mon Sep 17 00:00:00 2001 From: Syphax Bouazzouni Date: Sat, 17 Dec 2022 14:57:57 +0100 Subject: [PATCH 037/106] merge partial fix to ncbo/bioportal-project#251 --- lib/goo/sparql/query_builder.rb | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/goo/sparql/query_builder.rb b/lib/goo/sparql/query_builder.rb index a3c35a43..274cd647 100644 --- a/lib/goo/sparql/query_builder.rb +++ b/lib/goo/sparql/query_builder.rb @@ -147,7 +147,7 @@ def ids_filter(ids) filter_id = [] ids.each do |id| - filter_id << "?id = #{id.to_ntriples.to_s}" + filter_id << "?id = #{id.to_ntriples.to_s.gsub(' ', '%20')}" end filter_id_str = filter_id.join ' || ' @query.filter filter_id_str From 3ddcf11fa852011ae779bb7220d1eb8762287c45 Mon Sep 17 00:00:00 2001 From: Timothy Redmond Date: Fri, 4 Nov 2022 14:28:20 -0700 Subject: [PATCH 038/106] Revert "A small (2 line) fix for 3 or 12 (depending on how it is counted) tests + removal at least one error." This reverts commit 0e09816b121750b3bb875a5c24cb79865287fcf4. --- lib/goo/validators/enforce.rb | 2 -- 1 file changed, 2 deletions(-) diff --git a/lib/goo/validators/enforce.rb b/lib/goo/validators/enforce.rb index 2e1b9e56..d326839b 100644 --- a/lib/goo/validators/enforce.rb +++ b/lib/goo/validators/enforce.rb @@ -56,8 +56,6 @@ def self.enforce(inst,attr,value) errors_by_opt = {} enforce_opts.each do |opt| case opt - when :class - nil when :unique unless value.nil? 
dup = Goo::SPARQL::Queries.duplicate_attribute_value?(inst,attr) From 86553d235c89114a48c5a58832f67664e14c8158 Mon Sep 17 00:00:00 2001 From: Syphax Bouazzouni Date: Sat, 17 Dec 2022 16:01:14 +0100 Subject: [PATCH 039/106] fix test_model_complex.rb tests with a uniq and not reused model name --- test/test_model_complex.rb | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/test/test_model_complex.rb b/test/test_model_complex.rb index e2237c1c..8f904d8b 100644 --- a/test/test_model_complex.rb +++ b/test/test_model_complex.rb @@ -10,7 +10,7 @@ class Submission < Goo::Base::Resource end class Term < Goo::Base::Resource - model :class, + model :term, namespace: :owl, collection: :submission, name_with: :id, @@ -25,22 +25,22 @@ class Term < Goo::Base::Resource attribute :parents, namespace: :rdfs, property: lambda { |x| tree_property(x) }, - enforce: [:list, :class] + enforce: [:list, :term] attribute :ancestors, namespace: :rdfs, property: lambda { |x| tree_property(x) }, - enforce: [:list, :class], transitive: true + enforce: [:list, :term], transitive: true attribute :children, namespace: :rdfs, property: lambda { |x| tree_property(x) }, - inverse: { on: :class , attribute: :parents } + inverse: { on: :term , attribute: :parents } attribute :descendants, namespace: :rdfs, property: lambda { |x| tree_property(x) }, - inverse: { on: :class , attribute: :parents }, + inverse: { on: :term , attribute: :parents }, transitive: true def self.tree_property(*args) From 8e0e46d7785a0c58f92edc9bb81e06c592d8b745 Mon Sep 17 00:00:00 2001 From: Syphax Bouazzouni Date: Sat, 17 Dec 2022 17:13:33 +0100 Subject: [PATCH 040/106] fix test_embed_struct in test_read_only.rb --- test/test_read_only.rb | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/test/test_read_only.rb b/test/test_read_only.rb index 4496c463..268f7c86 100644 --- a/test/test_read_only.rb +++ b/test/test_read_only.rb @@ -39,11 +39,17 @@ def test_struct_find end def 
test_embed_struct - skip "not yet" + students = Student.where(enrolled: [university: [name: "Stanford"]]) .include(:name) - .include(enrolled: [:name, university: [ :address ]]) + .include(enrolled: [:name, university: [ :address, :name ]]) .read_only.all + + assert_equal 3, students.size + students.each do |st| + assert st.enrolled.any? {|e| e.is_a?(Struct) && e.university.name.eql?('Stanford')} + end + end end end From 361940a81fb4ca1bc9f85c2ee12c6d375dfc53b6 Mon Sep 17 00:00:00 2001 From: Syphax Bouazzouni Date: Sat, 17 Dec 2022 17:15:55 +0100 Subject: [PATCH 041/106] fix test_reentrant_queries by ensuring the write thread is still alive --- test/test_chunks_write.rb | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/test/test_chunks_write.rb b/test/test_chunks_write.rb index a7988d40..4f0a8676 100644 --- a/test/test_chunks_write.rb +++ b/test/test_chunks_write.rb @@ -71,7 +71,6 @@ def test_put_delete_data end def test_reentrant_queries - skip "TODO: why does this test fail?" 
ntriples_file_path = "./test/data/nemo_ontology.ntriples" # Bypass in chunks @@ -87,8 +86,8 @@ def test_reentrant_queries tput = Thread.new { Goo.sparql_data_client.put_triples(ONT_ID_EXTRA, ntriples_file_path, mime_type="application/x-turtle") + sleep(1.5) } - sleep(1.5) count_queries = 0 tq = Thread.new { 5.times do @@ -112,8 +111,8 @@ def test_reentrant_queries tdelete = Thread.new { Goo.sparql_data_client.delete_graph(ONT_ID_EXTRA) + sleep(1.5) } - sleep(1.5) count_queries = 0 tq = Thread.new { 5.times do From a30af42241aa4d9f1ac237ea88942db3cad0765f Mon Sep 17 00:00:00 2001 From: Syphax Bouazzouni Date: Sat, 17 Dec 2022 17:16:29 +0100 Subject: [PATCH 042/106] update test_inverse_on_collection test --- test/test_collections.rb | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/test/test_collections.rb b/test/test_collections.rb index 390ad349..2177c669 100644 --- a/test/test_collections.rb +++ b/test/test_collections.rb @@ -91,18 +91,18 @@ def test_unique_per_collection def test_inverse_on_collection skip "Not supported inverse on collection" - john = User.find("John").include(:name).first || - User.new(name: "John").save() + john = User.find("John").include(:name).first || User.new(name: "John").save + 5.times do |i| - Issue.new(description: "issue_#{i}", owner: john).save + Issue.find("issue_#{i}").in(john) || Issue.new(description: "issue_#{i}", owner: john).save end - - binding.pry - User.find("John",include: [:issues]).first.issues - User.find("John",include: [issues: [:desciption]]).first.issues - 5.times do |i| - Issue.find("issue_#{i}", collection: john).delete + issues = User.find("John").include(:issues).first.issues + assert_equal 5, issues.size + + issues.each do |issue| + assert_equal "issue_#{i}", issue.description + assert_equal john, issue.collection end end From 4c87b64dfa6ad51825da522b0a07fc1abcac76e0 Mon Sep 17 00:00:00 2001 From: Syphax Bouazzouni Date: Thu, 19 Jan 2023 10:06:47 +0100 Subject: [PATCH 
043/106] add REGEX filter unit test --- test/test_where.rb | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/test/test_where.rb b/test/test_where.rb index 1f5f9634..bca4b2ea 100644 --- a/test/test_where.rb +++ b/test/test_where.rb @@ -499,6 +499,12 @@ def test_filter f = Goo::Filter.new(enrolled: [ :xxx ]).unbound st = Student.where.filter(f).all assert st.length == 7 + + f = Goo::Filter.new(:name).regex("n") # will find all students that contains "n" in there name + st = Student.where.filter(f).include(:name).all # return "John" , "Daniel" and "Susan" + + assert_equal 3, st.length + assert_equal ["John","Daniel","Susan"].sort, st.map { |x| x.name }.sort end def test_aggregated From 03d8cccb5a86cc46328715a7ca2e58b44646d2c1 Mon Sep 17 00:00:00 2001 From: Syphax Bouazzouni Date: Thu, 19 Jan 2023 10:07:38 +0100 Subject: [PATCH 044/106] update query_filter_sparql to handle REGEX --- lib/goo/sparql/queries.rb | 56 +-------------------------------- lib/goo/sparql/query_builder.rb | 36 ++++++++++----------- 2 files changed, 17 insertions(+), 75 deletions(-) diff --git a/lib/goo/sparql/queries.rb b/lib/goo/sparql/queries.rb index fb8b1ec5..54189b08 100644 --- a/lib/goo/sparql/queries.rb +++ b/lib/goo/sparql/queries.rb @@ -50,61 +50,7 @@ def self.model_exist(model,id=nil,store=:main) return so.true? 
end - def self.query_filter_sparql(klass,filter,filter_patterns,filter_graphs, - filter_operations, - internal_variables, - inspected_patterns, - collection) - #create a object variable to project the value in the filter - filter.filter_tree.each do |filter_operation| - filter_pattern_match = {} - if filter.pattern.instance_of?(Symbol) - filter_pattern_match[filter.pattern] = [] - else - filter_pattern_match = filter.pattern - end - unless inspected_patterns.include?(filter_pattern_match) - attr = filter_pattern_match.keys.first - patterns_for_match(klass, attr, filter_pattern_match[attr], - filter_graphs, filter_patterns, - [],internal_variables, - subject=:id,in_union=false,in_aggregate=false, - collection=collection) - inspected_patterns[filter_pattern_match] = internal_variables.last - end - filter_var = inspected_patterns[filter_pattern_match] - if !filter_operation.value.instance_of?(Goo::Filter) - case filter_operation.operator - when :unbound - filter_operations << "!BOUND(?#{filter_var.to_s})" - return :optional - - when :bound - filter_operations << "BOUND(?#{filter_var.to_s})" - return :optional - when :regex - if filter_operation.value.is_a?(String) - filter_operations << "REGEX(?#{filter_var.to_s} , \"#{filter_operation.value.to_s}\")" - end - - else - value = RDF::Literal.new(filter_operation.value) - if filter_operation.value.is_a? 
String - value = RDF::Literal.new(filter_operation.value, :datatype => RDF::XSD.string) - end - filter_operations << ( - "?#{filter_var.to_s} #{sparql_op_string(filter_operation.operator)} " + - " #{value.to_ntriples}") - end - - else - filter_operations << "#{sparql_op_string(filter_operation.operator)}" - query_filter_sparql(klass,filter_operation.value,filter_patterns, - filter_graphs,filter_operations, - internal_variables,inspected_patterns,collection) - end - end - end + def self.model_load(*options) Goo::SPARQL::Loader.model_load(*options) diff --git a/lib/goo/sparql/query_builder.rb b/lib/goo/sparql/query_builder.rb index 274cd647..965277fc 100644 --- a/lib/goo/sparql/query_builder.rb +++ b/lib/goo/sparql/query_builder.rb @@ -310,28 +310,24 @@ def query_filter_sparql(klass, filter, filter_patterns, filter_graphs, end filter_var = inspected_patterns[filter_pattern_match] - if !filter_operation.value.instance_of?(Goo::Filter) - if filter_operation.operator == :unbound || filter_operation.operator == :bound - if filter_operation.operator == :unbound - filter_operations << "!BOUND(?#{filter_var.to_s})" - else - filter_operations << "BOUND(?#{filter_var.to_s})" - end + unless filter_operation.value.instance_of?(Goo::Filter) + case filter_operation.operator + when :unbound + filter_operations << "!BOUND(?#{filter_var.to_s})" return :optional - else - value = RDF::Literal.new(filter_operation.value) - if filter_operation.value.is_a? 
String - value = RDF::Literal.new(filter_operation.value, :datatype => RDF::XSD.string) + + when :bound + filter_operations << "BOUND(?#{filter_var.to_s})" + return :optional + when :regex + if filter_operation.value.is_a?(String) + filter_operations << "REGEX(STR(?#{filter_var.to_s}) , \"#{filter_operation.value.to_s}\")" end - filter_operations << ( - "?#{filter_var.to_s} #{sparql_op_string(filter_operation.operator)} " + - " #{value.to_ntriples}") - end - else - filter_operations << "#{sparql_op_string(filter_operation.operator)}" - query_filter_sparql(klass, filter_operation.value, filter_patterns, - filter_graphs, filter_operations, - internal_variables, inspected_patterns, collection) + else + filter_operations << "#{sparql_op_string(filter_operation.operator)}" + query_filter_sparql(klass, filter_operation.value, filter_patterns, + filter_graphs, filter_operations, + internal_variables, inspected_patterns, collection) end end end From cc0c27eb6428a6fcd5b2b159735a45d43ecac59a Mon Sep 17 00:00:00 2001 From: Syphax bouazzouni Date: Fri, 28 Apr 2023 09:41:47 +0200 Subject: [PATCH 045/106] Merge pull request #33 from ontoportal-lirmm/feature/make-regex-filter-no-case-sensitive Feature: Make the regex filter no-case sensitive --- lib/goo/sparql/query_builder.rb | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/goo/sparql/query_builder.rb b/lib/goo/sparql/query_builder.rb index a6e4f634..3cca37c4 100644 --- a/lib/goo/sparql/query_builder.rb +++ b/lib/goo/sparql/query_builder.rb @@ -321,7 +321,7 @@ def query_filter_sparql(klass, filter, filter_patterns, filter_graphs, return :optional when :regex if filter_operation.value.is_a?(String) - filter_operations << "REGEX(STR(?#{filter_var.to_s}) , \"#{filter_operation.value.to_s}\")" + filter_operations << "REGEX(STR(?#{filter_var.to_s}) , \"#{filter_operation.value.to_s}\", \"i\")" end else From 9753f042d69191b2e44eb8761318839dccd4c952 Mon Sep 17 00:00:00 2001 From: Syphax bouazzouni Date: Fri, 28 Apr 
2023 09:38:35 +0200 Subject: [PATCH 046/106] Merge pull request #34 from ontoportal-lirmm/fix/filters-with-pagination-empty Fix: Add filters patterns to select variables to resolve empty pagination --- lib/goo/sparql/query_builder.rb | 15 +++++++-------- 1 file changed, 7 insertions(+), 8 deletions(-) diff --git a/lib/goo/sparql/query_builder.rb b/lib/goo/sparql/query_builder.rb index 3cca37c4..5227dac2 100644 --- a/lib/goo/sparql/query_builder.rb +++ b/lib/goo/sparql/query_builder.rb @@ -33,13 +33,12 @@ def build_select_query(ids, variables, graphs, patterns, @order_by, variables, optional_patterns = init_order_by(@count, @klass, @order_by, optional_patterns, variables) variables, patterns = add_some_type_to_id(patterns, query_options, variables) - query_filter_str, patterns, optional_patterns = + query_filter_str, patterns, optional_patterns, filter_variables = filter_query_strings(@collection, graphs, internal_variables, @klass, optional_patterns, patterns, @query_filters) - variables = [] if @count variables.delete :some_type - select_distinct(variables, aggregate_projections) + select_distinct(variables, aggregate_projections, filter_variables) .from(graphs) .where(patterns) .union_bind_in_where(properties_to_include) @@ -135,10 +134,10 @@ def from(graphs) self end - def select_distinct(variables, aggregate_projections) - + def select_distinct(variables, aggregate_projections, filter_variables) select_vars = variables.dup reject_aggregations_from_vars(select_vars, aggregate_projections) if aggregate_projections + select_vars = (select_vars + filter_variables).uniq if @page # Fix for 4store pagination with a filter @query = @query.select(*select_vars).distinct(true) self end @@ -347,8 +346,8 @@ def filter_query_strings(collection, graphs, internal_variables, klass, optional_patterns, patterns, query_filters) query_filter_str = [] - filter_graphs = [] + filter_variables = [] inspected_patterns = {} query_filters&.each do |query_filter| filter_operations = [] @@ 
-365,9 +364,9 @@ def filter_query_strings(collection, graphs, internal_variables, klass, patterns.concat(filter_patterns) end end + filter_variables << inspected_patterns.values.last end - - [query_filter_str, patterns, optional_patterns, internal_variables] + [query_filter_str, patterns, optional_patterns, filter_variables] end def reject_aggregations_from_vars(variables, aggregate_projections) From a82e6f54014765426b22db2a6d5f5ea8ba04ddbe Mon Sep 17 00:00:00 2001 From: Syphax bouazzouni Date: Mon, 1 May 2023 19:44:20 +0200 Subject: [PATCH 047/106] Merge pull request #36 from ontoportal-lirmm/feature/add-complex-oder-by Feature: Add complex oder_by for joined attributes --- lib/goo/sparql/query_builder.rb | 53 ++++++++++++++++++++++----------- test/test_where.rb | 11 +++++++ 2 files changed, 47 insertions(+), 17 deletions(-) diff --git a/lib/goo/sparql/query_builder.rb b/lib/goo/sparql/query_builder.rb index 5227dac2..40e888d0 100644 --- a/lib/goo/sparql/query_builder.rb +++ b/lib/goo/sparql/query_builder.rb @@ -17,24 +17,25 @@ def initialize(options) @model_query_options = options[:query_options] @enable_rules = options[:rules] @order_by = options[:order_by] - + @internal_variables_map = {} @query = get_client end def build_select_query(ids, variables, graphs, patterns, query_options, properties_to_include) - internal_variables = graph_match(@collection, @graph_match, graphs, @klass, patterns, query_options, @unions) + patterns = graph_match(@collection, @graph_match, graphs, @klass, patterns, query_options, @unions) aggregate_projections, aggregate_vars, variables, optional_patterns = get_aggregate_vars(@aggregate, @collection, graphs, - @klass, @unions, variables, internal_variables) + @klass, @unions, variables) - @order_by, variables, optional_patterns = init_order_by(@count, @klass, @order_by, optional_patterns, variables) + @order_by, variables, optional_patterns = init_order_by(@count, @klass, @order_by, optional_patterns, variables,patterns, 
query_options, graphs) variables, patterns = add_some_type_to_id(patterns, query_options, variables) query_filter_str, patterns, optional_patterns, filter_variables = - filter_query_strings(@collection, graphs, internal_variables, @klass, optional_patterns, patterns, @query_filters) + filter_query_strings(@collection, graphs, @klass, optional_patterns, patterns, @query_filters) + variables = [] if @count variables.delete :some_type @@ -54,7 +55,7 @@ def build_select_query(ids, variables, graphs, patterns, @query.union(*@unions) unless @unions.empty? ids_filter(ids) if ids - order_by if @order_by # TODO test if work + order_by if @order_by put_query_aggregate_vars(aggregate_vars) if aggregate_vars count if @count @@ -117,7 +118,13 @@ def put_query_aggregate_vars(aggregate_vars) end def order_by - order_by_str = @order_by.map { |attr, order| "#{order.to_s.upcase}(?#{attr})" } + order_by_str = @order_by.map do |attr, order| + if order.is_a?(Hash) + sub_attr, order = order.first + attr = @internal_variables_map[sub_attr] + end + "#{order.to_s.upcase}(?#{attr})" + end @query.order_by(*order_by_str) self end @@ -169,6 +176,7 @@ def patterns_for_match(klass, attr, value, graphs, patterns, unions, value = "#{attr}_agg_#{in_aggregate}".to_sym end internal_variables << value + @internal_variables_map[attr] = value end add_rules(attr, klass, query_options) @@ -209,7 +217,7 @@ def walk_pattern(klass, match_patterns, graphs, patterns, unions, end end - def get_aggregate_vars(aggregate, collection, graphs, klass, unions, variables, internal_variables) + def get_aggregate_vars(aggregate, collection, graphs, klass, unions, variables) # mdorf, 6/03/20 If aggregate projections (sub-SELECT within main SELECT) use an alias, that alias cannot appear in the main SELECT # https://github.com/ncbo/goo/issues/106 # See last sentence in https://www.w3.org/TR/sparql11-query/#aggregateExample @@ -240,8 +248,6 @@ def get_aggregate_vars(aggregate, collection, graphs, klass, unions, variables, 
end def graph_match(collection, graph_match, graphs, klass, patterns, query_options, unions) - internal_variables = [] - if graph_match #make it deterministic - for caching graph_match_iteration = Goo::Base::PatternIteration.new(graph_match) @@ -249,28 +255,38 @@ def graph_match(collection, graph_match, graphs, klass, patterns, query_options, internal_variables, in_aggregate = false, query_options, collection) graphs.uniq! end - internal_variables + patterns end def get_client Goo.sparql_query_client(@store) end - def init_order_by(count, klass, order_by, optional_patterns, variables) + def init_order_by(count, klass, order_by, optional_patterns, variables, patterns, query_options, graphs) order_by = nil if count if order_by order_by = order_by.first #simple ordering ... needs to use pattern inspection order_by.each do |attr, direction| - quad = query_pattern(klass, attr) - optional_patterns << quad[1] + + if direction.is_a?(Hash) + sub_attr, direction = direction.first + graph_match_iteration = Goo::Base::PatternIteration.new(Goo::Base::Pattern.new({attr => [sub_attr]})) + old_internal = internal_variables.dup + walk_pattern(klass, graph_match_iteration, graphs, optional_patterns, @unions, internal_variables, in_aggregate = false, query_options, @collection) + variables << (internal_variables - old_internal).last + else + quad = query_pattern(klass, attr) + optional_patterns << quad[1] + variables << attr + end + #patterns << quad[1] #mdorf, 9/22/16 If an ORDER BY clause exists, the columns used in the ORDER BY should be present in the SPARQL select #variables << attr unless variables.include?(attr) end - variables = %i[id attributeProperty attributeObject] end - [order_by, variables, optional_patterns] + [order_by, variables, optional_patterns, patterns] end def sparql_op_string(op) @@ -342,7 +358,7 @@ def query_filter_sparql(klass, filter, filter_patterns, filter_graphs, end end - def filter_query_strings(collection, graphs, internal_variables, klass, + def 
filter_query_strings(collection, graphs, klass, optional_patterns, patterns, query_filters) query_filter_str = [] @@ -382,6 +398,9 @@ def add_some_type_to_id(patterns, query_options, variables) [variables, patterns] end + def internal_variables + @internal_variables_map.values + end end end end diff --git a/test/test_where.rb b/test/test_where.rb index bca4b2ea..30d933e3 100644 --- a/test/test_where.rb +++ b/test/test_where.rb @@ -600,4 +600,15 @@ def test_include_inverse_with_find end end + def test_complex_order_by + u = University.where.include(address: [:country]).order_by(address: {country: :asc}).all + countries = u.map {|x| x.address.map{|a| a.country}}.flatten + assert_equal countries.sort, countries + + + u = University.where.include(address: [:country]).order_by(address: {country: :desc}).all + countries = u.map {|x| x.address.map{|a| a.country}}.flatten + assert_equal countries.sort{|a,b| b<=>a }, countries + end + end From 04419a8ac57a79ea5a6dd4918f5b0c677252ee7e Mon Sep 17 00:00:00 2001 From: Syphax bouazzouni Date: Mon, 4 Dec 2023 11:25:39 +0100 Subject: [PATCH 048/106] Merge to master: Release 2.3.3 - Multilingual (#44) * Merge pull request #24 from ontoportal-lirmm/feature/support-multilingual-read-one-language-from-request-parameter Feature : Support multilingual - Phase 1 - Read one language * Merge branch pull request #32 from feature/language-return-all * group unmapped properties by lang * Feature: group unmapped properties by language (#38) * group unmapped properties by lang * downcase language keys of unmapped properties --------- Co-authored-by: Syphax bouazzouni * Feature: Support multi lingual - add show_language argument to the attributes getters (#39) * update define_method * update solution mapper * update get_preload_value * Merge pull request #40 from ontoportal-lirmm/support-muli-lang-part-02 Fix: save_model_values if unmmaped condition * Merge pull request #41 from ontoportal-lirmm/support-muli-lang-part-02 Feature: Refactor and 
fix the Language filter method * Merge pull request #42 from ontoportal-lirmm/feature/support-multi-langual-search Feature: Make map_attribute support showing all languages - Support multi language search --------- Co-authored-by: HADDAD Zineddine --- Gemfile | 1 + Gemfile.lock | 15 +- lib/goo.rb | 9 + lib/goo/base/resource.rb | 50 +- lib/goo/base/settings/settings.rb | 35 +- lib/goo/sparql/loader.rb | 6 + lib/goo/sparql/mixins/solution_lang_filter.rb | 275 ++++-- lib/goo/sparql/solutions_mapper.rb | 900 +++++++++--------- 8 files changed, 695 insertions(+), 596 deletions(-) diff --git a/Gemfile b/Gemfile index edb00975..3564fe3b 100644 --- a/Gemfile +++ b/Gemfile @@ -7,6 +7,7 @@ gem "cube-ruby", require: "cube" gem "faraday", '~> 1.9' gem "rake" gem "uuid" +gem "request_store" group :test do gem "minitest", '< 5.0' diff --git a/Gemfile.lock b/Gemfile.lock index 34a6c39c..9fe7bd02 100644 --- a/Gemfile.lock +++ b/Gemfile.lock @@ -34,7 +34,7 @@ GEM public_suffix (>= 2.0.2, < 6.0) builder (3.2.4) coderay (1.1.3) - concurrent-ruby (1.1.10) + concurrent-ruby (1.2.2) connection_pool (2.3.0) cube-ruby (0.0.3) daemons (1.4.1) @@ -76,10 +76,10 @@ GEM method_source (1.0.0) mime-types (3.4.1) mime-types-data (~> 3.2015) - mime-types-data (3.2022.0105) + mime-types-data (3.2023.0218.1) minitest (4.7.5) multi_json (1.15.0) - multipart-post (2.2.3) + multipart-post (2.3.0) mustermann (3.0.0) ruby2_keywords (~> 0.0.1) net-http-persistent (2.9.4) @@ -88,7 +88,7 @@ GEM coderay (~> 1.1) method_source (~> 1.0) public_suffix (5.0.1) - rack (2.2.6.2) + rack (2.2.6.3) rack-accept (0.4.5) rack (>= 0.4) rack-post-body-to-params (0.1.8) @@ -100,8 +100,10 @@ GEM addressable (>= 2.2) redis (5.0.6) redis-client (>= 0.9.0) - redis-client (0.12.1) + redis-client (0.13.0) connection_pool + request_store (1.5.1) + rack (>= 1.4) rest-client (2.1.0) http-accept (>= 1.7.0, < 2.0) http-cookie (>= 1.0.2, < 2.0) @@ -132,7 +134,7 @@ GEM eventmachine (~> 1.0, >= 1.0.4) rack (>= 1, < 3) thread_safe 
(0.3.6) - tilt (2.0.11) + tilt (2.1.0) tzinfo (0.3.61) unf (0.1.4) unf_ext @@ -155,6 +157,7 @@ DEPENDENCIES rack-accept rack-post-body-to-params rake + request_store simplecov simplecov-cobertura sinatra diff --git a/lib/goo.rb b/lib/goo.rb index bb81541d..db863d2a 100644 --- a/lib/goo.rb +++ b/lib/goo.rb @@ -28,6 +28,7 @@ module Goo # Define the languages from which the properties values will be taken # It choose the first language that match otherwise return all the values @@main_languages = %w[en] + @@requested_language = nil @@configure_flag = false @@sparql_backends = {} @@ -54,6 +55,14 @@ def self.main_languages=(lang) @@main_languages = lang end + def self.requested_language + @@requested_language + end + + def self.requested_language=(lang) + @@requested_language = lang + end + def self.language_includes(lang) lang_str = lang.to_s main_languages.index { |l| lang_str.downcase.eql?(l) || lang_str.upcase.eql?(l)} diff --git a/lib/goo/base/resource.rb b/lib/goo/base/resource.rb index 13c4b61a..88bbc8ce 100644 --- a/lib/goo/base/resource.rb +++ b/lib/goo/base/resource.rb @@ -15,7 +15,7 @@ class Resource attr_reader :modified_attributes attr_reader :errors attr_reader :aggregates - attr_reader :unmapped + attr_writer :unmapped attr_reader :id @@ -134,17 +134,29 @@ def missing_load_attributes def unmapped_set(attribute,value) @unmapped ||= {} - (@unmapped[attribute] ||= Set.new) << value + @unmapped[attribute] ||= Set.new + @unmapped[attribute].merge(Array(value)) unless value.nil? + end + + def unmapped_get(attribute) + @unmapped[attribute] end def unmmaped_to_array cpy = {} + @unmapped.each do |attr,v| cpy[attr] = v.to_a end @unmapped = cpy end + def unmapped(*args) + @unmapped.transform_values do |language_values| + self.class.not_show_all_languages?(language_values, args) ? 
language_values.values.flatten: language_values + end + end + def delete(*args) if self.kind_of?(Goo::Base::Enum) raise ArgumentError, "Enums cannot be deleted" unless args[0] && args[0][:init_enum] @@ -341,13 +353,13 @@ def self.range_object(attr,id) - def self.map_attributes(inst,equivalent_predicates=nil) + def self.map_attributes(inst,equivalent_predicates=nil, include_languages: false) if (inst.kind_of?(Goo::Base::Resource) && inst.unmapped.nil?) || (!inst.respond_to?(:unmapped) && inst[:unmapped].nil?) raise ArgumentError, "Resource.map_attributes only works for :unmapped instances" end klass = inst.respond_to?(:klass) ? inst[:klass] : inst.class - unmapped = inst.respond_to?(:klass) ? inst[:unmapped] : inst.unmapped + unmapped = inst.respond_to?(:klass) ? inst[:unmapped] : inst.unmapped(include_languages: include_languages) list_attrs = klass.attributes(:list) unmapped_string_keys = Hash.new unmapped.each do |k,v| @@ -378,31 +390,18 @@ def self.map_attributes(inst,equivalent_predicates=nil) object = unmapped_string_keys[attr_uri] end - lang_filter = Goo::SPARQL::Solution::LanguageFilter.new - - object = object.map do |o| - if o.is_a?(RDF::URI) - o - else - literal = o - index, lang_val = lang_filter.main_lang_filter inst.id.to_s, attr, literal - lang_val.to_s if index.eql? :no_lang - end - end - - object = object.compact - - other_languages_values = lang_filter.other_languages_values - other_languages_values = other_languages_values[inst.id.to_s][attr] unless other_languages_values.empty? - unless other_languages_values.nil? - object = lang_filter.languages_values_to_set(other_languages_values, object) + if object.is_a?(Hash) + object = object.transform_values{|values| Array(values).map{|o|o.is_a?(RDF::URI) ? o : o.object}} + else + object = object.map {|o| o.is_a?(RDF::URI) ? o : o.object} end if klass.range(attr) object = object.map { |o| o.is_a?(RDF::URI) ? 
klass.range_object(attr,o) : o } end - object = object.first unless list_attrs.include?(attr) + + object = object.first unless list_attrs.include?(attr) || include_languages if inst.respond_to?(:klass) inst[attr] = object else @@ -411,11 +410,6 @@ def self.map_attributes(inst,equivalent_predicates=nil) else inst.send("#{attr}=", list_attrs.include?(attr) ? [] : nil, on_load: true) - if inst.id.to_s == "http://purl.obolibrary.org/obo/IAO_0000415" - if attr == :definition - # binding.pry - end - end end end diff --git a/lib/goo/base/settings/settings.rb b/lib/goo/base/settings/settings.rb index ce3e9a21..a58daae0 100644 --- a/lib/goo/base/settings/settings.rb +++ b/lib/goo/base/settings/settings.rb @@ -255,9 +255,18 @@ def shape_attribute(attr) self.instance_variable_set("@#{attr}",value) end define_method("#{attr}") do |*args| + attr_value = self.instance_variable_get("@#{attr}") + + if self.class.not_show_all_languages?(attr_value, args) + is_array = attr_value.values.first.is_a?(Array) + attr_value = attr_value.values.flatten + attr_value = attr_value.first unless is_array + end + + if self.class.handler?(attr) if @loaded_attributes.include?(attr) - return self.instance_variable_get("@#{attr}") + return attr_value end value = self.send("#{self.class.handler(attr)}") self.instance_variable_set("@#{attr}",value) @@ -266,7 +275,7 @@ def shape_attribute(attr) end if (not @persistent) or @loaded_attributes.include?(attr) - return self.instance_variable_get("@#{attr}") + return attr_value else # TODO: bug here when no labels from one of the main_lang available... (when it is called by ontologies_linked_data ontologies_submission) raise Goo::Base::AttributeNotLoaded, "Attribute `#{attr}` is not loaded for #{self.id}. Loaded attributes: #{@loaded_attributes.inspect}." 
@@ -372,6 +381,28 @@ def read_only(attributes) instance end + def show_all_languages?(args) + args.first.is_a?(Hash) && args.first.keys.include?(:include_languages) && args.first[:include_languages] + end + + def not_show_all_languages?(values, args) + values.is_a?(Hash) && !show_all_languages?(args) + end + + private + + def set_no_list_by_default(options) + if options[:enforce].nil? or !options[:enforce].include?(:list) + options[:enforce] = options[:enforce] ? (options[:enforce] << :no_list) : [:no_list] + end + end + def set_data_type(options) + if options[:type] + options[:enforce] += Array(options[:type]) + options[:enforce].uniq! + options.delete :type + end + end end end end diff --git a/lib/goo/sparql/loader.rb b/lib/goo/sparql/loader.rb index 821aba26..094fbba2 100644 --- a/lib/goo/sparql/loader.rb +++ b/lib/goo/sparql/loader.rb @@ -1,3 +1,4 @@ +require 'request_store' module Goo module SPARQL module Loader @@ -6,8 +7,10 @@ class << self def model_load(*options) options = options.last + set_request_lang(options) if options[:models] && options[:models].is_a?(Array) && \ (options[:models].length > Goo.slice_loading_size) + options = options.dup models = options[:models] include_options = options[:include] @@ -96,6 +99,9 @@ def model_load_sliced(*options) private + def set_request_lang(options) + options[:requested_lang] = RequestStore.store[:requested_lang] + end def expand_equivalent_predicates(properties_to_include, eq_p) return unless eq_p && !eq_p.empty? 
diff --git a/lib/goo/sparql/mixins/solution_lang_filter.rb b/lib/goo/sparql/mixins/solution_lang_filter.rb index b5254786..8980dcdc 100644 --- a/lib/goo/sparql/mixins/solution_lang_filter.rb +++ b/lib/goo/sparql/mixins/solution_lang_filter.rb @@ -1,99 +1,176 @@ -module Goo - module SPARQL - module Solution - class LanguageFilter - - def initialize - @other_languages_values = {} - end - - attr_reader :other_languages_values - - def main_lang_filter(id, attr, value) - index, value = lang_index value - save_other_lang_val(id, attr, index, value) unless index.nil? ||index.eql?(:no_lang) - [index, value] - end - - def fill_models_with_other_languages(models_by_id, list_attributes) - @other_languages_values.each do |id, languages_values| - languages_values.each do |attr, index_values| - model_attribute_val = models_by_id[id].instance_variable_get("@#{attr.to_s}") - values = languages_values_to_set(index_values, model_attribute_val) - m = models_by_id[id] - value = nil - is_struct = m.respond_to?(:klass) - if !values.nil? && list_attributes.include?(attr) - value = values || [] - - elsif !values.nil? - value = values.first || nil - end - - if value - if is_struct - m[attr] = value - else - m.send("#{attr}=", value, on_load: true) - end - end - end - end - end - - def languages_values_to_set(language_values, no_lang_values) - - values = nil - matched_lang, not_matched_lang = matched_languages(language_values, no_lang_values) - if !matched_lang.empty? - main_lang = Array(matched_lang[:'0']) + Array(matched_lang[:no_lang]) - if main_lang.empty? - secondary_languages = matched_lang.select { |key| key != :'0' && key != :no_lang }.sort.map { |x| x[1] } - values = secondary_languages.first - else - values = main_lang - end - elsif !not_matched_lang.empty? - values = not_matched_lang - end - values&.uniq - end - - private - - def lang_index(object) - return [nil, object] unless object.is_a?(RDF::Literal) - - lang = object.language - - if lang.nil? 
- [:no_lang, object] - else - index = Goo.language_includes(lang) - index = index ? index.to_s.to_sym : :not_matched - [index, object] - end - end - - def save_other_lang_val(id, attr, index, value) - @other_languages_values[id] ||= {} - @other_languages_values[id][attr] ||= {} - @other_languages_values[id][attr][index] ||= [] - - unless @other_languages_values[id][attr][index].include?(value.to_s) - @other_languages_values[id][attr][index] += Array(value.to_s) - end - end - - def matched_languages(index_values, model_attribute_val) - not_matched_lang = index_values[:not_matched] - matched_lang = index_values.reject { |key| key == :not_matched } - unless model_attribute_val.nil? || Array(model_attribute_val).empty? - matched_lang[:no_lang] = Array(model_attribute_val) - end - [matched_lang, not_matched_lang] - end - end - end - end -end +module Goo + module SPARQL + module Solution + class LanguageFilter + + attr_reader :requested_lang, :unmapped, :objects_by_lang + + def initialize(requested_lang: RequestStore.store[:requested_lang], unmapped: false, list_attributes: []) + @list_attributes = list_attributes + @objects_by_lang = {} + @unmapped = unmapped + @requested_lang = get_language(requested_lang) + end + + def fill_models_with_all_languages(models_by_id) + objects_by_lang.each do |id, predicates| + model = models_by_id[id] + predicates.each do |predicate, values| + + if values.values.all? { |v| v.all? { |x| literal?(x) && x.plain?} } + pull_stored_values(model, values, predicate, @unmapped) + end + end + end + end + + + def set_model_value(model, predicate, values) + set_value(model, predicate, values) do + model.send("#{predicate}=", values, on_load: true) + end + end + + def set_unmapped_value(model, predicate, value) + set_value(model, predicate, value) do + return add_unmapped_to_model(model, predicate, value) + end + end + + def models_unmapped_to_array(m) + if show_all_languages? 
+ model_group_by_lang(m) + else + m.unmmaped_to_array + end + end + + private + + + def set_value(model, predicate, value, &block) + language = object_language(value) + + if requested_lang.eql?(:ALL) || !literal?(value) || language_match?(language) + block.call + end + + if requested_lang.eql?(:ALL) || requested_lang.is_a?(Array) + language = "@none" if language.nil? || language.eql?(:no_lang) + store_objects_by_lang(model.id, predicate, value, language) + end + end + + def model_group_by_lang(model) + unmapped = model.unmapped + cpy = {} + + unmapped.each do |attr, v| + cpy[attr] = group_by_lang(v) + end + + model.unmapped = cpy + end + + def group_by_lang(values) + + return values.to_a if values.all?{|x| x.is_a?(RDF::URI) || !x.respond_to?(:language) } + + values = values.group_by { |x| x.respond_to?(:language) && x.language ? x.language.to_s.downcase : :none } + + no_lang = values[:none] || [] + return no_lang if !no_lang.empty? && no_lang.all? { |x| x.respond_to?(:plain?) && !x.plain? } + + values + end + + + def object_language(new_value) + new_value.language || :no_lang if new_value.is_a?(RDF::Literal) + end + + def language_match?(language) + # no_lang means that the object is not a literal + return true if language.eql?(:no_lang) + + return requested_lang.include?(language) if requested_lang.is_a?(Array) + + language.eql?(requested_lang) + end + + def literal?(object) + !object_language(object).nil? + end + + def store_objects_by_lang(id, predicate, object, language) + # store objects in this format: [id][predicate][language] = [objects] + return if requested_lang.is_a?(Array) && !requested_lang.include?(language) + + language_key = language.downcase + + objects_by_lang[id] ||= {} + objects_by_lang[id][predicate] ||= {} + objects_by_lang[id][predicate][language_key] ||= [] + + objects_by_lang[id][predicate][language_key] << object + end + + + def add_unmapped_to_model(model, predicate, value) + + if model.respond_to? 
:klass # struct + model[:unmapped] ||= {} + model[:unmapped][predicate] ||= [] + model[:unmapped][predicate] << value unless value.nil? + else + model.unmapped_set(predicate, value) + end + end + + def pull_stored_values(model, values, predicate, unmapped) + if unmapped + add_unmapped_to_model(model, predicate, values) + else + values = values.map do |language, values_literals| + values_string = values_literals.map{|x| x.object} + values_string = values_string.first unless list_attributes?(predicate) + [language, values_string] + end.to_h + + model.send("#{predicate}=", values, on_load: true) + end + + end + + def unmapped_get(model, predicate) + if model && model.respond_to?(:klass) # struct + model[:unmapped]&.dig(predicate) + else + model.unmapped_get(predicate) + end + + end + + def list_attributes?(predicate) + @list_attributes.include?(predicate) + end + + + def show_all_languages? + @requested_lang.is_a?(Array) || @requested_lang.eql?(:ALL) + end + + def get_language(languages) + languages = portal_language if languages.nil? || languages.empty? + lang = languages.to_s.split(',').map { |l| l.upcase.to_sym } + lang.length == 1 ? 
lang.first : lang + end + + def portal_language + Goo.main_languages.first + end + + end + end + end +end diff --git a/lib/goo/sparql/solutions_mapper.rb b/lib/goo/sparql/solutions_mapper.rb index 77b20ae0..879c1ff7 100644 --- a/lib/goo/sparql/solutions_mapper.rb +++ b/lib/goo/sparql/solutions_mapper.rb @@ -1,461 +1,439 @@ -module Goo - module SPARQL - class SolutionMapper - - BNODES_TUPLES = Struct.new(:id, :attribute) - - def initialize(aggregate_projections, bnode_extraction, embed_struct, - incl_embed, klass_struct, models_by_id, - properties_to_include, unmapped, variables, ids, options) - - @aggregate_projections = aggregate_projections - @bnode_extraction = bnode_extraction - @embed_struct = embed_struct - @incl_embed = incl_embed - @klass_struct = klass_struct - @models_by_id = models_by_id - @properties_to_include = properties_to_include - @unmapped = unmapped - @variables = variables - @ids = ids - @klass = options[:klass] - @klass = options[:klass] - @read_only = options[:read_only] - @incl = options[:include] - @count = options[:count] - @collection = options[:collection] - end - - - - def map_each_solutions(select) - - found = Set.new - objects_new = {} - list_attributes = Set.new(@klass.attributes(:list)) - all_attributes = Set.new(@klass.attributes(:all)) - @lang_filter = Goo::SPARQL::Solution::LanguageFilter.new - - select.each_solution do |sol| - next if sol[:some_type] && @klass.type_uri(@collection) != sol[:some_type] - return sol[:count_var].object if @count - - found.add(sol[:id]) - id = sol[:id] - - create_model(id) - - if @bnode_extraction - add_bnode_to_model(sol) - next - end - - if @unmapped - add_unmapped_to_model(sol) - next - end - - if @aggregate_projections - add_aggregations_to_model(sol) - next - end - - predicate = sol[:attributeProperty].to_s.to_sym - - next if predicate.nil? 
|| !all_attributes.include?(predicate) - - object = sol[:attributeObject] - - #bnodes - if bnode_id?(object, predicate) - objects_new = bnode_id_tuple(id, object, objects_new, predicate) - next - end - - # if multiple language values are included for a given property, set the - # corresponding model attribute to the English language value - NCBO-1662 - language, object = get_object_language(id, object, predicate) - object, objects_new = get_value_object(id, objects_new, object, list_attributes, predicate) - add_object_to_model(id, object, predicate, language) - end - @lang_filter.fill_models_with_other_languages(@models_by_id, list_attributes) - init_unloaded_attributes(found, list_attributes) - - return @models_by_id if @bnode_extraction - - model_set_collection_attributes(@models_by_id, objects_new) - - #remove from models_by_id elements that were not touched - @models_by_id.select! { |k, m| found.include?(k) } - - models_set_all_persistent(@models_by_id) unless @read_only - - #next level of embed attributes - include_embed_attributes(@incl_embed, objects_new) if @incl_embed && !@incl_embed.empty? - - #bnodes - blank_nodes = objects_new.select { |id, obj| id.is_a?(RDF::Node) && id.anonymous? } - include_bnodes(blank_nodes, @models_by_id) unless blank_nodes.empty? - - models_unmapped_to_array(@models_by_id) if @unmapped - - @models_by_id - end - - private - - def get_object_language(id, object, predicate) - @lang_filter.main_lang_filter id, predicate, object - end - - def init_unloaded_attributes(found, list_attributes) - return if @incl.nil? - - # Here we are setting to nil all attributes that have been included but not found in the triplestore - found.uniq.each do |model_id| - m = @models_by_id[model_id] - @incl.each do |attr_to_incl| - is_handler = m.respond_to?(:handler?) 
&& m.class.handler?(attr_to_incl) - next if attr_to_incl.to_s.eql?('unmapped') || is_handler - - loaded = m.respond_to?('loaded_attributes') && m.loaded_attributes.include?(attr_to_incl) - is_list = list_attributes.include?(attr_to_incl) - is_struct = m.respond_to?(:klass) - - # Go through all models queried - if is_struct - m[attr_to_incl] = [] if is_list && m[attr_to_incl].nil? - elsif is_list && (!loaded || m.send(attr_to_incl.to_s).nil?) - m.send("#{attr_to_incl}=", [], on_load: true) - elsif !loaded && !is_list && m.respond_to?("#{attr_to_incl}=") - m.send("#{attr_to_incl}=", nil, on_load: true) - end - end - end - end - - def get_value_object(id, objects_new, object, list_attributes, predicate) - object = object.object if object && !(object.is_a? RDF::URI) - range_for_v = @klass.range(predicate) - #binding.pry if v.eql?(:enrolled) - #dependent model creation - - if object.is_a?(RDF::URI) && (predicate != :id) && !range_for_v.nil? - if objects_new.include?(object) - object = objects_new[object] - elsif !range_for_v.inmutable? - pre_val = get_preload_value(id, object, predicate) - object, objects_new = if !@read_only - preloaded_or_new_object(object, objects_new, pre_val, predicate) - else - #depedent read only - preloaded_or_new_struct(object, objects_new, pre_val, predicate) - end - else - object = range_for_v.find(object).first - end - end - - if list_attributes.include?(predicate) - # To handle attr that are lists - pre = if @klass_struct - @models_by_id[id][predicate] - else - @models_by_id[id].instance_variable_get("@#{predicate}") - end - if object.nil? && pre.nil? - object = [] - elsif object.nil? && !pre.nil? - object = pre - elsif object - object = !pre ? [object] : (pre.dup << object) - object.uniq! - end - end - [object, objects_new] - end - - def add_object_to_model(id, object, predicate, lang) - if @models_by_id[id].respond_to?(:klass) - @models_by_id[id][predicate] = object unless object.nil? && !@models_by_id[id][predicate].nil? 
- elsif !@models_by_id[id].class.handler?(predicate) && - !(object.nil? && !@models_by_id[id].instance_variable_get("@#{predicate}").nil?) && - predicate != :id - - if (lang&.eql?(:no_lang)) || !lang - @models_by_id[id].send("#{predicate}=", object, on_load: true) - end - - end - end - - def get_preload_value(id, object, predicate) - pre_val = nil - if predicate_preloaded?(id, predicate) - pre_val = preloaded_value(id, predicate) - pre_val = pre_val.select { |x| x.id == object }.first if pre_val.is_a?(Array) - end - pre_val - end - - def preloaded_or_new_object(object, objects_new, pre_val, predicate) - object = pre_val || @klass.range_object(predicate, object) - objects_new[object.id] = object - [object, objects_new] - end - - def preloaded_or_new_struct(object, objects_new, pre_val, predicate) - struct = pre_val || @embed_struct[predicate].new - struct.id = object - struct.klass = @klass.range(predicate) - objects_new[struct.id] = struct - [struct, objects_new] - end - - def preloaded_value(id, predicate) - if !@read_only - @models_by_id[id].instance_variable_get("@#{predicate}") - else - @models_by_id[id][predicate] - end - end - - def predicate_preloaded?(id, predicate) - @models_by_id[id] && - (@models_by_id[id].respond_to?(:klass) || @models_by_id[id].loaded_attributes.include?(predicate)) - end - - def bnode_id?(object, predicate) - object.is_a?(RDF::Node) && object.anonymous? 
&& @incl.include?(predicate) - end - - def bnode_id_tuple(id, object, objects_new, predicate) - range = @klass.range(predicate) - if range.respond_to?(:new) - objects_new[object] = BNODES_TUPLES.new(id, predicate) - end - objects_new - end - - def add_bnode_to_model(sol) - id = sol[:id] - struct = create_struct(@bnode_extraction, @models_by_id, sol, @variables) - @models_by_id[id].send("#{@bnode_extraction}=", struct) - end - - def create_model(id) - @models_by_id[id] = create_class_model(id, @klass, @klass_struct) unless @models_by_id.include?(id) - end - - def model_set_unmapped(id, predicate, value) - - if @models_by_id[id].respond_to? :klass #struct - @models_by_id[id][:unmapped] ||= {} - (@models_by_id[id][:unmapped][predicate] ||= []) << value - else - @models_by_id[id].unmapped_set(predicate, value) - end - end - - def create_struct(bnode_extraction, models_by_id, sol, variables) - list_attributes = Set.new(@klass.attributes(:list)) - struct = @klass.range(bnode_extraction).new - variables.each do |v| - next if v == :id - svalue = sol[v] - struct[v] = svalue.is_a?(RDF::Node) ? svalue : svalue.object - end - if list_attributes.include?(bnode_extraction) - pre = models_by_id[sol[:id]].instance_variable_get("@#{bnode_extraction}") - pre = pre ? (pre.dup << struct) : [struct] - struct = pre - end - struct - end - - def create_class_model(id, klass, klass_struct) - klass_model = klass_struct ? 
klass_struct.new : klass.new - klass_model.id = id - klass_model.persistent = true unless klass_struct - klass_model.klass = klass if klass_struct - klass_model - end - - def models_unmapped_to_array(models_by_id) - models_by_id.each do |idm, m| - m.unmmaped_to_array - end - end - - def include_bnodes(bnodes, models_by_id) - #group by attribute - attrs = bnodes.map { |x, y| y.attribute }.uniq - attrs.each do |attr| - struct = @klass.range(attr) - - #bnodes that are in a range of goo ground models - #for example parents and children in LD class models - #we skip this cases for the moment - next if struct.respond_to?(:model_name) - - bnode_attrs = struct.new.to_h.keys - ids = bnodes.select { |x, y| y.attribute == attr }.map { |x, y| y.id } - @klass.where.models(models_by_id.select { |x, y| ids.include?(x) }.values) - .in(@collection) - .include(bnode: { attr => bnode_attrs }).all - end - end - - def include_embed_attributes(incl_embed, objects_new) - incl_embed.each do |attr, next_attrs| - #anything to join ? - attr_range = @klass.range(attr) - next if attr_range.nil? - range_objs = objects_new.select { |id, obj| - obj.instance_of?(attr_range) || (obj.respond_to?(:klass) && obj[:klass] == attr_range) - }.values - unless range_objs.empty? - range_objs.uniq! 
- query = attr_range.where().models(range_objs).in(@collection).include(*next_attrs) - query = query.read_only if @read_only - query.all - end - end - end - - def models_set_all_persistent(models_by_id) - return unless @ids - models_by_id.each do |k, m| - m.persistent = true - end - end - - def model_set_collection_attributes(models_by_id, objects_new) - collection_value = get_collection_value - if collection_value - collection_attribute = @klass.collection_opts - models_by_id.each do |id, m| - m.send("#{collection_attribute}=", collection_value) - end - objects_new.each do |id, obj_new| - if obj_new.respond_to?(:klass) - collection_attribute = obj_new[:klass].collection_opts - obj_new[collection_attribute] = collection_value - elsif obj_new.class.respond_to?(:collection_opts) && - obj_new.class.collection_opts.instance_of?(Symbol) - collection_attribute = obj_new.class.collection_opts - obj_new.send("#{collection_attribute}=", collection_value) - end - end - end - end - - def get_collection_value - collection_value = nil - if @klass.collection_opts.instance_of?(Symbol) - if @collection.is_a?(Array) && (@collection.length == 1) - collection_value = @collection.first - end - if @collection.respond_to? :id - collection_value = @collection - end - end - collection_value - end - - - def object_to_array(id, klass_struct, models_by_id, object, predicate) - pre = if klass_struct - models_by_id[id][predicate] - else - models_by_id[id].instance_variable_get("@#{predicate}") - end - if object.nil? && pre.nil? - object = [] - elsif object.nil? && !pre.nil? - object = pre - elsif object - object = !pre ? [object] : (pre.dup << object) - object.uniq! 
- end - object - end - - def dependent_model_creation(embed_struct, id, models_by_id, object, objects_new, v, options) - - read_only = options[:read_only] - if object.is_a?(RDF::URI) && v != :id - range_for_v = @klass.range(v) - if range_for_v - if objects_new.include?(object) - object = objects_new[object] - elsif !range_for_v.inmutable? - pre_val = get_pre_val(id, models_by_id, object, v, read_only) - object = get_object_from_range(pre_val, embed_struct, object, objects_new, v, options) - else - object = range_for_v.find(object).first - end - end - end - object - end - - def get_object_from_range(pre_val, embed_struct, object, objects_new, predicate) - - range_for_v = @klass.range(predicate) - if !@read_only - object = pre_val || @klass.range_object(predicate, object) - objects_new[object.id] = object - else - #depedent read only - struct = pre_val || embed_struct[predicate].new - struct.id = object - struct.klass = range_for_v - objects_new[struct.id] = struct - object = struct - end - object - end - - def get_pre_val(id, models_by_id, object, predicate) - pre_val = nil - if models_by_id[id] && - ((models_by_id[id].respond_to?(:klass) && models_by_id[id]) || - models_by_id[id].loaded_attributes.include?(predicate)) - pre_val = if !@read_only - models_by_id[id].instance_variable_get("@#{predicate}") - else - models_by_id[id][predicate] - end - - pre_val = pre_val.select { |x| x.id == object }.first if pre_val.is_a?(Array) - end - pre_val - end - - def add_unmapped_to_model(sol) - predicate = sol[:attributeProperty].to_s.to_sym - return unless @properties_to_include[predicate] - - id = sol[:id] - value = sol[:attributeObject] - - model_set_unmapped(id, @properties_to_include[predicate][:uri], value) - end - - def add_aggregations_to_model(sol) - id = sol[:id] - @aggregate_projections&.each do |aggregate_key, aggregate_val| - if @models_by_id[id].respond_to?(:add_aggregate) - @models_by_id[id].add_aggregate(aggregate_val[1], aggregate_val[0], 
sol[aggregate_key].object) - else - (@models_by_id[id].aggregates ||= []) << Goo::Base::AGGREGATE_VALUE.new(aggregate_val[1], - aggregate_val[0], - sol[aggregate_key].object) - end - end - end - end - end -end - +module Goo + module SPARQL + class SolutionMapper + BNODES_TUPLES = Struct.new(:id, :attribute) + + def initialize(aggregate_projections, bnode_extraction, embed_struct, + incl_embed, klass_struct, models_by_id, + properties_to_include, unmapped, variables, ids, options) + + @aggregate_projections = aggregate_projections + @bnode_extraction = bnode_extraction + @embed_struct = embed_struct + @incl_embed = incl_embed + @klass_struct = klass_struct + @models_by_id = models_by_id + @properties_to_include = properties_to_include + @unmapped = unmapped + @variables = variables + @ids = ids + @klass = options[:klass] + @read_only = options[:read_only] + @incl = options[:include] + @count = options[:count] + @collection = options[:collection] + @options = options + end + + def map_each_solutions(select) + found = Set.new + objects_new = {} + list_attributes = Set.new(@klass.attributes(:list)) + all_attributes = Set.new(@klass.attributes(:all)) + + @lang_filter = Goo::SPARQL::Solution::LanguageFilter.new(requested_lang: @options[:requested_lang].to_s, unmapped: @unmapped, + list_attributes: list_attributes) + + select.each_solution do |sol| + + next if sol[:some_type] && @klass.type_uri(@collection) != sol[:some_type] + return sol[:count_var].object if @count + + found.add(sol[:id]) + id = sol[:id] + + create_model(id) + + if @bnode_extraction + add_bnode_to_model(sol) + next + end + + if @unmapped + add_unmapped_to_model(sol) + next + end + + if @aggregate_projections + add_aggregations_to_model(sol) + next + end + + predicate = sol[:attributeProperty].to_s.to_sym + + next if predicate.nil? 
|| !all_attributes.include?(predicate) + + object = sol[:attributeObject] + + # bnodes + if bnode_id?(object, predicate) + objects_new = bnode_id_tuple(id, object, objects_new, predicate) + next + end + + objects, objects_new = get_value_object(id, objects_new, object, list_attributes, predicate) + add_object_to_model(id, objects, predicate) + end + + # for this moment we are not going to enrich models , maybe we will use it if the results are empty + @lang_filter.fill_models_with_all_languages(@models_by_id) + + init_unloaded_attributes(found, list_attributes) + + return @models_by_id if @bnode_extraction + + model_set_collection_attributes(@models_by_id, objects_new) + + # remove from models_by_id elements that were not touched + @models_by_id.select! { |k, _m| found.include?(k) } + + models_set_all_persistent(@models_by_id) unless @read_only + + # next level of embed attributes + include_embed_attributes(@incl_embed, objects_new) if @incl_embed && !@incl_embed.empty? + + # bnodes + blank_nodes = objects_new.select { |id, _obj| id.is_a?(RDF::Node) && id.anonymous? } + include_bnodes(blank_nodes, @models_by_id) unless blank_nodes.empty? + + models_unmapped_to_array(@models_by_id) if @unmapped + + + @models_by_id + end + + private + + def init_unloaded_attributes(found, list_attributes) + return if @incl.nil? + + # Here we are setting to nil all attributes that have been included but not found in the triplestore + found.uniq.each do |model_id| + m = @models_by_id[model_id] + @incl.each do |attr_to_incl| + is_handler = m.respond_to?(:handler?) && m.class.handler?(attr_to_incl) + next if attr_to_incl.to_s.eql?('unmapped') || is_handler + + loaded = m.respond_to?('loaded_attributes') && m.loaded_attributes.include?(attr_to_incl) + is_list = list_attributes.include?(attr_to_incl) + is_struct = m.respond_to?(:klass) + + # Go through all models queried + if is_struct + m[attr_to_incl] = [] if is_list && m[attr_to_incl].nil? 
+ elsif is_list && (!loaded || m.send(attr_to_incl.to_s).nil?) + m.send("#{attr_to_incl}=", [], on_load: true) + elsif !loaded && !is_list && m.respond_to?("#{attr_to_incl}=") + m.send("#{attr_to_incl}=", nil, on_load: true) + end + end + end + end + + def get_value_object(id, objects_new, object, list_attributes, predicate) + object = object.object if object && !(object.is_a? RDF::URI) + range_for_v = @klass.range(predicate) + + + if object.is_a?(RDF::URI) && (predicate != :id) && !range_for_v.nil? + if objects_new.include?(object) + object = objects_new[object] + elsif !range_for_v.inmutable? + pre_val = get_preload_value(id, object, predicate) + object, objects_new = if !@read_only + preloaded_or_new_object(object, objects_new, pre_val, predicate) + else + # depedent read only + preloaded_or_new_struct(object, objects_new, pre_val, predicate) + end + else + object = range_for_v.find(object).first + end + end + + if list_attributes.include?(predicate) + pre = @klass_struct ? @models_by_id[id][predicate] : @models_by_id[id].instance_variable_get("@#{predicate}") + + if object.nil? + object = pre.nil? ? [] : pre + else + object = pre.nil? ? [object] : (pre.dup << object) + object.uniq! + end + + end + [object, objects_new] + end + + def add_object_to_model(id, objects, predicate) + + if @models_by_id[id].respond_to?(:klass) + @models_by_id[id][predicate] = objects unless objects.nil? && !@models_by_id[id][predicate].nil? + elsif !@models_by_id[id].class.handler?(predicate) && + !(objects.nil? && !@models_by_id[id].instance_variable_get("@#{predicate}").nil?) 
&& + predicate != :id + @lang_filter.set_model_value(@models_by_id[id], predicate, objects) + end + end + + def get_preload_value(id, object, predicate) + pre_val = nil + if predicate_preloaded?(id, predicate) + pre_val = preloaded_value(id, predicate) + pre_val = pre_val.select { |x| x.id == object }.first if pre_val.is_a?(Array) + end + pre_val + end + + def preloaded_or_new_object(object, objects_new, pre_val, predicate) + object = pre_val || @klass.range_object(predicate, object) + objects_new[object.id] = object + [object, objects_new] + end + + def preloaded_or_new_struct(object, objects_new, pre_val, predicate) + struct = pre_val || @embed_struct[predicate].new + struct.id = object + struct.klass = @klass.range(predicate) + objects_new[struct.id] = struct + [struct, objects_new] + end + + def preloaded_value(id, predicate) + if !@read_only + @models_by_id[id].instance_variable_get("@#{predicate}") + + else + @models_by_id[id][predicate] + end + end + + def predicate_preloaded?(id, predicate) + @models_by_id[id] && + (@models_by_id[id].respond_to?(:klass) || @models_by_id[id].loaded_attributes.include?(predicate)) + end + + def bnode_id?(object, predicate) + object.is_a?(RDF::Node) && object.anonymous? 
&& @incl.include?(predicate) + end + + def bnode_id_tuple(id, object, objects_new, predicate) + range = @klass.range(predicate) + objects_new[object] = BNODES_TUPLES.new(id, predicate) if range.respond_to?(:new) + objects_new + end + + def add_bnode_to_model(sol) + id = sol[:id] + struct = create_struct(@bnode_extraction, @models_by_id, sol, @variables) + @models_by_id[id].send("#{@bnode_extraction}=", struct) + end + + def create_model(id) + @models_by_id[id] = create_class_model(id, @klass, @klass_struct) unless @models_by_id.include?(id) + end + + + def create_struct(bnode_extraction, models_by_id, sol, variables) + list_attributes = Set.new(@klass.attributes(:list)) + struct = @klass.range(bnode_extraction).new + variables.each do |v| + next if v == :id + + svalue = sol[v] + struct[v] = svalue.is_a?(RDF::Node) ? svalue : svalue.object + end + if list_attributes.include?(bnode_extraction) + pre = models_by_id[sol[:id]].instance_variable_get("@#{bnode_extraction}") + pre = pre ? (pre.dup << struct) : [struct] + struct = pre + end + struct + end + + def create_class_model(id, klass, klass_struct) + klass_model = klass_struct ? klass_struct.new : klass.new + klass_model.id = id + klass_model.persistent = true unless klass_struct + klass_model.klass = klass if klass_struct + klass_model + end + + def models_unmapped_to_array(models_by_id) + models_by_id.each do |_idm, m| + @lang_filter.models_unmapped_to_array(m) + end + end + + + def is_multiple_langs? 
+ return true if @requested_lang.is_a?(Array) || @requested_lang.eql?(:ALL) + false + end + + def include_bnodes(bnodes, models_by_id) + # group by attribute + attrs = bnodes.map { |_x, y| y.attribute }.uniq + attrs.each do |attr| + struct = @klass.range(attr) + + # bnodes that are in a range of goo ground models + # for example parents and children in LD class models + # we skip this cases for the moment + next if struct.respond_to?(:model_name) + + bnode_attrs = struct.new.to_h.keys + ids = bnodes.select { |_x, y| y.attribute == attr }.map { |_x, y| y.id } + @klass.where.models(models_by_id.select { |x, _y| ids.include?(x) }.values) + .in(@collection) + .include(bnode: { attr => bnode_attrs }).all + end + end + + def include_embed_attributes(incl_embed, objects_new) + incl_embed.each do |attr, next_attrs| + # anything to join ? + attr_range = @klass.range(attr) + next if attr_range.nil? + + range_objs = objects_new.select do |_id, obj| + obj.instance_of?(attr_range) || (obj.respond_to?(:klass) && obj[:klass] == attr_range) + end.values + next if range_objs.empty? + + range_objs.uniq! 
+ query = attr_range.where.models(range_objs).in(@collection).include(*next_attrs) + query = query.read_only if @read_only + query.all + end + end + + def models_set_all_persistent(models_by_id) + return unless @ids + + models_by_id.each do |_k, m| + m.persistent = true + end + end + + def model_set_collection_attributes(models_by_id, objects_new) + collection_value = get_collection_value + return unless collection_value + + collection_attribute = @klass.collection_opts + models_by_id.each do |_id, m| + m.send("#{collection_attribute}=", collection_value) + end + objects_new.each do |_id, obj_new| + if obj_new.respond_to?(:klass) + collection_attribute = obj_new[:klass].collection_opts + obj_new[collection_attribute] = collection_value + elsif obj_new.class.respond_to?(:collection_opts) && + obj_new.class.collection_opts.instance_of?(Symbol) + collection_attribute = obj_new.class.collection_opts + obj_new.send("#{collection_attribute}=", collection_value) + end + end + end + + def get_collection_value + collection_value = nil + if @klass.collection_opts.instance_of?(Symbol) + collection_value = @collection.first if @collection.is_a?(Array) && (@collection.length == 1) + collection_value = @collection if @collection.respond_to? :id + end + collection_value + end + + def object_to_array(id, klass_struct, models_by_id, object, predicate) + pre = if klass_struct + models_by_id[id][predicate] + else + models_by_id[id].instance_variable_get("@#{predicate}") + end + if object.nil? && pre.nil? + object = [] + elsif object.nil? && !pre.nil? + object = pre + elsif object + object = !pre ? [object] : (pre.dup << object) + object.uniq! + end + object + end + + def dependent_model_creation(embed_struct, id, models_by_id, object, objects_new, v, options) + read_only = options[:read_only] + if object.is_a?(RDF::URI) && v != :id + range_for_v = @klass.range(v) + if range_for_v + if objects_new.include?(object) + object = objects_new[object] + elsif !range_for_v.inmutable? 
+ pre_val = get_pre_val(id, models_by_id, object, v, read_only) + object = get_object_from_range(pre_val, embed_struct, object, objects_new, v, options) + else + object = range_for_v.find(object).first + end + end + end + object + end + + def get_object_from_range(pre_val, embed_struct, object, objects_new, predicate) + range_for_v = @klass.range(predicate) + if !@read_only + object = pre_val || @klass.range_object(predicate, object) + objects_new[object.id] = object + else + # depedent read only + struct = pre_val || embed_struct[predicate].new + struct.id = object + struct.klass = range_for_v + objects_new[struct.id] = struct + object = struct + end + object + end + + def get_pre_val(id, models_by_id, object, predicate) + pre_val = nil + if models_by_id[id] && + ((models_by_id[id].respond_to?(:klass) && models_by_id[id]) || + models_by_id[id].loaded_attributes.include?(predicate)) + pre_val = if !@read_only + models_by_id[id].instance_variable_get("@#{predicate}") + else + models_by_id[id][predicate] + end + + pre_val = pre_val.select { |x| x.id == object }.first if pre_val.is_a?(Array) + end + pre_val + end + + def add_unmapped_to_model(sol) + predicate = sol[:attributeProperty].to_s.to_sym + return unless @properties_to_include[predicate] + + id = sol[:id] + value = sol[:attributeObject] + + @lang_filter.set_unmapped_value(@models_by_id[id], @properties_to_include[predicate][:uri], value) + end + + def add_aggregations_to_model(sol) + id = sol[:id] + @aggregate_projections&.each do |aggregate_key, aggregate_val| + if @models_by_id[id].respond_to?(:add_aggregate) + @models_by_id[id].add_aggregate(aggregate_val[1], aggregate_val[0], sol[aggregate_key].object) + else + (@models_by_id[id].aggregates ||= []) << Goo::Base::AGGREGATE_VALUE.new(aggregate_val[1], + aggregate_val[0], + sol[aggregate_key].object) + end + end + end + end + end +end From d4d9417c29e5dea346b482f69d710bba76ff39c2 Mon Sep 17 00:00:00 2001 From: Syphax bouazzouni Date: Mon, 4 Dec 2023 11:40:32 
+0100 Subject: [PATCH 049/106] Merge to master: Release 2.3.4 - New model capabilities (validators, scheme file and callbacks) (#45) * Merge pull request #23 from ontoportal-lirmm/feature/add-property-datatype-dsl Feature: add property data type dsl * Merge pull request #26 from ontoportal-lirmm/refactor/resources-validators Refactor: resources validators * Merge pull request #27 from ontoportal-lirmm/feature/implement-new-validators Feature: Implement new validators * Merge pull request #28 from ontoportal-lirmm/feature/implement-new-validators Feature: Implement instance proc validators * Merge pull request #30 from ontoportal-lirmm/feature/add-model-scheme-yml-file Feature/add model scheme yml file * Merge pull request #31 from ontoportal-lirmm/feature/add-models-on-update-callbacks Feature: Add models on update callbacks --- lib/goo.rb | 4 + lib/goo/base/resource.rb | 90 ++-- lib/goo/base/settings/settings.rb | 30 +- lib/goo/base/settings/yaml_settings.rb | 45 ++ lib/goo/sparql/triples.rb | 10 - lib/goo/validators/enforce.rb | 213 ++++----- .../validators/implementations/data_type.rb | 66 +++ .../validators/implementations/distinct_of.rb | 34 ++ .../validators/implementations/existence.rb | 17 + .../validators/implementations/inverse_of.rb | 35 ++ .../validators/implementations/object_type.rb | 46 ++ .../implementations/superior_equal_to.rb | 26 ++ .../validators/implementations/symmetric.rb | 33 ++ lib/goo/validators/implementations/unique.rb | 20 + .../validators/implementations/value_range.rb | 49 ++ lib/goo/validators/validator.rb | 111 +++++ test/data/yaml_scheme_model_test.yml | 11 + test/models.rb | 5 +- test/test_dsl_settings.rb | 86 +++- test/test_update_callbacks.rb | 53 +++ test/test_validators.rb | 427 ++++++++++++++++++ 21 files changed, 1249 insertions(+), 162 deletions(-) create mode 100644 lib/goo/base/settings/yaml_settings.rb create mode 100644 lib/goo/validators/implementations/data_type.rb create mode 100644 
lib/goo/validators/implementations/distinct_of.rb create mode 100644 lib/goo/validators/implementations/existence.rb create mode 100644 lib/goo/validators/implementations/inverse_of.rb create mode 100644 lib/goo/validators/implementations/object_type.rb create mode 100644 lib/goo/validators/implementations/superior_equal_to.rb create mode 100644 lib/goo/validators/implementations/symmetric.rb create mode 100644 lib/goo/validators/implementations/unique.rb create mode 100644 lib/goo/validators/implementations/value_range.rb create mode 100644 lib/goo/validators/validator.rb create mode 100644 test/data/yaml_scheme_model_test.yml create mode 100644 test/test_update_callbacks.rb create mode 100644 test/test_validators.rb diff --git a/lib/goo.rb b/lib/goo.rb index db863d2a..ff0e6279 100644 --- a/lib/goo.rb +++ b/lib/goo.rb @@ -16,6 +16,10 @@ require_relative "goo/search/search" require_relative "goo/base/base" require_relative "goo/validators/enforce" +require_relative "goo/validators/validator" +project_root = File.dirname(File.absolute_path(__FILE__)) +Dir.glob("#{project_root}/goo/validators/implementations/*", &method(:require)) + require_relative "goo/utils/utils" require_relative "goo/mixins/sparql_client" diff --git a/lib/goo/base/resource.rb b/lib/goo/base/resource.rb index 88bbc8ce..26ac5859 100644 --- a/lib/goo/base/resource.rb +++ b/lib/goo/base/resource.rb @@ -74,23 +74,10 @@ def id=(new_id) end def id - if @id.nil? - raise IDGenerationError, ":id must be set if configured in name_with" if self.class.name_with == :id - custom_name = self.class.name_with - if custom_name.instance_of?(Symbol) - @id = id_from_attribute() - elsif custom_name - begin - @id = custom_name.call(self) - rescue => e - raise IDGenerationError, "Problem with custom id generation: #{e.message}" - end - else - raise IDGenerationError, "custom_name is nil. settings for this model are incorrect." + @id = generate_id if @id.nil? + + @id end - end - return @id - end def persistent? 
return @persistent @@ -104,22 +91,20 @@ def modified? return modified_attributes.length > 0 end - def exist?(from_valid=false) - #generate id with proc - begin - id() unless self.class.name_with.kind_of?(Symbol) - rescue IDGenerationError - end - - _id = @id - if _id.nil? && !from_valid && self.class.name_with.is_a?(Symbol) + def exist?(from_valid = false) begin - _id = id_from_attribute() + id unless self.class.name_with.kind_of?(Symbol) rescue IDGenerationError + # Ignored end + + _id = @id + if from_valid || _id.nil? + _id = generate_id rescue _id = nil end + return false unless _id - return Goo::SPARQL::Queries.model_exist(self,id=_id) + Goo::SPARQL::Queries.model_exist(self, id = _id) end def fully_loaded? @@ -247,10 +232,13 @@ def save(*opts) raise ArgumentError, "Enums can only be created on initialization" unless opts[0] && opts[0][:init_enum] end batch_file = nil - if opts && opts.length > 0 - if opts.first.is_a?(Hash) && opts.first[:batch] && opts.first[:batch].is_a?(File) + callbacks = true + if opts && opts.length > 0 && opts.first.is_a?(Hash) + if opts.first[:batch] && opts.first[:batch].is_a?(File) batch_file = opts.first[:batch] end + + callbacks = opts.first[:callbacks] end if !batch_file @@ -258,8 +246,28 @@ def save(*opts) raise Goo::Base::NotValidException, "Object is not valid. Check errors." unless valid? end + #set default values before saving + unless self.persistent? + self.class.attributes_with_defaults.each do |attr| + value = self.send("#{attr}") + if value.nil? 
+ value = self.class.default(attr).call(self) + self.send("#{attr}=", value) + end + end + end + + #call update callback before saving + if callbacks + self.class.attributes_with_update_callbacks.each do |attr| + Goo::Validators::Enforce.enforce_callbacks(self, attr) + end + end + graph_insert, graph_delete = Goo::SPARQL::Triples.model_update_triples(self) - graph = self.graph() + graph = self.graph + + if graph_delete and graph_delete.size > 0 begin Goo.sparql_update_client.delete_data(graph_delete, graph: graph) @@ -442,10 +450,30 @@ def self.all end protected + def id_from_attribute() uattr = self.class.name_with uvalue = self.send("#{uattr}") - return self.class.id_from_unique_attribute(uattr,uvalue) + return self.class.id_from_unique_attribute(uattr, uvalue) + end + + def generate_id + return nil unless self.class.name_with + + raise IDGenerationError, ":id must be set if configured in name_with" if self.class.name_with == :id + custom_name = self.class.name_with + if custom_name.instance_of?(Symbol) + id = id_from_attribute + elsif custom_name + begin + id = custom_name.call(self) + rescue => e + raise IDGenerationError, "Problem with custom id generation: #{e.message}" + end + else + raise IDGenerationError, "custom_name is nil. settings for this model are incorrect." + end + id end end diff --git a/lib/goo/base/settings/settings.rb b/lib/goo/base/settings/settings.rb index a58daae0..a7008087 100644 --- a/lib/goo/base/settings/settings.rb +++ b/lib/goo/base/settings/settings.rb @@ -1,4 +1,5 @@ require 'active_support/core_ext/string' +require_relative 'yaml_settings' module Goo module Base @@ -12,8 +13,10 @@ module ClassMethods attr_reader :model_name attr_reader :attribute_uris + include YAMLScheme + def default_model_options - return {} + {} end def model(*args) @@ -34,7 +37,9 @@ def model(*args) @model_settings = default_model_options.merge(options || {}) - unless options.include?:name_with + init_yaml_scheme_settings + + unless options.include? 
:name_with raise ArgumentError, "The model `#{model_name}` definition should include the :name_with option" end Goo.add_model(@model_name,self) @@ -91,6 +96,16 @@ def attributes_with_defaults select{ |attr,opts| opts[:default] }).keys() end + def attributes_with_update_callbacks + (@model_settings[:attributes]. + select{ |attr,opts| opts[:onUpdate] }).keys + end + + + def update_callbacks(attr) + @model_settings[:attributes][attr][:onUpdate] + end + def default(attr) return @model_settings[:attributes][attr][:default] end @@ -185,10 +200,14 @@ def attribute(*args) attr_name = attr_name.to_sym options = options.pop options = {} if options.nil? - if options[:enforce].nil? or !options[:enforce].include?(:list) - options[:enforce] = options[:enforce] ? (options[:enforce] << :no_list) : [:no_list] - end + + options[:enforce] ||= [] + + set_data_type(options) + set_no_list_by_default(options) + @model_settings[:attributes][attr_name] = options + load_yaml_scheme_options(attr_name) shape_attribute(attr_name) namespace = attribute_namespace(attr_name) namespace = namespace || @model_settings[:namespace] @@ -381,6 +400,7 @@ def read_only(attributes) instance end + def show_all_languages?(args) args.first.is_a?(Hash) && args.first.keys.include?(:include_languages) && args.first[:include_languages] end diff --git a/lib/goo/base/settings/yaml_settings.rb b/lib/goo/base/settings/yaml_settings.rb new file mode 100644 index 00000000..8a931b3a --- /dev/null +++ b/lib/goo/base/settings/yaml_settings.rb @@ -0,0 +1,45 @@ +require 'yaml' + +module Goo + module Base + module Settings + module YAMLScheme + attr_reader :yaml_settings + + def init_yaml_scheme_settings + scheme_file_path = @model_settings[:scheme] + @yaml_settings = read_yaml_settings_file(scheme_file_path) + end + + def attribute_yaml_settings(attr) + + return {} if yaml_settings.nil? 
+ + yaml_settings[attr.to_sym] + end + + + + private + + def load_yaml_scheme_options(attr) + settings = attribute_settings(attr) + yaml_settings = attribute_yaml_settings(attr) + settings.merge! yaml_settings unless yaml_settings.nil? || yaml_settings.empty? + end + + def read_yaml_settings_file(scheme_file_path) + return if scheme_file_path.nil? + + yaml_contents = File.read(scheme_file_path) rescue return + + YAML.safe_load(yaml_contents, symbolize_names: true) + end + end + end + end +end + + + + diff --git a/lib/goo/sparql/triples.rb b/lib/goo/sparql/triples.rb index cb840df9..df3f9f1d 100644 --- a/lib/goo/sparql/triples.rb +++ b/lib/goo/sparql/triples.rb @@ -67,16 +67,6 @@ def self.model_update_triples(model) unless model.persistent? graph_insert << [subject, RDF.type, model.class.uri_type(model.collection)] end - #set default values before saving - if not model.persistent? - model.class.attributes_with_defaults.each do |attr| - value = model.send("#{attr}") - if value.nil? - value = model.class.default(attr).call(model) - model.send("#{attr}=",value) - end - end - end model.modified_attributes.each do |attr| next if model.class.collection?(attr) diff --git a/lib/goo/validators/enforce.rb b/lib/goo/validators/enforce.rb index d326839b..3c90e204 100644 --- a/lib/goo/validators/enforce.rb +++ b/lib/goo/validators/enforce.rb @@ -3,127 +3,132 @@ module Goo module Validators module Enforce - def self.enforce_by_attribute(model,attr) - return model.model_settings[:attributes][attr][:enforce] - end + class EnforceInstance + attr_reader :errors_by_opt + def initialize + @errors_by_opt = {} + end + + def enforce(inst,attr,value) + enforce_opts = enforce_by_attribute(inst.class,attr) + return nil if enforce_opts.nil? or enforce_opts.length == 0 - def self.enforce_type_boolean(attr,value) - if value.kind_of? 
Array - if (value.select {|x| !((x.class == TrueClass) || (x.class == FalseClass))} ).length > 0 - return "All values in attribute `#{attr}` must be `Boolean`" + enforce_opts.each do |opt| + case opt + when :unique + check Goo::Validators::Unique, inst, attr, value, opt + when :no_list + validator = Goo::Validators::DataType.new(inst, attr, value, Array) + if validator.valid? && !value.nil? + add_error(opt, + "`#{attr}` is defined as non Array - it cannot hold multiple values") + end + when :existence + check Goo::Validators::Existence, inst, attr, value, opt + when :list, Array + check Goo::Validators::DataType, inst, attr, value,opt, Array + when :uri, RDF::URI + check Goo::Validators::DataType, inst, attr, value,opt, RDF::URI + when :string, String + check Goo::Validators::DataType, inst, attr, value,opt, String + when :integer, Integer + check Goo::Validators::DataType, inst, attr, value,opt, Integer + when :boolean + check Goo::Validators::DataType, inst, attr, value, opt,:boolean + when :date_time, DateTime + check Goo::Validators::DataType, inst, attr, value, opt, DateTime + when :float, Float + check Goo::Validators::DataType, inst, attr, value, opt, Float + when :symmetric + check Goo::Validators::Symmetric, inst, attr, value, opt + when /^distinct_of_/ + check Goo::Validators::DistinctOf, inst, attr, value, opt, opt + when /^superior_equal_to_/ + check Goo::Validators::SuperiorEqualTo, inst, attr, value, opt, opt + when /^inverse_of_/ + check Goo::Validators::InverseOf, inst, attr, value, opt, opt + when Proc + call_proc(opt, inst, attr) + when /^max_/, /^min_/ + type = opt.to_s.index("max_") ? 
:max : :min + check Goo::Validators::ValueRange, inst, attr, value, type, opt.to_s + else + if object_type?(opt) + check_object_type inst, attr, value, opt + elsif instance_proc?(inst, opt) + call_proc(inst.method(opt), inst, attr) + end + end end - else - if !((value.class == TrueClass) || (value.class == FalseClass)) - return "Attribute `#{attr}` value `#{value}` must be a `Boolean`" + + errors_by_opt.length > 0 ? errors_by_opt : nil + end + + def enforce_callback(inst, attr) + callbacks = Array(inst.class.update_callbacks(attr)) + callbacks.each do |proc| + if instance_proc?(inst, proc) + call_proc(inst.method(proc), inst, attr) + elsif proc.is_a?(Proc) + call_proc(proc, inst, attr) + end end end - end - def self.enforce_type(attr,type,value) - if type == :boolean - return self.enforce_type_boolean(attr,value) + private + + def object_type(opt) + opt.respond_to?(:shape_attribute) ? opt : Goo.model_by_name(opt) end - if value.kind_of? Array - if (value.select {|x| !(x.kind_of? type)} ).length > 0 - return "All values in attribute `#{attr}` must be `#{type.name}`" - end - else - if !(value.kind_of? type) - return "Attribute `#{attr}` value `#{value}` must be a `#{type.name}`" - end + + def object_type?(opt) + opt.respond_to?(:shape_attribute) ? opt : Goo.model_by_name(opt) end - end - def self.enforce_range_length(type_range,attr,opt_s,value) - if !value.nil? && !(value.kind_of?(Array) || value.kind_of?(String)) - return "#{attr} value (#{value}) must be an Array or String - it has range length constraints" + def instance_proc?(inst, opt) + opt && (opt.is_a?(Symbol) || opt.is_a?(String)) && inst.respond_to?(opt) end - range = opt_s[4..opt_s.length].to_i - if type_range == :min - if !value.nil? && (value.length < range) - return "#{attr} value has length `#{value.length}` and the min length is `#{range}`" + + def check_object_type(inst, attr, value, opt) + model_range = object_type(opt) + if model_range && !value.nil? 
+ check Goo::Validators::ObjectType, inst, attr, value, model_range.model_name, model_range end - else - if !value.nil? && (value.length > range) - return "#{attr} value has length `#{value.length}` and the max length is `#{range}`" + end + + def check(validator_class, inst, attr, value, opt, *options) + validator = validator_class.new(inst, attr, value, *options) + add_error(opt, validator.error) unless validator.valid? + end + def enforce_by_attribute(model, attr) + model.model_settings[:attributes][attr][:enforce] + end + + def call_proc(proc,inst, attr) + # This should return an array like [:name_of_error1, "Error message 1", :name_of_error2, "Error message 2"] + errors = proc.call(inst, attr) + + return unless !errors.nil? && errors.is_a?(Array) + + errors.each_slice(2) do |e| + next if e.nil? || e.compact.empty? + add_error(e[0].to_sym, e[1]) end end + + def add_error(opt, err) + return if err.nil? + @errors_by_opt[opt] = err + end end + def self.enforce(inst,attr,value) - enforce_opts = enforce_by_attribute(inst.class,attr) - return nil if enforce_opts.nil? or enforce_opts.length == 0 - errors_by_opt = {} - enforce_opts.each do |opt| - case opt - when :unique - unless value.nil? - dup = Goo::SPARQL::Queries.duplicate_attribute_value?(inst,attr) - if dup - add_error(opt, errors_by_opt, - "`#{attr}` must be unique. " + - "There are other model instances with the same attribute value `#{value}`.") - end - end - when :no_list - if value.kind_of? Array - add_error(opt, errors_by_opt, - "`#{attr}` is defined as non Array - it cannot hold multiple values") - end - when :existence - add_error(opt, errors_by_opt, "`#{attr}` value cannot be nil") if value.nil? - when :list, Array - if !value.nil? && !(value.kind_of? Array) - add_error(opt, errors_by_opt, "`#{attr}` value must be an Array") - end - when :uri, RDF::URI - add_error(opt, errors_by_opt, enforce_type(attr,RDF::URI,value)) unless value.nil? 
- when :string, String - add_error(opt, errors_by_opt, enforce_type(attr,String,value)) unless value.nil? - when :integer, Integer - add_error(opt, errors_by_opt, enforce_type(attr,Integer,value)) unless value.nil? - when :boolean - add_error(opt, errors_by_opt, enforce_type(attr,:boolean,value)) unless value.nil? - when :date_time, DateTime - add_error(opt, errors_by_opt, enforce_type(attr,DateTime,value)) unless value.nil? - when Proc - # This should return an array like [:name_of_error1, "Error message 1", :name_of_error2, "Error message 2"] - errors = opt.call(inst, attr) - errors.each_slice(2) do |e| - next if e.nil? || e.compact.empty? - add_error(e[0].to_sym, errors_by_opt, e[1]) rescue binding.pry - end - else - model_range = opt.respond_to?(:shape_attribute) ? opt : Goo.model_by_name(opt) - if model_range and !value.nil? - values = value.kind_of?(Array) ? value : [value] - values.each do |v| - if (!v.kind_of?(model_range)) && !(v.respond_to?(:klass) && v[:klass] == model_range) - add_error(model_range.model_name, errors_by_opt, - "`#{attr}` contains values that are not instance of `#{model_range.model_name}`") - else - if !v.respond_to?(:klass) && !v.persistent? - add_error(model_range.model_name, errors_by_opt, - "`#{attr}` contains non persistent models. It will not save.") - end - end - end - end - opt_s = opt.to_s - if opt_s.index("max_") == 0 - add_error(:max, errors_by_opt, enforce_range_length(:max,attr,opt_s,value)) unless value.nil? - end - if opt_s.index("min_") == 0 - add_error(:min, errors_by_opt, enforce_range_length(:min,attr,opt_s,value)) unless value.nil? - end - end - end - return errors_by_opt.length > 0 ? errors_by_opt : nil + EnforceInstance.new.enforce(inst,attr,value) end - def self.add_error(opt, h, err) - return if err.nil? 
- h[opt] = err + def self.enforce_callbacks(inst, attr) + EnforceInstance.new.enforce_callback(inst, attr) end end end diff --git a/lib/goo/validators/implementations/data_type.rb b/lib/goo/validators/implementations/data_type.rb new file mode 100644 index 00000000..0ea65ab3 --- /dev/null +++ b/lib/goo/validators/implementations/data_type.rb @@ -0,0 +1,66 @@ +module Goo + module Validators + class DataType < ValidatorBase + include Validator + + keys [:list, :uri, :string, :integer, :boolean, :date_time, :float] + + error_message ->(obj) { + if @value.kind_of? Array + return "All values in attribute `#{@attr}` must be `#{@type}`" + else + return "Attribute `#{@attr}` with the value `#{@value}` must be `#{@type}`" + + end + } + + validity_check -> (obj) do + self.enforce_type(@type, @value) + end + + def initialize(inst, attr, value, type) + super(inst, attr, value) + @type = type + end + + + + def enforce_type(type, value) + return true if value.nil? + + if type == :boolean + return self.enforce_type_boolean(value) + elsif type.eql?(:uri) || type.eql?(RDF::URI) + return self.enforce_type_uri(value) + elsif type.eql?(:uri) || type.eql?(Array) + return value.is_a? Array + else + if value.is_a? Array + return value.select{|x| !x.is_a?(type)}.empty? + else + return value.is_a? type + end + end + + end + + def enforce_type_uri(value) + return true if value.nil? + + value.is_a?(RDF::URI) && value.valid? + end + + def enforce_type_boolean(value) + if value.kind_of? Array + return value.select { |x| !is_a_boolean?(x) }.empty? 
+ else + return is_a_boolean?(value) + end + end + + def is_a_boolean?(value) + return (value.class == TrueClass) || (value.class == FalseClass) + end + end + end +end \ No newline at end of file diff --git a/lib/goo/validators/implementations/distinct_of.rb b/lib/goo/validators/implementations/distinct_of.rb new file mode 100644 index 00000000..2e93313b --- /dev/null +++ b/lib/goo/validators/implementations/distinct_of.rb @@ -0,0 +1,34 @@ +module Goo + module Validators + class DistinctOf < ValidatorBase + include Validator + + key :distinct_of_ + + error_message ->(obj) { "`#{@attr}` must be distinct of `#{@property}`"} + + validity_check -> (obj) do + return true if self.class.empty_value?(@value) + + self.distinct?(@inst, @property, @value) + end + + def initialize(inst, attr, value, key) + super(inst, attr, value) + @property = self.class.property(key) + end + + + + def distinct?(inst, property, value) + target_values = self.class.attr_value(property, inst) + current_values = Array(value) + + !current_values.any?{ |x| self.find_any?(target_values, x)} + end + def find_any?(array, value) + array.any?{ |x| self.class.equivalent_value?(value, x)} + end + end + end +end \ No newline at end of file diff --git a/lib/goo/validators/implementations/existence.rb b/lib/goo/validators/implementations/existence.rb new file mode 100644 index 00000000..fcf04d61 --- /dev/null +++ b/lib/goo/validators/implementations/existence.rb @@ -0,0 +1,17 @@ +module Goo + module Validators + class Existence < ValidatorBase + include Validator + + key :existence + + error_message ->(obj) { "`#{@value}` value cannot be nil"} + + validity_check -> (obj) do + not self.class.empty_value?(@value) + end + + + end + end +end \ No newline at end of file diff --git a/lib/goo/validators/implementations/inverse_of.rb b/lib/goo/validators/implementations/inverse_of.rb new file mode 100644 index 00000000..60518af3 --- /dev/null +++ b/lib/goo/validators/implementations/inverse_of.rb @@ -0,0 +1,35 @@ 
+module Goo + module Validators + class InverseOf < ValidatorBase + include Validator + + key :inverse_of_ + + error_message ->(obj) { + "`#{@attr}` must be the inverse of ``#{@property}``" + } + + validity_check -> (obj) do + return true if self.class.empty_value?(@value) + + return Array(@value).select{|x| not inverse?(@property,x, @inst)}.empty? + end + + def initialize(inst, attr, value, key) + super(inst, attr, value) + @property = self.class.property(key) + end + + def inverse?(attr, value, source_object) + if self.class.respond_to?(attr, value) + target_values = self.class.attr_value(attr, value) + return target_values.any?{ |target_object| self.class.equivalent_value?(target_object, source_object)} + end + + false + end + + + end + end +end \ No newline at end of file diff --git a/lib/goo/validators/implementations/object_type.rb b/lib/goo/validators/implementations/object_type.rb new file mode 100644 index 00000000..3af97b41 --- /dev/null +++ b/lib/goo/validators/implementations/object_type.rb @@ -0,0 +1,46 @@ +module Goo + module Validators + class ObjectType < ValidatorBase + include Validator + + key :object_type + + error_message ->(obj) { + if @error.eql?(:persistence) + "`#{@attr}` contains non persistent models. It will not save." + else + "`#{@attr}` contains values that are not instance of `#{@model_range.model_name}`" + end + } + + validity_check -> (obj) do + values = Array(@value) + + unless values.select { |v| !self.is_a_model?(v, @model_range) }.empty? + @error = :no_range + return false + end + + unless values.select { |v| !self.persistent?(v) }.empty? + @error = :persistence + return false + end + + return true + end + + def initialize(inst, attr, value, model_range) + super(inst, attr, value) + @model_range = model_range + end + + def is_a_model?(value, model_range) + value.is_a?(model_range) || (value.respond_to?(:klass) && value[:klass] == model_range) + end + + def persistent?(value) + value.respond_to?(:klass) || value.persistent? 
+ end + end + end +end diff --git a/lib/goo/validators/implementations/superior_equal_to.rb b/lib/goo/validators/implementations/superior_equal_to.rb new file mode 100644 index 00000000..91508f30 --- /dev/null +++ b/lib/goo/validators/implementations/superior_equal_to.rb @@ -0,0 +1,26 @@ +module Goo + module Validators + class SuperiorEqualTo < ValidatorBase + include Validator + + key :superior_equal_to_ + + error_message ->(obj) { + "`#{@attr}` must be superior or equal to `#{@property}`" + } + + validity_check -> (obj) do + target_values = self.class.attr_value(@property, @inst) + + return true if target_values.empty? + + return @value >= target_values.first + end + + def initialize(inst, attr, value, key) + super(inst, attr, value) + @property = self.class.property(key) + end + end + end +end diff --git a/lib/goo/validators/implementations/symmetric.rb b/lib/goo/validators/implementations/symmetric.rb new file mode 100644 index 00000000..e9ceb3f4 --- /dev/null +++ b/lib/goo/validators/implementations/symmetric.rb @@ -0,0 +1,33 @@ +module Goo + module Validators + class Symmetric < ValidatorBase + include Validator + + key :symmetric + + error_message ->(obj) { + "`#{@attr}` must be symmetric" + } + + validity_check -> (obj) do + return true if self.class.empty_value?(@value) + + return Array(@value).select{|x| not symmetric?(@attr,x, @inst)}.empty? 
+ end + + def symmetric?(attr, value, source_object) + if respond_to?(attr, value) + target_values = self.class.attr_value(attr, value) + return target_values.any?{ |target_object| self.class.equivalent_value?(target_object, source_object)} + end + + return false + end + + def respond_to?(attr, object) + object && object.respond_to?(attr) + end + + end + end +end \ No newline at end of file diff --git a/lib/goo/validators/implementations/unique.rb b/lib/goo/validators/implementations/unique.rb new file mode 100644 index 00000000..feb13a4b --- /dev/null +++ b/lib/goo/validators/implementations/unique.rb @@ -0,0 +1,20 @@ +module Goo + module Validators + class Unique < ValidatorBase + include Validator + + key :unique + + error_message ->(obj) { "`#{@attr}` must be unique. " + + "There are other model instances with the same attribute value `#{@value}`."} + + validity_check -> (obj) do + return true if @value.nil? + + !Goo::SPARQL::Queries.duplicate_attribute_value?(@inst,@attr) + end + + + end + end +end \ No newline at end of file diff --git a/lib/goo/validators/implementations/value_range.rb b/lib/goo/validators/implementations/value_range.rb new file mode 100644 index 00000000..71440bcf --- /dev/null +++ b/lib/goo/validators/implementations/value_range.rb @@ -0,0 +1,49 @@ +module Goo + module Validators + class ValueRange < ValidatorBase + include Validator + + keys [:min_, :max_] + + error_message ->(obj) { + value = self.value_length(@value) + if @type == :min + "#{@attr} value has length `#{value}` and the min length is `#{@range}`" + else + "#{@attr} value has length `#{value}` and the max length is `#{@range}`" + end + } + + validity_check -> (obj) do + self.enforce_range_length(@type, @range, @value) + end + + def initialize(inst, attr, value, type) + super(inst, attr, value) + @type = type.index("max_") ? :max : :min + @range = self.range(type) + end + + def enforce_range_length(type_range, range, value) + return false if value.nil? 
+ value_length = self.value_length(value) + + (type_range.eql?(:min) && (value_length >= range)) || (type_range.eql?(:max) && (value_length <= range)) + end + + def range(opt) + opt[4..opt.length].to_i + end + + def value_length(value) + return 0 if value.nil? + + if value.is_a?(String) || value.is_a?(Array) + value.length + else + value + end + end + end + end +end diff --git a/lib/goo/validators/validator.rb b/lib/goo/validators/validator.rb new file mode 100644 index 00000000..e7db80a4 --- /dev/null +++ b/lib/goo/validators/validator.rb @@ -0,0 +1,111 @@ +module Goo + module Validators + + class ValidatorBase + + def initialize(inst, attr, value) + @inst = inst + @attr = attr + @value = value + end + + def valid? + self.instance_eval(&self.class.validator_settings[:check]) + end + + def error + message = self.class.validator_settings[:message] + if message.is_a? Proc + self.instance_eval(&message) + else + message + end + end + + end + + module Validator + + def self.included(base) + base.extend(ClassMethods) + end + + + module ClassMethods + + def key(id) + validator_settings[:id] = id + end + + def keys(ids) + key ids + end + + def validity_check(block) + validator_settings[:check] = block + end + + def error_message(message) + validator_settings[:message] = message + end + + def validator_settings + @validator_settings ||= {} + end + + def ids + Array(validator_settings[:id]) + end + + def property(key) + key[ids.first.size..key.size].to_sym + end + + def respond_to?(attr, object) + object && object.respond_to?(attr) + end + + + def equivalent_value?(object1, object2) + if object1.respond_to?(:id) && object2.respond_to?(:id) + object1.id.eql?(object2.id) + else + object2 == object1 + end + end + + def attr_value(attr, object) + Array(object.send(attr)) + end + + def empty_value?(value) + value.nil? 
|| empty?(value) || empty_array?(value) + end + def empty?(value) + empty_string?(value) || empty_to_s?(value) + end + def empty_string?(string) + string.is_a?(String) && string.strip.empty? + end + + def empty_to_s?(object) + begin + object && object.to_s&.strip.empty? + rescue + return false + end + end + + def empty_array?(array) + array.is_a?(Array) && array && array.reject{|x| x.nil? || empty?(x)}.empty? + end + end + + + + + + end + end +end + diff --git a/test/data/yaml_scheme_model_test.yml b/test/data/yaml_scheme_model_test.yml new file mode 100644 index 00000000..fd8c4921 --- /dev/null +++ b/test/data/yaml_scheme_model_test.yml @@ -0,0 +1,11 @@ +name: + label: 'Name' + description: 'Person name' + equivalents: ['test:name' , 'test2:name', 'test3:person_name'] + help: 'Put the person name as string' + example: 'John' +nationality: + label: 'Person nationality' + enforcedValues: {'fr': 'france', 'us': 'USA'} + + diff --git a/test/models.rb b/test/models.rb index cd606eed..7d490a4a 100644 --- a/test/models.rb +++ b/test/models.rb @@ -101,7 +101,10 @@ def self.create_test_case_data end def self.delete_test_case_data - objects = [Student, University, Program, Category, Address] + delete_all [Student, University, Program, Category, Address] + end + + def self.delete_all(objects) objects.each do |obj| obj.where.include(obj.attributes).each do |i| i.delete diff --git a/test/test_dsl_settings.rb b/test/test_dsl_settings.rb index c444e829..9a8f03df 100644 --- a/test/test_dsl_settings.rb +++ b/test/test_dsl_settings.rb @@ -2,6 +2,22 @@ GooTest.configure_goo +class NewPersonModel < Goo::Base::Resource + model :person_model_new, name_with: :name + attribute :name, type: :string, enforce: [ :existence, :unique] + attribute :multiple_values, type: [:list, :integer], enforce: [ :existence, :min_3, :max_5 ] + attribute :one_number, type: :integer,enforce: [ :existence ] #by default not a list + attribute :birth_date, type: :date_time, enforce: [ :existence ] + + attribute 
:created, type: DateTime , + default: lambda { |record| DateTime.now }, + namespace: :omv + + attribute :friends, type: NewPersonModel , enforce: [ :existence] + attribute :status, type: :status, enforce: [ :existence], + default: lambda { |record| StatusModel.find("single") } +end + class StatusModel < Goo::Base::Resource model :status_model, name_with: :name attribute :description, enforce: [ :existence, :unique] @@ -32,13 +48,46 @@ def initialize(attributes = {}) end end + +class YamlSchemeModelTest < Goo::Base::Resource + model :yaml_scheme_model_test, name_with: :name, scheme: 'test/data/yaml_scheme_model_test.yml' + attribute :name, enforce: [ :existence, :string, :unique] + attribute :last_name, enforce: [ :existence, :string, :unique] + attribute :birth_date, enforce: [ :existence, :date_time ] + attribute :nationality, enforce: [ :existence, :string ] + attribute :created, enforce: [ DateTime ], + default: lambda { |record| DateTime.now }, + namespace: :omv + attribute :friends, enforce: [ :existence , PersonModel] + attribute :status, enforce: [ :existence, :status ], + default: lambda { |record| StatusModel.find("single") } +end + + class TestDSLSeeting < MiniTest::Unit::TestCase def initialize(*args) super(*args) end + def test_data_type_dsl + _test_attributes_enforce NewPersonModel + end + def test_attributes_set_get + _test_attributes_enforce PersonModel + end + + def test_default_value + #default is on save ... returns` person = PersonModel.new + assert_equal nil, person.created + end + + + private + def _test_attributes_enforce(model) + person = model.new + model_key_name = model.model_name assert(person.respond_to? :id) assert(person.kind_of? Goo::Base::Resource) assert !person.valid? @@ -67,7 +116,7 @@ def test_attributes_set_get assert !person.valid? assert !person.errors[:birth_date] - person.birth_date = "X" + person.birth_date = "X" assert !person.valid? 
assert person.errors[:birth_date][:date_time] @@ -103,17 +152,17 @@ def test_attributes_set_get person.multiple_values << 99 end - friends = [PersonModel.new , PersonModel.new] + friends = [model.new , model.new] person.friends = friends assert !person.valid? assert person.errors[:friends][:no_list] - person.friends = PersonModel.new + person.friends = model.new assert !person.valid? - assert person.errors[:friends][:person_model] + assert person.errors[:friends][model_key_name] person.friends = "some one" assert !person.valid? - assert person.errors[:friends][:person_model] - person.friends = PersonModel.new + assert person.errors[:friends][model_key_name] + person.friends = model.new person.one_number = 99 assert !person.valid? @@ -127,7 +176,7 @@ def test_attributes_set_get assert !person.valid? assert person.errors[:one_number][:no_list] - person.one_number = 99 + person.one_number = 99 assert_equal(99, person.one_number) assert !person.valid? assert !person.errors[:one_number] @@ -139,10 +188,25 @@ def test_attributes_set_get assert !person.valid? end - def test_default_value - #default is on save ... 
returns` - person = PersonModel.new - assert_equal nil, person.created + def test_model_with_yaml_scheme + + settings = YamlSchemeModelTest.model_settings + attributes_settings = settings[:attributes] + + + assert_equal "test/data/yaml_scheme_model_test.yml", settings[:scheme] + + assert_equal 'Name', attributes_settings[:name][:label] + assert_equal 'Person name', attributes_settings[:name][:description] + assert_equal %w[test:name test2:name test3:person_name], attributes_settings[:name][:equivalents] + assert_equal 'Put the person name as string', attributes_settings[:name][:help] + assert_equal 'John', attributes_settings[:name][:example] + + + assert_equal 'Person nationality', attributes_settings[:nationality][:label] + hash = {fr: 'france', us: 'USA'} + assert_equal hash, attributes_settings[:nationality][:enforcedValues] + end end diff --git a/test/test_update_callbacks.rb b/test/test_update_callbacks.rb new file mode 100644 index 00000000..bef38a68 --- /dev/null +++ b/test/test_update_callbacks.rb @@ -0,0 +1,53 @@ +require_relative 'test_case' + + +require_relative 'models' + +class TestUpdateCallBack < Goo::Base::Resource + model :update_callback_model, name_with: :code + attribute :code, enforce: [:string, :existence] + attribute :name, enforce: [:string, :existence] + attribute :first_name, onUpdate: :update_name + attribute :last_name, onUpdate: :update_name + + + def update_name(inst, attr) + self.name = self.first_name + self.last_name + end +end + +class TestUpdateCallBacks < MiniTest::Unit::TestCase + + def self.before_suite + GooTestData.delete_all [TestUpdateCallBack] + end + + def self.after_suite + GooTestData.delete_all [TestUpdateCallBack] + end + + + def test_update_callback + p = TestUpdateCallBack.new + p.code = "1" + p.name = "name" + p.first_name = "first_name" + p.last_name = "last_name" + + assert p.valid? 
+ p.save + + p.bring_remaining + + assert_equal p.first_name + p.last_name, p.name + + p.last_name = "last_name2" + p.save + + p.bring_remaining + assert_equal "last_name2", p.last_name + assert_equal p.first_name + p.last_name, p.name + end + +end + diff --git a/test/test_validators.rb b/test/test_validators.rb new file mode 100644 index 00000000..8795fccf --- /dev/null +++ b/test/test_validators.rb @@ -0,0 +1,427 @@ +require_relative 'test_case' + +GooTest.configure_goo +require_relative 'models' + +class Person < Goo::Base::Resource + model :person_model_validators, name_with: :name + attribute :name, enforce: [:string, :existence] + attribute :last_name, enforce: [:string] + attribute :multiple_values, enforce: [ :list, :integer] + attribute :one_number, enforce: [ :integer ] + attribute :birth_date, enforce: [ :date_time ] + attribute :male, enforce: [:boolean] + attribute :social, enforce: [:uri] + attribute :weight, enforce: [:float] + attribute :friends, enforce: [Person, :list] +end + + +class RangeTestModel < Goo::Base::Resource + model :range_test_model, name_with: :name + attribute :name, enforce: [:string, :existence, :min_3, :max_5] + attribute :multiple_values, enforce: [ :list, :integer, :min_3, :max_5 ] + attribute :one_number, enforce: [ :integer, :min_3, :max_5] + attribute :weight, enforce: [:float, :min_3, :max_5] +end + +class SymmetricTestModel < Goo::Base::Resource + model :symmetric_test_model, name_with: :name + attribute :name, enforce: [:unique, :existence] + attribute :friend, enforce: [SymmetricTestModel, :symmetric] + attribute :friends, enforce: [SymmetricTestModel, :symmetric, :list] +end + +class DistinctOfTestModel < Goo::Base::Resource + model :distinct_of_test_model, name_with: :name + attribute :name, enforce: [:unique, :existence, :string] + attribute :last_name, enforce: [:distinct_of_name, :string] + attribute :names, enforce: [:list, :string] + attribute :last_names, enforce: [:list, :distinct_of_names, :string] +end + 
+class SuperiorToTestModel < Goo::Base::Resource + model :superior_to_test_model, name_with: :name + attribute :name, enforce: [:unique, :existence, :string] + attribute :birth_date, enforce: [:date_time] + attribute :death_date, enforce: [:superior_equal_to_birth_date, :date_time] +end + +class InverseOfTestModel < Goo::Base::Resource + model :inverse_test_model_one, name_with: :name + attribute :name, enforce: [:unique, :existence, :string] + attribute :state, enforce: [InverseOfTestModel] + attribute :city, enforce: [:inverse_of_state, InverseOfTestModel] + attribute :states, enforce: [InverseOfTestModel, :list] + attribute :cities, enforce: [:inverse_of_states, InverseOfTestModel, :list] +end + + +class ProcValidatorsTestModel < Goo::Base::Resource + model :proc_validator_test_model, name_with: :name + attribute :name, enforce: [:unique, :equal_to_test] + attribute :last_name, enforce: [:unique, ->(inst, attr) { equal_to_test_2(inst, attr)}] + + + def self.equal_to_test_2(inst, attr) + value = inst.send(attr) + + return nil if value && value.eql?('test 2') + + [:equal_to_test_2, "#{attr} need to be equal to `test 2`"] + end + + def equal_to_test(inst, attr) + value = inst.send(attr) + + return nil if value && value.eql?('test') + + [:equal_to_test, "#{attr} need to be equal to `test`"] + end +end + +class TestValidators < MiniTest::Unit::TestCase + + def self.before_suite + begin + GooTestData.create_test_case_data + rescue Exception => e + puts e.message + end + end + + def self.after_suite + GooTestData.delete_test_case_data + GooTestData.delete_all [SymmetricTestModel, InverseOfTestModel] + end + + + def test_unique_validator + + s = Student.new + s.birth_date = DateTime.parse('1978-01-01') + + s.name = "Susan" + + refute s.valid? + + s.name = "new" + + assert s.valid? + end + + def test_existence_validator + s = Student.new + + refute s.valid? 
+ + assert s.errors[:name][:existence] + assert s.errors[:birth_date][:existence] + + + s.name = '' + s.birth_date = '' + assert s.errors[:name][:existence] + assert s.errors[:birth_date][:existence] + + + s.name = 'new' + s.birth_date = DateTime.parse('1978-01-01') + + assert s.valid? + end + + def test_datatype_validators + p = Person.new + p.name = 'test' + #nil values are valid + assert p.valid? + + p.last_name = false + p.multiple_values = "hello" + p.one_number = "hello" + p.birth_date = 100 + p.male = "ok" + p.social = 100 + p.weight = 100 + + + #wrong types are not valid + refute p.valid? + assert p.errors[:last_name][:string] + assert p.errors[:multiple_values][:list] + assert p.errors[:multiple_values][:integer] + assert p.errors[:one_number][:integer] + assert p.errors[:birth_date][:date_time] + assert p.errors[:male][:boolean] + assert p.errors[:social][:uri] + + p.last_name = "hello" + p.multiple_values = [22,11] + p.one_number = 12 + p.birth_date = DateTime.parse('1978-01-01') + p.male = true + p.social = RDF::URI.new('https://test.com/') + p.weight = 100.0 + #good types are valid + assert p.valid? + end + + def test_uri_datatype_validator + p = Person.new + p.name = 'test' + + assert p.valid? + + p.social = RDF::URI.new('') #empty uri + refute p.valid? + + p.social = RDF::URI.new('wrong/uri') + refute p.valid? + + p.social = RDF::URI.new('https://test.com/') + assert p.valid? + end + + def test_object_type_validator + p = Person.new + p.name = 'test' + p.friends = [1] + + refute p.valid? + + new_person = Person.new + p.friends = [new_person] + + refute p.valid? + + new_person.persistent = true + p.friends = [new_person] + + assert p.valid? + end + + def test_value_range_validator + p = RangeTestModel.new + + p.name = "h" + p.multiple_values = [22,11] + p.one_number = 1 + p.weight = 1.1 + + refute p.valid? 
+ assert p.errors[:name][:min] + assert p.errors[:multiple_values][:min] + assert p.errors[:one_number][:min] + assert p.errors[:weight][:min] + + p.name = "hello hello" + p.multiple_values = [22,11,11,33,44, 55, 66] + p.one_number = 12 + p.weight = 12.1 + + refute p.valid? + assert p.errors[:name][:max] + assert p.errors[:multiple_values][:max] + assert p.errors[:one_number][:max] + assert p.errors[:weight][:max] + + p.name = "hello" + p.multiple_values = [22,11,11,3] + p.one_number = 4 + p.weight = 3.1 + + assert p.valid? + + end + + def test_symmetric_validator_no_list + p1 = SymmetricTestModel.new + p2 = SymmetricTestModel.new + p3 = SymmetricTestModel.new + p1.name = "p1" + p2.name = "p2" + p3.name = "p3" + + p2.save + p3.save + + p1.friend = p2 + + refute p1.valid? + assert p1.errors[:friend][:symmetric] + + p3.friend = p1 + + refute p1.valid? + + p2.friend = p1 + p1.friend = p2 + + assert p1.valid? + + p1.save + + assert p2.valid? + GooTestData.delete_all [SymmetricTestModel] + end + + def test_symmetric_validator_list + p1 = SymmetricTestModel.new + p2 = SymmetricTestModel.new + p3 = SymmetricTestModel.new + p4 = SymmetricTestModel.new + p1.name = "p1" + p2.name = "p2" + p3.name = "p3" + p4.name = "p4" + + p2.save + p3.save + p4.save + + p1.friends = [p2, p3] + + refute p1.valid? + assert p1.errors[:friends][:symmetric] + + p2.friends = [p1, p3, p4] + p3.friends = [p2] + p4.friends = [p2] + + refute p1.valid? + refute p2.valid? + + + p3.friends = [p2, p1] + + assert p1.valid? + p1.save + + assert p3.valid? + p3.save + + + assert p2.valid? + + p2.save + + assert p4.valid? + GooTestData.delete_all [SymmetricTestModel] + end + + def test_distinct_of_validator + p = DistinctOfTestModel.new + p.name = "p1" + p.last_name = "p1" + p.names = ["p1", "p2"] + p.last_names = ["p1", "p2"] + + + refute p.valid? + + p.last_name = "last name" + p.last_names = ["last name 1", "last name 2"] + + assert p.valid? 
+ + p.last_name = "last name" + p.last_names = ["last name 1", "p2"] + + refute p.valid? + + p.last_name = "" + p.last_names = [] + + assert p.valid? + end + + def test_superior_equal_to_validator + p = SuperiorToTestModel.new + p.name = "p" + p.birth_date = DateTime.parse('1998-12-02') + p.death_date = DateTime.parse('1995-12-02') + + refute p.valid? + assert p.errors[:death_date][:superior_equal_to_birth_date] + + p.death_date = DateTime.parse('2023-12-02') + + assert p.valid? + + p.birth_date = nil + + assert p.valid? + end + + def test_inverse_of_validator_no_list + GooTestData.delete_all [InverseOfTestModel] + p1 = InverseOfTestModel.new + p2 = InverseOfTestModel.new + + p1.name = 'p1' + p2.name = 'p2' + + + p2.save + + p1.city = p2 + + refute p1.valid? + assert p1.errors[:city][:inverse_of_state] + + + p2.state = p1 + + assert p1.valid? + + end + + def test_inverse_of_validator_list + GooTestData.delete_all [InverseOfTestModel] + p1 = InverseOfTestModel.new + p2 = InverseOfTestModel.new + p3 = InverseOfTestModel.new + p4 = InverseOfTestModel.new + + p1.name = 'p1' + p2.name = 'p2' + p3.name = 'p3' + p4.name = 'p4' + + p2.save + p3.save + + p1.cities = [p2,p3] + + refute p1.valid? + assert p1.errors[:cities][:inverse_of_states] + + p2.states = [p1, p4] + p3.states = [p2, p4] + + refute p1.valid? + assert p1.errors[:cities][:inverse_of_states] + + p3.states = [p2, p4, p1] + + assert p1.valid? + + end + + + def test_proc_validators + p = ProcValidatorsTestModel.new + p.name = "hi" + p.last_name = "hi" + + refute p.valid? + assert p.errors[:name][:equal_to_test] + assert p.errors[:last_name][:equal_to_test_2] + + p.name = "test" + p.last_name = "test 2" + + assert p.valid? 
+ end +end From 74ea47defc7f6260b045a6c6997bbe6a59c7bf62 Mon Sep 17 00:00:00 2001 From: Syphax bouazzouni Date: Tue, 5 Dec 2023 12:14:56 +0100 Subject: [PATCH 050/106] Merge to master - Release 2.3.5 - Optimize order by and filters queries (#35) * add tests for the new dsl to write property data types * append the property :type values to the :enforce array * update solution mapper to support multilingual * update solution mapper to support multilingual * fix typo ( name ) * add validators tests file * add validator interface module * implement data_type validator * migrate existence validator to the new DSL * migrate uniqueness validator to the new DSL * implement object_type validator with the new DSL * migrate range validator to the new DSL * refactor the enforce module to use the new validators implementation * force to regenerate the id when we update related attribute (named_with) * require the validators implementation * update existence validator to not accept empty to_s objects * update exist? 
test * add symmetric validator tests for no_list and list cases * implement symmetric validator * move re used methods to the parent class * update symmetric code and error message * add distinct of validator tests * implement distinct_of validator * add superior_equal_to validator tests * extract property method to ValidatorBase class * implement superior_equal_to validator * add inverse of validator tests * implement inverse_of validator * use the class method property in distinct of * add proc validator tests * add instance proc validators * fix call_proc validator to test if the returned values are correct * add model_with_yaml_scheme test * implement YAMLScheme module * use YAMLScheme module in Settings module * use platform lang and code refacto * filter by lang in properties * do some refactoring * add unmapped_get to goo resources * update lang filter module to support requested_lang and portal_lang * use the new lang filter module in the solution_mapper * remove the usage of the old lang filter module in map_attributes * add request language global variable * fix datatype check for list values * remove old unused test if clause * for no unmapped values cast them to object before sending * for resource unmapped_set merge new value if an array * prevent add_object_to_model if no_lang and previous value exist * move from the mapper lang_filter related code to lang_filter module * move internal lang filter module methods to private section * add request_store gem to save request language globally * save requested language in model_load options * force requested_lang and portal_langs to be upcase and symbol * change methodes/vars names * get the last item in objects instead of passing the current object * Revert "get the last item in objects instead of passing the current object" This reverts commit 996922a9dfae06da9a7214e1322e1d403d3f1b39. 
* handle this case where values is nil in save_model_values * handle the casf of nil values for the SuperiorEqualTo validator * add onUpdate callback tests * implement enforce_callback to run an attribute callback * move the attribute default callback to the save method * implement onUpdate DSL in the ressource settings * call to the attributes onUpdate callback in the save method * in validators bring attribute if needed * make superior_equal_to works for list attributes * add email validator test * implement email validator * add filters patterns to select variables * make make regex filter no-case sensitive * if requested_lang = 'all' return all * support select multilanguage * show the values with their corresponding language * use @attributes_to_translate * change methode name * remove platform languages * add complex_order_by unit test * refactor query_builder to extract internal_variables as instance variable * update order_by to work for joined patterns (object attributes) * downcase lang key * Fix the issue of undefined 'id' of the language filter module * Show literal attribute if we requested all the languages * Use portal language by default in the language filter module * group unmapped properties by lang * Feature: group unmapped properties by language (#38) * group unmapped properties by lang * downcase language keys of unmapped properties --------- Co-authored-by: Syphax bouazzouni * assert that pre in an array in get_value_object * add label to attributes_to_translate * update define_method * update solution mapper * update get_preload_value * Feature: Support multi lingual - add show_language argument to the attributes getters (#39) * update define_method * update solution mapper * update get_preload_value * fix save_model_values if unmmaped condition * fix getters for list attributes to not take only the first value * remove the languages hash for the unmapped if not a mutli langual asked * move some language helper from the mapper to the 
lang_filter module * move @requested_lang variable from the mapper to the lang_filter module * remove no more used @attributes_to_translate variable in lang_filter * fix save_model_values method to not save RDF:Literal object but a string * remove not used method in lang filter module * refecator and rename some methods of the lang_filter module * use the new name of the lang filter methods in the solution mapper * replace the getters argument to show languages from :show_all_languages to :show_languages: true * catch transform_values of unmapped if it is nil * change the getters show_all_languages argument from to include_languages * make the map_attributes handle the option showing all the languages * fix order by an attribute that is already filtered * don't add the filtered variables to the select clause of the query * add filters patterns to select variables * fix pagination with order_by with filter that returns empty pages for 4store * include the in the select variables filtered variables * optimize pagination query by not re-doing the filters and order in the include query --------- Co-authored-by: HADDAD Zineddine --- lib/goo/base/resource.rb | 2 +- lib/goo/base/where.rb | 5 + lib/goo/sparql/mixins/solution_lang_filter.rb | 4 +- lib/goo/sparql/query_builder.rb | 87 +- lib/goo/sparql/solutions_mapper.rb | 878 +++++++++--------- lib/goo/validators/enforce.rb | 2 + .../validators/implementations/data_type.rb | 27 +- lib/goo/validators/implementations/email.rb | 22 + .../implementations/superior_equal_to.rb | 4 +- lib/goo/validators/validator.rb | 2 + test/test_validators.rb | 11 +- test/test_where.rb | 13 + 12 files changed, 569 insertions(+), 488 deletions(-) create mode 100644 lib/goo/validators/implementations/email.rb diff --git a/lib/goo/base/resource.rb b/lib/goo/base/resource.rb index 26ac5859..02709f5e 100644 --- a/lib/goo/base/resource.rb +++ b/lib/goo/base/resource.rb @@ -137,7 +137,7 @@ def unmmaped_to_array end def unmapped(*args) - 
@unmapped.transform_values do |language_values| + @unmapped&.transform_values do |language_values| self.class.not_show_all_languages?(language_values, args) ? language_values.values.flatten: language_values end end diff --git a/lib/goo/base/where.rb b/lib/goo/base/where.rb index 5bc0fa8c..81cd26ce 100644 --- a/lib/goo/base/where.rb +++ b/lib/goo/base/where.rb @@ -209,6 +209,11 @@ def process_query_intl(count=false) options_load[:ids] = ids if ids models_by_id = {} + if @page_i && (options_load[:models].length > 0) + options_load.delete(:filters) + options_load.delete(:order_by) + end + if (@page_i && options_load[:models].length > 0) || (!@page_i && (@count.nil? || @count > 0)) models_by_id = Goo::SPARQL::Queries.model_load(options_load) diff --git a/lib/goo/sparql/mixins/solution_lang_filter.rb b/lib/goo/sparql/mixins/solution_lang_filter.rb index 8980dcdc..49d62c8e 100644 --- a/lib/goo/sparql/mixins/solution_lang_filter.rb +++ b/lib/goo/sparql/mixins/solution_lang_filter.rb @@ -25,8 +25,8 @@ def fill_models_with_all_languages(models_by_id) end - def set_model_value(model, predicate, values) - set_value(model, predicate, values) do + def set_model_value(model, predicate, values, value) + set_value(model, predicate, value) do model.send("#{predicate}=", values, on_load: true) end end diff --git a/lib/goo/sparql/query_builder.rb b/lib/goo/sparql/query_builder.rb index 40e888d0..31880859 100644 --- a/lib/goo/sparql/query_builder.rb +++ b/lib/goo/sparql/query_builder.rb @@ -25,21 +25,18 @@ def build_select_query(ids, variables, graphs, patterns, query_options, properties_to_include) patterns = graph_match(@collection, @graph_match, graphs, @klass, patterns, query_options, @unions) - - aggregate_projections, aggregate_vars, - variables, optional_patterns = get_aggregate_vars(@aggregate, @collection, graphs, - @klass, @unions, variables) - - @order_by, variables, optional_patterns = init_order_by(@count, @klass, @order_by, optional_patterns, variables,patterns, 
query_options, graphs) variables, patterns = add_some_type_to_id(patterns, query_options, variables) - + aggregate_projections, aggregate_vars, variables, optional_patterns = get_aggregate_vars(@aggregate, @collection, graphs, @klass, @unions, variables) query_filter_str, patterns, optional_patterns, filter_variables = filter_query_strings(@collection, graphs, @klass, optional_patterns, patterns, @query_filters) + @order_by, variables, optional_patterns = init_order_by(@count, @klass, @order_by, optional_patterns, variables,patterns, query_options, graphs) + order_by_str, order_variables = order_by_string + variables = [] if @count variables.delete :some_type - select_distinct(variables, aggregate_projections, filter_variables) + select_distinct(variables, aggregate_projections, filter_variables, order_variables) .from(graphs) .where(patterns) .union_bind_in_where(properties_to_include) @@ -55,7 +52,10 @@ def build_select_query(ids, variables, graphs, patterns, @query.union(*@unions) unless @unions.empty? 
ids_filter(ids) if ids - order_by if @order_by + + + @query.order_by(*order_by_str) if @order_by + put_query_aggregate_vars(aggregate_vars) if aggregate_vars count if @count @@ -117,16 +117,17 @@ def put_query_aggregate_vars(aggregate_vars) self end - def order_by - order_by_str = @order_by.map do |attr, order| + def order_by_string + order_variables = [] + order_str = @order_by&.map do |attr, order| if order.is_a?(Hash) sub_attr, order = order.first - attr = @internal_variables_map[sub_attr] + attr = @internal_variables_map.select{ |internal_var, attr_var| attr_var.eql?({attr => sub_attr}) || attr_var.eql?(sub_attr)}.keys.last end + order_variables << attr "#{order.to_s.upcase}(?#{attr})" end - @query.order_by(*order_by_str) - self + [order_str,order_variables] end def from(graphs) @@ -141,10 +142,11 @@ def from(graphs) self end - def select_distinct(variables, aggregate_projections, filter_variables) + def select_distinct(variables, aggregate_variables, filter_variables, order_variables) select_vars = variables.dup - reject_aggregations_from_vars(select_vars, aggregate_projections) if aggregate_projections - select_vars = (select_vars + filter_variables).uniq if @page # Fix for 4store pagination with a filter + reject_aggregations_from_vars(select_vars, aggregate_variables) if aggregate_variables + # Fix for 4store pagination with a filter https://github.com/ontoportal-lirmm/ontologies_api/issues/25 + select_vars = (select_vars + filter_variables + order_variables).uniq if @page @query = @query.select(*select_vars).distinct(true) self end @@ -165,23 +167,24 @@ def ids_filter(ids) def patterns_for_match(klass, attr, value, graphs, patterns, unions, internal_variables, subject = :id, in_union = false, in_aggregate = false, query_options = {}, collection = nil) + new_internal_var = value if value.respond_to?(:each) || value.instance_of?(Symbol) next_pattern = value.instance_of?(Array) ? 
value.first : value #for filters next_pattern = { next_pattern => [] } if next_pattern.instance_of?(Symbol) - value = "internal_join_var_#{internal_variables.length}".to_sym + new_internal_var = "internal_join_var_#{internal_variables.length}".to_sym if in_aggregate - value = "#{attr}_agg_#{in_aggregate}".to_sym + new_internal_var = "#{attr}_agg_#{in_aggregate}".to_sym end - internal_variables << value - @internal_variables_map[attr] = value + internal_variables << new_internal_var + @internal_variables_map[new_internal_var] = value.empty? ? attr : {attr => value} end add_rules(attr, klass, query_options) graph, pattern = - query_pattern(klass, attr, value: value, subject: subject, collection: collection) + query_pattern(klass, attr, value: new_internal_var, subject: subject, collection: collection) if pattern if !in_union patterns << pattern @@ -194,7 +197,7 @@ def patterns_for_match(klass, attr, value, graphs, patterns, unions, range = klass.range(attr) next_pattern.each do |next_attr, next_value| patterns_for_match(range, next_attr, next_value, graphs, - patterns, unions, internal_variables, subject = value, + patterns, unions, internal_variables, subject = new_internal_var, in_union, in_aggregate, collection = collection) end end @@ -270,15 +273,35 @@ def init_order_by(count, klass, order_by, optional_patterns, variables, patterns order_by.each do |attr, direction| if direction.is_a?(Hash) + # TODO this part can be improved/refactored, the complexity was added because order by don't work + # if the pattern is in the mandatory ones (variable `patterns`) + # and optional (variable `optional_patterns`) at the same time sub_attr, direction = direction.first graph_match_iteration = Goo::Base::PatternIteration.new(Goo::Base::Pattern.new({attr => [sub_attr]})) old_internal = internal_variables.dup + old_patterns = optional_patterns.dup + walk_pattern(klass, graph_match_iteration, graphs, optional_patterns, @unions, internal_variables, in_aggregate = false, 
query_options, @collection) - variables << (internal_variables - old_internal).last + new_variables = (internal_variables - old_internal) + internal_variables.delete(new_variables) + new_patterns = optional_patterns - old_patterns + already_existent_pattern = patterns.select{|x| x[1].eql?(new_patterns.last[1])}.first + + if already_existent_pattern + already_existent_variable = already_existent_pattern[2] + optional_patterns = old_patterns + key = @internal_variables_map.select{|key, value| key.eql?(new_variables.last)}.keys.first + @internal_variables_map[key] = (already_existent_variable || new_variables.last) if key + + #variables << already_existent_variable + else + #variables << new_variables.last + end + else quad = query_pattern(klass, attr) optional_patterns << quad[1] - variables << attr + #variables << attr end #patterns << quad[1] @@ -325,7 +348,12 @@ def query_filter_sparql(klass, filter, filter_patterns, filter_graphs, end filter_var = inspected_patterns[filter_pattern_match] - unless filter_operation.value.instance_of?(Goo::Filter) + if filter_operation.value.instance_of?(Goo::Filter) + filter_operations << "#{sparql_op_string(filter_operation.operator)}" + query_filter_sparql(klass, filter_operation.value, filter_patterns, + filter_graphs, filter_operations, + internal_variables, inspected_patterns, collection) + else case filter_operation.operator when :unbound filter_operations << "!BOUND(?#{filter_var.to_s})" @@ -349,11 +377,6 @@ def query_filter_sparql(klass, filter, filter_patterns, filter_graphs, " #{value.to_ntriples}") end - else - filter_operations << "#{sparql_op_string(filter_operation.operator)}" - query_filter_sparql(klass, filter_operation.value, filter_patterns, - filter_graphs, filter_operations, - internal_variables, inspected_patterns, collection) end end end @@ -399,7 +422,7 @@ def add_some_type_to_id(patterns, query_options, variables) end def internal_variables - @internal_variables_map.values + @internal_variables_map.keys end 
end end diff --git a/lib/goo/sparql/solutions_mapper.rb b/lib/goo/sparql/solutions_mapper.rb index 879c1ff7..64d258d5 100644 --- a/lib/goo/sparql/solutions_mapper.rb +++ b/lib/goo/sparql/solutions_mapper.rb @@ -1,439 +1,439 @@ -module Goo - module SPARQL - class SolutionMapper - BNODES_TUPLES = Struct.new(:id, :attribute) - - def initialize(aggregate_projections, bnode_extraction, embed_struct, - incl_embed, klass_struct, models_by_id, - properties_to_include, unmapped, variables, ids, options) - - @aggregate_projections = aggregate_projections - @bnode_extraction = bnode_extraction - @embed_struct = embed_struct - @incl_embed = incl_embed - @klass_struct = klass_struct - @models_by_id = models_by_id - @properties_to_include = properties_to_include - @unmapped = unmapped - @variables = variables - @ids = ids - @klass = options[:klass] - @read_only = options[:read_only] - @incl = options[:include] - @count = options[:count] - @collection = options[:collection] - @options = options - end - - def map_each_solutions(select) - found = Set.new - objects_new = {} - list_attributes = Set.new(@klass.attributes(:list)) - all_attributes = Set.new(@klass.attributes(:all)) - - @lang_filter = Goo::SPARQL::Solution::LanguageFilter.new(requested_lang: @options[:requested_lang].to_s, unmapped: @unmapped, - list_attributes: list_attributes) - - select.each_solution do |sol| - - next if sol[:some_type] && @klass.type_uri(@collection) != sol[:some_type] - return sol[:count_var].object if @count - - found.add(sol[:id]) - id = sol[:id] - - create_model(id) - - if @bnode_extraction - add_bnode_to_model(sol) - next - end - - if @unmapped - add_unmapped_to_model(sol) - next - end - - if @aggregate_projections - add_aggregations_to_model(sol) - next - end - - predicate = sol[:attributeProperty].to_s.to_sym - - next if predicate.nil? 
|| !all_attributes.include?(predicate) - - object = sol[:attributeObject] - - # bnodes - if bnode_id?(object, predicate) - objects_new = bnode_id_tuple(id, object, objects_new, predicate) - next - end - - objects, objects_new = get_value_object(id, objects_new, object, list_attributes, predicate) - add_object_to_model(id, objects, predicate) - end - - # for this moment we are not going to enrich models , maybe we will use it if the results are empty - @lang_filter.fill_models_with_all_languages(@models_by_id) - - init_unloaded_attributes(found, list_attributes) - - return @models_by_id if @bnode_extraction - - model_set_collection_attributes(@models_by_id, objects_new) - - # remove from models_by_id elements that were not touched - @models_by_id.select! { |k, _m| found.include?(k) } - - models_set_all_persistent(@models_by_id) unless @read_only - - # next level of embed attributes - include_embed_attributes(@incl_embed, objects_new) if @incl_embed && !@incl_embed.empty? - - # bnodes - blank_nodes = objects_new.select { |id, _obj| id.is_a?(RDF::Node) && id.anonymous? } - include_bnodes(blank_nodes, @models_by_id) unless blank_nodes.empty? - - models_unmapped_to_array(@models_by_id) if @unmapped - - - @models_by_id - end - - private - - def init_unloaded_attributes(found, list_attributes) - return if @incl.nil? - - # Here we are setting to nil all attributes that have been included but not found in the triplestore - found.uniq.each do |model_id| - m = @models_by_id[model_id] - @incl.each do |attr_to_incl| - is_handler = m.respond_to?(:handler?) && m.class.handler?(attr_to_incl) - next if attr_to_incl.to_s.eql?('unmapped') || is_handler - - loaded = m.respond_to?('loaded_attributes') && m.loaded_attributes.include?(attr_to_incl) - is_list = list_attributes.include?(attr_to_incl) - is_struct = m.respond_to?(:klass) - - # Go through all models queried - if is_struct - m[attr_to_incl] = [] if is_list && m[attr_to_incl].nil? 
- elsif is_list && (!loaded || m.send(attr_to_incl.to_s).nil?) - m.send("#{attr_to_incl}=", [], on_load: true) - elsif !loaded && !is_list && m.respond_to?("#{attr_to_incl}=") - m.send("#{attr_to_incl}=", nil, on_load: true) - end - end - end - end - - def get_value_object(id, objects_new, object, list_attributes, predicate) - object = object.object if object && !(object.is_a? RDF::URI) - range_for_v = @klass.range(predicate) - - - if object.is_a?(RDF::URI) && (predicate != :id) && !range_for_v.nil? - if objects_new.include?(object) - object = objects_new[object] - elsif !range_for_v.inmutable? - pre_val = get_preload_value(id, object, predicate) - object, objects_new = if !@read_only - preloaded_or_new_object(object, objects_new, pre_val, predicate) - else - # depedent read only - preloaded_or_new_struct(object, objects_new, pre_val, predicate) - end - else - object = range_for_v.find(object).first - end - end - - if list_attributes.include?(predicate) - pre = @klass_struct ? @models_by_id[id][predicate] : @models_by_id[id].instance_variable_get("@#{predicate}") - - if object.nil? - object = pre.nil? ? [] : pre - else - object = pre.nil? ? [object] : (pre.dup << object) - object.uniq! - end - - end - [object, objects_new] - end - - def add_object_to_model(id, objects, predicate) - - if @models_by_id[id].respond_to?(:klass) - @models_by_id[id][predicate] = objects unless objects.nil? && !@models_by_id[id][predicate].nil? - elsif !@models_by_id[id].class.handler?(predicate) && - !(objects.nil? && !@models_by_id[id].instance_variable_get("@#{predicate}").nil?) 
&& - predicate != :id - @lang_filter.set_model_value(@models_by_id[id], predicate, objects) - end - end - - def get_preload_value(id, object, predicate) - pre_val = nil - if predicate_preloaded?(id, predicate) - pre_val = preloaded_value(id, predicate) - pre_val = pre_val.select { |x| x.id == object }.first if pre_val.is_a?(Array) - end - pre_val - end - - def preloaded_or_new_object(object, objects_new, pre_val, predicate) - object = pre_val || @klass.range_object(predicate, object) - objects_new[object.id] = object - [object, objects_new] - end - - def preloaded_or_new_struct(object, objects_new, pre_val, predicate) - struct = pre_val || @embed_struct[predicate].new - struct.id = object - struct.klass = @klass.range(predicate) - objects_new[struct.id] = struct - [struct, objects_new] - end - - def preloaded_value(id, predicate) - if !@read_only - @models_by_id[id].instance_variable_get("@#{predicate}") - - else - @models_by_id[id][predicate] - end - end - - def predicate_preloaded?(id, predicate) - @models_by_id[id] && - (@models_by_id[id].respond_to?(:klass) || @models_by_id[id].loaded_attributes.include?(predicate)) - end - - def bnode_id?(object, predicate) - object.is_a?(RDF::Node) && object.anonymous? 
&& @incl.include?(predicate) - end - - def bnode_id_tuple(id, object, objects_new, predicate) - range = @klass.range(predicate) - objects_new[object] = BNODES_TUPLES.new(id, predicate) if range.respond_to?(:new) - objects_new - end - - def add_bnode_to_model(sol) - id = sol[:id] - struct = create_struct(@bnode_extraction, @models_by_id, sol, @variables) - @models_by_id[id].send("#{@bnode_extraction}=", struct) - end - - def create_model(id) - @models_by_id[id] = create_class_model(id, @klass, @klass_struct) unless @models_by_id.include?(id) - end - - - def create_struct(bnode_extraction, models_by_id, sol, variables) - list_attributes = Set.new(@klass.attributes(:list)) - struct = @klass.range(bnode_extraction).new - variables.each do |v| - next if v == :id - - svalue = sol[v] - struct[v] = svalue.is_a?(RDF::Node) ? svalue : svalue.object - end - if list_attributes.include?(bnode_extraction) - pre = models_by_id[sol[:id]].instance_variable_get("@#{bnode_extraction}") - pre = pre ? (pre.dup << struct) : [struct] - struct = pre - end - struct - end - - def create_class_model(id, klass, klass_struct) - klass_model = klass_struct ? klass_struct.new : klass.new - klass_model.id = id - klass_model.persistent = true unless klass_struct - klass_model.klass = klass if klass_struct - klass_model - end - - def models_unmapped_to_array(models_by_id) - models_by_id.each do |_idm, m| - @lang_filter.models_unmapped_to_array(m) - end - end - - - def is_multiple_langs? 
- return true if @requested_lang.is_a?(Array) || @requested_lang.eql?(:ALL) - false - end - - def include_bnodes(bnodes, models_by_id) - # group by attribute - attrs = bnodes.map { |_x, y| y.attribute }.uniq - attrs.each do |attr| - struct = @klass.range(attr) - - # bnodes that are in a range of goo ground models - # for example parents and children in LD class models - # we skip this cases for the moment - next if struct.respond_to?(:model_name) - - bnode_attrs = struct.new.to_h.keys - ids = bnodes.select { |_x, y| y.attribute == attr }.map { |_x, y| y.id } - @klass.where.models(models_by_id.select { |x, _y| ids.include?(x) }.values) - .in(@collection) - .include(bnode: { attr => bnode_attrs }).all - end - end - - def include_embed_attributes(incl_embed, objects_new) - incl_embed.each do |attr, next_attrs| - # anything to join ? - attr_range = @klass.range(attr) - next if attr_range.nil? - - range_objs = objects_new.select do |_id, obj| - obj.instance_of?(attr_range) || (obj.respond_to?(:klass) && obj[:klass] == attr_range) - end.values - next if range_objs.empty? - - range_objs.uniq! 
- query = attr_range.where.models(range_objs).in(@collection).include(*next_attrs) - query = query.read_only if @read_only - query.all - end - end - - def models_set_all_persistent(models_by_id) - return unless @ids - - models_by_id.each do |_k, m| - m.persistent = true - end - end - - def model_set_collection_attributes(models_by_id, objects_new) - collection_value = get_collection_value - return unless collection_value - - collection_attribute = @klass.collection_opts - models_by_id.each do |_id, m| - m.send("#{collection_attribute}=", collection_value) - end - objects_new.each do |_id, obj_new| - if obj_new.respond_to?(:klass) - collection_attribute = obj_new[:klass].collection_opts - obj_new[collection_attribute] = collection_value - elsif obj_new.class.respond_to?(:collection_opts) && - obj_new.class.collection_opts.instance_of?(Symbol) - collection_attribute = obj_new.class.collection_opts - obj_new.send("#{collection_attribute}=", collection_value) - end - end - end - - def get_collection_value - collection_value = nil - if @klass.collection_opts.instance_of?(Symbol) - collection_value = @collection.first if @collection.is_a?(Array) && (@collection.length == 1) - collection_value = @collection if @collection.respond_to? :id - end - collection_value - end - - def object_to_array(id, klass_struct, models_by_id, object, predicate) - pre = if klass_struct - models_by_id[id][predicate] - else - models_by_id[id].instance_variable_get("@#{predicate}") - end - if object.nil? && pre.nil? - object = [] - elsif object.nil? && !pre.nil? - object = pre - elsif object - object = !pre ? [object] : (pre.dup << object) - object.uniq! - end - object - end - - def dependent_model_creation(embed_struct, id, models_by_id, object, objects_new, v, options) - read_only = options[:read_only] - if object.is_a?(RDF::URI) && v != :id - range_for_v = @klass.range(v) - if range_for_v - if objects_new.include?(object) - object = objects_new[object] - elsif !range_for_v.inmutable? 
- pre_val = get_pre_val(id, models_by_id, object, v, read_only) - object = get_object_from_range(pre_val, embed_struct, object, objects_new, v, options) - else - object = range_for_v.find(object).first - end - end - end - object - end - - def get_object_from_range(pre_val, embed_struct, object, objects_new, predicate) - range_for_v = @klass.range(predicate) - if !@read_only - object = pre_val || @klass.range_object(predicate, object) - objects_new[object.id] = object - else - # depedent read only - struct = pre_val || embed_struct[predicate].new - struct.id = object - struct.klass = range_for_v - objects_new[struct.id] = struct - object = struct - end - object - end - - def get_pre_val(id, models_by_id, object, predicate) - pre_val = nil - if models_by_id[id] && - ((models_by_id[id].respond_to?(:klass) && models_by_id[id]) || - models_by_id[id].loaded_attributes.include?(predicate)) - pre_val = if !@read_only - models_by_id[id].instance_variable_get("@#{predicate}") - else - models_by_id[id][predicate] - end - - pre_val = pre_val.select { |x| x.id == object }.first if pre_val.is_a?(Array) - end - pre_val - end - - def add_unmapped_to_model(sol) - predicate = sol[:attributeProperty].to_s.to_sym - return unless @properties_to_include[predicate] - - id = sol[:id] - value = sol[:attributeObject] - - @lang_filter.set_unmapped_value(@models_by_id[id], @properties_to_include[predicate][:uri], value) - end - - def add_aggregations_to_model(sol) - id = sol[:id] - @aggregate_projections&.each do |aggregate_key, aggregate_val| - if @models_by_id[id].respond_to?(:add_aggregate) - @models_by_id[id].add_aggregate(aggregate_val[1], aggregate_val[0], sol[aggregate_key].object) - else - (@models_by_id[id].aggregates ||= []) << Goo::Base::AGGREGATE_VALUE.new(aggregate_val[1], - aggregate_val[0], - sol[aggregate_key].object) - end - end - end - end - end -end +module Goo + module SPARQL + class SolutionMapper + BNODES_TUPLES = Struct.new(:id, :attribute) + + def 
initialize(aggregate_projections, bnode_extraction, embed_struct, + incl_embed, klass_struct, models_by_id, + properties_to_include, unmapped, variables, ids, options) + + @aggregate_projections = aggregate_projections + @bnode_extraction = bnode_extraction + @embed_struct = embed_struct + @incl_embed = incl_embed + @klass_struct = klass_struct + @models_by_id = models_by_id + @properties_to_include = properties_to_include + @unmapped = unmapped + @variables = variables + @ids = ids + @klass = options[:klass] + @read_only = options[:read_only] + @incl = options[:include] + @count = options[:count] + @collection = options[:collection] + @options = options + end + + def map_each_solutions(select) + found = Set.new + objects_new = {} + list_attributes = Set.new(@klass.attributes(:list)) + all_attributes = Set.new(@klass.attributes(:all)) + + @lang_filter = Goo::SPARQL::Solution::LanguageFilter.new(requested_lang: @options[:requested_lang].to_s, unmapped: @unmapped, + list_attributes: list_attributes) + + select.each_solution do |sol| + + next if sol[:some_type] && @klass.type_uri(@collection) != sol[:some_type] + return sol[:count_var].object if @count + + found.add(sol[:id]) + id = sol[:id] + + create_model(id) + + if @bnode_extraction + add_bnode_to_model(sol) + next + end + + if @unmapped + add_unmapped_to_model(sol) + next + end + + if @aggregate_projections + add_aggregations_to_model(sol) + next + end + + predicate = sol[:attributeProperty].to_s.to_sym + + next if predicate.nil? 
|| !all_attributes.include?(predicate) + + object = sol[:attributeObject] + + # bnodes + if bnode_id?(object, predicate) + objects_new = bnode_id_tuple(id, object, objects_new, predicate) + next + end + + objects, objects_new = get_value_object(id, objects_new, object, list_attributes, predicate) + add_object_to_model(id, objects, object, predicate) + end + + # for this moment we are not going to enrich models , maybe we will use it if the results are empty + @lang_filter.fill_models_with_all_languages(@models_by_id) + + init_unloaded_attributes(found, list_attributes) + + return @models_by_id if @bnode_extraction + + model_set_collection_attributes(@models_by_id, objects_new) + + # remove from models_by_id elements that were not touched + @models_by_id.select! { |k, _m| found.include?(k) } + + models_set_all_persistent(@models_by_id) unless @read_only + + # next level of embed attributes + include_embed_attributes(@incl_embed, objects_new) if @incl_embed && !@incl_embed.empty? + + # bnodes + blank_nodes = objects_new.select { |id, _obj| id.is_a?(RDF::Node) && id.anonymous? } + include_bnodes(blank_nodes, @models_by_id) unless blank_nodes.empty? + + models_unmapped_to_array(@models_by_id) if @unmapped + + + @models_by_id + end + + private + + def init_unloaded_attributes(found, list_attributes) + return if @incl.nil? + + # Here we are setting to nil all attributes that have been included but not found in the triplestore + found.uniq.each do |model_id| + m = @models_by_id[model_id] + @incl.each do |attr_to_incl| + is_handler = m.respond_to?(:handler?) && m.class.handler?(attr_to_incl) + next if attr_to_incl.to_s.eql?('unmapped') || is_handler + + loaded = m.respond_to?('loaded_attributes') && m.loaded_attributes.include?(attr_to_incl) + is_list = list_attributes.include?(attr_to_incl) + is_struct = m.respond_to?(:klass) + + # Go through all models queried + if is_struct + m[attr_to_incl] = [] if is_list && m[attr_to_incl].nil? 
+ elsif is_list && (!loaded || m.send(attr_to_incl.to_s).nil?) + m.send("#{attr_to_incl}=", [], on_load: true) + elsif !loaded && !is_list && m.respond_to?("#{attr_to_incl}=") + m.send("#{attr_to_incl}=", nil, on_load: true) + end + end + end + end + + def get_value_object(id, objects_new, object, list_attributes, predicate) + object = object.object if object && !(object.is_a? RDF::URI) + range_for_v = @klass.range(predicate) + + + if object.is_a?(RDF::URI) && (predicate != :id) && !range_for_v.nil? + if objects_new.include?(object) + object = objects_new[object] + elsif !range_for_v.inmutable? + pre_val = get_preload_value(id, object, predicate) + object, objects_new = if !@read_only + preloaded_or_new_object(object, objects_new, pre_val, predicate) + else + # depedent read only + preloaded_or_new_struct(object, objects_new, pre_val, predicate) + end + else + object = range_for_v.find(object).first + end + end + + if list_attributes.include?(predicate) + pre = @klass_struct ? @models_by_id[id][predicate] : @models_by_id[id].instance_variable_get("@#{predicate}") + + if object.nil? + object = pre.nil? ? [] : pre + else + object = pre.nil? ? [object] : (Array(pre).dup << object) + object.uniq! + end + + end + [object, objects_new] + end + + def add_object_to_model(id, objects, current_obj, predicate) + + if @models_by_id[id].respond_to?(:klass) + @models_by_id[id][predicate] = objects unless objects.nil? && !@models_by_id[id][predicate].nil? + elsif !@models_by_id[id].class.handler?(predicate) && + !(objects.nil? && !@models_by_id[id].instance_variable_get("@#{predicate}").nil?) 
&& + predicate != :id + @lang_filter.set_model_value(@models_by_id[id], predicate, objects, current_obj) + end + end + + def get_preload_value(id, object, predicate) + pre_val = nil + if predicate_preloaded?(id, predicate) + pre_val = preloaded_value(id, predicate) + pre_val = pre_val.select { |x| x.respond_to?(:id) && (x.id == object) }.first if pre_val.is_a?(Array) + end + pre_val + end + + def preloaded_or_new_object(object, objects_new, pre_val, predicate) + object = pre_val || @klass.range_object(predicate, object) + objects_new[object.id] = object + [object, objects_new] + end + + def preloaded_or_new_struct(object, objects_new, pre_val, predicate) + struct = pre_val || @embed_struct[predicate].new + struct.id = object + struct.klass = @klass.range(predicate) + objects_new[struct.id] = struct + [struct, objects_new] + end + + def preloaded_value(id, predicate) + if !@read_only + @models_by_id[id].instance_variable_get("@#{predicate}") + + else + @models_by_id[id][predicate] + end + end + + def predicate_preloaded?(id, predicate) + @models_by_id[id] && + (@models_by_id[id].respond_to?(:klass) || @models_by_id[id].loaded_attributes.include?(predicate)) + end + + def bnode_id?(object, predicate) + object.is_a?(RDF::Node) && object.anonymous? 
&& @incl.include?(predicate) + end + + def bnode_id_tuple(id, object, objects_new, predicate) + range = @klass.range(predicate) + objects_new[object] = BNODES_TUPLES.new(id, predicate) if range.respond_to?(:new) + objects_new + end + + def add_bnode_to_model(sol) + id = sol[:id] + struct = create_struct(@bnode_extraction, @models_by_id, sol, @variables) + @models_by_id[id].send("#{@bnode_extraction}=", struct) + end + + def create_model(id) + @models_by_id[id] = create_class_model(id, @klass, @klass_struct) unless @models_by_id.include?(id) + end + + + def create_struct(bnode_extraction, models_by_id, sol, variables) + list_attributes = Set.new(@klass.attributes(:list)) + struct = @klass.range(bnode_extraction).new + variables.each do |v| + next if v == :id + + svalue = sol[v] + struct[v] = svalue.is_a?(RDF::Node) ? svalue : svalue.object + end + if list_attributes.include?(bnode_extraction) + pre = models_by_id[sol[:id]].instance_variable_get("@#{bnode_extraction}") + pre = pre ? (pre.dup << struct) : [struct] + struct = pre + end + struct + end + + def create_class_model(id, klass, klass_struct) + klass_model = klass_struct ? klass_struct.new : klass.new + klass_model.id = id + klass_model.persistent = true unless klass_struct + klass_model.klass = klass if klass_struct + klass_model + end + + def models_unmapped_to_array(models_by_id) + models_by_id.each do |_idm, m| + @lang_filter.models_unmapped_to_array(m) + end + end + + + def is_multiple_langs? 
+ return true if @requested_lang.is_a?(Array) || @requested_lang.eql?(:ALL) + false + end + + def include_bnodes(bnodes, models_by_id) + # group by attribute + attrs = bnodes.map { |_x, y| y.attribute }.uniq + attrs.each do |attr| + struct = @klass.range(attr) + + # bnodes that are in a range of goo ground models + # for example parents and children in LD class models + # we skip this cases for the moment + next if struct.respond_to?(:model_name) + + bnode_attrs = struct.new.to_h.keys + ids = bnodes.select { |_x, y| y.attribute == attr }.map { |_x, y| y.id } + @klass.where.models(models_by_id.select { |x, _y| ids.include?(x) }.values) + .in(@collection) + .include(bnode: { attr => bnode_attrs }).all + end + end + + def include_embed_attributes(incl_embed, objects_new) + incl_embed.each do |attr, next_attrs| + # anything to join ? + attr_range = @klass.range(attr) + next if attr_range.nil? + + range_objs = objects_new.select do |_id, obj| + obj.instance_of?(attr_range) || (obj.respond_to?(:klass) && obj[:klass] == attr_range) + end.values + next if range_objs.empty? + + range_objs.uniq! 
+ query = attr_range.where.models(range_objs).in(@collection).include(*next_attrs) + query = query.read_only if @read_only + query.all + end + end + + def models_set_all_persistent(models_by_id) + return unless @ids + + models_by_id.each do |_k, m| + m.persistent = true + end + end + + def model_set_collection_attributes(models_by_id, objects_new) + collection_value = get_collection_value + return unless collection_value + + collection_attribute = @klass.collection_opts + models_by_id.each do |_id, m| + m.send("#{collection_attribute}=", collection_value) + end + objects_new.each do |_id, obj_new| + if obj_new.respond_to?(:klass) + collection_attribute = obj_new[:klass].collection_opts + obj_new[collection_attribute] = collection_value + elsif obj_new.class.respond_to?(:collection_opts) && + obj_new.class.collection_opts.instance_of?(Symbol) + collection_attribute = obj_new.class.collection_opts + obj_new.send("#{collection_attribute}=", collection_value) + end + end + end + + def get_collection_value + collection_value = nil + if @klass.collection_opts.instance_of?(Symbol) + collection_value = @collection.first if @collection.is_a?(Array) && (@collection.length == 1) + collection_value = @collection if @collection.respond_to? :id + end + collection_value + end + + def object_to_array(id, klass_struct, models_by_id, object, predicate) + pre = if klass_struct + models_by_id[id][predicate] + else + models_by_id[id].instance_variable_get("@#{predicate}") + end + if object.nil? && pre.nil? + object = [] + elsif object.nil? && !pre.nil? + object = pre + elsif object + object = !pre ? [object] : (pre.dup << object) + object.uniq! + end + object + end + + def dependent_model_creation(embed_struct, id, models_by_id, object, objects_new, v, options) + read_only = options[:read_only] + if object.is_a?(RDF::URI) && v != :id + range_for_v = @klass.range(v) + if range_for_v + if objects_new.include?(object) + object = objects_new[object] + elsif !range_for_v.inmutable? 
+ pre_val = get_pre_val(id, models_by_id, object, v, read_only) + object = get_object_from_range(pre_val, embed_struct, object, objects_new, v, options) + else + object = range_for_v.find(object).first + end + end + end + object + end + + def get_object_from_range(pre_val, embed_struct, object, objects_new, predicate) + range_for_v = @klass.range(predicate) + if !@read_only + object = pre_val || @klass.range_object(predicate, object) + objects_new[object.id] = object + else + # depedent read only + struct = pre_val || embed_struct[predicate].new + struct.id = object + struct.klass = range_for_v + objects_new[struct.id] = struct + object = struct + end + object + end + + def get_pre_val(id, models_by_id, object, predicate) + pre_val = nil + if models_by_id[id] && + ((models_by_id[id].respond_to?(:klass) && models_by_id[id]) || + models_by_id[id].loaded_attributes.include?(predicate)) + pre_val = if !@read_only + models_by_id[id].instance_variable_get("@#{predicate}") + else + models_by_id[id][predicate] + end + + pre_val = pre_val.select { |x| x.id == object }.first if pre_val.is_a?(Array) + end + pre_val + end + + def add_unmapped_to_model(sol) + predicate = sol[:attributeProperty].to_s.to_sym + return unless @properties_to_include[predicate] + + id = sol[:id] + value = sol[:attributeObject] + + @lang_filter.set_unmapped_value(@models_by_id[id], @properties_to_include[predicate][:uri], value) + end + + def add_aggregations_to_model(sol) + id = sol[:id] + @aggregate_projections&.each do |aggregate_key, aggregate_val| + if @models_by_id[id].respond_to?(:add_aggregate) + @models_by_id[id].add_aggregate(aggregate_val[1], aggregate_val[0], sol[aggregate_key].object) + else + (@models_by_id[id].aggregates ||= []) << Goo::Base::AGGREGATE_VALUE.new(aggregate_val[1], + aggregate_val[0], + sol[aggregate_key].object) + end + end + end + end + end +end diff --git a/lib/goo/validators/enforce.rb b/lib/goo/validators/enforce.rb index 3c90e204..d6f3816d 100644 --- 
a/lib/goo/validators/enforce.rb +++ b/lib/goo/validators/enforce.rb @@ -41,6 +41,8 @@ def enforce(inst,attr,value) check Goo::Validators::DataType, inst, attr, value, opt, Float when :symmetric check Goo::Validators::Symmetric, inst, attr, value, opt + when :email + check Goo::Validators::Email, inst, attr, value, opt when /^distinct_of_/ check Goo::Validators::DistinctOf, inst, attr, value, opt, opt when /^superior_equal_to_/ diff --git a/lib/goo/validators/implementations/data_type.rb b/lib/goo/validators/implementations/data_type.rb index 0ea65ab3..04f46d0c 100644 --- a/lib/goo/validators/implementations/data_type.rb +++ b/lib/goo/validators/implementations/data_type.rb @@ -29,16 +29,16 @@ def enforce_type(type, value) return true if value.nil? if type == :boolean - return self.enforce_type_boolean(value) + self.enforce_type_boolean(value) elsif type.eql?(:uri) || type.eql?(RDF::URI) - return self.enforce_type_uri(value) + self.enforce_type_uri(value) elsif type.eql?(:uri) || type.eql?(Array) - return value.is_a? Array + value.is_a? Array else if value.is_a? Array - return value.select{|x| !x.is_a?(type)}.empty? + value.select{|x| !x.is_a?(type)}.empty? else - return value.is_a? type + value.is_a? type end end @@ -47,19 +47,28 @@ def enforce_type(type, value) def enforce_type_uri(value) return true if value.nil? - value.is_a?(RDF::URI) && value.valid? + if value.kind_of? Array + value.select { |x| !is_a_uri?(x) }.empty? + else + is_a_uri?(value) + end + end def enforce_type_boolean(value) if value.kind_of? Array - return value.select { |x| !is_a_boolean?(x) }.empty? + value.select { |x| !is_a_boolean?(x) }.empty? else - return is_a_boolean?(value) + is_a_boolean?(value) end end def is_a_boolean?(value) - return (value.class == TrueClass) || (value.class == FalseClass) + (value.class == TrueClass) || (value.class == FalseClass) + end + + def is_a_uri?(value) + value.is_a?(RDF::URI) && value.valid? 
end end end diff --git a/lib/goo/validators/implementations/email.rb b/lib/goo/validators/implementations/email.rb new file mode 100644 index 00000000..f8405714 --- /dev/null +++ b/lib/goo/validators/implementations/email.rb @@ -0,0 +1,22 @@ +module Goo + module Validators + class Email < ValidatorBase + include Validator + EMAIL_REGEXP = /\A[\w+\-.]+@[a-z\d\-]+(\.[a-z\d\-]+)*\.[a-z]+\z/i + key :email + + error_message ->(obj) { + if @value.kind_of? Array + return "All values in attribute `#{@attr}` must be a valid emails" + else + return "Attribute `#{@attr}` with the value `#{@value}` must be a valid email" + + end + } + + validity_check -> (obj) do + @value.nil? || @value.match?(EMAIL_REGEXP) + end + end + end +end \ No newline at end of file diff --git a/lib/goo/validators/implementations/superior_equal_to.rb b/lib/goo/validators/implementations/superior_equal_to.rb index 91508f30..46676794 100644 --- a/lib/goo/validators/implementations/superior_equal_to.rb +++ b/lib/goo/validators/implementations/superior_equal_to.rb @@ -12,9 +12,9 @@ class SuperiorEqualTo < ValidatorBase validity_check -> (obj) do target_values = self.class.attr_value(@property, @inst) - return true if target_values.empty? + return true if target_values.nil? || target_values.empty? - return @value >= target_values.first + return Array(@value).all? {|v| v.nil? || target_values.all?{|t_v| v >= t_v}} end def initialize(inst, attr, value, key) diff --git a/lib/goo/validators/validator.rb b/lib/goo/validators/validator.rb index e7db80a4..2536f985 100644 --- a/lib/goo/validators/validator.rb +++ b/lib/goo/validators/validator.rb @@ -75,6 +75,8 @@ def equivalent_value?(object1, object2) end def attr_value(attr, object) + object.bring attr if object.respond_to?(:bring?) 
&& object.bring?(attr) + Array(object.send(attr)) end diff --git a/test/test_validators.rb b/test/test_validators.rb index 8795fccf..5110da80 100644 --- a/test/test_validators.rb +++ b/test/test_validators.rb @@ -12,6 +12,8 @@ class Person < Goo::Base::Resource attribute :birth_date, enforce: [ :date_time ] attribute :male, enforce: [:boolean] attribute :social, enforce: [:uri] + attribute :email, enforce: [:email] + attribute :socials, enforce: [:uri, :list] attribute :weight, enforce: [:float] attribute :friends, enforce: [Person, :list] end @@ -143,9 +145,9 @@ def test_datatype_validators p.birth_date = 100 p.male = "ok" p.social = 100 + p.socials = [100] p.weight = 100 - - + p.email = "test@test" #wrong types are not valid refute p.valid? assert p.errors[:last_name][:string] @@ -155,14 +157,17 @@ def test_datatype_validators assert p.errors[:birth_date][:date_time] assert p.errors[:male][:boolean] assert p.errors[:social][:uri] + assert p.errors[:email][:email] p.last_name = "hello" p.multiple_values = [22,11] p.one_number = 12 p.birth_date = DateTime.parse('1978-01-01') p.male = true - p.social = RDF::URI.new('https://test.com/') + p.social = RDF::URI.new('https://test.com/') + p.socials = [RDF::URI.new('https://test.com/'), RDF::URI.new('https://test.com/')] p.weight = 100.0 + p.email = "test@test.hi.com" #good types are valid assert p.valid? end diff --git a/test/test_where.rb b/test/test_where.rb index 30d933e3..c80fed33 100644 --- a/test/test_where.rb +++ b/test/test_where.rb @@ -262,6 +262,19 @@ def test_embed_two_levels end end + def test_paging_with_filter_order + + f = Goo::Filter.new(:birth_date) > DateTime.parse('1978-01-03') + total_count = Student.where.filter(f).count + page_1 = Student.where.include(:name, :birth_date).page(1, total_count - 1).filter(f).order_by(name: :asc).to_a + refute_empty page_1 + assert page_1.next? 
+ page_2 = Student.where.include(:name, :birth_date).page(page_1.next_page, total_count - 1).filter(f).order_by(name: :asc).to_a + + + refute_empty page_2 + assert_equal total_count, page_1.size + page_2.size + end def test_unique_object_references From 03da25b671d2ffa515b5dce51c6bd35980ae60c7 Mon Sep 17 00:00:00 2001 From: Syphax bouazzouni Date: Thu, 14 Dec 2023 00:20:44 +0100 Subject: [PATCH 051/106] Fix: map attribute for properties that contain a mix of string and URIs (#51) * add tests for the new dsl to write property data types * append the property :type values to the :enforce array * update solution mapper to support multilingual * update solution mapper to support multilingual * fix typo ( name ) * add validators tests file * add validator interface module * implement data_type validator * migrate existence validator to the new DSL * migrate uniqueness validator to the new DSL * implement object_type validator with the new DSL * migrate range validator to the new DSL * refactor the enforce module to use the new validators implementation * force to regenerate the id when we update related attribute (named_with) * require the validators implementation * update existence validator to not accept empty to_s objects * update exist? 
test * add symmetric validator tests for no_list and list cases * implement symmetric validator * move re used methods to the parent class * update symmetric code and error message * add distinct of validator tests * implement distinct_of validator * add superior_equal_to validator tests * extract property method to ValidatorBase class * implement superior_equal_to validator * add inverse of validator tests * implement inverse_of validator * use the class method property in distinct of * add proc validator tests * add instance proc validators * fix call_proc validator to test if the returned values are correct * add model_with_yaml_scheme test * implement YAMLScheme module * use YAMLScheme module in Settings module * use platform lang and code refacto * filter by lang in properties * do some refactoring * add unmapped_get to goo resources * update lang filter module to support requested_lang and portal_lang * use the new lang filter module in the solution_mapper * remove the usage of the old lang filter module in map_attributes * add request language global variable * fix datatype check for list values * remove old unused test if clause * for no unmapped values cast them to object before sending * for resource unmapped_set merge new value if an array * prevent add_object_to_model if no_lang and previous value exist * move from the mapper lang_filter related code to lang_filter module * move internal lang filter module methods to private section * add request_store gem to save request language globally * save requested language in model_load options * force requested_lang and portal_langs to be upcase and symbol * change methodes/vars names * get the last item in objects instead of passing the current object * Revert "get the last item in objects instead of passing the current object" This reverts commit 996922a9dfae06da9a7214e1322e1d403d3f1b39. 
* handle this case where values is nil in save_model_values * handle the casf of nil values for the SuperiorEqualTo validator * add onUpdate callback tests * implement enforce_callback to run an attribute callback * move the attribute default callback to the save method * implement onUpdate DSL in the ressource settings * call to the attributes onUpdate callback in the save method * in validators bring attribute if needed * make superior_equal_to works for list attributes * add email validator test * implement email validator * add filters patterns to select variables * make make regex filter no-case sensitive * if requested_lang = 'all' return all * support select multilanguage * show the values with their corresponding language * use @attributes_to_translate * change methode name * remove platform languages * add complex_order_by unit test * refactor query_builder to extract internal_variables as instance variable * update order_by to work for joined patterns (object attributes) * downcase lang key * Fix the issue of undefined 'id' of the language filter module * Show literal attribute if we requested all the languages * Use portal language by default in the language filter module * group unmapped properties by lang * Feature: group unmapped properties by language (#38) * group unmapped properties by lang * downcase language keys of unmapped properties --------- Co-authored-by: Syphax bouazzouni * assert that pre in an array in get_value_object * add label to attributes_to_translate * update define_method * update solution mapper * update get_preload_value * Feature: Support multi lingual - add show_language argument to the attributes getters (#39) * update define_method * update solution mapper * update get_preload_value * fix save_model_values if unmmaped condition * fix getters for list attributes to not take only the first value * remove the languages hash for the unmapped if not a mutli langual asked * move some language helper from the mapper to the 
lang_filter module * move @requested_lang variable from the mapper to the lang_filter module * remove no more used @attributes_to_translate variable in lang_filter * fix save_model_values method to not save RDF:Literal object but a string * remove not used method in lang filter module * refecator and rename some methods of the lang_filter module * use the new name of the lang filter methods in the solution mapper * replace the getters argument to show languages from :show_all_languages to :show_languages: true * catch transform_values of unmapped if it is nil * change the getters show_all_languages argument from to include_languages * make the map_attributes handle the option showing all the languages * fix order by an attribute that is already filtered * don't add the filtered variables to the select clause of the query * add filters patterns to select variables * fix pagination with order_by with filter that returns empty pages for 4store * include the in the select variables filtered variables * optimize pagination query by not re-doing the filters and order in the include query * fix map attribute for properties that contain a mix of string and URIs --------- Co-authored-by: HADDAD Zineddine --- lib/goo/base/resource.rb | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/lib/goo/base/resource.rb b/lib/goo/base/resource.rb index 02709f5e..2eaf17ad 100644 --- a/lib/goo/base/resource.rb +++ b/lib/goo/base/resource.rb @@ -379,17 +379,17 @@ def self.map_attributes(inst,equivalent_predicates=nil, include_languages: false attr_uri = klass.attribute_uri(attr,inst.collection).to_s if unmapped_string_keys.include?(attr_uri.to_s) || (equivalent_predicates && equivalent_predicates.include?(attr_uri)) - object = nil if !unmapped_string_keys.include?(attr_uri) - equivalent_predicates[attr_uri].each do |eq_attr| - if object.nil? and !unmapped_string_keys[eq_attr].nil? 
- object = unmapped_string_keys[eq_attr].dup - else - if object.is_a?Array - object.concat(unmapped_string_keys[eq_attr]) if !unmapped_string_keys[eq_attr].nil? - end + object = Array(equivalent_predicates[attr_uri].map { |eq_attr| unmapped_string_keys[eq_attr] }).flatten.compact + if include_languages && [RDF::URI, Hash].all?{|c| object.map(&:class).include?(c)} + object = object.reduce({}) do |all, new_v| + new_v = { none: [new_v] } if new_v.is_a?(RDF::URI) + all.merge(new_v) {|_, a, b| a + b } end + elsif include_languages + object = object.first end + if object.nil? inst.send("#{attr}=", list_attrs.include?(attr) ? [] : nil, on_load: true) next From 8ddd2d719617ad082c6964a9efdac153cdd2b48e Mon Sep 17 00:00:00 2001 From: Syphax bouazzouni Date: Wed, 7 Feb 2024 01:33:46 +0100 Subject: [PATCH 052/106] fix multilingual for single value attributes selecting no lang values (#55) --- lib/goo/sparql/mixins/solution_lang_filter.rb | 18 ++++++++++++++---- 1 file changed, 14 insertions(+), 4 deletions(-) diff --git a/lib/goo/sparql/mixins/solution_lang_filter.rb b/lib/goo/sparql/mixins/solution_lang_filter.rb index 49d62c8e..dedc09fb 100644 --- a/lib/goo/sparql/mixins/solution_lang_filter.rb +++ b/lib/goo/sparql/mixins/solution_lang_filter.rb @@ -50,16 +50,26 @@ def models_unmapped_to_array(m) def set_value(model, predicate, value, &block) language = object_language(value) - - if requested_lang.eql?(:ALL) || !literal?(value) || language_match?(language) - block.call + + if requested_lang.eql?(:ALL) || !literal?(value) || (language_match?(language) && can_add_new_value(model,predicate, language)) + block.call end if requested_lang.eql?(:ALL) || requested_lang.is_a?(Array) - language = "@none" if language.nil? 
|| language.eql?(:no_lang) + language = "@none" if no_lang?(language) store_objects_by_lang(model.id, predicate, value, language) end end + + + def can_add_new_value(model, predicate, new_language) + old_val = model.send(predicate) rescue nil + list_attributes?(predicate) || old_val.blank? || !no_lang?(new_language) + end + + def no_lang?(language) + language.nil? || language.eql?(:no_lang) + end def model_group_by_lang(model) unmapped = model.unmapped From 1237bd4975bc41409097afbfb14329285202a99b Mon Sep 17 00:00:00 2001 From: Syphax bouazzouni Date: Mon, 29 Jan 2024 21:50:30 +0100 Subject: [PATCH 053/106] Feature: Add Virtuso, Allegrograph and Graphdb integration to GOO (#48) * simplify the test configuration init * add docker based tests rake task to run test against 4s, ag, gb, vo * remove faraday gem usage * update test CI to test against all the supported backends with diffirent slice sizes * add high level helper to to know which backend we are currently using * extract sparql processor module from where module * handle language_match? 
value to upcase by default * add support for virtuoso and graphdb sparql client * replace delete sparql query by delete graph in the model complex test * add some new edge cases tests t o test_where.rb and test_schemaless * make test_chunks_write.rb tests support multiple backends * replace native insert_data with execute_append_request in model save * remove add_rules as it seems to no more be used * move expand_equivalent_predicates from loader to builder module * build two diffirent queries depending on which backend used * update mapper to handle the two different queries depending on the backend used * simplify the loader code, by removing inferable variables * refactor and simplify map_attributes method * fix test chunks write concenrency issues --- .github/workflows/ruby-unit-test.yml | 15 +- .gitignore | 2 + Gemfile | 1 - Gemfile.lock | 63 ++--- Rakefile | 1 - config/config.rb.sample | 23 ++ docker-compose.yml | 101 +++++++- lib/goo.rb | 36 ++- lib/goo/base/resource.rb | 28 +- lib/goo/base/where.rb | 114 +-------- lib/goo/config/config.rb | 78 ++++++ lib/goo/sparql/client.rb | 27 +- lib/goo/sparql/loader.rb | 63 ++--- lib/goo/sparql/mixins/query_pattern.rb | 3 - lib/goo/sparql/mixins/solution_lang_filter.rb | 2 +- lib/goo/sparql/processor.rb | 137 ++++++++++ lib/goo/sparql/query_builder.rb | 98 ++++--- lib/goo/sparql/solutions_mapper.rb | 110 ++++---- lib/goo/sparql/sparql.rb | 1 + rakelib/docker_based_test.rake | 107 ++++++++ test/app/bioportal.rb | 2 - test/app/models.rb | 2 - test/app/test_app.rb | 2 - test/console.rb | 1 - test/data/graphdb-repo-config.ttl | 33 +++ test/data/graphdb-test-load.nt | 0 test/data/virtuoso.init | 240 ++++++++++++++++++ test/test_basic_persistence.rb | 2 - test/test_cache.rb | 18 +- test/test_case.rb | 33 +-- test/test_chunks_write.rb | 82 +++--- test/test_collections.rb | 2 - test/test_dsl_settings.rb | 1 - test/test_enum.rb | 2 - test/test_index.rb | 2 - test/test_inmutable.rb | 2 - test/test_inverse.rb | 2 - 
test/test_model_complex.rb | 30 ++- test/test_name_with.rb | 2 - test/test_namespaces.rb | 2 - test/test_read_only.rb | 2 - test/test_schemaless.rb | 21 +- test/test_search.rb | 2 - test/test_validators.rb | 1 - test/test_where.rb | 82 +++--- 45 files changed, 1081 insertions(+), 497 deletions(-) create mode 100644 config/config.rb.sample create mode 100644 lib/goo/config/config.rb create mode 100644 lib/goo/sparql/processor.rb create mode 100644 rakelib/docker_based_test.rake create mode 100644 test/data/graphdb-repo-config.ttl create mode 100644 test/data/graphdb-test-load.nt create mode 100644 test/data/virtuoso.init diff --git a/.github/workflows/ruby-unit-test.yml b/.github/workflows/ruby-unit-test.yml index ccb161f3..ac40314b 100644 --- a/.github/workflows/ruby-unit-test.yml +++ b/.github/workflows/ruby-unit-test.yml @@ -12,21 +12,26 @@ jobs: strategy: fail-fast: false matrix: - ruby-version: ['2.7'] + goo-slice: [ '20', '100', '500' ] + ruby-version: [ '2.7', '3.0' ] + triplestore: [ 'fs', 'ag', 'vo', 'gb' ] steps: - uses: actions/checkout@v3 - name: Install Dependencies - run: sudo apt-get -y install raptor2-utils + run: sudo apt-get update && sudo apt-get -y install raptor2-utils - name: Set up Ruby uses: ruby/setup-ruby@v1 with: ruby-version: ${{ matrix.ruby-version }} bundler-cache: true # runs 'bundle install' and caches installed gems automatically - - name: Start backend services via docker-compose - run: docker compose up -d + - name: Add config file + # tempoaray workaround for the config.rb file requirement + run: echo 'Goo.config do |config| end' > config/config.rb + - name: List directory contents + run: ls -R ./test/data - name: Run tests - run: bundle exec rake test + run: GOO_SLICES=${{ matrix.goo-slice }} bundle exec rake test:docker:${{ matrix.triplestore }} TESTOPTS="-v" - name: Upload coverage to Codecov uses: codecov/codecov-action@v3 diff --git a/.gitignore b/.gitignore index f887556d..5dcefa7c 100644 --- a/.gitignore +++ b/.gitignore @@ 
-25,3 +25,5 @@ doc/ .idea/* projectFilesBackup/* + +config/config.rb \ No newline at end of file diff --git a/Gemfile b/Gemfile index 3564fe3b..49dd2b38 100644 --- a/Gemfile +++ b/Gemfile @@ -4,7 +4,6 @@ gemspec gem "activesupport" gem "cube-ruby", require: "cube" -gem "faraday", '~> 1.9' gem "rake" gem "uuid" gem "request_store" diff --git a/Gemfile.lock b/Gemfile.lock index 9fe7bd02..f0c24969 100644 --- a/Gemfile.lock +++ b/Gemfile.lock @@ -30,41 +30,24 @@ GEM multi_json (~> 1.3) thread_safe (~> 0.1) tzinfo (~> 0.3.37) - addressable (2.8.1) + addressable (2.8.5) public_suffix (>= 2.0.2, < 6.0) + base64 (0.1.1) builder (3.2.4) coderay (1.1.3) concurrent-ruby (1.2.2) - connection_pool (2.3.0) + connection_pool (2.4.1) cube-ruby (0.0.3) daemons (1.4.1) docile (1.4.0) domain_name (0.5.20190701) unf (>= 0.0.5, < 1.0.0) eventmachine (1.2.7) - faraday (1.10.3) - faraday-em_http (~> 1.0) - faraday-em_synchrony (~> 1.0) - faraday-excon (~> 1.1) - faraday-httpclient (~> 1.0) - faraday-multipart (~> 1.0) - faraday-net_http (~> 1.0) - faraday-net_http_persistent (~> 1.0) - faraday-patron (~> 1.0) - faraday-rack (~> 1.0) - faraday-retry (~> 1.0) + faraday (2.7.11) + base64 + faraday-net_http (>= 2.0, < 3.1) ruby2_keywords (>= 0.0.4) - faraday-em_http (1.0.0) - faraday-em_synchrony (1.0.0) - faraday-excon (1.1.0) - faraday-httpclient (1.0.1) - faraday-multipart (1.0.4) - multipart-post (~> 2) - faraday-net_http (1.0.1) - faraday-net_http_persistent (1.2.0) - faraday-patron (1.0.0) - faraday-rack (1.0.0) - faraday-retry (1.0.3) + faraday-net_http (3.0.2) http-accept (1.7.0) http-cookie (1.0.5) domain_name (~> 0.5) @@ -74,12 +57,11 @@ GEM macaddr (1.7.2) systemu (~> 2.6.5) method_source (1.0.0) - mime-types (3.4.1) + mime-types (3.5.1) mime-types-data (~> 3.2015) - mime-types-data (3.2023.0218.1) + mime-types-data (3.2023.1003) minitest (4.7.5) multi_json (1.15.0) - multipart-post (2.3.0) mustermann (3.0.0) ruby2_keywords (~> 0.0.1) net-http-persistent (2.9.4) @@ -87,20 +69,20 
@@ GEM pry (0.14.2) coderay (~> 1.1) method_source (~> 1.0) - public_suffix (5.0.1) - rack (2.2.6.3) + public_suffix (5.0.3) + rack (2.2.8) rack-accept (0.4.5) rack (>= 0.4) rack-post-body-to-params (0.1.8) activesupport (>= 2.3) - rack-protection (3.0.5) - rack + rack-protection (3.1.0) + rack (~> 2.2, >= 2.2.4) rake (13.0.6) rdf (1.0.8) addressable (>= 2.2) - redis (5.0.6) + redis (5.0.7) redis-client (>= 0.9.0) - redis-client (0.13.0) + redis-client (0.17.0) connection_pool request_store (1.5.1) rack (>= 1.4) @@ -109,7 +91,7 @@ GEM http-cookie (>= 1.0.2, < 2.0) mime-types (>= 1.16, < 4.0) netrc (~> 0.8) - rexml (3.2.5) + rexml (3.2.6) rsolr (2.5.0) builder (>= 2.1.2) faraday (>= 0.9, < 3, != 2.0.0) @@ -123,19 +105,19 @@ GEM simplecov (~> 0.19) simplecov-html (0.12.3) simplecov_json_formatter (0.1.4) - sinatra (3.0.5) + sinatra (3.1.0) mustermann (~> 3.0) rack (~> 2.2, >= 2.2.4) - rack-protection (= 3.0.5) + rack-protection (= 3.1.0) tilt (~> 2.0) systemu (2.6.5) - thin (1.8.1) + thin (1.8.2) daemons (~> 1.0, >= 1.0.9) eventmachine (~> 1.0, >= 1.0.4) rack (>= 1, < 3) thread_safe (0.3.6) - tilt (2.1.0) - tzinfo (0.3.61) + tilt (2.3.0) + tzinfo (0.3.62) unf (0.1.4) unf_ext unf_ext (0.0.8.2) @@ -150,7 +132,6 @@ PLATFORMS DEPENDENCIES activesupport cube-ruby - faraday (~> 1.9) goo! 
minitest (< 5.0) pry @@ -166,4 +147,4 @@ DEPENDENCIES uuid BUNDLED WITH - 2.3.22 + 2.3.15 diff --git a/Rakefile b/Rakefile index e593ddce..42ddf39d 100644 --- a/Rakefile +++ b/Rakefile @@ -107,7 +107,6 @@ end desc "Console for working with data" task :console do require_relative "test/test_case" - GooTest.configure_goo binding.pry end diff --git a/config/config.rb.sample b/config/config.rb.sample new file mode 100644 index 00000000..12abdccb --- /dev/null +++ b/config/config.rb.sample @@ -0,0 +1,23 @@ +Goo.config do |config| + # 4store + config.goo_backend_name = '4store' + config.goo_port = 8080 + config.goo_host = 'localhost' + config.goo_path_query = '/sparql/' + config.goo_path_data = '/data/' + config.goo_path_update = '/update/' + + # AllegroGraph + # config.goo_backend_name = 'AG' + # config.goo_port = 10035 + # config.goo_host = 'localhost' + # config.goo_path_query = "/repositories/ontoportal" + # config.goo_path_data = "/repositories/ontoportal/statements/" + # config.goo_path_update = "/repositories/ontoportal/statements/" + + config.search_server_url = 'http://localhost:8983/solr/term_search_core1' + config.redis_host = 'localhost' + config.redis_port = 6379 + config.bioportal_namespace = 'http://data.bioontology.org/' + config.queries_debug = false +end \ No newline at end of file diff --git a/docker-compose.yml b/docker-compose.yml index fe29bc33..fd832341 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -1,16 +1,5 @@ -version: '3' - services: - 4store: - image: bde2020/4store - ports: - - 9000:9000 - command: > - bash -c "4s-backend-setup --segments 4 ontoportal_kb - && 4s-backend ontoportal_kb - && 4s-httpd -D -s-1 -p 9000 ontoportal_kb" - - redis: + redis-ut: image: redis ports: - 6379:6379 @@ -20,7 +9,91 @@ services: timeout: 3s retries: 30 - solr: - image: ontoportal/solr-ut:0.1 + solr-ut: + image: ontoportal/solr-ut:0.0.2 ports: - 8983:8983 + healthcheck: + test: ["CMD-SHELL", "curl -sf 
http://localhost:8983/solr/term_search_core1/admin/ping?wt=json | grep -iq '\"status\":\"OK\"}' || exit 1"] + start_period: 10s + interval: 10s + timeout: 5s + retries: 5 + + agraph-ut: + image: franzinc/agraph:v8.0.0.rc1 + platform: linux/amd64 + environment: + - AGRAPH_SUPER_USER=test + - AGRAPH_SUPER_PASSWORD=xyzzy + shm_size: 1g + ports: + # - 10035:10035 + - 10000-10035:10000-10035 + volumes: + - agdata:/agraph/data + # - ./agraph/etc:/agraph/etc + command: > + bash -c "/agraph/bin/agraph-control --config /agraph/etc/agraph.cfg start + ; agtool repos create ontoportal_test --supersede + ; agtool users add anonymous + ; agtool users grant anonymous root:ontoportal_test:rw + ; tail -f /agraph/data/agraph.log" + # healthcheck: + # test: ["CMD-SHELL", "curl -sf http://127.0.0.1:10035/repositories/ontoportal_test/status | grep -iqE '(^running|^lingering)' || exit 1"] + # start_period: 10s + # interval: 10s + # timeout: 5s + # retries: 5 + profiles: + - ag + + 4store-ut: + image: bde2020/4store + platform: linux/amd64 + ports: + - 9000:9000 + command: > + bash -c "4s-backend-setup --segments 4 ontoportal_kb + && 4s-backend ontoportal_kb + && 4s-httpd -D -s-1 -p 9000 ontoportal_kb" + profiles: + - fs + virtuoso-ut: + image: tenforce/virtuoso:virtuoso7.2.5 + platform: linux/amd64 + environment: + - SPARQL_UPDATE=true + ports: + - 1111:1111 + - 8890:8890 + + profiles: + - vo + + graphdb: + image: ontotext/graphdb:10.3.3 + platform: linux/amd64 + privileged: true + environment: + GDB_HEAP_SIZE: 5G + GDB_JAVA_OPTS: >- + -Xms5g -Xmx5g + ports: + - 7200:7200 + - 7300:7300 + volumes: + - ./test/data/graphdb-repo-config.ttl:/opt/graphdb/dist/configs/templates/data/graphdb-repo-config.ttl + - ./test/data/graphdb-test-load.nt:/opt/graphdb/dist/configs/templates/data/graphdb-test-load.nt + + entrypoint: > + bash -c " importrdf load -f -c /opt/graphdb/dist/configs/templates/data/graphdb-repo-config.ttl -m parallel /opt/graphdb/dist/configs/templates/data/graphdb-test-load.nt ; 
graphdb -Ddefault.min.distinct.threshold=3000 " + profiles: + - gb + +volumes: + agdata: + + + + diff --git a/lib/goo.rb b/lib/goo.rb index ff0e6279..a63722d6 100644 --- a/lib/goo.rb +++ b/lib/goo.rb @@ -12,6 +12,7 @@ require 'uuid' require "cube" +require_relative "goo/config/config" require_relative "goo/sparql/sparql" require_relative "goo/search/search" require_relative "goo/base/base" @@ -52,6 +53,31 @@ module Goo @@slice_loading_size = 500 + + def self.log_debug_file(str) + debug_file = "./queries.txt" + File.write(debug_file, str.to_s + "\n", mode: 'a') + end + + + + def backend_4s? + sparql_backend_name.downcase.eql?("4store") + end + + def backend_ag? + sparql_backend_name.downcase.eql?("allegrograph") + end + + def backend_gb? + sparql_backend_name.downcase.eql?("graphdb") + end + + def backend_vo? + sparql_backend_name.downcase.eql?("virtuoso") + end + + def self.main_languages @@main_languages end @@ -110,16 +136,6 @@ def self.add_sparql_backend(name, *opts) @@sparql_backends.freeze end - def self.test_reset - if @@sparql_backends[:main][:query].url.to_s["localhost"].nil? 
- raise Exception, "only for testing" - end - @@sparql_backends[:main][:query]=Goo::SPARQL::Client.new("http://localhost:9000/sparql/", - {protocol: "1.1", "Content-Type" => "application/x-www-form-urlencoded", - read_timeout: 300, - redis_cache: @@redis_client }) - end - def self.main_lang @@main_lang end diff --git a/lib/goo/base/resource.rb b/lib/goo/base/resource.rb index 2eaf17ad..4d497f8b 100644 --- a/lib/goo/base/resource.rb +++ b/lib/goo/base/resource.rb @@ -289,7 +289,8 @@ def save(*opts) batch_file.write(lines.join("")) batch_file.flush() else - Goo.sparql_update_client.insert_data(graph_insert, graph: graph) + data = graph_insert.to_a.reduce("") { |acc, x| acc << x.to_s + " " } + Goo.sparql_data_client.execute_append_request(graph, data, "application/x-turtle") end rescue Exception => e raise e @@ -377,25 +378,26 @@ def self.map_attributes(inst,equivalent_predicates=nil, include_languages: false next if inst.class.collection?(attr) #collection is already there next unless inst.respond_to?(attr) attr_uri = klass.attribute_uri(attr,inst.collection).to_s - if unmapped_string_keys.include?(attr_uri.to_s) || - (equivalent_predicates && equivalent_predicates.include?(attr_uri)) - if !unmapped_string_keys.include?(attr_uri) - object = Array(equivalent_predicates[attr_uri].map { |eq_attr| unmapped_string_keys[eq_attr] }).flatten.compact - if include_languages && [RDF::URI, Hash].all?{|c| object.map(&:class).include?(c)} - object = object.reduce({}) do |all, new_v| - new_v = { none: [new_v] } if new_v.is_a?(RDF::URI) - all.merge(new_v) {|_, a, b| a + b } + if unmapped_string_keys.include?(attr_uri.to_s) || equivalent_predicates&.include?(attr_uri) + object = nil + + if unmapped_string_keys.include?(attr_uri) + object = unmapped_string_keys[attr_uri] + else + equivalent_predicates[attr_uri].each do |eq_attr| + next if unmapped_string_keys[eq_attr].nil? + + if object.nil? 
+ object = unmapped_string_keys[eq_attr].dup + elsif object.is_a?(Array) + object.concat(unmapped_string_keys[eq_attr]) end - elsif include_languages - object = object.first end if object.nil? inst.send("#{attr}=", list_attrs.include?(attr) ? [] : nil, on_load: true) next end - else - object = unmapped_string_keys[attr_uri] end if object.is_a?(Hash) diff --git a/lib/goo/base/where.rb b/lib/goo/base/where.rb index 81cd26ce..7aaad6ce 100644 --- a/lib/goo/base/where.rb +++ b/lib/goo/base/where.rb @@ -6,6 +6,7 @@ class Where AGGREGATE_PATTERN = Struct.new(:pattern,:aggregate) attr_accessor :where_options_load + include Goo::SPARQL::Processor def initialize(klass,*match_patterns) if Goo.queries_debug? && Thread.current[:ncbo_debug].nil? @@ -122,118 +123,7 @@ def unmmaped_predicates() end def process_query(count=false) - if Goo.queries_debug? && Thread.current[:ncbo_debug] - tstart = Time.now - query_resp = process_query_intl(count=count) - (Thread.current[:ncbo_debug][:goo_process_query] ||= []) << (Time.now - tstart) - return query_resp - end - return process_query_intl(count=count) - end - - def process_query_intl(count=false) - if @models == [] - @result = [] - return @result - end - - @include << @include_embed if @include_embed.length > 0 - - @predicates = unmmaped_predicates() - @equivalent_predicates = retrieve_equivalent_predicates() - - options_load = { models: @models, include: @include, ids: @ids, - graph_match: @pattern, klass: @klass, - filters: @filters, order_by: @order_by , - read_only: @read_only, rules: @rules, - predicates: @predicates, - no_graphs: @no_graphs, - equivalent_predicates: @equivalent_predicates } - - options_load.merge!(@where_options_load) if @where_options_load - if !@klass.collection_opts.nil? 
and !options_load.include?(:collection) - raise ArgumentError, "Collection needed call `#{@klass.name}`" - end - - ids = nil - if @index_key - raise ArgumentError, "Redis is not configured" unless Goo.redis_client - rclient = Goo.redis_client - cache_key = cache_key_for_index(@index_key) - raise ArgumentError, "Index not found" unless rclient.exists(cache_key) - if @page_i - if !@count - @count = rclient.llen(cache_key) - end - rstart = (@page_i -1) * @page_size - rstop = (rstart + @page_size) -1 - ids = rclient.lrange(cache_key,rstart,rstop) - else - ids = rclient.lrange(cache_key,0,-1) - end - ids = ids.map { |i| RDF::URI.new(i) } - end - - if @page_i && !@index_key - page_options = options_load.dup - page_options.delete(:include) - page_options[:include_pagination] = @include - if not @pre_count.nil? - @count = @pre_count - else - if !@count && @do_count - page_options[:count] = :count - @count = Goo::SPARQL::Queries.model_load(page_options).to_i - end - end - page_options.delete :count - page_options[:query_options] = @query_options - page_options[:page] = { page_i: @page_i, page_size: @page_size } - models_by_id = Goo::SPARQL::Queries.model_load(page_options) - options_load[:models] = models_by_id.values - - #models give the constraint - options_load.delete :graph_match - elsif count - count_options = options_load.dup - count_options.delete(:include) - count_options[:count] = :count - return Goo::SPARQL::Queries.model_load(count_options).to_i - end - - if @indexing - #do not care about include values - @result = Goo::Base::Page.new(@page_i,@page_size,@count,models_by_id.values) - return @result - end - - options_load[:ids] = ids if ids - models_by_id = {} - if @page_i && (options_load[:models].length > 0) - options_load.delete(:filters) - options_load.delete(:order_by) - end - - if (@page_i && options_load[:models].length > 0) || - (!@page_i && (@count.nil? 
|| @count > 0)) - models_by_id = Goo::SPARQL::Queries.model_load(options_load) - if @aggregate - if models_by_id.length > 0 - options_load_agg = { models: models_by_id.values, klass: @klass, - filters: @filters, read_only: @read_only, - aggregate: @aggregate, rules: @rules } - - options_load_agg.merge!(@where_options_load) if @where_options_load - Goo::SPARQL::Queries.model_load(options_load_agg) - end - end - end - unless @page_i - @result = @models ? @models : models_by_id.values - else - @result = Goo::Base::Page.new(@page_i,@page_size,@count,models_by_id.values) - end - @result + process_query_call(count = count) end def disable_rules diff --git a/lib/goo/config/config.rb b/lib/goo/config/config.rb new file mode 100644 index 00000000..ff51e8b7 --- /dev/null +++ b/lib/goo/config/config.rb @@ -0,0 +1,78 @@ +require 'ostruct' + +module Goo + extend self + attr_reader :settings + + @settings = OpenStruct.new + @settings_run = false + + def config(&block) + return if @settings_run + @settings_run = true + + yield @settings if block_given? 
+ + # Set defaults + @settings.goo_backend_name ||= ENV['GOO_BACKEND_NAME'] || '4store' + @settings.goo_port ||= ENV['GOO_PORT'] || 9000 + @settings.goo_host ||= ENV['GOO_HOST'] || 'localhost' + @settings.goo_path_query ||= ENV['GOO_PATH_QUERY'] || '/sparql/' + @settings.goo_path_data ||= ENV['GOO_PATH_DATA'] || '/data/' + @settings.goo_path_update ||= ENV['GOO_PATH_UPDATE'] || '/update/' + @settings.search_server_url ||= ENV['SEARCH_SERVER_URL'] || 'http://localhost:8983/solr/term_search_core1' + @settings.redis_host ||= ENV['REDIS_HOST'] || 'localhost' + @settings.redis_port ||= ENV['REDIS_PORT'] || 6379 + @settings.bioportal_namespace ||= ENV['BIOPORTAL_NAMESPACE'] || 'http://data.bioontology.org/' + @settings.queries_debug ||= ENV['QUERIES_DEBUG'] || false + @settings.slice_loading_size ||= ENV['GOO_SLICES'] || 500 + puts "(GOO) >> Using RDF store (#{@settings.goo_backend_name}) #{@settings.goo_host}:#{@settings.goo_port}#{@settings.goo_path_query}" + puts "(GOO) >> Using term search server at #{@settings.search_server_url}" + puts "(GOO) >> Using Redis instance at #{@settings.redis_host}:#{@settings.redis_port}" + + connect_goo + end + + def connect_goo + begin + Goo.configure do |conf| + conf.queries_debug(@settings.queries_debug) + conf.add_sparql_backend(:main, + backend_name: @settings.goo_backend_name, + query: "http://#{@settings.goo_host}:#{@settings.goo_port}#{@settings.goo_path_query}", + data: "http://#{@settings.goo_host}:#{@settings.goo_port}#{@settings.goo_path_data}", + update: "http://#{@settings.goo_host}:#{@settings.goo_port}#{@settings.goo_path_update}", + options: { rules: :NONE }) + conf.add_search_backend(:main, service: @settings.search_server_url) + conf.add_redis_backend(host: @settings.goo_redis_host, port: @settings.goo_redis_port) + + conf.add_namespace(:omv, RDF::Vocabulary.new("http://omv.org/ontology/")) + conf.add_namespace(:skos, RDF::Vocabulary.new("http://www.w3.org/2004/02/skos/core#")) + conf.add_namespace(:owl, 
RDF::Vocabulary.new("http://www.w3.org/2002/07/owl#")) + conf.add_namespace(:rdfs, RDF::Vocabulary.new("http://www.w3.org/2000/01/rdf-schema#")) + conf.add_namespace(:goo, RDF::Vocabulary.new("http://goo.org/default/"), default = true) + conf.add_namespace(:metadata, RDF::Vocabulary.new("http://goo.org/metadata/")) + conf.add_namespace(:foaf, RDF::Vocabulary.new("http://xmlns.com/foaf/0.1/")) + conf.add_namespace(:rdf, RDF::Vocabulary.new("http://www.w3.org/1999/02/22-rdf-syntax-ns#")) + conf.add_namespace(:tiger, RDF::Vocabulary.new("http://www.census.gov/tiger/2002/vocab#")) + conf.add_namespace(:nemo, RDF::Vocabulary.new("http://purl.bioontology.org/NEMO/ontology/NEMO_annotation_properties.owl#")) + conf.add_namespace(:bioportal, RDF::Vocabulary.new(@settings.bioportal_namespace)) + conf.use_cache = false + end + rescue StandardError => e + abort("EXITING: Goo cannot connect to triplestore and/or search server:\n #{e}\n#{e.backtrace.join("\n")}") + end + end + + def self.test_reset + if @@sparql_backends[:main][:query].url.to_s["localhost"].nil? + raise Exception, "only for testing" + end + @@sparql_backends[:main][:query] = Goo::SPARQL::Client.new("http://#{@settings.goo_host}:#{@settings.goo_port}#{@settings.goo_path_query}", + {protocol: "1.1", "Content-Type" => "application/x-www-form-urlencoded", + read_timeout: 300, + redis_cache: @@redis_client }) + end + + +end diff --git a/lib/goo/sparql/client.rb b/lib/goo/sparql/client.rb index 8f7ad9e1..cf958398 100644 --- a/lib/goo/sparql/client.rb +++ b/lib/goo/sparql/client.rb @@ -14,7 +14,6 @@ class Client < RSPARQL::Client "text/x-nquads" => "nquads" } - BACKEND_4STORE = "4store" def status_based_sleep_time(operation) sleep(0.5) @@ -39,16 +38,17 @@ def status_based_sleep_time(operation) end class DropGraph - def initialize(g) + def initialize(g, silent: false) @graph = g @caching_options = { :graph => @graph.to_s } + @silent = silent end def to_s - return "DROP GRAPH <#{@graph.to_s}>" + "DROP #{@silent ? 
'SILENT' : ''} GRAPH <#{@graph.to_s}>" end def options #Returns the caching option - return @caching_options + @caching_options end end @@ -77,7 +77,7 @@ def bnodes_filter_file(file_path,mime_type) end def delete_data_graph(graph) - Goo.sparql_update_client.update(DropGraph.new(graph)) + Goo.sparql_update_client.update(DropGraph.new(graph, silent: Goo.backend_vo?)) end def append_triples_no_bnodes(graph,file_path,mime_type_in) @@ -184,9 +184,7 @@ def status resp end - private - - def execute_append_request(graph, data_file, mime_type_in) + def params_for_backend(graph, data_file, mime_type_in, method = :post) mime_type = "text/turtle" if mime_type_in == "text/x-nquads" @@ -194,10 +192,9 @@ def execute_append_request(graph, data_file, mime_type_in) graph = "http://data.bogus.graph/uri" end - params = {method: :post, url: "#{url.to_s}", headers: {"content-type" => mime_type, "mime-type" => mime_type}, timeout: nil} - backend_name = Goo.sparql_backend_name + params = {method: method, url: "#{url.to_s}", headers: {"content-type" => mime_type, "mime-type" => mime_type}, timeout: nil} - if backend_name == BACKEND_4STORE + if Goo.backend_4s? params[:payload] = { graph: graph.to_s, data: data_file, @@ -205,12 +202,18 @@ def execute_append_request(graph, data_file, mime_type_in) } #for some reason \\\\ breaks parsing params[:payload][:data] = params[:payload][:data].split("\n").map { |x| x.sub("\\\\","") }.join("\n") + elsif Goo.backend_vo? 
+ params[:url] = "http://localhost:8890/sparql-graph-crud?graph=#{CGI.escape(graph.to_s)}" + params[:payload] = data_file else params[:url] << "?context=#{CGI.escape("<#{graph.to_s}>")}" params[:payload] = data_file end + params + end - RestClient::Request.execute(params) + def execute_append_request(graph, data_file, mime_type_in) + RestClient::Request.execute(params_for_backend(graph, data_file, mime_type_in)) end end end diff --git a/lib/goo/sparql/loader.rb b/lib/goo/sparql/loader.rb index 094fbba2..f3dcdb3d 100644 --- a/lib/goo/sparql/loader.rb +++ b/lib/goo/sparql/loader.rb @@ -36,63 +36,48 @@ def model_load(*options) ## def model_load_sliced(*options) options = options.last - ids = options[:ids] klass = options[:klass] incl = options[:include] models = options[:models] - aggregate = options[:aggregate] - read_only = options[:read_only] collection = options[:collection] - count = options[:count] - include_pagination = options[:include_pagination] - equivalent_predicates = options[:equivalent_predicates] - predicates = options[:predicates] - embed_struct, klass_struct = get_structures(aggregate, count, incl, include_pagination, klass, read_only) - - raise_resource_must_persistent_error(models) if models + embed_struct, klass_struct = get_structures(options[:aggregate], options[:count] , incl, options[:include_pagination], klass, options[:read_only]) + raise_not_persistent_error(models) if models graphs = get_graphs(collection, klass) - ids, models_by_id = get_models_by_id_hash(ids, klass, klass_struct, models) + models_by_id = get_models_by_id_hash( options[:ids], klass, klass_struct, models) - query_options = {} #TODO: breaks the reasoner patterns = [[:id, RDF.type, klass.uri_type(collection)]] incl_embed = nil - unmapped = nil bnode_extraction = nil properties_to_include = [] variables = [:id] - if incl - if incl.first && incl.first.is_a?(Hash) && incl.first.include?(:bnode) + if incl && !incl.empty? 
+ if incl.first.is_a?(Hash) && incl.first.include?(:bnode) #limitation only one level BNODE bnode_extraction, patterns, variables = get_bnode_extraction(collection, incl, klass, patterns) else variables = %i[id attributeProperty attributeObject] if incl.first == :unmapped - unmapped = true - properties_to_include = predicate_map(predicates) + properties_to_include = predicate_map(options[:predicates]) else - #make it deterministic - incl_embed = get_embed_includes(incl) - graphs, properties_to_include, query_options = get_includes(collection, graphs, incl, - klass, query_options) + graphs, properties_to_include, incl_embed = get_includes(collection, graphs, incl, klass) end end end - expand_equivalent_predicates(properties_to_include, equivalent_predicates) - query_builder = Goo::SPARQL::QueryBuilder.new options - select, aggregate_projections = query_builder.build_select_query(ids, variables, graphs, - patterns, query_options, - properties_to_include) + options[:properties_to_include] = properties_to_include + + + select, aggregate_projections = Goo::SPARQL::QueryBuilder.new(options) + .build_query(models_by_id.keys, variables, graphs, patterns) solution_mapper = Goo::SPARQL::SolutionMapper.new aggregate_projections, bnode_extraction, embed_struct, incl_embed, klass_struct, models_by_id, - properties_to_include, unmapped, - variables, ids, options + variables, options solution_mapper.map_each_solutions(select) end @@ -102,16 +87,6 @@ def model_load_sliced(*options) def set_request_lang(options) options[:requested_lang] = RequestStore.store[:requested_lang] end - def expand_equivalent_predicates(properties_to_include, eq_p) - - return unless eq_p && !eq_p.empty? 
- - properties_to_include&.each do |property_attr, property| - property_uri = property[:uri] - property[:equivalents] = eq_p[property_uri.to_s].to_a.map { |p| RDF::URI.new(p) } if eq_p.include?(property_uri.to_s) - end - - end def predicate_map(predicates) predicates_map = nil @@ -132,19 +107,19 @@ def predicate_map(predicates) predicates_map end - def get_includes(collection, graphs, incl, klass, query_options) + def get_includes(collection, graphs, incl, klass) + incl_embed ,incl = get_embed_includes(incl) incl = incl.to_a incl.delete_if { |a| !a.instance_of?(Symbol) } properties_to_include = {} incl.each do |attr| graph, pattern = query_pattern(klass, attr, collection: collection) - add_rules(attr, klass, query_options) if klass.attributes(:all).include?(attr) properties_to_include[attr] = { uri: pattern[1], is_inverse: klass.inverse?(attr) } # [property_attr, property_uri , inverse: true] end graphs << graph if graph && (!klass.collection_opts || klass.inverse?(attr)) end - [graphs, properties_to_include,query_options] + [graphs, properties_to_include, incl_embed] end def get_bnode_extraction(collection, incl, klass, patterns) @@ -181,7 +156,7 @@ def get_models_by_id_hash(ids, klass, klass_struct, models) #a where without models end - return ids, models_by_id + models_by_id end def get_graphs(collection, klass) @@ -234,7 +209,7 @@ def get_structures(aggregate, count, incl, include_pagination, klass, read_only) [embed_struct, klass_struct] end - def raise_resource_must_persistent_error(models) + def raise_not_persistent_error(models) models.each do |m| if (not m.nil?) 
&& !m.respond_to?(:klass) #read only raise ArgumentError, @@ -252,7 +227,7 @@ def get_embed_includes(incl) #variables.concat(embed_variables) incl.concat(embed_variables) end - incl_embed + [incl_embed, incl] end end diff --git a/lib/goo/sparql/mixins/query_pattern.rb b/lib/goo/sparql/mixins/query_pattern.rb index cc370795..9ee0df7d 100644 --- a/lib/goo/sparql/mixins/query_pattern.rb +++ b/lib/goo/sparql/mixins/query_pattern.rb @@ -3,9 +3,6 @@ module SPARQL module QueryPatterns - def add_rules(attr,klass,query_options) - (query_options[:rules] ||= []) << :SUBC if klass.transitive?(attr) - end def query_pattern(klass,attr,**opts) value = opts[:value] || nil diff --git a/lib/goo/sparql/mixins/solution_lang_filter.rb b/lib/goo/sparql/mixins/solution_lang_filter.rb index dedc09fb..32660cca 100644 --- a/lib/goo/sparql/mixins/solution_lang_filter.rb +++ b/lib/goo/sparql/mixins/solution_lang_filter.rb @@ -105,7 +105,7 @@ def language_match?(language) return requested_lang.include?(language) if requested_lang.is_a?(Array) - language.eql?(requested_lang) + language&.upcase.eql?(requested_lang) end def literal?(object) diff --git a/lib/goo/sparql/processor.rb b/lib/goo/sparql/processor.rb new file mode 100644 index 00000000..c89778ed --- /dev/null +++ b/lib/goo/sparql/processor.rb @@ -0,0 +1,137 @@ +module Goo + module SPARQL + module Processor + def process_query_call(count=false) + if Goo.queries_debug? 
&& Thread.current[:ncbo_debug] + start = Time.now + query_resp = process_query_intl(count=count) + (Thread.current[:ncbo_debug][:goo_process_query] ||= []) << (Time.now - start) + return query_resp + end + process_query_init(count=count) + end + + private + def process_query_init(count=false) + if @models == [] + @result = [] + return @result + end + + @include << @include_embed if @include_embed.length > 0 + + @predicates = unmmaped_predicates() + @equivalent_predicates = retrieve_equivalent_predicates() + + options_load = { models: @models, include: @include, ids: @ids, + graph_match: @pattern, klass: @klass, + filters: @filters, order_by: @order_by , + read_only: @read_only, rules: @rules, + predicates: @predicates, + no_graphs: @no_graphs, + equivalent_predicates: @equivalent_predicates } + + options_load.merge!(@where_options_load) if @where_options_load + + if !@klass.collection_opts.nil? and !options_load.include?(:collection) + raise ArgumentError, "Collection needed call `#{@klass.name}`" + end + + ids = nil + + + ids = redis_indexed_ids if use_redis_index? + + if @page_i && !use_redis_index? + page_options = options_load.dup + page_options.delete(:include) + page_options[:include_pagination] = @include + page_options[:query_options] = @query_options + + @count = run_count_query(page_options) + page_options[:page] = { page_i: @page_i, page_size: @page_size } + + models_by_id = Goo::SPARQL::Queries.model_load(page_options) + options_load[:models] = models_by_id.values + #models give the constraint + options_load.delete :graph_match + elsif count + count_options = options_load.dup + count_options.delete(:include) + return run_count_query(count_options) + end + + if @indexing + #do not care about include values + @result = Goo::Base::Page.new(@page_i,@page_size,@count,models_by_id.values) + return @result + end + + options_load[:ids] = ids if ids + models_by_id = {} + + if (@page_i && options_load[:models].nil?) 
|| + (@page_i && options_load[:models].length > 0) || + (!@page_i && (@count.nil? || @count > 0)) + + models_by_id = Goo::SPARQL::Queries.model_load(options_load) + run_aggregate_query(models_by_id) if @aggregate && models_by_id.length > 0 + end + + if @page_i + @result = Goo::Base::Page.new(@page_i, @page_size, @count, models_by_id.values) + else + @result = @models ? @models : models_by_id.values + end + @result + end + + + def use_redis_index? + @index_key + end + + def run_aggregate_query(models_by_id) + options_load_agg = { models: models_by_id.values, klass: @klass, + filters: @filters, read_only: @read_only, + aggregate: @aggregate, rules: @rules } + options_load_agg.merge!(@where_options_load) if @where_options_load + Goo::SPARQL::Queries.model_load(options_load_agg) + end + def run_count_query(page_options) + count = 0 + if @pre_count + count = @pre_count + elsif !@count && @do_count + page_options[:count] = :count + r = Goo::SPARQL::Queries.model_load(page_options) + if r.is_a? 
Numeric + count = r.to_i + end + elsif @count + count = @count + end + page_options.delete :count + count + end + + def redis_indexed_ids + raise ArgumentError, "Redis is not configured" unless Goo.redis_client + rclient = Goo.redis_client + cache_key = cache_key_for_index(@index_key) + raise ArgumentError, "Index not found" unless rclient.exists(cache_key) + if @page_i + if !@count + @count = rclient.llen(cache_key) + end + rstart = (@page_i -1) * @page_size + rstop = (rstart + @page_size) -1 + ids = rclient.lrange(cache_key,rstart,rstop) + else + ids = rclient.lrange(cache_key,0,-1) + end + ids = ids.map { |i| RDF::URI.new(i) } + end + end + end +end diff --git a/lib/goo/sparql/query_builder.rb b/lib/goo/sparql/query_builder.rb index 31880859..1a6ea740 100644 --- a/lib/goo/sparql/query_builder.rb +++ b/lib/goo/sparql/query_builder.rb @@ -14,29 +14,31 @@ def initialize(options) @unions = options[:unions] || [] @aggregate = options[:aggregate] @collection = options[:collection] - @model_query_options = options[:query_options] @enable_rules = options[:rules] @order_by = options[:order_by] @internal_variables_map = {} + @equivalent_predicates = options[:equivalent_predicates] + @properties_to_include = options[:properties_to_include] @query = get_client end - def build_select_query(ids, variables, graphs, patterns, - query_options, properties_to_include) + def build_query(ids, variables, graphs, patterns) + query_options = {} + expand_equivalent_predicates(@properties_to_include, @equivalent_predicates) + + properties_to_include = @properties_to_include patterns = graph_match(@collection, @graph_match, graphs, @klass, patterns, query_options, @unions) variables, patterns = add_some_type_to_id(patterns, query_options, variables) aggregate_projections, aggregate_vars, variables, optional_patterns = get_aggregate_vars(@aggregate, @collection, graphs, @klass, @unions, variables) query_filter_str, patterns, optional_patterns, filter_variables = 
filter_query_strings(@collection, graphs, @klass, optional_patterns, patterns, @query_filters) @order_by, variables, optional_patterns = init_order_by(@count, @klass, @order_by, optional_patterns, variables,patterns, query_options, graphs) - order_by_str, order_variables = order_by_string - variables = [] if @count variables.delete :some_type - select_distinct(variables, aggregate_projections, filter_variables, order_variables) + select_distinct(variables, aggregate_projections, filter_variables) .from(graphs) .where(patterns) .union_bind_in_where(properties_to_include) @@ -54,43 +56,43 @@ def build_select_query(ids, variables, graphs, patterns, ids_filter(ids) if ids - @query.order_by(*order_by_str) if @order_by + @query.order_by(*order_by_string) if @order_by put_query_aggregate_vars(aggregate_vars) if aggregate_vars count if @count paginate if @page - ## TODO see usage of rules and query_options - query_options.merge!(@model_query_options) if @model_query_options - query_options[:rules] = [:NONE] unless @enable_rules - query_options = nil if query_options.empty? - if query_options - query_options[:rules] = query_options[:rules]&.map { |x| x.to_s }.join('+') - else - query_options = { rules: ['NONE'] } - end - @query.options[:query_options] = query_options [@query, aggregate_projections] end def union_bind_in_where(properties) binding_as = [] - properties.each do |property_attr, property| - predicates = [property[:uri]] + (property[:equivalents] || []) - options = { - binds: [{ value: property_attr, as: :attributeProperty }] - } - subject = property[:subject] || :id - predicates.uniq.each do |predicate_uri| - pattern = if property[:is_inverse] - [:attributeObject, predicate_uri, subject] - else - [subject, predicate_uri, :attributeObject] - end - binding_as << [[pattern], options] + if Goo.backend_4s? || Goo.backend_gb? 
+ properties.each do |property_attr, property| + predicates = [property[:uri]] + (property[:equivalents] || []) + options = { + binds: [{ value: property_attr, as: :attributeProperty }] + } + subject = property[:subject] || :id + predicates.uniq.each do |predicate_uri| + pattern = if property[:is_inverse] + [:attributeObject, predicate_uri, subject] + else + [subject, predicate_uri, :attributeObject] + end + binding_as << [[pattern], options] + end end + + else + direct_predicate, inverse_predicate = include_properties + direct_filter = direct_predicate.empty? ? [] : [{ values: direct_predicate, predicate: :attributeProperty }] + inverse_filter = inverse_predicate.empty? ? [] : [{ values: inverse_predicate, predicate: :attributeProperty }] + binding_as << [[[:id, :attributeProperty, :attributeObject]], { filters: direct_filter}] unless direct_filter.empty? + binding_as << [[[:inverseAttributeObject, :attributeProperty, :id]], { filters: inverse_filter}] unless inverse_filter.empty? end + @query.optional_union_with_bind_as(*binding_as) unless binding_as.empty? self end @@ -127,7 +129,7 @@ def order_by_string order_variables << attr "#{order.to_s.upcase}(?#{attr})" end - [order_str,order_variables] + order_str end def from(graphs) @@ -142,11 +144,11 @@ def from(graphs) self end - def select_distinct(variables, aggregate_variables, filter_variables, order_variables) + def select_distinct(variables, aggregate_patterns, filter_variables) + variables << :inverseAttributeObject if inverse_predicate? select_vars = variables.dup - reject_aggregations_from_vars(select_vars, aggregate_variables) if aggregate_variables - # Fix for 4store pagination with a filter https://github.com/ontoportal-lirmm/ontologies_api/issues/25 - select_vars = (select_vars + filter_variables + order_variables).uniq if @page + reject_aggregations_from_vars(select_vars, aggregate_patterns) if aggregate_patterns + select_vars = (select_vars + filter_variables).uniq if @page && Goo.backend_4s? 
# Fix for 4store pagination with a filter @query = @query.select(*select_vars).distinct(true) self end @@ -164,6 +166,16 @@ def ids_filter(ids) private + def include_properties + direct_predicates = @properties_to_include.select { |_, property| !property[:is_inverse] }.map { |_, property| [property[:uri]] + (property[:equivalents] || []) }.flatten + inverse_predicates = @properties_to_include.select { |_, property| property[:is_inverse] }.map { |_, property| [property[:uri]] + (property[:equivalents] || []) }.flatten + [direct_predicates, inverse_predicates] + end + + def inverse_predicate? + @properties_to_include.any? { |_, property| property[:is_inverse] } + end + def patterns_for_match(klass, attr, value, graphs, patterns, unions, internal_variables, subject = :id, in_union = false, in_aggregate = false, query_options = {}, collection = nil) @@ -182,7 +194,7 @@ def patterns_for_match(klass, attr, value, graphs, patterns, unions, @internal_variables_map[new_internal_var] = value.empty? ? attr : {attr => value} end - add_rules(attr, klass, query_options) + graph, pattern = query_pattern(klass, attr, value: new_internal_var, subject: subject, collection: collection) if pattern @@ -370,7 +382,7 @@ def query_filter_sparql(klass, filter, filter_patterns, filter_graphs, else value = RDF::Literal.new(filter_operation.value) if filter_operation.value.is_a? 
String - value = RDF::Literal.new(filter_operation.value, :datatype => RDF::XSD.string) + value = RDF::Literal.new(filter_operation.value) end filter_operations << ( "?#{filter_var.to_s} #{sparql_op_string(filter_operation.operator)} " + @@ -403,7 +415,7 @@ def filter_query_strings(collection, graphs, klass, patterns.concat(filter_patterns) end end - filter_variables << inspected_patterns.values.last + #filter_variables << inspected_patterns.values.last end [query_filter_str, patterns, optional_patterns, filter_variables] end @@ -424,6 +436,16 @@ def add_some_type_to_id(patterns, query_options, variables) def internal_variables @internal_variables_map.keys end + + def expand_equivalent_predicates(query_properties, eq_p) + + return unless eq_p && !eq_p.empty? + + query_properties&.each do |_, property| + property_uri = property[:uri] + property[:equivalents] = eq_p[property_uri.to_s].to_a.map { |p| RDF::URI.new(p) } if eq_p.include?(property_uri.to_s) + end + end end end end diff --git a/lib/goo/sparql/solutions_mapper.rb b/lib/goo/sparql/solutions_mapper.rb index 64d258d5..d849be3a 100644 --- a/lib/goo/sparql/solutions_mapper.rb +++ b/lib/goo/sparql/solutions_mapper.rb @@ -3,39 +3,36 @@ module SPARQL class SolutionMapper BNODES_TUPLES = Struct.new(:id, :attribute) - def initialize(aggregate_projections, bnode_extraction, embed_struct, - incl_embed, klass_struct, models_by_id, - properties_to_include, unmapped, variables, ids, options) + def initialize(aggregate_projections, bnode_extraction, embed_struct,incl_embed, klass_struct, models_by_id, variables, options) @aggregate_projections = aggregate_projections @bnode_extraction = bnode_extraction @embed_struct = embed_struct @incl_embed = incl_embed + @incl = options[:include] @klass_struct = klass_struct @models_by_id = models_by_id - @properties_to_include = properties_to_include - @unmapped = unmapped + @properties_to_include = options[:properties_to_include] + @unmapped = options[:include] && 
options[:include].first.eql?(:unmapped) @variables = variables - @ids = ids + @ids = models_by_id.keys @klass = options[:klass] @read_only = options[:read_only] - @incl = options[:include] @count = options[:count] @collection = options[:collection] @options = options end - + def map_each_solutions(select) found = Set.new objects_new = {} list_attributes = Set.new(@klass.attributes(:list)) - all_attributes = Set.new(@klass.attributes(:all)) @lang_filter = Goo::SPARQL::Solution::LanguageFilter.new(requested_lang: @options[:requested_lang].to_s, unmapped: @unmapped, - list_attributes: list_attributes) - + list_attributes: list_attributes) + select.each_solution do |sol| - + next if sol[:some_type] && @klass.type_uri(@collection) != sol[:some_type] return sol[:count_var].object if @count @@ -59,26 +56,32 @@ def map_each_solutions(select) next end - predicate = sol[:attributeProperty].to_s.to_sym + predicates = find_predicate(sol[:attributeProperty], inverse: !sol[:inverseAttributeObject].nil?) + next if predicates.empty? - next if predicate.nil? 
|| !all_attributes.include?(predicate) + object = if sol[:attributeObject] + sol[:attributeObject] + elsif sol[:inverseAttributeObject] + sol[:inverseAttributeObject] + end - object = sol[:attributeObject] - # bnodes - if bnode_id?(object, predicate) - objects_new = bnode_id_tuple(id, object, objects_new, predicate) - next + predicates.each do |predicate| + # bnodes + if bnode_id?(object, predicate) + objects_new = bnode_id_tuple(id, object, objects_new, predicate) + next + end + + objects, objects_new = get_value_object(id, objects_new, object, list_attributes, predicate) + add_object_to_model(id, objects, object, predicate) end - objects, objects_new = get_value_object(id, objects_new, object, list_attributes, predicate) - add_object_to_model(id, objects, object, predicate) end - - # for this moment we are not going to enrich models , maybe we will use it if the results are empty + # for this moment we are not going to enrich models , maybe we will use it if the results are empty @lang_filter.fill_models_with_all_languages(@models_by_id) - init_unloaded_attributes(found, list_attributes) + init_unloaded_attributes(list_attributes) return @models_by_id if @bnode_extraction @@ -97,19 +100,30 @@ def map_each_solutions(select) include_bnodes(blank_nodes, @models_by_id) unless blank_nodes.empty? models_unmapped_to_array(@models_by_id) if @unmapped - - + + @models_by_id end private - def init_unloaded_attributes(found, list_attributes) - return if @incl.nil? + def find_predicate(predicate, unmapped: false, inverse: false) + if Goo.backend_4s? || Goo.backend_gb? + return [] if predicate.nil? || unmapped && @properties_to_include[predicate].nil? + predicate = predicate.to_s.to_sym + else + predicate = @properties_to_include.select { |x, v| v[:uri].to_s.eql?(predicate.to_s) || v[:equivalents]&.any? { |e| e.to_s.eql?(predicate.to_s) } } + return [] if predicate.empty? 
+ predicate = predicate.select{|x, y| y[:is_inverse]&.eql?(inverse)}.keys + end + Array(predicate) + end + + def init_unloaded_attributes(list_attributes) + return if @incl.nil? || @incl.empty? # Here we are setting to nil all attributes that have been included but not found in the triplestore - found.uniq.each do |model_id| - m = @models_by_id[model_id] + @models_by_id.each do |id, m| @incl.each do |attr_to_incl| is_handler = m.respond_to?(:handler?) && m.class.handler?(attr_to_incl) next if attr_to_incl.to_s.eql?('unmapped') || is_handler @@ -133,7 +147,7 @@ def init_unloaded_attributes(found, list_attributes) def get_value_object(id, objects_new, object, list_attributes, predicate) object = object.object if object && !(object.is_a? RDF::URI) range_for_v = @klass.range(predicate) - + if object.is_a?(RDF::URI) && (predicate != :id) && !range_for_v.nil? if objects_new.include?(object) @@ -156,7 +170,7 @@ def get_value_object(id, objects_new, object, list_attributes, predicate) if object.nil? object = pre.nil? ? [] : pre - else + else object = pre.nil? ? [object] : (Array(pre).dup << object) object.uniq! end @@ -170,8 +184,8 @@ def add_object_to_model(id, objects, current_obj, predicate) if @models_by_id[id].respond_to?(:klass) @models_by_id[id][predicate] = objects unless objects.nil? && !@models_by_id[id][predicate].nil? elsif !@models_by_id[id].class.handler?(predicate) && - !(objects.nil? && !@models_by_id[id].instance_variable_get("@#{predicate}").nil?) && - predicate != :id + !(objects.nil? && !@models_by_id[id].instance_variable_get("@#{predicate}").nil?) 
&& + predicate != :id @lang_filter.set_model_value(@models_by_id[id], predicate, objects, current_obj) end end @@ -329,7 +343,7 @@ def model_set_collection_attributes(models_by_id, objects_new) collection_attribute = obj_new[:klass].collection_opts obj_new[collection_attribute] = collection_value elsif obj_new.class.respond_to?(:collection_opts) && - obj_new.class.collection_opts.instance_of?(Symbol) + obj_new.class.collection_opts.instance_of?(Symbol) collection_attribute = obj_new.class.collection_opts obj_new.send("#{collection_attribute}=", collection_value) end @@ -370,8 +384,8 @@ def dependent_model_creation(embed_struct, id, models_by_id, object, objects_new if objects_new.include?(object) object = objects_new[object] elsif !range_for_v.inmutable? - pre_val = get_pre_val(id, models_by_id, object, v, read_only) - object = get_object_from_range(pre_val, embed_struct, object, objects_new, v, options) + pre_val = get_pre_val(id, models_by_id, object, v) + object = get_object_from_range(pre_val, embed_struct, object, objects_new, v) else object = range_for_v.find(object).first end @@ -399,8 +413,8 @@ def get_object_from_range(pre_val, embed_struct, object, objects_new, predicate) def get_pre_val(id, models_by_id, object, predicate) pre_val = nil if models_by_id[id] && - ((models_by_id[id].respond_to?(:klass) && models_by_id[id]) || - models_by_id[id].loaded_attributes.include?(predicate)) + ((models_by_id[id].respond_to?(:klass) && models_by_id[id]) || + models_by_id[id].loaded_attributes.include?(predicate)) pre_val = if !@read_only models_by_id[id].instance_variable_get("@#{predicate}") else @@ -413,13 +427,17 @@ def get_pre_val(id, models_by_id, object, predicate) end def add_unmapped_to_model(sol) - predicate = sol[:attributeProperty].to_s.to_sym - return unless @properties_to_include[predicate] - - id = sol[:id] - value = sol[:attributeObject] - - @lang_filter.set_unmapped_value(@models_by_id[id], @properties_to_include[predicate][:uri], value) + predicates 
= find_predicate(sol[:attributeProperty]) + predicates.each do |predicate| + if Goo.backend_4s? || Goo.backend_gb? + predicate = @properties_to_include[predicate][:uri] + else + predicate = sol[:attributeProperty] + end + id = sol[:id] + value = sol[:attributeObject] + @lang_filter.set_unmapped_value(@models_by_id[id], predicate, value) + end end def add_aggregations_to_model(sol) diff --git a/lib/goo/sparql/sparql.rb b/lib/goo/sparql/sparql.rb index 6fa1d582..d5315cde 100644 --- a/lib/goo/sparql/sparql.rb +++ b/lib/goo/sparql/sparql.rb @@ -8,3 +8,4 @@ require_relative "triples" require_relative "loader" require_relative "queries" +require_relative 'processor' diff --git a/rakelib/docker_based_test.rake b/rakelib/docker_based_test.rake new file mode 100644 index 00000000..d9b334f4 --- /dev/null +++ b/rakelib/docker_based_test.rake @@ -0,0 +1,107 @@ +# Rake tasks for running unit tests with backend services running as docker containers + +desc 'Run unit tests with docker based backend' +namespace :test do + namespace :docker do + task :up do + system("docker compose up -d") || abort("Unable to start docker containers") + end + task :down do + #system("docker compose --profile fs --profile ag stop") + #system("docker compose --profile fs --profile ag kill") + end + desc "run tests with docker AG backend" + task :ag do + ENV["GOO_BACKEND_NAME"]="allegrograph" + ENV["GOO_PORT"]="10035" + ENV["GOO_PATH_QUERY"]="/repositories/ontoportal_test" + ENV["GOO_PATH_DATA"]="/repositories/ontoportal_test/statements" + ENV["GOO_PATH_UPDATE"]="/repositories/ontoportal_test/statements" + ENV["COMPOSE_PROFILES"]="ag" + Rake::Task["test:docker:up"].invoke + # AG takes some time to start and create databases/accounts + # TODO: replace system curl command with native ruby code + unless system("curl -sf http://127.0.0.1:10035/repositories/ontoportal_test/status | grep -iqE '(^running|^lingering)' || exit 1") + printf("waiting for AllegroGraph container to initialize") + sec = 0 + until 
system("curl -sf http://127.0.0.1:10035/repositories/ontoportal_test/status | grep -iqE '(^running|^lingering)' || exit 1") do + sleep(1) + printf(".") + sec += 1 + end + end + puts + system("docker compose ps") # TODO: remove after GH actions troubleshooting is complete + Rake::Task["test"].invoke + Rake::Task["test:docker:down"].invoke + end + + desc "run tests with docker 4store backend" + task :fs do + ENV["GOO_PORT"]="9000" + ENV["COMPOSE_PROFILES"]='fs' + Rake::Task["test:docker:up"].invoke + Rake::Task["test"].invoke + Rake::Task["test:docker:down"].invoke + end + + desc "run tests with docker Virtuoso backend" + task :vo do + ENV["GOO_BACKEND_NAME"]="virtuoso" + ENV["GOO_PORT"]="8890" + ENV["GOO_PATH_QUERY"]="/sparql" + ENV["GOO_PATH_DATA"]="/sparql" + ENV["GOO_PATH_UPDATE"]="/sparql" + ENV["COMPOSE_PROFILES"]="vo" + Rake::Task["test:docker:up"].invoke + # + unless system("curl -sf http://localhost:8890/sparql || exit 1") + printf("waiting for Virtuoso container to initialize") + sec = 0 + until system("curl -sf http://localhost:8890/sparql || exit 1") do + sleep(1) + printf(".") + sec += 1 + if sec > 30 + system("docker compose logs virtuoso-ut") + abort(" Virtuoso container hasn't initialized properly") + end + end + end + Rake::Task["test"].invoke + Rake::Task["test:docker:down"].invoke + end + + + desc "run tests with docker GraphDb backend" + task :gb do + ENV["GOO_BACKEND_NAME"]="graphdb" + ENV["GOO_PORT"]="7200" + ENV["GOO_PATH_QUERY"]="/repositories/ontoportal" + ENV["GOO_PATH_DATA"]="/repositories/ontoportal/statements" + ENV["GOO_PATH_UPDATE"]="/repositories/ontoportal/statements" + ENV["COMPOSE_PROFILES"]="gb" + Rake::Task["test:docker:up"].invoke + + #system("docker compose cp ./test/data/graphdb-repo-config.ttl graphdb:/opt/graphdb/dist/configs/templates/graphdb-repo-config.ttl") + #system("docker compose cp ./test/data/graphdb-test-load.nt graphdb:/opt/graphdb/dist/configs/templates/graphdb-test-load.nt") + #system('docker compose exec graphdb 
sh -c "importrdf load -f -c /opt/graphdb/dist/configs/templates/graphdb-repo-config.ttl -m parallel /opt/graphdb/dist/configs/templates/graphdb-test-load.nt ;"') + unless system("curl -sf http://localhost:7200/repositories || exit 1") + printf("waiting for Graphdb container to initialize") + sec = 0 + until system("curl -sf http://localhost:7200/repositories || exit 1") do + sleep(1) + printf(".") + sec += 1 + if sec > 30 + system("docker compose logs graphdb") + abort(" Graphdb container hasn't initialized properly") + end + end + end + Rake::Task["test"].invoke + Rake::Task["test:docker:down"].invoke + end + + end +end diff --git a/test/app/bioportal.rb b/test/app/bioportal.rb index 2cef2c2b..2b3c57b3 100644 --- a/test/app/bioportal.rb +++ b/test/app/bioportal.rb @@ -4,8 +4,6 @@ require_relative '../test_case' require_relative './query_profiler' -GooTest.configure_goo - module Test module BioPortal class Ontology < Goo::Base::Resource diff --git a/test/app/models.rb b/test/app/models.rb index 5aeb2a2e..876b70df 100644 --- a/test/app/models.rb +++ b/test/app/models.rb @@ -1,7 +1,5 @@ require_relative '../test_case' -GooTest.configure_goo - module Test module Models diff --git a/test/app/test_app.rb b/test/app/test_app.rb index 26a88d60..4d444ad4 100644 --- a/test/app/test_app.rb +++ b/test/app/test_app.rb @@ -1,6 +1,4 @@ require_relative '../test_case' require_relative 'bioportal' -GooTest.configure_goo - binding.pry diff --git a/test/console.rb b/test/console.rb index e64d4adf..39d19aa2 100644 --- a/test/console.rb +++ b/test/console.rb @@ -1,5 +1,4 @@ require_relative "../lib/goo.rb" require_relative "./test_case.rb" -GooTest.configure_goo binding.pry diff --git a/test/data/graphdb-repo-config.ttl b/test/data/graphdb-repo-config.ttl new file mode 100644 index 00000000..9200da9a --- /dev/null +++ b/test/data/graphdb-repo-config.ttl @@ -0,0 +1,33 @@ +@prefix rdfs: . +@prefix rep: . +@prefix sail: . +@prefix xsd: . 
+ +<#ontoportal> a rep:Repository; + rep:repositoryID "ontoportal"; + rep:repositoryImpl [ + rep:repositoryType "graphdb:SailRepository"; + [ + "http://example.org/owlim#"; + "false"; + ""; + "true"; + "false"; + "true"; + "true"; + "32"; + "10000000"; + ""; + "true"; + ""; + "0"; + "0"; + "false"; + "file-repository"; + "rdfsplus-optimized"; + "storage"; + "false"; + sail:sailType "owlim:Sail" + ] + ]; + rdfs:label "" . \ No newline at end of file diff --git a/test/data/graphdb-test-load.nt b/test/data/graphdb-test-load.nt new file mode 100644 index 00000000..e69de29b diff --git a/test/data/virtuoso.init b/test/data/virtuoso.init new file mode 100644 index 00000000..e5f4bd85 --- /dev/null +++ b/test/data/virtuoso.init @@ -0,0 +1,240 @@ + ; + ; virtuoso.ini + ; + ; Configuration file for the OpenLink Virtuoso VDBMS Server + ; + ; To learn more about this product, or any other product in our + ; portfolio, please check out our web site at: + ; + ; http://virtuoso.openlinksw.com/ + ; + ; or contact us at: + ; + ; general.information@openlinksw.com + ; + ; If you have any technical questions, please contact our support + ; staff at: + ; + ; technical.support@openlinksw.com + ; + ; + ; Database setup + ; + [Database] + DatabaseFile = ../database/virtuoso.db + ErrorLogFile = ../database/virtuoso.log + LockFile = ../database/virtuoso.lck + TransactionFile = ../database/virtuoso.trx + xa_persistent_file = ../database/virtuoso.pxa + ErrorLogLevel = 7 + FileExtend = 200 + MaxCheckpointRemap = 2000 + Striping = 0 + TempStorage = TempDatabase + + [TempDatabase] + DatabaseFile = ../database/virtuoso-temp.db + TransactionFile = ../database/virtuoso-temp.trx + MaxCheckpointRemap = 2000 + Striping = 0 + + ; + ; Server parameters + ; + [Parameters] + ServerPort = 1111 + LiteMode = 0 + DisableUnixSocket = 1 + DisableTcpSocket = 0 + ;SSLServerPort = 2111 + ;SSLCertificate = cert.pem + ;SSLPrivateKey = pk.pem + ;X509ClientVerify = 0 + ;X509ClientVerifyDepth = 0 + 
;X509ClientVerifyCAFile = ca.pem + MaxClientConnections = 10 + CheckpointInterval = 60 + O_DIRECT = 0 + CaseMode = 2 + MaxStaticCursorRows = 5000 + CheckpointAuditTrail = 0 + AllowOSCalls = 0 + SchedulerInterval = 10 + DirsAllowed = ., ../vad, /usr/share/proj + ThreadCleanupInterval = 0 + ThreadThreshold = 10 + ResourcesCleanupInterval = 0 + FreeTextBatchSize = 100000 + SingleCPU = 0 + VADInstallDir = ../vad/ + PrefixResultNames = 0 + RdfFreeTextRulesSize = 100 + IndexTreeMaps = 64 + MaxMemPoolSize = 200000000 + PrefixResultNames = 0 + MacSpotlight = 0 + MaxQueryMem = 2G ; memory allocated to query processor + VectorSize = 1000 ; initial parallel query vector (array of query operations) size + MaxVectorSize = 1000000 ; query vector size threshold. + AdjustVectorSize = 0 + ThreadsPerQuery = 4 + AsyncQueueMaxThreads = 10 + ;; + ;; When running with large data sets, one should configure the Virtuoso + ;; process to use between 2/3 to 3/5 of free system memory and to stripe + ;; storage on all available disks. 
+ ;; + ;; Uncomment next two lines if there is 2 GB system memory free + ;NumberOfBuffers = 170000 + ;MaxDirtyBuffers = 130000 + ;; Uncomment next two lines if there is 4 GB system memory free + ;NumberOfBuffers = 340000 + ; MaxDirtyBuffers = 250000 + ;; Uncomment next two lines if there is 8 GB system memory free + ;NumberOfBuffers = 680000 + ;MaxDirtyBuffers = 500000 + ;; Uncomment next two lines if there is 16 GB system memory free + ;NumberOfBuffers = 1360000 + ;MaxDirtyBuffers = 1000000 + ;; Uncomment next two lines if there is 32 GB system memory free + ;NumberOfBuffers = 2720000 + ;MaxDirtyBuffers = 2000000 + ;; Uncomment next two lines if there is 48 GB system memory free + ;NumberOfBuffers = 4000000 + ;MaxDirtyBuffers = 3000000 + ;; Uncomment next two lines if there is 64 GB system memory free + ;NumberOfBuffers = 5450000 + ;MaxDirtyBuffers = 4000000 + ;; + ;; Note the default settings will take very little memory + ;; but will not result in very good performance + ;; + NumberOfBuffers = 10000 + MaxDirtyBuffers = 6000 + + [HTTPServer] + ServerPort = 8890 + ServerRoot = ../vsp + MaxClientConnections = 10 + DavRoot = DAV + EnabledDavVSP = 0 + HTTPProxyEnabled = 0 + TempASPXDir = 0 + DefaultMailServer = localhost:25 + MaxKeepAlives = 10 + KeepAliveTimeout = 10 + MaxCachedProxyConnections = 10 + ProxyConnectionCacheTimeout = 15 + HTTPThreadSize = 280000 + HttpPrintWarningsInOutput = 0 + Charset = UTF-8 + ;HTTPLogFile = logs/http.log + MaintenancePage = atomic.html + EnabledGzipContent = 1 + + [AutoRepair] + BadParentLinks = 0 + + [Client] + SQL_PREFETCH_ROWS = 100 + SQL_PREFETCH_BYTES = 16000 + SQL_QUERY_TIMEOUT = 0 + SQL_TXN_TIMEOUT = 0 + ;SQL_NO_CHAR_C_ESCAPE = 1 + ;SQL_UTF8_EXECS = 0 + ;SQL_NO_SYSTEM_TABLES = 0 + ;SQL_BINARY_TIMESTAMP = 1 + ;SQL_ENCRYPTION_ON_PASSWORD = -1 + + [VDB] + ArrayOptimization = 0 + NumArrayParameters = 10 + VDBDisconnectTimeout = 1000 + KeepConnectionOnFixedThread = 0 + + [Replication] + ServerName = db-BIONIC-PORT + ServerEnable 
= 1 + QueueMax = 50000 + + ; + ; Striping setup + ; + ; These parameters have only effect when Striping is set to 1 in the + ; [Database] section, in which case the DatabaseFile parameter is ignored. + ; + ; With striping, the database is spawned across multiple segments + ; where each segment can have multiple stripes. + ; + ; Format of the lines below: + ; Segment = , [, .. ] + ; + ; must be ordered from 1 up. + ; + ; The is the total size of the segment which is equally divided + ; across all stripes forming the segment. Its specification can be in + ; gigabytes (g), megabytes (m), kilobytes (k) or in database blocks + ; (b, the default) + ; + ; Note that the segment size must be a multiple of the database page size + ; which is currently 8k. Also, the segment size must be divisible by the + ; number of stripe files forming the segment. + ; + ; The example below creates a 200 meg database striped on two segments + ; with two stripes of 50 meg and one of 100 meg. + ; + ; You can always add more segments to the configuration, but once + ; added, do not change the setup. + ; + [Striping] + Segment1 = 100M, db-seg1-1.db, db-seg1-2.db + Segment2 = 100M, db-seg2-1.db + ;... + ;[TempStriping] + ;Segment1 = 100M, db-seg1-1.db, db-seg1-2.db + ;Segment2 = 100M, db-seg2-1.db + ;... + ;[Ucms] + ;UcmPath = + ;Ucm1 = + ;Ucm2 = + ;... 
+ + [Zero Config] + ServerName = virtuoso (BIONIC-PORT) + ;ServerDSN = ZDSN + ;SSLServerName = + ;SSLServerDSN = + + [Mono] + ;MONO_TRACE = Off + ;MONO_PATH = + ;MONO_ROOT = + ;MONO_CFG_DIR = + ;virtclr.dll = + + [URIQA] + DynamicLocal = 0 + DefaultHost = localhost:8890 + + [SPARQL] + ;ExternalQuerySource = 1 + ;ExternalXsltSource = 1 + ;DefaultGraph = http://localhost:8890/dataspace + ;ImmutableGraphs = http://localhost:8890/dataspace + ResultSetMaxRows = 10000 + MaxConstructTriples = 10000 + MaxQueryCostEstimationTime = 400 ; in seconds + MaxQueryExecutionTime = 60 ; in seconds + DefaultQuery = select distinct ?Concept where {[] a ?Concept} LIMIT 100 + DeferInferenceRulesInit = 0 ; controls inference rules loading + MaxMemInUse = 0 ; limits the amount of memory for construct dict (0=unlimited) + ;LabelInferenceName = facets ; Only needed when using the Faceted Browser + ;PingService = http://rpc.pingthesemanticweb.com/ + + [Plugins] + LoadPath = ../hosting + Load1 = plain, geos + Load2 = plain, graphql + Load3 = plain, proj4 + Load4 = plain, shapefileio \ No newline at end of file diff --git a/test/test_basic_persistence.rb b/test/test_basic_persistence.rb index 0cafdfbd..665a5d60 100644 --- a/test/test_basic_persistence.rb +++ b/test/test_basic_persistence.rb @@ -1,7 +1,5 @@ require_relative 'test_case' -GooTest.configure_goo - module Dep class Ontology < Goo::Base::Resource model :ontology, name_with: :name diff --git a/test/test_cache.rb b/test/test_cache.rb index aecbdef3..f5659ca4 100644 --- a/test/test_cache.rb +++ b/test/test_cache.rb @@ -1,7 +1,4 @@ require_relative 'test_case' - -GooTest.configure_goo - require_relative 'models' class TestCache < MiniTest::Unit::TestCase @@ -103,7 +100,7 @@ def test_cache_models_back_door data = " " + " " + " ." 
- + Goo.sparql_data_client.append_triples(Student.type_uri,data,"application/x-turtle") programs = Program.where(name: "BioInformatics", university: [ name: "Stanford" ]) .include(:students).all @@ -131,11 +128,16 @@ def x.response_backup *args def x.response *args raise Exception, "Should be a successful hit" end - programs = Program.where(name: "BioInformatics", university: [ name: "Stanford" ]) - .include(:students).all + begin + programs = Program.where(name: "BioInformatics", university: [ name: "Stanford" ]) + .include(:students).all + rescue Exception + assert false, "should be cached" + end + #from cache - assert programs.length == 1 - assert_raises Exception do + assert_equal 1, programs.length + assert_raises Exception do #different query programs = Program.where(name: "BioInformatics X", university: [ name: "Stanford" ]).all end diff --git a/test/test_case.rb b/test/test_case.rb index 82d9c50e..af7f2a84 100644 --- a/test/test_case.rb +++ b/test/test_case.rb @@ -20,6 +20,7 @@ MiniTest::Unit.autorun require_relative "../lib/goo.rb" +require_relative '../config/config' class GooTest @@ -41,9 +42,8 @@ def _run_suites(suites, type) end def _run_suite(suite, type) - %[1,5,10,20] ret = [] - [1,5,10,20].each do |slice_size| + [Goo.slice_loading_size].each do |slice_size| puts "\nrunning test with slice_loading_size=#{slice_size}" Goo.slice_loading_size=slice_size begin @@ -59,35 +59,6 @@ def _run_suite(suite, type) MiniTest::Unit.runner = GooTest::Unit.new - def self.configure_goo - if not Goo.configure? 
- Goo.configure do |conf| - conf.add_redis_backend(host: "localhost") - conf.add_namespace(:omv, RDF::Vocabulary.new("http://omv.org/ontology/")) - conf.add_namespace(:skos, RDF::Vocabulary.new("http://www.w3.org/2004/02/skos/core#")) - conf.add_namespace(:owl, RDF::Vocabulary.new("http://www.w3.org/2002/07/owl#")) - conf.add_namespace(:rdfs, RDF::Vocabulary.new("http://www.w3.org/2000/01/rdf-schema#")) - conf.add_namespace(:goo, RDF::Vocabulary.new("http://goo.org/default/"),default=true) - conf.add_namespace(:metadata, RDF::Vocabulary.new("http://goo.org/metadata/")) - conf.add_namespace(:foaf, RDF::Vocabulary.new("http://xmlns.com/foaf/0.1/")) - conf.add_namespace(:rdf, RDF::Vocabulary.new("http://www.w3.org/1999/02/22-rdf-syntax-ns#")) - conf.add_namespace(:tiger, RDF::Vocabulary.new("http://www.census.gov/tiger/2002/vocab#")) - conf.add_namespace(:bioportal, RDF::Vocabulary.new("http://data.bioontology.org/")) - conf.add_namespace(:nemo, RDF::Vocabulary.new("http://purl.bioontology.org/NEMO/ontology/NEMO_annotation_properties.owl#")) - conf.add_sparql_backend( - :main, - backend_name: "4store", - query: "http://localhost:9000/sparql/", - data: "http://localhost:9000/data/", - update: "http://localhost:9000/update/", - options: { rules: :NONE } - ) - conf.add_search_backend(:main, service: "http://localhost:8983/solr/term_search_core1") - conf.use_cache = false - end - end - end - def self.triples_for_subject(resource_id) rs = Goo.sparql_query_client.query("SELECT * WHERE { #{resource_id.to_ntriples} ?p ?o . 
}") count = 0 diff --git a/test/test_chunks_write.rb b/test/test_chunks_write.rb index 4f0a8676..afee11f1 100644 --- a/test/test_chunks_write.rb +++ b/test/test_chunks_write.rb @@ -1,10 +1,8 @@ require_relative 'test_case' -GooTest.configure_goo - module TestChunkWrite - ONT_ID = "http:://example.org/data/nemo" - ONT_ID_EXTRA = "http:://example.org/data/nemo/extra" + ONT_ID = "http://example.org/data/nemo" + ONT_ID_EXTRA = "http://example.org/data/nemo/extra" class TestChunkWrite < MiniTest::Unit::TestCase @@ -20,14 +18,14 @@ def self.after_suite _delete end + def setup + self.class._delete + end + + def self._delete - graphs = [ONT_ID,ONT_ID_EXTRA] - url = Goo.sparql_data_client.url - graphs.each do |graph| - # This bypasses the chunks stuff - params = { method: :delete, url: "#{url.to_s}#{graph.to_s}", timeout: nil } - RestClient::Request.execute(params) - end + graphs = [ONT_ID, ONT_ID_EXTRA] + graphs.each { |graph| Goo.sparql_data_client.delete_graph(graph) } end def test_put_data @@ -74,20 +72,13 @@ def test_reentrant_queries ntriples_file_path = "./test/data/nemo_ontology.ntriples" # Bypass in chunks - url = Goo.sparql_data_client.url - params = { - method: :put, - url: "#{url.to_s}#{ONT_ID}", - payload: File.read(ntriples_file_path), - headers: {content_type: "application/x-turtle"}, - timeout: nil - } + params = self.class.params_for_backend(:post, ONT_ID, ntriples_file_path) RestClient::Request.execute(params) tput = Thread.new { Goo.sparql_data_client.put_triples(ONT_ID_EXTRA, ntriples_file_path, mime_type="application/x-turtle") - sleep(1.5) } + count_queries = 0 tq = Thread.new { 5.times do @@ -103,16 +94,16 @@ def test_reentrant_queries assert_equal 5, count_queries tput.join - triples_no_bnodes = 25256 + count = "SELECT (count(?s) as ?c) WHERE { GRAPH <#{ONT_ID_EXTRA}> { ?s ?p ?o }}" Goo.sparql_query_client.query(count).each do |sol| - assert_equal triples_no_bnodes, sol[:c].object + assert_includes [25256, 50512], sol[:c].object end tdelete = 
Thread.new { Goo.sparql_data_client.delete_graph(ONT_ID_EXTRA) - sleep(1.5) } + count_queries = 0 tq = Thread.new { 5.times do @@ -124,9 +115,8 @@ def test_reentrant_queries end } tq.join - assert tdelete.alive? - assert_equal 5, count_queries tdelete.join + assert_equal 5, count_queries count = "SELECT (count(?s) as ?c) WHERE { GRAPH <#{ONT_ID_EXTRA}> { ?s ?p ?o }}" Goo.sparql_query_client.query(count).each do |sol| @@ -136,16 +126,7 @@ def test_reentrant_queries def test_query_flood ntriples_file_path = "./test/data/nemo_ontology.ntriples" - - # Bypass in chunks - url = Goo.sparql_data_client.url - params = { - method: :put, - url: "#{url.to_s}#{ONT_ID}", - payload: File.read(ntriples_file_path), - headers: {content_type: "application/x-turtle"}, - timeout: nil - } + params = self.class.params_for_backend(:post, ONT_ID, ntriples_file_path) RestClient::Request.execute(params) tput = Thread.new { @@ -158,27 +139,34 @@ def test_query_flood 50.times do |j| oq = "SELECT (count(?s) as ?c) WHERE { ?s a ?o }" Goo.sparql_query_client.query(oq).each do |sol| - assert sol[:c].object > 0 + refute_equal 0, sol[:c] end end } end - log_status = [] - Thread.new { - 10.times do |i| - log_status << Goo.sparql_query_client.status - sleep(1.2) + threads.join + + if Goo.backend_4s? 
+ log_status = [] + Thread.new { + 10.times do |i| + log_status << Goo.sparql_query_client.status + end + } + + threads.each do |t| + t.join end - } + tput.join - threads.each do |t| - t.join + assert log_status.map { |x| x[:outstanding] }.max > 0 + assert_equal 16, log_status.map { |x| x[:running] }.max end - tput.join + end - assert log_status.map { |x| x[:outstanding] }.max > 0 - assert_equal 16, log_status.map { |x| x[:running] }.max + def self.params_for_backend(method, graph_name, ntriples_file_path = nil) + Goo.sparql_data_client.params_for_backend(graph_name, File.read(ntriples_file_path), "text/turtle", method) end end diff --git a/test/test_collections.rb b/test/test_collections.rb index 2177c669..65d1f46d 100644 --- a/test/test_collections.rb +++ b/test/test_collections.rb @@ -1,7 +1,5 @@ require_relative 'test_case' -GooTest.configure_goo - #collection on attribute class Issue < Goo::Base::Resource model :issue, collection: :owner, name_with: :description diff --git a/test/test_dsl_settings.rb b/test/test_dsl_settings.rb index 9a8f03df..52b0ac78 100644 --- a/test/test_dsl_settings.rb +++ b/test/test_dsl_settings.rb @@ -1,6 +1,5 @@ require_relative 'test_case' -GooTest.configure_goo class NewPersonModel < Goo::Base::Resource model :person_model_new, name_with: :name diff --git a/test/test_enum.rb b/test/test_enum.rb index db41c343..eaf13af2 100644 --- a/test/test_enum.rb +++ b/test/test_enum.rb @@ -1,7 +1,5 @@ require_relative 'test_case' -GooTest.configure_goo - module TestEnum VALUES = ["uploaded","removed","archived"] diff --git a/test/test_index.rb b/test/test_index.rb index 4d781973..bf4b8937 100644 --- a/test/test_index.rb +++ b/test/test_index.rb @@ -1,8 +1,6 @@ require_relative 'test_case' require_relative './app/models' -GooTest.configure_goo - module TestIndex class TestSchemaless < MiniTest::Unit::TestCase diff --git a/test/test_inmutable.rb b/test/test_inmutable.rb index 9d6037c0..0b1a8c2a 100644 --- a/test/test_inmutable.rb +++ 
b/test/test_inmutable.rb @@ -1,7 +1,5 @@ require_relative 'test_case' -GooTest.configure_goo - module TestInmutable class Status < Goo::Base::Resource model :status, :inmutable, name_with: :code diff --git a/test/test_inverse.rb b/test/test_inverse.rb index e926a572..2fbb4479 100644 --- a/test/test_inverse.rb +++ b/test/test_inverse.rb @@ -1,7 +1,5 @@ require_relative 'test_case' -GooTest.configure_goo - class Task < Goo::Base::Resource model :task, name_with: :description attribute :description, enforce: [ :existence, :unique] diff --git a/test/test_model_complex.rb b/test/test_model_complex.rb index 8f904d8b..3074683c 100644 --- a/test/test_model_complex.rb +++ b/test/test_model_complex.rb @@ -1,7 +1,5 @@ require_relative 'test_case' -GooTest.configure_goo - module TestComplex class Submission < Goo::Base::Resource @@ -78,12 +76,13 @@ def self.before_suite if GooTest.count_pattern("?s ?p ?o") > 100000 raise Exception, "Too many triples in KB, does not seem right to run tests" end - Goo.sparql_update_client.update("DELETE {?s ?p ?o } WHERE { ?s ?p ?o }") + + Goo.sparql_data_client.delete_graph(Submission.uri_type.to_s) end def self.after_suite Goo.use_cache = false - Goo.sparql_update_client.update("DELETE {?s ?p ?o } WHERE { ?s ?p ?o }") + Goo.sparql_data_client.delete_graph(Submission.uri_type.to_s) end def test_method_handler @@ -185,6 +184,11 @@ def test_multiple_collection() def test_collection() + # This call is not usually necessary as it is usually covered by + # the model declaration above. See the explanation in + # https://github.com/ncbo/goo/commit/0e09816b121750b3bb875a5c24cb79865287fcf4#commitcomment-90304626 + Goo.add_model(:class, Term) + submission = Submission.new(name: "submission1") unless submission.exist? submission.save @@ -313,6 +317,11 @@ def test_two_resources_same_id def test_parents_inverse_children + # This call is not usually necessary as it is usually covered by + # the model declaration above. 
See the explanation in + # https://github.com/ncbo/goo/commit/0e09816b121750b3bb875a5c24cb79865287fcf4#commitcomment-90304626 + Goo.add_model(:class, Term) + submission = Submission.new(name: "submission1") unless submission.exist? submission.save @@ -320,6 +329,7 @@ def test_parents_inverse_children submission = Submission.find("submission1").first end + terms = Term.in(submission) terms.each do |t| t.delete @@ -653,6 +663,11 @@ def test_empty_pages assert_equal 0, GooTest.count_pattern("GRAPH #{submission.id.to_ntriples} { #{t.id.to_ntriples} ?p ?o . }") end + # This call is not usually necessary as it is usually covered by + # the model declaration above. See the explanation in + # https://github.com/ncbo/goo/commit/0e09816b121750b3bb875a5c24cb79865287fcf4#commitcomment-90304626 + Goo.add_model(:class, Term) + terms = [] 10.times do |i| term = Term.new @@ -679,6 +694,12 @@ def test_empty_pages end def test_readonly_pages_with_include + + # This call is not usually necessary as it is usually covered by + # the model declaration above. See the explanation in + # https://github.com/ncbo/goo/commit/0e09816b121750b3bb875a5c24cb79865287fcf4#commitcomment-90304626 + Goo.add_model(:class, Term) + submission = Submission.new(name: "submission1") unless submission.exist? submission.save @@ -691,6 +712,7 @@ def test_readonly_pages_with_include assert_equal(0, GooTest.count_pattern("GRAPH #{submission.id.to_ntriples} { #{t.id.to_ntriples} ?p ?o . 
}")) end + terms = [] 10.times do |i| term = Term.new diff --git a/test/test_name_with.rb b/test/test_name_with.rb index 7ba4df42..c2f226a4 100644 --- a/test/test_name_with.rb +++ b/test/test_name_with.rb @@ -1,7 +1,5 @@ require_relative 'test_case' -GooTest.configure_goo - class NameWith < Goo::Base::Resource model :name_with, name_with: lambda { |s| id_generator(s) } attribute :name, enforce: [ :existence, :string, :unique ] diff --git a/test/test_namespaces.rb b/test/test_namespaces.rb index 78ba9a93..6c4bddc0 100644 --- a/test/test_namespaces.rb +++ b/test/test_namespaces.rb @@ -1,7 +1,5 @@ require_relative 'test_case' -GooTest.configure_goo - class NamespacesModel < Goo::Base::Resource model :namespaces, namespace: :rdfs, name_with: :name attribute :name, enforce: [ :existence, :string, :unique ], namespace: :skos diff --git a/test/test_read_only.rb b/test/test_read_only.rb index 268f7c86..9855decf 100644 --- a/test/test_read_only.rb +++ b/test/test_read_only.rb @@ -1,8 +1,6 @@ require_relative 'test_case' require_relative 'test_where' -GooTest.configure_goo - module TestReadOnly class TestReadOnlyWithStruct < TestWhere diff --git a/test/test_schemaless.rb b/test/test_schemaless.rb index f95a17d5..42084eb8 100644 --- a/test/test_schemaless.rb +++ b/test/test_schemaless.rb @@ -1,8 +1,6 @@ require_relative 'test_case' -GooTest.configure_goo - -module TestSChemaless +module TestSchemaless ONT_ID = "http:://example.org/data/nemo" @@ -118,6 +116,9 @@ def test_find_include_schemaless where = Klass.find(cognition_term).in(ontology).include(:unmapped) k = where.first enter = 0 + + assert k.unmapped.keys.include?(Goo.vocabulary(:nemo)[:definition]) + k.unmapped.each do |p,vals| if p.to_s == Goo.vocabulary(:nemo)[:synonym].to_s enter += 1 @@ -185,7 +186,19 @@ def test_index_order_by end end + + def test_all_pages_loop + ontology = Ontology.find(RDF::URI.new(ONT_ID)).first + page = 1 + count = 0 + begin + paging = Klass.in(ontology).page(page,50).all + count += 
paging.size + page = paging.next_page if paging.next? + end while(paging.next?) + assert_equal count, Klass.in(ontology).count + end def test_page_reuse_predicates ontology = Ontology.find(RDF::URI.new(ONT_ID)).first paging = Klass.in(ontology).include(:unmapped).page(1,100) @@ -208,7 +221,7 @@ def test_page_reuse_predicates all_ids << k.id end total += page.length - paging.page(page.next_page) if page.next? + paging.page(page.next_page, 100) if page.next? assert page.aggregate == 1713 end while(page.next?) assert all_ids.length == all_ids.uniq.length diff --git a/test/test_search.rb b/test/test_search.rb index 433dee86..180062d1 100644 --- a/test/test_search.rb +++ b/test/test_search.rb @@ -1,7 +1,5 @@ require_relative 'test_case' -GooTest.configure_goo - module TestSearch class TermSearch < Goo::Base::Resource diff --git a/test/test_validators.rb b/test/test_validators.rb index 5110da80..e5c3a9fe 100644 --- a/test/test_validators.rb +++ b/test/test_validators.rb @@ -1,6 +1,5 @@ require_relative 'test_case' -GooTest.configure_goo require_relative 'models' class Person < Goo::Base::Resource diff --git a/test/test_where.rb b/test/test_where.rb index c80fed33..a95131b6 100644 --- a/test/test_where.rb +++ b/test/test_where.rb @@ -1,7 +1,4 @@ require_relative 'test_case' - -GooTest.configure_goo - require_relative 'models' class TestWhere < MiniTest::Unit::TestCase @@ -73,7 +70,7 @@ def test_where_simple "http://example.org/program/Stanford/CompSci", "http://example.org/program/Stanford/Medicine" ] - assert_equal program_ids, st.programs.map { |x| x.id.to_s }.sort + assert_equal program_ids, st.programs.map { |x| x.id.to_s }.sort end def test_all @@ -104,7 +101,7 @@ def test_where_2levels programs = Program.where(name: "BioInformatics", university: [ address: [ country: "UK" ]]).all assert programs.length == 1 assert programs.first.id.to_s["Southampton/BioInformatics"] - + #any program from universities in the US programs = Program.where(university: [ address: [ 
country: "US" ]]).include([:name]).all assert programs.length == 3 @@ -121,15 +118,15 @@ def test_where_2levels_inverse #equivalent unis = University.where(address: [country: "US"]) - .and(programs: [category: [code: "Biology"]]).all + .and(programs: [category: [code: "Biology"]]).all assert unis.length == 1 assert unis.first.id.to_s == "http://goo.org/default/university/Stanford" end def test_embed_include programs = Program.where.include(:name) - .include(university: [:name]) - .include(category: [:code]).all + .include(university: [:name]) + .include(category: [:code]).all assert programs.length == 9 programs.each do |p| @@ -180,7 +177,7 @@ def test_iterative_include_in_place #two levels unis = University.where.all unis_return = University.where.models(unis) - .include(programs: [:name, students: [:name]]).to_a + .include(programs: [:name, students: [:name]]).to_a assert unis_return.object_id == unis.object_id return_object_id = unis.map { |x| x.object_id }.uniq.sort unis_object_id = unis.map { |x| x.object_id }.uniq.sort @@ -262,7 +259,18 @@ def test_embed_two_levels end end + def test_fetch_remaining + students = Student.where(enrolled:RDF::URI.new("http://example.org/program/Stanford/BioInformatics")) + .include(:name, :birth_date, enrolled: [:name]).all + + + s = students.select { |x| x.name['Daniel'] }.first + refute_nil s + assert_equal 2, s.enrolled.size + end + def test_paging_with_filter_order + skip('pagination with filter and order does not work in 4s') if Goo.backend_4s? 
f = Goo::Filter.new(:birth_date) > DateTime.parse('1978-01-03') total_count = Student.where.filter(f).count @@ -276,6 +284,14 @@ def test_paging_with_filter_order assert_equal total_count, page_1.size + page_2.size end + def test_two_level_include + programs = Program.where.include(:name).all + r = Program.where.models(programs).include(students: [:name]).all + r.each do |p| + refute_nil p.students + end + end + def test_unique_object_references #NOTE: unique references does not apply across different slice loading @@ -335,7 +351,7 @@ def test_unique_object_references def test_complex_include #Students in a university by name students = Student.where(enrolled: [university: [name: "Stanford"]]) - .include(:name) + .include(:name) .include(enrolled: [:name, university: [ :address ]]).all assert students.map { |x| x.name }.sort == ["Daniel","John","Susan"] @@ -345,7 +361,7 @@ def test_complex_include assert_instance_of University, p.university assert_instance_of Array, p.university.addresses assert_instance_of Address, p.university.addresses.first - assert_raises Goo::Base::AttributeNotLoaded do + assert_raises Goo::Base::AttributeNotLoaded do p.university.addresses.first.country end end @@ -405,23 +421,23 @@ def test_where_union_pattern def test_where_direct_attributes st = Student.where(name: "Daniel") - .or(name: "Louis") - .or(name: "Lee") - .or(name: "John").all + .or(name: "Louis") + .or(name: "Lee") + .or(name: "John").all assert st.length == 4 st = Student.where(name: "Daniel") - .and(name: "John").all + .and(name: "John").all assert st.length == 0 st = Student.where(name: "Daniel") - .and(birth_date: DateTime.parse('1978-01-04')).all + .and(birth_date: DateTime.parse('1978-01-04')).all assert st.length == 1 assert st.first.id.to_s["Daniel"] st = Student.where(name: "Daniel") - .or(name: "Louis") - .and(birth_date: DateTime.parse('1978-01-04')) + .or(name: "Louis") + .and(birth_date: DateTime.parse('1978-01-04')) assert st.length == 1 assert 
st.first.id.to_s["Daniel"] @@ -457,8 +473,8 @@ def test_combine_where_patterns_with_include st.each do |p| assert (p.name == "Susan" || p.name == "Daniel") assert Array, p.enrolled - assert (p.name == "Susan" && p.enrolled.length == 1) || - (p.name == "Daniel" && p.enrolled.length == 2) + assert (p.name == "Susan" && p.enrolled.length == 1) || + (p.name == "Daniel" && p.enrolled.length == 2) assert String, p.enrolled.first.university.address.first.country end end @@ -470,31 +486,31 @@ def test_filter f = Goo::Filter.new(:birth_date) > DateTime.parse('1978-01-03') st = Student.where.filter(f).all assert st.map { |x| x.id.to_s }.sort == ["http://goo.org/default/student/Daniel", - "http://goo.org/default/student/Lee", - "http://goo.org/default/student/Louis", - "http://goo.org/default/student/Robert"] + "http://goo.org/default/student/Lee", + "http://goo.org/default/student/Louis", + "http://goo.org/default/student/Robert"] f = (Goo::Filter.new(:birth_date) <= DateTime.parse('1978-01-01')) .or(Goo::Filter.new(:birth_date) >= DateTime.parse('1978-01-07')) st = Student.where.filter(f).all assert st.map { |x| x.id.to_s }.sort == [ - "http://goo.org/default/student/Robert", - "http://goo.org/default/student/Susan"] + "http://goo.org/default/student/Robert", + "http://goo.org/default/student/Susan"] f = (Goo::Filter.new(:birth_date) <= DateTime.parse('1978-01-01')) .or(Goo::Filter.new(:name) == "Daniel") st = Student.where.filter(f).all assert st.map { |x| x.id.to_s }.sort == [ - "http://goo.org/default/student/Daniel", - "http://goo.org/default/student/Susan"] + "http://goo.org/default/student/Daniel", + "http://goo.org/default/student/Susan"] f = (Goo::Filter.new(:birth_date) > DateTime.parse('1978-01-02')) .and(Goo::Filter.new(:birth_date) < DateTime.parse('1978-01-06')) st = Student.where.filter(f).all assert st.map { |x| x.id.to_s }.sort == [ - "http://goo.org/default/student/Daniel", - "http://goo.org/default/student/Louis", - "http://goo.org/default/student/Tim"] + 
"http://goo.org/default/student/Daniel", + "http://goo.org/default/student/Louis", + "http://goo.org/default/student/Tim"] f = Goo::Filter.new(enrolled: [ :credits ]) > 8 @@ -504,8 +520,8 @@ def test_filter #students without awards f = Goo::Filter.new(:awards).unbound st = Student.where.filter(f) - .include(:name) - .all + .include(:name) + .all assert st.map { |x| x.name }.sort == ["John","Tim","Louis","Lee","Robert"].sort #unbound on some non existing property @@ -540,7 +556,7 @@ def test_aggregated sts = Student.where.include(:name).aggregate(:count, :enrolled).all sts.each do |st| assert (st.name == "Daniel" && st.aggregates.first.value == 2) || - st.aggregates.first.value == 1 + st.aggregates.first.value == 1 end #students enrolled in more than 1 program and get the programs name From 2206084a977e73275a437f2eaa79fd2b2753d46b Mon Sep 17 00:00:00 2001 From: Syphax bouazzouni Date: Wed, 31 Jan 2024 20:00:11 +0100 Subject: [PATCH 054/106] Refactor: clean model settings module code (#52) * remove old file no more used * extract attribute settings module from the model settings module * remove the inmutable feature as deprecated and not used * rename callbacks method names --- Rakefile | 6 - lib/goo/base/attribute_proxy.rb | 57 ------ lib/goo/base/resource.rb | 11 +- lib/goo/base/settings/attribute.rb | 233 +++++++++++++++++++++ lib/goo/base/settings/settings.rb | 317 ++++------------------------- lib/goo/base/where.rb | 5 - lib/goo/sparql/solutions_mapper.rb | 8 +- test/app/bioportal.rb | 164 +++++++-------- test/test_inmutable.rb | 100 --------- 9 files changed, 357 insertions(+), 544 deletions(-) delete mode 100644 lib/goo/base/attribute_proxy.rb create mode 100644 lib/goo/base/settings/attribute.rb delete mode 100644 test/test_inmutable.rb diff --git a/Rakefile b/Rakefile index 42ddf39d..80c18410 100644 --- a/Rakefile +++ b/Rakefile @@ -50,12 +50,6 @@ Rake::TestTask.new do |t| t.warning = false end -Rake::TestTask.new do |t| - t.name = "test:inmutable" - 
t.test_files = FileList['test/test_inmutable.rb'] - t.warning = false -end - Rake::TestTask.new do |t| t.name = "test:inverse" t.test_files = FileList['test/test_inverse.rb'] diff --git a/lib/goo/base/attribute_proxy.rb b/lib/goo/base/attribute_proxy.rb deleted file mode 100644 index 5600256e..00000000 --- a/lib/goo/base/attribute_proxy.rb +++ /dev/null @@ -1,57 +0,0 @@ - -module Goo - module Base - - class AttributeValueProxy - def initialize(validator,internals) - @validator = validator - @internals = internals - end - - def cardinality_transform(attr, value, current_value) - if @validator.nil? - unless value.kind_of? Array - raise ArgumentError, "Attribute '#{attr} must be an array. No cardinality configured.'" - end - return value - end - if value.kind_of? Array - if @validator.options[:max] and value.length > @validator.options[:max] - #TODO review this - return value[0] if attr == :prefLabel - raise ArgumentError, "Attribute '#{attr}' does not satisfy max cardinality." - end - if @validator.options[:min] and value.length < @validator.options[:min] - raise ArgumentError, "Attribute '#{attr}' does not satisfy min cardinality." - end - if @validator.options[:max] and @validator.options[:max] == 1 - return value[0] - end - else #not an array - if (not @validator.options[:max]) or @validator.options[:max] > 1 - return [value] - end - if @validator.options[:max] and @validator.options[:max] == 1 - return value - end - if @validator.options[:min] and @validator.options[:min] > 0 - return [value] - end - end - if not value.kind_of? Array and current_value.kind_of? Array - raise ArgumentError, - "Multiple value objects cannot be replaced for non-array objects" - end - if value.kind_of? 
Array then value else [value] end - end - - def call(*args) - options = args[0] - value = options[:value] - attr = options[:attr] - current_value = options[:current_value] - tvalue = cardinality_transform(attr,value,current_value) - end - end - end -end diff --git a/lib/goo/base/resource.rb b/lib/goo/base/resource.rb index 4d497f8b..72e6aaf6 100644 --- a/lib/goo/base/resource.rb +++ b/lib/goo/base/resource.rb @@ -168,7 +168,7 @@ def delete(*args) end @persistent = false @modified = true - self.class.load_inmutable_instances if self.class.inmutable? && self.class.inm_instances + return nil end @@ -259,7 +259,7 @@ def save(*opts) #call update callback before saving if callbacks - self.class.attributes_with_update_callbacks.each do |attr| + self.class.attributes_with_callbacks.each do |attr| Goo::Validators::Enforce.enforce_callbacks(self, attr) end end @@ -302,7 +302,7 @@ def save(*opts) @modified_attributes = Set.new @persistent = true - self.class.load_inmutable_instances if self.class.inmutable? && self.class.inm_instances + return self end @@ -427,11 +427,6 @@ def self.map_attributes(inst,equivalent_predicates=nil, include_languages: false def self.find(id, *options) id = RDF::URI.new(id) if !id.instance_of?(RDF::URI) && self.name_with == :id id = id_from_unique_attribute(name_with(),id) unless id.instance_of?(RDF::URI) - if self.inmutable? 
&& self.inm_instances && self.inm_instances[id] - w = Goo::Base::Where.new(self) - w.instance_variable_set("@result", [self.inm_instances[id]]) - return w - end options_load = { ids: [id], klass: self }.merge(options[-1] || {}) options_load[:find] = true where = Goo::Base::Where.new(self) diff --git a/lib/goo/base/settings/attribute.rb b/lib/goo/base/settings/attribute.rb new file mode 100644 index 00000000..dda5fdd5 --- /dev/null +++ b/lib/goo/base/settings/attribute.rb @@ -0,0 +1,233 @@ +module Goo + module Base + module Settings + module AttributeSettings + + def attribute(*args) + options = args.reverse + attr_name = options.pop + attr_name = attr_name.to_sym + options = options.pop + options = {} if options.nil? + + options[:enforce] ||= [] + + set_data_type(options) + set_no_list_by_default(options) + + @model_settings[:attributes][attr_name] = options + load_yaml_scheme_options(attr_name) + shape_attribute(attr_name) + namespace = attribute_namespace(attr_name) || @model_settings[:namespace] + vocab = Goo.vocabulary(namespace) + if options[:property].is_a?(Proc) + @attribute_uris[attr_name] = options[:property] + else + @attribute_uris[attr_name] = vocab[options[:property] || attr_name] + end + if options[:enforce].include?(:unique) && options[:enforce].include?(:list) + raise ArgumentError, ":list options cannot be combined with :list" + end + set_range(attr_name) + end + + def shape_attribute(attr) + return if attr == :resource_id + + attr = attr.to_sym + define_method("#{attr}=") do |*args| + if self.class.handler?(attr) + raise ArgumentError, "Method based attributes cannot be set" + end + if self.class.inverse?(attr) && !(args && args.last.instance_of?(Hash) && args.last[:on_load]) + raise ArgumentError, "`#{attr}` is an inverse attribute. Values cannot be assigned." + end + @loaded_attributes.add(attr) + value = args[0] + unless args.last.instance_of?(Hash) and args.last[:on_load] + if self.persistent? 
and self.class.name_with == attr + raise ArgumentError, "`#{attr}` attribute is used to name this resource and cannot be modified." + end + prev = self.instance_variable_get("@#{attr}") + if !prev.nil? and !@modified_attributes.include?(attr) + if prev != value + @previous_values = @previous_values || {} + @previous_values[attr] = prev + end + end + @modified_attributes.add(attr) + end + if value.instance_of?(Array) + value = value.dup.freeze + end + self.instance_variable_set("@#{attr}", value) + end + define_method("#{attr}") do |*args| + attr_value = self.instance_variable_get("@#{attr}") + + if self.class.not_show_all_languages?(attr_value, args) + is_array = attr_value.values.first.is_a?(Array) + attr_value = attr_value.values.flatten + attr_value = attr_value.first unless is_array + end + + if self.class.handler?(attr) + if @loaded_attributes.include?(attr) + return attr_value + end + value = self.send("#{self.class.handler(attr)}") + self.instance_variable_set("@#{attr}", value) + @loaded_attributes << attr + return value + end + + if (not @persistent) or @loaded_attributes.include?(attr) + return attr_value + else + # TODO: bug here when no labels from one of the main_lang available... (when it is called by ontologies_linked_data ontologies_submission) + raise Goo::Base::AttributeNotLoaded, "Attribute `#{attr}` is not loaded for #{self.id}. Loaded attributes: #{@loaded_attributes.inspect}." + end + end + end + + def attributes(*options) + if options and options.length > 0 + option = options.first + + if option == :all + return @model_settings[:attributes].keys + end + + if option == :inverse + return @model_settings[:attributes].select { |_, v| v[:inverse] }.keys + end + + attrs = @model_settings[:attributes].select { |_, opts| opts[:enforce].include?(option) }.keys + + attrs.concat(attributes(:inverse)) if option == :list + + return attrs + end + + @model_settings[:attributes].select { |k, attr| attr[:inverse].nil? 
&& !handler?(k) }.keys + + end + + def attributes_with_defaults + @model_settings[:attributes].select { |_, opts| opts[:default] }.keys + end + + def attribute_namespace(attr) + attribute_settings(attr)[:namespace] + end + + def default(attr) + attribute_settings(attr)[:default] + end + + def range(attr) + @model_settings[:range][attr] + end + + def attribute_settings(attr) + @model_settings[:attributes][attr] + end + + def required?(attr) + return false if attribute_settings(attr).nil? + attribute_settings(attr)[:enforce].include?(:existence) + end + + def unique?(attr) + return false if attribute_settings(attr).nil? + attribute_settings(attr)[:enforce].include?(:unique) + end + + def datatype(attr) + enforced = attribute_settings(attr)[:enforce].dup + return :string if enforced.nil? + + enforced.delete(:list) + enforced.delete(:no_list) + + enforced.find { |e| Goo::Validators::DataType.ids.include?(e) } || :string + end + + def list?(attr) + return false if attribute_settings(attr).nil? + attribute_settings(attr)[:enforce].include?(:list) + end + + def index_attribute?(attr) + return false if attribute_settings(attr).nil? + attribute_settings(attr)[:index] + end + + def transitive?(attr) + return false unless @model_settings[:attributes].include?(attr) + attribute_settings(attr)[:transitive] == true + end + + def alias?(attr) + return false unless @model_settings[:attributes].include?(attr) + attribute_settings(attr)[:alias] == true + end + + def handler?(attr) + return false if attribute_settings(attr).nil? + !attribute_settings(attr)[:handler].nil? + end + + def handler(attr) + return false if attribute_settings(attr).nil? + attribute_settings(attr)[:handler] + end + + def inverse?(attr) + return false if attribute_settings(attr).nil? + !attribute_settings(attr)[:inverse].nil? 
+ end + + def inverse_opts(attr) + attribute_settings(attr)[:inverse] + end + + def attribute_uri(attr, *args) + if attr == :id + raise ArgumentError, ":id cannot be treated as predicate for .where, use find " + end + uri = @attribute_uris[attr] + if uri.is_a?(Proc) + uri = uri.call(*args.flatten) + end + return uri unless uri.nil? + attr_string = attr.to_s + Goo.namespaces.keys.each do |ns| + nss = ns.to_s + if attr_string.start_with?(nss) + return Goo.vocabulary(ns)[attr_string[nss.length + 1..-1]] + end + end + + Goo.vocabulary(nil)[attr] + end + + private + + def set_no_list_by_default(options) + if options[:enforce].nil? or !options[:enforce].include?(:list) + options[:enforce] = options[:enforce] ? (options[:enforce] << :no_list) : [:no_list] + end + end + + def set_data_type(options) + if options[:type] + options[:enforce] += Array(options[:type]) + options[:enforce].uniq! + options.delete :type + end + end + end + end + end +end diff --git a/lib/goo/base/settings/settings.rb b/lib/goo/base/settings/settings.rb index a7008087..adebeed6 100644 --- a/lib/goo/base/settings/settings.rb +++ b/lib/goo/base/settings/settings.rb @@ -1,5 +1,6 @@ require 'active_support/core_ext/string' require_relative 'yaml_settings' +require_relative 'attribute' module Goo module Base @@ -12,11 +13,13 @@ module ClassMethods attr_accessor :model_settings attr_reader :model_name attr_reader :attribute_uris + attr_reader :namespace include YAMLScheme + include AttributeSettings def default_model_options - {} + {name_with: lambda {|x| uuid_uri_generator(x)}} end def model(*args) @@ -28,12 +31,8 @@ def model(*args) model_name = args[0] @model_name = model_name.to_sym - #a hash with options is expected + # a hash with options is expected options = args.last - @inmutable = (args.include? :inmutable) - if @inmutable - @inm_instances = nil - end @model_settings = default_model_options.merge(options || {}) @@ -42,7 +41,8 @@ def model(*args) unless options.include? 
:name_with raise ArgumentError, "The model `#{model_name}` definition should include the :name_with option" end - Goo.add_model(@model_name,self) + + Goo.add_model(@model_name, self) @attribute_uris = {} @namespace = Goo.vocabulary(@model_settings[:namespace]) @uri_type = @namespace[@model_name.to_s.camelize] @@ -50,8 +50,8 @@ def model(*args) @model_settings[:attributes] = {} @model_settings[:rdf_type] = options[:rdf_type] - #registering a new models forces to redo ranges - Goo.models.each do |k,m| + # registering a new models forces to redo ranges + Goo.models.each do |k, m| m.attributes(:all).each do |attr| next if m.range(attr) m.set_range(attr) @@ -59,247 +59,29 @@ def model(*args) end end - def attributes(*options) - if options and options.length > 0 - filt = options.first - if filt == :all - return @model_settings[:attributes].keys - end - if filt == :inverse - return @model_settings[:attributes].keys. - select{ |k| @model_settings[:attributes][k][:inverse] } - end - atts = (@model_settings[:attributes]. - select{ |attr,opts| opts[:enforce].include?(filt) }).keys() - atts.concat(attributes(:inverse)) if filt == :list - return atts - end - return @model_settings[:attributes].keys. - select{ |k| @model_settings[:attributes][k][:inverse].nil? }. - select{ |k| !handler?(k) } - end - - def inmutable? - return @inmutable - end - - def collection?(attr) - return @model_settings[:collection] == attr - end - - def collection_opts - return @model_settings[:collection] - end - - def attributes_with_defaults - return (@model_settings[:attributes]. - select{ |attr,opts| opts[:default] }).keys() - end - - def attributes_with_update_callbacks - (@model_settings[:attributes]. 
- select{ |attr,opts| opts[:onUpdate] }).keys - end - - - def update_callbacks(attr) - @model_settings[:attributes][attr][:onUpdate] - end - - def default(attr) - return @model_settings[:attributes][attr][:default] - end - - def attribute_namespace(attr) - return @model_settings[:attributes][attr][:namespace] - end - - def range(attr) - @model_settings[:range][attr] - end - - def attribute_settings(attr) - @model_settings[:attributes][attr] - end - - def cardinality(attr) - return nil if @model_settings[:attributes][attr].nil? - cardinality = {} - enforce = @model_settings[:attributes][attr][:enforce] - min = enforce.map {|e| e.to_s.split("_").last.to_i if e.to_s.start_with?("min_") }.compact - max = enforce.map {|e| e.to_s.split("_").last.to_i if e.to_s.start_with?("max_") }.compact - cardinality[:min] = min.first unless min.empty? - cardinality[:max] = max.first unless max.empty? - cardinality.empty? ? nil : cardinality - end - - def required?(attr) - return false if @model_settings[:attributes][attr].nil? - @model_settings[:attributes][attr][:enforce].include?(:existence) - end - - def unique?(attr) - return false if @model_settings[:attributes][attr].nil? - @model_settings[:attributes][attr][:enforce].include?(:unique) - end - - def list?(attr) - return false if @model_settings[:attributes][attr].nil? - @model_settings[:attributes][attr][:enforce].include?(:list) - end - - def transitive?(attr) - return false if !@model_settings[:attributes].include?(attr) - return (@model_settings[:attributes][attr][:transitive] == true) - end - - def alias?(attr) - return false if !@model_settings[:attributes].include?(attr) - return (@model_settings[:attributes][attr][:alias] == true) - end - - def handler?(attr) - return false if @model_settings[:attributes][attr].nil? - return (!@model_settings[:attributes][attr][:handler].nil?) - end - - def handler(attr) - return false if @model_settings[:attributes][attr].nil? 
- return @model_settings[:attributes][attr][:handler] - end - - def inverse?(attr) - return false if @model_settings[:attributes][attr].nil? - return (!@model_settings[:attributes][attr][:inverse].nil?) - end - - def inverse_opts(attr) - return @model_settings[:attributes][attr][:inverse] - end - def set_range(attr) - @model_settings[:attributes][attr][:enforce].each do |opt| + attribute_settings(attr)[:enforce].each do |opt| if Goo.models.include?(opt) || opt.respond_to?(:model_name) || (opt.respond_to?(:new) && opt.new.kind_of?(Struct)) opt = Goo.models[opt] if opt.instance_of?(Symbol) - @model_settings[:range][attr]=opt + @model_settings[:range][attr] = opt break end end - if @model_settings[:attributes][attr][:inverse] - on = @model_settings[:attributes][attr][:inverse][:on] + if attribute_settings(attr)[:inverse] + on = attribute_settings(attr)[:inverse][:on] if Goo.models.include?(on) || on.respond_to?(:model_name) on = Goo.models[on] if on.instance_of?(Symbol) - @model_settings[:range][attr]=on + @model_settings[:range][attr] = on end end end - def attribute(*args) - options = args.reverse - attr_name = options.pop - attr_name = attr_name.to_sym - options = options.pop - options = {} if options.nil? 
- - options[:enforce] ||= [] - - set_data_type(options) - set_no_list_by_default(options) - - @model_settings[:attributes][attr_name] = options - load_yaml_scheme_options(attr_name) - shape_attribute(attr_name) - namespace = attribute_namespace(attr_name) - namespace = namespace || @model_settings[:namespace] - vocab = Goo.vocabulary(namespace) #returns default for nil input - if options[:property].is_a?(Proc) - @attribute_uris[attr_name] = options[:property] - else - @attribute_uris[attr_name] = vocab[options[:property] || attr_name] - end - if options[:enforce].include?(:unique) and options[:enforce].include?(:list) - raise ArgumentError, ":list options cannot be combined with :list" - end - set_range(attr_name) - end - - def attribute_uri(attr,*args) - if attr == :id - raise ArgumentError, ":id cannot be treated as predicate for .where, use find " - end - uri = @attribute_uris[attr] - if uri.is_a?(Proc) - uri = uri.call(*args.flatten) - end - return uri unless uri.nil? - attr_string = attr.to_s - Goo.namespaces.keys.each do |ns| - nss = ns.to_s - if attr_string.start_with?(nss) - return Goo.vocabulary(ns)[attr_string[nss.length+1..-1]] - end - end - #default - return Goo.vocabulary(nil)[attr] + def collection?(attr) + @model_settings[:collection] == attr end - def shape_attribute(attr) - return if attr == :resource_id - attr = attr.to_sym - define_method("#{attr}=") do |*args| - if self.class.handler?(attr) - raise ArgumentError, "Method based attributes cannot be set" - end - if self.class.inverse?(attr) && !(args && args.last.instance_of?(Hash) && args.last[:on_load]) - raise ArgumentError, "`#{attr}` is an inverse attribute. Values cannot be assigned." - end - @loaded_attributes.add(attr) - value = args[0] - unless args.last.instance_of?(Hash) and args.last[:on_load] - if self.persistent? and self.class.name_with == attr - raise ArgumentError, "`#{attr}` attribute is used to name this resource and cannot be modified." 
- end - prev = self.instance_variable_get("@#{attr}") - if !prev.nil? and !@modified_attributes.include?(attr) - if prev != value - @previous_values = @previous_values || {} - @previous_values[attr] = prev - end - end - @modified_attributes.add(attr) - end - if value.instance_of?(Array) - value = value.dup.freeze - end - self.instance_variable_set("@#{attr}",value) - end - define_method("#{attr}") do |*args| - attr_value = self.instance_variable_get("@#{attr}") - - if self.class.not_show_all_languages?(attr_value, args) - is_array = attr_value.values.first.is_a?(Array) - attr_value = attr_value.values.flatten - attr_value = attr_value.first unless is_array - end - - - if self.class.handler?(attr) - if @loaded_attributes.include?(attr) - return attr_value - end - value = self.send("#{self.class.handler(attr)}") - self.instance_variable_set("@#{attr}",value) - @loaded_attributes << attr - return value - end - - if (not @persistent) or @loaded_attributes.include?(attr) - return attr_value - else - # TODO: bug here when no labels from one of the main_lang available... (when it is called by ontologies_linked_data ontologies_submission) - raise Goo::Base::AttributeNotLoaded, "Attribute `#{attr}` is not loaded for #{self.id}. Loaded attributes: #{@loaded_attributes.inspect}." - end - end + def collection_opts + @model_settings[:collection] end def uuid_uri_generator(inst) @@ -308,19 +90,14 @@ def uuid_uri_generator(inst) if Goo.id_prefix return RDF::URI.new(Goo.id_prefix + model_name_uri + '/' + Goo.uuid) end - return namespace[ model_name_uri + '/' + Goo.uuid] + namespace[model_name_uri + '/' + Goo.uuid] end def uri_type(*args) - if @model_settings[:rdf_type] - return @model_settings[:rdf_type].call(*args) - end - return @uri_type + @model_settings[:rdf_type] ? 
@model_settings[:rdf_type].call(*args) : @uri_type end + alias :type_uri :uri_type - def namespace - return @namespace - end def id_prefix model_name_uri = model_name.to_s @@ -328,16 +105,15 @@ def id_prefix if Goo.id_prefix return RDF::URI.new(Goo.id_prefix + model_name_uri + '/') end - return namespace[model_name_uri + '/'] + namespace[model_name_uri + '/'] end - def id_from_unique_attribute(attr,value_attr) + def id_from_unique_attribute(attr, value_attr) if value_attr.nil? raise Goo::Base::IDGenerationError, "`#{attr}` value is nil. Id for resource cannot be generated." end uri_last_fragment = CGI.escape(value_attr) - model_prefix_uri = id_prefix() - return model_prefix_uri + uri_last_fragment + id_prefix + uri_last_fragment end def enum(*values) @@ -348,25 +124,11 @@ def enum(*values) end def name_with - return @model_settings[:name_with] - end - - def load_inmutable_instances - #TODO this should be SYNC - @inm_instances = nil - ins = self.where.include(self.attributes).all - @inm_instances = {} - ins.each do |ins| - @inm_instances[ins.id] = ins - end + @model_settings[:name_with] end def attribute_loaded?(attr) - return @loaded_attributes.include?(attr) - end - - def inm_instances - @inm_instances + @loaded_attributes.include?(attr) end def struct_object(attrs) @@ -377,12 +139,12 @@ def struct_object(attrs) attrs << :unmapped attrs << collection_opts if collection_opts attrs.uniq! - return Struct.new(*attrs) + Struct.new(*attrs) end STRUCT_CACHE = {} ## - # Return a struct-based, + # Return a struct-based, # read-only instance for a class that is populated with the contents of `attributes` def read_only(attributes) if !attributes.is_a?(Hash) || attributes.empty? 
@@ -396,11 +158,10 @@ def read_only(attributes) cls = STRUCT_CACHE[attributes.keys.hash] instance = cls.new instance.klass = self - attributes.each {|k,v| instance[k] = v} + attributes.each { |k, v| instance[k] = v } instance end - def show_all_languages?(args) args.first.is_a?(Hash) && args.first.keys.include?(:include_languages) && args.first[:include_languages] end @@ -409,20 +170,16 @@ def not_show_all_languages?(values, args) values.is_a?(Hash) && !show_all_languages?(args) end - private - - def set_no_list_by_default(options) - if options[:enforce].nil? or !options[:enforce].include?(:list) - options[:enforce] = options[:enforce] ? (options[:enforce] << :no_list) : [:no_list] - end + def attributes_with_callbacks + (@model_settings[:attributes]. + select{ |attr,opts| opts[:onUpdate] }).keys end - def set_data_type(options) - if options[:type] - options[:enforce] += Array(options[:type]) - options[:enforce].uniq! - options.delete :type - end + + + def update_callbacks(attr) + @model_settings[:attributes][attr][:onUpdate] end + end end end diff --git a/lib/goo/base/where.rb b/lib/goo/base/where.rb index 7aaad6ce..d4668e4c 100644 --- a/lib/goo/base/where.rb +++ b/lib/goo/base/where.rb @@ -169,11 +169,6 @@ def index_as(index_key,max=nil) end def all - if @result.nil? && @klass.inmutable? && @klass.inm_instances - if @pattern.nil? && @filters.nil? - @result = @klass.inm_instances.values - end - end process_query unless @result @result end diff --git a/lib/goo/sparql/solutions_mapper.rb b/lib/goo/sparql/solutions_mapper.rb index d849be3a..18ad4f06 100644 --- a/lib/goo/sparql/solutions_mapper.rb +++ b/lib/goo/sparql/solutions_mapper.rb @@ -152,7 +152,7 @@ def get_value_object(id, objects_new, object, list_attributes, predicate) if object.is_a?(RDF::URI) && (predicate != :id) && !range_for_v.nil? if objects_new.include?(object) object = objects_new[object] - elsif !range_for_v.inmutable? 
+ else pre_val = get_preload_value(id, object, predicate) object, objects_new = if !@read_only preloaded_or_new_object(object, objects_new, pre_val, predicate) @@ -160,8 +160,6 @@ def get_value_object(id, objects_new, object, list_attributes, predicate) # depedent read only preloaded_or_new_struct(object, objects_new, pre_val, predicate) end - else - object = range_for_v.find(object).first end end @@ -383,11 +381,9 @@ def dependent_model_creation(embed_struct, id, models_by_id, object, objects_new if range_for_v if objects_new.include?(object) object = objects_new[object] - elsif !range_for_v.inmutable? + else pre_val = get_pre_val(id, models_by_id, object, v) object = get_object_from_range(pre_val, embed_struct, object, objects_new, v) - else - object = range_for_v.find(object).first end end end diff --git a/test/app/bioportal.rb b/test/app/bioportal.rb index 2b3c57b3..8c99df03 100644 --- a/test/app/bioportal.rb +++ b/test/app/bioportal.rb @@ -5,90 +5,91 @@ require_relative './query_profiler' module Test - module BioPortal + module BioPortal class Ontology < Goo::Base::Resource model :ontology, namespace: :bioportal, name_with: :acronym attribute :acronym, namespace: :omv, enforce: [:existence, :unique] attribute :name, namespace: :omv, enforce: [:existence] attribute :administeredBy, enforce: [:user, :existence] - end + end class User < Goo::Base::Resource model :user, name_with: :username attribute :username, enforce: [:existence, :unique] attribute :email, enforce: [:existence, :email] attribute :roles, enforce: [:list, :role, :existence] - attribute :created, enforce: [ DateTime ], + attribute :created, enforce: [DateTime], default: lambda { |record| DateTime.now } - attribute :notes, inverse: { on: :note, attribute: :owner} - end + attribute :notes, inverse: { on: :note, attribute: :owner } + end class Role < Goo::Base::Resource - model :role, :inmutable, name_with: :code + model :role, name_with: :code attribute :code, enforce: [:existence, :unique] 
attribute :users, inverse: { on: :user, attribute: :roles } end class Note < Goo::Base::Resource - model :note, name_with: lambda { |s| id_generator(s) } + model :note, name_with: lambda { |s| id_generator(s) } attribute :content, enforce: [:existence] attribute :ontology, enforce: [:existence, :ontology] attribute :owner, enforce: [:existence, :user] + def self.id_generator(inst) - return RDF::URI.new("http://example.org/note/" + inst.owner.username + "/" + Random.rand(1000000).to_s ) + return RDF::URI.new("http://example.org/note/" + inst.owner.username + "/" + Random.rand(1000000).to_s) end end def self.benchmark_data - Goo.sparql_query_client.reset_profiling - if false - 10.times do |i| - Role.new(code: "role#{i}").save - end - puts "Roles created" - 900.times do |i| - roles = [] - 2.times do |j| - roles << Role.find("role#{j}").first + Goo.sparql_query_client.reset_profiling + if false + 10.times do |i| + Role.new(code: "role#{i}").save + end + puts "Roles created" + 900.times do |i| + roles = [] + 2.times do |j| + roles << Role.find("role#{j}").first + end + u = User.new(username: "user#{i}name", email: "email#{i}@example.org", roles: roles) + u.save + puts "#{i} users created" + end + 400.times do |i| + ont = Ontology.new(acronym: "ontology #{i}", name: "ontology ontology ontology #{i}") + ont.administeredBy = User.find("user#{i % 75}name").first + ont.save + end + binding.pry + 1000.times do |i| + ont = Ontology.where(acronym: "ontology #{Random.rand(200)}").all.first + owner = User.where(username: "user#{i % 300}name").include(:username).all.first + n = Note.new(content: "content " * 60, owner: owner, ontology: ont) + n.save + puts "created note #{i}" + end + binding.pry + 2000.times do |i| + ont = Ontology.where(acronym: "ontology #{Random.rand(15)}").all.first + owner = User.where(username: "user#{i % 200}name").include(:username).all.first + n = Note.new(content: "content " * 60, owner: owner, ontology: ont) + n.save + puts "created note #{i}" + end + 
binding.pry + 800.times do |i| + ont = Ontology.where(acronym: "ontology #{Random.rand(6)}").all.first + owner = User.where(username: "user#{i % 200}name").include(:username).all.first + n = Note.new(content: "content " * 60, owner: owner, ontology: ont) + n.save + puts "created note #{i}" end - u = User.new(username: "user#{i}name", email: "email#{i}@example.org", roles: roles) - u.save - puts "#{i} users created" - end - 400.times do |i| - ont = Ontology.new(acronym: "ontology #{i}",name: "ontology ontology ontology #{i}") - ont.administeredBy = User.find("user#{i % 75}name").first - ont.save - end - binding.pry - 1000.times do |i| - ont = Ontology.where(acronym: "ontology #{Random.rand(200)}").all.first - owner = User.where(username: "user#{i % 300}name").include(:username).all.first - n = Note.new(content: "content " * 60, owner: owner, ontology: ont) - n.save - puts "created note #{i}" - end - binding.pry - 2000.times do |i| - ont = Ontology.where(acronym: "ontology #{Random.rand(15)}").all.first - owner = User.where(username: "user#{i % 200}name").include(:username).all.first - n = Note.new(content: "content " * 60, owner: owner, ontology: ont) - n.save - puts "created note #{i}" - end - binding.pry - 800.times do |i| - ont = Ontology.where(acronym: "ontology #{Random.rand(6)}").all.first - owner = User.where(username: "user#{i % 200}name").include(:username).all.first - n = Note.new(content: "content " * 60, owner: owner, ontology: ont) - n.save - puts "created note #{i}" end - end 500.times do |i| ont_id = 0 begin - ont_id = Random.rand(5)+180 + ont_id = Random.rand(5) + 180 end ont = Ontology.where(acronym: "ontology #{ont_id}").all.first owner = User.where(username: "user#{i % 200}name").include(:username).all.first @@ -98,12 +99,12 @@ def self.benchmark_data end end - def self.benchmark_naive_query + def self.benchmark_naive_query Goo.sparql_query_client.reset_profiling ont = Ontology.where.include(:acronym).all bench_result = [] ont.each do |ont| - qq 
=< . ?id ?username . @@ -128,12 +129,12 @@ def self.benchmark_naive_query users = {} roles = {} count_sol = 0 - res = client.query(qq) + res = client.query(qq) res.each do |sol| unless users.include?(sol[:id]) users[sol[:id]] = User.new - users[sol[:id]].username=sol[:username] - users[sol[:id]].email=sol[:email] + users[sol[:id]].username = sol[:username] + users[sol[:id]].email = sol[:email] end unless roles.include?(sol[:roles]) roles[sol[:roles]] = Role.new @@ -145,14 +146,14 @@ def self.benchmark_naive_query end count_sol = count_sol + 1 end - bench_result << [Time.now - start,notes.length, client.query_times.last, client.parse_times.last,count_sol ] + bench_result << [Time.now - start, notes.length, client.query_times.last, client.parse_times.last, count_sol] end bench_result.select! { |x| x[1] > 0 } bench_result.sort_by! { |x| x[1] } CSV.open("benchmark_naive.csv", "wb") do |csv| - csv << ["total", "notes", "qt", "pt","sol"] + csv << ["total", "notes", "qt", "pt", "sol"] bench_result.each do |b| - csv << b + csv << b end end end @@ -162,7 +163,7 @@ def self.benchmark_naive_fast ont = Ontology.where.include(:acronym).all bench_result = [] ont.each do |ont| - qq =< . ?note ?id . @@ -183,11 +184,11 @@ def self.benchmark_naive_fast users = {} roles = {} count_sol = 0 - res = client.query(qq) + res = client.query(qq) res.each do |sol| unless users.include?(sol[:id]) users[sol[:id]] = User.new - users[sol[:id]].username=sol[:username] + users[sol[:id]].username = sol[:username] end unless roles.include?(sol[:roles]) roles[sol[:roles]] = Role.new @@ -199,14 +200,14 @@ def self.benchmark_naive_fast end count_sol = count_sol + 1 end - bench_result << [Time.now - start,notes.length, client.query_times.last, client.parse_times.last,count_sol ] + bench_result << [Time.now - start, notes.length, client.query_times.last, client.parse_times.last, count_sol] end bench_result.select! { |x| x[1] > 0 } bench_result.sort_by! 
{ |x| x[1] } CSV.open("benchmark_naive_fast.csv", "wb") do |csv| - csv << ["total", "notes", "qt", "pt","sol"] + csv << ["total", "notes", "qt", "pt", "sol"] bench_result.each do |b| - csv << b + csv << b end end end @@ -221,20 +222,20 @@ def self.benchmark_query_goo_fast start = Time.now notes = nil notes = Note.where(ontology: ont) - .include(:content) - .include(:owner) - .all + .include(:content) + .include(:owner) + .all num_queries = client.query_times.length - agg_parsing = client.parse_times.inject{|sum,x| sum + x } - agg_queries = client.query_times.inject{|sum,x| sum + x } - bench_result << [Time.now - start, notes.length,agg_queries,agg_parsing,num_queries ] + agg_parsing = client.parse_times.inject { |sum, x| sum + x } + agg_queries = client.query_times.inject { |sum, x| sum + x } + bench_result << [Time.now - start, notes.length, agg_queries, agg_parsing, num_queries] end bench_result.select! { |x| x[1] > 0 } bench_result.sort_by! { |x| x[1] } CSV.open("benchmark_goo_fast.csv", "wb") do |csv| csv << ["total", "notes", "agg_qt", "agg_qp", "queries"] bench_result.each do |b| - csv << b + csv << b end end end @@ -244,27 +245,26 @@ def self.benchmark_query_goo client.reset_profiling ont = Ontology.where.include(:acronym).all bench_result = [] - Role.load_inmutable_instances ont.each do |ont| client.reset_profiling start = Time.now notes = nil notes = Note.where(ontology: ont) - .include(:content) - .include(owner: [ :username, :email, roles: [:code]]) - .read_only - .all + .include(:content) + .include(owner: [:username, :email, roles: [:code]]) + .read_only + .all num_queries = client.query_times.length - agg_parsing = client.parse_times.inject{|sum,x| sum + x } - agg_queries = client.query_times.inject{|sum,x| sum + x } - bench_result << [Time.now - start, notes.length,agg_queries,agg_parsing,num_queries ] + agg_parsing = client.parse_times.inject { |sum, x| sum + x } + agg_queries = client.query_times.inject { |sum, x| sum + x } + bench_result << 
[Time.now - start, notes.length, agg_queries, agg_parsing, num_queries] end bench_result.select! { |x| x[1] > 0 } bench_result.sort_by! { |x| x[1] } CSV.open("benchmark_goo.csv", "wb") do |csv| csv << ["total", "notes", "agg_qt", "agg_qp", "queries"] bench_result.each do |b| - csv << b + csv << b end end end diff --git a/test/test_inmutable.rb b/test/test_inmutable.rb deleted file mode 100644 index 0b1a8c2a..00000000 --- a/test/test_inmutable.rb +++ /dev/null @@ -1,100 +0,0 @@ -require_relative 'test_case' - -module TestInmutable - class Status < Goo::Base::Resource - model :status, :inmutable, name_with: :code - attribute :code, enforce: [:unique, :existence] - attribute :description, enforce: [:existence] - end - - class Person < Goo::Base::Resource - model :person, :inmutable, name_with: :name - attribute :name, enforce: [:unique, :existence] - attribute :status, enforce: [:status, :existence] - end - - class TestInmutableCase < MiniTest::Unit::TestCase - def initialize(*args) - super(*args) - end - - def setup - end - - def self.before_suite - status = ["single", "married", "divorced", "widowed"] - status.each do |st| - stt = Status.new(code: st, description: (st + " some desc")) - stt.save - end - people = [ - ["Susan","married"], - ["Lee","divorced"], - ["John","divorced"], - ["Peter","married"], - ["Christine","married"], - ["Ana","single"], - ] - people.each do |p| - po = Person.new - po.name = p[0] - po.status = Status.find(p[1]).first - po.save - end - end - - def self.after_suite - objs = [Person,Status] - objs.each do |obj| - obj.where.all.each do |st| - st.delete - end - end - end - - ## TODO inmutable are deprecated - they might come back in a different way" - def skip_test_inmutable - #they come fully loaded - Status.load_inmutable_instances - status1 = Status.where.all.sort_by { |s| s.code } - status2 = Status.where.all.sort_by { |s| s.code } - assert status1.length == 4 - assert status2.length == 4 - #same referencs - status1.each_index do |i| - 
assert status1[i].object_id==status2[i].object_id - end - - #create a new object - stt = Status.new(code: "xx", description: ("xx" + " some desc")) - stt.save - - status1 = Status.where.all.sort_by { |s| s.code } - status2 = Status.where.all.sort_by { |s| s.code } - assert status1.length == 5 - assert status2.length == 5 - #same referencs - status1.each_index do |i| - assert status1[i].object_id==status2[i].object_id - end - - status1.each do |st| - assert st.code - assert st.description - end - - marr = Status.find("divorced").first - assert marr.code == "divorced" - assert marr.description - assert marr.object_id == status1.first.object_id - - people = Person.where.include(:name, status: [ :code, :description ]).all - people.each do |p| - assert p.status.object_id == status1.select { |st| st.id == p.status.id }.first.object_id - assert p.status.code - assert p.status.description - end - end - - end -end From f88b50a1e2728a9b513c04c4f9e342eec1cffec7 Mon Sep 17 00:00:00 2001 From: Syphax bouazzouni Date: Wed, 31 Jan 2024 20:24:22 +0100 Subject: [PATCH 055/106] Feature: Add after_save and after_destroy hooks to models (#53) * remove old file no more used * extract attribute settings module from the model settings module * remove the inmutable feature as deprecated and not used * rename callbacks method names * add hooks module --- lib/goo/base/resource.rb | 8 ++++ lib/goo/base/settings/hooks.rb | 62 +++++++++++++++++++++++++++++++ lib/goo/base/settings/settings.rb | 16 ++------ lib/goo/utils/callbacks_utils.rb | 22 +++++++++++ lib/goo/validators/enforce.rb | 8 ++-- test/settings/test_hooks.rb | 50 +++++++++++++++++++++++++ test/test_dsl_settings.rb | 2 +- 7 files changed, 149 insertions(+), 19 deletions(-) create mode 100644 lib/goo/base/settings/hooks.rb create mode 100644 lib/goo/utils/callbacks_utils.rb create mode 100644 test/settings/test_hooks.rb diff --git a/lib/goo/base/resource.rb b/lib/goo/base/resource.rb index 72e6aaf6..bd4ff741 100644 --- 
a/lib/goo/base/resource.rb +++ b/lib/goo/base/resource.rb @@ -169,6 +169,10 @@ def delete(*args) @persistent = false @modified = true + if self.class.after_destroy? + self.class.call_after_destroy(self) + end + return nil end @@ -303,6 +307,10 @@ def save(*opts) @modified_attributes = Set.new @persistent = true + if self.class.after_save? + self.class.call_after_save(self) + end + return self end diff --git a/lib/goo/base/settings/hooks.rb b/lib/goo/base/settings/hooks.rb new file mode 100644 index 00000000..7925b2a0 --- /dev/null +++ b/lib/goo/base/settings/hooks.rb @@ -0,0 +1,62 @@ +require 'yaml' +require_relative '../../utils/callbacks_utils' + +module Goo + module Base + module Settings + module Hooks + + include CallbackRunner + + def after_save(*methods) + @model_settings[:after_save] ||= [] + @model_settings[:after_save].push(*methods) + end + + def after_destroy(*methods) + @model_settings[:after_destroy] ||= [] + @model_settings[:after_destroy].push(*methods) + end + + def after_save_callbacks + Array(@model_settings[:after_save]) + end + + def after_destroy_callbacks + Array(@model_settings[:after_destroy]) + end + + def after_save? + !after_save_callbacks.empty? + end + + def after_destroy? + !after_destroy_callbacks.empty? + end + + def call_after_save(inst) + run_callbacks(inst, after_save_callbacks) + end + + def call_after_destroy(inst) + run_callbacks(inst, after_destroy_callbacks) + end + + def attributes_with_callbacks + (@model_settings[:attributes]. 
+ select{ |attr,opts| opts[:onUpdate] }).keys + end + + + def attribute_callbacks(attr) + @model_settings[:attributes][attr][:onUpdate] + end + + end + end + end +end + + + + diff --git a/lib/goo/base/settings/settings.rb b/lib/goo/base/settings/settings.rb index adebeed6..bf2c38da 100644 --- a/lib/goo/base/settings/settings.rb +++ b/lib/goo/base/settings/settings.rb @@ -1,5 +1,6 @@ require 'active_support/core_ext/string' require_relative 'yaml_settings' +require_relative 'hooks' require_relative 'attribute' module Goo @@ -15,8 +16,7 @@ module ClassMethods attr_reader :attribute_uris attr_reader :namespace - include YAMLScheme - include AttributeSettings + include YAMLScheme ,AttributeSettings, Hooks def default_model_options {name_with: lambda {|x| uuid_uri_generator(x)}} @@ -169,17 +169,7 @@ def show_all_languages?(args) def not_show_all_languages?(values, args) values.is_a?(Hash) && !show_all_languages?(args) end - - def attributes_with_callbacks - (@model_settings[:attributes]. - select{ |attr,opts| opts[:onUpdate] }).keys - end - - - def update_callbacks(attr) - @model_settings[:attributes][attr][:onUpdate] - end - + end end end diff --git a/lib/goo/utils/callbacks_utils.rb b/lib/goo/utils/callbacks_utils.rb new file mode 100644 index 00000000..b9e747ff --- /dev/null +++ b/lib/goo/utils/callbacks_utils.rb @@ -0,0 +1,22 @@ +module CallbackRunner + + def run_callbacks(inst, callbacks) + callbacks.each do |proc| + if instance_proc?(inst, proc) + call_proc(inst.method(proc)) + elsif proc.is_a?(Proc) + call_proc(proc) + end + end + end + + def instance_proc?(inst, opt) + opt && (opt.is_a?(Symbol) || opt.is_a?(String)) && inst.respond_to?(opt) + end + + def call_proc(proc) + proc.call + end + + +end \ No newline at end of file diff --git a/lib/goo/validators/enforce.rb b/lib/goo/validators/enforce.rb index d6f3816d..5c157fb1 100644 --- a/lib/goo/validators/enforce.rb +++ b/lib/goo/validators/enforce.rb @@ -1,9 +1,11 @@ +require_relative '../utils/callbacks_utils' 
module Goo module Validators module Enforce class EnforceInstance + include CallbackRunner attr_reader :errors_by_opt def initialize @errors_by_opt = {} @@ -67,7 +69,7 @@ def enforce(inst,attr,value) end def enforce_callback(inst, attr) - callbacks = Array(inst.class.update_callbacks(attr)) + callbacks = Array(inst.class.attribute_callbacks(attr)) callbacks.each do |proc| if instance_proc?(inst, proc) call_proc(inst.method(proc), inst, attr) @@ -87,10 +89,6 @@ def object_type?(opt) opt.respond_to?(:shape_attribute) ? opt : Goo.model_by_name(opt) end - def instance_proc?(inst, opt) - opt && (opt.is_a?(Symbol) || opt.is_a?(String)) && inst.respond_to?(opt) - end - def check_object_type(inst, attr, value, opt) model_range = object_type(opt) if model_range && !value.nil? diff --git a/test/settings/test_hooks.rb b/test/settings/test_hooks.rb new file mode 100644 index 00000000..47d8fa0f --- /dev/null +++ b/test/settings/test_hooks.rb @@ -0,0 +1,50 @@ +require_relative '../test_case' + +class TestHookModel < Goo::Base::Resource + model :test_hook, name_with: lambda { |s| RDF::URI.new("http://example.org/test/#{rand(1000)}") } + after_save :update_count, :update_count_2 + after_destroy :decrease_count_2 + attribute :name, enforce: [:existence, :unique] + + attr_reader :count, :count2 + + def update_count + @count ||= 0 + @count += 1 + end + + def update_count_2 + @count2 ||= 0 + @count2 += 2 + end + + def decrease_count_2 + @count2 -= 2 + end + +end + +class TestHooksSetting < MiniTest::Unit::TestCase + + def test_model_hooks + TestHookModel.find("test").first&.delete + + model = TestHookModel.new(name: "test").save + + assert_equal 1, model.count + assert_equal 2, model.count2 + + model.name = "test2" + model.save + + assert_equal 2, model.count + assert_equal 4, model.count2 + + + model.delete + + assert_equal 2, model.count + assert_equal 2, model.count2 + + end +end diff --git a/test/test_dsl_settings.rb b/test/test_dsl_settings.rb index 52b0ac78..3b8f493b 100644 --- 
a/test/test_dsl_settings.rb +++ b/test/test_dsl_settings.rb @@ -63,7 +63,7 @@ class YamlSchemeModelTest < Goo::Base::Resource end -class TestDSLSeeting < MiniTest::Unit::TestCase +class TestDSLSetting < MiniTest::Unit::TestCase def initialize(*args) super(*args) end From 85d70605af646900d31cd79bc6da9a32913d5a94 Mon Sep 17 00:00:00 2001 From: Syphax bouazzouni Date: Tue, 20 Feb 2024 10:43:36 +0100 Subject: [PATCH 056/106] Feature: update rdf gem to latest version (#56) * un pin rdf version, to use the latest and add rdf vocab and xml * update URI class monkey patch because Addressable does no more exist * RDF::SKOS is replaced with RDF::Vocab::SKOS in the latest version of RDF * pin rdf version to 3.2.11 the latest version that support ruby 2.7 * monkey path Literal::DateTime format to be supported by 4store * remove addressable dependency --- Gemfile | 1 + Gemfile.lock | 65 ++++++++++++++++----------- goo.gemspec | 6 ++- lib/goo.rb | 2 + lib/goo/mixins/sparql_client.rb | 79 +++++++++------------------------ lib/goo/sparql/loader.rb | 5 ++- test/test_chunks_write.rb | 2 +- test/test_model_complex.rb | 4 +- 8 files changed, 72 insertions(+), 92 deletions(-) diff --git a/Gemfile b/Gemfile index 49dd2b38..af13989b 100644 --- a/Gemfile +++ b/Gemfile @@ -23,3 +23,4 @@ group :profiling do end gem 'sparql-client', github: 'ontoportal-lirmm/sparql-client', branch: 'master' +gem 'faraday', '2.7.11' #unpin if we no more support ruby 2.7 \ No newline at end of file diff --git a/Gemfile.lock b/Gemfile.lock index f0c24969..c940c3b7 100644 --- a/Gemfile.lock +++ b/Gemfile.lock @@ -12,9 +12,11 @@ PATH remote: . 
specs: goo (0.0.2) - addressable (~> 2.8) pry - rdf (= 1.0.8) + rdf (= 3.2.11) + rdf-raptor + rdf-rdfxml + rdf-vocab redis rest-client rsolr @@ -30,36 +32,36 @@ GEM multi_json (~> 1.3) thread_safe (~> 0.1) tzinfo (~> 0.3.37) - addressable (2.8.5) - public_suffix (>= 2.0.2, < 6.0) - base64 (0.1.1) + base64 (0.2.0) builder (3.2.4) coderay (1.1.3) - concurrent-ruby (1.2.2) + concurrent-ruby (1.2.3) connection_pool (2.4.1) cube-ruby (0.0.3) daemons (1.4.1) docile (1.4.0) - domain_name (0.5.20190701) - unf (>= 0.0.5, < 1.0.0) + domain_name (0.6.20240107) eventmachine (1.2.7) faraday (2.7.11) base64 faraday-net_http (>= 2.0, < 3.1) ruby2_keywords (>= 0.0.4) faraday-net_http (3.0.2) + ffi (1.16.3) + htmlentities (4.3.4) http-accept (1.7.0) http-cookie (1.0.5) domain_name (~> 0.5) i18n (0.9.5) concurrent-ruby (~> 1.0) - json_pure (2.6.3) + json_pure (2.7.1) + link_header (0.0.8) macaddr (1.7.2) systemu (~> 2.6.5) method_source (1.0.0) - mime-types (3.5.1) + mime-types (3.5.2) mime-types-data (~> 3.2015) - mime-types-data (3.2023.1003) + mime-types-data (3.2024.0206) minitest (4.7.5) multi_json (1.15.0) mustermann (3.0.0) @@ -69,22 +71,35 @@ GEM pry (0.14.2) coderay (~> 1.1) method_source (~> 1.0) - public_suffix (5.0.3) rack (2.2.8) rack-accept (0.4.5) rack (>= 0.4) rack-post-body-to-params (0.1.8) activesupport (>= 2.3) - rack-protection (3.1.0) + rack-protection (3.2.0) + base64 (>= 0.1.0) rack (~> 2.2, >= 2.2.4) - rake (13.0.6) - rdf (1.0.8) - addressable (>= 2.2) - redis (5.0.7) - redis-client (>= 0.9.0) - redis-client (0.17.0) + rake (13.1.0) + rdf (3.2.11) + link_header (~> 0.0, >= 0.0.8) + rdf-raptor (3.2.0) + ffi (~> 1.15) + rdf (~> 3.2) + rdf-rdfxml (3.2.2) + builder (~> 3.2) + htmlentities (~> 4.3) + rdf (~> 3.2) + rdf-xsd (~> 3.2) + rdf-vocab (3.2.7) + rdf (~> 3.2, >= 3.2.4) + rdf-xsd (3.2.1) + rdf (~> 3.2) + rexml (~> 3.2) + redis (5.1.0) + redis-client (>= 0.17.0) + redis-client (0.20.0) connection_pool - request_store (1.5.1) + request_store (1.6.0) rack (>= 
1.4) rest-client (2.1.0) http-accept (>= 1.7.0, < 2.0) @@ -105,10 +120,10 @@ GEM simplecov (~> 0.19) simplecov-html (0.12.3) simplecov_json_formatter (0.1.4) - sinatra (3.1.0) + sinatra (3.2.0) mustermann (~> 3.0) rack (~> 2.2, >= 2.2.4) - rack-protection (= 3.1.0) + rack-protection (= 3.2.0) tilt (~> 2.0) systemu (2.6.5) thin (1.8.2) @@ -118,9 +133,6 @@ GEM thread_safe (0.3.6) tilt (2.3.0) tzinfo (0.3.62) - unf (0.1.4) - unf_ext - unf_ext (0.0.8.2) uuid (2.3.9) macaddr (~> 1.0) @@ -132,6 +144,7 @@ PLATFORMS DEPENDENCIES activesupport cube-ruby + faraday (= 2.7.11) goo! minitest (< 5.0) pry @@ -147,4 +160,4 @@ DEPENDENCIES uuid BUNDLED WITH - 2.3.15 + 2.4.21 diff --git a/goo.gemspec b/goo.gemspec index c3386799..f07be8ef 100644 --- a/goo.gemspec +++ b/goo.gemspec @@ -6,9 +6,11 @@ Gem::Specification.new do |s| s.email = "manuelso@stanford.edu" s.files = Dir["lib/**/*.rb"] s.homepage = "http://github.com/ncbo/goo" - s.add_dependency("addressable", "~> 2.8") s.add_dependency("pry") - s.add_dependency("rdf", "= 1.0.8") + s.add_dependency("rdf", "3.2.11") #unpin when we support only Ruby >= 3.0 + s.add_dependency("rdf-vocab") + s.add_dependency("rdf-rdfxml") + s.add_dependency("rdf-raptor") s.add_dependency("redis") s.add_dependency("rest-client") s.add_dependency("rsolr") diff --git a/lib/goo.rb b/lib/goo.rb index a63722d6..283ab5b4 100644 --- a/lib/goo.rb +++ b/lib/goo.rb @@ -1,6 +1,8 @@ require "pry" require "rdf" +require "rdf/vocab" require "rdf/ntriples" +require "rdf/rdfxml" require "sparql/client" require "set" diff --git a/lib/goo/mixins/sparql_client.rb b/lib/goo/mixins/sparql_client.rb index fcfb7888..dc2b0728 100644 --- a/lib/goo/mixins/sparql_client.rb +++ b/lib/goo/mixins/sparql_client.rb @@ -6,7 +6,24 @@ def to_uri module RDF def self.URI(*args, &block) - return args.first + return RDF::URI.new(*args) + end + + class URI + # Delegate any undefined method calls to the String object + def method_missing(method, *args, &block) + if 
self.to_s.respond_to?(method) + self.to_s.send(method, *args, &block) + else + super + end + end + + # Ensure respond_to? reflects the delegated methods + def respond_to_missing?(method, include_private = false) + self.to_s.respond_to?(method) || super + end + end class Writer @@ -31,66 +48,10 @@ def to_base end end - class URI - def initialize(uri_or_options) - case uri_or_options - when Hash - @uri = Addressable::URI.new(uri_or_options) - when Addressable::URI - @uri = uri_or_options - else - @uri = uri_or_options.to_s - #@uri = Addressable::URI.parse(uri_or_options.to_s) - end - rescue Addressable::URI::InvalidURIError => e - raise ArgumentError, e.message - end - - def method_missing(symbol, *args, &block) - unless @uri.respond_to?(symbol) - if (Addressable::URI.instance_methods.include?(symbol) && @uri.instance_of?(String)) - @uri = Addressable::URI.parse(@uri) - end - end - if @uri.respond_to?(symbol) - case result = @uri.send(symbol, *args, &block) - when Addressable::URI - self.class.new(result) - else result - end - else - super - end - end - - def last_part - f = fragment - return f if f - return to_s.split("/")[-1] - end - - def respond_to?(symbol,include_private = false) - @uri.respond_to?(symbol,include_private=false) || super - end - - def hash - @uri.to_s.hash - end - - end #end URI class Literal - @@subclasses_by_uri = {} - def self.datatyped_class(uri) - return nil if uri.nil? 
- if @@subclasses.length != (@@subclasses_by_uri.length + 1) - @@subclasses.each do |child| - if child.const_defined?(:DATATYPE) - @@subclasses_by_uri[child.const_get(:DATATYPE).to_s] = child - end - end - end - return @@subclasses_by_uri[uri] + class DateTime < Temporal + FORMAT = '%Y-%m-%dT%H:%M:%S'.freeze # the format that is supported by 4store end end end #end RDF diff --git a/lib/goo/sparql/loader.rb b/lib/goo/sparql/loader.rb index f3dcdb3d..cc101855 100644 --- a/lib/goo/sparql/loader.rb +++ b/lib/goo/sparql/loader.rb @@ -95,10 +95,11 @@ def predicate_map(predicates) predicates_map = {} uniq_p.each do |p| i = 0 - key = ('var_' + p.last_part + i.to_s).to_sym + last_part = p.to_s.include?("#") ? p.to_s.split('#').last : p.to_s.split('/').last + key = ('var_' + last_part + i.to_s).to_sym while predicates_map.include?(key) i += 1 - key = ('var_' + p.last_part + i.to_s).to_sym + key = ('var_' + last_part + i.to_s).to_sym break if i > 10 end predicates_map[key] = { uri: p, is_inverse: false } diff --git a/test/test_chunks_write.rb b/test/test_chunks_write.rb index afee11f1..bbf0c5ca 100644 --- a/test/test_chunks_write.rb +++ b/test/test_chunks_write.rb @@ -139,7 +139,7 @@ def test_query_flood 50.times do |j| oq = "SELECT (count(?s) as ?c) WHERE { ?s a ?o }" Goo.sparql_query_client.query(oq).each do |sol| - refute_equal 0, sol[:c] + refute_equal 0, sol[:c].to_i end end } diff --git a/test/test_model_complex.rb b/test/test_model_complex.rb index 3074683c..38a282d6 100644 --- a/test/test_model_complex.rb +++ b/test/test_model_complex.rb @@ -46,7 +46,7 @@ def self.tree_property(*args) if collection.id.to_s["submission1"] return RDF::RDFS[:subClassOf] end - return RDF::SKOS[:broader] + return RDF::Vocab::SKOS[:broader] end def self.class_rdf_type(*args) @@ -54,7 +54,7 @@ def self.class_rdf_type(*args) if collection.id.to_s["submission1"] return RDF::OWL[:Class] end - return RDF::SKOS[:Concept] + return RDF::Vocab::SKOS[:Concept] end attribute :methodBased, namespace: 
:rdfs, property: :subClassOf, handler: :dataMethod From 66b68ab53e731501bd453be1ffd7a76f7c795626 Mon Sep 17 00:00:00 2001 From: Syphax bouazzouni Date: Thu, 22 Feb 2024 17:18:53 +0100 Subject: [PATCH 057/106] Fix: saving a model removing unmodified attributes after consecutive save --- lib/goo/sparql/triples.rb | 2 ++ 1 file changed, 2 insertions(+) diff --git a/lib/goo/sparql/triples.rb b/lib/goo/sparql/triples.rb index df3f9f1d..317d1d84 100644 --- a/lib/goo/sparql/triples.rb +++ b/lib/goo/sparql/triples.rb @@ -53,6 +53,8 @@ def self.model_update_triples(model) if model.previous_values graph_delete = RDF::Graph.new model.previous_values.each do |attr,value| + next unless model.modified_attributes.any?{|x| attr.eql?(x)} + predicate = model.class.attribute_uri(attr,model.collection) values = value.kind_of?(Array) ? value : [value] values.each do |v| From 196070c15dfa0df196864ff88d608f9c30e17137 Mon Sep 17 00:00:00 2001 From: Syphax bouazzouni Date: Wed, 28 Feb 2024 22:27:42 +0100 Subject: [PATCH 058/106] Fix: enforce to use str() when doing a filter with a string value (#57) * enforce to use str() when doing a filter with a string * update agraph version to 8.1.0 --- Gemfile.lock | 6 +++++- docker-compose.yml | 2 +- goo.gemspec | 1 + lib/goo/sparql/query_builder.rb | 5 ++++- 4 files changed, 11 insertions(+), 3 deletions(-) diff --git a/Gemfile.lock b/Gemfile.lock index c940c3b7..5cd7be01 100644 --- a/Gemfile.lock +++ b/Gemfile.lock @@ -12,6 +12,7 @@ PATH remote: . 
specs: goo (0.0.2) + addressable (~> 2.8) pry rdf (= 3.2.11) rdf-raptor @@ -32,6 +33,8 @@ GEM multi_json (~> 1.3) thread_safe (~> 0.1) tzinfo (~> 0.3.37) + addressable (2.8.6) + public_suffix (>= 2.0.2, < 6.0) base64 (0.2.0) builder (3.2.4) coderay (1.1.3) @@ -71,7 +74,8 @@ GEM pry (0.14.2) coderay (~> 1.1) method_source (~> 1.0) - rack (2.2.8) + public_suffix (5.0.4) + rack (2.2.8.1) rack-accept (0.4.5) rack (>= 0.4) rack-post-body-to-params (0.1.8) diff --git a/docker-compose.yml b/docker-compose.yml index fd832341..6bd6cd56 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -21,7 +21,7 @@ services: retries: 5 agraph-ut: - image: franzinc/agraph:v8.0.0.rc1 + image: franzinc/agraph:v8.1.0 platform: linux/amd64 environment: - AGRAPH_SUPER_USER=test diff --git a/goo.gemspec b/goo.gemspec index f07be8ef..b7175779 100644 --- a/goo.gemspec +++ b/goo.gemspec @@ -6,6 +6,7 @@ Gem::Specification.new do |s| s.email = "manuelso@stanford.edu" s.files = Dir["lib/**/*.rb"] s.homepage = "http://github.com/ncbo/goo" + s.add_dependency("addressable", "~> 2.8") s.add_dependency("pry") s.add_dependency("rdf", "3.2.11") #unpin when we support only Ruby >= 3.0 s.add_dependency("rdf-vocab") diff --git a/lib/goo/sparql/query_builder.rb b/lib/goo/sparql/query_builder.rb index 1a6ea740..674ae4ec 100644 --- a/lib/goo/sparql/query_builder.rb +++ b/lib/goo/sparql/query_builder.rb @@ -383,9 +383,12 @@ def query_filter_sparql(klass, filter, filter_patterns, filter_graphs, value = RDF::Literal.new(filter_operation.value) if filter_operation.value.is_a? 
String value = RDF::Literal.new(filter_operation.value) + filter_var = "str(?#{filter_var})" + else + filter_var = "?#{filter_var}" end filter_operations << ( - "?#{filter_var.to_s} #{sparql_op_string(filter_operation.operator)} " + + "#{filter_var.to_s} #{sparql_op_string(filter_operation.operator)} " + " #{value.to_ntriples}") end From 4951e7289760db2a601e091e5c5b054af1b2a470 Mon Sep 17 00:00:00 2001 From: Syphax bouazzouni Date: Thu, 29 Feb 2024 01:52:45 +0100 Subject: [PATCH 059/106] Fix: monkey path RDF to not remove xsd:string by default --- Gemfile.lock | 7 +++---- lib/goo/mixins/sparql_client.rb | 33 ++++++++++++++++----------------- 2 files changed, 19 insertions(+), 21 deletions(-) diff --git a/Gemfile.lock b/Gemfile.lock index 5cd7be01..affe4c00 100644 --- a/Gemfile.lock +++ b/Gemfile.lock @@ -1,6 +1,6 @@ GIT remote: https://github.com/ontoportal-lirmm/sparql-client.git - revision: aed51baf4106fd0f3d0e3f9238f0aad9406aa3f0 + revision: 180c818f7715baac64b2699bb452ef5c756f62c5 branch: master specs: sparql-client (1.0.1) @@ -141,8 +141,7 @@ GEM macaddr (~> 1.0) PLATFORMS - ruby - x86_64-darwin-16 + x86_64-darwin-23 x86_64-linux DEPENDENCIES @@ -164,4 +163,4 @@ DEPENDENCIES uuid BUNDLED WITH - 2.4.21 + 2.2.33 diff --git a/lib/goo/mixins/sparql_client.rb b/lib/goo/mixins/sparql_client.rb index dc2b0728..d4d98523 100644 --- a/lib/goo/mixins/sparql_client.rb +++ b/lib/goo/mixins/sparql_client.rb @@ -32,26 +32,25 @@ def validate? end end - class Literal - def to_base - text = [] - text << %("#{escape(value)}") - text << "@#{language}" if has_language? - if has_datatype? 
- if datatype.respond_to?:to_base - text << "^^#{datatype.to_base}" - else - text << "^^<#{datatype.to_s}>" - end - end - text.join "" - end - end - - class Literal class DateTime < Temporal FORMAT = '%Y-%m-%dT%H:%M:%S'.freeze # the format that is supported by 4store end + + def initialize(value, language: nil, datatype: nil, lexical: nil, validate: false, canonicalize: false, **options) + @object = value.freeze + @string = lexical if lexical + @string = value if !defined?(@string) && value.is_a?(String) + @string = @string.encode(Encoding::UTF_8).freeze if instance_variable_defined?(:@string) + @object = @string if instance_variable_defined?(:@string) && @object.is_a?(String) + @language = language.to_s.downcase.to_sym if language + @datatype = RDF::URI(datatype).freeze if datatype + @datatype ||= self.class.const_get(:DATATYPE) if self.class.const_defined?(:DATATYPE) + @datatype ||= instance_variable_defined?(:@language) && @language ? RDF.langString : RDF::URI("http://www.w3.org/2001/XMLSchema#string") + @original_datatype = datatype + end + + attr_reader :original_datatype end + end #end RDF From 0e554fce49713ce4d5a742a06c2fb59a547caf47 Mon Sep 17 00:00:00 2001 From: Syphax bouazzouni Date: Sat, 2 Mar 2024 22:03:25 +0100 Subject: [PATCH 060/106] Feature: Enhance SOLR integration and add a Schema API (#54) * add an abstraction to SOLR integeration and add Schema API * add SOLR Schema API tests * update SOLR backend configuration and init * use the new Solr connector in the model search interface * update search test to cover the new automatic indexing and unindexing * handle the solr container initialization when running docker for tests * add omit_norms options for SolrSchemaGenerator * fix solr schema initial dynamic fields declaration and replace the usage of mapping-ISOLatin1Accent * delay the schema generation to after model declarations or in demand * add solr edismax fitlers tests * fix indexBatch and unindexBatch tests * add security checks to the index 
and unindex functions * change dynamic fields names to have less code migration * update clear_all_schema to remove all copy and normal fields * add an option to force solr initialization if wanted * handle indexing embed objects of a model * add index update option * fix clear all schema to just remove all the fields and recreate them * add index_enabled? helper for models * perform a status test when initializing the solr connector * extract init_search_connection function from init_search_connections * fix typo in indexOptimize call * add solr search using HTTP post instead of GET for large queries --- .ruby-version | 1 + docker-compose.yml | 9 +- lib/goo.rb | 49 +++- lib/goo/base/settings/attribute.rb | 16 +- lib/goo/config/config.rb | 2 +- lib/goo/search/search.rb | 195 +++++++++---- lib/goo/search/solr/solr_admin.rb | 79 ++++++ lib/goo/search/solr/solr_connector.rb | 41 +++ lib/goo/search/solr/solr_query.rb | 108 +++++++ lib/goo/search/solr/solr_schema.rb | 184 ++++++++++++ lib/goo/search/solr/solr_schema_generator.rb | 279 +++++++++++++++++++ rakelib/docker_based_test.rake | 14 + test/solr/test_solr.rb | 122 ++++++++ test/test_search.rb | 249 ++++++++++++++++- 14 files changed, 1267 insertions(+), 81 deletions(-) create mode 100644 .ruby-version create mode 100644 lib/goo/search/solr/solr_admin.rb create mode 100644 lib/goo/search/solr/solr_connector.rb create mode 100644 lib/goo/search/solr/solr_query.rb create mode 100644 lib/goo/search/solr/solr_schema.rb create mode 100644 lib/goo/search/solr/solr_schema_generator.rb create mode 100644 test/solr/test_solr.rb diff --git a/.ruby-version b/.ruby-version new file mode 100644 index 00000000..6a81b4c8 --- /dev/null +++ b/.ruby-version @@ -0,0 +1 @@ +2.7.8 diff --git a/docker-compose.yml b/docker-compose.yml index 6bd6cd56..463a1b92 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -10,15 +10,10 @@ services: retries: 30 solr-ut: - image: ontoportal/solr-ut:0.0.2 + image: solr:8.11.2 ports: - 8983:8983 
- healthcheck: - test: ["CMD-SHELL", "curl -sf http://localhost:8983/solr/term_search_core1/admin/ping?wt=json | grep -iq '\"status\":\"OK\"}' || exit 1"] - start_period: 10s - interval: 10s - timeout: 5s - retries: 5 + command: bin/solr start -cloud -f agraph-ut: image: franzinc/agraph:v8.1.0 diff --git a/lib/goo.rb b/lib/goo.rb index 283ab5b4..adf73d3a 100644 --- a/lib/goo.rb +++ b/lib/goo.rb @@ -42,6 +42,7 @@ module Goo @@model_by_name = {} @@search_backends = {} @@search_connection = {} + @@search_collections = {} @@default_namespace = nil @@id_prefix = nil @@redis_client = nil @@ -101,7 +102,7 @@ def self.language_includes(lang) end def self.add_namespace(shortcut, namespace,default=false) - if !(namespace.instance_of? RDF::Vocabulary) + unless namespace.instance_of? RDF::Vocabulary raise ArgumentError, "Namespace must be a RDF::Vocabulary object" end @@namespaces[shortcut.to_sym] = namespace @@ -252,11 +253,9 @@ def self.configure raise ArgumentError, "Configuration needs to receive a code block" end yield self - configure_sanity_check() + configure_sanity_check - if @@search_backends.length > 0 - @@search_backends.each { |name, val| @@search_connection[name] = RSolr.connect(url: search_conf(name), timeout: 1800, open_timeout: 1800) } - end + init_search_connections @@namespaces.freeze @@sparql_backends.freeze @@ -280,8 +279,44 @@ def self.search_conf(name=:main) return @@search_backends[name][:service] end - def self.search_connection(name=:main) - return @@search_connection[name] + def self.search_connection(collection_name) + return search_client(collection_name).solr + end + + def self.search_client(collection_name) + @@search_connection[collection_name] + end + + def self.add_search_connection(collection_name, search_backend = :main, &block) + @@search_collections[collection_name] = { + search_backend: search_backend, + block: block_given? ? 
block : nil + } + end + + def self.search_connections + @@search_connection + end + + def self.init_search_connection(collection_name, search_backend = :main, block = nil, force: false) + return @@search_connection[collection_name] if @@search_connection[collection_name] && !force + + @@search_connection[collection_name] = SOLR::SolrConnector.new(search_conf(search_backend), collection_name) + if block + block.call(@@search_connection[collection_name].schema_generator) + @@search_connection[collection_name].enable_custom_schema + end + @@search_connection[collection_name].init(force) + @@search_connection[collection_name] + end + + + def self.init_search_connections(force=false) + @@search_collections.each do |collection_name, backend| + search_backend = backend[:search_backend] + block = backend[:block] + init_search_connection(collection_name, search_backend, block, force: force) + end end def self.sparql_query_client(name=:main) diff --git a/lib/goo/base/settings/attribute.rb b/lib/goo/base/settings/attribute.rb index dda5fdd5..dbf52b78 100644 --- a/lib/goo/base/settings/attribute.rb +++ b/lib/goo/base/settings/attribute.rb @@ -158,11 +158,6 @@ def list?(attr) attribute_settings(attr)[:enforce].include?(:list) end - def index_attribute?(attr) - return false if attribute_settings(attr).nil? - attribute_settings(attr)[:index] - end - def transitive?(attr) return false unless @model_settings[:attributes].include?(attr) attribute_settings(attr)[:transitive] == true @@ -212,6 +207,17 @@ def attribute_uri(attr, *args) Goo.vocabulary(nil)[attr] end + + def indexable?(attr) + setting = attribute_settings(attr.to_sym) + setting && (setting[:index].nil? 
|| setting[:index] == true) + end + + def fuzzy_searchable?(attr) + attribute_settings(attr)[:fuzzy_search] == true + end + + private def set_no_list_by_default(options) diff --git a/lib/goo/config/config.rb b/lib/goo/config/config.rb index ff51e8b7..4c51a223 100644 --- a/lib/goo/config/config.rb +++ b/lib/goo/config/config.rb @@ -20,7 +20,7 @@ def config(&block) @settings.goo_path_query ||= ENV['GOO_PATH_QUERY'] || '/sparql/' @settings.goo_path_data ||= ENV['GOO_PATH_DATA'] || '/data/' @settings.goo_path_update ||= ENV['GOO_PATH_UPDATE'] || '/update/' - @settings.search_server_url ||= ENV['SEARCH_SERVER_URL'] || 'http://localhost:8983/solr/term_search_core1' + @settings.search_server_url ||= ENV['SEARCH_SERVER_URL'] || 'http://localhost:8983/solr' @settings.redis_host ||= ENV['REDIS_HOST'] || 'localhost' @settings.redis_port ||= ENV['REDIS_PORT'] || 6379 @settings.bioportal_namespace ||= ENV['BIOPORTAL_NAMESPACE'] || 'http://data.bioontology.org/' diff --git a/lib/goo/search/search.rb b/lib/goo/search/search.rb index 1dc72ea9..b0cccfce 100644 --- a/lib/goo/search/search.rb +++ b/lib/goo/search/search.rb @@ -1,4 +1,5 @@ require 'rsolr' +require_relative 'solr/solr_connector' module Goo @@ -8,102 +9,184 @@ def self.included(base) base.extend(ClassMethods) end - def index(connection_name=:main) + def index(connection_name = nil, to_set = nil) raise ArgumentError, "ID must be set to be able to index" if @id.nil? - doc = indexable_object - Goo.search_connection(connection_name).add(doc) + document = indexable_object(to_set) + + return if document.blank? || document[:id].blank? + + connection_name ||= self.class.search_collection_name + unindex(connection_name) + self.class.search_client(connection_name).index_document(document) end - def index_update(to_set, connection_name=:main) + def index_update(attributes_to_update, connection_name = nil, to_set = nil) raise ArgumentError, "ID must be set to be able to index" if @id.nil? 
- raise ArgumentError, "Field names to be updated in index must be provided" if to_set.nil? + raise ArgumentError, "Field names to be updated in index must be provided" if attributes_to_update.blank? + + old_doc = self.class.search("id:\"#{index_id}\"").dig("response", "docs")&.first + + raise ArgumentError, "ID must be set to be able to index" if old_doc.blank? + doc = indexable_object(to_set) - doc.each { |key, val| - next if key === :id - doc[key] = {set: val} - } + doc.each do |key, val| + next unless attributes_to_update.any? { |attr| key.to_s.eql?(attr.to_s) || key.to_s.include?("#{attr}_") } + old_doc[key] = val + end + + connection_name ||= self.class.search_collection_name + unindex(connection_name) - Goo.search_connection(connection_name).update( - data: "[#{doc.to_json}]", - headers: { 'Content-Type' => 'application/json' } - ) + old_doc.reject! { |k, v| k.to_s.end_with?('_sort') || k.to_s.end_with?('_sorts') } + old_doc.delete("_version_") + self.class.search_client(connection_name).index_document(old_doc) end - def unindex(connection_name=:main) - id = index_id - Goo.search_connection(connection_name).delete_by_id(id) + def unindex(connection_name = nil) + connection_name ||= self.class.search_collection_name + self.class.search_client(connection_name).delete_by_id(index_id) end # default implementation, should be overridden by child class - def index_id() + def index_id raise ArgumentError, "ID must be set to be able to index" if @id.nil? 
@id.to_s end # default implementation, should be overridden by child class - def index_doc(to_set=nil) + def index_doc(to_set = nil) raise NoMethodError, "You must define method index_doc in your class for it to be indexable" end - def indexable_object(to_set=nil) - doc = index_doc(to_set) - # use resource_id for the actual term id because :id is a Solr reserved field - doc[:resource_id] = doc[:id].to_s - doc[:id] = index_id.to_s - doc + def embedded_doc + raise NoMethodError, "You must define method embedded_doc in your class for it to be indexable" end + def indexable_object(to_set = nil) + begin + document = index_doc(to_set) + rescue NoMethodError + document = self.to_hash.reject { |k, _| !self.class.indexable?(k) } + document.transform_values! do |v| + is_array = v.is_a?(Array) + v = Array(v).map do |x| + if x.is_a?(Goo::Base::Resource) + x.embedded_doc rescue x.id.to_s + else + if x.is_a?(RDF::URI) + x.to_s + else + x.respond_to?(:object) ? x.object : x + end + end + end + is_array ? v : v.first + end + end + + document = document.reduce({}) do |h, (k, v)| + if v.is_a?(Hash) + v.each { |k2, v2| h["#{k}_#{k2}".to_sym] = v2 } + else + h[k] = v + end + h + end + + model_name = self.class.model_name.to_s.downcase + document.delete(:id) + document.delete("id") + + document.transform_keys! do |k| + self.class.index_document_attr(k) + end + + document[:resource_id] = self.id.to_s + document[:resource_model] = model_name + document[:id] = index_id.to_s + document + end module ClassMethods - def search(q, params={}, connection_name=:main) - params["q"] = q - Goo.search_connection(connection_name).post('select', :data => params) + def index_enabled? + !@model_settings[:search_collection].nil? end - def indexBatch(collection, connection_name=:main) - docs = Array.new - collection.each do |c| - docs << c.indexable_object + def enable_indexing(collection_name, search_backend = :main, &block) + @model_settings[:search_collection] = collection_name + + if block_given? 
+ # optional block to generate custom schema + Goo.add_search_connection(collection_name, search_backend, &block) + else + Goo.add_search_connection(collection_name, search_backend) end - Goo.search_connection(connection_name).add(docs) + + after_save :index + after_destroy :unindex end - def unindexBatch(collection, connection_name=:main) - docs = Array.new - collection.each do |c| - docs << c.index_id - end - Goo.search_connection(connection_name).delete_by_id(docs) + def search_collection_name + @model_settings[:search_collection] + end + + def search_client(connection_name = search_collection_name) + Goo.search_client(connection_name) + end + + def custom_schema?(connection_name = search_collection_name) + search_client(connection_name)&.custom_schema? + end + + def schema_generator + Goo.search_client(search_collection_name).schema_generator + end + + def index_document_attr(key) + return key.to_s if custom_schema? || self.attribute_settings(key).nil? + + type = self.datatype(key) + is_list = self.list?(key) + fuzzy = self.fuzzy_searchable?(key) + + SOLR::SolrConnector.index_document_attr(key, type, is_list, fuzzy) + end + + def search(q, params = {}, connection_name = search_collection_name) + search_client(connection_name).search(q, params) + end + + def submit_search_query(query, params = {}, connection_name = search_collection_name) + search_client(connection_name).submit_search_query(query, params) + end + + def indexBatch(collection, connection_name = search_collection_name) + docs = collection.map(&:indexable_object) + search_client(connection_name).index_document(docs) end - def unindexByQuery(query, connection_name=:main) - Goo.search_connection(connection_name).delete_by_query(query) + def unindexBatch(collection, connection_name = search_collection_name) + docs = collection.map(&:index_id) + search_client(connection_name).delete_by_id(docs) end - # Get the doc that will be indexed in solr - def get_indexable_object() - # To make the code less 
readable the guys that wrote it managed to hide the real function called by this line - # It is "get_index_doc" in ontologies_linked_data Class.rb - doc = self.class.model_settings[:search_options][:document].call(self) - doc[:resource_id] = doc[:id].to_s - doc[:id] = get_index_id.to_s - # id: clsUri_ONTO-ACRO_submissionNumber. i.e.: http://lod.nal.usda.gov/nalt/5260_NALT_4 - doc + def unindexByQuery(query, connection_name = search_collection_name) + search_client(connection_name).delete_by_query(query) end - def indexCommit(attrs=nil, connection_name=:main) - Goo.search_connection(connection_name).commit(:commit_attributes => attrs || {}) + def indexCommit(attrs = nil, connection_name = search_collection_name) + search_client(connection_name).index_commit(attrs) end - def indexOptimize(attrs=nil, connection_name=:main) - Goo.search_connection(connection_name).optimize(:optimize_attributes => attrs || {}) + def indexOptimize(attrs = nil, connection_name = search_collection_name) + search_client(connection_name).index_optimize(attrs) end - def indexClear(connection_name=:main) - # WARNING: this deletes ALL data from the index - unindexByQuery("*:*", connection_name) + # WARNING: this deletes ALL data from the index + def indexClear(connection_name = search_collection_name) + search_client(connection_name).clear_all_data end end end diff --git a/lib/goo/search/solr/solr_admin.rb b/lib/goo/search/solr/solr_admin.rb new file mode 100644 index 00000000..4d20271b --- /dev/null +++ b/lib/goo/search/solr/solr_admin.rb @@ -0,0 +1,79 @@ +module SOLR + module Administration + + def admin_url + "#{@solr_url}/admin" + end + + def solr_alive? 
+ collections_url = URI.parse("#{admin_url}/collections?action=CLUSTERSTATUS") + http = Net::HTTP.new(collections_url.host, collections_url.port) + request = Net::HTTP::Get.new(collections_url.request_uri) + + begin + response = http.request(request) + return response.code.eql?("200") && JSON.parse(response.body).dig("responseHeader", "status").eql?(0) + rescue StandardError => e + return false + end + end + + def fetch_all_collections + collections_url = URI.parse("#{admin_url}/collections?action=LIST") + + http = Net::HTTP.new(collections_url.host, collections_url.port) + request = Net::HTTP::Get.new(collections_url.request_uri) + + begin + response = http.request(request) + raise StandardError, "Failed to fetch collections. HTTP #{response.code}: #{response.message}" unless response.code.to_i == 200 + rescue StandardError => e + raise StandardError, "Failed to fetch collections. #{e.message}" + end + + collections = [] + if response.is_a?(Net::HTTPSuccess) + collections = JSON.parse(response.body)['collections'] + end + + collections + end + + def create_collection(name = @collection_name, num_shards = 1, replication_factor = 1) + return if collection_exists?(name) + create_collection_url = URI.parse("#{admin_url}/collections?action=CREATE&name=#{name}&numShards=#{num_shards}&replicationFactor=#{replication_factor}") + + http = Net::HTTP.new(create_collection_url.host, create_collection_url.port) + request = Net::HTTP::Post.new(create_collection_url.request_uri) + + begin + response = http.request(request) + raise StandardError, "Failed to create collection. HTTP #{response.code}: #{response.message}" unless response.code.to_i == 200 + rescue StandardError => e + raise StandardError, "Failed to create collection. 
#{e.message}" + end + end + + def delete_collection(collection_name = @collection_name) + return unless collection_exists?(collection_name) + + delete_collection_url = URI.parse("#{admin_url}/collections?action=DELETE&name=#{collection_name}") + + http = Net::HTTP.new(delete_collection_url.host, delete_collection_url.port) + request = Net::HTTP::Post.new(delete_collection_url.request_uri) + + begin + response = http.request(request) + raise StandardError, "Failed to delete collection. HTTP #{response.code}: #{response.message}" unless response.code.to_i == 200 + rescue StandardError => e + raise StandardError, "Failed to delete collection. #{e.message}" + end + + end + + def collection_exists?(collection_name) + fetch_all_collections.include?(collection_name.to_s) + end + end +end + diff --git a/lib/goo/search/solr/solr_connector.rb b/lib/goo/search/solr/solr_connector.rb new file mode 100644 index 00000000..e367f5cd --- /dev/null +++ b/lib/goo/search/solr/solr_connector.rb @@ -0,0 +1,41 @@ +require 'rsolr' +require_relative 'solr_schema_generator' +require_relative 'solr_schema' +require_relative 'solr_admin' +require_relative 'solr_query' + +module SOLR + + class SolrConnector + include Schema, Administration, Query + attr_reader :solr + + def initialize(solr_url, collection_name) + @solr_url = solr_url + @collection_name = collection_name + @solr = RSolr.connect(url: collection_url) + + # Perform a status test and wait up to 30 seconds before raising an error + wait_time = 0 + max_wait_time = 30 + until solr_alive? || wait_time >= max_wait_time + sleep 1 + wait_time += 1 + end + raise "Solr instance not reachable within #{max_wait_time} seconds" unless solr_alive? 
+ + + @custom_schema = false + end + + def init(force = false) + return if collection_exists?(@collection_name) && !force + + create_collection + + init_schema + end + + end +end + diff --git a/lib/goo/search/solr/solr_query.rb b/lib/goo/search/solr/solr_query.rb new file mode 100644 index 00000000..ed194950 --- /dev/null +++ b/lib/goo/search/solr/solr_query.rb @@ -0,0 +1,108 @@ +module SOLR + module Query + + def self.included(base) + base.extend(ClassMethods) + end + + module ClassMethods + def index_document_attr(key, type, is_list, fuzzy_search) + dynamic_field(type: type, is_list: is_list, is_fuzzy_search: fuzzy_search).gsub('*', key.to_s) + end + + private + + def dynamic_field(type:, is_list:, is_fuzzy_search: false) + return is_list ? '*_texts' : '*_text' if is_fuzzy_search + + dynamic_type = case type + when :uri, :string, nil + '*_t' + when :integer + '*_i' + when :boolean + '*_b' + when :date_time + '*_dt' + when :float + '*_f' + else + # Handle unknown data types or raise an error based on your specific requirements + raise ArgumentError, "Unsupported ORM data type: #{type}" + end + + if is_list + dynamic_type = dynamic_type.eql?('*_t') ? "*_txt" : "#{dynamic_type}s" + end + + dynamic_type + end + end + + def clear_all_data + delete_by_query('*:*') + end + + def collection_url + "#{@solr_url}/#{@collection_name}" + end + + def index_commit(attrs = nil) + @solr.commit(:commit_attributes => attrs || {}) + end + + def index_optimize(attrs = nil) + @solr.optimize(:optimize_attributes => attrs || {}) + end + + def index_document(document, commit: true) + @solr.add(document) + @solr.commit if commit + end + + def index_document_attr(key, type, is_list, fuzzy_search) + self.class.index_document_attr(key, type, is_list, fuzzy_search) + end + + + + def delete_by_id(document_id, commit: true) + return if document_id.nil? 
+ + @solr.delete_by_id(document_id) + @solr.commit if commit + end + + def delete_by_query(query) + @solr.delete_by_query(query) + @solr.commit + end + + def search(query, params = {}) + params[:q] = query + @solr.get('select', params: params) + end + + def submit_search_query(query, params = {}) + uri = ::URI.parse("#{collection_url}/select") + + http = Net::HTTP.new(uri.host, uri.port) + request = Net::HTTP::Post.new(uri.request_uri) + + params[:q] = query + request.set_form_data(params) + + response = http.request(request) + + if response.is_a?(Net::HTTPSuccess) + JSON.parse(response.body) + else + puts "Error: #{response.code} - #{response.message}" + nil + end + end + + + end +end + diff --git a/lib/goo/search/solr/solr_schema.rb b/lib/goo/search/solr/solr_schema.rb new file mode 100644 index 00000000..8c38fd2f --- /dev/null +++ b/lib/goo/search/solr/solr_schema.rb @@ -0,0 +1,184 @@ +module SOLR + module Schema + + def fetch_schema + uri = URI.parse("#{@solr_url}/#{@collection_name}/schema") + http = Net::HTTP.new(uri.host, uri.port) + + request = Net::HTTP::Get.new(uri.path, 'Content-Type' => 'application/json') + response = http.request(request) + + if response.code.to_i == 200 + @schema = JSON.parse(response.body)["schema"] + else + raise StandardError, "Failed to upload schema. 
HTTP #{response.code}: #{response.body}" + end + end + + def schema + @schema ||= fetch_schema + end + + def all_fields + schema["fields"] + end + + def all_copy_fields + schema["copyFields"] + end + + def all_dynamic_fields + schema["dynamicFields"] + end + + def all_fields_types + schema["fieldTypes"] + end + + def fetch_all_fields + fetch_schema["fields"] + end + + def fetch_all_copy_fields + fetch_schema["copyFields"] + end + + def fetch_all_dynamic_fields + fetch_schema["dynamicFields"] + end + + def fetch_all_fields_types + fetch_schema["fieldTypes"] + end + + def schema_generator + @schema_generator ||= SolrSchemaGenerator.new + end + + def init_collection(num_shards = 1, replication_factor = 1) + create_collection_url = URI.parse("#{@solr_url}/admin/collections?action=CREATE&name=#{@collection_name}&numShards=#{num_shards}&replicationFactor=#{replication_factor}") + + http = Net::HTTP.new(create_collection_url.host, create_collection_url.port) + request = Net::HTTP::Post.new(create_collection_url.request_uri) + + begin + response = http.request(request) + raise StandardError, "Failed to create collection. HTTP #{response.code}: #{response.message}" unless response.code.to_i == 200 + rescue StandardError => e + raise StandardError, "Failed to create collection. #{e.message}" + end + end + + def init_schema(generator = schema_generator) + clear_all_schema(generator) + fetch_schema + default_fields = all_fields.map { |f| f['name'] } + + solr_schema = { + "add-field-type": generator.field_types_to_add, + 'add-field' => generator.fields_to_add.reject { |f| default_fields.include?(f[:name]) }, + 'add-dynamic-field' => generator.dynamic_fields_to_add, + 'add-copy-field' => generator.copy_fields_to_add + } + + update_schema(solr_schema) + end + + def custom_schema? 
+ @custom_schema + end + + def enable_custom_schema + @custom_schema = true + end + + def clear_all_schema(generator = schema_generator) + init_ft = generator.field_types_to_add.map { |f| f[:name] } + dynamic_fields = all_dynamic_fields.map { |f| { name: f['name'] } } + copy_fields = all_copy_fields.map { |f| { source: f['source'], dest: f['dest'] } } + fields_types = all_fields_types.select { |f| init_ft.include?(f['name']) }.map { |f| { name: f['name']} } + fields = all_fields.reject { |f| %w[id _version_ ].include?(f['name']) }.map { |f| { name: f['name'] } } + + upload_schema('delete-copy-field' => copy_fields) unless copy_fields.empty? + upload_schema('delete-dynamic-field' => dynamic_fields) unless dynamic_fields.empty? + upload_schema('delete-field' => fields) unless copy_fields.empty? + upload_schema('delete-field-type' => fields_types) unless fields_types.empty? + end + + def map_to_indexer_type(orm_data_type) + case orm_data_type + when :uri + 'string' # Assuming a string field for URIs + when :string, nil # Default to 'string' if no type is given + 'text_general' # Assuming a generic text field for strings + when :integer + 'pint' + when :boolean + 'boolean' + when :date_time + 'pdate' + when :float + 'pfloat' + else + # Handle unknown data types or raise an error based on your specific requirements + raise ArgumentError, "Unsupported ORM data type: #{orm_data_type}" + end + end + + def delete_field(name) + update_schema('delete-field' => [ + { name: name } + ]) + end + + def add_field(name, type, indexed: true, stored: true, multi_valued: false) + update_schema('add-field' => [ + { name: name, type: type, indexed: indexed, stored: stored, multiValued: multi_valued } + ]) + end + + def add_dynamic_field(name, type, indexed: true, stored: true, multi_valued: false) + update_schema('add-dynamic-field' => [ + { name: name, type: type, indexed: indexed, stored: stored, multiValued: multi_valued } + ]) + end + + def add_copy_field(source, dest) + 
update_schema('add-copy-field' => [ + { source: source, dest: dest } + ]) + end + + def fetch_field(name) + fetch_all_fields.select { |f| f['name'] == name }.first + end + + def update_schema(schema_json) + permitted_actions = %w[add-field add-copy-field add-dynamic-field add-field-type delete-copy-field delete-dynamic-field delete-field delete-field-type] + + unless permitted_actions.any? { |action| schema_json.key?(action) } + raise StandardError, "The schema need to implement at least one of this actions: #{permitted_actions.join(', ')}" + end + upload_schema(schema_json) + fetch_schema + end + + private + + def upload_schema(schema_json) + uri = URI.parse("#{@solr_url}/#{@collection_name}/schema") + http = Net::HTTP.new(uri.host, uri.port) + + request = Net::HTTP::Post.new(uri.path, 'Content-Type' => 'application/json') + request.body = schema_json.to_json + response = http.request(request) + if response.code.to_i == 200 + response + else + raise StandardError, "Failed to upload schema. HTTP #{response.code}: #{response.body}" + end + end + + end +end + diff --git a/lib/goo/search/solr/solr_schema_generator.rb b/lib/goo/search/solr/solr_schema_generator.rb new file mode 100644 index 00000000..bc1e4693 --- /dev/null +++ b/lib/goo/search/solr/solr_schema_generator.rb @@ -0,0 +1,279 @@ +module SOLR + + class SolrSchemaGenerator + + attr_reader :schema + + def initialize + @schema = {} + end + + def add_field(name, type, indexed: true, stored: true, multi_valued: false, omit_norms: nil) + @schema['add-field'] ||= [] + af = { name: name.to_s, type: type, indexed: indexed, stored: stored, multiValued: multi_valued} + af[:omitNorms] = omit_norms unless omit_norms.nil? 
+ @schema['add-field'] << af + end + + def add_dynamic_field(name, type, indexed: true, stored: true, multi_valued: false, omit_norms: nil) + @schema['add-dynamic-field'] ||= [] + df = { name: name.to_s, type: type, indexed: indexed, stored: stored, multiValued: multi_valued } + df[:omitNorms] = omit_norms unless omit_norms.nil? + @schema['add-dynamic-field'] << df + end + + def add_copy_field(source, dest) + @schema['add-copy-field'] ||= [] + @schema['add-copy-field'] << { source: source, dest: dest } + end + + def add_field_type(type_definition) + @schema['add-field-type'] ||= [] + @schema['add-field-type'] << type_definition + end + + def fields_to_add + custom_fields = @schema['add-field'] || [] + custom_fields + init_fields + end + + def dynamic_fields_to_add + custom_fields = @schema['add-dynamic-field'] || [] + custom_fields + init_dynamic_fields + end + + def copy_fields_to_add + custom_fields = @schema['add-copy-field'] || [] + custom_fields + init_copy_fields + end + + def field_types_to_add + custom_fields = @schema['add-field-type'] || [] + custom_fields + init_fields_types + end + + def init_fields_types + [ + { + "name": "string_ci", + "class": "solr.TextField", + "sortMissingLast": true, + "omitNorms": true, + "queryAnalyzer": + { + "tokenizer": { + "class": "solr.KeywordTokenizerFactory" + }, + "filters": [ + { + "class": "solr.LowerCaseFilterFactory" + } + ] + } + }, + { + "name": "text_suggest_ngram", + "class": "solr.TextField", + "positionIncrementGap": "100", + "analyzer": { + "tokenizer": { + "class": "solr.StandardTokenizerFactory" + }, + "filters": [ + { + "class": "solr.LowerCaseFilterFactory" + }, + { + "class": "solr.EdgeNGramTokenizerFactory", + "minGramSize": 1, + "maxGramSize": 25 + } + ] + } + }, + { + "name": "text_suggest_edge", + "class": "solr.TextField", + "positionIncrementGap": "100", + "indexAnalyzer": { + "tokenizer": { + "class": "solr.KeywordTokenizerFactory" + }, + "char_filters": [ + { + "class": 
"solr.MappingCharFilterFactory", + "mapping": "solr/resources/org/apache/lucene/analysis/miscellaneous/MappingCharFilter.greekaccent" + } + ], + "filters": [ + { + "class": "solr.LowerCaseFilterFactory" + }, + { + "class": "solr.PatternReplaceFilterFactory", + "pattern": "([\\.,;:-_])", + "replacement": " ", + "replace": "all" + }, + { + "class": "solr.EdgeNGramFilterFactory", + "minGramSize": 1, + "maxGramSize": 30, + "preserveOriginal": true + }, + { + "class": "solr.PatternReplaceFilterFactory", + "pattern": "([^\\w\\d\\*æøåÆØÅ ])", + "replacement": "", + "replace": "all" + } + ] + }, + "queryAnalyzer": { + "tokenizer": { + "class": "solr.KeywordTokenizerFactory" + }, + "char_filters": [ + { + "class": "solr.MappingCharFilterFactory", + "mapping": "solr/resources/org/apache/lucene/analysis/miscellaneous/MappingCharFilter.greekaccent" + } + ], + "filters": [ + { + "class": "solr.LowerCaseFilterFactory" + }, + { + "class": "solr.PatternReplaceFilterFactory", + "pattern": "([\\.,;:-_])", + "replacement": " ", + "replace": "all" + }, + { + "class": "solr.PatternReplaceFilterFactory", + "pattern": "([^\\w\\d\\*æøåÆØÅ ])", + "replacement": "", + "replace": "all" + }, + { + "class": "solr.PatternReplaceFilterFactory", + "pattern": "^(.{30})(.*)?", + "replacement": "$1", + "replace": "all" + } + ] + } + }, + { + "name": "text_suggest", + "class": "solr.TextField", + "positionIncrementGap": 100, + indexAnalyzer: { + "char_filters": [ + { + "class": "solr.MappingCharFilterFactory", + "mapping": "solr/resources/org/apache/lucene/analysis/miscellaneous/MappingCharFilter.greekaccent" + } + ], + "tokenizer": { + "class": "solr.StandardTokenizerFactory" + }, + "filters": [ + { + "class": "solr.WordDelimiterGraphFilterFactory", + "generateWordParts": "1", + "generateNumberParts": "1", + "catenateWords": "1", + "catenateNumbers": "1", + "catenateAll": "1", + "splitOnCaseChange": "1", + "splitOnNumerics": "1", + "preserveOriginal": "1" + }, + { + "class": 
"solr.LowerCaseFilterFactory" + }, + { + "class": "solr.PatternReplaceFilterFactory", + "pattern": "([^\w\d*æøåÆØÅ ])", + "replacement": " ", + "replace": "all" + } + ] + }, + queryAnalyzer: { + "char_filters": [ + { + "class": "solr.MappingCharFilterFactory", + "mapping": "solr/resources/org/apache/lucene/analysis/miscellaneous/MappingCharFilter.greekaccent" + } + ], + "tokenizer": { + "class": "solr.StandardTokenizerFactory" + }, + "filters": [ + { + "class": "solr.WordDelimiterGraphFilterFactory", + "generateWordParts": "0", + "generateNumberParts": "0", + "catenateWords": "0", + "catenateNumbers": "0", + "catenateAll": "0", + "splitOnCaseChange": "0", + "splitOnNumerics": "0" + }, + { + "class": "solr.LowerCaseFilterFactory" + }, + { + "class": "solr.PatternReplaceFilterFactory", + "pattern": "([^\w\d*æøåÆØÅ ])", + "replacement": " ", + "replace": "all" + } + ] + } + } + ] + end + + def init_fields + [ + #{ name: "_version_", type: "plong", indexed: true, stored: true, multiValued: false }, + { name: "resource_id", type: "string", indexed: true, multiValued: false, required: true, stored: true }, + { name: "resource_model", type: "string", indexed: true, multiValued: false, required: true, stored: false }, + { name: "_text_", type: "text_general", indexed: true, multiValued: true, stored: false }, + ] + end + + def init_dynamic_fields + [ + {"name": "*_t", "type": "text_general", stored: true, "multiValued": false }, + {"name": "*_txt", "type": "text_general", stored: true, "multiValued": true}, + {"name": "*_i", "type": "pint", stored: true }, + {"name": "*_is", "type": "pints", stored: true }, + {"name": "*_f", "type": "pfloat", stored: true }, + {"name": "*_fs", "type": "pfloats", stored: true }, + {"name": "*_b", "type": "boolean", stored: true }, + {"name": "*_bs", "type": "booleans", stored: true }, + {"name": "*_dt", "type": "pdate", stored: true }, + {"name": "*_dts", "type": "pdate", stored: true , multiValued: true}, + { "name": "*Exact", "type": 
"string_ci", "multiValued": true, stored: false }, + { "name": "*Suggest", "type": "text_suggest", "omitNorms": true, stored: false, "multiValued": true }, + { "name": "*SuggestEdge", "type": "text_suggest_edge", stored: false, "multiValued": true }, + { "name": "*SuggestNgram", "type": "text_suggest_ngram", stored: false, "omitNorms": true, "multiValued": true }, + { "name": "*_text", "type": "text_general", stored: true, "multiValued": false }, + { "name": "*_texts", "type": "text_general", stored: true, "multiValued": true }, + {"name": "*_sort", "type": "string", stored: false }, + {"name": "*_sorts", "type": "strings", stored: false , "multiValued": true}, + ] + end + + def init_copy_fields + [ + { source: "*_text", dest: %w[_text_ *Exact *Suggest *SuggestEdge *SuggestNgram *_sort] }, + { source: "*_texts", dest: %w[_text_ *Exact *Suggest *SuggestEdge *SuggestNgram *_sorts] }, + ] + end + end +end diff --git a/rakelib/docker_based_test.rake b/rakelib/docker_based_test.rake index d9b334f4..c84879a9 100644 --- a/rakelib/docker_based_test.rake +++ b/rakelib/docker_based_test.rake @@ -5,6 +5,20 @@ namespace :test do namespace :docker do task :up do system("docker compose up -d") || abort("Unable to start docker containers") + unless system("curl -sf http://localhost:8983/solr || exit 1") + printf("waiting for Solr container to initialize") + sec = 0 + until system("curl -sf http://localhost:8983/solr || exit 1") do + sleep(1) + printf(".") + sec += 1 + if sec > 30 + abort(" Solr container hasn't initialized properly") + end + end + printf("\n") + end + end task :down do #system("docker compose --profile fs --profile ag stop") diff --git a/test/solr/test_solr.rb b/test/solr/test_solr.rb new file mode 100644 index 00000000..6428bc8a --- /dev/null +++ b/test/solr/test_solr.rb @@ -0,0 +1,122 @@ +require_relative '../test_case' +require 'benchmark' + + +class TestSolr < MiniTest::Unit::TestCase + def self.before_suite + @@connector = 
SOLR::SolrConnector.new(Goo.search_conf, 'test') + @@connector.delete_collection('test') + @@connector.init + end + + def self.after_suite + @@connector.delete_collection('test') + end + + def test_add_collection + connector = @@connector + connector.create_collection('test2') + all_collections = connector.fetch_all_collections + assert_includes all_collections, 'test2' + end + + def test_delete_collection + connector = @@connector + test_add_collection + connector.delete_collection('test2') + + all_collections = connector.fetch_all_collections + refute_includes all_collections, 'test2' + end + + def test_schema_generator + connector = @@connector + + all_fields = connector.all_fields + + connector.schema_generator.fields_to_add.each do |f| + field = all_fields.select { |x| x["name"].eql?(f[:name]) }.first + refute_nil field + assert_equal field["type"], f[:type] + assert_equal field["indexed"], f[:indexed] + assert_equal field["stored"], f[:stored] + assert_equal field["multiValued"], f[:multiValued] + end + + copy_fields = connector.all_copy_fields + connector.schema_generator.copy_fields_to_add.each do |f| + field = copy_fields.select { |x| x["source"].eql?(f[:source]) }.first + refute_nil field + assert_equal field["source"], f[:source] + assert_includes f[:dest], field["dest"] + end + + dynamic_fields = connector.all_dynamic_fields + + connector.schema_generator.dynamic_fields_to_add.each do |f| + field = dynamic_fields.select { |x| x["name"].eql?(f[:name]) }.first + refute_nil field + assert_equal field["name"], f[:name] + assert_equal field["type"], f[:type] + assert_equal field["multiValued"], f[:multiValued] + assert_equal field["stored"], f[:stored] + end + + connector.clear_all_schema + connector.fetch_schema + all_fields = connector.all_fields + connector.schema_generator.fields_to_add.each do |f| + field = all_fields.select { |x| x["name"].eql?(f[:name]) }.first + assert_nil field + end + + copy_fields = connector.all_copy_fields + 
connector.schema_generator.copy_fields_to_add.each do |f| + field = copy_fields.select { |x| x["source"].eql?(f[:source]) }.first + assert_nil field + end + + dynamic_fields = connector.all_dynamic_fields + connector.schema_generator.dynamic_fields_to_add.each do |f| + field = dynamic_fields.select { |x| x["name"].eql?(f[:name]) }.first + assert_nil field + end + end + + def test_add_field + connector = @@connector + add_field('test', connector) + + + field = connector.fetch_all_fields.select { |f| f['name'] == 'test' }.first + + refute_nil field + assert_equal field['type'], 'string' + assert_equal field['indexed'], true + assert_equal field['stored'], true + assert_equal field['multiValued'], true + + connector.delete_field('test') + end + + def test_delete_field + connector = @@connector + + add_field('test', connector) + + connector.delete_field('test') + + field = connector.all_fields.select { |f| f['name'] == 'test' }.first + + assert_nil field + end + + private + + def add_field(name, connector) + if connector.fetch_field(name) + connector.delete_field(name) + end + connector.add_field(name, 'string', indexed: true, stored: true, multi_valued: true) + end +end diff --git a/test/test_search.rb b/test/test_search.rb index 180062d1..0bba79d9 100644 --- a/test/test_search.rb +++ b/test/test_search.rb @@ -3,9 +3,9 @@ module TestSearch class TermSearch < Goo::Base::Resource - model :term_search, name_with: :id + model :term_search, name_with: lambda { |resource| uuid_uri_generator(resource) } attribute :prefLabel, enforce: [:existence] - attribute :synonym # array of strings + attribute :synonym, enforce: [:list] # array of strings attribute :definition # array of strings attribute :submissionAcronym, enforce: [:existence] attribute :submissionId, enforce: [:existence, :integer] @@ -14,6 +14,39 @@ class TermSearch < Goo::Base::Resource attribute :semanticType attribute :cui + enable_indexing(:term_search) do | schema_generator | + 
schema_generator.add_field(:prefLabel, 'text_general', indexed: true, stored: true, multi_valued: false) + schema_generator.add_field(:synonym, 'text_general', indexed: true, stored: true, multi_valued: true) + schema_generator.add_field(:definition, 'string', indexed: true, stored: true, multi_valued: true) + schema_generator.add_field(:submissionAcronym, 'string', indexed: true, stored: true, multi_valued: false) + schema_generator.add_field(:submissionId, 'pint', indexed: true, stored: true, multi_valued: false) + schema_generator.add_field(:cui, 'text_general', indexed: true, stored: true, multi_valued: true) + schema_generator.add_field(:semanticType, 'text_general', indexed: true, stored: true, multi_valued: true) + + # Copy fields for term search + schema_generator.add_copy_field('prefLabel', '_text_') + # for exact search + schema_generator.add_copy_field('prefLabel', 'prefLabelExact') + + # Matches whole terms in the suggest text + schema_generator.add_copy_field('prefLabel', 'prefLabelSuggest') + + # Will match from the left of the field, e.g. if the document field + # is "A brown fox" and the query is "A bro", it will match, but not "brown" + schema_generator.add_copy_field('prefLabel', 'prefLabelSuggestEdge') + + # Matches any word in the input field, with implicit right truncation. + # This means that the field "A brown fox" will be matched by query "bro". 
+ # We use this to get partial matches, but these would be boosted lower than exact and left-anchored + schema_generator.add_copy_field('prefLabel', 'prefLabelSuggestNgram') + + schema_generator.add_copy_field('synonym', '_text_') + schema_generator.add_copy_field('synonym', 'synonymExact') + schema_generator.add_copy_field('synonym', 'synonymSuggest') + schema_generator.add_copy_field('synonym', 'synonymSuggestEdge') + schema_generator.add_copy_field('synonym', 'synonymSuggestNgram') + end + def index_id() "#{self.id.to_s}_#{self.submissionAcronym}_#{self.submissionId}" end @@ -23,8 +56,45 @@ def index_doc(to_set = nil) end end + class TermSearch2 < Goo::Base::Resource + model :term_search2, name_with: :prefLabel + attribute :prefLabel, enforce: [:existence], fuzzy_search: true + attribute :synonym, enforce: [:list] + attribute :definition + attribute :submissionAcronym, enforce: [:existence] + attribute :submissionId, enforce: [:existence, :integer] + attribute :private, enforce: [:boolean], default: false, index: false + # Dummy attributes to validate non-searchable files + attribute :semanticType + attribute :cui + + enable_indexing(:test_solr) + end + + class TermSearch3 < Goo::Base::Resource + model :term_search3, name_with: :prefLabel + attribute :prefLabel, enforce: [:existence] + attribute :synonym, enforce: [:list] + attribute :definition + attribute :submissionAcronym, enforce: [:existence] + attribute :submissionId, enforce: [:existence, :integer] + attribute :private, enforce: [:boolean], default: false, index: false + # Dummy attributes to validate non-searchable files + attribute :semanticType + attribute :cui + + attribute :object, enforce: [:term_search] + attribute :object_list, enforce: [:term_search, :list] + + + enable_indexing(:test_solr) + end + class TestModelSearch < MiniTest::Unit::TestCase + def self.before_suite + Goo.init_search_connections(true) + end def setup @terms = [ TermSearch.new( @@ -61,7 +131,21 @@ def setup submissionId: 2, 
semanticType: "Neoplastic Process", cui: "C0375111" - ) + ), + TermSearch.new( + id: RDF::URI.new("http://ncicb.nci.nih.gov/xml/owl/EVS/Thesaurus.owl#Melanoma2"), + prefLabel: "Melanoma with cutaneous melanoma syndrome", + synonym: [ + "Cutaneous Melanoma", + "Skin Cancer", + "Malignant Melanoma" + ], + definition: "Melanoma refers to a malignant skin cancer", + submissionAcronym: "NCIT", + submissionId: 2, + semanticType: "Neoplastic Process", + cui: "C0025202" + ), ] end @@ -78,6 +162,98 @@ def test_search assert_equal @terms[1].prefLabel, resp["response"]["docs"][0]["prefLabel"] end + def test_search_filters + TermSearch.indexClear + @terms[0].index + @terms[1].index + @terms[2].index + TermSearch.indexCommit + params = {"defType"=>"edismax", + "stopwords"=>"true", + "lowercaseOperators"=>"true", + "qf"=>"prefLabelExact^100 prefLabelSuggestEdge^50 synonymSuggestEdge^10 prefLabelSuggestNgram synonymSuggestNgram resource_id cui semanticType", + "pf"=>"prefLabelSuggest^50", + } + resp = TermSearch.search("Melanoma wi", params) + assert_equal(3, resp["response"]["numFound"]) + assert_equal @terms[2].prefLabel, resp["response"]["docs"][0]["prefLabel"] + end + + def test_search_exact_filter + TermSearch.indexClear + @terms[0].index + @terms[1].index + @terms[2].index + TermSearch.indexCommit + params = {"defType"=>"edismax", + "stopwords"=>"true", + "lowercaseOperators"=>"true", + "qf"=>"prefLabelExact", + } + resp = TermSearch.search("Melanoma", params) + assert_equal(1, resp["response"]["numFound"]) + assert_equal @terms[0].prefLabel, resp["response"]["docs"][0]["prefLabel"] + end + + def test_search_suggest_edge_filter + TermSearch.indexClear + @terms[0].index + @terms[1].index + @terms[2].index + TermSearch.indexCommit + params = {"defType"=>"edismax", + "stopwords"=>"true", + "lowercaseOperators"=>"true", + "qf"=>"prefLabelSuggestEdge", + } + resp = TermSearch.search("Melanoma with", params) + assert_equal(1, resp["response"]["numFound"]) + assert_equal 
@terms[2].prefLabel, resp["response"]["docs"][0]["prefLabel"] + + resp = TermSearch.search("Melanoma", params) + assert_equal(2, resp["response"]["numFound"]) + assert_equal @terms[0].prefLabel, resp["response"]["docs"][0]["prefLabel"] + end + + def test_search_suggest_ngram_filter + TermSearch.indexClear + @terms[0].index + @terms[1].index + @terms[2].index + TermSearch.indexCommit + + params = {"defType"=>"edismax", + "stopwords"=>"true", + "lowercaseOperators"=>"true", + "qf"=>"prefLabelSuggestNgram", + } + resp = TermSearch.search("cutaneous", params) + assert_equal(1, resp["response"]["numFound"]) + assert_equal @terms[2].prefLabel, resp["response"]["docs"][0]["prefLabel"] + + resp = TermSearch.search("eous", params) + assert_equal(0, resp["response"]["numFound"]) + end + + def test_search_suggest_filter + TermSearch.indexClear + @terms[0].index + @terms[1].index + @terms[2].index + TermSearch.indexCommit + params = {"defType"=>"edismax", + "stopwords"=>"true", + "lowercaseOperators"=>"true", + "qf"=>"prefLabelSuggest", + } + resp = TermSearch.search("cutaneous test with Neoplasm Melanoma", params) + assert_equal(3, resp["response"]["numFound"]) + + + resp = TermSearch.search("mel", params) + assert_equal(0, resp["response"]["numFound"]) + end + def test_unindex TermSearch.indexClear() @terms[1].index() @@ -120,7 +296,7 @@ def test_indexBatch TermSearch.indexBatch(@terms) TermSearch.indexCommit() resp = TermSearch.search("*:*") - assert_equal 2, resp["response"]["docs"].length + assert_equal @terms.size, resp["response"]["docs"].length end def test_unindexBatch @@ -128,7 +304,7 @@ def test_unindexBatch TermSearch.indexBatch(@terms) TermSearch.indexCommit() resp = TermSearch.search("*:*") - assert_equal 2, resp["response"]["docs"].length + assert_equal @terms.size, resp["response"]["docs"].length TermSearch.unindexBatch(@terms) TermSearch.indexCommit() @@ -142,6 +318,69 @@ def test_indexClear resp = TermSearch.search("*:*") assert_equal 0, 
resp["response"]["docs"].length end + + def test_index_on_save_delete + TermSearch2.find("test").first&.delete + TermSearch3.find("test2").first&.delete + + term = TermSearch2.new(prefLabel: "test", + submissionId: 1, + definition: "definition of test", + synonym: ["synonym1", "synonym2"], + submissionAcronym: "test", + private: true + ) + + term2 = TermSearch3.new(prefLabel: "test2", + submissionId: 1, + definition: "definition of test2", + synonym: ["synonym1", "synonym2"], + submissionAcronym: "test", + private: true, + object: TermSearch.new(prefLabel: "test", submissionAcronym: 'acronym', submissionId: 1 ).save, + object_list: [TermSearch.new(prefLabel: "test2",submissionAcronym: 'acronym2', submissionId: 2).save, + TermSearch.new(prefLabel: "test3", submissionAcronym: 'acronym3', submissionId: 3).save] + ) + + term.save + term2.save + + # set as not indexed in model definition + refute_includes TermSearch2.search_client.fetch_all_fields.map{|f| f["name"]}, "private_b" + refute_includes TermSearch2.search_client.fetch_all_fields.map{|f| f["name"]}, "private_b" + + + indexed_term = TermSearch2.search("id:#{term.id.to_s.gsub(":", "\\:")}")["response"]["docs"].first + indexed_term2 = TermSearch3.search("id:#{term2.id.to_s.gsub(":", "\\:")}")["response"]["docs"].first + + term.indexable_object.each do |k, v| + assert_equal v, indexed_term[k.to_s] + end + + term2.indexable_object.each do |k, v| + assert_equal v, indexed_term2[k.to_s] + end + + term2.definition = "new definition of test2" + term2.synonym = ["new synonym1", "new synonym2"] + term2.save + + indexed_term2 = TermSearch3.search("id:#{term2.id.to_s.gsub(":", "\\:")}")["response"]["docs"].first + + term2.indexable_object.each do |k, v| + assert_equal v, indexed_term2[k.to_s] + end + + term2.delete + term.delete + + indexed_term = TermSearch2.submit_search_query("id:#{term.id.to_s.gsub(":", "\\:")}")["response"]["docs"].first + indexed_term2 = TermSearch3.submit_search_query("id:#{term2.id.to_s.gsub(":", 
"\\:")}")["response"]["docs"].first + + assert_nil indexed_term + assert_nil indexed_term2 + + end end end From 819086d426cd89bcaba92b5c5ce7998e6466fe3b Mon Sep 17 00:00:00 2001 From: Syphax bouazzouni Date: Sat, 27 Apr 2024 15:14:55 +0200 Subject: [PATCH 061/106] make indexed resource_id case insensitive (#59) --- lib/goo/search/solr/solr_schema_generator.rb | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/goo/search/solr/solr_schema_generator.rb b/lib/goo/search/solr/solr_schema_generator.rb index bc1e4693..232dfdc6 100644 --- a/lib/goo/search/solr/solr_schema_generator.rb +++ b/lib/goo/search/solr/solr_schema_generator.rb @@ -240,7 +240,7 @@ def init_fields_types def init_fields [ #{ name: "_version_", type: "plong", indexed: true, stored: true, multiValued: false }, - { name: "resource_id", type: "string", indexed: true, multiValued: false, required: true, stored: true }, + { name: "resource_id", type: "text_general", indexed: true, multiValued: false, required: true, stored: true }, { name: "resource_model", type: "string", indexed: true, multiValued: false, required: true, stored: false }, { name: "_text_", type: "text_general", indexed: true, multiValued: true, stored: false }, ] From 8f0a9a5bddba03d9d660a363c4e6618da479db9f Mon Sep 17 00:00:00 2001 From: Syphax bouazzouni Date: Wed, 1 May 2024 12:38:10 +0200 Subject: [PATCH 062/106] Fix: Invalidating cache after insertion of a new element (#60) * create a test to reproduce the cache invalidate on insert bug * use again insert_data instead of execute_append_request because the first invalidate the cache * update sparql client to version 3.2.0 * handle the case virtuoso insert data bug * use development branch of sparql-client --- Gemfile | 2 +- Gemfile.lock | 35 ++++++++++++++--------------- lib/goo.rb | 40 +++++++++++++++++---------------- lib/goo/base/resource.rb | 3 +-- lib/goo/config/config.rb | 12 +++++----- lib/goo/sparql/query_builder.rb | 4 +++- test/test_cache.rb | 15 
++++++++++++- 7 files changed, 64 insertions(+), 47 deletions(-) diff --git a/Gemfile b/Gemfile index af13989b..4d5a2b5b 100644 --- a/Gemfile +++ b/Gemfile @@ -22,5 +22,5 @@ group :profiling do gem "thin" end -gem 'sparql-client', github: 'ontoportal-lirmm/sparql-client', branch: 'master' +gem 'sparql-client', github: 'ontoportal-lirmm/sparql-client', branch: 'development' gem 'faraday', '2.7.11' #unpin if we no more support ruby 2.7 \ No newline at end of file diff --git a/Gemfile.lock b/Gemfile.lock index affe4c00..dba79705 100644 --- a/Gemfile.lock +++ b/Gemfile.lock @@ -1,12 +1,11 @@ GIT remote: https://github.com/ontoportal-lirmm/sparql-client.git - revision: 180c818f7715baac64b2699bb452ef5c756f62c5 - branch: master + revision: c96da3ad479724a31ccd6217ab9939dddfaca40e + branch: development specs: - sparql-client (1.0.1) - json_pure (>= 1.4) - net-http-persistent (= 2.9.4) - rdf (>= 1.0) + sparql-client (3.2.2) + net-http-persistent (~> 4.0, >= 4.0.2) + rdf (~> 3.2, >= 3.2.11) PATH remote: . 
@@ -57,25 +56,25 @@ GEM domain_name (~> 0.5) i18n (0.9.5) concurrent-ruby (~> 1.0) - json_pure (2.7.1) link_header (0.0.8) macaddr (1.7.2) systemu (~> 2.6.5) - method_source (1.0.0) + method_source (1.1.0) mime-types (3.5.2) mime-types-data (~> 3.2015) - mime-types-data (3.2024.0206) + mime-types-data (3.2024.0305) minitest (4.7.5) multi_json (1.15.0) mustermann (3.0.0) ruby2_keywords (~> 0.0.1) - net-http-persistent (2.9.4) + net-http-persistent (4.0.2) + connection_pool (~> 2.2) netrc (0.11.0) pry (0.14.2) coderay (~> 1.1) method_source (~> 1.0) - public_suffix (5.0.4) - rack (2.2.8.1) + public_suffix (5.0.5) + rack (2.2.9) rack-accept (0.4.5) rack (>= 0.4) rack-post-body-to-params (0.1.8) @@ -83,7 +82,7 @@ GEM rack-protection (3.2.0) base64 (>= 0.1.0) rack (~> 2.2, >= 2.2.4) - rake (13.1.0) + rake (13.2.1) rdf (3.2.11) link_header (~> 0.0, >= 0.0.8) rdf-raptor (3.2.0) @@ -99,9 +98,9 @@ GEM rdf-xsd (3.2.1) rdf (~> 3.2) rexml (~> 3.2) - redis (5.1.0) - redis-client (>= 0.17.0) - redis-client (0.20.0) + redis (5.2.0) + redis-client (>= 0.22.0) + redis-client (0.22.1) connection_pool request_store (1.6.0) rack (>= 1.4) @@ -111,7 +110,7 @@ GEM mime-types (>= 1.16, < 4.0) netrc (~> 0.8) rexml (3.2.6) - rsolr (2.5.0) + rsolr (2.6.0) builder (>= 2.1.2) faraday (>= 0.9, < 3, != 2.0.0) ruby2_keywords (0.0.5) @@ -163,4 +162,4 @@ DEPENDENCIES uuid BUNDLED WITH - 2.2.33 + 2.4.22 diff --git a/lib/goo.rb b/lib/goo.rb index adf73d3a..a1399a84 100644 --- a/lib/goo.rb +++ b/lib/goo.rb @@ -117,24 +117,26 @@ def self.add_sparql_backend(name, *opts) opts = opts[0] @@sparql_backends = @@sparql_backends.dup @@sparql_backends[name] = opts - @@sparql_backends[name][:query]=Goo::SPARQL::Client.new(opts[:query], - {protocol: "1.1", "Content-Type" => "application/x-www-form-urlencoded", - read_timeout: 10000, - validate: false, - redis_cache: @@redis_client, - cube_options: @@cube_options}) - @@sparql_backends[name][:update]=Goo::SPARQL::Client.new(opts[:update], - {protocol: "1.1", 
"Content-Type" => "application/x-www-form-urlencoded", - read_timeout: 10000, - validate: false, - redis_cache: @@redis_client, - cube_options: @@cube_options}) - @@sparql_backends[name][:data]=Goo::SPARQL::Client.new(opts[:data], - {protocol: "1.1", "Content-Type" => "application/x-www-form-urlencoded", - read_timeout: 10000, - validate: false, - redis_cache: @@redis_client, - cube_options: @@cube_options}) + @@sparql_backends[name][:query] = Goo::SPARQL::Client.new(opts[:query], + protocol: "1.1", + headers: { "Content-Type" => "application/x-www-form-urlencoded", "Accept" => "application/sparql-results+json"}, + read_timeout: 10000, + validate: false, + redis_cache: @@redis_client) + @@sparql_backends[name][:update] = Goo::SPARQL::Client.new(opts[:update], + protocol: "1.1", + headers: { "Content-Type" => "application/x-www-form-urlencoded", "Accept" => "application/sparql-results+json"}, + read_timeout: 10000, + validate: false, + redis_cache: @@redis_client, + cube_options: @@cube_options) + @@sparql_backends[name][:data] = Goo::SPARQL::Client.new(opts[:data], + protocol: "1.1", + headers: { "Content-Type" => "application/x-www-form-urlencoded", "Accept" => "application/sparql-results+json"}, + read_timeout: 10000, + validate: false, + redis_cache: @@redis_client, + cube_options: @@cube_options) @@sparql_backends[name][:backend_name] = opts[:backend_name] @@sparql_backends.freeze end @@ -255,7 +257,7 @@ def self.configure yield self configure_sanity_check - init_search_connections + init_search_connections @@namespaces.freeze @@sparql_backends.freeze diff --git a/lib/goo/base/resource.rb b/lib/goo/base/resource.rb index bd4ff741..5c829d8e 100644 --- a/lib/goo/base/resource.rb +++ b/lib/goo/base/resource.rb @@ -293,8 +293,7 @@ def save(*opts) batch_file.write(lines.join("")) batch_file.flush() else - data = graph_insert.to_a.reduce("") { |acc, x| acc << x.to_s + " " } - Goo.sparql_data_client.execute_append_request(graph, data, "application/x-turtle") + 
Goo.sparql_update_client.insert_data(graph_insert, graph: graph, use_insert_data: !Goo.backend_vo?) end rescue Exception => e raise e diff --git a/lib/goo/config/config.rb b/lib/goo/config/config.rb index 4c51a223..2019893c 100644 --- a/lib/goo/config/config.rb +++ b/lib/goo/config/config.rb @@ -68,11 +68,13 @@ def self.test_reset if @@sparql_backends[:main][:query].url.to_s["localhost"].nil? raise Exception, "only for testing" end - @@sparql_backends[:main][:query] = Goo::SPARQL::Client.new("http://#{@settings.goo_host}:#{@settings.goo_port}#{@settings.goo_path_query}", - {protocol: "1.1", "Content-Type" => "application/x-www-form-urlencoded", - read_timeout: 300, - redis_cache: @@redis_client }) + @@sparql_backends = {} + Goo.add_sparql_backend(:main, + backend_name: @settings.goo_backend_name, + query: "http://#{@settings.goo_host}:#{@settings.goo_port}#{@settings.goo_path_query}", + data: "http://#{@settings.goo_host}:#{@settings.goo_port}#{@settings.goo_path_data}", + update: "http://#{@settings.goo_host}:#{@settings.goo_port}#{@settings.goo_path_update}", + options: { rules: :NONE }) end - end diff --git a/lib/goo/sparql/query_builder.rb b/lib/goo/sparql/query_builder.rb index 674ae4ec..d41ced10 100644 --- a/lib/goo/sparql/query_builder.rb +++ b/lib/goo/sparql/query_builder.rb @@ -51,7 +51,9 @@ def build_query(ids, variables, graphs, patterns) @query.filter(filter) end - @query.union(*@unions) unless @unions.empty? + Array(@unions).each do |union| + @query.union(*union) + end ids_filter(ids) if ids diff --git a/test/test_cache.rb b/test/test_cache.rb index f5659ca4..11dccf41 100644 --- a/test/test_cache.rb +++ b/test/test_cache.rb @@ -26,6 +26,19 @@ def self.after_suite GooTestData.delete_test_case_data end + def test_cache_invalidate + address = Address.all.first + Goo.use_cache = true + puts "save 1" + University.new(name: 'test', address: [address]).save + u2 = University.new(name: 'test', address: [address]) + puts "request 1" + refute u2.valid? 
+ expected_error = { :name => { :duplicate => "There is already a persistent resource with id `http://goo.org/default/university/test`" } } + assert_equal expected_error, u2.errors + Goo.use_cache = false + end + def test_cache_models redis = Goo.redis_client redis.flushdb @@ -49,7 +62,7 @@ def test_cache_models assert !key.nil? assert redis.exists(key) - + prg = programs.first prg.bring_remaining prg.credits = 999 From b2a635fb1e8206e6e3010be4dbe033b47eb58481 Mon Sep 17 00:00:00 2001 From: Syphax bouazzouni Date: Wed, 1 May 2024 18:50:41 +0200 Subject: [PATCH 063/106] fix search resource_id case insensitive by using string_ci instead --- lib/goo/search/solr/solr_schema_generator.rb | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/goo/search/solr/solr_schema_generator.rb b/lib/goo/search/solr/solr_schema_generator.rb index 232dfdc6..ba23e70b 100644 --- a/lib/goo/search/solr/solr_schema_generator.rb +++ b/lib/goo/search/solr/solr_schema_generator.rb @@ -240,7 +240,7 @@ def init_fields_types def init_fields [ #{ name: "_version_", type: "plong", indexed: true, stored: true, multiValued: false }, - { name: "resource_id", type: "text_general", indexed: true, multiValued: false, required: true, stored: true }, + { name: "resource_id", type: "string_ci", indexed: true, multiValued: false, required: true, stored: true }, { name: "resource_model", type: "string", indexed: true, multiValued: false, required: true, stored: false }, { name: "_text_", type: "text_general", indexed: true, multiValued: true, stored: false }, ] From c48a1e4dfe82a2e5c42614a30380f3bdb2044ba9 Mon Sep 17 00:00:00 2001 From: Syphax bouazzouni Date: Thu, 23 May 2024 00:58:09 +0200 Subject: [PATCH 064/106] Merge to master: Release 2.4.0 - Multi-backend stores integrations, RDF 3.0 and SOLR API (#58) * Feature: Add Virtuso, Allegrograph and Graphdb integration to GOO (#48) * simplify the test configuration init * add docker based tests rake task to run test against 4s, ag, gb, vo * 
remove faraday gem usage * update test CI to test against all the supported backends with different slice sizes * add high level helper to know which backend we are currently using * extract sparql processor module from where module * handle language_match? value to upcase by default * add support for virtuoso and graphdb sparql client * replace delete sparql query by delete graph in the model complex test * add some new edge cases tests to test_where.rb and test_schemaless * make test_chunks_write.rb tests support multiple backends * replace native insert_data with execute_append_request in model save * remove add_rules as it seems to no more be used * move expand_equivalent_predicates from loader to builder module * build two different queries depending on which backend used * update mapper to handle the two different queries depending on the backend used * simplify the loader code, by removing inferable variables * refactor and simplify map_attributes method * fix test chunks write concurrency issues * Refactor: clean model settings module code (#52) * remove old file no more used * extract attribute settings module from the model settings module * remove the inmutable feature as deprecated and not used * rename callbacks method names * Feature: Add after_save and after_destroy hooks to models (#53) * remove old file no more used * extract attribute settings module from the model settings module * remove the inmutable feature as deprecated and not used * rename callbacks method names * add hooks module * Feature: update rdf gem to latest version (#56) * unpin rdf version, to use the latest and add rdf vocab and xml * update URI class monkey patch because Addressable does no more exist * RDF::SKOS is replaced with RDF::Vocab::SKOS in the latest version of RDF * pin rdf version to 3.2.11 the latest version that supports ruby 2.7 * monkey patch Literal::DateTime format to be supported by 4store * remove addressable dependency * Fix: saving a model removing
unmodified attributes after consecutive save * Fix: enforce to use str() when doing a filter with a string value (#57) * enforce to use str() when doing a filter with a string * update agraph version to 8.1.0 * Fix: monkey patch RDF to not remove xsd:string by default * Feature: Enhance SOLR integration and add a Schema API (#54) * add an abstraction to SOLR integration and add Schema API * add SOLR Schema API tests * update SOLR backend configuration and init * use the new Solr connector in the model search interface * update search test to cover the new automatic indexing and unindexing * handle the solr container initialization when running docker for tests * add omit_norms options for SolrSchemaGenerator * fix solr schema initial dynamic fields declaration and replace the usage of mapping-ISOLatin1Accent * delay the schema generation to after model declarations or on demand * add solr edismax filters tests * fix indexBatch and unindexBatch tests * add security checks to the index and unindex functions * change dynamic fields names to have less code migration * update clear_all_schema to remove all copy and normal fields * add an option to force solr initialization if wanted * handle indexing embedded objects of a model * add index update option * fix clear all schema to just remove all the fields and recreate them * add index_enabled?
helper for models * perform a status test when initializing the solr connector * extract init_search_connection function from init_search_connections * fix typo in indexOptimize call * add solr search using HTTP post instead of GET for large queries * make indexed resource_id case insensitive (#59) * Fix: Invalidating cache after insertion of a new element (#60) * create a test to reproduce the cache invalidate on insert bug * use again insert_data instead of execute_append_request because the first invalidate the cache * update sparql client to version 3.2.0 * handle the case virtuoso insert data bug * use development branch of sparql-client * fix search resource_id case insensitive by using string_ci instead --- .github/workflows/ruby-unit-test.yml | 15 +- .gitignore | 2 + .ruby-version | 1 + Gemfile | 4 +- Gemfile.lock | 126 ++++--- Rakefile | 7 - config/config.rb.sample | 23 ++ docker-compose.yml | 96 +++++- goo.gemspec | 7 +- lib/goo.rb | 125 +++++-- lib/goo/base/attribute_proxy.rb | 57 ---- lib/goo/base/resource.rb | 46 +-- lib/goo/base/settings/attribute.rb | 239 +++++++++++++ lib/goo/base/settings/hooks.rb | 62 ++++ lib/goo/base/settings/settings.rb | 315 ++---------------- lib/goo/base/where.rb | 119 +------ lib/goo/config/config.rb | 80 +++++ lib/goo/mixins/sparql_client.rb | 102 ++---- lib/goo/search/search.rb | 195 +++++++---- lib/goo/search/solr/solr_admin.rb | 79 +++++ lib/goo/search/solr/solr_connector.rb | 41 +++ lib/goo/search/solr/solr_query.rb | 108 ++++++ lib/goo/search/solr/solr_schema.rb | 184 ++++++++++ lib/goo/search/solr/solr_schema_generator.rb | 279 ++++++++++++++++ lib/goo/sparql/client.rb | 27 +- lib/goo/sparql/loader.rb | 68 ++-- lib/goo/sparql/mixins/query_pattern.rb | 3 - lib/goo/sparql/mixins/solution_lang_filter.rb | 2 +- lib/goo/sparql/processor.rb | 137 ++++++++ lib/goo/sparql/query_builder.rb | 107 +++--- lib/goo/sparql/solutions_mapper.rb | 116 ++++--- lib/goo/sparql/sparql.rb | 1 + lib/goo/sparql/triples.rb | 2 + 
lib/goo/utils/callbacks_utils.rb | 22 ++ lib/goo/validators/enforce.rb | 8 +- rakelib/docker_based_test.rake | 121 +++++++ test/app/bioportal.rb | 166 +++++---- test/app/models.rb | 2 - test/app/test_app.rb | 2 - test/console.rb | 1 - test/data/graphdb-repo-config.ttl | 33 ++ test/data/graphdb-test-load.nt | 0 test/data/virtuoso.init | 240 +++++++++++++ test/settings/test_hooks.rb | 50 +++ test/solr/test_solr.rb | 122 +++++++ test/test_basic_persistence.rb | 2 - test/test_cache.rb | 33 +- test/test_case.rb | 33 +- test/test_chunks_write.rb | 82 ++--- test/test_collections.rb | 2 - test/test_dsl_settings.rb | 3 +- test/test_enum.rb | 2 - test/test_index.rb | 2 - test/test_inmutable.rb | 102 ------ test/test_inverse.rb | 2 - test/test_model_complex.rb | 34 +- test/test_name_with.rb | 2 - test/test_namespaces.rb | 2 - test/test_read_only.rb | 2 - test/test_schemaless.rb | 21 +- test/test_search.rb | 251 +++++++++++++- test/test_validators.rb | 1 - test/test_where.rb | 82 +++-- 63 files changed, 2959 insertions(+), 1241 deletions(-) create mode 100644 .ruby-version create mode 100644 config/config.rb.sample delete mode 100644 lib/goo/base/attribute_proxy.rb create mode 100644 lib/goo/base/settings/attribute.rb create mode 100644 lib/goo/base/settings/hooks.rb create mode 100644 lib/goo/config/config.rb create mode 100644 lib/goo/search/solr/solr_admin.rb create mode 100644 lib/goo/search/solr/solr_connector.rb create mode 100644 lib/goo/search/solr/solr_query.rb create mode 100644 lib/goo/search/solr/solr_schema.rb create mode 100644 lib/goo/search/solr/solr_schema_generator.rb create mode 100644 lib/goo/sparql/processor.rb create mode 100644 lib/goo/utils/callbacks_utils.rb create mode 100644 rakelib/docker_based_test.rake create mode 100644 test/data/graphdb-repo-config.ttl create mode 100644 test/data/graphdb-test-load.nt create mode 100644 test/data/virtuoso.init create mode 100644 test/settings/test_hooks.rb create mode 100644 test/solr/test_solr.rb delete mode 
100644 test/test_inmutable.rb diff --git a/.github/workflows/ruby-unit-test.yml b/.github/workflows/ruby-unit-test.yml index ccb161f3..ac40314b 100644 --- a/.github/workflows/ruby-unit-test.yml +++ b/.github/workflows/ruby-unit-test.yml @@ -12,21 +12,26 @@ jobs: strategy: fail-fast: false matrix: - ruby-version: ['2.7'] + goo-slice: [ '20', '100', '500' ] + ruby-version: [ '2.7', '3.0' ] + triplestore: [ 'fs', 'ag', 'vo', 'gb' ] steps: - uses: actions/checkout@v3 - name: Install Dependencies - run: sudo apt-get -y install raptor2-utils + run: sudo apt-get update && sudo apt-get -y install raptor2-utils - name: Set up Ruby uses: ruby/setup-ruby@v1 with: ruby-version: ${{ matrix.ruby-version }} bundler-cache: true # runs 'bundle install' and caches installed gems automatically - - name: Start backend services via docker-compose - run: docker compose up -d + - name: Add config file + # tempoaray workaround for the config.rb file requirement + run: echo 'Goo.config do |config| end' > config/config.rb + - name: List directory contents + run: ls -R ./test/data - name: Run tests - run: bundle exec rake test + run: GOO_SLICES=${{ matrix.goo-slice }} bundle exec rake test:docker:${{ matrix.triplestore }} TESTOPTS="-v" - name: Upload coverage to Codecov uses: codecov/codecov-action@v3 diff --git a/.gitignore b/.gitignore index f887556d..5dcefa7c 100644 --- a/.gitignore +++ b/.gitignore @@ -25,3 +25,5 @@ doc/ .idea/* projectFilesBackup/* + +config/config.rb \ No newline at end of file diff --git a/.ruby-version b/.ruby-version new file mode 100644 index 00000000..6a81b4c8 --- /dev/null +++ b/.ruby-version @@ -0,0 +1 @@ +2.7.8 diff --git a/Gemfile b/Gemfile index 3564fe3b..4d5a2b5b 100644 --- a/Gemfile +++ b/Gemfile @@ -4,7 +4,6 @@ gemspec gem "activesupport" gem "cube-ruby", require: "cube" -gem "faraday", '~> 1.9' gem "rake" gem "uuid" gem "request_store" @@ -23,4 +22,5 @@ group :profiling do gem "thin" end -gem 'sparql-client', github: 'ontoportal-lirmm/sparql-client', 
branch: 'master' +gem 'sparql-client', github: 'ontoportal-lirmm/sparql-client', branch: 'development' +gem 'faraday', '2.7.11' #unpin if we no more support ruby 2.7 \ No newline at end of file diff --git a/Gemfile.lock b/Gemfile.lock index 9fe7bd02..dba79705 100644 --- a/Gemfile.lock +++ b/Gemfile.lock @@ -1,12 +1,11 @@ GIT remote: https://github.com/ontoportal-lirmm/sparql-client.git - revision: aed51baf4106fd0f3d0e3f9238f0aad9406aa3f0 - branch: master + revision: c96da3ad479724a31ccd6217ab9939dddfaca40e + branch: development specs: - sparql-client (1.0.1) - json_pure (>= 1.4) - net-http-persistent (= 2.9.4) - rdf (>= 1.0) + sparql-client (3.2.2) + net-http-persistent (~> 4.0, >= 4.0.2) + rdf (~> 3.2, >= 3.2.11) PATH remote: . @@ -14,7 +13,10 @@ PATH goo (0.0.2) addressable (~> 2.8) pry - rdf (= 1.0.8) + rdf (= 3.2.11) + rdf-raptor + rdf-rdfxml + rdf-vocab redis rest-client rsolr @@ -30,87 +32,85 @@ GEM multi_json (~> 1.3) thread_safe (~> 0.1) tzinfo (~> 0.3.37) - addressable (2.8.1) + addressable (2.8.6) public_suffix (>= 2.0.2, < 6.0) + base64 (0.2.0) builder (3.2.4) coderay (1.1.3) - concurrent-ruby (1.2.2) - connection_pool (2.3.0) + concurrent-ruby (1.2.3) + connection_pool (2.4.1) cube-ruby (0.0.3) daemons (1.4.1) docile (1.4.0) - domain_name (0.5.20190701) - unf (>= 0.0.5, < 1.0.0) + domain_name (0.6.20240107) eventmachine (1.2.7) - faraday (1.10.3) - faraday-em_http (~> 1.0) - faraday-em_synchrony (~> 1.0) - faraday-excon (~> 1.1) - faraday-httpclient (~> 1.0) - faraday-multipart (~> 1.0) - faraday-net_http (~> 1.0) - faraday-net_http_persistent (~> 1.0) - faraday-patron (~> 1.0) - faraday-rack (~> 1.0) - faraday-retry (~> 1.0) + faraday (2.7.11) + base64 + faraday-net_http (>= 2.0, < 3.1) ruby2_keywords (>= 0.0.4) - faraday-em_http (1.0.0) - faraday-em_synchrony (1.0.0) - faraday-excon (1.1.0) - faraday-httpclient (1.0.1) - faraday-multipart (1.0.4) - multipart-post (~> 2) - faraday-net_http (1.0.1) - faraday-net_http_persistent (1.2.0) - faraday-patron 
(1.0.0) - faraday-rack (1.0.0) - faraday-retry (1.0.3) + faraday-net_http (3.0.2) + ffi (1.16.3) + htmlentities (4.3.4) http-accept (1.7.0) http-cookie (1.0.5) domain_name (~> 0.5) i18n (0.9.5) concurrent-ruby (~> 1.0) - json_pure (2.6.3) + link_header (0.0.8) macaddr (1.7.2) systemu (~> 2.6.5) - method_source (1.0.0) - mime-types (3.4.1) + method_source (1.1.0) + mime-types (3.5.2) mime-types-data (~> 3.2015) - mime-types-data (3.2023.0218.1) + mime-types-data (3.2024.0305) minitest (4.7.5) multi_json (1.15.0) - multipart-post (2.3.0) mustermann (3.0.0) ruby2_keywords (~> 0.0.1) - net-http-persistent (2.9.4) + net-http-persistent (4.0.2) + connection_pool (~> 2.2) netrc (0.11.0) pry (0.14.2) coderay (~> 1.1) method_source (~> 1.0) - public_suffix (5.0.1) - rack (2.2.6.3) + public_suffix (5.0.5) + rack (2.2.9) rack-accept (0.4.5) rack (>= 0.4) rack-post-body-to-params (0.1.8) activesupport (>= 2.3) - rack-protection (3.0.5) - rack - rake (13.0.6) - rdf (1.0.8) - addressable (>= 2.2) - redis (5.0.6) - redis-client (>= 0.9.0) - redis-client (0.13.0) + rack-protection (3.2.0) + base64 (>= 0.1.0) + rack (~> 2.2, >= 2.2.4) + rake (13.2.1) + rdf (3.2.11) + link_header (~> 0.0, >= 0.0.8) + rdf-raptor (3.2.0) + ffi (~> 1.15) + rdf (~> 3.2) + rdf-rdfxml (3.2.2) + builder (~> 3.2) + htmlentities (~> 4.3) + rdf (~> 3.2) + rdf-xsd (~> 3.2) + rdf-vocab (3.2.7) + rdf (~> 3.2, >= 3.2.4) + rdf-xsd (3.2.1) + rdf (~> 3.2) + rexml (~> 3.2) + redis (5.2.0) + redis-client (>= 0.22.0) + redis-client (0.22.1) connection_pool - request_store (1.5.1) + request_store (1.6.0) rack (>= 1.4) rest-client (2.1.0) http-accept (>= 1.7.0, < 2.0) http-cookie (>= 1.0.2, < 2.0) mime-types (>= 1.16, < 4.0) netrc (~> 0.8) - rexml (3.2.5) - rsolr (2.5.0) + rexml (3.2.6) + rsolr (2.6.0) builder (>= 2.1.2) faraday (>= 0.9, < 3, != 2.0.0) ruby2_keywords (0.0.5) @@ -123,34 +123,30 @@ GEM simplecov (~> 0.19) simplecov-html (0.12.3) simplecov_json_formatter (0.1.4) - sinatra (3.0.5) + sinatra (3.2.0) 
mustermann (~> 3.0) rack (~> 2.2, >= 2.2.4) - rack-protection (= 3.0.5) + rack-protection (= 3.2.0) tilt (~> 2.0) systemu (2.6.5) - thin (1.8.1) + thin (1.8.2) daemons (~> 1.0, >= 1.0.9) eventmachine (~> 1.0, >= 1.0.4) rack (>= 1, < 3) thread_safe (0.3.6) - tilt (2.1.0) - tzinfo (0.3.61) - unf (0.1.4) - unf_ext - unf_ext (0.0.8.2) + tilt (2.3.0) + tzinfo (0.3.62) uuid (2.3.9) macaddr (~> 1.0) PLATFORMS - ruby - x86_64-darwin-16 + x86_64-darwin-23 x86_64-linux DEPENDENCIES activesupport cube-ruby - faraday (~> 1.9) + faraday (= 2.7.11) goo! minitest (< 5.0) pry @@ -166,4 +162,4 @@ DEPENDENCIES uuid BUNDLED WITH - 2.3.22 + 2.4.22 diff --git a/Rakefile b/Rakefile index e593ddce..80c18410 100644 --- a/Rakefile +++ b/Rakefile @@ -50,12 +50,6 @@ Rake::TestTask.new do |t| t.warning = false end -Rake::TestTask.new do |t| - t.name = "test:inmutable" - t.test_files = FileList['test/test_inmutable.rb'] - t.warning = false -end - Rake::TestTask.new do |t| t.name = "test:inverse" t.test_files = FileList['test/test_inverse.rb'] @@ -107,7 +101,6 @@ end desc "Console for working with data" task :console do require_relative "test/test_case" - GooTest.configure_goo binding.pry end diff --git a/config/config.rb.sample b/config/config.rb.sample new file mode 100644 index 00000000..12abdccb --- /dev/null +++ b/config/config.rb.sample @@ -0,0 +1,23 @@ +Goo.config do |config| + # 4store + config.goo_backend_name = '4store' + config.goo_port = 8080 + config.goo_host = 'localhost' + config.goo_path_query = '/sparql/' + config.goo_path_data = '/data/' + config.goo_path_update = '/update/' + + # AllegroGraph + # config.goo_backend_name = 'AG' + # config.goo_port = 10035 + # config.goo_host = 'localhost' + # config.goo_path_query = "/repositories/ontoportal" + # config.goo_path_data = "/repositories/ontoportal/statements/" + # config.goo_path_update = "/repositories/ontoportal/statements/" + + config.search_server_url = 'http://localhost:8983/solr/term_search_core1' + config.redis_host = 
'localhost' + config.redis_port = 6379 + config.bioportal_namespace = 'http://data.bioontology.org/' + config.queries_debug = false +end \ No newline at end of file diff --git a/docker-compose.yml b/docker-compose.yml index fe29bc33..463a1b92 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -1,16 +1,5 @@ -version: '3' - services: - 4store: - image: bde2020/4store - ports: - - 9000:9000 - command: > - bash -c "4s-backend-setup --segments 4 ontoportal_kb - && 4s-backend ontoportal_kb - && 4s-httpd -D -s-1 -p 9000 ontoportal_kb" - - redis: + redis-ut: image: redis ports: - 6379:6379 @@ -20,7 +9,86 @@ services: timeout: 3s retries: 30 - solr: - image: ontoportal/solr-ut:0.1 + solr-ut: + image: solr:8.11.2 ports: - 8983:8983 + command: bin/solr start -cloud -f + + agraph-ut: + image: franzinc/agraph:v8.1.0 + platform: linux/amd64 + environment: + - AGRAPH_SUPER_USER=test + - AGRAPH_SUPER_PASSWORD=xyzzy + shm_size: 1g + ports: + # - 10035:10035 + - 10000-10035:10000-10035 + volumes: + - agdata:/agraph/data + # - ./agraph/etc:/agraph/etc + command: > + bash -c "/agraph/bin/agraph-control --config /agraph/etc/agraph.cfg start + ; agtool repos create ontoportal_test --supersede + ; agtool users add anonymous + ; agtool users grant anonymous root:ontoportal_test:rw + ; tail -f /agraph/data/agraph.log" + # healthcheck: + # test: ["CMD-SHELL", "curl -sf http://127.0.0.1:10035/repositories/ontoportal_test/status | grep -iqE '(^running|^lingering)' || exit 1"] + # start_period: 10s + # interval: 10s + # timeout: 5s + # retries: 5 + profiles: + - ag + + 4store-ut: + image: bde2020/4store + platform: linux/amd64 + ports: + - 9000:9000 + command: > + bash -c "4s-backend-setup --segments 4 ontoportal_kb + && 4s-backend ontoportal_kb + && 4s-httpd -D -s-1 -p 9000 ontoportal_kb" + profiles: + - fs + virtuoso-ut: + image: tenforce/virtuoso:virtuoso7.2.5 + platform: linux/amd64 + environment: + - SPARQL_UPDATE=true + ports: + - 1111:1111 + - 8890:8890 + + profiles: + - vo + + 
graphdb: + image: ontotext/graphdb:10.3.3 + platform: linux/amd64 + privileged: true + environment: + GDB_HEAP_SIZE: 5G + GDB_JAVA_OPTS: >- + -Xms5g -Xmx5g + ports: + - 7200:7200 + - 7300:7300 + volumes: + - ./test/data/graphdb-repo-config.ttl:/opt/graphdb/dist/configs/templates/data/graphdb-repo-config.ttl + - ./test/data/graphdb-test-load.nt:/opt/graphdb/dist/configs/templates/data/graphdb-test-load.nt + + entrypoint: > + bash -c " importrdf load -f -c /opt/graphdb/dist/configs/templates/data/graphdb-repo-config.ttl -m parallel /opt/graphdb/dist/configs/templates/data/graphdb-test-load.nt ; graphdb -Ddefault.min.distinct.threshold=3000 " + profiles: + - gb + +volumes: + agdata: + + + + diff --git a/goo.gemspec b/goo.gemspec index c3386799..b7175779 100644 --- a/goo.gemspec +++ b/goo.gemspec @@ -6,9 +6,12 @@ Gem::Specification.new do |s| s.email = "manuelso@stanford.edu" s.files = Dir["lib/**/*.rb"] s.homepage = "http://github.com/ncbo/goo" - s.add_dependency("addressable", "~> 2.8") + s.add_dependency("addressable", "~> 2.8") s.add_dependency("pry") - s.add_dependency("rdf", "= 1.0.8") + s.add_dependency("rdf", "3.2.11") #unpin when we support only Ruby >= 3.0 + s.add_dependency("rdf-vocab") + s.add_dependency("rdf-rdfxml") + s.add_dependency("rdf-raptor") s.add_dependency("redis") s.add_dependency("rest-client") s.add_dependency("rsolr") diff --git a/lib/goo.rb b/lib/goo.rb index ff0e6279..a1399a84 100644 --- a/lib/goo.rb +++ b/lib/goo.rb @@ -1,6 +1,8 @@ require "pry" require "rdf" +require "rdf/vocab" require "rdf/ntriples" +require "rdf/rdfxml" require "sparql/client" require "set" @@ -12,6 +14,7 @@ require 'uuid' require "cube" +require_relative "goo/config/config" require_relative "goo/sparql/sparql" require_relative "goo/search/search" require_relative "goo/base/base" @@ -39,6 +42,7 @@ module Goo @@model_by_name = {} @@search_backends = {} @@search_connection = {} + @@search_collections = {} @@default_namespace = nil @@id_prefix = nil @@redis_client = nil 
@@ -52,6 +56,31 @@ module Goo @@slice_loading_size = 500 + + def self.log_debug_file(str) + debug_file = "./queries.txt" + File.write(debug_file, str.to_s + "\n", mode: 'a') + end + + + + def backend_4s? + sparql_backend_name.downcase.eql?("4store") + end + + def backend_ag? + sparql_backend_name.downcase.eql?("allegrograph") + end + + def backend_gb? + sparql_backend_name.downcase.eql?("graphdb") + end + + def backend_vo? + sparql_backend_name.downcase.eql?("virtuoso") + end + + def self.main_languages @@main_languages end @@ -73,7 +102,7 @@ def self.language_includes(lang) end def self.add_namespace(shortcut, namespace,default=false) - if !(namespace.instance_of? RDF::Vocabulary) + unless namespace.instance_of? RDF::Vocabulary raise ArgumentError, "Namespace must be a RDF::Vocabulary object" end @@namespaces[shortcut.to_sym] = namespace @@ -88,38 +117,30 @@ def self.add_sparql_backend(name, *opts) opts = opts[0] @@sparql_backends = @@sparql_backends.dup @@sparql_backends[name] = opts - @@sparql_backends[name][:query]=Goo::SPARQL::Client.new(opts[:query], - {protocol: "1.1", "Content-Type" => "application/x-www-form-urlencoded", - read_timeout: 10000, - validate: false, - redis_cache: @@redis_client, - cube_options: @@cube_options}) - @@sparql_backends[name][:update]=Goo::SPARQL::Client.new(opts[:update], - {protocol: "1.1", "Content-Type" => "application/x-www-form-urlencoded", - read_timeout: 10000, - validate: false, - redis_cache: @@redis_client, - cube_options: @@cube_options}) - @@sparql_backends[name][:data]=Goo::SPARQL::Client.new(opts[:data], - {protocol: "1.1", "Content-Type" => "application/x-www-form-urlencoded", - read_timeout: 10000, - validate: false, - redis_cache: @@redis_client, - cube_options: @@cube_options}) + @@sparql_backends[name][:query] = Goo::SPARQL::Client.new(opts[:query], + protocol: "1.1", + headers: { "Content-Type" => "application/x-www-form-urlencoded", "Accept" => "application/sparql-results+json"}, + read_timeout: 10000, + 
validate: false, + redis_cache: @@redis_client) + @@sparql_backends[name][:update] = Goo::SPARQL::Client.new(opts[:update], + protocol: "1.1", + headers: { "Content-Type" => "application/x-www-form-urlencoded", "Accept" => "application/sparql-results+json"}, + read_timeout: 10000, + validate: false, + redis_cache: @@redis_client, + cube_options: @@cube_options) + @@sparql_backends[name][:data] = Goo::SPARQL::Client.new(opts[:data], + protocol: "1.1", + headers: { "Content-Type" => "application/x-www-form-urlencoded", "Accept" => "application/sparql-results+json"}, + read_timeout: 10000, + validate: false, + redis_cache: @@redis_client, + cube_options: @@cube_options) @@sparql_backends[name][:backend_name] = opts[:backend_name] @@sparql_backends.freeze end - def self.test_reset - if @@sparql_backends[:main][:query].url.to_s["localhost"].nil? - raise Exception, "only for testing" - end - @@sparql_backends[:main][:query]=Goo::SPARQL::Client.new("http://localhost:9000/sparql/", - {protocol: "1.1", "Content-Type" => "application/x-www-form-urlencoded", - read_timeout: 300, - redis_cache: @@redis_client }) - end - def self.main_lang @@main_lang end @@ -234,11 +255,9 @@ def self.configure raise ArgumentError, "Configuration needs to receive a code block" end yield self - configure_sanity_check() + configure_sanity_check - if @@search_backends.length > 0 - @@search_backends.each { |name, val| @@search_connection[name] = RSolr.connect(url: search_conf(name), timeout: 1800, open_timeout: 1800) } - end + init_search_connections @@namespaces.freeze @@sparql_backends.freeze @@ -262,8 +281,44 @@ def self.search_conf(name=:main) return @@search_backends[name][:service] end - def self.search_connection(name=:main) - return @@search_connection[name] + def self.search_connection(collection_name) + return search_client(collection_name).solr + end + + def self.search_client(collection_name) + @@search_connection[collection_name] + end + + def 
self.add_search_connection(collection_name, search_backend = :main, &block) + @@search_collections[collection_name] = { + search_backend: search_backend, + block: block_given? ? block : nil + } + end + + def self.search_connections + @@search_connection + end + + def self.init_search_connection(collection_name, search_backend = :main, block = nil, force: false) + return @@search_connection[collection_name] if @@search_connection[collection_name] && !force + + @@search_connection[collection_name] = SOLR::SolrConnector.new(search_conf(search_backend), collection_name) + if block + block.call(@@search_connection[collection_name].schema_generator) + @@search_connection[collection_name].enable_custom_schema + end + @@search_connection[collection_name].init(force) + @@search_connection[collection_name] + end + + + def self.init_search_connections(force=false) + @@search_collections.each do |collection_name, backend| + search_backend = backend[:search_backend] + block = backend[:block] + init_search_connection(collection_name, search_backend, block, force: force) + end end def self.sparql_query_client(name=:main) diff --git a/lib/goo/base/attribute_proxy.rb b/lib/goo/base/attribute_proxy.rb deleted file mode 100644 index 5600256e..00000000 --- a/lib/goo/base/attribute_proxy.rb +++ /dev/null @@ -1,57 +0,0 @@ - -module Goo - module Base - - class AttributeValueProxy - def initialize(validator,internals) - @validator = validator - @internals = internals - end - - def cardinality_transform(attr, value, current_value) - if @validator.nil? - unless value.kind_of? Array - raise ArgumentError, "Attribute '#{attr} must be an array. No cardinality configured.'" - end - return value - end - if value.kind_of? Array - if @validator.options[:max] and value.length > @validator.options[:max] - #TODO review this - return value[0] if attr == :prefLabel - raise ArgumentError, "Attribute '#{attr}' does not satisfy max cardinality." 
- end - if @validator.options[:min] and value.length < @validator.options[:min] - raise ArgumentError, "Attribute '#{attr}' does not satisfy min cardinality." - end - if @validator.options[:max] and @validator.options[:max] == 1 - return value[0] - end - else #not an array - if (not @validator.options[:max]) or @validator.options[:max] > 1 - return [value] - end - if @validator.options[:max] and @validator.options[:max] == 1 - return value - end - if @validator.options[:min] and @validator.options[:min] > 0 - return [value] - end - end - if not value.kind_of? Array and current_value.kind_of? Array - raise ArgumentError, - "Multiple value objects cannot be replaced for non-array objects" - end - if value.kind_of? Array then value else [value] end - end - - def call(*args) - options = args[0] - value = options[:value] - attr = options[:attr] - current_value = options[:current_value] - tvalue = cardinality_transform(attr,value,current_value) - end - end - end -end diff --git a/lib/goo/base/resource.rb b/lib/goo/base/resource.rb index 2eaf17ad..5c829d8e 100644 --- a/lib/goo/base/resource.rb +++ b/lib/goo/base/resource.rb @@ -168,7 +168,11 @@ def delete(*args) end @persistent = false @modified = true - self.class.load_inmutable_instances if self.class.inmutable? && self.class.inm_instances + + if self.class.after_destroy? + self.class.call_after_destroy(self) + end + return nil end @@ -259,7 +263,7 @@ def save(*opts) #call update callback before saving if callbacks - self.class.attributes_with_update_callbacks.each do |attr| + self.class.attributes_with_callbacks.each do |attr| Goo::Validators::Enforce.enforce_callbacks(self, attr) end end @@ -289,7 +293,7 @@ def save(*opts) batch_file.write(lines.join("")) batch_file.flush() else - Goo.sparql_update_client.insert_data(graph_insert, graph: graph) + Goo.sparql_update_client.insert_data(graph_insert, graph: graph, use_insert_data: !Goo.backend_vo?) 
end rescue Exception => e raise e @@ -301,7 +305,11 @@ def save(*opts) @modified_attributes = Set.new @persistent = true - self.class.load_inmutable_instances if self.class.inmutable? && self.class.inm_instances + + if self.class.after_save? + self.class.call_after_save(self) + end + return self end @@ -377,25 +385,26 @@ def self.map_attributes(inst,equivalent_predicates=nil, include_languages: false next if inst.class.collection?(attr) #collection is already there next unless inst.respond_to?(attr) attr_uri = klass.attribute_uri(attr,inst.collection).to_s - if unmapped_string_keys.include?(attr_uri.to_s) || - (equivalent_predicates && equivalent_predicates.include?(attr_uri)) - if !unmapped_string_keys.include?(attr_uri) - object = Array(equivalent_predicates[attr_uri].map { |eq_attr| unmapped_string_keys[eq_attr] }).flatten.compact - if include_languages && [RDF::URI, Hash].all?{|c| object.map(&:class).include?(c)} - object = object.reduce({}) do |all, new_v| - new_v = { none: [new_v] } if new_v.is_a?(RDF::URI) - all.merge(new_v) {|_, a, b| a + b } + if unmapped_string_keys.include?(attr_uri.to_s) || equivalent_predicates&.include?(attr_uri) + object = nil + + if unmapped_string_keys.include?(attr_uri) + object = unmapped_string_keys[attr_uri] + else + equivalent_predicates[attr_uri].each do |eq_attr| + next if unmapped_string_keys[eq_attr].nil? + + if object.nil? + object = unmapped_string_keys[eq_attr].dup + elsif object.is_a?(Array) + object.concat(unmapped_string_keys[eq_attr]) end - elsif include_languages - object = object.first end if object.nil? inst.send("#{attr}=", list_attrs.include?(attr) ? 
[] : nil, on_load: true) next end - else - object = unmapped_string_keys[attr_uri] end if object.is_a?(Hash) @@ -425,11 +434,6 @@ def self.map_attributes(inst,equivalent_predicates=nil, include_languages: false def self.find(id, *options) id = RDF::URI.new(id) if !id.instance_of?(RDF::URI) && self.name_with == :id id = id_from_unique_attribute(name_with(),id) unless id.instance_of?(RDF::URI) - if self.inmutable? && self.inm_instances && self.inm_instances[id] - w = Goo::Base::Where.new(self) - w.instance_variable_set("@result", [self.inm_instances[id]]) - return w - end options_load = { ids: [id], klass: self }.merge(options[-1] || {}) options_load[:find] = true where = Goo::Base::Where.new(self) diff --git a/lib/goo/base/settings/attribute.rb b/lib/goo/base/settings/attribute.rb new file mode 100644 index 00000000..dbf52b78 --- /dev/null +++ b/lib/goo/base/settings/attribute.rb @@ -0,0 +1,239 @@ +module Goo + module Base + module Settings + module AttributeSettings + + def attribute(*args) + options = args.reverse + attr_name = options.pop + attr_name = attr_name.to_sym + options = options.pop + options = {} if options.nil? 
+ + options[:enforce] ||= [] + + set_data_type(options) + set_no_list_by_default(options) + + @model_settings[:attributes][attr_name] = options + load_yaml_scheme_options(attr_name) + shape_attribute(attr_name) + namespace = attribute_namespace(attr_name) || @model_settings[:namespace] + vocab = Goo.vocabulary(namespace) + if options[:property].is_a?(Proc) + @attribute_uris[attr_name] = options[:property] + else + @attribute_uris[attr_name] = vocab[options[:property] || attr_name] + end + if options[:enforce].include?(:unique) && options[:enforce].include?(:list) + raise ArgumentError, ":list options cannot be combined with :list" + end + set_range(attr_name) + end + + def shape_attribute(attr) + return if attr == :resource_id + + attr = attr.to_sym + define_method("#{attr}=") do |*args| + if self.class.handler?(attr) + raise ArgumentError, "Method based attributes cannot be set" + end + if self.class.inverse?(attr) && !(args && args.last.instance_of?(Hash) && args.last[:on_load]) + raise ArgumentError, "`#{attr}` is an inverse attribute. Values cannot be assigned." + end + @loaded_attributes.add(attr) + value = args[0] + unless args.last.instance_of?(Hash) and args.last[:on_load] + if self.persistent? and self.class.name_with == attr + raise ArgumentError, "`#{attr}` attribute is used to name this resource and cannot be modified." + end + prev = self.instance_variable_get("@#{attr}") + if !prev.nil? 
and !@modified_attributes.include?(attr) + if prev != value + @previous_values = @previous_values || {} + @previous_values[attr] = prev + end + end + @modified_attributes.add(attr) + end + if value.instance_of?(Array) + value = value.dup.freeze + end + self.instance_variable_set("@#{attr}", value) + end + define_method("#{attr}") do |*args| + attr_value = self.instance_variable_get("@#{attr}") + + if self.class.not_show_all_languages?(attr_value, args) + is_array = attr_value.values.first.is_a?(Array) + attr_value = attr_value.values.flatten + attr_value = attr_value.first unless is_array + end + + if self.class.handler?(attr) + if @loaded_attributes.include?(attr) + return attr_value + end + value = self.send("#{self.class.handler(attr)}") + self.instance_variable_set("@#{attr}", value) + @loaded_attributes << attr + return value + end + + if (not @persistent) or @loaded_attributes.include?(attr) + return attr_value + else + # TODO: bug here when no labels from one of the main_lang available... (when it is called by ontologies_linked_data ontologies_submission) + raise Goo::Base::AttributeNotLoaded, "Attribute `#{attr}` is not loaded for #{self.id}. Loaded attributes: #{@loaded_attributes.inspect}." + end + end + end + + def attributes(*options) + if options and options.length > 0 + option = options.first + + if option == :all + return @model_settings[:attributes].keys + end + + if option == :inverse + return @model_settings[:attributes].select { |_, v| v[:inverse] }.keys + end + + attrs = @model_settings[:attributes].select { |_, opts| opts[:enforce].include?(option) }.keys + + attrs.concat(attributes(:inverse)) if option == :list + + return attrs + end + + @model_settings[:attributes].select { |k, attr| attr[:inverse].nil? 
&& !handler?(k) }.keys + + end + + def attributes_with_defaults + @model_settings[:attributes].select { |_, opts| opts[:default] }.keys + end + + def attribute_namespace(attr) + attribute_settings(attr)[:namespace] + end + + def default(attr) + attribute_settings(attr)[:default] + end + + def range(attr) + @model_settings[:range][attr] + end + + def attribute_settings(attr) + @model_settings[:attributes][attr] + end + + def required?(attr) + return false if attribute_settings(attr).nil? + attribute_settings(attr)[:enforce].include?(:existence) + end + + def unique?(attr) + return false if attribute_settings(attr).nil? + attribute_settings(attr)[:enforce].include?(:unique) + end + + def datatype(attr) + enforced = attribute_settings(attr)[:enforce].dup + return :string if enforced.nil? + + enforced.delete(:list) + enforced.delete(:no_list) + + enforced.find { |e| Goo::Validators::DataType.ids.include?(e) } || :string + end + + def list?(attr) + return false if attribute_settings(attr).nil? + attribute_settings(attr)[:enforce].include?(:list) + end + + def transitive?(attr) + return false unless @model_settings[:attributes].include?(attr) + attribute_settings(attr)[:transitive] == true + end + + def alias?(attr) + return false unless @model_settings[:attributes].include?(attr) + attribute_settings(attr)[:alias] == true + end + + def handler?(attr) + return false if attribute_settings(attr).nil? + !attribute_settings(attr)[:handler].nil? + end + + def handler(attr) + return false if attribute_settings(attr).nil? + attribute_settings(attr)[:handler] + end + + def inverse?(attr) + return false if attribute_settings(attr).nil? + !attribute_settings(attr)[:inverse].nil? 
+ end + + def inverse_opts(attr) + attribute_settings(attr)[:inverse] + end + + def attribute_uri(attr, *args) + if attr == :id + raise ArgumentError, ":id cannot be treated as predicate for .where, use find " + end + uri = @attribute_uris[attr] + if uri.is_a?(Proc) + uri = uri.call(*args.flatten) + end + return uri unless uri.nil? + attr_string = attr.to_s + Goo.namespaces.keys.each do |ns| + nss = ns.to_s + if attr_string.start_with?(nss) + return Goo.vocabulary(ns)[attr_string[nss.length + 1..-1]] + end + end + + Goo.vocabulary(nil)[attr] + end + + + def indexable?(attr) + setting = attribute_settings(attr.to_sym) + setting && (setting[:index].nil? || setting[:index] == true) + end + + def fuzzy_searchable?(attr) + attribute_settings(attr)[:fuzzy_search] == true + end + + + private + + def set_no_list_by_default(options) + if options[:enforce].nil? or !options[:enforce].include?(:list) + options[:enforce] = options[:enforce] ? (options[:enforce] << :no_list) : [:no_list] + end + end + + def set_data_type(options) + if options[:type] + options[:enforce] += Array(options[:type]) + options[:enforce].uniq! + options.delete :type + end + end + end + end + end +end diff --git a/lib/goo/base/settings/hooks.rb b/lib/goo/base/settings/hooks.rb new file mode 100644 index 00000000..7925b2a0 --- /dev/null +++ b/lib/goo/base/settings/hooks.rb @@ -0,0 +1,62 @@ +require 'yaml' +require_relative '../../utils/callbacks_utils' + +module Goo + module Base + module Settings + module Hooks + + include CallbackRunner + + def after_save(*methods) + @model_settings[:after_save] ||= [] + @model_settings[:after_save].push(*methods) + end + + def after_destroy(*methods) + @model_settings[:after_destroy] ||= [] + @model_settings[:after_destroy].push(*methods) + end + + def after_save_callbacks + Array(@model_settings[:after_save]) + end + + def after_destroy_callbacks + Array(@model_settings[:after_destroy]) + end + + def after_save? + !after_save_callbacks.empty? 
+ end + + def after_destroy? + !after_destroy_callbacks.empty? + end + + def call_after_save(inst) + run_callbacks(inst, after_save_callbacks) + end + + def call_after_destroy(inst) + run_callbacks(inst, after_destroy_callbacks) + end + + def attributes_with_callbacks + (@model_settings[:attributes]. + select{ |attr,opts| opts[:onUpdate] }).keys + end + + + def attribute_callbacks(attr) + @model_settings[:attributes][attr][:onUpdate] + end + + end + end + end +end + + + + diff --git a/lib/goo/base/settings/settings.rb b/lib/goo/base/settings/settings.rb index a7008087..bf2c38da 100644 --- a/lib/goo/base/settings/settings.rb +++ b/lib/goo/base/settings/settings.rb @@ -1,5 +1,7 @@ require 'active_support/core_ext/string' require_relative 'yaml_settings' +require_relative 'hooks' +require_relative 'attribute' module Goo module Base @@ -12,11 +14,12 @@ module ClassMethods attr_accessor :model_settings attr_reader :model_name attr_reader :attribute_uris + attr_reader :namespace - include YAMLScheme + include YAMLScheme ,AttributeSettings, Hooks def default_model_options - {} + {name_with: lambda {|x| uuid_uri_generator(x)}} end def model(*args) @@ -28,12 +31,8 @@ def model(*args) model_name = args[0] @model_name = model_name.to_sym - #a hash with options is expected + # a hash with options is expected options = args.last - @inmutable = (args.include? :inmutable) - if @inmutable - @inm_instances = nil - end @model_settings = default_model_options.merge(options || {}) @@ -42,7 +41,8 @@ def model(*args) unless options.include? 
:name_with raise ArgumentError, "The model `#{model_name}` definition should include the :name_with option" end - Goo.add_model(@model_name,self) + + Goo.add_model(@model_name, self) @attribute_uris = {} @namespace = Goo.vocabulary(@model_settings[:namespace]) @uri_type = @namespace[@model_name.to_s.camelize] @@ -50,8 +50,8 @@ def model(*args) @model_settings[:attributes] = {} @model_settings[:rdf_type] = options[:rdf_type] - #registering a new models forces to redo ranges - Goo.models.each do |k,m| + # registering a new models forces to redo ranges + Goo.models.each do |k, m| m.attributes(:all).each do |attr| next if m.range(attr) m.set_range(attr) @@ -59,247 +59,29 @@ def model(*args) end end - def attributes(*options) - if options and options.length > 0 - filt = options.first - if filt == :all - return @model_settings[:attributes].keys - end - if filt == :inverse - return @model_settings[:attributes].keys. - select{ |k| @model_settings[:attributes][k][:inverse] } - end - atts = (@model_settings[:attributes]. - select{ |attr,opts| opts[:enforce].include?(filt) }).keys() - atts.concat(attributes(:inverse)) if filt == :list - return atts - end - return @model_settings[:attributes].keys. - select{ |k| @model_settings[:attributes][k][:inverse].nil? }. - select{ |k| !handler?(k) } - end - - def inmutable? - return @inmutable - end - - def collection?(attr) - return @model_settings[:collection] == attr - end - - def collection_opts - return @model_settings[:collection] - end - - def attributes_with_defaults - return (@model_settings[:attributes]. - select{ |attr,opts| opts[:default] }).keys() - end - - def attributes_with_update_callbacks - (@model_settings[:attributes]. 
- select{ |attr,opts| opts[:onUpdate] }).keys - end - - - def update_callbacks(attr) - @model_settings[:attributes][attr][:onUpdate] - end - - def default(attr) - return @model_settings[:attributes][attr][:default] - end - - def attribute_namespace(attr) - return @model_settings[:attributes][attr][:namespace] - end - - def range(attr) - @model_settings[:range][attr] - end - - def attribute_settings(attr) - @model_settings[:attributes][attr] - end - - def cardinality(attr) - return nil if @model_settings[:attributes][attr].nil? - cardinality = {} - enforce = @model_settings[:attributes][attr][:enforce] - min = enforce.map {|e| e.to_s.split("_").last.to_i if e.to_s.start_with?("min_") }.compact - max = enforce.map {|e| e.to_s.split("_").last.to_i if e.to_s.start_with?("max_") }.compact - cardinality[:min] = min.first unless min.empty? - cardinality[:max] = max.first unless max.empty? - cardinality.empty? ? nil : cardinality - end - - def required?(attr) - return false if @model_settings[:attributes][attr].nil? - @model_settings[:attributes][attr][:enforce].include?(:existence) - end - - def unique?(attr) - return false if @model_settings[:attributes][attr].nil? - @model_settings[:attributes][attr][:enforce].include?(:unique) - end - - def list?(attr) - return false if @model_settings[:attributes][attr].nil? - @model_settings[:attributes][attr][:enforce].include?(:list) - end - - def transitive?(attr) - return false if !@model_settings[:attributes].include?(attr) - return (@model_settings[:attributes][attr][:transitive] == true) - end - - def alias?(attr) - return false if !@model_settings[:attributes].include?(attr) - return (@model_settings[:attributes][attr][:alias] == true) - end - - def handler?(attr) - return false if @model_settings[:attributes][attr].nil? - return (!@model_settings[:attributes][attr][:handler].nil?) - end - - def handler(attr) - return false if @model_settings[:attributes][attr].nil? 
- return @model_settings[:attributes][attr][:handler] - end - - def inverse?(attr) - return false if @model_settings[:attributes][attr].nil? - return (!@model_settings[:attributes][attr][:inverse].nil?) - end - - def inverse_opts(attr) - return @model_settings[:attributes][attr][:inverse] - end - def set_range(attr) - @model_settings[:attributes][attr][:enforce].each do |opt| + attribute_settings(attr)[:enforce].each do |opt| if Goo.models.include?(opt) || opt.respond_to?(:model_name) || (opt.respond_to?(:new) && opt.new.kind_of?(Struct)) opt = Goo.models[opt] if opt.instance_of?(Symbol) - @model_settings[:range][attr]=opt + @model_settings[:range][attr] = opt break end end - if @model_settings[:attributes][attr][:inverse] - on = @model_settings[:attributes][attr][:inverse][:on] + if attribute_settings(attr)[:inverse] + on = attribute_settings(attr)[:inverse][:on] if Goo.models.include?(on) || on.respond_to?(:model_name) on = Goo.models[on] if on.instance_of?(Symbol) - @model_settings[:range][attr]=on + @model_settings[:range][attr] = on end end end - def attribute(*args) - options = args.reverse - attr_name = options.pop - attr_name = attr_name.to_sym - options = options.pop - options = {} if options.nil? 
- - options[:enforce] ||= [] - - set_data_type(options) - set_no_list_by_default(options) - - @model_settings[:attributes][attr_name] = options - load_yaml_scheme_options(attr_name) - shape_attribute(attr_name) - namespace = attribute_namespace(attr_name) - namespace = namespace || @model_settings[:namespace] - vocab = Goo.vocabulary(namespace) #returns default for nil input - if options[:property].is_a?(Proc) - @attribute_uris[attr_name] = options[:property] - else - @attribute_uris[attr_name] = vocab[options[:property] || attr_name] - end - if options[:enforce].include?(:unique) and options[:enforce].include?(:list) - raise ArgumentError, ":list options cannot be combined with :list" - end - set_range(attr_name) - end - - def attribute_uri(attr,*args) - if attr == :id - raise ArgumentError, ":id cannot be treated as predicate for .where, use find " - end - uri = @attribute_uris[attr] - if uri.is_a?(Proc) - uri = uri.call(*args.flatten) - end - return uri unless uri.nil? - attr_string = attr.to_s - Goo.namespaces.keys.each do |ns| - nss = ns.to_s - if attr_string.start_with?(nss) - return Goo.vocabulary(ns)[attr_string[nss.length+1..-1]] - end - end - #default - return Goo.vocabulary(nil)[attr] + def collection?(attr) + @model_settings[:collection] == attr end - def shape_attribute(attr) - return if attr == :resource_id - attr = attr.to_sym - define_method("#{attr}=") do |*args| - if self.class.handler?(attr) - raise ArgumentError, "Method based attributes cannot be set" - end - if self.class.inverse?(attr) && !(args && args.last.instance_of?(Hash) && args.last[:on_load]) - raise ArgumentError, "`#{attr}` is an inverse attribute. Values cannot be assigned." - end - @loaded_attributes.add(attr) - value = args[0] - unless args.last.instance_of?(Hash) and args.last[:on_load] - if self.persistent? and self.class.name_with == attr - raise ArgumentError, "`#{attr}` attribute is used to name this resource and cannot be modified." 
- end - prev = self.instance_variable_get("@#{attr}") - if !prev.nil? and !@modified_attributes.include?(attr) - if prev != value - @previous_values = @previous_values || {} - @previous_values[attr] = prev - end - end - @modified_attributes.add(attr) - end - if value.instance_of?(Array) - value = value.dup.freeze - end - self.instance_variable_set("@#{attr}",value) - end - define_method("#{attr}") do |*args| - attr_value = self.instance_variable_get("@#{attr}") - - if self.class.not_show_all_languages?(attr_value, args) - is_array = attr_value.values.first.is_a?(Array) - attr_value = attr_value.values.flatten - attr_value = attr_value.first unless is_array - end - - - if self.class.handler?(attr) - if @loaded_attributes.include?(attr) - return attr_value - end - value = self.send("#{self.class.handler(attr)}") - self.instance_variable_set("@#{attr}",value) - @loaded_attributes << attr - return value - end - - if (not @persistent) or @loaded_attributes.include?(attr) - return attr_value - else - # TODO: bug here when no labels from one of the main_lang available... (when it is called by ontologies_linked_data ontologies_submission) - raise Goo::Base::AttributeNotLoaded, "Attribute `#{attr}` is not loaded for #{self.id}. Loaded attributes: #{@loaded_attributes.inspect}." - end - end + def collection_opts + @model_settings[:collection] end def uuid_uri_generator(inst) @@ -308,19 +90,14 @@ def uuid_uri_generator(inst) if Goo.id_prefix return RDF::URI.new(Goo.id_prefix + model_name_uri + '/' + Goo.uuid) end - return namespace[ model_name_uri + '/' + Goo.uuid] + namespace[model_name_uri + '/' + Goo.uuid] end def uri_type(*args) - if @model_settings[:rdf_type] - return @model_settings[:rdf_type].call(*args) - end - return @uri_type + @model_settings[:rdf_type] ? 
@model_settings[:rdf_type].call(*args) : @uri_type end + alias :type_uri :uri_type - def namespace - return @namespace - end def id_prefix model_name_uri = model_name.to_s @@ -328,16 +105,15 @@ def id_prefix if Goo.id_prefix return RDF::URI.new(Goo.id_prefix + model_name_uri + '/') end - return namespace[model_name_uri + '/'] + namespace[model_name_uri + '/'] end - def id_from_unique_attribute(attr,value_attr) + def id_from_unique_attribute(attr, value_attr) if value_attr.nil? raise Goo::Base::IDGenerationError, "`#{attr}` value is nil. Id for resource cannot be generated." end uri_last_fragment = CGI.escape(value_attr) - model_prefix_uri = id_prefix() - return model_prefix_uri + uri_last_fragment + id_prefix + uri_last_fragment end def enum(*values) @@ -348,25 +124,11 @@ def enum(*values) end def name_with - return @model_settings[:name_with] - end - - def load_inmutable_instances - #TODO this should be SYNC - @inm_instances = nil - ins = self.where.include(self.attributes).all - @inm_instances = {} - ins.each do |ins| - @inm_instances[ins.id] = ins - end + @model_settings[:name_with] end def attribute_loaded?(attr) - return @loaded_attributes.include?(attr) - end - - def inm_instances - @inm_instances + @loaded_attributes.include?(attr) end def struct_object(attrs) @@ -377,12 +139,12 @@ def struct_object(attrs) attrs << :unmapped attrs << collection_opts if collection_opts attrs.uniq! - return Struct.new(*attrs) + Struct.new(*attrs) end STRUCT_CACHE = {} ## - # Return a struct-based, + # Return a struct-based, # read-only instance for a class that is populated with the contents of `attributes` def read_only(attributes) if !attributes.is_a?(Hash) || attributes.empty? 
@@ -396,11 +158,10 @@ def read_only(attributes) cls = STRUCT_CACHE[attributes.keys.hash] instance = cls.new instance.klass = self - attributes.each {|k,v| instance[k] = v} + attributes.each { |k, v| instance[k] = v } instance end - def show_all_languages?(args) args.first.is_a?(Hash) && args.first.keys.include?(:include_languages) && args.first[:include_languages] end @@ -408,21 +169,7 @@ def show_all_languages?(args) def not_show_all_languages?(values, args) values.is_a?(Hash) && !show_all_languages?(args) end - - private - - def set_no_list_by_default(options) - if options[:enforce].nil? or !options[:enforce].include?(:list) - options[:enforce] = options[:enforce] ? (options[:enforce] << :no_list) : [:no_list] - end - end - def set_data_type(options) - if options[:type] - options[:enforce] += Array(options[:type]) - options[:enforce].uniq! - options.delete :type - end - end + end end end diff --git a/lib/goo/base/where.rb b/lib/goo/base/where.rb index 81cd26ce..d4668e4c 100644 --- a/lib/goo/base/where.rb +++ b/lib/goo/base/where.rb @@ -6,6 +6,7 @@ class Where AGGREGATE_PATTERN = Struct.new(:pattern,:aggregate) attr_accessor :where_options_load + include Goo::SPARQL::Processor def initialize(klass,*match_patterns) if Goo.queries_debug? && Thread.current[:ncbo_debug].nil? @@ -122,118 +123,7 @@ def unmmaped_predicates() end def process_query(count=false) - if Goo.queries_debug? 
&& Thread.current[:ncbo_debug] - tstart = Time.now - query_resp = process_query_intl(count=count) - (Thread.current[:ncbo_debug][:goo_process_query] ||= []) << (Time.now - tstart) - return query_resp - end - return process_query_intl(count=count) - end - - def process_query_intl(count=false) - if @models == [] - @result = [] - return @result - end - - @include << @include_embed if @include_embed.length > 0 - - @predicates = unmmaped_predicates() - @equivalent_predicates = retrieve_equivalent_predicates() - - options_load = { models: @models, include: @include, ids: @ids, - graph_match: @pattern, klass: @klass, - filters: @filters, order_by: @order_by , - read_only: @read_only, rules: @rules, - predicates: @predicates, - no_graphs: @no_graphs, - equivalent_predicates: @equivalent_predicates } - - options_load.merge!(@where_options_load) if @where_options_load - if !@klass.collection_opts.nil? and !options_load.include?(:collection) - raise ArgumentError, "Collection needed call `#{@klass.name}`" - end - - ids = nil - if @index_key - raise ArgumentError, "Redis is not configured" unless Goo.redis_client - rclient = Goo.redis_client - cache_key = cache_key_for_index(@index_key) - raise ArgumentError, "Index not found" unless rclient.exists(cache_key) - if @page_i - if !@count - @count = rclient.llen(cache_key) - end - rstart = (@page_i -1) * @page_size - rstop = (rstart + @page_size) -1 - ids = rclient.lrange(cache_key,rstart,rstop) - else - ids = rclient.lrange(cache_key,0,-1) - end - ids = ids.map { |i| RDF::URI.new(i) } - end - - if @page_i && !@index_key - page_options = options_load.dup - page_options.delete(:include) - page_options[:include_pagination] = @include - if not @pre_count.nil? 
- @count = @pre_count - else - if !@count && @do_count - page_options[:count] = :count - @count = Goo::SPARQL::Queries.model_load(page_options).to_i - end - end - page_options.delete :count - page_options[:query_options] = @query_options - page_options[:page] = { page_i: @page_i, page_size: @page_size } - models_by_id = Goo::SPARQL::Queries.model_load(page_options) - options_load[:models] = models_by_id.values - - #models give the constraint - options_load.delete :graph_match - elsif count - count_options = options_load.dup - count_options.delete(:include) - count_options[:count] = :count - return Goo::SPARQL::Queries.model_load(count_options).to_i - end - - if @indexing - #do not care about include values - @result = Goo::Base::Page.new(@page_i,@page_size,@count,models_by_id.values) - return @result - end - - options_load[:ids] = ids if ids - models_by_id = {} - if @page_i && (options_load[:models].length > 0) - options_load.delete(:filters) - options_load.delete(:order_by) - end - - if (@page_i && options_load[:models].length > 0) || - (!@page_i && (@count.nil? || @count > 0)) - models_by_id = Goo::SPARQL::Queries.model_load(options_load) - if @aggregate - if models_by_id.length > 0 - options_load_agg = { models: models_by_id.values, klass: @klass, - filters: @filters, read_only: @read_only, - aggregate: @aggregate, rules: @rules } - - options_load_agg.merge!(@where_options_load) if @where_options_load - Goo::SPARQL::Queries.model_load(options_load_agg) - end - end - end - unless @page_i - @result = @models ? @models : models_by_id.values - else - @result = Goo::Base::Page.new(@page_i,@page_size,@count,models_by_id.values) - end - @result + process_query_call(count = count) end def disable_rules @@ -279,11 +169,6 @@ def index_as(index_key,max=nil) end def all - if @result.nil? && @klass.inmutable? && @klass.inm_instances - if @pattern.nil? && @filters.nil? 
- @result = @klass.inm_instances.values - end - end process_query unless @result @result end diff --git a/lib/goo/config/config.rb b/lib/goo/config/config.rb new file mode 100644 index 00000000..2019893c --- /dev/null +++ b/lib/goo/config/config.rb @@ -0,0 +1,80 @@ +require 'ostruct' + +module Goo + extend self + attr_reader :settings + + @settings = OpenStruct.new + @settings_run = false + + def config(&block) + return if @settings_run + @settings_run = true + + yield @settings if block_given? + + # Set defaults + @settings.goo_backend_name ||= ENV['GOO_BACKEND_NAME'] || '4store' + @settings.goo_port ||= ENV['GOO_PORT'] || 9000 + @settings.goo_host ||= ENV['GOO_HOST'] || 'localhost' + @settings.goo_path_query ||= ENV['GOO_PATH_QUERY'] || '/sparql/' + @settings.goo_path_data ||= ENV['GOO_PATH_DATA'] || '/data/' + @settings.goo_path_update ||= ENV['GOO_PATH_UPDATE'] || '/update/' + @settings.search_server_url ||= ENV['SEARCH_SERVER_URL'] || 'http://localhost:8983/solr' + @settings.redis_host ||= ENV['REDIS_HOST'] || 'localhost' + @settings.redis_port ||= ENV['REDIS_PORT'] || 6379 + @settings.bioportal_namespace ||= ENV['BIOPORTAL_NAMESPACE'] || 'http://data.bioontology.org/' + @settings.queries_debug ||= ENV['QUERIES_DEBUG'] || false + @settings.slice_loading_size ||= ENV['GOO_SLICES'] || 500 + puts "(GOO) >> Using RDF store (#{@settings.goo_backend_name}) #{@settings.goo_host}:#{@settings.goo_port}#{@settings.goo_path_query}" + puts "(GOO) >> Using term search server at #{@settings.search_server_url}" + puts "(GOO) >> Using Redis instance at #{@settings.redis_host}:#{@settings.redis_port}" + + connect_goo + end + + def connect_goo + begin + Goo.configure do |conf| + conf.queries_debug(@settings.queries_debug) + conf.add_sparql_backend(:main, + backend_name: @settings.goo_backend_name, + query: "http://#{@settings.goo_host}:#{@settings.goo_port}#{@settings.goo_path_query}", + data: "http://#{@settings.goo_host}:#{@settings.goo_port}#{@settings.goo_path_data}", + 
update: "http://#{@settings.goo_host}:#{@settings.goo_port}#{@settings.goo_path_update}", + options: { rules: :NONE }) + conf.add_search_backend(:main, service: @settings.search_server_url) + conf.add_redis_backend(host: @settings.goo_redis_host, port: @settings.goo_redis_port) + + conf.add_namespace(:omv, RDF::Vocabulary.new("http://omv.org/ontology/")) + conf.add_namespace(:skos, RDF::Vocabulary.new("http://www.w3.org/2004/02/skos/core#")) + conf.add_namespace(:owl, RDF::Vocabulary.new("http://www.w3.org/2002/07/owl#")) + conf.add_namespace(:rdfs, RDF::Vocabulary.new("http://www.w3.org/2000/01/rdf-schema#")) + conf.add_namespace(:goo, RDF::Vocabulary.new("http://goo.org/default/"), default = true) + conf.add_namespace(:metadata, RDF::Vocabulary.new("http://goo.org/metadata/")) + conf.add_namespace(:foaf, RDF::Vocabulary.new("http://xmlns.com/foaf/0.1/")) + conf.add_namespace(:rdf, RDF::Vocabulary.new("http://www.w3.org/1999/02/22-rdf-syntax-ns#")) + conf.add_namespace(:tiger, RDF::Vocabulary.new("http://www.census.gov/tiger/2002/vocab#")) + conf.add_namespace(:nemo, RDF::Vocabulary.new("http://purl.bioontology.org/NEMO/ontology/NEMO_annotation_properties.owl#")) + conf.add_namespace(:bioportal, RDF::Vocabulary.new(@settings.bioportal_namespace)) + conf.use_cache = false + end + rescue StandardError => e + abort("EXITING: Goo cannot connect to triplestore and/or search server:\n #{e}\n#{e.backtrace.join("\n")}") + end + end + + def self.test_reset + if @@sparql_backends[:main][:query].url.to_s["localhost"].nil? 
+ raise Exception, "only for testing" + end + @@sparql_backends = {} + Goo.add_sparql_backend(:main, + backend_name: @settings.goo_backend_name, + query: "http://#{@settings.goo_host}:#{@settings.goo_port}#{@settings.goo_path_query}", + data: "http://#{@settings.goo_host}:#{@settings.goo_port}#{@settings.goo_path_data}", + update: "http://#{@settings.goo_host}:#{@settings.goo_port}#{@settings.goo_path_update}", + options: { rules: :NONE }) + end + +end diff --git a/lib/goo/mixins/sparql_client.rb b/lib/goo/mixins/sparql_client.rb index fcfb7888..d4d98523 100644 --- a/lib/goo/mixins/sparql_client.rb +++ b/lib/goo/mixins/sparql_client.rb @@ -6,91 +6,51 @@ def to_uri module RDF def self.URI(*args, &block) - return args.first - end - - class Writer - def validate? - false - end - end - - class Literal - def to_base - text = [] - text << %("#{escape(value)}") - text << "@#{language}" if has_language? - if has_datatype? - if datatype.respond_to?:to_base - text << "^^#{datatype.to_base}" - else - text << "^^<#{datatype.to_s}>" - end - end - text.join "" - end + return RDF::URI.new(*args) end class URI - def initialize(uri_or_options) - case uri_or_options - when Hash - @uri = Addressable::URI.new(uri_or_options) - when Addressable::URI - @uri = uri_or_options - else - @uri = uri_or_options.to_s - #@uri = Addressable::URI.parse(uri_or_options.to_s) - end - rescue Addressable::URI::InvalidURIError => e - raise ArgumentError, e.message - end - - def method_missing(symbol, *args, &block) - unless @uri.respond_to?(symbol) - if (Addressable::URI.instance_methods.include?(symbol) && @uri.instance_of?(String)) - @uri = Addressable::URI.parse(@uri) - end - end - if @uri.respond_to?(symbol) - case result = @uri.send(symbol, *args, &block) - when Addressable::URI - self.class.new(result) - else result - end + # Delegate any undefined method calls to the String object + def method_missing(method, *args, &block) + if self.to_s.respond_to?(method) + self.to_s.send(method, *args, 
&block) else super end end - def last_part - f = fragment - return f if f - return to_s.split("/")[-1] + # Ensure respond_to? reflects the delegated methods + def respond_to_missing?(method, include_private = false) + self.to_s.respond_to?(method) || super end - def respond_to?(symbol,include_private = false) - @uri.respond_to?(symbol,include_private=false) || super - end + end - def hash - @uri.to_s.hash + class Writer + def validate? + false end - - end #end URI + end class Literal - @@subclasses_by_uri = {} - def self.datatyped_class(uri) - return nil if uri.nil? - if @@subclasses.length != (@@subclasses_by_uri.length + 1) - @@subclasses.each do |child| - if child.const_defined?(:DATATYPE) - @@subclasses_by_uri[child.const_get(:DATATYPE).to_s] = child - end - end - end - return @@subclasses_by_uri[uri] + class DateTime < Temporal + FORMAT = '%Y-%m-%dT%H:%M:%S'.freeze # the format that is supported by 4store + end + + def initialize(value, language: nil, datatype: nil, lexical: nil, validate: false, canonicalize: false, **options) + @object = value.freeze + @string = lexical if lexical + @string = value if !defined?(@string) && value.is_a?(String) + @string = @string.encode(Encoding::UTF_8).freeze if instance_variable_defined?(:@string) + @object = @string if instance_variable_defined?(:@string) && @object.is_a?(String) + @language = language.to_s.downcase.to_sym if language + @datatype = RDF::URI(datatype).freeze if datatype + @datatype ||= self.class.const_get(:DATATYPE) if self.class.const_defined?(:DATATYPE) + @datatype ||= instance_variable_defined?(:@language) && @language ? 
RDF.langString : RDF::URI("http://www.w3.org/2001/XMLSchema#string") + @original_datatype = datatype end + + attr_reader :original_datatype end + end #end RDF diff --git a/lib/goo/search/search.rb b/lib/goo/search/search.rb index 1dc72ea9..b0cccfce 100644 --- a/lib/goo/search/search.rb +++ b/lib/goo/search/search.rb @@ -1,4 +1,5 @@ require 'rsolr' +require_relative 'solr/solr_connector' module Goo @@ -8,102 +9,184 @@ def self.included(base) base.extend(ClassMethods) end - def index(connection_name=:main) + def index(connection_name = nil, to_set = nil) raise ArgumentError, "ID must be set to be able to index" if @id.nil? - doc = indexable_object - Goo.search_connection(connection_name).add(doc) + document = indexable_object(to_set) + + return if document.blank? || document[:id].blank? + + connection_name ||= self.class.search_collection_name + unindex(connection_name) + self.class.search_client(connection_name).index_document(document) end - def index_update(to_set, connection_name=:main) + def index_update(attributes_to_update, connection_name = nil, to_set = nil) raise ArgumentError, "ID must be set to be able to index" if @id.nil? - raise ArgumentError, "Field names to be updated in index must be provided" if to_set.nil? + raise ArgumentError, "Field names to be updated in index must be provided" if attributes_to_update.blank? + + old_doc = self.class.search("id:\"#{index_id}\"").dig("response", "docs")&.first + + raise ArgumentError, "ID must be set to be able to index" if old_doc.blank? + doc = indexable_object(to_set) - doc.each { |key, val| - next if key === :id - doc[key] = {set: val} - } + doc.each do |key, val| + next unless attributes_to_update.any? 
{ |attr| key.to_s.eql?(attr.to_s) || key.to_s.include?("#{attr}_") } + old_doc[key] = val + end + + connection_name ||= self.class.search_collection_name + unindex(connection_name) - Goo.search_connection(connection_name).update( - data: "[#{doc.to_json}]", - headers: { 'Content-Type' => 'application/json' } - ) + old_doc.reject! { |k, v| k.to_s.end_with?('_sort') || k.to_s.end_with?('_sorts') } + old_doc.delete("_version_") + self.class.search_client(connection_name).index_document(old_doc) end - def unindex(connection_name=:main) - id = index_id - Goo.search_connection(connection_name).delete_by_id(id) + def unindex(connection_name = nil) + connection_name ||= self.class.search_collection_name + self.class.search_client(connection_name).delete_by_id(index_id) end # default implementation, should be overridden by child class - def index_id() + def index_id raise ArgumentError, "ID must be set to be able to index" if @id.nil? @id.to_s end # default implementation, should be overridden by child class - def index_doc(to_set=nil) + def index_doc(to_set = nil) raise NoMethodError, "You must define method index_doc in your class for it to be indexable" end - def indexable_object(to_set=nil) - doc = index_doc(to_set) - # use resource_id for the actual term id because :id is a Solr reserved field - doc[:resource_id] = doc[:id].to_s - doc[:id] = index_id.to_s - doc + def embedded_doc + raise NoMethodError, "You must define method embedded_doc in your class for it to be indexable" end + def indexable_object(to_set = nil) + begin + document = index_doc(to_set) + rescue NoMethodError + document = self.to_hash.reject { |k, _| !self.class.indexable?(k) } + document.transform_values! do |v| + is_array = v.is_a?(Array) + v = Array(v).map do |x| + if x.is_a?(Goo::Base::Resource) + x.embedded_doc rescue x.id.to_s + else + if x.is_a?(RDF::URI) + x.to_s + else + x.respond_to?(:object) ? x.object : x + end + end + end + is_array ? 
v : v.first + end + end + + document = document.reduce({}) do |h, (k, v)| + if v.is_a?(Hash) + v.each { |k2, v2| h["#{k}_#{k2}".to_sym] = v2 } + else + h[k] = v + end + h + end + + model_name = self.class.model_name.to_s.downcase + document.delete(:id) + document.delete("id") + + document.transform_keys! do |k| + self.class.index_document_attr(k) + end + + document[:resource_id] = self.id.to_s + document[:resource_model] = model_name + document[:id] = index_id.to_s + document + end module ClassMethods - def search(q, params={}, connection_name=:main) - params["q"] = q - Goo.search_connection(connection_name).post('select', :data => params) + def index_enabled? + !@model_settings[:search_collection].nil? end - def indexBatch(collection, connection_name=:main) - docs = Array.new - collection.each do |c| - docs << c.indexable_object + def enable_indexing(collection_name, search_backend = :main, &block) + @model_settings[:search_collection] = collection_name + + if block_given? + # optional block to generate custom schema + Goo.add_search_connection(collection_name, search_backend, &block) + else + Goo.add_search_connection(collection_name, search_backend) end - Goo.search_connection(connection_name).add(docs) + + after_save :index + after_destroy :unindex end - def unindexBatch(collection, connection_name=:main) - docs = Array.new - collection.each do |c| - docs << c.index_id - end - Goo.search_connection(connection_name).delete_by_id(docs) + def search_collection_name + @model_settings[:search_collection] + end + + def search_client(connection_name = search_collection_name) + Goo.search_client(connection_name) + end + + def custom_schema?(connection_name = search_collection_name) + search_client(connection_name)&.custom_schema? + end + + def schema_generator + Goo.search_client(search_collection_name).schema_generator + end + + def index_document_attr(key) + return key.to_s if custom_schema? || self.attribute_settings(key).nil? 
+ + type = self.datatype(key) + is_list = self.list?(key) + fuzzy = self.fuzzy_searchable?(key) + + SOLR::SolrConnector.index_document_attr(key, type, is_list, fuzzy) + end + + def search(q, params = {}, connection_name = search_collection_name) + search_client(connection_name).search(q, params) + end + + def submit_search_query(query, params = {}, connection_name = search_collection_name) + search_client(connection_name).submit_search_query(query, params) + end + + def indexBatch(collection, connection_name = search_collection_name) + docs = collection.map(&:indexable_object) + search_client(connection_name).index_document(docs) end - def unindexByQuery(query, connection_name=:main) - Goo.search_connection(connection_name).delete_by_query(query) + def unindexBatch(collection, connection_name = search_collection_name) + docs = collection.map(&:index_id) + search_client(connection_name).delete_by_id(docs) end - # Get the doc that will be indexed in solr - def get_indexable_object() - # To make the code less readable the guys that wrote it managed to hide the real function called by this line - # It is "get_index_doc" in ontologies_linked_data Class.rb - doc = self.class.model_settings[:search_options][:document].call(self) - doc[:resource_id] = doc[:id].to_s - doc[:id] = get_index_id.to_s - # id: clsUri_ONTO-ACRO_submissionNumber. 
i.e.: http://lod.nal.usda.gov/nalt/5260_NALT_4 - doc + def unindexByQuery(query, connection_name = search_collection_name) + search_client(connection_name).delete_by_query(query) end - def indexCommit(attrs=nil, connection_name=:main) - Goo.search_connection(connection_name).commit(:commit_attributes => attrs || {}) + def indexCommit(attrs = nil, connection_name = search_collection_name) + search_client(connection_name).index_commit(attrs) end - def indexOptimize(attrs=nil, connection_name=:main) - Goo.search_connection(connection_name).optimize(:optimize_attributes => attrs || {}) + def indexOptimize(attrs = nil, connection_name = search_collection_name) + search_client(connection_name).index_optimize(attrs) end - def indexClear(connection_name=:main) - # WARNING: this deletes ALL data from the index - unindexByQuery("*:*", connection_name) + # WARNING: this deletes ALL data from the index + def indexClear(connection_name = search_collection_name) + search_client(connection_name).clear_all_data end end end diff --git a/lib/goo/search/solr/solr_admin.rb b/lib/goo/search/solr/solr_admin.rb new file mode 100644 index 00000000..4d20271b --- /dev/null +++ b/lib/goo/search/solr/solr_admin.rb @@ -0,0 +1,79 @@ +module SOLR + module Administration + + def admin_url + "#{@solr_url}/admin" + end + + def solr_alive? 
+ collections_url = URI.parse("#{admin_url}/collections?action=CLUSTERSTATUS") + http = Net::HTTP.new(collections_url.host, collections_url.port) + request = Net::HTTP::Get.new(collections_url.request_uri) + + begin + response = http.request(request) + return response.code.eql?("200") && JSON.parse(response.body).dig("responseHeader", "status").eql?(0) + rescue StandardError => e + return false + end + end + + def fetch_all_collections + collections_url = URI.parse("#{admin_url}/collections?action=LIST") + + http = Net::HTTP.new(collections_url.host, collections_url.port) + request = Net::HTTP::Get.new(collections_url.request_uri) + + begin + response = http.request(request) + raise StandardError, "Failed to fetch collections. HTTP #{response.code}: #{response.message}" unless response.code.to_i == 200 + rescue StandardError => e + raise StandardError, "Failed to fetch collections. #{e.message}" + end + + collections = [] + if response.is_a?(Net::HTTPSuccess) + collections = JSON.parse(response.body)['collections'] + end + + collections + end + + def create_collection(name = @collection_name, num_shards = 1, replication_factor = 1) + return if collection_exists?(name) + create_collection_url = URI.parse("#{admin_url}/collections?action=CREATE&name=#{name}&numShards=#{num_shards}&replicationFactor=#{replication_factor}") + + http = Net::HTTP.new(create_collection_url.host, create_collection_url.port) + request = Net::HTTP::Post.new(create_collection_url.request_uri) + + begin + response = http.request(request) + raise StandardError, "Failed to create collection. HTTP #{response.code}: #{response.message}" unless response.code.to_i == 200 + rescue StandardError => e + raise StandardError, "Failed to create collection. 
#{e.message}" + end + end + + def delete_collection(collection_name = @collection_name) + return unless collection_exists?(collection_name) + + delete_collection_url = URI.parse("#{admin_url}/collections?action=DELETE&name=#{collection_name}") + + http = Net::HTTP.new(delete_collection_url.host, delete_collection_url.port) + request = Net::HTTP::Post.new(delete_collection_url.request_uri) + + begin + response = http.request(request) + raise StandardError, "Failed to delete collection. HTTP #{response.code}: #{response.message}" unless response.code.to_i == 200 + rescue StandardError => e + raise StandardError, "Failed to delete collection. #{e.message}" + end + + end + + def collection_exists?(collection_name) + fetch_all_collections.include?(collection_name.to_s) + end + end +end + diff --git a/lib/goo/search/solr/solr_connector.rb b/lib/goo/search/solr/solr_connector.rb new file mode 100644 index 00000000..e367f5cd --- /dev/null +++ b/lib/goo/search/solr/solr_connector.rb @@ -0,0 +1,41 @@ +require 'rsolr' +require_relative 'solr_schema_generator' +require_relative 'solr_schema' +require_relative 'solr_admin' +require_relative 'solr_query' + +module SOLR + + class SolrConnector + include Schema, Administration, Query + attr_reader :solr + + def initialize(solr_url, collection_name) + @solr_url = solr_url + @collection_name = collection_name + @solr = RSolr.connect(url: collection_url) + + # Perform a status test and wait up to 30 seconds before raising an error + wait_time = 0 + max_wait_time = 30 + until solr_alive? || wait_time >= max_wait_time + sleep 1 + wait_time += 1 + end + raise "Solr instance not reachable within #{max_wait_time} seconds" unless solr_alive? 
+ + + @custom_schema = false + end + + def init(force = false) + return if collection_exists?(@collection_name) && !force + + create_collection + + init_schema + end + + end +end + diff --git a/lib/goo/search/solr/solr_query.rb b/lib/goo/search/solr/solr_query.rb new file mode 100644 index 00000000..ed194950 --- /dev/null +++ b/lib/goo/search/solr/solr_query.rb @@ -0,0 +1,108 @@ +module SOLR + module Query + + def self.included(base) + base.extend(ClassMethods) + end + + module ClassMethods + def index_document_attr(key, type, is_list, fuzzy_search) + dynamic_field(type: type, is_list: is_list, is_fuzzy_search: fuzzy_search).gsub('*', key.to_s) + end + + private + + def dynamic_field(type:, is_list:, is_fuzzy_search: false) + return is_list ? '*_texts' : '*_text' if is_fuzzy_search + + dynamic_type = case type + when :uri, :string, nil + '*_t' + when :integer + '*_i' + when :boolean + '*_b' + when :date_time + '*_dt' + when :float + '*_f' + else + # Handle unknown data types or raise an error based on your specific requirements + raise ArgumentError, "Unsupported ORM data type: #{type}" + end + + if is_list + dynamic_type = dynamic_type.eql?('*_t') ? "*_txt" : "#{dynamic_type}s" + end + + dynamic_type + end + end + + def clear_all_data + delete_by_query('*:*') + end + + def collection_url + "#{@solr_url}/#{@collection_name}" + end + + def index_commit(attrs = nil) + @solr.commit(:commit_attributes => attrs || {}) + end + + def index_optimize(attrs = nil) + @solr.optimize(:optimize_attributes => attrs || {}) + end + + def index_document(document, commit: true) + @solr.add(document) + @solr.commit if commit + end + + def index_document_attr(key, type, is_list, fuzzy_search) + self.class.index_document_attr(key, type, is_list, fuzzy_search) + end + + + + def delete_by_id(document_id, commit: true) + return if document_id.nil? 
+ + @solr.delete_by_id(document_id) + @solr.commit if commit + end + + def delete_by_query(query) + @solr.delete_by_query(query) + @solr.commit + end + + def search(query, params = {}) + params[:q] = query + @solr.get('select', params: params) + end + + def submit_search_query(query, params = {}) + uri = ::URI.parse("#{collection_url}/select") + + http = Net::HTTP.new(uri.host, uri.port) + request = Net::HTTP::Post.new(uri.request_uri) + + params[:q] = query + request.set_form_data(params) + + response = http.request(request) + + if response.is_a?(Net::HTTPSuccess) + JSON.parse(response.body) + else + puts "Error: #{response.code} - #{response.message}" + nil + end + end + + + end +end + diff --git a/lib/goo/search/solr/solr_schema.rb b/lib/goo/search/solr/solr_schema.rb new file mode 100644 index 00000000..8c38fd2f --- /dev/null +++ b/lib/goo/search/solr/solr_schema.rb @@ -0,0 +1,184 @@ +module SOLR + module Schema + + def fetch_schema + uri = URI.parse("#{@solr_url}/#{@collection_name}/schema") + http = Net::HTTP.new(uri.host, uri.port) + + request = Net::HTTP::Get.new(uri.path, 'Content-Type' => 'application/json') + response = http.request(request) + + if response.code.to_i == 200 + @schema = JSON.parse(response.body)["schema"] + else + raise StandardError, "Failed to upload schema. 
HTTP #{response.code}: #{response.body}" + end + end + + def schema + @schema ||= fetch_schema + end + + def all_fields + schema["fields"] + end + + def all_copy_fields + schema["copyFields"] + end + + def all_dynamic_fields + schema["dynamicFields"] + end + + def all_fields_types + schema["fieldTypes"] + end + + def fetch_all_fields + fetch_schema["fields"] + end + + def fetch_all_copy_fields + fetch_schema["copyFields"] + end + + def fetch_all_dynamic_fields + fetch_schema["dynamicFields"] + end + + def fetch_all_fields_types + fetch_schema["fieldTypes"] + end + + def schema_generator + @schema_generator ||= SolrSchemaGenerator.new + end + + def init_collection(num_shards = 1, replication_factor = 1) + create_collection_url = URI.parse("#{@solr_url}/admin/collections?action=CREATE&name=#{@collection_name}&numShards=#{num_shards}&replicationFactor=#{replication_factor}") + + http = Net::HTTP.new(create_collection_url.host, create_collection_url.port) + request = Net::HTTP::Post.new(create_collection_url.request_uri) + + begin + response = http.request(request) + raise StandardError, "Failed to create collection. HTTP #{response.code}: #{response.message}" unless response.code.to_i == 200 + rescue StandardError => e + raise StandardError, "Failed to create collection. #{e.message}" + end + end + + def init_schema(generator = schema_generator) + clear_all_schema(generator) + fetch_schema + default_fields = all_fields.map { |f| f['name'] } + + solr_schema = { + "add-field-type": generator.field_types_to_add, + 'add-field' => generator.fields_to_add.reject { |f| default_fields.include?(f[:name]) }, + 'add-dynamic-field' => generator.dynamic_fields_to_add, + 'add-copy-field' => generator.copy_fields_to_add + } + + update_schema(solr_schema) + end + + def custom_schema? 
+ @custom_schema + end + + def enable_custom_schema + @custom_schema = true + end + + def clear_all_schema(generator = schema_generator) + init_ft = generator.field_types_to_add.map { |f| f[:name] } + dynamic_fields = all_dynamic_fields.map { |f| { name: f['name'] } } + copy_fields = all_copy_fields.map { |f| { source: f['source'], dest: f['dest'] } } + fields_types = all_fields_types.select { |f| init_ft.include?(f['name']) }.map { |f| { name: f['name']} } + fields = all_fields.reject { |f| %w[id _version_ ].include?(f['name']) }.map { |f| { name: f['name'] } } + + upload_schema('delete-copy-field' => copy_fields) unless copy_fields.empty? + upload_schema('delete-dynamic-field' => dynamic_fields) unless dynamic_fields.empty? + upload_schema('delete-field' => fields) unless copy_fields.empty? + upload_schema('delete-field-type' => fields_types) unless fields_types.empty? + end + + def map_to_indexer_type(orm_data_type) + case orm_data_type + when :uri + 'string' # Assuming a string field for URIs + when :string, nil # Default to 'string' if no type is given + 'text_general' # Assuming a generic text field for strings + when :integer + 'pint' + when :boolean + 'boolean' + when :date_time + 'pdate' + when :float + 'pfloat' + else + # Handle unknown data types or raise an error based on your specific requirements + raise ArgumentError, "Unsupported ORM data type: #{orm_data_type}" + end + end + + def delete_field(name) + update_schema('delete-field' => [ + { name: name } + ]) + end + + def add_field(name, type, indexed: true, stored: true, multi_valued: false) + update_schema('add-field' => [ + { name: name, type: type, indexed: indexed, stored: stored, multiValued: multi_valued } + ]) + end + + def add_dynamic_field(name, type, indexed: true, stored: true, multi_valued: false) + update_schema('add-dynamic-field' => [ + { name: name, type: type, indexed: indexed, stored: stored, multiValued: multi_valued } + ]) + end + + def add_copy_field(source, dest) + 
update_schema('add-copy-field' => [ + { source: source, dest: dest } + ]) + end + + def fetch_field(name) + fetch_all_fields.select { |f| f['name'] == name }.first + end + + def update_schema(schema_json) + permitted_actions = %w[add-field add-copy-field add-dynamic-field add-field-type delete-copy-field delete-dynamic-field delete-field delete-field-type] + + unless permitted_actions.any? { |action| schema_json.key?(action) } + raise StandardError, "The schema need to implement at least one of this actions: #{permitted_actions.join(', ')}" + end + upload_schema(schema_json) + fetch_schema + end + + private + + def upload_schema(schema_json) + uri = URI.parse("#{@solr_url}/#{@collection_name}/schema") + http = Net::HTTP.new(uri.host, uri.port) + + request = Net::HTTP::Post.new(uri.path, 'Content-Type' => 'application/json') + request.body = schema_json.to_json + response = http.request(request) + if response.code.to_i == 200 + response + else + raise StandardError, "Failed to upload schema. HTTP #{response.code}: #{response.body}" + end + end + + end +end + diff --git a/lib/goo/search/solr/solr_schema_generator.rb b/lib/goo/search/solr/solr_schema_generator.rb new file mode 100644 index 00000000..ba23e70b --- /dev/null +++ b/lib/goo/search/solr/solr_schema_generator.rb @@ -0,0 +1,279 @@ +module SOLR + + class SolrSchemaGenerator + + attr_reader :schema + + def initialize + @schema = {} + end + + def add_field(name, type, indexed: true, stored: true, multi_valued: false, omit_norms: nil) + @schema['add-field'] ||= [] + af = { name: name.to_s, type: type, indexed: indexed, stored: stored, multiValued: multi_valued} + af[:omitNorms] = omit_norms unless omit_norms.nil? 
+ @schema['add-field'] << af + end + + def add_dynamic_field(name, type, indexed: true, stored: true, multi_valued: false, omit_norms: nil) + @schema['add-dynamic-field'] ||= [] + df = { name: name.to_s, type: type, indexed: indexed, stored: stored, multiValued: multi_valued } + df[:omitNorms] = omit_norms unless omit_norms.nil? + @schema['add-dynamic-field'] << df + end + + def add_copy_field(source, dest) + @schema['add-copy-field'] ||= [] + @schema['add-copy-field'] << { source: source, dest: dest } + end + + def add_field_type(type_definition) + @schema['add-field-type'] ||= [] + @schema['add-field-type'] << type_definition + end + + def fields_to_add + custom_fields = @schema['add-field'] || [] + custom_fields + init_fields + end + + def dynamic_fields_to_add + custom_fields = @schema['add-dynamic-field'] || [] + custom_fields + init_dynamic_fields + end + + def copy_fields_to_add + custom_fields = @schema['add-copy-field'] || [] + custom_fields + init_copy_fields + end + + def field_types_to_add + custom_fields = @schema['add-field-type'] || [] + custom_fields + init_fields_types + end + + def init_fields_types + [ + { + "name": "string_ci", + "class": "solr.TextField", + "sortMissingLast": true, + "omitNorms": true, + "queryAnalyzer": + { + "tokenizer": { + "class": "solr.KeywordTokenizerFactory" + }, + "filters": [ + { + "class": "solr.LowerCaseFilterFactory" + } + ] + } + }, + { + "name": "text_suggest_ngram", + "class": "solr.TextField", + "positionIncrementGap": "100", + "analyzer": { + "tokenizer": { + "class": "solr.StandardTokenizerFactory" + }, + "filters": [ + { + "class": "solr.LowerCaseFilterFactory" + }, + { + "class": "solr.EdgeNGramTokenizerFactory", + "minGramSize": 1, + "maxGramSize": 25 + } + ] + } + }, + { + "name": "text_suggest_edge", + "class": "solr.TextField", + "positionIncrementGap": "100", + "indexAnalyzer": { + "tokenizer": { + "class": "solr.KeywordTokenizerFactory" + }, + "char_filters": [ + { + "class": 
"solr.MappingCharFilterFactory", + "mapping": "solr/resources/org/apache/lucene/analysis/miscellaneous/MappingCharFilter.greekaccent" + } + ], + "filters": [ + { + "class": "solr.LowerCaseFilterFactory" + }, + { + "class": "solr.PatternReplaceFilterFactory", + "pattern": "([\\.,;:-_])", + "replacement": " ", + "replace": "all" + }, + { + "class": "solr.EdgeNGramFilterFactory", + "minGramSize": 1, + "maxGramSize": 30, + "preserveOriginal": true + }, + { + "class": "solr.PatternReplaceFilterFactory", + "pattern": "([^\\w\\d\\*æøåÆØÅ ])", + "replacement": "", + "replace": "all" + } + ] + }, + "queryAnalyzer": { + "tokenizer": { + "class": "solr.KeywordTokenizerFactory" + }, + "char_filters": [ + { + "class": "solr.MappingCharFilterFactory", + "mapping": "solr/resources/org/apache/lucene/analysis/miscellaneous/MappingCharFilter.greekaccent" + } + ], + "filters": [ + { + "class": "solr.LowerCaseFilterFactory" + }, + { + "class": "solr.PatternReplaceFilterFactory", + "pattern": "([\\.,;:-_])", + "replacement": " ", + "replace": "all" + }, + { + "class": "solr.PatternReplaceFilterFactory", + "pattern": "([^\\w\\d\\*æøåÆØÅ ])", + "replacement": "", + "replace": "all" + }, + { + "class": "solr.PatternReplaceFilterFactory", + "pattern": "^(.{30})(.*)?", + "replacement": "$1", + "replace": "all" + } + ] + } + }, + { + "name": "text_suggest", + "class": "solr.TextField", + "positionIncrementGap": 100, + indexAnalyzer: { + "char_filters": [ + { + "class": "solr.MappingCharFilterFactory", + "mapping": "solr/resources/org/apache/lucene/analysis/miscellaneous/MappingCharFilter.greekaccent" + } + ], + "tokenizer": { + "class": "solr.StandardTokenizerFactory" + }, + "filters": [ + { + "class": "solr.WordDelimiterGraphFilterFactory", + "generateWordParts": "1", + "generateNumberParts": "1", + "catenateWords": "1", + "catenateNumbers": "1", + "catenateAll": "1", + "splitOnCaseChange": "1", + "splitOnNumerics": "1", + "preserveOriginal": "1" + }, + { + "class": 
"solr.LowerCaseFilterFactory" + }, + { + "class": "solr.PatternReplaceFilterFactory", + "pattern": "([^\w\d*æøåÆØÅ ])", + "replacement": " ", + "replace": "all" + } + ] + }, + queryAnalyzer: { + "char_filters": [ + { + "class": "solr.MappingCharFilterFactory", + "mapping": "solr/resources/org/apache/lucene/analysis/miscellaneous/MappingCharFilter.greekaccent" + } + ], + "tokenizer": { + "class": "solr.StandardTokenizerFactory" + }, + "filters": [ + { + "class": "solr.WordDelimiterGraphFilterFactory", + "generateWordParts": "0", + "generateNumberParts": "0", + "catenateWords": "0", + "catenateNumbers": "0", + "catenateAll": "0", + "splitOnCaseChange": "0", + "splitOnNumerics": "0" + }, + { + "class": "solr.LowerCaseFilterFactory" + }, + { + "class": "solr.PatternReplaceFilterFactory", + "pattern": "([^\w\d*æøåÆØÅ ])", + "replacement": " ", + "replace": "all" + } + ] + } + } + ] + end + + def init_fields + [ + #{ name: "_version_", type: "plong", indexed: true, stored: true, multiValued: false }, + { name: "resource_id", type: "string_ci", indexed: true, multiValued: false, required: true, stored: true }, + { name: "resource_model", type: "string", indexed: true, multiValued: false, required: true, stored: false }, + { name: "_text_", type: "text_general", indexed: true, multiValued: true, stored: false }, + ] + end + + def init_dynamic_fields + [ + {"name": "*_t", "type": "text_general", stored: true, "multiValued": false }, + {"name": "*_txt", "type": "text_general", stored: true, "multiValued": true}, + {"name": "*_i", "type": "pint", stored: true }, + {"name": "*_is", "type": "pints", stored: true }, + {"name": "*_f", "type": "pfloat", stored: true }, + {"name": "*_fs", "type": "pfloats", stored: true }, + {"name": "*_b", "type": "boolean", stored: true }, + {"name": "*_bs", "type": "booleans", stored: true }, + {"name": "*_dt", "type": "pdate", stored: true }, + {"name": "*_dts", "type": "pdate", stored: true , multiValued: true}, + { "name": "*Exact", "type": 
"string_ci", "multiValued": true, stored: false }, + { "name": "*Suggest", "type": "text_suggest", "omitNorms": true, stored: false, "multiValued": true }, + { "name": "*SuggestEdge", "type": "text_suggest_edge", stored: false, "multiValued": true }, + { "name": "*SuggestNgram", "type": "text_suggest_ngram", stored: false, "omitNorms": true, "multiValued": true }, + { "name": "*_text", "type": "text_general", stored: true, "multiValued": false }, + { "name": "*_texts", "type": "text_general", stored: true, "multiValued": true }, + {"name": "*_sort", "type": "string", stored: false }, + {"name": "*_sorts", "type": "strings", stored: false , "multiValued": true}, + ] + end + + def init_copy_fields + [ + { source: "*_text", dest: %w[_text_ *Exact *Suggest *SuggestEdge *SuggestNgram *_sort] }, + { source: "*_texts", dest: %w[_text_ *Exact *Suggest *SuggestEdge *SuggestNgram *_sorts] }, + ] + end + end +end diff --git a/lib/goo/sparql/client.rb b/lib/goo/sparql/client.rb index 8f7ad9e1..cf958398 100644 --- a/lib/goo/sparql/client.rb +++ b/lib/goo/sparql/client.rb @@ -14,7 +14,6 @@ class Client < RSPARQL::Client "text/x-nquads" => "nquads" } - BACKEND_4STORE = "4store" def status_based_sleep_time(operation) sleep(0.5) @@ -39,16 +38,17 @@ def status_based_sleep_time(operation) end class DropGraph - def initialize(g) + def initialize(g, silent: false) @graph = g @caching_options = { :graph => @graph.to_s } + @silent = silent end def to_s - return "DROP GRAPH <#{@graph.to_s}>" + "DROP #{@silent ? 
'SILENT' : ''} GRAPH <#{@graph.to_s}>" end def options #Returns the caching option - return @caching_options + @caching_options end end @@ -77,7 +77,7 @@ def bnodes_filter_file(file_path,mime_type) end def delete_data_graph(graph) - Goo.sparql_update_client.update(DropGraph.new(graph)) + Goo.sparql_update_client.update(DropGraph.new(graph, silent: Goo.backend_vo?)) end def append_triples_no_bnodes(graph,file_path,mime_type_in) @@ -184,9 +184,7 @@ def status resp end - private - - def execute_append_request(graph, data_file, mime_type_in) + def params_for_backend(graph, data_file, mime_type_in, method = :post) mime_type = "text/turtle" if mime_type_in == "text/x-nquads" @@ -194,10 +192,9 @@ def execute_append_request(graph, data_file, mime_type_in) graph = "http://data.bogus.graph/uri" end - params = {method: :post, url: "#{url.to_s}", headers: {"content-type" => mime_type, "mime-type" => mime_type}, timeout: nil} - backend_name = Goo.sparql_backend_name + params = {method: method, url: "#{url.to_s}", headers: {"content-type" => mime_type, "mime-type" => mime_type}, timeout: nil} - if backend_name == BACKEND_4STORE + if Goo.backend_4s? params[:payload] = { graph: graph.to_s, data: data_file, @@ -205,12 +202,18 @@ def execute_append_request(graph, data_file, mime_type_in) } #for some reason \\\\ breaks parsing params[:payload][:data] = params[:payload][:data].split("\n").map { |x| x.sub("\\\\","") }.join("\n") + elsif Goo.backend_vo? 
+ params[:url] = "http://localhost:8890/sparql-graph-crud?graph=#{CGI.escape(graph.to_s)}" + params[:payload] = data_file else params[:url] << "?context=#{CGI.escape("<#{graph.to_s}>")}" params[:payload] = data_file end + params + end - RestClient::Request.execute(params) + def execute_append_request(graph, data_file, mime_type_in) + RestClient::Request.execute(params_for_backend(graph, data_file, mime_type_in)) end end end diff --git a/lib/goo/sparql/loader.rb b/lib/goo/sparql/loader.rb index 094fbba2..cc101855 100644 --- a/lib/goo/sparql/loader.rb +++ b/lib/goo/sparql/loader.rb @@ -36,63 +36,48 @@ def model_load(*options) ## def model_load_sliced(*options) options = options.last - ids = options[:ids] klass = options[:klass] incl = options[:include] models = options[:models] - aggregate = options[:aggregate] - read_only = options[:read_only] collection = options[:collection] - count = options[:count] - include_pagination = options[:include_pagination] - equivalent_predicates = options[:equivalent_predicates] - predicates = options[:predicates] - embed_struct, klass_struct = get_structures(aggregate, count, incl, include_pagination, klass, read_only) - - raise_resource_must_persistent_error(models) if models + embed_struct, klass_struct = get_structures(options[:aggregate], options[:count] , incl, options[:include_pagination], klass, options[:read_only]) + raise_not_persistent_error(models) if models graphs = get_graphs(collection, klass) - ids, models_by_id = get_models_by_id_hash(ids, klass, klass_struct, models) + models_by_id = get_models_by_id_hash( options[:ids], klass, klass_struct, models) - query_options = {} #TODO: breaks the reasoner patterns = [[:id, RDF.type, klass.uri_type(collection)]] incl_embed = nil - unmapped = nil bnode_extraction = nil properties_to_include = [] variables = [:id] - if incl - if incl.first && incl.first.is_a?(Hash) && incl.first.include?(:bnode) + if incl && !incl.empty? 
+ if incl.first.is_a?(Hash) && incl.first.include?(:bnode) #limitation only one level BNODE bnode_extraction, patterns, variables = get_bnode_extraction(collection, incl, klass, patterns) else variables = %i[id attributeProperty attributeObject] if incl.first == :unmapped - unmapped = true - properties_to_include = predicate_map(predicates) + properties_to_include = predicate_map(options[:predicates]) else - #make it deterministic - incl_embed = get_embed_includes(incl) - graphs, properties_to_include, query_options = get_includes(collection, graphs, incl, - klass, query_options) + graphs, properties_to_include, incl_embed = get_includes(collection, graphs, incl, klass) end end end - expand_equivalent_predicates(properties_to_include, equivalent_predicates) - query_builder = Goo::SPARQL::QueryBuilder.new options - select, aggregate_projections = query_builder.build_select_query(ids, variables, graphs, - patterns, query_options, - properties_to_include) + options[:properties_to_include] = properties_to_include + + + select, aggregate_projections = Goo::SPARQL::QueryBuilder.new(options) + .build_query(models_by_id.keys, variables, graphs, patterns) solution_mapper = Goo::SPARQL::SolutionMapper.new aggregate_projections, bnode_extraction, embed_struct, incl_embed, klass_struct, models_by_id, - properties_to_include, unmapped, - variables, ids, options + variables, options solution_mapper.map_each_solutions(select) end @@ -102,16 +87,6 @@ def model_load_sliced(*options) def set_request_lang(options) options[:requested_lang] = RequestStore.store[:requested_lang] end - def expand_equivalent_predicates(properties_to_include, eq_p) - - return unless eq_p && !eq_p.empty? 
- - properties_to_include&.each do |property_attr, property| - property_uri = property[:uri] - property[:equivalents] = eq_p[property_uri.to_s].to_a.map { |p| RDF::URI.new(p) } if eq_p.include?(property_uri.to_s) - end - - end def predicate_map(predicates) predicates_map = nil @@ -120,10 +95,11 @@ def predicate_map(predicates) predicates_map = {} uniq_p.each do |p| i = 0 - key = ('var_' + p.last_part + i.to_s).to_sym + last_part = p.to_s.include?("#") ? p.to_s.split('#').last : p.to_s.split('/').last + key = ('var_' + last_part + i.to_s).to_sym while predicates_map.include?(key) i += 1 - key = ('var_' + p.last_part + i.to_s).to_sym + key = ('var_' + last_part + i.to_s).to_sym break if i > 10 end predicates_map[key] = { uri: p, is_inverse: false } @@ -132,19 +108,19 @@ def predicate_map(predicates) predicates_map end - def get_includes(collection, graphs, incl, klass, query_options) + def get_includes(collection, graphs, incl, klass) + incl_embed ,incl = get_embed_includes(incl) incl = incl.to_a incl.delete_if { |a| !a.instance_of?(Symbol) } properties_to_include = {} incl.each do |attr| graph, pattern = query_pattern(klass, attr, collection: collection) - add_rules(attr, klass, query_options) if klass.attributes(:all).include?(attr) properties_to_include[attr] = { uri: pattern[1], is_inverse: klass.inverse?(attr) } # [property_attr, property_uri , inverse: true] end graphs << graph if graph && (!klass.collection_opts || klass.inverse?(attr)) end - [graphs, properties_to_include,query_options] + [graphs, properties_to_include, incl_embed] end def get_bnode_extraction(collection, incl, klass, patterns) @@ -181,7 +157,7 @@ def get_models_by_id_hash(ids, klass, klass_struct, models) #a where without models end - return ids, models_by_id + models_by_id end def get_graphs(collection, klass) @@ -234,7 +210,7 @@ def get_structures(aggregate, count, incl, include_pagination, klass, read_only) [embed_struct, klass_struct] end - def 
raise_resource_must_persistent_error(models) + def raise_not_persistent_error(models) models.each do |m| if (not m.nil?) && !m.respond_to?(:klass) #read only raise ArgumentError, @@ -252,7 +228,7 @@ def get_embed_includes(incl) #variables.concat(embed_variables) incl.concat(embed_variables) end - incl_embed + [incl_embed, incl] end end diff --git a/lib/goo/sparql/mixins/query_pattern.rb b/lib/goo/sparql/mixins/query_pattern.rb index cc370795..9ee0df7d 100644 --- a/lib/goo/sparql/mixins/query_pattern.rb +++ b/lib/goo/sparql/mixins/query_pattern.rb @@ -3,9 +3,6 @@ module SPARQL module QueryPatterns - def add_rules(attr,klass,query_options) - (query_options[:rules] ||= []) << :SUBC if klass.transitive?(attr) - end def query_pattern(klass,attr,**opts) value = opts[:value] || nil diff --git a/lib/goo/sparql/mixins/solution_lang_filter.rb b/lib/goo/sparql/mixins/solution_lang_filter.rb index dedc09fb..32660cca 100644 --- a/lib/goo/sparql/mixins/solution_lang_filter.rb +++ b/lib/goo/sparql/mixins/solution_lang_filter.rb @@ -105,7 +105,7 @@ def language_match?(language) return requested_lang.include?(language) if requested_lang.is_a?(Array) - language.eql?(requested_lang) + language&.upcase.eql?(requested_lang) end def literal?(object) diff --git a/lib/goo/sparql/processor.rb b/lib/goo/sparql/processor.rb new file mode 100644 index 00000000..c89778ed --- /dev/null +++ b/lib/goo/sparql/processor.rb @@ -0,0 +1,137 @@ +module Goo + module SPARQL + module Processor + def process_query_call(count=false) + if Goo.queries_debug? 
&& Thread.current[:ncbo_debug] + start = Time.now + query_resp = process_query_intl(count=count) + (Thread.current[:ncbo_debug][:goo_process_query] ||= []) << (Time.now - start) + return query_resp + end + process_query_init(count=count) + end + + private + def process_query_init(count=false) + if @models == [] + @result = [] + return @result + end + + @include << @include_embed if @include_embed.length > 0 + + @predicates = unmmaped_predicates() + @equivalent_predicates = retrieve_equivalent_predicates() + + options_load = { models: @models, include: @include, ids: @ids, + graph_match: @pattern, klass: @klass, + filters: @filters, order_by: @order_by , + read_only: @read_only, rules: @rules, + predicates: @predicates, + no_graphs: @no_graphs, + equivalent_predicates: @equivalent_predicates } + + options_load.merge!(@where_options_load) if @where_options_load + + if !@klass.collection_opts.nil? and !options_load.include?(:collection) + raise ArgumentError, "Collection needed call `#{@klass.name}`" + end + + ids = nil + + + ids = redis_indexed_ids if use_redis_index? + + if @page_i && !use_redis_index? + page_options = options_load.dup + page_options.delete(:include) + page_options[:include_pagination] = @include + page_options[:query_options] = @query_options + + @count = run_count_query(page_options) + page_options[:page] = { page_i: @page_i, page_size: @page_size } + + models_by_id = Goo::SPARQL::Queries.model_load(page_options) + options_load[:models] = models_by_id.values + #models give the constraint + options_load.delete :graph_match + elsif count + count_options = options_load.dup + count_options.delete(:include) + return run_count_query(count_options) + end + + if @indexing + #do not care about include values + @result = Goo::Base::Page.new(@page_i,@page_size,@count,models_by_id.values) + return @result + end + + options_load[:ids] = ids if ids + models_by_id = {} + + if (@page_i && options_load[:models].nil?) 
|| + (@page_i && options_load[:models].length > 0) || + (!@page_i && (@count.nil? || @count > 0)) + + models_by_id = Goo::SPARQL::Queries.model_load(options_load) + run_aggregate_query(models_by_id) if @aggregate && models_by_id.length > 0 + end + + if @page_i + @result = Goo::Base::Page.new(@page_i, @page_size, @count, models_by_id.values) + else + @result = @models ? @models : models_by_id.values + end + @result + end + + + def use_redis_index? + @index_key + end + + def run_aggregate_query(models_by_id) + options_load_agg = { models: models_by_id.values, klass: @klass, + filters: @filters, read_only: @read_only, + aggregate: @aggregate, rules: @rules } + options_load_agg.merge!(@where_options_load) if @where_options_load + Goo::SPARQL::Queries.model_load(options_load_agg) + end + def run_count_query(page_options) + count = 0 + if @pre_count + count = @pre_count + elsif !@count && @do_count + page_options[:count] = :count + r = Goo::SPARQL::Queries.model_load(page_options) + if r.is_a? 
Numeric + count = r.to_i + end + elsif @count + count = @count + end + page_options.delete :count + count + end + + def redis_indexed_ids + raise ArgumentError, "Redis is not configured" unless Goo.redis_client + rclient = Goo.redis_client + cache_key = cache_key_for_index(@index_key) + raise ArgumentError, "Index not found" unless rclient.exists(cache_key) + if @page_i + if !@count + @count = rclient.llen(cache_key) + end + rstart = (@page_i -1) * @page_size + rstop = (rstart + @page_size) -1 + ids = rclient.lrange(cache_key,rstart,rstop) + else + ids = rclient.lrange(cache_key,0,-1) + end + ids = ids.map { |i| RDF::URI.new(i) } + end + end + end +end diff --git a/lib/goo/sparql/query_builder.rb b/lib/goo/sparql/query_builder.rb index 31880859..d41ced10 100644 --- a/lib/goo/sparql/query_builder.rb +++ b/lib/goo/sparql/query_builder.rb @@ -14,29 +14,31 @@ def initialize(options) @unions = options[:unions] || [] @aggregate = options[:aggregate] @collection = options[:collection] - @model_query_options = options[:query_options] @enable_rules = options[:rules] @order_by = options[:order_by] @internal_variables_map = {} + @equivalent_predicates = options[:equivalent_predicates] + @properties_to_include = options[:properties_to_include] @query = get_client end - def build_select_query(ids, variables, graphs, patterns, - query_options, properties_to_include) + def build_query(ids, variables, graphs, patterns) + query_options = {} + expand_equivalent_predicates(@properties_to_include, @equivalent_predicates) + + properties_to_include = @properties_to_include patterns = graph_match(@collection, @graph_match, graphs, @klass, patterns, query_options, @unions) variables, patterns = add_some_type_to_id(patterns, query_options, variables) aggregate_projections, aggregate_vars, variables, optional_patterns = get_aggregate_vars(@aggregate, @collection, graphs, @klass, @unions, variables) query_filter_str, patterns, optional_patterns, filter_variables = 
filter_query_strings(@collection, graphs, @klass, optional_patterns, patterns, @query_filters) @order_by, variables, optional_patterns = init_order_by(@count, @klass, @order_by, optional_patterns, variables,patterns, query_options, graphs) - order_by_str, order_variables = order_by_string - variables = [] if @count variables.delete :some_type - select_distinct(variables, aggregate_projections, filter_variables, order_variables) + select_distinct(variables, aggregate_projections, filter_variables) .from(graphs) .where(patterns) .union_bind_in_where(properties_to_include) @@ -49,48 +51,50 @@ def build_select_query(ids, variables, graphs, patterns, @query.filter(filter) end - @query.union(*@unions) unless @unions.empty? + Array(@unions).each do |union| + @query.union(*union) + end ids_filter(ids) if ids - @query.order_by(*order_by_str) if @order_by + @query.order_by(*order_by_string) if @order_by put_query_aggregate_vars(aggregate_vars) if aggregate_vars count if @count paginate if @page - ## TODO see usage of rules and query_options - query_options.merge!(@model_query_options) if @model_query_options - query_options[:rules] = [:NONE] unless @enable_rules - query_options = nil if query_options.empty? - if query_options - query_options[:rules] = query_options[:rules]&.map { |x| x.to_s }.join('+') - else - query_options = { rules: ['NONE'] } - end - @query.options[:query_options] = query_options [@query, aggregate_projections] end def union_bind_in_where(properties) binding_as = [] - properties.each do |property_attr, property| - predicates = [property[:uri]] + (property[:equivalents] || []) - options = { - binds: [{ value: property_attr, as: :attributeProperty }] - } - subject = property[:subject] || :id - predicates.uniq.each do |predicate_uri| - pattern = if property[:is_inverse] - [:attributeObject, predicate_uri, subject] - else - [subject, predicate_uri, :attributeObject] - end - binding_as << [[pattern], options] + if Goo.backend_4s? || Goo.backend_gb? 
+ properties.each do |property_attr, property| + predicates = [property[:uri]] + (property[:equivalents] || []) + options = { + binds: [{ value: property_attr, as: :attributeProperty }] + } + subject = property[:subject] || :id + predicates.uniq.each do |predicate_uri| + pattern = if property[:is_inverse] + [:attributeObject, predicate_uri, subject] + else + [subject, predicate_uri, :attributeObject] + end + binding_as << [[pattern], options] + end end + + else + direct_predicate, inverse_predicate = include_properties + direct_filter = direct_predicate.empty? ? [] : [{ values: direct_predicate, predicate: :attributeProperty }] + inverse_filter = inverse_predicate.empty? ? [] : [{ values: inverse_predicate, predicate: :attributeProperty }] + binding_as << [[[:id, :attributeProperty, :attributeObject]], { filters: direct_filter}] unless direct_filter.empty? + binding_as << [[[:inverseAttributeObject, :attributeProperty, :id]], { filters: inverse_filter}] unless inverse_filter.empty? end + @query.optional_union_with_bind_as(*binding_as) unless binding_as.empty? self end @@ -127,7 +131,7 @@ def order_by_string order_variables << attr "#{order.to_s.upcase}(?#{attr})" end - [order_str,order_variables] + order_str end def from(graphs) @@ -142,11 +146,11 @@ def from(graphs) self end - def select_distinct(variables, aggregate_variables, filter_variables, order_variables) + def select_distinct(variables, aggregate_patterns, filter_variables) + variables << :inverseAttributeObject if inverse_predicate? select_vars = variables.dup - reject_aggregations_from_vars(select_vars, aggregate_variables) if aggregate_variables - # Fix for 4store pagination with a filter https://github.com/ontoportal-lirmm/ontologies_api/issues/25 - select_vars = (select_vars + filter_variables + order_variables).uniq if @page + reject_aggregations_from_vars(select_vars, aggregate_patterns) if aggregate_patterns + select_vars = (select_vars + filter_variables).uniq if @page && Goo.backend_4s? 
# Fix for 4store pagination with a filter @query = @query.select(*select_vars).distinct(true) self end @@ -164,6 +168,16 @@ def ids_filter(ids) private + def include_properties + direct_predicates = @properties_to_include.select { |_, property| !property[:is_inverse] }.map { |_, property| [property[:uri]] + (property[:equivalents] || []) }.flatten + inverse_predicates = @properties_to_include.select { |_, property| property[:is_inverse] }.map { |_, property| [property[:uri]] + (property[:equivalents] || []) }.flatten + [direct_predicates, inverse_predicates] + end + + def inverse_predicate? + @properties_to_include.any? { |_, property| property[:is_inverse] } + end + def patterns_for_match(klass, attr, value, graphs, patterns, unions, internal_variables, subject = :id, in_union = false, in_aggregate = false, query_options = {}, collection = nil) @@ -182,7 +196,7 @@ def patterns_for_match(klass, attr, value, graphs, patterns, unions, @internal_variables_map[new_internal_var] = value.empty? ? attr : {attr => value} end - add_rules(attr, klass, query_options) + graph, pattern = query_pattern(klass, attr, value: new_internal_var, subject: subject, collection: collection) if pattern @@ -370,10 +384,13 @@ def query_filter_sparql(klass, filter, filter_patterns, filter_graphs, else value = RDF::Literal.new(filter_operation.value) if filter_operation.value.is_a? 
String - value = RDF::Literal.new(filter_operation.value, :datatype => RDF::XSD.string) + value = RDF::Literal.new(filter_operation.value) + filter_var = "str(?#{filter_var})" + else + filter_var = "?#{filter_var}" end filter_operations << ( - "?#{filter_var.to_s} #{sparql_op_string(filter_operation.operator)} " + + "#{filter_var.to_s} #{sparql_op_string(filter_operation.operator)} " + " #{value.to_ntriples}") end @@ -403,7 +420,7 @@ def filter_query_strings(collection, graphs, klass, patterns.concat(filter_patterns) end end - filter_variables << inspected_patterns.values.last + #filter_variables << inspected_patterns.values.last end [query_filter_str, patterns, optional_patterns, filter_variables] end @@ -424,6 +441,16 @@ def add_some_type_to_id(patterns, query_options, variables) def internal_variables @internal_variables_map.keys end + + def expand_equivalent_predicates(query_properties, eq_p) + + return unless eq_p && !eq_p.empty? + + query_properties&.each do |_, property| + property_uri = property[:uri] + property[:equivalents] = eq_p[property_uri.to_s].to_a.map { |p| RDF::URI.new(p) } if eq_p.include?(property_uri.to_s) + end + end end end end diff --git a/lib/goo/sparql/solutions_mapper.rb b/lib/goo/sparql/solutions_mapper.rb index 64d258d5..18ad4f06 100644 --- a/lib/goo/sparql/solutions_mapper.rb +++ b/lib/goo/sparql/solutions_mapper.rb @@ -3,39 +3,36 @@ module SPARQL class SolutionMapper BNODES_TUPLES = Struct.new(:id, :attribute) - def initialize(aggregate_projections, bnode_extraction, embed_struct, - incl_embed, klass_struct, models_by_id, - properties_to_include, unmapped, variables, ids, options) + def initialize(aggregate_projections, bnode_extraction, embed_struct,incl_embed, klass_struct, models_by_id, variables, options) @aggregate_projections = aggregate_projections @bnode_extraction = bnode_extraction @embed_struct = embed_struct @incl_embed = incl_embed + @incl = options[:include] @klass_struct = klass_struct @models_by_id = models_by_id - 
@properties_to_include = properties_to_include - @unmapped = unmapped + @properties_to_include = options[:properties_to_include] + @unmapped = options[:include] && options[:include].first.eql?(:unmapped) @variables = variables - @ids = ids + @ids = models_by_id.keys @klass = options[:klass] @read_only = options[:read_only] - @incl = options[:include] @count = options[:count] @collection = options[:collection] @options = options end - + def map_each_solutions(select) found = Set.new objects_new = {} list_attributes = Set.new(@klass.attributes(:list)) - all_attributes = Set.new(@klass.attributes(:all)) @lang_filter = Goo::SPARQL::Solution::LanguageFilter.new(requested_lang: @options[:requested_lang].to_s, unmapped: @unmapped, - list_attributes: list_attributes) - + list_attributes: list_attributes) + select.each_solution do |sol| - + next if sol[:some_type] && @klass.type_uri(@collection) != sol[:some_type] return sol[:count_var].object if @count @@ -59,26 +56,32 @@ def map_each_solutions(select) next end - predicate = sol[:attributeProperty].to_s.to_sym + predicates = find_predicate(sol[:attributeProperty], inverse: !sol[:inverseAttributeObject].nil?) + next if predicates.empty? - next if predicate.nil? 
|| !all_attributes.include?(predicate) + object = if sol[:attributeObject] + sol[:attributeObject] + elsif sol[:inverseAttributeObject] + sol[:inverseAttributeObject] + end - object = sol[:attributeObject] - # bnodes - if bnode_id?(object, predicate) - objects_new = bnode_id_tuple(id, object, objects_new, predicate) - next + predicates.each do |predicate| + # bnodes + if bnode_id?(object, predicate) + objects_new = bnode_id_tuple(id, object, objects_new, predicate) + next + end + + objects, objects_new = get_value_object(id, objects_new, object, list_attributes, predicate) + add_object_to_model(id, objects, object, predicate) end - objects, objects_new = get_value_object(id, objects_new, object, list_attributes, predicate) - add_object_to_model(id, objects, object, predicate) end - - # for this moment we are not going to enrich models , maybe we will use it if the results are empty + # for this moment we are not going to enrich models , maybe we will use it if the results are empty @lang_filter.fill_models_with_all_languages(@models_by_id) - init_unloaded_attributes(found, list_attributes) + init_unloaded_attributes(list_attributes) return @models_by_id if @bnode_extraction @@ -97,19 +100,30 @@ def map_each_solutions(select) include_bnodes(blank_nodes, @models_by_id) unless blank_nodes.empty? models_unmapped_to_array(@models_by_id) if @unmapped - - + + @models_by_id end private - def init_unloaded_attributes(found, list_attributes) - return if @incl.nil? + def find_predicate(predicate, unmapped: false, inverse: false) + if Goo.backend_4s? || Goo.backend_gb? + return [] if predicate.nil? || unmapped && @properties_to_include[predicate].nil? + predicate = predicate.to_s.to_sym + else + predicate = @properties_to_include.select { |x, v| v[:uri].to_s.eql?(predicate.to_s) || v[:equivalents]&.any? { |e| e.to_s.eql?(predicate.to_s) } } + return [] if predicate.empty? 
+ predicate = predicate.select{|x, y| y[:is_inverse]&.eql?(inverse)}.keys + end + Array(predicate) + end + + def init_unloaded_attributes(list_attributes) + return if @incl.nil? || @incl.empty? # Here we are setting to nil all attributes that have been included but not found in the triplestore - found.uniq.each do |model_id| - m = @models_by_id[model_id] + @models_by_id.each do |id, m| @incl.each do |attr_to_incl| is_handler = m.respond_to?(:handler?) && m.class.handler?(attr_to_incl) next if attr_to_incl.to_s.eql?('unmapped') || is_handler @@ -133,12 +147,12 @@ def init_unloaded_attributes(found, list_attributes) def get_value_object(id, objects_new, object, list_attributes, predicate) object = object.object if object && !(object.is_a? RDF::URI) range_for_v = @klass.range(predicate) - + if object.is_a?(RDF::URI) && (predicate != :id) && !range_for_v.nil? if objects_new.include?(object) object = objects_new[object] - elsif !range_for_v.inmutable? + else pre_val = get_preload_value(id, object, predicate) object, objects_new = if !@read_only preloaded_or_new_object(object, objects_new, pre_val, predicate) @@ -146,8 +160,6 @@ def get_value_object(id, objects_new, object, list_attributes, predicate) # depedent read only preloaded_or_new_struct(object, objects_new, pre_val, predicate) end - else - object = range_for_v.find(object).first end end @@ -156,7 +168,7 @@ def get_value_object(id, objects_new, object, list_attributes, predicate) if object.nil? object = pre.nil? ? [] : pre - else + else object = pre.nil? ? [object] : (Array(pre).dup << object) object.uniq! end @@ -170,8 +182,8 @@ def add_object_to_model(id, objects, current_obj, predicate) if @models_by_id[id].respond_to?(:klass) @models_by_id[id][predicate] = objects unless objects.nil? && !@models_by_id[id][predicate].nil? elsif !@models_by_id[id].class.handler?(predicate) && - !(objects.nil? && !@models_by_id[id].instance_variable_get("@#{predicate}").nil?) && - predicate != :id + !(objects.nil? 
&& !@models_by_id[id].instance_variable_get("@#{predicate}").nil?) && + predicate != :id @lang_filter.set_model_value(@models_by_id[id], predicate, objects, current_obj) end end @@ -329,7 +341,7 @@ def model_set_collection_attributes(models_by_id, objects_new) collection_attribute = obj_new[:klass].collection_opts obj_new[collection_attribute] = collection_value elsif obj_new.class.respond_to?(:collection_opts) && - obj_new.class.collection_opts.instance_of?(Symbol) + obj_new.class.collection_opts.instance_of?(Symbol) collection_attribute = obj_new.class.collection_opts obj_new.send("#{collection_attribute}=", collection_value) end @@ -369,11 +381,9 @@ def dependent_model_creation(embed_struct, id, models_by_id, object, objects_new if range_for_v if objects_new.include?(object) object = objects_new[object] - elsif !range_for_v.inmutable? - pre_val = get_pre_val(id, models_by_id, object, v, read_only) - object = get_object_from_range(pre_val, embed_struct, object, objects_new, v, options) else - object = range_for_v.find(object).first + pre_val = get_pre_val(id, models_by_id, object, v) + object = get_object_from_range(pre_val, embed_struct, object, objects_new, v) end end end @@ -399,8 +409,8 @@ def get_object_from_range(pre_val, embed_struct, object, objects_new, predicate) def get_pre_val(id, models_by_id, object, predicate) pre_val = nil if models_by_id[id] && - ((models_by_id[id].respond_to?(:klass) && models_by_id[id]) || - models_by_id[id].loaded_attributes.include?(predicate)) + ((models_by_id[id].respond_to?(:klass) && models_by_id[id]) || + models_by_id[id].loaded_attributes.include?(predicate)) pre_val = if !@read_only models_by_id[id].instance_variable_get("@#{predicate}") else @@ -413,13 +423,17 @@ def get_pre_val(id, models_by_id, object, predicate) end def add_unmapped_to_model(sol) - predicate = sol[:attributeProperty].to_s.to_sym - return unless @properties_to_include[predicate] - - id = sol[:id] - value = sol[:attributeObject] - - 
@lang_filter.set_unmapped_value(@models_by_id[id], @properties_to_include[predicate][:uri], value) + predicates = find_predicate(sol[:attributeProperty]) + predicates.each do |predicate| + if Goo.backend_4s? || Goo.backend_gb? + predicate = @properties_to_include[predicate][:uri] + else + predicate = sol[:attributeProperty] + end + id = sol[:id] + value = sol[:attributeObject] + @lang_filter.set_unmapped_value(@models_by_id[id], predicate, value) + end end def add_aggregations_to_model(sol) diff --git a/lib/goo/sparql/sparql.rb b/lib/goo/sparql/sparql.rb index 6fa1d582..d5315cde 100644 --- a/lib/goo/sparql/sparql.rb +++ b/lib/goo/sparql/sparql.rb @@ -8,3 +8,4 @@ require_relative "triples" require_relative "loader" require_relative "queries" +require_relative 'processor' diff --git a/lib/goo/sparql/triples.rb b/lib/goo/sparql/triples.rb index df3f9f1d..317d1d84 100644 --- a/lib/goo/sparql/triples.rb +++ b/lib/goo/sparql/triples.rb @@ -53,6 +53,8 @@ def self.model_update_triples(model) if model.previous_values graph_delete = RDF::Graph.new model.previous_values.each do |attr,value| + next unless model.modified_attributes.any?{|x| attr.eql?(x)} + predicate = model.class.attribute_uri(attr,model.collection) values = value.kind_of?(Array) ? 
value : [value] values.each do |v| diff --git a/lib/goo/utils/callbacks_utils.rb b/lib/goo/utils/callbacks_utils.rb new file mode 100644 index 00000000..b9e747ff --- /dev/null +++ b/lib/goo/utils/callbacks_utils.rb @@ -0,0 +1,22 @@ +module CallbackRunner + + def run_callbacks(inst, callbacks) + callbacks.each do |proc| + if instance_proc?(inst, proc) + call_proc(inst.method(proc)) + elsif proc.is_a?(Proc) + call_proc(proc) + end + end + end + + def instance_proc?(inst, opt) + opt && (opt.is_a?(Symbol) || opt.is_a?(String)) && inst.respond_to?(opt) + end + + def call_proc(proc) + proc.call + end + + +end \ No newline at end of file diff --git a/lib/goo/validators/enforce.rb b/lib/goo/validators/enforce.rb index d6f3816d..5c157fb1 100644 --- a/lib/goo/validators/enforce.rb +++ b/lib/goo/validators/enforce.rb @@ -1,9 +1,11 @@ +require_relative '../utils/callbacks_utils' module Goo module Validators module Enforce class EnforceInstance + include CallbackRunner attr_reader :errors_by_opt def initialize @errors_by_opt = {} @@ -67,7 +69,7 @@ def enforce(inst,attr,value) end def enforce_callback(inst, attr) - callbacks = Array(inst.class.update_callbacks(attr)) + callbacks = Array(inst.class.attribute_callbacks(attr)) callbacks.each do |proc| if instance_proc?(inst, proc) call_proc(inst.method(proc), inst, attr) @@ -87,10 +89,6 @@ def object_type?(opt) opt.respond_to?(:shape_attribute) ? opt : Goo.model_by_name(opt) end - def instance_proc?(inst, opt) - opt && (opt.is_a?(Symbol) || opt.is_a?(String)) && inst.respond_to?(opt) - end - def check_object_type(inst, attr, value, opt) model_range = object_type(opt) if model_range && !value.nil? 
diff --git a/rakelib/docker_based_test.rake b/rakelib/docker_based_test.rake new file mode 100644 index 00000000..c84879a9 --- /dev/null +++ b/rakelib/docker_based_test.rake @@ -0,0 +1,121 @@ +# Rake tasks for running unit tests with backend services running as docker containers + +desc 'Run unit tests with docker based backend' +namespace :test do + namespace :docker do + task :up do + system("docker compose up -d") || abort("Unable to start docker containers") + unless system("curl -sf http://localhost:8983/solr || exit 1") + printf("waiting for Solr container to initialize") + sec = 0 + until system("curl -sf http://localhost:8983/solr || exit 1") do + sleep(1) + printf(".") + sec += 1 + if sec > 30 + abort(" Solr container hasn't initialized properly") + end + end + printf("\n") + end + + end + task :down do + #system("docker compose --profile fs --profile ag stop") + #system("docker compose --profile fs --profile ag kill") + end + desc "run tests with docker AG backend" + task :ag do + ENV["GOO_BACKEND_NAME"]="allegrograph" + ENV["GOO_PORT"]="10035" + ENV["GOO_PATH_QUERY"]="/repositories/ontoportal_test" + ENV["GOO_PATH_DATA"]="/repositories/ontoportal_test/statements" + ENV["GOO_PATH_UPDATE"]="/repositories/ontoportal_test/statements" + ENV["COMPOSE_PROFILES"]="ag" + Rake::Task["test:docker:up"].invoke + # AG takes some time to start and create databases/accounts + # TODO: replace system curl command with native ruby code + unless system("curl -sf http://127.0.0.1:10035/repositories/ontoportal_test/status | grep -iqE '(^running|^lingering)' || exit 1") + printf("waiting for AllegroGraph container to initialize") + sec = 0 + until system("curl -sf http://127.0.0.1:10035/repositories/ontoportal_test/status | grep -iqE '(^running|^lingering)' || exit 1") do + sleep(1) + printf(".") + sec += 1 + end + end + puts + system("docker compose ps") # TODO: remove after GH actions troubleshooting is complete + Rake::Task["test"].invoke + 
Rake::Task["test:docker:down"].invoke + end + + desc "run tests with docker 4store backend" + task :fs do + ENV["GOO_PORT"]="9000" + ENV["COMPOSE_PROFILES"]='fs' + Rake::Task["test:docker:up"].invoke + Rake::Task["test"].invoke + Rake::Task["test:docker:down"].invoke + end + + desc "run tests with docker Virtuoso backend" + task :vo do + ENV["GOO_BACKEND_NAME"]="virtuoso" + ENV["GOO_PORT"]="8890" + ENV["GOO_PATH_QUERY"]="/sparql" + ENV["GOO_PATH_DATA"]="/sparql" + ENV["GOO_PATH_UPDATE"]="/sparql" + ENV["COMPOSE_PROFILES"]="vo" + Rake::Task["test:docker:up"].invoke + # + unless system("curl -sf http://localhost:8890/sparql || exit 1") + printf("waiting for Virtuoso container to initialize") + sec = 0 + until system("curl -sf http://localhost:8890/sparql || exit 1") do + sleep(1) + printf(".") + sec += 1 + if sec > 30 + system("docker compose logs virtuoso-ut") + abort(" Virtuoso container hasn't initialized properly") + end + end + end + Rake::Task["test"].invoke + Rake::Task["test:docker:down"].invoke + end + + + desc "run tests with docker GraphDb backend" + task :gb do + ENV["GOO_BACKEND_NAME"]="graphdb" + ENV["GOO_PORT"]="7200" + ENV["GOO_PATH_QUERY"]="/repositories/ontoportal" + ENV["GOO_PATH_DATA"]="/repositories/ontoportal/statements" + ENV["GOO_PATH_UPDATE"]="/repositories/ontoportal/statements" + ENV["COMPOSE_PROFILES"]="gb" + Rake::Task["test:docker:up"].invoke + + #system("docker compose cp ./test/data/graphdb-repo-config.ttl graphdb:/opt/graphdb/dist/configs/templates/graphdb-repo-config.ttl") + #system("docker compose cp ./test/data/graphdb-test-load.nt graphdb:/opt/graphdb/dist/configs/templates/graphdb-test-load.nt") + #system('docker compose exec graphdb sh -c "importrdf load -f -c /opt/graphdb/dist/configs/templates/graphdb-repo-config.ttl -m parallel /opt/graphdb/dist/configs/templates/graphdb-test-load.nt ;"') + unless system("curl -sf http://localhost:7200/repositories || exit 1") + printf("waiting for Graphdb container to initialize") + sec = 0 
+ until system("curl -sf http://localhost:7200/repositories || exit 1") do + sleep(1) + printf(".") + sec += 1 + if sec > 30 + system("docker compose logs graphdb") + abort(" Graphdb container hasn't initialized properly") + end + end + end + Rake::Task["test"].invoke + Rake::Task["test:docker:down"].invoke + end + + end +end diff --git a/test/app/bioportal.rb b/test/app/bioportal.rb index 2cef2c2b..8c99df03 100644 --- a/test/app/bioportal.rb +++ b/test/app/bioportal.rb @@ -4,93 +4,92 @@ require_relative '../test_case' require_relative './query_profiler' -GooTest.configure_goo - module Test - module BioPortal + module BioPortal class Ontology < Goo::Base::Resource model :ontology, namespace: :bioportal, name_with: :acronym attribute :acronym, namespace: :omv, enforce: [:existence, :unique] attribute :name, namespace: :omv, enforce: [:existence] attribute :administeredBy, enforce: [:user, :existence] - end + end class User < Goo::Base::Resource model :user, name_with: :username attribute :username, enforce: [:existence, :unique] attribute :email, enforce: [:existence, :email] attribute :roles, enforce: [:list, :role, :existence] - attribute :created, enforce: [ DateTime ], + attribute :created, enforce: [DateTime], default: lambda { |record| DateTime.now } - attribute :notes, inverse: { on: :note, attribute: :owner} - end + attribute :notes, inverse: { on: :note, attribute: :owner } + end class Role < Goo::Base::Resource - model :role, :inmutable, name_with: :code + model :role, name_with: :code attribute :code, enforce: [:existence, :unique] attribute :users, inverse: { on: :user, attribute: :roles } end class Note < Goo::Base::Resource - model :note, name_with: lambda { |s| id_generator(s) } + model :note, name_with: lambda { |s| id_generator(s) } attribute :content, enforce: [:existence] attribute :ontology, enforce: [:existence, :ontology] attribute :owner, enforce: [:existence, :user] + def self.id_generator(inst) - return 
RDF::URI.new("http://example.org/note/" + inst.owner.username + "/" + Random.rand(1000000).to_s ) + return RDF::URI.new("http://example.org/note/" + inst.owner.username + "/" + Random.rand(1000000).to_s) end end def self.benchmark_data - Goo.sparql_query_client.reset_profiling - if false - 10.times do |i| - Role.new(code: "role#{i}").save - end - puts "Roles created" - 900.times do |i| - roles = [] - 2.times do |j| - roles << Role.find("role#{j}").first + Goo.sparql_query_client.reset_profiling + if false + 10.times do |i| + Role.new(code: "role#{i}").save + end + puts "Roles created" + 900.times do |i| + roles = [] + 2.times do |j| + roles << Role.find("role#{j}").first + end + u = User.new(username: "user#{i}name", email: "email#{i}@example.org", roles: roles) + u.save + puts "#{i} users created" + end + 400.times do |i| + ont = Ontology.new(acronym: "ontology #{i}", name: "ontology ontology ontology #{i}") + ont.administeredBy = User.find("user#{i % 75}name").first + ont.save + end + binding.pry + 1000.times do |i| + ont = Ontology.where(acronym: "ontology #{Random.rand(200)}").all.first + owner = User.where(username: "user#{i % 300}name").include(:username).all.first + n = Note.new(content: "content " * 60, owner: owner, ontology: ont) + n.save + puts "created note #{i}" + end + binding.pry + 2000.times do |i| + ont = Ontology.where(acronym: "ontology #{Random.rand(15)}").all.first + owner = User.where(username: "user#{i % 200}name").include(:username).all.first + n = Note.new(content: "content " * 60, owner: owner, ontology: ont) + n.save + puts "created note #{i}" + end + binding.pry + 800.times do |i| + ont = Ontology.where(acronym: "ontology #{Random.rand(6)}").all.first + owner = User.where(username: "user#{i % 200}name").include(:username).all.first + n = Note.new(content: "content " * 60, owner: owner, ontology: ont) + n.save + puts "created note #{i}" end - u = User.new(username: "user#{i}name", email: "email#{i}@example.org", roles: roles) - u.save - 
puts "#{i} users created" - end - 400.times do |i| - ont = Ontology.new(acronym: "ontology #{i}",name: "ontology ontology ontology #{i}") - ont.administeredBy = User.find("user#{i % 75}name").first - ont.save - end - binding.pry - 1000.times do |i| - ont = Ontology.where(acronym: "ontology #{Random.rand(200)}").all.first - owner = User.where(username: "user#{i % 300}name").include(:username).all.first - n = Note.new(content: "content " * 60, owner: owner, ontology: ont) - n.save - puts "created note #{i}" - end - binding.pry - 2000.times do |i| - ont = Ontology.where(acronym: "ontology #{Random.rand(15)}").all.first - owner = User.where(username: "user#{i % 200}name").include(:username).all.first - n = Note.new(content: "content " * 60, owner: owner, ontology: ont) - n.save - puts "created note #{i}" - end - binding.pry - 800.times do |i| - ont = Ontology.where(acronym: "ontology #{Random.rand(6)}").all.first - owner = User.where(username: "user#{i % 200}name").include(:username).all.first - n = Note.new(content: "content " * 60, owner: owner, ontology: ont) - n.save - puts "created note #{i}" end - end 500.times do |i| ont_id = 0 begin - ont_id = Random.rand(5)+180 + ont_id = Random.rand(5) + 180 end ont = Ontology.where(acronym: "ontology #{ont_id}").all.first owner = User.where(username: "user#{i % 200}name").include(:username).all.first @@ -100,12 +99,12 @@ def self.benchmark_data end end - def self.benchmark_naive_query + def self.benchmark_naive_query Goo.sparql_query_client.reset_profiling ont = Ontology.where.include(:acronym).all bench_result = [] ont.each do |ont| - qq =< . ?id ?username . 
@@ -130,12 +129,12 @@ def self.benchmark_naive_query users = {} roles = {} count_sol = 0 - res = client.query(qq) + res = client.query(qq) res.each do |sol| unless users.include?(sol[:id]) users[sol[:id]] = User.new - users[sol[:id]].username=sol[:username] - users[sol[:id]].email=sol[:email] + users[sol[:id]].username = sol[:username] + users[sol[:id]].email = sol[:email] end unless roles.include?(sol[:roles]) roles[sol[:roles]] = Role.new @@ -147,14 +146,14 @@ def self.benchmark_naive_query end count_sol = count_sol + 1 end - bench_result << [Time.now - start,notes.length, client.query_times.last, client.parse_times.last,count_sol ] + bench_result << [Time.now - start, notes.length, client.query_times.last, client.parse_times.last, count_sol] end bench_result.select! { |x| x[1] > 0 } bench_result.sort_by! { |x| x[1] } CSV.open("benchmark_naive.csv", "wb") do |csv| - csv << ["total", "notes", "qt", "pt","sol"] + csv << ["total", "notes", "qt", "pt", "sol"] bench_result.each do |b| - csv << b + csv << b end end end @@ -164,7 +163,7 @@ def self.benchmark_naive_fast ont = Ontology.where.include(:acronym).all bench_result = [] ont.each do |ont| - qq =< . ?note ?id . @@ -185,11 +184,11 @@ def self.benchmark_naive_fast users = {} roles = {} count_sol = 0 - res = client.query(qq) + res = client.query(qq) res.each do |sol| unless users.include?(sol[:id]) users[sol[:id]] = User.new - users[sol[:id]].username=sol[:username] + users[sol[:id]].username = sol[:username] end unless roles.include?(sol[:roles]) roles[sol[:roles]] = Role.new @@ -201,14 +200,14 @@ def self.benchmark_naive_fast end count_sol = count_sol + 1 end - bench_result << [Time.now - start,notes.length, client.query_times.last, client.parse_times.last,count_sol ] + bench_result << [Time.now - start, notes.length, client.query_times.last, client.parse_times.last, count_sol] end bench_result.select! { |x| x[1] > 0 } bench_result.sort_by! 
{ |x| x[1] } CSV.open("benchmark_naive_fast.csv", "wb") do |csv| - csv << ["total", "notes", "qt", "pt","sol"] + csv << ["total", "notes", "qt", "pt", "sol"] bench_result.each do |b| - csv << b + csv << b end end end @@ -223,20 +222,20 @@ def self.benchmark_query_goo_fast start = Time.now notes = nil notes = Note.where(ontology: ont) - .include(:content) - .include(:owner) - .all + .include(:content) + .include(:owner) + .all num_queries = client.query_times.length - agg_parsing = client.parse_times.inject{|sum,x| sum + x } - agg_queries = client.query_times.inject{|sum,x| sum + x } - bench_result << [Time.now - start, notes.length,agg_queries,agg_parsing,num_queries ] + agg_parsing = client.parse_times.inject { |sum, x| sum + x } + agg_queries = client.query_times.inject { |sum, x| sum + x } + bench_result << [Time.now - start, notes.length, agg_queries, agg_parsing, num_queries] end bench_result.select! { |x| x[1] > 0 } bench_result.sort_by! { |x| x[1] } CSV.open("benchmark_goo_fast.csv", "wb") do |csv| csv << ["total", "notes", "agg_qt", "agg_qp", "queries"] bench_result.each do |b| - csv << b + csv << b end end end @@ -246,27 +245,26 @@ def self.benchmark_query_goo client.reset_profiling ont = Ontology.where.include(:acronym).all bench_result = [] - Role.load_inmutable_instances ont.each do |ont| client.reset_profiling start = Time.now notes = nil notes = Note.where(ontology: ont) - .include(:content) - .include(owner: [ :username, :email, roles: [:code]]) - .read_only - .all + .include(:content) + .include(owner: [:username, :email, roles: [:code]]) + .read_only + .all num_queries = client.query_times.length - agg_parsing = client.parse_times.inject{|sum,x| sum + x } - agg_queries = client.query_times.inject{|sum,x| sum + x } - bench_result << [Time.now - start, notes.length,agg_queries,agg_parsing,num_queries ] + agg_parsing = client.parse_times.inject { |sum, x| sum + x } + agg_queries = client.query_times.inject { |sum, x| sum + x } + bench_result << 
[Time.now - start, notes.length, agg_queries, agg_parsing, num_queries] end bench_result.select! { |x| x[1] > 0 } bench_result.sort_by! { |x| x[1] } CSV.open("benchmark_goo.csv", "wb") do |csv| csv << ["total", "notes", "agg_qt", "agg_qp", "queries"] bench_result.each do |b| - csv << b + csv << b end end end diff --git a/test/app/models.rb b/test/app/models.rb index 5aeb2a2e..876b70df 100644 --- a/test/app/models.rb +++ b/test/app/models.rb @@ -1,7 +1,5 @@ require_relative '../test_case' -GooTest.configure_goo - module Test module Models diff --git a/test/app/test_app.rb b/test/app/test_app.rb index 26a88d60..4d444ad4 100644 --- a/test/app/test_app.rb +++ b/test/app/test_app.rb @@ -1,6 +1,4 @@ require_relative '../test_case' require_relative 'bioportal' -GooTest.configure_goo - binding.pry diff --git a/test/console.rb b/test/console.rb index e64d4adf..39d19aa2 100644 --- a/test/console.rb +++ b/test/console.rb @@ -1,5 +1,4 @@ require_relative "../lib/goo.rb" require_relative "./test_case.rb" -GooTest.configure_goo binding.pry diff --git a/test/data/graphdb-repo-config.ttl b/test/data/graphdb-repo-config.ttl new file mode 100644 index 00000000..9200da9a --- /dev/null +++ b/test/data/graphdb-repo-config.ttl @@ -0,0 +1,33 @@ +@prefix rdfs: . +@prefix rep: . +@prefix sail: . +@prefix xsd: . + +<#ontoportal> a rep:Repository; + rep:repositoryID "ontoportal"; + rep:repositoryImpl [ + rep:repositoryType "graphdb:SailRepository"; + [ + "http://example.org/owlim#"; + "false"; + ""; + "true"; + "false"; + "true"; + "true"; + "32"; + "10000000"; + ""; + "true"; + ""; + "0"; + "0"; + "false"; + "file-repository"; + "rdfsplus-optimized"; + "storage"; + "false"; + sail:sailType "owlim:Sail" + ] + ]; + rdfs:label "" . 
\ No newline at end of file diff --git a/test/data/graphdb-test-load.nt b/test/data/graphdb-test-load.nt new file mode 100644 index 00000000..e69de29b diff --git a/test/data/virtuoso.init b/test/data/virtuoso.init new file mode 100644 index 00000000..e5f4bd85 --- /dev/null +++ b/test/data/virtuoso.init @@ -0,0 +1,240 @@ + ; + ; virtuoso.ini + ; + ; Configuration file for the OpenLink Virtuoso VDBMS Server + ; + ; To learn more about this product, or any other product in our + ; portfolio, please check out our web site at: + ; + ; http://virtuoso.openlinksw.com/ + ; + ; or contact us at: + ; + ; general.information@openlinksw.com + ; + ; If you have any technical questions, please contact our support + ; staff at: + ; + ; technical.support@openlinksw.com + ; + ; + ; Database setup + ; + [Database] + DatabaseFile = ../database/virtuoso.db + ErrorLogFile = ../database/virtuoso.log + LockFile = ../database/virtuoso.lck + TransactionFile = ../database/virtuoso.trx + xa_persistent_file = ../database/virtuoso.pxa + ErrorLogLevel = 7 + FileExtend = 200 + MaxCheckpointRemap = 2000 + Striping = 0 + TempStorage = TempDatabase + + [TempDatabase] + DatabaseFile = ../database/virtuoso-temp.db + TransactionFile = ../database/virtuoso-temp.trx + MaxCheckpointRemap = 2000 + Striping = 0 + + ; + ; Server parameters + ; + [Parameters] + ServerPort = 1111 + LiteMode = 0 + DisableUnixSocket = 1 + DisableTcpSocket = 0 + ;SSLServerPort = 2111 + ;SSLCertificate = cert.pem + ;SSLPrivateKey = pk.pem + ;X509ClientVerify = 0 + ;X509ClientVerifyDepth = 0 + ;X509ClientVerifyCAFile = ca.pem + MaxClientConnections = 10 + CheckpointInterval = 60 + O_DIRECT = 0 + CaseMode = 2 + MaxStaticCursorRows = 5000 + CheckpointAuditTrail = 0 + AllowOSCalls = 0 + SchedulerInterval = 10 + DirsAllowed = ., ../vad, /usr/share/proj + ThreadCleanupInterval = 0 + ThreadThreshold = 10 + ResourcesCleanupInterval = 0 + FreeTextBatchSize = 100000 + SingleCPU = 0 + VADInstallDir = ../vad/ + PrefixResultNames = 0 + 
RdfFreeTextRulesSize = 100 + IndexTreeMaps = 64 + MaxMemPoolSize = 200000000 + PrefixResultNames = 0 + MacSpotlight = 0 + MaxQueryMem = 2G ; memory allocated to query processor + VectorSize = 1000 ; initial parallel query vector (array of query operations) size + MaxVectorSize = 1000000 ; query vector size threshold. + AdjustVectorSize = 0 + ThreadsPerQuery = 4 + AsyncQueueMaxThreads = 10 + ;; + ;; When running with large data sets, one should configure the Virtuoso + ;; process to use between 2/3 to 3/5 of free system memory and to stripe + ;; storage on all available disks. + ;; + ;; Uncomment next two lines if there is 2 GB system memory free + ;NumberOfBuffers = 170000 + ;MaxDirtyBuffers = 130000 + ;; Uncomment next two lines if there is 4 GB system memory free + ;NumberOfBuffers = 340000 + ; MaxDirtyBuffers = 250000 + ;; Uncomment next two lines if there is 8 GB system memory free + ;NumberOfBuffers = 680000 + ;MaxDirtyBuffers = 500000 + ;; Uncomment next two lines if there is 16 GB system memory free + ;NumberOfBuffers = 1360000 + ;MaxDirtyBuffers = 1000000 + ;; Uncomment next two lines if there is 32 GB system memory free + ;NumberOfBuffers = 2720000 + ;MaxDirtyBuffers = 2000000 + ;; Uncomment next two lines if there is 48 GB system memory free + ;NumberOfBuffers = 4000000 + ;MaxDirtyBuffers = 3000000 + ;; Uncomment next two lines if there is 64 GB system memory free + ;NumberOfBuffers = 5450000 + ;MaxDirtyBuffers = 4000000 + ;; + ;; Note the default settings will take very little memory + ;; but will not result in very good performance + ;; + NumberOfBuffers = 10000 + MaxDirtyBuffers = 6000 + + [HTTPServer] + ServerPort = 8890 + ServerRoot = ../vsp + MaxClientConnections = 10 + DavRoot = DAV + EnabledDavVSP = 0 + HTTPProxyEnabled = 0 + TempASPXDir = 0 + DefaultMailServer = localhost:25 + MaxKeepAlives = 10 + KeepAliveTimeout = 10 + MaxCachedProxyConnections = 10 + ProxyConnectionCacheTimeout = 15 + HTTPThreadSize = 280000 + HttpPrintWarningsInOutput = 0 + 
Charset = UTF-8 + ;HTTPLogFile = logs/http.log + MaintenancePage = atomic.html + EnabledGzipContent = 1 + + [AutoRepair] + BadParentLinks = 0 + + [Client] + SQL_PREFETCH_ROWS = 100 + SQL_PREFETCH_BYTES = 16000 + SQL_QUERY_TIMEOUT = 0 + SQL_TXN_TIMEOUT = 0 + ;SQL_NO_CHAR_C_ESCAPE = 1 + ;SQL_UTF8_EXECS = 0 + ;SQL_NO_SYSTEM_TABLES = 0 + ;SQL_BINARY_TIMESTAMP = 1 + ;SQL_ENCRYPTION_ON_PASSWORD = -1 + + [VDB] + ArrayOptimization = 0 + NumArrayParameters = 10 + VDBDisconnectTimeout = 1000 + KeepConnectionOnFixedThread = 0 + + [Replication] + ServerName = db-BIONIC-PORT + ServerEnable = 1 + QueueMax = 50000 + + ; + ; Striping setup + ; + ; These parameters have only effect when Striping is set to 1 in the + ; [Database] section, in which case the DatabaseFile parameter is ignored. + ; + ; With striping, the database is spawned across multiple segments + ; where each segment can have multiple stripes. + ; + ; Format of the lines below: + ; Segment = , [, .. ] + ; + ; must be ordered from 1 up. + ; + ; The is the total size of the segment which is equally divided + ; across all stripes forming the segment. Its specification can be in + ; gigabytes (g), megabytes (m), kilobytes (k) or in database blocks + ; (b, the default) + ; + ; Note that the segment size must be a multiple of the database page size + ; which is currently 8k. Also, the segment size must be divisible by the + ; number of stripe files forming the segment. + ; + ; The example below creates a 200 meg database striped on two segments + ; with two stripes of 50 meg and one of 100 meg. + ; + ; You can always add more segments to the configuration, but once + ; added, do not change the setup. + ; + [Striping] + Segment1 = 100M, db-seg1-1.db, db-seg1-2.db + Segment2 = 100M, db-seg2-1.db + ;... + ;[TempStriping] + ;Segment1 = 100M, db-seg1-1.db, db-seg1-2.db + ;Segment2 = 100M, db-seg2-1.db + ;... + ;[Ucms] + ;UcmPath = + ;Ucm1 = + ;Ucm2 = + ;... 
+ + [Zero Config] + ServerName = virtuoso (BIONIC-PORT) + ;ServerDSN = ZDSN + ;SSLServerName = + ;SSLServerDSN = + + [Mono] + ;MONO_TRACE = Off + ;MONO_PATH = + ;MONO_ROOT = + ;MONO_CFG_DIR = + ;virtclr.dll = + + [URIQA] + DynamicLocal = 0 + DefaultHost = localhost:8890 + + [SPARQL] + ;ExternalQuerySource = 1 + ;ExternalXsltSource = 1 + ;DefaultGraph = http://localhost:8890/dataspace + ;ImmutableGraphs = http://localhost:8890/dataspace + ResultSetMaxRows = 10000 + MaxConstructTriples = 10000 + MaxQueryCostEstimationTime = 400 ; in seconds + MaxQueryExecutionTime = 60 ; in seconds + DefaultQuery = select distinct ?Concept where {[] a ?Concept} LIMIT 100 + DeferInferenceRulesInit = 0 ; controls inference rules loading + MaxMemInUse = 0 ; limits the amount of memory for construct dict (0=unlimited) + ;LabelInferenceName = facets ; Only needed when using the Faceted Browser + ;PingService = http://rpc.pingthesemanticweb.com/ + + [Plugins] + LoadPath = ../hosting + Load1 = plain, geos + Load2 = plain, graphql + Load3 = plain, proj4 + Load4 = plain, shapefileio \ No newline at end of file diff --git a/test/settings/test_hooks.rb b/test/settings/test_hooks.rb new file mode 100644 index 00000000..47d8fa0f --- /dev/null +++ b/test/settings/test_hooks.rb @@ -0,0 +1,50 @@ +require_relative '../test_case' + +class TestHookModel < Goo::Base::Resource + model :test_hook, name_with: lambda { |s| RDF::URI.new("http://example.org/test/#{rand(1000)}") } + after_save :update_count, :update_count_2 + after_destroy :decrease_count_2 + attribute :name, enforce: [:existence, :unique] + + attr_reader :count, :count2 + + def update_count + @count ||= 0 + @count += 1 + end + + def update_count_2 + @count2 ||= 0 + @count2 += 2 + end + + def decrease_count_2 + @count2 -= 2 + end + +end + +class TestHooksSetting < MiniTest::Unit::TestCase + + def test_model_hooks + TestHookModel.find("test").first&.delete + + model = TestHookModel.new(name: "test").save + + assert_equal 1, model.count + 
assert_equal 2, model.count2 + + model.name = "test2" + model.save + + assert_equal 2, model.count + assert_equal 4, model.count2 + + + model.delete + + assert_equal 2, model.count + assert_equal 2, model.count2 + + end +end diff --git a/test/solr/test_solr.rb b/test/solr/test_solr.rb new file mode 100644 index 00000000..6428bc8a --- /dev/null +++ b/test/solr/test_solr.rb @@ -0,0 +1,122 @@ +require_relative '../test_case' +require 'benchmark' + + +class TestSolr < MiniTest::Unit::TestCase + def self.before_suite + @@connector = SOLR::SolrConnector.new(Goo.search_conf, 'test') + @@connector.delete_collection('test') + @@connector.init + end + + def self.after_suite + @@connector.delete_collection('test') + end + + def test_add_collection + connector = @@connector + connector.create_collection('test2') + all_collections = connector.fetch_all_collections + assert_includes all_collections, 'test2' + end + + def test_delete_collection + connector = @@connector + test_add_collection + connector.delete_collection('test2') + + all_collections = connector.fetch_all_collections + refute_includes all_collections, 'test2' + end + + def test_schema_generator + connector = @@connector + + all_fields = connector.all_fields + + connector.schema_generator.fields_to_add.each do |f| + field = all_fields.select { |x| x["name"].eql?(f[:name]) }.first + refute_nil field + assert_equal field["type"], f[:type] + assert_equal field["indexed"], f[:indexed] + assert_equal field["stored"], f[:stored] + assert_equal field["multiValued"], f[:multiValued] + end + + copy_fields = connector.all_copy_fields + connector.schema_generator.copy_fields_to_add.each do |f| + field = copy_fields.select { |x| x["source"].eql?(f[:source]) }.first + refute_nil field + assert_equal field["source"], f[:source] + assert_includes f[:dest], field["dest"] + end + + dynamic_fields = connector.all_dynamic_fields + + connector.schema_generator.dynamic_fields_to_add.each do |f| + field = dynamic_fields.select { |x| 
x["name"].eql?(f[:name]) }.first + refute_nil field + assert_equal field["name"], f[:name] + assert_equal field["type"], f[:type] + assert_equal field["multiValued"], f[:multiValued] + assert_equal field["stored"], f[:stored] + end + + connector.clear_all_schema + connector.fetch_schema + all_fields = connector.all_fields + connector.schema_generator.fields_to_add.each do |f| + field = all_fields.select { |x| x["name"].eql?(f[:name]) }.first + assert_nil field + end + + copy_fields = connector.all_copy_fields + connector.schema_generator.copy_fields_to_add.each do |f| + field = copy_fields.select { |x| x["source"].eql?(f[:source]) }.first + assert_nil field + end + + dynamic_fields = connector.all_dynamic_fields + connector.schema_generator.dynamic_fields_to_add.each do |f| + field = dynamic_fields.select { |x| x["name"].eql?(f[:name]) }.first + assert_nil field + end + end + + def test_add_field + connector = @@connector + add_field('test', connector) + + + field = connector.fetch_all_fields.select { |f| f['name'] == 'test' }.first + + refute_nil field + assert_equal field['type'], 'string' + assert_equal field['indexed'], true + assert_equal field['stored'], true + assert_equal field['multiValued'], true + + connector.delete_field('test') + end + + def test_delete_field + connector = @@connector + + add_field('test', connector) + + connector.delete_field('test') + + field = connector.all_fields.select { |f| f['name'] == 'test' }.first + + assert_nil field + end + + private + + def add_field(name, connector) + if connector.fetch_field(name) + connector.delete_field(name) + end + connector.add_field(name, 'string', indexed: true, stored: true, multi_valued: true) + end +end diff --git a/test/test_basic_persistence.rb b/test/test_basic_persistence.rb index 0cafdfbd..665a5d60 100644 --- a/test/test_basic_persistence.rb +++ b/test/test_basic_persistence.rb @@ -1,7 +1,5 @@ require_relative 'test_case' -GooTest.configure_goo - module Dep class Ontology < 
Goo::Base::Resource model :ontology, name_with: :name diff --git a/test/test_cache.rb b/test/test_cache.rb index aecbdef3..11dccf41 100644 --- a/test/test_cache.rb +++ b/test/test_cache.rb @@ -1,7 +1,4 @@ require_relative 'test_case' - -GooTest.configure_goo - require_relative 'models' class TestCache < MiniTest::Unit::TestCase @@ -29,6 +26,19 @@ def self.after_suite GooTestData.delete_test_case_data end + def test_cache_invalidate + address = Address.all.first + Goo.use_cache = true + puts "save 1" + University.new(name: 'test', address: [address]).save + u2 = University.new(name: 'test', address: [address]) + puts "request 1" + refute u2.valid? + expected_error = { :name => { :duplicate => "There is already a persistent resource with id `http://goo.org/default/university/test`" } } + assert_equal expected_error, u2.errors + Goo.use_cache = false + end + def test_cache_models redis = Goo.redis_client redis.flushdb @@ -52,7 +62,7 @@ def test_cache_models assert !key.nil? assert redis.exists(key) - + prg = programs.first prg.bring_remaining prg.credits = 999 @@ -103,7 +113,7 @@ def test_cache_models_back_door data = " " + " " + " ." 
- + Goo.sparql_data_client.append_triples(Student.type_uri,data,"application/x-turtle") programs = Program.where(name: "BioInformatics", university: [ name: "Stanford" ]) .include(:students).all @@ -131,11 +141,16 @@ def x.response_backup *args def x.response *args raise Exception, "Should be a successful hit" end - programs = Program.where(name: "BioInformatics", university: [ name: "Stanford" ]) - .include(:students).all + begin + programs = Program.where(name: "BioInformatics", university: [ name: "Stanford" ]) + .include(:students).all + rescue Exception + assert false, "should be cached" + end + #from cache - assert programs.length == 1 - assert_raises Exception do + assert_equal 1, programs.length + assert_raises Exception do #different query programs = Program.where(name: "BioInformatics X", university: [ name: "Stanford" ]).all end diff --git a/test/test_case.rb b/test/test_case.rb index 82d9c50e..af7f2a84 100644 --- a/test/test_case.rb +++ b/test/test_case.rb @@ -20,6 +20,7 @@ MiniTest::Unit.autorun require_relative "../lib/goo.rb" +require_relative '../config/config' class GooTest @@ -41,9 +42,8 @@ def _run_suites(suites, type) end def _run_suite(suite, type) - %[1,5,10,20] ret = [] - [1,5,10,20].each do |slice_size| + [Goo.slice_loading_size].each do |slice_size| puts "\nrunning test with slice_loading_size=#{slice_size}" Goo.slice_loading_size=slice_size begin @@ -59,35 +59,6 @@ def _run_suite(suite, type) MiniTest::Unit.runner = GooTest::Unit.new - def self.configure_goo - if not Goo.configure? 
- Goo.configure do |conf| - conf.add_redis_backend(host: "localhost") - conf.add_namespace(:omv, RDF::Vocabulary.new("http://omv.org/ontology/")) - conf.add_namespace(:skos, RDF::Vocabulary.new("http://www.w3.org/2004/02/skos/core#")) - conf.add_namespace(:owl, RDF::Vocabulary.new("http://www.w3.org/2002/07/owl#")) - conf.add_namespace(:rdfs, RDF::Vocabulary.new("http://www.w3.org/2000/01/rdf-schema#")) - conf.add_namespace(:goo, RDF::Vocabulary.new("http://goo.org/default/"),default=true) - conf.add_namespace(:metadata, RDF::Vocabulary.new("http://goo.org/metadata/")) - conf.add_namespace(:foaf, RDF::Vocabulary.new("http://xmlns.com/foaf/0.1/")) - conf.add_namespace(:rdf, RDF::Vocabulary.new("http://www.w3.org/1999/02/22-rdf-syntax-ns#")) - conf.add_namespace(:tiger, RDF::Vocabulary.new("http://www.census.gov/tiger/2002/vocab#")) - conf.add_namespace(:bioportal, RDF::Vocabulary.new("http://data.bioontology.org/")) - conf.add_namespace(:nemo, RDF::Vocabulary.new("http://purl.bioontology.org/NEMO/ontology/NEMO_annotation_properties.owl#")) - conf.add_sparql_backend( - :main, - backend_name: "4store", - query: "http://localhost:9000/sparql/", - data: "http://localhost:9000/data/", - update: "http://localhost:9000/update/", - options: { rules: :NONE } - ) - conf.add_search_backend(:main, service: "http://localhost:8983/solr/term_search_core1") - conf.use_cache = false - end - end - end - def self.triples_for_subject(resource_id) rs = Goo.sparql_query_client.query("SELECT * WHERE { #{resource_id.to_ntriples} ?p ?o . 
}") count = 0 diff --git a/test/test_chunks_write.rb b/test/test_chunks_write.rb index 4f0a8676..bbf0c5ca 100644 --- a/test/test_chunks_write.rb +++ b/test/test_chunks_write.rb @@ -1,10 +1,8 @@ require_relative 'test_case' -GooTest.configure_goo - module TestChunkWrite - ONT_ID = "http:://example.org/data/nemo" - ONT_ID_EXTRA = "http:://example.org/data/nemo/extra" + ONT_ID = "http://example.org/data/nemo" + ONT_ID_EXTRA = "http://example.org/data/nemo/extra" class TestChunkWrite < MiniTest::Unit::TestCase @@ -20,14 +18,14 @@ def self.after_suite _delete end + def setup + self.class._delete + end + + def self._delete - graphs = [ONT_ID,ONT_ID_EXTRA] - url = Goo.sparql_data_client.url - graphs.each do |graph| - # This bypasses the chunks stuff - params = { method: :delete, url: "#{url.to_s}#{graph.to_s}", timeout: nil } - RestClient::Request.execute(params) - end + graphs = [ONT_ID, ONT_ID_EXTRA] + graphs.each { |graph| Goo.sparql_data_client.delete_graph(graph) } end def test_put_data @@ -74,20 +72,13 @@ def test_reentrant_queries ntriples_file_path = "./test/data/nemo_ontology.ntriples" # Bypass in chunks - url = Goo.sparql_data_client.url - params = { - method: :put, - url: "#{url.to_s}#{ONT_ID}", - payload: File.read(ntriples_file_path), - headers: {content_type: "application/x-turtle"}, - timeout: nil - } + params = self.class.params_for_backend(:post, ONT_ID, ntriples_file_path) RestClient::Request.execute(params) tput = Thread.new { Goo.sparql_data_client.put_triples(ONT_ID_EXTRA, ntriples_file_path, mime_type="application/x-turtle") - sleep(1.5) } + count_queries = 0 tq = Thread.new { 5.times do @@ -103,16 +94,16 @@ def test_reentrant_queries assert_equal 5, count_queries tput.join - triples_no_bnodes = 25256 + count = "SELECT (count(?s) as ?c) WHERE { GRAPH <#{ONT_ID_EXTRA}> { ?s ?p ?o }}" Goo.sparql_query_client.query(count).each do |sol| - assert_equal triples_no_bnodes, sol[:c].object + assert_includes [25256, 50512], sol[:c].object end tdelete = 
Thread.new { Goo.sparql_data_client.delete_graph(ONT_ID_EXTRA) - sleep(1.5) } + count_queries = 0 tq = Thread.new { 5.times do @@ -124,9 +115,8 @@ def test_reentrant_queries end } tq.join - assert tdelete.alive? - assert_equal 5, count_queries tdelete.join + assert_equal 5, count_queries count = "SELECT (count(?s) as ?c) WHERE { GRAPH <#{ONT_ID_EXTRA}> { ?s ?p ?o }}" Goo.sparql_query_client.query(count).each do |sol| @@ -136,16 +126,7 @@ def test_reentrant_queries def test_query_flood ntriples_file_path = "./test/data/nemo_ontology.ntriples" - - # Bypass in chunks - url = Goo.sparql_data_client.url - params = { - method: :put, - url: "#{url.to_s}#{ONT_ID}", - payload: File.read(ntriples_file_path), - headers: {content_type: "application/x-turtle"}, - timeout: nil - } + params = self.class.params_for_backend(:post, ONT_ID, ntriples_file_path) RestClient::Request.execute(params) tput = Thread.new { @@ -158,27 +139,34 @@ def test_query_flood 50.times do |j| oq = "SELECT (count(?s) as ?c) WHERE { ?s a ?o }" Goo.sparql_query_client.query(oq).each do |sol| - assert sol[:c].object > 0 + refute_equal 0, sol[:c].to_i end end } end - log_status = [] - Thread.new { - 10.times do |i| - log_status << Goo.sparql_query_client.status - sleep(1.2) + threads.join + + if Goo.backend_4s? 
+ log_status = [] + Thread.new { + 10.times do |i| + log_status << Goo.sparql_query_client.status + end + } + + threads.each do |t| + t.join end - } + tput.join - threads.each do |t| - t.join + assert log_status.map { |x| x[:outstanding] }.max > 0 + assert_equal 16, log_status.map { |x| x[:running] }.max end - tput.join + end - assert log_status.map { |x| x[:outstanding] }.max > 0 - assert_equal 16, log_status.map { |x| x[:running] }.max + def self.params_for_backend(method, graph_name, ntriples_file_path = nil) + Goo.sparql_data_client.params_for_backend(graph_name, File.read(ntriples_file_path), "text/turtle", method) end end diff --git a/test/test_collections.rb b/test/test_collections.rb index 2177c669..65d1f46d 100644 --- a/test/test_collections.rb +++ b/test/test_collections.rb @@ -1,7 +1,5 @@ require_relative 'test_case' -GooTest.configure_goo - #collection on attribute class Issue < Goo::Base::Resource model :issue, collection: :owner, name_with: :description diff --git a/test/test_dsl_settings.rb b/test/test_dsl_settings.rb index 9a8f03df..3b8f493b 100644 --- a/test/test_dsl_settings.rb +++ b/test/test_dsl_settings.rb @@ -1,6 +1,5 @@ require_relative 'test_case' -GooTest.configure_goo class NewPersonModel < Goo::Base::Resource model :person_model_new, name_with: :name @@ -64,7 +63,7 @@ class YamlSchemeModelTest < Goo::Base::Resource end -class TestDSLSeeting < MiniTest::Unit::TestCase +class TestDSLSetting < MiniTest::Unit::TestCase def initialize(*args) super(*args) end diff --git a/test/test_enum.rb b/test/test_enum.rb index db41c343..eaf13af2 100644 --- a/test/test_enum.rb +++ b/test/test_enum.rb @@ -1,7 +1,5 @@ require_relative 'test_case' -GooTest.configure_goo - module TestEnum VALUES = ["uploaded","removed","archived"] diff --git a/test/test_index.rb b/test/test_index.rb index 4d781973..bf4b8937 100644 --- a/test/test_index.rb +++ b/test/test_index.rb @@ -1,8 +1,6 @@ require_relative 'test_case' require_relative './app/models' -GooTest.configure_goo 
- module TestIndex class TestSchemaless < MiniTest::Unit::TestCase diff --git a/test/test_inmutable.rb b/test/test_inmutable.rb deleted file mode 100644 index 9d6037c0..00000000 --- a/test/test_inmutable.rb +++ /dev/null @@ -1,102 +0,0 @@ -require_relative 'test_case' - -GooTest.configure_goo - -module TestInmutable - class Status < Goo::Base::Resource - model :status, :inmutable, name_with: :code - attribute :code, enforce: [:unique, :existence] - attribute :description, enforce: [:existence] - end - - class Person < Goo::Base::Resource - model :person, :inmutable, name_with: :name - attribute :name, enforce: [:unique, :existence] - attribute :status, enforce: [:status, :existence] - end - - class TestInmutableCase < MiniTest::Unit::TestCase - def initialize(*args) - super(*args) - end - - def setup - end - - def self.before_suite - status = ["single", "married", "divorced", "widowed"] - status.each do |st| - stt = Status.new(code: st, description: (st + " some desc")) - stt.save - end - people = [ - ["Susan","married"], - ["Lee","divorced"], - ["John","divorced"], - ["Peter","married"], - ["Christine","married"], - ["Ana","single"], - ] - people.each do |p| - po = Person.new - po.name = p[0] - po.status = Status.find(p[1]).first - po.save - end - end - - def self.after_suite - objs = [Person,Status] - objs.each do |obj| - obj.where.all.each do |st| - st.delete - end - end - end - - ## TODO inmutable are deprecated - they might come back in a different way" - def skip_test_inmutable - #they come fully loaded - Status.load_inmutable_instances - status1 = Status.where.all.sort_by { |s| s.code } - status2 = Status.where.all.sort_by { |s| s.code } - assert status1.length == 4 - assert status2.length == 4 - #same referencs - status1.each_index do |i| - assert status1[i].object_id==status2[i].object_id - end - - #create a new object - stt = Status.new(code: "xx", description: ("xx" + " some desc")) - stt.save - - status1 = Status.where.all.sort_by { |s| s.code } - 
status2 = Status.where.all.sort_by { |s| s.code } - assert status1.length == 5 - assert status2.length == 5 - #same referencs - status1.each_index do |i| - assert status1[i].object_id==status2[i].object_id - end - - status1.each do |st| - assert st.code - assert st.description - end - - marr = Status.find("divorced").first - assert marr.code == "divorced" - assert marr.description - assert marr.object_id == status1.first.object_id - - people = Person.where.include(:name, status: [ :code, :description ]).all - people.each do |p| - assert p.status.object_id == status1.select { |st| st.id == p.status.id }.first.object_id - assert p.status.code - assert p.status.description - end - end - - end -end diff --git a/test/test_inverse.rb b/test/test_inverse.rb index e926a572..2fbb4479 100644 --- a/test/test_inverse.rb +++ b/test/test_inverse.rb @@ -1,7 +1,5 @@ require_relative 'test_case' -GooTest.configure_goo - class Task < Goo::Base::Resource model :task, name_with: :description attribute :description, enforce: [ :existence, :unique] diff --git a/test/test_model_complex.rb b/test/test_model_complex.rb index 8f904d8b..38a282d6 100644 --- a/test/test_model_complex.rb +++ b/test/test_model_complex.rb @@ -1,7 +1,5 @@ require_relative 'test_case' -GooTest.configure_goo - module TestComplex class Submission < Goo::Base::Resource @@ -48,7 +46,7 @@ def self.tree_property(*args) if collection.id.to_s["submission1"] return RDF::RDFS[:subClassOf] end - return RDF::SKOS[:broader] + return RDF::Vocab::SKOS[:broader] end def self.class_rdf_type(*args) @@ -56,7 +54,7 @@ def self.class_rdf_type(*args) if collection.id.to_s["submission1"] return RDF::OWL[:Class] end - return RDF::SKOS[:Concept] + return RDF::Vocab::SKOS[:Concept] end attribute :methodBased, namespace: :rdfs, property: :subClassOf, handler: :dataMethod @@ -78,12 +76,13 @@ def self.before_suite if GooTest.count_pattern("?s ?p ?o") > 100000 raise Exception, "Too many triples in KB, does not seem right to run tests" end - 
Goo.sparql_update_client.update("DELETE {?s ?p ?o } WHERE { ?s ?p ?o }") + + Goo.sparql_data_client.delete_graph(Submission.uri_type.to_s) end def self.after_suite Goo.use_cache = false - Goo.sparql_update_client.update("DELETE {?s ?p ?o } WHERE { ?s ?p ?o }") + Goo.sparql_data_client.delete_graph(Submission.uri_type.to_s) end def test_method_handler @@ -185,6 +184,11 @@ def test_multiple_collection() def test_collection() + # This call is not usually necessary as it is usually covered by + # the model declaration above. See the explanation in + # https://github.com/ncbo/goo/commit/0e09816b121750b3bb875a5c24cb79865287fcf4#commitcomment-90304626 + Goo.add_model(:class, Term) + submission = Submission.new(name: "submission1") unless submission.exist? submission.save @@ -313,6 +317,11 @@ def test_two_resources_same_id def test_parents_inverse_children + # This call is not usually necessary as it is usually covered by + # the model declaration above. See the explanation in + # https://github.com/ncbo/goo/commit/0e09816b121750b3bb875a5c24cb79865287fcf4#commitcomment-90304626 + Goo.add_model(:class, Term) + submission = Submission.new(name: "submission1") unless submission.exist? submission.save @@ -320,6 +329,7 @@ def test_parents_inverse_children submission = Submission.find("submission1").first end + terms = Term.in(submission) terms.each do |t| t.delete @@ -653,6 +663,11 @@ def test_empty_pages assert_equal 0, GooTest.count_pattern("GRAPH #{submission.id.to_ntriples} { #{t.id.to_ntriples} ?p ?o . }") end + # This call is not usually necessary as it is usually covered by + # the model declaration above. 
See the explanation in + # https://github.com/ncbo/goo/commit/0e09816b121750b3bb875a5c24cb79865287fcf4#commitcomment-90304626 + Goo.add_model(:class, Term) + terms = [] 10.times do |i| term = Term.new @@ -679,6 +694,12 @@ def test_empty_pages end def test_readonly_pages_with_include + + # This call is not usually necessary as it is usually covered by + # the model declaration above. See the explanation in + # https://github.com/ncbo/goo/commit/0e09816b121750b3bb875a5c24cb79865287fcf4#commitcomment-90304626 + Goo.add_model(:class, Term) + submission = Submission.new(name: "submission1") unless submission.exist? submission.save @@ -691,6 +712,7 @@ def test_readonly_pages_with_include assert_equal(0, GooTest.count_pattern("GRAPH #{submission.id.to_ntriples} { #{t.id.to_ntriples} ?p ?o . }")) end + terms = [] 10.times do |i| term = Term.new diff --git a/test/test_name_with.rb b/test/test_name_with.rb index 7ba4df42..c2f226a4 100644 --- a/test/test_name_with.rb +++ b/test/test_name_with.rb @@ -1,7 +1,5 @@ require_relative 'test_case' -GooTest.configure_goo - class NameWith < Goo::Base::Resource model :name_with, name_with: lambda { |s| id_generator(s) } attribute :name, enforce: [ :existence, :string, :unique ] diff --git a/test/test_namespaces.rb b/test/test_namespaces.rb index 78ba9a93..6c4bddc0 100644 --- a/test/test_namespaces.rb +++ b/test/test_namespaces.rb @@ -1,7 +1,5 @@ require_relative 'test_case' -GooTest.configure_goo - class NamespacesModel < Goo::Base::Resource model :namespaces, namespace: :rdfs, name_with: :name attribute :name, enforce: [ :existence, :string, :unique ], namespace: :skos diff --git a/test/test_read_only.rb b/test/test_read_only.rb index 268f7c86..9855decf 100644 --- a/test/test_read_only.rb +++ b/test/test_read_only.rb @@ -1,8 +1,6 @@ require_relative 'test_case' require_relative 'test_where' -GooTest.configure_goo - module TestReadOnly class TestReadOnlyWithStruct < TestWhere diff --git a/test/test_schemaless.rb 
b/test/test_schemaless.rb index f95a17d5..42084eb8 100644 --- a/test/test_schemaless.rb +++ b/test/test_schemaless.rb @@ -1,8 +1,6 @@ require_relative 'test_case' -GooTest.configure_goo - -module TestSChemaless +module TestSchemaless ONT_ID = "http:://example.org/data/nemo" @@ -118,6 +116,9 @@ def test_find_include_schemaless where = Klass.find(cognition_term).in(ontology).include(:unmapped) k = where.first enter = 0 + + assert k.unmapped.keys.include?(Goo.vocabulary(:nemo)[:definition]) + k.unmapped.each do |p,vals| if p.to_s == Goo.vocabulary(:nemo)[:synonym].to_s enter += 1 @@ -185,7 +186,19 @@ def test_index_order_by end end + + def test_all_pages_loop + ontology = Ontology.find(RDF::URI.new(ONT_ID)).first + page = 1 + count = 0 + begin + paging = Klass.in(ontology).page(page,50).all + count += paging.size + page = paging.next_page if paging.next? + end while(paging.next?) + assert_equal count, Klass.in(ontology).count + end def test_page_reuse_predicates ontology = Ontology.find(RDF::URI.new(ONT_ID)).first paging = Klass.in(ontology).include(:unmapped).page(1,100) @@ -208,7 +221,7 @@ def test_page_reuse_predicates all_ids << k.id end total += page.length - paging.page(page.next_page) if page.next? + paging.page(page.next_page, 100) if page.next? assert page.aggregate == 1713 end while(page.next?) 
assert all_ids.length == all_ids.uniq.length diff --git a/test/test_search.rb b/test/test_search.rb index 433dee86..0bba79d9 100644 --- a/test/test_search.rb +++ b/test/test_search.rb @@ -1,13 +1,11 @@ require_relative 'test_case' -GooTest.configure_goo - module TestSearch class TermSearch < Goo::Base::Resource - model :term_search, name_with: :id + model :term_search, name_with: lambda { |resource| uuid_uri_generator(resource) } attribute :prefLabel, enforce: [:existence] - attribute :synonym # array of strings + attribute :synonym, enforce: [:list] # array of strings attribute :definition # array of strings attribute :submissionAcronym, enforce: [:existence] attribute :submissionId, enforce: [:existence, :integer] @@ -16,6 +14,39 @@ class TermSearch < Goo::Base::Resource attribute :semanticType attribute :cui + enable_indexing(:term_search) do | schema_generator | + schema_generator.add_field(:prefLabel, 'text_general', indexed: true, stored: true, multi_valued: false) + schema_generator.add_field(:synonym, 'text_general', indexed: true, stored: true, multi_valued: true) + schema_generator.add_field(:definition, 'string', indexed: true, stored: true, multi_valued: true) + schema_generator.add_field(:submissionAcronym, 'string', indexed: true, stored: true, multi_valued: false) + schema_generator.add_field(:submissionId, 'pint', indexed: true, stored: true, multi_valued: false) + schema_generator.add_field(:cui, 'text_general', indexed: true, stored: true, multi_valued: true) + schema_generator.add_field(:semanticType, 'text_general', indexed: true, stored: true, multi_valued: true) + + # Copy fields for term search + schema_generator.add_copy_field('prefLabel', '_text_') + # for exact search + schema_generator.add_copy_field('prefLabel', 'prefLabelExact') + + # Matches whole terms in the suggest text + schema_generator.add_copy_field('prefLabel', 'prefLabelSuggest') + + # Will match from the left of the field, e.g. 
if the document field + # is "A brown fox" and the query is "A bro", it will match, but not "brown" + schema_generator.add_copy_field('prefLabel', 'prefLabelSuggestEdge') + + # Matches any word in the input field, with implicit right truncation. + # This means that the field "A brown fox" will be matched by query "bro". + # We use this to get partial matches, but these would be boosted lower than exact and left-anchored + schema_generator.add_copy_field('prefLabel', 'prefLabelSuggestNgram') + + schema_generator.add_copy_field('synonym', '_text_') + schema_generator.add_copy_field('synonym', 'synonymExact') + schema_generator.add_copy_field('synonym', 'synonymSuggest') + schema_generator.add_copy_field('synonym', 'synonymSuggestEdge') + schema_generator.add_copy_field('synonym', 'synonymSuggestNgram') + end + def index_id() "#{self.id.to_s}_#{self.submissionAcronym}_#{self.submissionId}" end @@ -25,8 +56,45 @@ def index_doc(to_set = nil) end end + class TermSearch2 < Goo::Base::Resource + model :term_search2, name_with: :prefLabel + attribute :prefLabel, enforce: [:existence], fuzzy_search: true + attribute :synonym, enforce: [:list] + attribute :definition + attribute :submissionAcronym, enforce: [:existence] + attribute :submissionId, enforce: [:existence, :integer] + attribute :private, enforce: [:boolean], default: false, index: false + # Dummy attributes to validate non-searchable files + attribute :semanticType + attribute :cui + + enable_indexing(:test_solr) + end + + class TermSearch3 < Goo::Base::Resource + model :term_search3, name_with: :prefLabel + attribute :prefLabel, enforce: [:existence] + attribute :synonym, enforce: [:list] + attribute :definition + attribute :submissionAcronym, enforce: [:existence] + attribute :submissionId, enforce: [:existence, :integer] + attribute :private, enforce: [:boolean], default: false, index: false + # Dummy attributes to validate non-searchable files + attribute :semanticType + attribute :cui + + attribute :object, 
enforce: [:term_search] + attribute :object_list, enforce: [:term_search, :list] + + + enable_indexing(:test_solr) + end + class TestModelSearch < MiniTest::Unit::TestCase + def self.before_suite + Goo.init_search_connections(true) + end def setup @terms = [ TermSearch.new( @@ -63,7 +131,21 @@ def setup submissionId: 2, semanticType: "Neoplastic Process", cui: "C0375111" - ) + ), + TermSearch.new( + id: RDF::URI.new("http://ncicb.nci.nih.gov/xml/owl/EVS/Thesaurus.owl#Melanoma2"), + prefLabel: "Melanoma with cutaneous melanoma syndrome", + synonym: [ + "Cutaneous Melanoma", + "Skin Cancer", + "Malignant Melanoma" + ], + definition: "Melanoma refers to a malignant skin cancer", + submissionAcronym: "NCIT", + submissionId: 2, + semanticType: "Neoplastic Process", + cui: "C0025202" + ), ] end @@ -80,6 +162,98 @@ def test_search assert_equal @terms[1].prefLabel, resp["response"]["docs"][0]["prefLabel"] end + def test_search_filters + TermSearch.indexClear + @terms[0].index + @terms[1].index + @terms[2].index + TermSearch.indexCommit + params = {"defType"=>"edismax", + "stopwords"=>"true", + "lowercaseOperators"=>"true", + "qf"=>"prefLabelExact^100 prefLabelSuggestEdge^50 synonymSuggestEdge^10 prefLabelSuggestNgram synonymSuggestNgram resource_id cui semanticType", + "pf"=>"prefLabelSuggest^50", + } + resp = TermSearch.search("Melanoma wi", params) + assert_equal(3, resp["response"]["numFound"]) + assert_equal @terms[2].prefLabel, resp["response"]["docs"][0]["prefLabel"] + end + + def test_search_exact_filter + TermSearch.indexClear + @terms[0].index + @terms[1].index + @terms[2].index + TermSearch.indexCommit + params = {"defType"=>"edismax", + "stopwords"=>"true", + "lowercaseOperators"=>"true", + "qf"=>"prefLabelExact", + } + resp = TermSearch.search("Melanoma", params) + assert_equal(1, resp["response"]["numFound"]) + assert_equal @terms[0].prefLabel, resp["response"]["docs"][0]["prefLabel"] + end + + def test_search_suggest_edge_filter + TermSearch.indexClear + 
@terms[0].index + @terms[1].index + @terms[2].index + TermSearch.indexCommit + params = {"defType"=>"edismax", + "stopwords"=>"true", + "lowercaseOperators"=>"true", + "qf"=>"prefLabelSuggestEdge", + } + resp = TermSearch.search("Melanoma with", params) + assert_equal(1, resp["response"]["numFound"]) + assert_equal @terms[2].prefLabel, resp["response"]["docs"][0]["prefLabel"] + + resp = TermSearch.search("Melanoma", params) + assert_equal(2, resp["response"]["numFound"]) + assert_equal @terms[0].prefLabel, resp["response"]["docs"][0]["prefLabel"] + end + + def test_search_suggest_ngram_filter + TermSearch.indexClear + @terms[0].index + @terms[1].index + @terms[2].index + TermSearch.indexCommit + + params = {"defType"=>"edismax", + "stopwords"=>"true", + "lowercaseOperators"=>"true", + "qf"=>"prefLabelSuggestNgram", + } + resp = TermSearch.search("cutaneous", params) + assert_equal(1, resp["response"]["numFound"]) + assert_equal @terms[2].prefLabel, resp["response"]["docs"][0]["prefLabel"] + + resp = TermSearch.search("eous", params) + assert_equal(0, resp["response"]["numFound"]) + end + + def test_search_suggest_filter + TermSearch.indexClear + @terms[0].index + @terms[1].index + @terms[2].index + TermSearch.indexCommit + params = {"defType"=>"edismax", + "stopwords"=>"true", + "lowercaseOperators"=>"true", + "qf"=>"prefLabelSuggest", + } + resp = TermSearch.search("cutaneous test with Neoplasm Melanoma", params) + assert_equal(3, resp["response"]["numFound"]) + + + resp = TermSearch.search("mel", params) + assert_equal(0, resp["response"]["numFound"]) + end + def test_unindex TermSearch.indexClear() @terms[1].index() @@ -122,7 +296,7 @@ def test_indexBatch TermSearch.indexBatch(@terms) TermSearch.indexCommit() resp = TermSearch.search("*:*") - assert_equal 2, resp["response"]["docs"].length + assert_equal @terms.size, resp["response"]["docs"].length end def test_unindexBatch @@ -130,7 +304,7 @@ def test_unindexBatch TermSearch.indexBatch(@terms) 
TermSearch.indexCommit() resp = TermSearch.search("*:*") - assert_equal 2, resp["response"]["docs"].length + assert_equal @terms.size, resp["response"]["docs"].length TermSearch.unindexBatch(@terms) TermSearch.indexCommit() @@ -144,6 +318,69 @@ def test_indexClear resp = TermSearch.search("*:*") assert_equal 0, resp["response"]["docs"].length end + + def test_index_on_save_delete + TermSearch2.find("test").first&.delete + TermSearch3.find("test2").first&.delete + + term = TermSearch2.new(prefLabel: "test", + submissionId: 1, + definition: "definition of test", + synonym: ["synonym1", "synonym2"], + submissionAcronym: "test", + private: true + ) + + term2 = TermSearch3.new(prefLabel: "test2", + submissionId: 1, + definition: "definition of test2", + synonym: ["synonym1", "synonym2"], + submissionAcronym: "test", + private: true, + object: TermSearch.new(prefLabel: "test", submissionAcronym: 'acronym', submissionId: 1 ).save, + object_list: [TermSearch.new(prefLabel: "test2",submissionAcronym: 'acronym2', submissionId: 2).save, + TermSearch.new(prefLabel: "test3", submissionAcronym: 'acronym3', submissionId: 3).save] + ) + + term.save + term2.save + + # set as not indexed in model definition + refute_includes TermSearch2.search_client.fetch_all_fields.map{|f| f["name"]}, "private_b" + refute_includes TermSearch2.search_client.fetch_all_fields.map{|f| f["name"]}, "private_b" + + + indexed_term = TermSearch2.search("id:#{term.id.to_s.gsub(":", "\\:")}")["response"]["docs"].first + indexed_term2 = TermSearch3.search("id:#{term2.id.to_s.gsub(":", "\\:")}")["response"]["docs"].first + + term.indexable_object.each do |k, v| + assert_equal v, indexed_term[k.to_s] + end + + term2.indexable_object.each do |k, v| + assert_equal v, indexed_term2[k.to_s] + end + + term2.definition = "new definition of test2" + term2.synonym = ["new synonym1", "new synonym2"] + term2.save + + indexed_term2 = TermSearch3.search("id:#{term2.id.to_s.gsub(":", "\\:")}")["response"]["docs"].first + + 
term2.indexable_object.each do |k, v| + assert_equal v, indexed_term2[k.to_s] + end + + term2.delete + term.delete + + indexed_term = TermSearch2.submit_search_query("id:#{term.id.to_s.gsub(":", "\\:")}")["response"]["docs"].first + indexed_term2 = TermSearch3.submit_search_query("id:#{term2.id.to_s.gsub(":", "\\:")}")["response"]["docs"].first + + assert_nil indexed_term + assert_nil indexed_term2 + + end end end diff --git a/test/test_validators.rb b/test/test_validators.rb index 5110da80..e5c3a9fe 100644 --- a/test/test_validators.rb +++ b/test/test_validators.rb @@ -1,6 +1,5 @@ require_relative 'test_case' -GooTest.configure_goo require_relative 'models' class Person < Goo::Base::Resource diff --git a/test/test_where.rb b/test/test_where.rb index c80fed33..a95131b6 100644 --- a/test/test_where.rb +++ b/test/test_where.rb @@ -1,7 +1,4 @@ require_relative 'test_case' - -GooTest.configure_goo - require_relative 'models' class TestWhere < MiniTest::Unit::TestCase @@ -73,7 +70,7 @@ def test_where_simple "http://example.org/program/Stanford/CompSci", "http://example.org/program/Stanford/Medicine" ] - assert_equal program_ids, st.programs.map { |x| x.id.to_s }.sort + assert_equal program_ids, st.programs.map { |x| x.id.to_s }.sort end def test_all @@ -104,7 +101,7 @@ def test_where_2levels programs = Program.where(name: "BioInformatics", university: [ address: [ country: "UK" ]]).all assert programs.length == 1 assert programs.first.id.to_s["Southampton/BioInformatics"] - + #any program from universities in the US programs = Program.where(university: [ address: [ country: "US" ]]).include([:name]).all assert programs.length == 3 @@ -121,15 +118,15 @@ def test_where_2levels_inverse #equivalent unis = University.where(address: [country: "US"]) - .and(programs: [category: [code: "Biology"]]).all + .and(programs: [category: [code: "Biology"]]).all assert unis.length == 1 assert unis.first.id.to_s == "http://goo.org/default/university/Stanford" end def test_embed_include 
programs = Program.where.include(:name) - .include(university: [:name]) - .include(category: [:code]).all + .include(university: [:name]) + .include(category: [:code]).all assert programs.length == 9 programs.each do |p| @@ -180,7 +177,7 @@ def test_iterative_include_in_place #two levels unis = University.where.all unis_return = University.where.models(unis) - .include(programs: [:name, students: [:name]]).to_a + .include(programs: [:name, students: [:name]]).to_a assert unis_return.object_id == unis.object_id return_object_id = unis.map { |x| x.object_id }.uniq.sort unis_object_id = unis.map { |x| x.object_id }.uniq.sort @@ -262,7 +259,18 @@ def test_embed_two_levels end end + def test_fetch_remaining + students = Student.where(enrolled:RDF::URI.new("http://example.org/program/Stanford/BioInformatics")) + .include(:name, :birth_date, enrolled: [:name]).all + + + s = students.select { |x| x.name['Daniel'] }.first + refute_nil s + assert_equal 2, s.enrolled.size + end + def test_paging_with_filter_order + skip('pagination with filter and order does not work in 4s') if Goo.backend_4s? 
f = Goo::Filter.new(:birth_date) > DateTime.parse('1978-01-03') total_count = Student.where.filter(f).count @@ -276,6 +284,14 @@ def test_paging_with_filter_order assert_equal total_count, page_1.size + page_2.size end + def test_two_level_include + programs = Program.where.include(:name).all + r = Program.where.models(programs).include(students: [:name]).all + r.each do |p| + refute_nil p.students + end + end + def test_unique_object_references #NOTE: unique references does not apply across different slice loading @@ -335,7 +351,7 @@ def test_unique_object_references def test_complex_include #Students in a university by name students = Student.where(enrolled: [university: [name: "Stanford"]]) - .include(:name) + .include(:name) .include(enrolled: [:name, university: [ :address ]]).all assert students.map { |x| x.name }.sort == ["Daniel","John","Susan"] @@ -345,7 +361,7 @@ def test_complex_include assert_instance_of University, p.university assert_instance_of Array, p.university.addresses assert_instance_of Address, p.university.addresses.first - assert_raises Goo::Base::AttributeNotLoaded do + assert_raises Goo::Base::AttributeNotLoaded do p.university.addresses.first.country end end @@ -405,23 +421,23 @@ def test_where_union_pattern def test_where_direct_attributes st = Student.where(name: "Daniel") - .or(name: "Louis") - .or(name: "Lee") - .or(name: "John").all + .or(name: "Louis") + .or(name: "Lee") + .or(name: "John").all assert st.length == 4 st = Student.where(name: "Daniel") - .and(name: "John").all + .and(name: "John").all assert st.length == 0 st = Student.where(name: "Daniel") - .and(birth_date: DateTime.parse('1978-01-04')).all + .and(birth_date: DateTime.parse('1978-01-04')).all assert st.length == 1 assert st.first.id.to_s["Daniel"] st = Student.where(name: "Daniel") - .or(name: "Louis") - .and(birth_date: DateTime.parse('1978-01-04')) + .or(name: "Louis") + .and(birth_date: DateTime.parse('1978-01-04')) assert st.length == 1 assert 
st.first.id.to_s["Daniel"] @@ -457,8 +473,8 @@ def test_combine_where_patterns_with_include st.each do |p| assert (p.name == "Susan" || p.name == "Daniel") assert Array, p.enrolled - assert (p.name == "Susan" && p.enrolled.length == 1) || - (p.name == "Daniel" && p.enrolled.length == 2) + assert (p.name == "Susan" && p.enrolled.length == 1) || + (p.name == "Daniel" && p.enrolled.length == 2) assert String, p.enrolled.first.university.address.first.country end end @@ -470,31 +486,31 @@ def test_filter f = Goo::Filter.new(:birth_date) > DateTime.parse('1978-01-03') st = Student.where.filter(f).all assert st.map { |x| x.id.to_s }.sort == ["http://goo.org/default/student/Daniel", - "http://goo.org/default/student/Lee", - "http://goo.org/default/student/Louis", - "http://goo.org/default/student/Robert"] + "http://goo.org/default/student/Lee", + "http://goo.org/default/student/Louis", + "http://goo.org/default/student/Robert"] f = (Goo::Filter.new(:birth_date) <= DateTime.parse('1978-01-01')) .or(Goo::Filter.new(:birth_date) >= DateTime.parse('1978-01-07')) st = Student.where.filter(f).all assert st.map { |x| x.id.to_s }.sort == [ - "http://goo.org/default/student/Robert", - "http://goo.org/default/student/Susan"] + "http://goo.org/default/student/Robert", + "http://goo.org/default/student/Susan"] f = (Goo::Filter.new(:birth_date) <= DateTime.parse('1978-01-01')) .or(Goo::Filter.new(:name) == "Daniel") st = Student.where.filter(f).all assert st.map { |x| x.id.to_s }.sort == [ - "http://goo.org/default/student/Daniel", - "http://goo.org/default/student/Susan"] + "http://goo.org/default/student/Daniel", + "http://goo.org/default/student/Susan"] f = (Goo::Filter.new(:birth_date) > DateTime.parse('1978-01-02')) .and(Goo::Filter.new(:birth_date) < DateTime.parse('1978-01-06')) st = Student.where.filter(f).all assert st.map { |x| x.id.to_s }.sort == [ - "http://goo.org/default/student/Daniel", - "http://goo.org/default/student/Louis", - "http://goo.org/default/student/Tim"] + 
"http://goo.org/default/student/Daniel", + "http://goo.org/default/student/Louis", + "http://goo.org/default/student/Tim"] f = Goo::Filter.new(enrolled: [ :credits ]) > 8 @@ -504,8 +520,8 @@ def test_filter #students without awards f = Goo::Filter.new(:awards).unbound st = Student.where.filter(f) - .include(:name) - .all + .include(:name) + .all assert st.map { |x| x.name }.sort == ["John","Tim","Louis","Lee","Robert"].sort #unbound on some non existing property @@ -540,7 +556,7 @@ def test_aggregated sts = Student.where.include(:name).aggregate(:count, :enrolled).all sts.each do |st| assert (st.name == "Daniel" && st.aggregates.first.value == 2) || - st.aggregates.first.value == 1 + st.aggregates.first.value == 1 end #students enrolled in more than 1 program and get the programs name From a95245b8c964431505ca6315907440996c59a00d Mon Sep 17 00:00:00 2001 From: Syphax bouazzouni Date: Fri, 28 Jun 2024 11:42:10 +0200 Subject: [PATCH 065/106] Fix: show multiple languages tags (#61) * add multilingual support tests * update the lang filter module to include not tagged values if multiple languages selected --- lib/goo/sparql/mixins/solution_lang_filter.rb | 26 +++--- lib/goo/sparql/solutions_mapper.rb | 13 ++- test/data/languages.nt | 9 ++ test/test_languages_filters.rb | 91 +++++++++++++++++++ 4 files changed, 125 insertions(+), 14 deletions(-) create mode 100644 test/data/languages.nt create mode 100644 test/test_languages_filters.rb diff --git a/lib/goo/sparql/mixins/solution_lang_filter.rb b/lib/goo/sparql/mixins/solution_lang_filter.rb index 32660cca..a007a3c5 100644 --- a/lib/goo/sparql/mixins/solution_lang_filter.rb +++ b/lib/goo/sparql/mixins/solution_lang_filter.rb @@ -50,9 +50,9 @@ def models_unmapped_to_array(m) def set_value(model, predicate, value, &block) language = object_language(value) - - if requested_lang.eql?(:ALL) || !literal?(value) || (language_match?(language) && can_add_new_value(model,predicate, language)) - block.call + + if 
requested_lang.eql?(:ALL) || !literal?(value) || (language_match?(language) && can_add_new_value?(model,predicate, language)) + block.call end if requested_lang.eql?(:ALL) || requested_lang.is_a?(Array) @@ -62,7 +62,7 @@ def set_value(model, predicate, value, &block) end - def can_add_new_value(model, predicate, new_language) + def can_add_new_value?(model, predicate, new_language) old_val = model.send(predicate) rescue nil list_attributes?(predicate) || old_val.blank? || !no_lang?(new_language) end @@ -70,7 +70,7 @@ def can_add_new_value(model, predicate, new_language) def no_lang?(language) language.nil? || language.eql?(:no_lang) end - + def model_group_by_lang(model) unmapped = model.unmapped cpy = {} @@ -86,9 +86,9 @@ def group_by_lang(values) return values.to_a if values.all?{|x| x.is_a?(RDF::URI) || !x.respond_to?(:language) } - values = values.group_by { |x| x.respond_to?(:language) && x.language ? x.language.to_s.downcase : :none } + values = values.group_by { |x| x.respond_to?(:language) && x.language ? x.language.to_s.downcase.to_sym : "@none" } - no_lang = values[:none] || [] + no_lang = values["@none"] || [] return no_lang if !no_lang.empty? && no_lang.all? { |x| x.respond_to?(:plain?) && !x.plain? 
} values @@ -103,9 +103,9 @@ def language_match?(language) # no_lang means that the object is not a literal return true if language.eql?(:no_lang) - return requested_lang.include?(language) if requested_lang.is_a?(Array) + return requested_lang.include?(language.upcase) if requested_lang.is_a?(Array) - language&.upcase.eql?(requested_lang) + language.upcase.eql?(requested_lang) end def literal?(object) @@ -114,7 +114,7 @@ def literal?(object) def store_objects_by_lang(id, predicate, object, language) # store objects in this format: [id][predicate][language] = [objects] - return if requested_lang.is_a?(Array) && !requested_lang.include?(language) + return if requested_lang.is_a?(Array) && !requested_lang.include?(language.upcase) && !language.eql?('@none') language_key = language.downcase @@ -172,7 +172,9 @@ def show_all_languages? def get_language(languages) languages = portal_language if languages.nil? || languages.empty? - lang = languages.to_s.split(',').map { |l| l.upcase.to_sym } + lang = languages + lang = languages.to_s.split(',') unless lang.is_a?(Array) + lang = lang.map { |l| l.upcase.to_sym } lang.length == 1 ? 
lang.first : lang end @@ -183,4 +185,4 @@ def portal_language end end end -end +end diff --git a/lib/goo/sparql/solutions_mapper.rb b/lib/goo/sparql/solutions_mapper.rb index 18ad4f06..090db691 100644 --- a/lib/goo/sparql/solutions_mapper.rb +++ b/lib/goo/sparql/solutions_mapper.rb @@ -28,8 +28,17 @@ def map_each_solutions(select) objects_new = {} list_attributes = Set.new(@klass.attributes(:list)) - @lang_filter = Goo::SPARQL::Solution::LanguageFilter.new(requested_lang: @options[:requested_lang].to_s, unmapped: @unmapped, - list_attributes: list_attributes) + @lang_filter = Goo::SPARQL::Solution::LanguageFilter.new(requested_lang: @options[:requested_lang], unmapped: @unmapped, + list_attributes: list_attributes) + + if @options[:page] + # for using prefixes before queries + # mdorf, 7/27/2023, AllegroGraph supplied a patch (rfe17161-7.3.1.fasl.patch) + # that enables implicit internal ordering. The patch requires the prefix below + select.prefix('franzOption_imposeImplicitBasicOrdering: ') + # mdorf, 1/24/2024, AllegroGraph 8 introduced a new feature that allows caching OFFSET/LIMIT queries + select.prefix('franzOption_allowCachingResults: ') + end select.each_solution do |sol| diff --git a/test/data/languages.nt b/test/data/languages.nt new file mode 100644 index 00000000..faf464f0 --- /dev/null +++ b/test/data/languages.nt @@ -0,0 +1,9 @@ + . + "John Doe"@en . + "Jean Dupont"@fr . + "Juan Pérez" . + . + "Paris"@en . + "Paris"@fr . + "París"@es . + "Berlin" . 
diff --git a/test/test_languages_filters.rb b/test/test_languages_filters.rb new file mode 100644 index 00000000..9fa0ccff --- /dev/null +++ b/test/test_languages_filters.rb @@ -0,0 +1,91 @@ +require_relative "test_case" +require_relative './app/models' + + + +class ExamplePerson < Goo::Base::Resource + model :person, namespace: :bioportal, name_with: lambda { |k| k.id }, + collection: :db + attribute :db, enforce: [ :database ] + attribute :label, namespace: :rdf, enforce: [ :list ] +end + +class ExamplePlace < Goo::Base::Resource + model :place, namespace: :bioportal, name_with: lambda { |k| k.id }, + collection: :db + attribute :db, enforce: [ :database ] + attribute :label, namespace: :rdf, enforce: [ :list ] +end + +class TestLanguageFilter < MiniTest::Unit::TestCase + def self.before_suite + RequestStore.store[:requested_lang] = Goo.main_languages.first + graph = RDF::URI.new(Test::Models::DATA_ID) + + database = Test::Models::Database.new + database.id = graph + database.name = "Census tiger 2002" + database.save + + @@db = Test::Models::Database.find(RDF::URI.new(Test::Models::DATA_ID)).first + @@person_id = RDF::URI.new "http://data.bioontology.org/resource1" + + + ntriples_file_path = "./test/data/languages.nt" + + Goo.sparql_data_client.put_triples( + graph, + ntriples_file_path, + mime_type = "application/x-turtle") + end + + def self.after_suite + graph = RDF::URI.new(Test::Models::DATA_ID) + Goo.sparql_data_client.delete_graph(graph) + database = Test::Models::Database.find(RDF::URI.new(Test::Models::DATA_ID)).first + database.delete if database + RequestStore.store[:requested_lang] = Goo.main_languages.first + end + + def setup + RequestStore.store[:requested_lang] = Goo.main_languages.first + end + + def test_one_language + # by default english and not tagged values + person = ExamplePerson.find(@@person_id).in(@@db).include(:label).first + assert_equal ["John Doe", "Juan Pérez"].sort, person.label.sort + + + # select french, return french values 
and not tagged values + RequestStore.store[:requested_lang] = :fr + person = ExamplePerson.find(@@person_id).in(@@db).include(:label).first + assert_equal ["Jean Dupont", "Juan Pérez"].sort, person.label.sort + + end + + def test_multiple_languages + # select all languages + RequestStore.store[:requested_lang] = :all + expected_result = {:en=>["John Doe"], :fr=>["Jean Dupont"], "@none"=>["Juan Pérez"]} + person = ExamplePerson.find(@@person_id).in(@@db).include(:label).first + assert_equal expected_result.values.flatten.sort, person.label.sort + + # using include_languages on any attribute returns an hash of {language: values} instead of the array of values + assert_equal expected_result, person.label(include_languages: true) + + # filter only french, english and not tagged values + RequestStore.store[:requested_lang] = [:fr, :en] + person = ExamplePerson.find(@@person_id).in(@@db).include(:label).first + assert_equal expected_result.values.flatten.sort.sort, person.label.sort + assert_equal expected_result, person.label(include_languages: true) + end + + + def test_language_not_found + RequestStore.store[:requested_lang] = :ar + person = ExamplePerson.find(@@person_id).in(@@db).include(:label).first + # will return only not tagged values if existent + assert_equal ["Juan Pérez"], person.label + end +end From 6018b33373467b778744432ec78a6d814159d129 Mon Sep 17 00:00:00 2001 From: Syphax Date: Fri, 18 Oct 2024 09:54:55 +0200 Subject: [PATCH 066/106] fix virtuoso unicode spaces issues --- lib/goo/sparql/query_builder.rb | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/lib/goo/sparql/query_builder.rb b/lib/goo/sparql/query_builder.rb index d41ced10..a6e03a58 100644 --- a/lib/goo/sparql/query_builder.rb +++ b/lib/goo/sparql/query_builder.rb @@ -159,7 +159,7 @@ def ids_filter(ids) filter_id = [] ids.each do |id| - filter_id << "?id = #{id.to_ntriples.to_s.gsub(' ', '%20')}" + filter_id << "?id = #{id.to_ntriples.to_s.gsub(' ', '%20').gsub("\\u0020", 
'%20')}" end filter_id_str = filter_id.join ' || ' @query.filter filter_id_str @@ -391,7 +391,7 @@ def query_filter_sparql(klass, filter, filter_patterns, filter_graphs, end filter_operations << ( "#{filter_var.to_s} #{sparql_op_string(filter_operation.operator)} " + - " #{value.to_ntriples}") + " #{value.to_ntriples.to_s.gsub("\\u0020", "%20")}") end end From f8ac7b00e8d8b46d1eea04de014175525c1cdd83 Mon Sep 17 00:00:00 2001 From: Syphax bouazzouni Date: Thu, 24 Oct 2024 04:22:03 +0200 Subject: [PATCH 067/106] Feature: small refactoring to add portal_language helper (#63) --- lib/goo.rb | 4 ++++ lib/goo/sparql/mixins/solution_lang_filter.rb | 6 +----- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/lib/goo.rb b/lib/goo.rb index a1399a84..1fdb985b 100644 --- a/lib/goo.rb +++ b/lib/goo.rb @@ -337,6 +337,10 @@ def self.sparql_backend_name(name=:main) return @@sparql_backends[name][:backend_name] end + def self.portal_language + @@main_languages.first.downcase.to_sym + end + def self.id_prefix return @@id_prefix end diff --git a/lib/goo/sparql/mixins/solution_lang_filter.rb b/lib/goo/sparql/mixins/solution_lang_filter.rb index a007a3c5..c291bea0 100644 --- a/lib/goo/sparql/mixins/solution_lang_filter.rb +++ b/lib/goo/sparql/mixins/solution_lang_filter.rb @@ -171,17 +171,13 @@ def show_all_languages? end def get_language(languages) - languages = portal_language if languages.nil? || languages.empty? + languages = Goo.portal_language if languages.nil? || languages.empty? lang = languages lang = languages.to_s.split(',') unless lang.is_a?(Array) lang = lang.map { |l| l.upcase.to_sym } lang.length == 1 ? 
lang.first : lang end - def portal_language - Goo.main_languages.first - end - end end end From 624387774e01e5042d4fa1cb8e5d8f9ea7a457e9 Mon Sep 17 00:00:00 2001 From: Syphax bouazzouni Date: Wed, 15 Jan 2025 23:22:53 +0100 Subject: [PATCH 068/106] Fix: implement an alternative to :ask queries to be compatible with all triple stores (#64) * handle the case where we don't use the indexation feature * change ask to select for better compatibility with virtuoso --- Gemfile.lock | 46 ++++++++++++++++-------------- lib/goo.rb | 2 +- lib/goo/base/resource.rb | 4 +-- lib/goo/base/settings/attribute.rb | 1 + lib/goo/search/search.rb | 8 +++--- lib/goo/sparql/queries.rb | 15 +++++----- mise.toml | 2 ++ 7 files changed, 42 insertions(+), 36 deletions(-) create mode 100644 mise.toml diff --git a/Gemfile.lock b/Gemfile.lock index dba79705..706b5eef 100644 --- a/Gemfile.lock +++ b/Gemfile.lock @@ -1,6 +1,6 @@ GIT remote: https://github.com/ontoportal-lirmm/sparql-client.git - revision: c96da3ad479724a31ccd6217ab9939dddfaca40e + revision: 59251e59346c9a69a67c88552ba55a1244eec602 branch: development specs: sparql-client (3.2.2) @@ -32,16 +32,16 @@ GEM multi_json (~> 1.3) thread_safe (~> 0.1) tzinfo (~> 0.3.37) - addressable (2.8.6) - public_suffix (>= 2.0.2, < 6.0) + addressable (2.8.7) + public_suffix (>= 2.0.2, < 7.0) base64 (0.2.0) - builder (3.2.4) + builder (3.3.0) coderay (1.1.3) - concurrent-ruby (1.2.3) - connection_pool (2.4.1) + concurrent-ruby (1.3.5) + connection_pool (2.5.0) cube-ruby (0.0.3) daemons (1.4.1) - docile (1.4.0) + docile (1.4.1) domain_name (0.6.20240107) eventmachine (1.2.7) faraday (2.7.11) @@ -49,32 +49,34 @@ GEM faraday-net_http (>= 2.0, < 3.1) ruby2_keywords (>= 0.0.4) faraday-net_http (3.0.2) - ffi (1.16.3) + ffi (1.17.1) htmlentities (4.3.4) http-accept (1.7.0) - http-cookie (1.0.5) + http-cookie (1.0.8) domain_name (~> 0.5) i18n (0.9.5) concurrent-ruby (~> 1.0) link_header (0.0.8) + logger (1.6.5) macaddr (1.7.2) systemu (~> 2.6.5) method_source
(1.1.0) - mime-types (3.5.2) + mime-types (3.6.0) + logger mime-types-data (~> 3.2015) - mime-types-data (3.2024.0305) + mime-types-data (3.2025.0107) minitest (4.7.5) multi_json (1.15.0) - mustermann (3.0.0) + mustermann (3.0.3) ruby2_keywords (~> 0.0.1) - net-http-persistent (4.0.2) + net-http-persistent (4.0.5) connection_pool (~> 2.2) netrc (0.11.0) - pry (0.14.2) + pry (0.15.2) coderay (~> 1.1) method_source (~> 1.0) - public_suffix (5.0.5) - rack (2.2.9) + public_suffix (5.1.1) + rack (2.2.10) rack-accept (0.4.5) rack (>= 0.4) rack-post-body-to-params (0.1.8) @@ -98,18 +100,18 @@ GEM rdf-xsd (3.2.1) rdf (~> 3.2) rexml (~> 3.2) - redis (5.2.0) + redis (5.3.0) redis-client (>= 0.22.0) - redis-client (0.22.1) + redis-client (0.23.1) connection_pool - request_store (1.6.0) + request_store (1.7.0) rack (>= 1.4) rest-client (2.1.0) http-accept (>= 1.7.0, < 2.0) http-cookie (>= 1.0.2, < 2.0) mime-types (>= 1.16, < 4.0) netrc (~> 0.8) - rexml (3.2.6) + rexml (3.4.0) rsolr (2.6.0) builder (>= 2.1.2) faraday (>= 0.9, < 3, != 2.0.0) @@ -121,7 +123,7 @@ GEM simplecov-cobertura (2.1.0) rexml simplecov (~> 0.19) - simplecov-html (0.12.3) + simplecov-html (0.13.1) simplecov_json_formatter (0.1.4) sinatra (3.2.0) mustermann (~> 3.0) @@ -134,7 +136,7 @@ GEM eventmachine (~> 1.0, >= 1.0.4) rack (>= 1, < 3) thread_safe (0.3.6) - tilt (2.3.0) + tilt (2.6.0) tzinfo (0.3.62) uuid (2.3.9) macaddr (~> 1.0) diff --git a/lib/goo.rb b/lib/goo.rb index 1fdb985b..3c7a45e5 100644 --- a/lib/goo.rb +++ b/lib/goo.rb @@ -257,6 +257,7 @@ def self.configure yield self configure_sanity_check + init_search_connections @@namespaces.freeze @@ -410,4 +411,3 @@ def call(env) Goo::Filter = Goo::Base::Filter Goo::Pattern = Goo::Base::Pattern Goo::Collection = Goo::Base::Collection - diff --git a/lib/goo/base/resource.rb b/lib/goo/base/resource.rb index 5c829d8e..e92c7d60 100644 --- a/lib/goo/base/resource.rb +++ b/lib/goo/base/resource.rb @@ -149,9 +149,9 @@ def delete(*args) raise ArgumentError, "This 
object is not persistent and cannot be deleted" if !@persistent - if !fully_loaded? + unless fully_loaded? missing = missing_load_attributes - options_load = { models: [ self ], klass: self.class, :include => missing } + options_load = { models: [self], klass: self.class, :include => missing } options_load[:collection] = self.collection if self.class.collection_opts Goo::SPARQL::Queries.model_load(options_load) end diff --git a/lib/goo/base/settings/attribute.rb b/lib/goo/base/settings/attribute.rb index dbf52b78..ff925e4d 100644 --- a/lib/goo/base/settings/attribute.rb +++ b/lib/goo/base/settings/attribute.rb @@ -188,6 +188,7 @@ def inverse_opts(attr) end def attribute_uri(attr, *args) + attr = attr.to_sym if attr == :id raise ArgumentError, ":id cannot be treated as predicate for .where, use find " end diff --git a/lib/goo/search/search.rb b/lib/goo/search/search.rb index b0cccfce..80da1537 100644 --- a/lib/goo/search/search.rb +++ b/lib/goo/search/search.rb @@ -17,7 +17,7 @@ def index(connection_name = nil, to_set = nil) connection_name ||= self.class.search_collection_name unindex(connection_name) - self.class.search_client(connection_name).index_document(document) + self.class.search_client(connection_name)&.index_document(document) end def index_update(attributes_to_update, connection_name = nil, to_set = nil) @@ -45,7 +45,7 @@ def index_update(attributes_to_update, connection_name = nil, to_set = nil) def unindex(connection_name = nil) connection_name ||= self.class.search_collection_name - self.class.search_client(connection_name).delete_by_id(index_id) + self.class.search_client(connection_name)&.delete_by_id(index_id) end # default implementation, should be overridden by child class @@ -173,11 +173,11 @@ def unindexBatch(collection, connection_name = search_collection_name) end def unindexByQuery(query, connection_name = search_collection_name) - search_client(connection_name).delete_by_query(query) + search_client(connection_name)&.delete_by_query(query) 
end def indexCommit(attrs = nil, connection_name = search_collection_name) - search_client(connection_name).index_commit(attrs) + search_client(connection_name)&.index_commit(attrs) end def indexOptimize(attrs = nil, connection_name = search_collection_name) diff --git a/lib/goo/sparql/queries.rb b/lib/goo/sparql/queries.rb index 4e85d52e..e699a4e4 100644 --- a/lib/goo/sparql/queries.rb +++ b/lib/goo/sparql/queries.rb @@ -8,10 +8,10 @@ module Queries def self.duplicate_attribute_value?(model,attr,store=:main) value = model.instance_variable_get("@#{attr}") if !value.instance_of? Array - so = Goo.sparql_query_client(store).ask.from(model.graph). - whether([:id, model.class.attribute_uri(attr), value]). + so = Goo.sparql_query_client(store).select(:id).from(model.graph). + where([:id, model.class.attribute_uri(attr), value]). filter("?id != #{model.id.to_ntriples}") - return so.true? + return !so.solutions.empty? else #not yet support for unique arrays end @@ -45,11 +45,12 @@ def self.graph_predicates(*graphs) def self.model_exist(model,id=nil,store=:main) id = id || model.id - so = Goo.sparql_query_client(store).ask.from(model.graph). - whether([id, RDF.type, model.class.uri_type(model.collection)]) - return so.true? - end + so = Goo.sparql_query_client(store).select(:id).from(model.graph). + where([:id, RDF.type, model.class.uri_type(model.collection)]) + .filter("?id = #{id.to_ntriples}") + return !so.solutions.empty? 
+ end def self.model_load(*options) Goo::SPARQL::Loader.model_load(*options) diff --git a/mise.toml b/mise.toml new file mode 100644 index 00000000..83aa57a8 --- /dev/null +++ b/mise.toml @@ -0,0 +1,2 @@ +[tools] +ruby = "2.7.8" From 8d108c23a043039e9675b36f8f444d29a87b11fe Mon Sep 17 00:00:00 2001 From: Syphax bouazzouni Date: Fri, 17 Jan 2025 10:00:10 +0100 Subject: [PATCH 069/106] Feature: Add Triple store queries logging (#65) * handle the case where we don't use the indexation feature * change ask to select for better compatibility with virtuoso * update append triples function to catch errors and log them * set the appended chunk size to 50 000 lines per chunk --- Gemfile.lock | 4 +- lib/goo/sparql/client.rb | 88 +++++++++++++++++++++++++--------------- 2 files changed, 58 insertions(+), 34 deletions(-) diff --git a/Gemfile.lock b/Gemfile.lock index 706b5eef..3fe30043 100644 --- a/Gemfile.lock +++ b/Gemfile.lock @@ -1,6 +1,6 @@ GIT remote: https://github.com/ontoportal-lirmm/sparql-client.git - revision: 59251e59346c9a69a67c88552ba55a1244eec602 + revision: 24bccbd0f4a5150fa6ce2af50d7c378c681027ea branch: development specs: sparql-client (3.2.2) @@ -102,7 +102,7 @@ GEM rexml (~> 3.2) redis (5.3.0) redis-client (>= 0.22.0) - redis-client (0.23.1) + redis-client (0.23.2) connection_pool request_store (1.7.0) rack (>= 1.4) diff --git a/lib/goo/sparql/client.rb b/lib/goo/sparql/client.rb index cf958398..9701d131 100644 --- a/lib/goo/sparql/client.rb +++ b/lib/goo/sparql/client.rb @@ -14,7 +14,6 @@ class Client < RSPARQL::Client "text/x-nquads" => "nquads" } - def status_based_sleep_time(operation) sleep(0.5) st = self.status @@ -43,65 +42,90 @@ def initialize(g, silent: false) @caching_options = { :graph => @graph.to_s } @silent = silent end + def to_s "DROP #{@silent ?
'SILENT' : ''} GRAPH <#{@graph.to_s}>" end + def options - #Returns the caching option + # Returns the caching option @caching_options end end - def bnodes_filter_file(file_path,mime_type) + def bnodes_filter_file(file_path, mime_type) mime_type = "application/rdf+xml" if mime_type.nil? format = MIMETYPE_RAPPER_MAP[mime_type] if format.nil? raise Exception, "mime_type #{mime_type} not supported in slicing" end dir = Dir.mktmpdir("file_nobnodes") - dst_path = File.join(dir,"data.nt") - dst_path_bnodes_out = File.join(dir,"data_no_bnodes.nt") + dst_path = File.join(dir, "data.nt") + dst_path_bnodes_out = File.join(dir, "data_no_bnodes.nt") out_format = format == "nquads" ? "nquads" : "ntriples" rapper_command_call = "rapper -i #{format} -o #{out_format} #{file_path} > #{dst_path}" - stdout,stderr,status = Open3.capture3(rapper_command_call) + stdout, stderr, status = Open3.capture3(rapper_command_call) if not status.success? raise Exception, "Rapper cannot parse #{format} file at #{file_path}: #{stderr}" end filter_command = "LANG=C grep -v '_:genid' #{dst_path} > #{dst_path_bnodes_out}" - stdout,stderr,status = Open3.capture3(filter_command) + stdout, stderr, status = Open3.capture3(filter_command) if not status.success? 
raise Exception, "could not `#{filter_command}`: #{stderr}" end - return dst_path_bnodes_out,dir + return dst_path_bnodes_out, dir end def delete_data_graph(graph) Goo.sparql_update_client.update(DropGraph.new(graph, silent: Goo.backend_vo?)) end - def append_triples_no_bnodes(graph,file_path,mime_type_in) - bnodes_filter = nil + def append_triples_batch(graph, triples, mime_type_in, current_line = 0) + begin + puts "Appending triples in batch of #{triples.size} triples from line #{current_line}" + execute_append_request graph, triples.join, mime_type_in + rescue RestClient::Exception => e + puts "Error in appending triples request: #{e.response}" + if triples.size < 100 + triples.each_with_index do |line, i| + begin + execute_append_request graph, line, mime_type_in + rescue RestClient::Exception => e + puts "Error in append request: #{e.response} line #{i + current_line}: #{line}" + end + end + else + half = triples.size / 2 + append_triples_batch(graph, triples[0..half], mime_type_in, current_line) + append_triples_batch(graph, triples[half..-1], mime_type_in, current_line + half) + end + + end + end + + def append_triples_no_bnodes(graph, file_path, mime_type_in) dir = nil response = nil - if file_path.end_with?('ttl') + if file_path.end_with?('ttl') || file_path.end_with?('nt') || file_path.end_with?('n3') bnodes_filter = file_path else bnodes_filter, dir = bnodes_filter_file(file_path, mime_type_in) end - chunk_lines = 500_000 # number of line + chunk_lines = 50_000 # number of line file = File.foreach(bnodes_filter) lines = [] + line_count = 0 file.each_entry do |line| lines << line if lines.size == chunk_lines - response = execute_append_request graph, lines.join, mime_type_in + response = append_triples_batch(graph, lines, mime_type_in, line_count) + line_count += lines.size lines.clear end end - response = execute_append_request graph, lines.join, mime_type_in unless lines.empty? 
- + response = append_triples_batch(graph, lines, mime_type_in, line_count) unless lines.empty? unless dir.nil? File.delete(bnodes_filter) @@ -115,43 +139,43 @@ def append_triples_no_bnodes(graph,file_path,mime_type_in) response end - def append_data_triples(graph,data,mime_type) + def append_data_triples(graph, data, mime_type) f = Tempfile.open('data_triple_store') f.write(data) f.close() - res = append_triples_no_bnodes(graph,f.path,mime_type) + res = append_triples_no_bnodes(graph, f.path, mime_type) return res end - def put_triples(graph,file_path,mime_type=nil) + def put_triples(graph, file_path, mime_type = nil) delete_graph(graph) - result = append_triples_no_bnodes(graph,file_path,mime_type) - Goo.sparql_query_client.cache_invalidate_graph(graph) + result = append_triples_no_bnodes(graph, file_path, mime_type) + Goo.sparql_query_client.cache.invalidate(graph) result end - def append_triples(graph,data,mime_type=nil) - result = append_data_triples(graph,data,mime_type) - Goo.sparql_query_client.cache_invalidate_graph(graph) + def append_triples(graph, data, mime_type = nil) + result = append_data_triples(graph, data, mime_type) + Goo.sparql_query_client.cache.invalidate(graph) result end - def append_triples_from_file(graph,file_path,mime_type=nil) + def append_triples_from_file(graph, file_path, mime_type = nil) if mime_type == "text/nquads" && !graph.instance_of?(Array) raise Exception, "Nquads need a list of graphs, #{graph} provided" end - result = append_triples_no_bnodes(graph,file_path,mime_type) - Goo.sparql_query_client.cache_invalidate_graph(graph) + result = append_triples_no_bnodes(graph, file_path, mime_type) + Goo.sparql_query_client.cache.invalidate(graph) result end def delete_graph(graph) result = delete_data_graph(graph) - Goo.sparql_query_client.cache_invalidate_graph(graph) + Goo.sparql_query_client.cache.invalidate(graph) return result end - def extract_number_from(i,text) + def extract_number_from(i, text) res = [] while (text[i] != 
'<') res << text[i] @@ -167,7 +191,7 @@ def status resp_text = nil begin - resp_text = Net::HTTP.get(URI(status_url)) + resp_text = Net::HTTP.get(URI(status_url)) rescue StandardError => e resp[:exception] = "Error connecting to triple store: #{e.class}: #{e.message}\n#{e.backtrace.join("\n\t")}" return resp @@ -192,7 +216,7 @@ def params_for_backend(graph, data_file, mime_type_in, method = :post) graph = "http://data.bogus.graph/uri" end - params = {method: method, url: "#{url.to_s}", headers: {"content-type" => mime_type, "mime-type" => mime_type}, timeout: nil} + params = { method: method, url: "#{url.to_s}", headers: { "content-type" => mime_type, "mime-type" => mime_type }, timeout: nil } if Goo.backend_4s? params[:payload] = { @@ -200,8 +224,8 @@ def params_for_backend(graph, data_file, mime_type_in, method = :post) data: data_file, 'mime-type' => mime_type } - #for some reason \\\\ breaks parsing - params[:payload][:data] = params[:payload][:data].split("\n").map { |x| x.sub("\\\\","") }.join("\n") + # for some reason \\\\ breaks parsing + params[:payload][:data] = params[:payload][:data].split("\n").map { |x| x.sub("\\\\", "") }.join("\n") elsif Goo.backend_vo? 
params[:url] = "http://localhost:8890/sparql-graph-crud?graph=#{CGI.escape(graph.to_s)}" params[:payload] = data_file From b8eb3d0889d00b5aebb3d49deb00dfe398ad166b Mon Sep 17 00:00:00 2001 From: Syphax bouazzouni Date: Tue, 21 Jan 2025 21:53:28 +0100 Subject: [PATCH 070/106] Feature: Add paginated all function for resources and logging config and tests (#67) * handle the case were we don't use the indexation feature * change ask to select for better compatibility with virtuoso * update append triples function to catch errors and log them * put the number of appended chunks to 50 000 line by chunck * update chunks insert for virtuoso to be 50k lines instead of 500k to prevent crashing * implement mode paginated_all that will get all the resources using pagination * add query logging configuration and remove cube * add logging test --- .gitignore | 5 +++- Gemfile.lock | 2 +- lib/goo.rb | 60 +++++++++++++++++++++++----------------- lib/goo/base/where.rb | 20 +++++++++++++- lib/goo/config/config.rb | 6 +++- lib/goo/sparql/client.rb | 9 +++++- test/test_logging.rb | 54 ++++++++++++++++++++++++++++++++++++ 7 files changed, 125 insertions(+), 31 deletions(-) create mode 100644 test/test_logging.rb diff --git a/.gitignore b/.gitignore index 5dcefa7c..3f0f3286 100644 --- a/.gitignore +++ b/.gitignore @@ -26,4 +26,7 @@ doc/ .idea/* projectFilesBackup/* -config/config.rb \ No newline at end of file +config/config.rb +queries.txt + +*.iml diff --git a/Gemfile.lock b/Gemfile.lock index 3fe30043..e5d8d339 100644 --- a/Gemfile.lock +++ b/Gemfile.lock @@ -1,6 +1,6 @@ GIT remote: https://github.com/ontoportal-lirmm/sparql-client.git - revision: 24bccbd0f4a5150fa6ce2af50d7c378c681027ea + revision: d4737ff08f33517cf93b4d82c78a471017991d97 branch: development specs: sparql-client (3.2.2) diff --git a/lib/goo.rb b/lib/goo.rb index 3c7a45e5..41b2e808 100644 --- a/lib/goo.rb +++ b/lib/goo.rb @@ -52,7 +52,8 @@ module Goo @@uuid = UUID.new @@debug_enabled = false @@use_cache = false - + 
@@query_logging = false + @@query_logging_file = './queries.log' @@slice_loading_size = 500 @@ -122,7 +123,8 @@ def self.add_sparql_backend(name, *opts) headers: { "Content-Type" => "application/x-www-form-urlencoded", "Accept" => "application/sparql-results+json"}, read_timeout: 10000, validate: false, - redis_cache: @@redis_client) + redis_cache: @@redis_client, + logger: query_logging? ? Logger.new(query_logging_file) : nil) @@sparql_backends[name][:update] = Goo::SPARQL::Client.new(opts[:update], protocol: "1.1", headers: { "Content-Type" => "application/x-www-form-urlencoded", "Accept" => "application/sparql-results+json"}, @@ -174,6 +176,25 @@ def self.queries_debug? return @@debug_enabled end + def self.query_logging? + @@query_logging + end + + def self.query_logging_file + @@query_logging_file + end + + def self.query_logging=(value) + @@query_logging = value + end + def self.query_logging_file=(value) + @@query_logging_file = value + end + + def self.logger + return @@sparql_backends[:main][:query].logger + end + def self.add_search_backend(name, *opts) opts = opts[0] unless opts.include? :service @@ -193,6 +214,12 @@ def self.add_redis_backend(*opts) set_sparql_cache end + def self.add_query_logger(enabled: false, file: ) + @@query_logging = enabled + @@query_logging_file = file + set_query_logging + end + def self.set_sparql_cache if @@sparql_backends.length > 0 && @@use_cache @@sparql_backends.each do |k,epr| @@ -209,38 +236,19 @@ def self.set_sparql_cache end end - def self.set_cube_client - if @@sparql_backends.length > 0 && @@cube_options + + def self.set_query_logging + if @@sparql_backends.length > 0 && query_logging? 
@@sparql_backends.each do |k,epr| - epr[:query].cube_options= @@cube_options - epr[:data].cube_options= @@cube_options - epr[:update].cube_options= @@cube_options + epr[:query].logger = Logger.new(query_logging_file) end - puts "Using cube options in Goo #{@@cube_options}" elsif @@sparql_backends.length > 0 @@sparql_backends.each do |k,epr| - epr[:query].cube_options= nil - epr[:data].cube_options= nil - epr[:update].cube_options=nil + epr[:query].logger = nil end end end - def self.enable_cube - if not block_given? - raise ArgumentError, "Cube configuration needs to receive a code block" - end - cube_options = {} - yield cube_options - @@cube_options = cube_options - set_cube_client - end - - def self.disable_cube - @@cube_options = nil - set_cube_client - end - def self.configure_sanity_check() unless @@namespaces.length > 0 raise ArgumentError, "Namespaces needs to be provided." diff --git a/lib/goo/base/where.rb b/lib/goo/base/where.rb index d4668e4c..05a06984 100644 --- a/lib/goo/base/where.rb +++ b/lib/goo/base/where.rb @@ -168,8 +168,26 @@ def index_as(index_key,max=nil) return rclient.llen(final_key) end + def paginated_all(page_size=1000) + page = 1 + page_size = 10000 + result = [] + old_count = -1 + count = 0 + while count != old_count + old_count = count + @page_i = page + @page_size = page_size + result += process_query(count=false) + page += 1 + count = result.length + end + result + end + def all - process_query unless @result + return @result if @result + process_query @result end alias_method :to_a, :all diff --git a/lib/goo/config/config.rb b/lib/goo/config/config.rb index 2019893c..8c2839ce 100644 --- a/lib/goo/config/config.rb +++ b/lib/goo/config/config.rb @@ -24,11 +24,14 @@ def config(&block) @settings.redis_host ||= ENV['REDIS_HOST'] || 'localhost' @settings.redis_port ||= ENV['REDIS_PORT'] || 6379 @settings.bioportal_namespace ||= ENV['BIOPORTAL_NAMESPACE'] || 'http://data.bioontology.org/' + @settings.query_logging ||= 
ENV['QUERIES_LOGGING'] || false + @settings.query_logging_file||= ENV['QUERIES_LOGGING_FILE'] || './sparql.log' @settings.queries_debug ||= ENV['QUERIES_DEBUG'] || false @settings.slice_loading_size ||= ENV['GOO_SLICES'] || 500 puts "(GOO) >> Using RDF store (#{@settings.goo_backend_name}) #{@settings.goo_host}:#{@settings.goo_port}#{@settings.goo_path_query}" puts "(GOO) >> Using term search server at #{@settings.search_server_url}" puts "(GOO) >> Using Redis instance at #{@settings.redis_host}:#{@settings.redis_port}" + puts "(GOO) >> Using Query logging: #{@settings.query_logging_file}" if @settings.query_logging connect_goo end @@ -42,9 +45,10 @@ def connect_goo query: "http://#{@settings.goo_host}:#{@settings.goo_port}#{@settings.goo_path_query}", data: "http://#{@settings.goo_host}:#{@settings.goo_port}#{@settings.goo_path_data}", update: "http://#{@settings.goo_host}:#{@settings.goo_port}#{@settings.goo_path_update}", - options: { rules: :NONE }) + options: { rules: :NONE}) conf.add_search_backend(:main, service: @settings.search_server_url) conf.add_redis_backend(host: @settings.goo_redis_host, port: @settings.goo_redis_port) + conf.add_query_logger(enabled: @settings.query_logging, file: @settings.query_logging_file) conf.add_namespace(:omv, RDF::Vocabulary.new("http://omv.org/ontology/")) conf.add_namespace(:skos, RDF::Vocabulary.new("http://www.w3.org/2004/02/skos/core#")) diff --git a/lib/goo/sparql/client.rb b/lib/goo/sparql/client.rb index 9701d131..f04d5dbc 100644 --- a/lib/goo/sparql/client.rb +++ b/lib/goo/sparql/client.rb @@ -112,12 +112,19 @@ def append_triples_no_bnodes(graph, file_path, mime_type_in) else bnodes_filter, dir = bnodes_filter_file(file_path, mime_type_in) end - chunk_lines = 50_000 # number of line + + if Goo.backend_vo? || Goo.backend_ag? 
+ chunk_lines = 50_000 # number of line + else + chunk_lines = 500_000 # number of line + end + file = File.foreach(bnodes_filter) lines = [] line_count = 0 file.each_entry do |line| lines << line + if lines.size == chunk_lines response = append_triples_batch(graph, lines, mime_type_in, line_count) line_count += lines.size diff --git a/test/test_logging.rb b/test/test_logging.rb new file mode 100644 index 00000000..112efd08 --- /dev/null +++ b/test/test_logging.rb @@ -0,0 +1,54 @@ +require_relative 'test_case' +require_relative 'models' + +class TestLogging < MiniTest::Unit::TestCase + + def self.before_suite + GooTestData.create_test_case_data + Goo.use_cache = true + Goo.redis_client.flushdb + Goo.add_query_logger(enabled: true, file: "test.log") + end + + def self.after_suite + GooTestData.delete_test_case_data + Goo.add_query_logger(enabled: false, file: nil) + File.delete("test.log") if File.exist?("test.log") + Goo.redis_client.flushdb + Goo.use_cache = false + end + + def setup + Goo.redis_client.flushdb + end + + def test_logging + Goo.logger.info("Test logging") + University.all + recent_logs = Goo.logger.get_logs + assert_equal 3, recent_logs.length + assert recent_logs.any? { |x| x['query'].include?("Test logging") } + assert File.read("test.log").include?("Test logging") + end + + def test_last_10s_logs + Goo.logger.info("Test logging 2") + University.all + recent_logs = Goo.logger.queries_last_n_seconds(1) + assert_equal 3, recent_logs.length + assert recent_logs.any? 
{ |x| x['query'].include?("Test logging 2") } + assert File.read("test.log").include?("Test logging 2") + sleep 1 + recent_logs = Goo.logger.queries_last_n_seconds(1) + assert_equal 0, recent_logs.length + end + + def test_auto_clean_logs + Goo.logger.info("Test logging 3") + (1..3000).each do |_i| + University.all + end + recent_logs = Goo.logger.get_logs + assert recent_logs.length < 2000 + end +end From 27300f28ca6c656c7e78af65013d88b792a6312f Mon Sep 17 00:00:00 2001 From: Bilel Kihal <61744974+Bilelkihal@users.noreply.github.com> Date: Wed, 22 Jan 2025 02:13:09 +0100 Subject: [PATCH 071/106] fix when label is url issue in ontology processing (#66) --- lib/goo/sparql/mixins/solution_lang_filter.rb | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/lib/goo/sparql/mixins/solution_lang_filter.rb b/lib/goo/sparql/mixins/solution_lang_filter.rb index c291bea0..47f0ade3 100644 --- a/lib/goo/sparql/mixins/solution_lang_filter.rb +++ b/lib/goo/sparql/mixins/solution_lang_filter.rb @@ -14,10 +14,10 @@ def initialize(requested_lang: RequestStore.store[:requested_lang], unmapped: fa def fill_models_with_all_languages(models_by_id) objects_by_lang.each do |id, predicates| + model = models_by_id[id] predicates.each do |predicate, values| - - if values.values.all? { |v| v.all? { |x| literal?(x) && x.plain?} } + if values.values.any? { |v| v.all? { |x| literal?(x) && x.plain?} } pull_stored_values(model, values, predicate, @unmapped) end end @@ -142,10 +142,10 @@ def pull_stored_values(model, values, predicate, unmapped) add_unmapped_to_model(model, predicate, values) else values = values.map do |language, values_literals| - values_string = values_literals.map{|x| x.object} + values_string = values_literals.select{|x| literal?(x) && x.plain?}.map{|x| x.object} values_string = values_string.first unless list_attributes?(predicate) [language, values_string] - end.to_h + end.to_h.reject { |_key, value| value.empty? 
} model.send("#{predicate}=", values, on_load: true) end @@ -181,4 +181,4 @@ def get_language(languages) end end end -end +end From 5825dc1f9d0ff439b1ba9d8f78fa7bb20b1c65d0 Mon Sep 17 00:00:00 2001 From: Bilel Kihal <61744974+Bilelkihal@users.noreply.github.com> Date: Wed, 22 Jan 2025 15:39:42 +0100 Subject: [PATCH 072/106] fix: parsing ontologies issue, when concept label is url (suite) (#68) --- lib/goo/sparql/mixins/solution_lang_filter.rb | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/lib/goo/sparql/mixins/solution_lang_filter.rb b/lib/goo/sparql/mixins/solution_lang_filter.rb index 47f0ade3..cf97fd07 100644 --- a/lib/goo/sparql/mixins/solution_lang_filter.rb +++ b/lib/goo/sparql/mixins/solution_lang_filter.rb @@ -17,7 +17,9 @@ def fill_models_with_all_languages(models_by_id) model = models_by_id[id] predicates.each do |predicate, values| - if values.values.any? { |v| v.all? { |x| literal?(x) && x.plain?} } + + if values.values.any? { |v| v.any? { |x| literal?(x) && x.plain?} } + pull_stored_values(model, values, predicate, @unmapped) end end From e02a1a07118a938385364c7729b3f478a3a234cd Mon Sep 17 00:00:00 2001 From: Imad Bourouche Date: Wed, 5 Feb 2025 13:21:23 +0100 Subject: [PATCH 073/106] Fix: bring attributes that has the handler (#73) * bring attributes that has handler in the bring method * fix test_method_handler test --- lib/goo/base/resource.rb | 10 +++++----- lib/goo/base/where.rb | 4 ++-- test/test_model_complex.rb | 25 ++++++++++++++++--------- 3 files changed, 23 insertions(+), 16 deletions(-) diff --git a/lib/goo/base/resource.rb b/lib/goo/base/resource.rb index e92c7d60..5f59713e 100644 --- a/lib/goo/base/resource.rb +++ b/lib/goo/base/resource.rb @@ -179,13 +179,13 @@ def delete(*args) def bring(*opts) opts.each do |k| if k.kind_of?(Hash) - k.each do |k2,v| - raise ArgumentError, "Unable to bring a method based attr #{k2}" if self.class.handler?(k2) - self.instance_variable_set("@#{k2}",nil) + k.each do |k2,_| + 
instance_variable_set("@#{k2}", nil) + send(k2) if self.class.handler?(k2) end else - raise ArgumentError, "Unable to bring a method based attr #{k}" if self.class.handler?(k) - self.instance_variable_set("@#{k}",nil) + instance_variable_set("@#{k}", nil) + send(k) if self.class.handler?(k) end end query = self.class.where.models([self]).include(*opts) diff --git a/lib/goo/base/where.rb b/lib/goo/base/where.rb index 05a06984..8380424a 100644 --- a/lib/goo/base/where.rb +++ b/lib/goo/base/where.rb @@ -260,13 +260,13 @@ def include(*options) options.each do |opt| if opt.instance_of?(Symbol) if @klass.handler?(opt) - raise ArgumentError, "Method based attribute cannot be included" + next end end if opt.instance_of?(Hash) opt.each do |k,v| if @klass.handler?(k) - raise ArgumentError, "Method based attribute cannot be included" + next end end end diff --git a/test/test_model_complex.rb b/test/test_model_complex.rb index 38a282d6..b11a348c 100644 --- a/test/test_model_complex.rb +++ b/test/test_model_complex.rb @@ -98,19 +98,26 @@ def test_method_handler x.id = RDF::URI.new "http://someiri.org/term/x" x.prefLabel = "x" x.save - assert_raises ArgumentError do - y = Term.find(x.id).in(sub).include(:methodBased).first - end + # Chech the methodBased is not included + y = Term.find(x.id).in(sub).include(:methodBased).first + assert_kind_of TestComplex::Term, y + refute y.loaded_attributes.include?(:methodBased) + assert_raises ArgumentError do y = Term.find(x.id).in(sub).include(methodBased: [:prefLabel]).first end - assert_raises ArgumentError do - y = Term.where.in(sub).include(:methodBased).all - end + + # Chech there is result and the methodBased is not included + y = Term.where.in(sub).include(:methodBased).all + assert_kind_of Array, y + refute_empty y + assert_kind_of TestComplex::Term, y.first + refute y.first.loaded_attributes.include?(:methodBased) + + # Chech the methodBased is brought by the bring y = Term.find(x.id).in(sub).first - assert_raises ArgumentError do 
- y.bring(:methodBased) - end + y.bring(:methodBased) + assert_includes y.loaded_attributes.to_a, :methodBased y.delete sub.delete end From 5617d26bf4ea4feadd41d33cb9e81ea5c25dab24 Mon Sep 17 00:00:00 2001 From: Imad Bourouche Date: Wed, 5 Feb 2025 14:45:39 +0100 Subject: [PATCH 074/106] Fix: handle default attribute in case it's not proc (#71) * handle default attribute in case it's not proc * add tests for the default values in the yaml scheme file --- lib/goo/base/resource.rb | 3 +- test/data/yaml_scheme_model_test.yml | 19 ++++++++++ test/test_dsl_settings.rb | 57 ++++++++++++++++++---------- 3 files changed, 58 insertions(+), 21 deletions(-) diff --git a/lib/goo/base/resource.rb b/lib/goo/base/resource.rb index 5f59713e..e3459b97 100644 --- a/lib/goo/base/resource.rb +++ b/lib/goo/base/resource.rb @@ -255,7 +255,8 @@ def save(*opts) self.class.attributes_with_defaults.each do |attr| value = self.send("#{attr}") if value.nil? - value = self.class.default(attr).call(self) + value = self.class.default(attr) + value = value.call(self) if value.is_a?(Proc) self.send("#{attr}=", value) end end diff --git a/test/data/yaml_scheme_model_test.yml b/test/data/yaml_scheme_model_test.yml index fd8c4921..b184a117 100644 --- a/test/data/yaml_scheme_model_test.yml +++ b/test/data/yaml_scheme_model_test.yml @@ -9,3 +9,22 @@ nationality: enforcedValues: {'fr': 'france', 'us': 'USA'} +test_string: + default: "Test String" + +test_integer: + default: 2 + +test_list: + default: + - item1 + - item2 + +test_float: + default: 3.14 + +test_uri: + default: https://example.com/term1 + +test_boolean: + default: false \ No newline at end of file diff --git a/test/test_dsl_settings.rb b/test/test_dsl_settings.rb index 3b8f493b..05ad1133 100644 --- a/test/test_dsl_settings.rb +++ b/test/test_dsl_settings.rb @@ -60,6 +60,13 @@ class YamlSchemeModelTest < Goo::Base::Resource attribute :friends, enforce: [ :existence , PersonModel] attribute :status, enforce: [ :existence, :status ], default: 
lambda { |record| StatusModel.find("single") } + + attribute :test_string, enforce: [:string] + attribute :test_integer, enforce: [:integer] + attribute :test_list, enforce: [:list] + attribute :test_float, enforce: [:float] + attribute :test_uri, enforce: [:uri] + attribute :test_boolean, enforce: [:boolean] end @@ -82,6 +89,36 @@ def test_default_value assert_equal nil, person.created end + def test_model_with_yaml_scheme + + settings = YamlSchemeModelTest.model_settings + attributes_settings = settings[:attributes] + + + assert_equal "test/data/yaml_scheme_model_test.yml", settings[:scheme] + + assert_equal 'Name', attributes_settings[:name][:label] + assert_equal 'Person name', attributes_settings[:name][:description] + assert_equal %w[test:name test2:name test3:person_name], attributes_settings[:name][:equivalents] + assert_equal 'Put the person name as string', attributes_settings[:name][:help] + assert_equal 'John', attributes_settings[:name][:example] + + + assert_equal 'Person nationality', attributes_settings[:nationality][:label] + hash = {fr: 'france', us: 'USA'} + assert_equal hash, attributes_settings[:nationality][:enforcedValues] + end + + def test_default_value_with_yaml_scheme + settings = YamlSchemeModelTest.model_settings + attributes_settings = settings[:attributes] + assert_equal 'Test String', attributes_settings[:test_string][:default] + assert_equal 2, attributes_settings[:test_integer][:default] + assert_equal ["item1", "item2"], attributes_settings[:test_list][:default] + assert_equal 3.14, attributes_settings[:test_float][:default] + assert_equal "https://example.com/term1", attributes_settings[:test_uri][:default] + assert_equal false, attributes_settings[:test_boolean][:default] + end private def _test_attributes_enforce(model) @@ -187,25 +224,5 @@ def _test_attributes_enforce(model) assert !person.valid? 
end - def test_model_with_yaml_scheme - - settings = YamlSchemeModelTest.model_settings - attributes_settings = settings[:attributes] - - - assert_equal "test/data/yaml_scheme_model_test.yml", settings[:scheme] - - assert_equal 'Name', attributes_settings[:name][:label] - assert_equal 'Person name', attributes_settings[:name][:description] - assert_equal %w[test:name test2:name test3:person_name], attributes_settings[:name][:equivalents] - assert_equal 'Put the person name as string', attributes_settings[:name][:help] - assert_equal 'John', attributes_settings[:name][:example] - - - assert_equal 'Person nationality', attributes_settings[:nationality][:label] - hash = {fr: 'france', us: 'USA'} - assert_equal hash, attributes_settings[:nationality][:enforcedValues] - - end end From b6f65c427fd148b57590c47f64fcf3a4f5adc0be Mon Sep 17 00:00:00 2001 From: Imad Bourouche Date: Wed, 5 Feb 2025 18:46:56 +0100 Subject: [PATCH 075/106] allow saving models if it's not modified and not persistent (#76) --- lib/goo/base/resource.rb | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/goo/base/resource.rb b/lib/goo/base/resource.rb index e3459b97..8aeeb570 100644 --- a/lib/goo/base/resource.rb +++ b/lib/goo/base/resource.rb @@ -246,7 +246,7 @@ def save(*opts) end if !batch_file - return self if not modified? + return self if !modified? && persistent? raise Goo::Base::NotValidException, "Object is not valid. Check errors." unless valid? 
end From e48a2d13a65cc2dd1c12d116cfc9da9061106861 Mon Sep 17 00:00:00 2001 From: Syphax bouazzouni Date: Fri, 7 Feb 2025 00:01:52 +0100 Subject: [PATCH 076/106] Feature: Migrate to ruby 3.2 (#74) * unpun rdf version and use latest rdf-raptor version * pin gem public_suffix for ruby 2.7 * remove cube and ruby 2 pinned gems * remove ruby 2 CI tests --- .github/workflows/ruby-unit-test.yml | 5 +- Gemfile | 8 ++- Gemfile.lock | 78 ++++++++++++++++++---------- config/config.rb.sample | 4 +- goo.gemspec | 2 +- lib/goo.rb | 8 +-- lib/goo/config/config.rb | 7 +-- test/test_logging.rb | 4 +- 8 files changed, 70 insertions(+), 46 deletions(-) diff --git a/.github/workflows/ruby-unit-test.yml b/.github/workflows/ruby-unit-test.yml index ac40314b..7c5c6153 100644 --- a/.github/workflows/ruby-unit-test.yml +++ b/.github/workflows/ruby-unit-test.yml @@ -12,8 +12,8 @@ jobs: strategy: fail-fast: false matrix: - goo-slice: [ '20', '100', '500' ] - ruby-version: [ '2.7', '3.0' ] + goo-slice: [ '100'] + ruby-version: [ '3.2.0' ] triplestore: [ 'fs', 'ag', 'vo', 'gb' ] steps: @@ -34,4 +34,3 @@ jobs: run: GOO_SLICES=${{ matrix.goo-slice }} bundle exec rake test:docker:${{ matrix.triplestore }} TESTOPTS="-v" - name: Upload coverage to Codecov uses: codecov/codecov-action@v3 - diff --git a/Gemfile b/Gemfile index 4d5a2b5b..5d0b4b88 100644 --- a/Gemfile +++ b/Gemfile @@ -3,7 +3,6 @@ source 'https://rubygems.org' gemspec gem "activesupport" -gem "cube-ruby", require: "cube" gem "rake" gem "uuid" gem "request_store" @@ -23,4 +22,9 @@ group :profiling do end gem 'sparql-client', github: 'ontoportal-lirmm/sparql-client', branch: 'development' -gem 'faraday', '2.7.11' #unpin if we no more support ruby 2.7 \ No newline at end of file +gem "rdf-raptor", github: "ruby-rdf/rdf-raptor", ref: "6392ceabf71c3233b0f7f0172f662bd4a22cd534" # use version 3.3.0 when available +gem 'net-ftp' + +# # to remove if no more supporting ruby 2.7 +# gem 'faraday', '2.7.11' #unpin if we no more support ruby 2.7 +# 
gem 'public_suffix', '~> 5.1.1' diff --git a/Gemfile.lock b/Gemfile.lock index e5d8d339..94138bc0 100644 --- a/Gemfile.lock +++ b/Gemfile.lock @@ -1,19 +1,28 @@ GIT remote: https://github.com/ontoportal-lirmm/sparql-client.git - revision: d4737ff08f33517cf93b4d82c78a471017991d97 + revision: 736b7650e28db3ce5e3e49511ac30f958a29e8f1 branch: development specs: sparql-client (3.2.2) net-http-persistent (~> 4.0, >= 4.0.2) rdf (~> 3.2, >= 3.2.11) +GIT + remote: https://github.com/ruby-rdf/rdf-raptor.git + revision: 6392ceabf71c3233b0f7f0172f662bd4a22cd534 + ref: 6392ceabf71c3233b0f7f0172f662bd4a22cd534 + specs: + rdf-raptor (3.3.0) + ffi (~> 1.15) + rdf (~> 3.3) + PATH remote: . specs: goo (0.0.2) addressable (~> 2.8) pry - rdf (= 3.2.11) + rdf rdf-raptor rdf-rdfxml rdf-vocab @@ -35,27 +44,32 @@ GEM addressable (2.8.7) public_suffix (>= 2.0.2, < 7.0) base64 (0.2.0) + bcp47_spec (0.2.1) + bigdecimal (3.1.9) builder (3.3.0) coderay (1.1.3) concurrent-ruby (1.3.5) connection_pool (2.5.0) - cube-ruby (0.0.3) daemons (1.4.1) + date (3.4.1) docile (1.4.1) domain_name (0.6.20240107) eventmachine (1.2.7) - faraday (2.7.11) - base64 - faraday-net_http (>= 2.0, < 3.1) - ruby2_keywords (>= 0.0.4) - faraday-net_http (3.0.2) - ffi (1.17.1) + faraday (2.12.2) + faraday-net_http (>= 2.0, < 3.5) + json + logger + faraday-net_http (3.4.0) + net-http (>= 0.5.0) + ffi (1.17.1-arm64-darwin) + ffi (1.17.1-x86_64-linux-gnu) htmlentities (4.3.4) http-accept (1.7.0) http-cookie (1.0.8) domain_name (~> 0.5) i18n (0.9.5) concurrent-ruby (~> 1.0) + json (2.9.1) link_header (0.0.8) logger (1.6.5) macaddr (1.7.2) @@ -64,18 +78,25 @@ GEM mime-types (3.6.0) logger mime-types-data (~> 3.2015) - mime-types-data (3.2025.0107) + mime-types-data (3.2025.0204) minitest (4.7.5) multi_json (1.15.0) mustermann (3.0.3) ruby2_keywords (~> 0.0.1) + net-ftp (0.3.8) + net-protocol + time + net-http (0.6.0) + uri net-http-persistent (4.0.5) connection_pool (~> 2.2) + net-protocol (0.2.2) + timeout netrc (0.11.0) pry 
(0.15.2) coderay (~> 1.1) method_source (~> 1.0) - public_suffix (5.1.1) + public_suffix (6.0.1) rack (2.2.10) rack-accept (0.4.5) rack (>= 0.4) @@ -85,20 +106,19 @@ GEM base64 (>= 0.1.0) rack (~> 2.2, >= 2.2.4) rake (13.2.1) - rdf (3.2.11) + rdf (3.3.2) + bcp47_spec (~> 0.2) + bigdecimal (~> 3.1, >= 3.1.5) link_header (~> 0.0, >= 0.0.8) - rdf-raptor (3.2.0) - ffi (~> 1.15) - rdf (~> 3.2) - rdf-rdfxml (3.2.2) - builder (~> 3.2) + rdf-rdfxml (3.3.0) + builder (~> 3.2, >= 3.2.4) htmlentities (~> 4.3) - rdf (~> 3.2) - rdf-xsd (~> 3.2) - rdf-vocab (3.2.7) - rdf (~> 3.2, >= 3.2.4) - rdf-xsd (3.2.1) - rdf (~> 3.2) + rdf (~> 3.3) + rdf-xsd (~> 3.3) + rdf-vocab (3.3.2) + rdf (~> 3.3) + rdf-xsd (3.3.0) + rdf (~> 3.3) rexml (~> 3.2) redis (5.3.0) redis-client (>= 0.22.0) @@ -137,24 +157,28 @@ GEM rack (>= 1, < 3) thread_safe (0.3.6) tilt (2.6.0) + time (0.4.1) + date + timeout (0.4.3) tzinfo (0.3.62) + uri (1.0.2) uuid (2.3.9) macaddr (~> 1.0) PLATFORMS - x86_64-darwin-23 + arm64-darwin-24 x86_64-linux DEPENDENCIES activesupport - cube-ruby - faraday (= 2.7.11) goo! minitest (< 5.0) + net-ftp pry rack-accept rack-post-body-to-params rake + rdf-raptor! 
request_store simplecov simplecov-cobertura @@ -164,4 +188,4 @@ DEPENDENCIES uuid BUNDLED WITH - 2.4.22 + 2.6.3 diff --git a/config/config.rb.sample b/config/config.rb.sample index 12abdccb..0429e9cd 100644 --- a/config/config.rb.sample +++ b/config/config.rb.sample @@ -15,9 +15,9 @@ Goo.config do |config| # config.goo_path_data = "/repositories/ontoportal/statements/" # config.goo_path_update = "/repositories/ontoportal/statements/" - config.search_server_url = 'http://localhost:8983/solr/term_search_core1' + config.search_server_url = 'http://localhost:8983/solr/' config.redis_host = 'localhost' config.redis_port = 6379 config.bioportal_namespace = 'http://data.bioontology.org/' config.queries_debug = false -end \ No newline at end of file +end diff --git a/goo.gemspec b/goo.gemspec index b7175779..b053e72a 100644 --- a/goo.gemspec +++ b/goo.gemspec @@ -8,7 +8,7 @@ Gem::Specification.new do |s| s.homepage = "http://github.com/ncbo/goo" s.add_dependency("addressable", "~> 2.8") s.add_dependency("pry") - s.add_dependency("rdf", "3.2.11") #unpin when we support only Ruby >= 3.0 + s.add_dependency("rdf") s.add_dependency("rdf-vocab") s.add_dependency("rdf-rdfxml") s.add_dependency("rdf-raptor") diff --git a/lib/goo.rb b/lib/goo.rb index 41b2e808..59441811 100644 --- a/lib/goo.rb +++ b/lib/goo.rb @@ -12,7 +12,6 @@ require 'rest_client' require 'redis' require 'uuid' -require "cube" require_relative "goo/config/config" require_relative "goo/sparql/sparql" @@ -46,7 +45,6 @@ module Goo @@default_namespace = nil @@id_prefix = nil @@redis_client = nil - @@cube_options = nil @@namespaces = {} @@pluralize_models = false @@uuid = UUID.new @@ -130,15 +128,13 @@ def self.add_sparql_backend(name, *opts) headers: { "Content-Type" => "application/x-www-form-urlencoded", "Accept" => "application/sparql-results+json"}, read_timeout: 10000, validate: false, - redis_cache: @@redis_client, - cube_options: @@cube_options) + redis_cache: @@redis_client) @@sparql_backends[name][:data] = 
Goo::SPARQL::Client.new(opts[:data], protocol: "1.1", headers: { "Content-Type" => "application/x-www-form-urlencoded", "Accept" => "application/sparql-results+json"}, read_timeout: 10000, validate: false, - redis_cache: @@redis_client, - cube_options: @@cube_options) + redis_cache: @@redis_client) @@sparql_backends[name][:backend_name] = opts[:backend_name] @@sparql_backends.freeze end diff --git a/lib/goo/config/config.rb b/lib/goo/config/config.rb index 8c2839ce..d842b152 100644 --- a/lib/goo/config/config.rb +++ b/lib/goo/config/config.rb @@ -24,10 +24,10 @@ def config(&block) @settings.redis_host ||= ENV['REDIS_HOST'] || 'localhost' @settings.redis_port ||= ENV['REDIS_PORT'] || 6379 @settings.bioportal_namespace ||= ENV['BIOPORTAL_NAMESPACE'] || 'http://data.bioontology.org/' - @settings.query_logging ||= ENV['QUERIES_LOGGING'] || false - @settings.query_logging_file||= ENV['QUERIES_LOGGING_FILE'] || './sparql.log' + @settings.query_logging ||= ENV['QUERIES_LOGGING'] || false + @settings.query_logging_file ||= ENV['QUERIES_LOGGING_FILE'] || './sparql.log' @settings.queries_debug ||= ENV['QUERIES_DEBUG'] || false - @settings.slice_loading_size ||= ENV['GOO_SLICES'] || 500 + @settings.slice_loading_size ||= ENV['GOO_SLICES']&.to_i || 500 puts "(GOO) >> Using RDF store (#{@settings.goo_backend_name}) #{@settings.goo_host}:#{@settings.goo_port}#{@settings.goo_path_query}" puts "(GOO) >> Using term search server at #{@settings.search_server_url}" puts "(GOO) >> Using Redis instance at #{@settings.redis_host}:#{@settings.redis_port}" @@ -62,6 +62,7 @@ def connect_goo conf.add_namespace(:nemo, RDF::Vocabulary.new("http://purl.bioontology.org/NEMO/ontology/NEMO_annotation_properties.owl#")) conf.add_namespace(:bioportal, RDF::Vocabulary.new(@settings.bioportal_namespace)) conf.use_cache = false + conf.slice_loading_size = @settings.slice_loading_size end rescue StandardError => e abort("EXITING: Goo cannot connect to triplestore and/or search server:\n 
#{e}\n#{e.backtrace.join("\n")}") diff --git a/test/test_logging.rb b/test/test_logging.rb index 112efd08..92018357 100644 --- a/test/test_logging.rb +++ b/test/test_logging.rb @@ -26,7 +26,7 @@ def test_logging Goo.logger.info("Test logging") University.all recent_logs = Goo.logger.get_logs - assert_equal 3, recent_logs.length + assert_equal 2, recent_logs.length assert recent_logs.any? { |x| x['query'].include?("Test logging") } assert File.read("test.log").include?("Test logging") end @@ -35,7 +35,7 @@ def test_last_10s_logs Goo.logger.info("Test logging 2") University.all recent_logs = Goo.logger.queries_last_n_seconds(1) - assert_equal 3, recent_logs.length + assert_equal 2, recent_logs.length assert recent_logs.any? { |x| x['query'].include?("Test logging 2") } assert File.read("test.log").include?("Test logging 2") sleep 1 From 04680ed78dfd98cfe004d9a1d7019f3f06e9b667 Mon Sep 17 00:00:00 2001 From: Syphax Date: Sat, 26 Apr 2025 02:42:49 +0200 Subject: [PATCH 077/106] fix: virtuoso docker not using to correct host name --- lib/goo/sparql/client.rb | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/goo/sparql/client.rb b/lib/goo/sparql/client.rb index f04d5dbc..5ac67ed4 100644 --- a/lib/goo/sparql/client.rb +++ b/lib/goo/sparql/client.rb @@ -234,7 +234,7 @@ def params_for_backend(graph, data_file, mime_type_in, method = :post) # for some reason \\\\ breaks parsing params[:payload][:data] = params[:payload][:data].split("\n").map { |x| x.sub("\\\\", "") }.join("\n") elsif Goo.backend_vo? 
- params[:url] = "http://localhost:8890/sparql-graph-crud?graph=#{CGI.escape(graph.to_s)}" + params[:url] = "#{url.parent}/sparql-graph-crud?graph=#{CGI.escape(graph.to_s)}" params[:payload] = data_file else params[:url] << "?context=#{CGI.escape("<#{graph.to_s}>")}" From 0a1c9fee157bc360cda10abb24abca71d5ec0a38 Mon Sep 17 00:00:00 2001 From: mdorf Date: Tue, 16 Dec 2025 12:37:55 -0800 Subject: [PATCH 078/106] initial commit --- .ruby-version | 1 - Gemfile | 2 +- Gemfile.lock | 9 +++++---- 3 files changed, 6 insertions(+), 6 deletions(-) delete mode 100644 .ruby-version diff --git a/.ruby-version b/.ruby-version deleted file mode 100644 index 6a81b4c8..00000000 --- a/.ruby-version +++ /dev/null @@ -1 +0,0 @@ -2.7.8 diff --git a/Gemfile b/Gemfile index 5d0b4b88..31a4b42a 100644 --- a/Gemfile +++ b/Gemfile @@ -21,7 +21,7 @@ group :profiling do gem "thin" end -gem 'sparql-client', github: 'ontoportal-lirmm/sparql-client', branch: 'development' +gem 'sparql-client', github: 'ncbo/sparql-client', branch: 'ontoportal-lirmm-development' gem "rdf-raptor", github: "ruby-rdf/rdf-raptor", ref: "6392ceabf71c3233b0f7f0172f662bd4a22cd534" # use version 3.3.0 when available gem 'net-ftp' diff --git a/Gemfile.lock b/Gemfile.lock index 94138bc0..553db9c6 100644 --- a/Gemfile.lock +++ b/Gemfile.lock @@ -1,7 +1,7 @@ GIT - remote: https://github.com/ontoportal-lirmm/sparql-client.git + remote: https://github.com/ncbo/sparql-client.git revision: 736b7650e28db3ce5e3e49511ac30f958a29e8f1 - branch: development + branch: ontoportal-lirmm-development specs: sparql-client (3.2.2) net-http-persistent (~> 4.0, >= 4.0.2) @@ -88,8 +88,8 @@ GEM time net-http (0.6.0) uri - net-http-persistent (4.0.5) - connection_pool (~> 2.2) + net-http-persistent (4.0.6) + connection_pool (~> 2.2, >= 2.2.4) net-protocol (0.2.2) timeout netrc (0.11.0) @@ -166,6 +166,7 @@ GEM macaddr (~> 1.0) PLATFORMS + arm64-darwin-23 arm64-darwin-24 x86_64-linux From 1a6d2080691a6a3b2542ac48590d552db35d7136 Mon Sep 17 00:00:00 
2001 From: mdorf Date: Thu, 8 Jan 2026 16:16:16 -0800 Subject: [PATCH 079/106] re-applied NCBO validators, ncbo/goo#166 --- Gemfile.lock | 2 +- lib/goo/validators/enforce.rb | 12 ++- lib/goo/validators/implementations/email.rb | 39 +++++++-- .../validators/implementations/safe_text.rb | 54 ++++++++++++ .../validators/implementations/username.rb | 45 ++++++++++ test/test_email_validator.rb | 85 +++++++++++++++++++ test/test_validators.rb | 77 ++++++++++++++++- 7 files changed, 301 insertions(+), 13 deletions(-) create mode 100644 lib/goo/validators/implementations/safe_text.rb create mode 100644 lib/goo/validators/implementations/username.rb create mode 100644 test/test_email_validator.rb diff --git a/Gemfile.lock b/Gemfile.lock index 553db9c6..2817e9ba 100644 --- a/Gemfile.lock +++ b/Gemfile.lock @@ -1,6 +1,6 @@ GIT remote: https://github.com/ncbo/sparql-client.git - revision: 736b7650e28db3ce5e3e49511ac30f958a29e8f1 + revision: 2ac20b217bb7ad2b11305befe0ee77d75e44eac5 branch: ontoportal-lirmm-development specs: sparql-client (3.2.2) diff --git a/lib/goo/validators/enforce.rb b/lib/goo/validators/enforce.rb index 5c157fb1..0e0b179a 100644 --- a/lib/goo/validators/enforce.rb +++ b/lib/goo/validators/enforce.rb @@ -28,13 +28,13 @@ def enforce(inst,attr,value) when :existence check Goo::Validators::Existence, inst, attr, value, opt when :list, Array - check Goo::Validators::DataType, inst, attr, value,opt, Array + check Goo::Validators::DataType, inst, attr, value, opt, Array when :uri, RDF::URI - check Goo::Validators::DataType, inst, attr, value,opt, RDF::URI + check Goo::Validators::DataType, inst, attr, value, opt, RDF::URI when :string, String - check Goo::Validators::DataType, inst, attr, value,opt, String + check Goo::Validators::DataType, inst, attr, value, opt, String when :integer, Integer - check Goo::Validators::DataType, inst, attr, value,opt, Integer + check Goo::Validators::DataType, inst, attr, value, opt, Integer when :boolean check 
Goo::Validators::DataType, inst, attr, value, opt,:boolean when :date_time, DateTime @@ -45,6 +45,8 @@ def enforce(inst,attr,value) check Goo::Validators::Symmetric, inst, attr, value, opt when :email check Goo::Validators::Email, inst, attr, value, opt + when :username + check Goo::Validators::Username, inst, attr, value, opt when /^distinct_of_/ check Goo::Validators::DistinctOf, inst, attr, value, opt, opt when /^superior_equal_to_/ @@ -56,6 +58,8 @@ def enforce(inst,attr,value) when /^max_/, /^min_/ type = opt.to_s.index("max_") ? :max : :min check Goo::Validators::ValueRange, inst, attr, value, type, opt.to_s + when /^safe_text/ + check Goo::Validators::SafeText, inst, attr, value, opt, opt.to_s else if object_type?(opt) check_object_type inst, attr, value, opt diff --git a/lib/goo/validators/implementations/email.rb b/lib/goo/validators/implementations/email.rb index f8405714..c74a9c79 100644 --- a/lib/goo/validators/implementations/email.rb +++ b/lib/goo/validators/implementations/email.rb @@ -2,20 +2,47 @@ module Goo module Validators class Email < ValidatorBase include Validator - EMAIL_REGEXP = /\A[\w+\-.]+@[a-z\d\-]+(\.[a-z\d\-]+)*\.[a-z]+\z/i + # Matches reasonably valid emails (no double dots, no leading/trailing dots or hyphens, valid domain) + EMAIL_REGEXP = /\A + [a-z0-9!#$%&'*+\/=?^_`{|}~-]+ # local part + (?:\.[a-z0-9!#$%&'*+\/=?^_`{|}~-]+)* # dot-separated continuation in local + @ + (?:(?!-)[a-z0-9-]{1,63}(?(obj) { if @value.kind_of? Array - return "All values in attribute `#{@attr}` must be a valid emails" + return "All values in attribute `#{@attr}` must be valid email addresses" else - return "Attribute `#{@attr}` with the value `#{@value}` must be a valid email" - + return "Attribute `#{@attr}` with the value `#{@value}` must be a valid email address" end } - validity_check -> (obj) do - @value.nil? || @value.match?(EMAIL_REGEXP) + private + + validity_check ->(obj) do + return true if @value.nil? + + values = @value.is_a?(Array) ? 
@value : [@value] + + values.all? do |email| + next false unless email.is_a?(String) + next false unless email.length.between?(MIN_LENGTH, MAX_LENGTH) + + local, domain = email.split('@', 2) + next false if local.nil? || domain.nil? + next false if local.length > LOCAL_PART_MAX || domain.length > DOMAIN_PART_MAX + + email.match?(EMAIL_REGEXP) + end end end end diff --git a/lib/goo/validators/implementations/safe_text.rb b/lib/goo/validators/implementations/safe_text.rb new file mode 100644 index 00000000..28ae794e --- /dev/null +++ b/lib/goo/validators/implementations/safe_text.rb @@ -0,0 +1,54 @@ +module Goo + module Validators + class SafeText < ValidatorBase + include Validator + + SAFE_TEXT_REGEX = /\A[\p{L}\p{N} .,'\-@()&!$%\/\[\]:;*+=?#^_{}|~"]+\z/u.freeze + DISALLOWED_UNICODE = /[\u0000-\u001F\u007F\u00A0\u200B-\u200F\u2028-\u202F\u202E\u2066-\u2069]/u.freeze + + key :safe_text + + error_message ->(obj) { + # Truncate long string values for clarity + truncated_value = if @value.is_a?(String) && @value.length > 60 + "#{@value[0...57]}..." + else + @value + end + + prefix = if @value.is_a?(Array) + "All values in attribute `#{@attr}`" + else + "Attribute `#{@attr}` with the value `#{truncated_value}`" + end + + suffix = "must be safe text (no control or invisible Unicode characters, newlines, or disallowed punctuation)" + length_note = @max_length ? " and must not exceed #{@max_length} characters" : "" + + "#{prefix} #{suffix}#{length_note}" + } + + validity_check ->(obj) do + return true if @value.nil? + + Array(@value).all? do |val| + next false unless val.is_a?(String) + + length_ok = @max_length.nil? 
|| val.length <= @max_length + length_ok && + val !~ /\R/ && + val =~ SAFE_TEXT_REGEX && + val !~ DISALLOWED_UNICODE + end + end + + def initialize(inst, attr, value, opt) + @max_length = nil + super(inst, attr, value) + match = opt.match(/_(\d+)$/) + @max_length = match[1].to_i if match && match[1] + end + + end + end +end \ No newline at end of file diff --git a/lib/goo/validators/implementations/username.rb b/lib/goo/validators/implementations/username.rb new file mode 100644 index 00000000..8e9e858c --- /dev/null +++ b/lib/goo/validators/implementations/username.rb @@ -0,0 +1,45 @@ +module Goo + module Validators + class Username < ValidatorBase + include Validator + + RESERVED_NAMES = %w[ + admin administrator root support system test guest owner user + webmaster help contact host mail ftp info api noc security + ].freeze + + USERNAME_LENGTH_RANGE = (3..32).freeze + + ASCII_ONLY_REGEX = /\A[\x20-\x7E]+\z/ + USERNAME_PATTERN = /\A[a-zA-Z](?!.*[._]{2})[a-zA-Z0-9._]{1,30}[a-zA-Z0-9]\z/ + INVISIBLE_CHARS = /[\u200B-\u200D\uFEFF]/ + + key :username + + error_message ->(obj) { + base_msg = if @value.is_a?(Array) + "All values in attribute `#{@attr}` must be valid usernames" + else + "Attribute `#{@attr}` with the value `#{@value}` must be a valid username" + end + "#{base_msg} (must be 3–32 chars, start with a letter, contain only ASCII letters/digits/dots/underscores, no invisible or reserved terms)" + } + + validity_check ->(obj) do + return true if @value.nil? + + Array(@value).all? 
do |username| + next false unless username.is_a?(String) + + username = username.strip + + USERNAME_LENGTH_RANGE.cover?(username.length) && + username.match?(ASCII_ONLY_REGEX) && + username.match?(USERNAME_PATTERN) && + !username.match?(INVISIBLE_CHARS) && + !RESERVED_NAMES.include?(username.downcase) + end + end + end + end +end \ No newline at end of file diff --git a/test/test_email_validator.rb b/test/test_email_validator.rb new file mode 100644 index 00000000..eed1572b --- /dev/null +++ b/test/test_email_validator.rb @@ -0,0 +1,85 @@ +require_relative 'test_case.rb' + +module Goo + module Validators + class TestEmail < MiniTest::Unit::TestCase + + def dummy_instance + @dummy ||= Object.new + end + + def validate(value) + Email.new(dummy_instance, :email, value) + end + + def assert_valid(value) + validator = validate(value) + assert validator.valid?, "Expected #{value.inspect} to be valid" + end + + def assert_invalid(value) + validator = validate(value) + refute validator.valid?, "Expected #{value.inspect} to be invalid" + end + + def test_valid_emails + assert_valid nil + assert_valid "user@example.com" + assert_valid "john.doe+test@sub.domain.org" + assert_valid "a_b-c@foo-bar.co.uk" + assert_valid "user123@domain.io" + end + + def test_invalid_emails_structure + assert_invalid "" + assert_invalid "plainaddress" + assert_invalid "user@localhost" + assert_invalid "user@com" + assert_invalid "user@.com" + assert_invalid "user@com." 
+ assert_invalid "user@-domain.com" + assert_invalid "user@domain-.com" + assert_invalid "user.@example.com" + assert_invalid "user..user@example.com" + assert_invalid "user@domain..com" + assert_invalid "user@" + end + + def test_email_length_limits + too_short = "a@b.c" # 5 chars + assert_invalid too_short + + long_local = "a" * 65 + assert_invalid "#{long_local}@example.com" + + long_domain = ("a" * 63 + ".") * 4 + "com" + assert_invalid "user@#{long_domain}" + + too_long = "#{'a'*64}@#{'b'*189}.com" # 258 chars + assert_invalid too_long + end + + def test_array_with_all_valid_emails + validator = validate(["valid@example.com", "foo.bar@domain.co"]) + assert validator.valid? + end + + def test_array_with_one_invalid_email + validator = validate(["good@domain.com", "bad@domain..com"]) + refute validator.valid? + end + + def test_error_message_for_single_invalid_email + validator = validate("invalid-email") + refute validator.valid? + assert_match(/must be a valid email address/i, validator.error) + end + + def test_error_message_for_array_with_invalid + validator = validate(["invalid@", "also@bad"]) + refute validator.valid? 
+ assert_match(/All values.*must be valid email addresses/i, validator.error) + end + end + end +end \ No newline at end of file diff --git a/test/test_validators.rb b/test/test_validators.rb index e5c3a9fe..d73fee76 100644 --- a/test/test_validators.rb +++ b/test/test_validators.rb @@ -5,6 +5,7 @@ class Person < Goo::Base::Resource model :person_model_validators, name_with: :name attribute :name, enforce: [:string, :existence] + attribute :username, enforce: [:string, :existence, :username] attribute :last_name, enforce: [:string] attribute :multiple_values, enforce: [ :list, :integer] attribute :one_number, enforce: [ :integer ] @@ -17,6 +18,12 @@ class Person < Goo::Base::Resource attribute :friends, enforce: [Person, :list] end +class SafeTextTestModel < Goo::Base::Resource + model :safe_text_test_model, name_with: :name + attribute :first_name, enforce: [:safe_text_5, :existence] + attribute :last_name, enforce: [:safe_text_8, :existence] + attribute :description, enforce: [:safe_text, :existence] +end class RangeTestModel < Goo::Base::Resource model :range_test_model, name_with: :name @@ -96,7 +103,6 @@ def self.after_suite GooTestData.delete_all [SymmetricTestModel, InverseOfTestModel] end - def test_unique_validator s = Student.new @@ -111,6 +117,71 @@ def test_unique_validator assert s.valid? end + def test_username_validator + p = Person.new + p.name = "Susan" + p.username = "goodusername" + assert p.valid? + + p.username = "good_username" + assert p.valid? + + p.username = "good.username" + assert p.valid? + + p.username = "bad-username" + refute p.valid? + + p.username = "1badusername" + refute p.valid? + + p.username = "badusername with spaces" + refute p.valid? + + p.username = "\\x3csVg/\\x3e\">" + refute p.valid? + end + + def test_safe_text_validator + m = SafeTextTestModel.new + m.first_name = 'Susan' + m.last_name = 'Johnson' + m.description = 'The name Susan carries a rich history and evokes a sense of grace, warmth, and intelligence. 
While its popularity has somewhat declined, it remains a classic name with a positive reputation and a strong legacy of accomplished individuals associated with it.' + assert m.valid? + + m.first_name = 'Michael' + refute m.valid? + assert_equal 1, m.errors.keys.length + assert m.errors[:first_name][:safe_text_5].include?('and must not exceed 5 characters') + + m.first_name = 'Joe' + m.description = 'The name Susan 🌍 carries a rich history' + refute m.valid? + assert_equal 1, m.errors.keys.length + assert_equal :description, m.errors.keys[0] + + m.description = "I am a valid description" + assert m.valid? + + m.description = "临床表现" + assert m.valid? + + m.description = "This string contains\na newline" + refute m.valid? + + m.description = "This string has a tab\tcharacter" + refute m.valid? + + m.description = "Price is < than $1!" + refute m.valid? + + m.description = "This has\u200Bhidden content" + refute m.valid? + + m.description = "Normal text\u202Eevil.com" + refute m.valid? + end + def test_existence_validator s = Student.new @@ -135,6 +206,7 @@ def test_existence_validator def test_datatype_validators p = Person.new p.name = 'test' + p.username = 'test_username' #nil values are valid assert p.valid? @@ -174,7 +246,7 @@ def test_datatype_validators def test_uri_datatype_validator p = Person.new p.name = 'test' - + p.username = 'test_username' assert p.valid? p.social = RDF::URI.new('') #empty uri @@ -190,6 +262,7 @@ def test_uri_datatype_validator def test_object_type_validator p = Person.new p.name = 'test' + p.username = 'test_username' p.friends = [1] refute p.valid? 
From 265e1e09eb57f3c1397a5606a9c49aeecf065383 Mon Sep 17 00:00:00 2001 From: mdorf Date: Fri, 9 Jan 2026 16:50:17 -0800 Subject: [PATCH 080/106] re-applied :url validator, ncbo/goo#169 --- lib/goo/validators/enforce.rb | 2 + .../validators/implementations/data_type.rb | 79 ++++++++++--------- test/test_url_validator.rb | 63 +++++++++++++++ 3 files changed, 107 insertions(+), 37 deletions(-) create mode 100644 test/test_url_validator.rb diff --git a/lib/goo/validators/enforce.rb b/lib/goo/validators/enforce.rb index 0e0b179a..dd10c35b 100644 --- a/lib/goo/validators/enforce.rb +++ b/lib/goo/validators/enforce.rb @@ -31,6 +31,8 @@ def enforce(inst,attr,value) check Goo::Validators::DataType, inst, attr, value, opt, Array when :uri, RDF::URI check Goo::Validators::DataType, inst, attr, value, opt, RDF::URI + when :url + check Goo::Validators::DataType, inst, attr, value, opt, :url when :string, String check Goo::Validators::DataType, inst, attr, value, opt, String when :integer, Integer diff --git a/lib/goo/validators/implementations/data_type.rb b/lib/goo/validators/implementations/data_type.rb index 04f46d0c..14440050 100644 --- a/lib/goo/validators/implementations/data_type.rb +++ b/lib/goo/validators/implementations/data_type.rb @@ -1,75 +1,80 @@ +require 'uri' + module Goo module Validators class DataType < ValidatorBase include Validator + MAX_URL_LENGTH = 2048 - keys [:list, :uri, :string, :integer, :boolean, :date_time, :float] + keys %i[list uri url string integer boolean date_time float] - error_message ->(obj) { - if @value.kind_of? 
Array - return "All values in attribute `#{@attr}` must be `#{@type}`" + error_message ->(_obj) { + if @value.is_a?(Array) + "All values in attribute `#{@attr}` must be `#{@type}`" else - return "Attribute `#{@attr}` with the value `#{@value}` must be `#{@type}`" - + "Attribute `#{@attr}` with the value `#{@value}` must be `#{@type}`" end } - validity_check -> (obj) do - self.enforce_type(@type, @value) - end + validity_check ->(_obj) { enforce_type(@type, @value) } def initialize(inst, attr, value, type) super(inst, attr, value) @type = type end - - def enforce_type(type, value) return true if value.nil? + return enforce_type_boolean(value) if type == :boolean + return enforce_type_uri(value) if [:uri, RDF::URI].include?(type) + return enforce_type_url(value) if type == :url + return value.is_a?(Array) if type == Array + return value.all? { |x| x.is_a?(type) } if value.is_a?(Array) - if type == :boolean - self.enforce_type_boolean(value) - elsif type.eql?(:uri) || type.eql?(RDF::URI) - self.enforce_type_uri(value) - elsif type.eql?(:uri) || type.eql?(Array) - value.is_a? Array - else - if value.is_a? Array - value.select{|x| !x.is_a?(type)}.empty? - else - value.is_a? type - end - end - + value.is_a?(type) end def enforce_type_uri(value) - return true if value.nil? + return true if value.nil? + return value.all? { |x| uri?(x) } if value.is_a?(Array) - if value.kind_of? Array - value.select { |x| !is_a_uri?(x) }.empty? - else - is_a_uri?(value) - end + uri?(value) + end + + def enforce_type_url(value) + return true if value.nil? + return value.all? { |x| url?(x) } if value.is_a?(Array) + url?(value) end def enforce_type_boolean(value) - if value.kind_of? Array - value.select { |x| !is_a_boolean?(x) }.empty? + if value.is_a?(Array) + value.all? 
{ |x| boolean?(x) } else - is_a_boolean?(value) + boolean?(value) end end - def is_a_boolean?(value) - (value.class == TrueClass) || (value.class == FalseClass) + private + + def boolean?(value) + value.instance_of?(TrueClass) || value.instance_of?(FalseClass) end - def is_a_uri?(value) + def uri?(value) value.is_a?(RDF::URI) && value.valid? end + + def url?(value) + s = value.to_s + return false if s.empty? || s.length > MAX_URL_LENGTH + + uri = URI.parse(s) + uri.is_a?(URI::HTTP) && uri.host && !uri.host.empty? + rescue URI::InvalidURIError + false + end end end end \ No newline at end of file diff --git a/test/test_url_validator.rb b/test/test_url_validator.rb new file mode 100644 index 00000000..ac7af9a3 --- /dev/null +++ b/test/test_url_validator.rb @@ -0,0 +1,63 @@ +require_relative 'test_case' +require 'goo/validators/validator' +require 'goo/validators/implementations/data_type' +require 'rdf' + +class UrlTestModel < Goo::Base::Resource + model :url_test_model, name_with: :name + attribute :url, enforce: %i[url] + attribute :urls, enforce: %i[list url] +end + +class UrlValidatorTest < Minitest::Unit::TestCase + def test_url_scalar + u = UrlTestModel.new + u.url = RDF::URI.new('https://example.com/path?x=1') + assert u.valid?, "expected https URL to be valid, got errors: #{u.errors.inspect}" + + u.url = [RDF::URI.new('https://example.com/path?x=1')] + refute u.valid?, "expected to reject array, got errors: #{u.errors.inspect}" + assert u.errors[:url][:no_list], "errors: #{u.errors.inspect}" + end + + def test_url_scalar_rejects_non_http_schemes + [ + '', 'http://', 'wrong/uri', 'mailto:user@nodomain.org', 'ftp://test.com/', + 'urn:isbn:123456', 'ssh://root@localhost:22', 'file:///etc/passwd', + 'http://', 'http://[::gggg]', + '//example.org/path', + "https://example.com/too_long_url/#{'a' * 2050}" + ].each do |bad| + u = UrlTestModel.new + u.url = RDF::URI.new(bad) + refute u.valid?, "expected invalid for #{bad.inspect}" + assert u.errors[:url][:url], 
"expected :url error key for #{bad.inspect}" + end + end + + def test_url_list + u = UrlTestModel.new + u.urls = [RDF::URI.new('http://example.com/'), + RDF::URI.new('https://example2.com/ok')] + assert u.valid?, "expected valid list of URLs, got: #{u.errors.inspect}" + + u.urls = [RDF::URI.new('https://example.com/')] + assert u.valid? + + u.urls = RDF::URI.new('http://example.com/') + refute u.valid? + assert u.errors[:urls] + end + + def test_url_list_must_all_be_valid + u = UrlTestModel.new + u.urls = [RDF::URI.new('https://ok.example'), + RDF::URI.new('mailto:bad@example.org')] + refute u.valid? + assert u.errors[:urls][:url] + + u.urls = [RDF::URI.new('https://ok.example'), true] + refute u.valid? + assert u.errors[:urls][:url] + end +end \ No newline at end of file From 027268491b095a13efe288790f16557d49c7bb1d Mon Sep 17 00:00:00 2001 From: mdorf Date: Fri, 9 Jan 2026 16:52:36 -0800 Subject: [PATCH 081/106] re-applied :bypass_cache option, ncbo/goo#170 --- lib/goo/sparql/solutions_mapper.rb | 2 ++ 1 file changed, 2 insertions(+) diff --git a/lib/goo/sparql/solutions_mapper.rb b/lib/goo/sparql/solutions_mapper.rb index 090db691..5848d0f7 100644 --- a/lib/goo/sparql/solutions_mapper.rb +++ b/lib/goo/sparql/solutions_mapper.rb @@ -40,6 +40,8 @@ def map_each_solutions(select) select.prefix('franzOption_allowCachingResults: ') end + select.options[:bypass_cache] = @options[:bypass_cache] if @options.has_key?(:bypass_cache) + select.each_solution do |sol| next if sol[:some_type] && @klass.type_uri(@collection) != sol[:some_type] From edddd813a857446314a00d283d162c5ce53f945d Mon Sep 17 00:00:00 2001 From: mdorf Date: Wed, 21 Jan 2026 15:34:58 -0800 Subject: [PATCH 082/106] added Solr 'default' value to schema; added :url dynamic field --- lib/goo/search/solr/solr_query.rb | 2 +- lib/goo/search/solr/solr_schema_generator.rb | 5 ++++- 2 files changed, 5 insertions(+), 2 deletions(-) diff --git a/lib/goo/search/solr/solr_query.rb b/lib/goo/search/solr/solr_query.rb 
index ed194950..d9e342fc 100644 --- a/lib/goo/search/solr/solr_query.rb +++ b/lib/goo/search/solr/solr_query.rb @@ -16,7 +16,7 @@ def dynamic_field(type:, is_list:, is_fuzzy_search: false) return is_list ? '*_texts' : '*_text' if is_fuzzy_search dynamic_type = case type - when :uri, :string, nil + when :uri, :url, :string, nil '*_t' when :integer '*_i' diff --git a/lib/goo/search/solr/solr_schema_generator.rb b/lib/goo/search/solr/solr_schema_generator.rb index ba23e70b..63df5b81 100644 --- a/lib/goo/search/solr/solr_schema_generator.rb +++ b/lib/goo/search/solr/solr_schema_generator.rb @@ -8,10 +8,13 @@ def initialize @schema = {} end - def add_field(name, type, indexed: true, stored: true, multi_valued: false, omit_norms: nil) + def add_field(name, type, indexed: true, stored: true, multi_valued: false, omit_norms: nil, default: nil) @schema['add-field'] ||= [] af = { name: name.to_s, type: type, indexed: indexed, stored: stored, multiValued: multi_valued} af[:omitNorms] = omit_norms unless omit_norms.nil? + # Solr Schema API expects `default` as a STRING value; sending JSON booleans/numbers + # can trigger ClassCastException on Solr side. + af[:default] = default.is_a?(String) ? default : default.to_s unless default.nil? @schema['add-field'] << af end From 326eb26687f7307b6ae7e6b8f2e215057606180e Mon Sep 17 00:00:00 2001 From: mdorf Date: Thu, 22 Jan 2026 22:59:19 -0800 Subject: [PATCH 083/106] minor fix to remove warning: already initialized constant RDF::Literal::DateTime::FORMAT --- lib/goo/mixins/sparql_client.rb | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/lib/goo/mixins/sparql_client.rb b/lib/goo/mixins/sparql_client.rb index d4d98523..587942ab 100644 --- a/lib/goo/mixins/sparql_client.rb +++ b/lib/goo/mixins/sparql_client.rb @@ -34,7 +34,10 @@ def validate? 
class Literal class DateTime < Temporal - FORMAT = '%Y-%m-%dT%H:%M:%S'.freeze # the format that is supported by 4store + # Override RDF's default DateTime format to match what 4store accepts. + # Remove first to avoid Ruby's "already initialized constant" warning. + remove_const(:FORMAT) if const_defined?(:FORMAT) + FORMAT = '%Y-%m-%dT%H:%M:%S'.freeze end def initialize(value, language: nil, datatype: nil, lexical: nil, validate: false, canonicalize: false, **options) From e0109a9ea9b0e0119c3e5cd19146514c2282fab0 Mon Sep 17 00:00:00 2001 From: mdorf Date: Tue, 27 Jan 2026 15:26:47 -0800 Subject: [PATCH 084/106] added a comment to explain how indexable objects are merged --- lib/goo/search/search.rb | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/lib/goo/search/search.rb b/lib/goo/search/search.rb index 80da1537..9170d318 100644 --- a/lib/goo/search/search.rb +++ b/lib/goo/search/search.rb @@ -85,6 +85,11 @@ def indexable_object(to_set = nil) end end + # Merge sub-objects into a single document. 
For example, OntologySubmission.ontology + # All Ontology object's properties are merged into a single document for OntologySubmission + # Solr datatypes are also added, i.e: + # {"name": "*_txt", "type": "text_general", stored: true, "multiValued": true} + # {"name": "*_text", "type": "text_general", stored: true, "multiValued": false }, document = document.reduce({}) do |h, (k, v)| if v.is_a?(Hash) v.each { |k2, v2| h["#{k}_#{k2}".to_sym] = v2 } From 98bd8702cbd9a80c58c0ac5789c0188f2e854af7 Mon Sep 17 00:00:00 2001 From: mdorf Date: Thu, 29 Jan 2026 22:22:29 -0800 Subject: [PATCH 085/106] fixed improperly defined text_suggest_ngram field, which was causing bad search results --- lib/goo/search/solr/solr_schema_generator.rb | 67 ++++++++++++++++---- 1 file changed, 56 insertions(+), 11 deletions(-) diff --git a/lib/goo/search/solr/solr_schema_generator.rb b/lib/goo/search/solr/solr_schema_generator.rb index 63df5b81..f19820f7 100644 --- a/lib/goo/search/solr/solr_schema_generator.rb +++ b/lib/goo/search/solr/solr_schema_generator.rb @@ -78,7 +78,7 @@ def init_fields_types "name": "text_suggest_ngram", "class": "solr.TextField", "positionIncrementGap": "100", - "analyzer": { + "indexAnalyzer": { "tokenizer": { "class": "solr.StandardTokenizerFactory" }, @@ -87,9 +87,61 @@ def init_fields_types "class": "solr.LowerCaseFilterFactory" }, { - "class": "solr.EdgeNGramTokenizerFactory", + "class": "solr.ASCIIFoldingFilterFactory" + }, + { + "class": "solr.WordDelimiterGraphFilterFactory", + "generateWordParts": "1", + "generateNumberParts": "1", + "catenateWords": "0", + "catenateNumbers": "0", + "catenateAll": "0", + "splitOnCaseChange": "1" + }, + { + "class": "solr.EdgeNGramFilterFactory", "minGramSize": 1, - "maxGramSize": 25 + "maxGramSize": 20 + }, + { + "class": "solr.PatternReplaceFilterFactory", + "pattern": "([^\\w\\d\\*æøåÆØÅ ])", + "replacement": "", + "replace": "all" + } + ] + }, + "queryAnalyzer": { + "tokenizer": { + "class": 
"solr.StandardTokenizerFactory" + }, + "filters": [ + { + "class": "solr.WordDelimiterGraphFilterFactory", + "generateWordParts": "0", + "generateNumberParts": "0", + "catenateWords": "0", + "catenateNumbers": "0", + "catenateAll": "0", + "splitOnCaseChange": "0" + }, + { + "class": "solr.LowerCaseFilterFactory" + }, + { + "class": "solr.ASCIIFoldingFilterFactory" + }, + { + "class": "solr.PatternReplaceFilterFactory", + "pattern": "([^\\w\\d\\*æøåÆØÅ ])", + "replacement": "", + "replace": "all" + }, + { + "class": "solr.PatternReplaceFilterFactory", + "pattern": "^(.{20})(.*)?", + "replacement": "$1", + "replace": "all" } ] } @@ -157,12 +209,6 @@ def init_fields_types "pattern": "([^\\w\\d\\*æøåÆØÅ ])", "replacement": "", "replace": "all" - }, - { - "class": "solr.PatternReplaceFilterFactory", - "pattern": "^(.{30})(.*)?", - "replacement": "$1", - "replace": "all" } ] } @@ -222,8 +268,7 @@ def init_fields_types "catenateWords": "0", "catenateNumbers": "0", "catenateAll": "0", - "splitOnCaseChange": "0", - "splitOnNumerics": "0" + "splitOnCaseChange": "0" }, { "class": "solr.LowerCaseFilterFactory" From 256afce837da44d31e8da1e08a6c7c56bbd1820f Mon Sep 17 00:00:00 2001 From: mdorf Date: Thu, 29 Jan 2026 22:44:36 -0800 Subject: [PATCH 086/106] changed the order of filters for text_suggest_ngram type to reflect the original order --- lib/goo/search/solr/solr_schema_generator.rb | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/lib/goo/search/solr/solr_schema_generator.rb b/lib/goo/search/solr/solr_schema_generator.rb index f19820f7..302a4172 100644 --- a/lib/goo/search/solr/solr_schema_generator.rb +++ b/lib/goo/search/solr/solr_schema_generator.rb @@ -83,12 +83,6 @@ def init_fields_types "class": "solr.StandardTokenizerFactory" }, "filters": [ - { - "class": "solr.LowerCaseFilterFactory" - }, - { - "class": "solr.ASCIIFoldingFilterFactory" - }, { "class": "solr.WordDelimiterGraphFilterFactory", "generateWordParts": "1", @@ -98,6 +92,12 @@ 
def init_fields_types "catenateAll": "0", "splitOnCaseChange": "1" }, + { + "class": "solr.LowerCaseFilterFactory" + }, + { + "class": "solr.ASCIIFoldingFilterFactory" + }, { "class": "solr.EdgeNGramFilterFactory", "minGramSize": 1, From a822745ac3701038bb78c4ee62ccbf3259ac5385 Mon Sep 17 00:00:00 2001 From: mdorf Date: Sat, 31 Jan 2026 00:16:59 -0800 Subject: [PATCH 087/106] prevent search from filtering out unicode letters and numbers so it can handle non-Latin languages --- lib/goo/search/solr/solr_schema_generator.rb | 12 ++++++------ test/test_search.rb | 19 ++++++++++++++----- 2 files changed, 20 insertions(+), 11 deletions(-) diff --git a/lib/goo/search/solr/solr_schema_generator.rb b/lib/goo/search/solr/solr_schema_generator.rb index 302a4172..fda215e6 100644 --- a/lib/goo/search/solr/solr_schema_generator.rb +++ b/lib/goo/search/solr/solr_schema_generator.rb @@ -105,7 +105,7 @@ def init_fields_types }, { "class": "solr.PatternReplaceFilterFactory", - "pattern": "([^\\w\\d\\*æøåÆØÅ ])", + "pattern": "([^\\p{L}\\p{N}\\* ])", "replacement": "", "replace": "all" } @@ -133,7 +133,7 @@ def init_fields_types }, { "class": "solr.PatternReplaceFilterFactory", - "pattern": "([^\\w\\d\\*æøåÆØÅ ])", + "pattern": "([^\\p{L}\\p{N}\\* ])", "replacement": "", "replace": "all" }, @@ -178,7 +178,7 @@ def init_fields_types }, { "class": "solr.PatternReplaceFilterFactory", - "pattern": "([^\\w\\d\\*æøåÆØÅ ])", + "pattern": "([^\\p{L}\\p{N}\\* ])", "replacement": "", "replace": "all" } @@ -206,7 +206,7 @@ def init_fields_types }, { "class": "solr.PatternReplaceFilterFactory", - "pattern": "([^\\w\\d\\*æøåÆØÅ ])", + "pattern": "([^\\p{L}\\p{N}\\* ])", "replacement": "", "replace": "all" } @@ -244,7 +244,7 @@ def init_fields_types }, { "class": "solr.PatternReplaceFilterFactory", - "pattern": "([^\w\d*æøåÆØÅ ])", + "pattern": "([^\\p{L}\\p{N}\\* ])", "replacement": " ", "replace": "all" } @@ -275,7 +275,7 @@ def init_fields_types }, { "class": "solr.PatternReplaceFilterFactory", 
- "pattern": "([^\w\d*æøåÆØÅ ])", + "pattern": "([^\\p{L}\\p{N}\\* ])", "replacement": " ", "replace": "all" } diff --git a/test/test_search.rb b/test/test_search.rb index 0bba79d9..30916111 100644 --- a/test/test_search.rb +++ b/test/test_search.rb @@ -101,7 +101,7 @@ def setup id: RDF::URI.new("http://ncicb.nci.nih.gov/xml/owl/EVS/Thesaurus.owl#Melanoma"), prefLabel: "Melanoma", synonym: [ - "Cutaneous Melanoma", + "Cancerous Melanoma", "Skin Cancer", "Malignant Melanoma" ], @@ -116,6 +116,7 @@ def setup prefLabel: "Neoplasm", synonym: [ "tumor", + "肿瘤", "Neoplasms", "NEOPLASMS BENIGN", "MALIGNANT AND UNSPECIFIED (INCL CYSTS AND POLYPS)", @@ -137,7 +138,7 @@ def setup prefLabel: "Melanoma with cutaneous melanoma syndrome", synonym: [ "Cutaneous Melanoma", - "Skin Cancer", + "Melanocytes Skin Cancer", "Malignant Melanoma" ], definition: "Melanoma refers to a malignant skin cancer", @@ -171,12 +172,20 @@ def test_search_filters params = {"defType"=>"edismax", "stopwords"=>"true", "lowercaseOperators"=>"true", - "qf"=>"prefLabelExact^100 prefLabelSuggestEdge^50 synonymSuggestEdge^10 prefLabelSuggestNgram synonymSuggestNgram resource_id cui semanticType", + "qf"=>"prefLabelExact^100 prefLabelSuggestEdge^50 synonymSuggestEdge^10 prefLabelSuggestNgram synonymSuggestNgram resource_id cui semanticType", "pf"=>"prefLabelSuggest^50", } - resp = TermSearch.search("Melanoma wi", params) - assert_equal(3, resp["response"]["numFound"]) + resp = TermSearch.search("Cutaneous Melanoma", params) + assert_equal(2, resp["response"]["numFound"]) assert_equal @terms[2].prefLabel, resp["response"]["docs"][0]["prefLabel"] + + # test NOT filtering out unicode characters + resp = TermSearch.search("肿瘤", params) + assert_equal(1, resp["response"]["numFound"]) + assert_equal @terms[1].prefLabel, resp["response"]["docs"][0]["prefLabel"] + + + end def test_search_exact_filter From 5f0759212e56f04f9baf93f47453f7773ef2a32f Mon Sep 17 00:00:00 2001 From: mdorf Date: Sat, 31 Jan 2026 00:18:02 
-0800 Subject: [PATCH 088/106] prevent search from filtering out unicode letters and numbers so it can handle non-Latin languages --- test/test_search.rb | 3 --- 1 file changed, 3 deletions(-) diff --git a/test/test_search.rb b/test/test_search.rb index 30916111..9ca74ca6 100644 --- a/test/test_search.rb +++ b/test/test_search.rb @@ -183,9 +183,6 @@ def test_search_filters resp = TermSearch.search("肿瘤", params) assert_equal(1, resp["response"]["numFound"]) assert_equal @terms[1].prefLabel, resp["response"]["docs"][0]["prefLabel"] - - - end def test_search_exact_filter From 824872a443b074bb7ad85f7fa57274fdb2e2b39a Mon Sep 17 00:00:00 2001 From: mdorf Date: Sun, 1 Feb 2026 00:13:14 -0800 Subject: [PATCH 089/106] fixed test_query_flood; updated ag docker image to 8.3.1 --- docker-compose.yml | 2 +- test/test_chunks_write.rb | 6 +++--- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/docker-compose.yml b/docker-compose.yml index 463a1b92..4ddd6807 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -16,7 +16,7 @@ services: command: bin/solr start -cloud -f agraph-ut: - image: franzinc/agraph:v8.1.0 + image: franzinc/agraph:v8.3.1 platform: linux/amd64 environment: - AGRAPH_SUPER_USER=test diff --git a/test/test_chunks_write.rb b/test/test_chunks_write.rb index bbf0c5ca..19e8455f 100644 --- a/test/test_chunks_write.rb +++ b/test/test_chunks_write.rb @@ -132,12 +132,12 @@ def test_query_flood tput = Thread.new { Goo.sparql_data_client.put_triples(ONT_ID_EXTRA, ntriples_file_path, mime_type="application/x-turtle") } - + threads = [] 25.times do |i| threads << Thread.new { 50.times do |j| - oq = "SELECT (count(?s) as ?c) WHERE { ?s a ?o }" + oq = "SELECT (count(?s) as ?c) WHERE { GRAPH <#{ONT_ID}> { ?s a ?o } }" Goo.sparql_query_client.query(oq).each do |sol| refute_equal 0, sol[:c].to_i end @@ -145,7 +145,7 @@ def test_query_flood } end - threads.join + threads.each(&:join) if Goo.backend_4s? 
log_status = [] From 53a0d4c3bbe56f7a8caed9fab736f509ebecb61d Mon Sep 17 00:00:00 2001 From: mdorf Date: Sun, 1 Feb 2026 10:18:35 -0800 Subject: [PATCH 090/106] added code comment to the previous commit --- test/test_chunks_write.rb | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/test/test_chunks_write.rb b/test/test_chunks_write.rb index 19e8455f..945f01a2 100644 --- a/test/test_chunks_write.rb +++ b/test/test_chunks_write.rb @@ -137,6 +137,11 @@ def test_query_flood 25.times do |i| threads << Thread.new { 50.times do |j| + # The query WHERE { ?s a ?o } does not specify a graph, so it runs against the default graph. + # In AllegroGraph, the default graph is empty by default and does not include named graphs. + # In 4store/Virtuoso, the default graph is effectively a union of named graphs, + # so the original query works. Therefore, in AllegroGraph the count returns 0, causing + # refute_equal 0 to fail. This commit adds a named graph to the query. oq = "SELECT (count(?s) as ?c) WHERE { GRAPH <#{ONT_ID}> { ?s a ?o } }" Goo.sparql_query_client.query(oq).each do |sol| refute_equal 0, sol[:c].to_i From 75e1df5ce04d877f2d2eb3a4e93686c4c12339b9 Mon Sep 17 00:00:00 2001 From: mdorf Date: Wed, 4 Feb 2026 15:39:12 -0800 Subject: [PATCH 091/106] added :url to map_to_indexer_type --- lib/goo/search/solr/solr_schema.rb | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/goo/search/solr/solr_schema.rb b/lib/goo/search/solr/solr_schema.rb index 8c38fd2f..84471165 100644 --- a/lib/goo/search/solr/solr_schema.rb +++ b/lib/goo/search/solr/solr_schema.rb @@ -107,7 +107,7 @@ def clear_all_schema(generator = schema_generator) def map_to_indexer_type(orm_data_type) case orm_data_type - when :uri + when :uri, :url 'string' # Assuming a string field for URIs when :string, nil # Default to 'string' if no type is given 'text_general' # Assuming a generic text field for strings From 13a8559a2346cc8320d7d36046646907bfa8a27c Mon Sep 17 00:00:00 2001 From: = Date: 
Thu, 5 Feb 2026 14:40:13 -0800 Subject: [PATCH 092/106] fixed test_chunks_write/test_query_flood, which was failing in 4store --- test/test_chunks_write.rb | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/test/test_chunks_write.rb b/test/test_chunks_write.rb index 945f01a2..9b2294e2 100644 --- a/test/test_chunks_write.rb +++ b/test/test_chunks_write.rb @@ -154,7 +154,7 @@ def test_query_flood if Goo.backend_4s? log_status = [] - Thread.new { + status_thread = Thread.new { 10.times do |i| log_status << Goo.sparql_query_client.status end @@ -164,8 +164,8 @@ def test_query_flood t.join end tput.join + status_thread.join - assert log_status.map { |x| x[:outstanding] }.max > 0 assert_equal 16, log_status.map { |x| x[:running] }.max end end From 6d3c065a15e0e0492abe3fd0309c7b2a379113a3 Mon Sep 17 00:00:00 2001 From: Alex Skrenchuk Date: Wed, 11 Feb 2026 10:46:28 -0800 Subject: [PATCH 093/106] chore: refactor docker-based test runner and add linux container tests - remove custom wait logic; rely on docker compose --wait healthchecks - add linux overrides to run unit tests in a container (useful on macOS) - add no-ports override for containerized runs - update compose mounts and move backend fixtures to test/fixtures - document docker test commands and env vars --- Dockerfile | 34 ++ README.md | 36 ++ dev/compose/linux/ag.yml | 18 + dev/compose/linux/fs.yml | 15 + dev/compose/linux/gd.yml | 19 ++ dev/compose/linux/no-ports.yml | 13 + dev/compose/linux/vo.yml | 18 + docker-compose.yml | 135 +++++--- rakelib/docker_based_test.rake | 323 ++++++++++++------ .../backends/graphdb}/graphdb-repo-config.ttl | 6 +- .../backends/graphdb}/graphdb-test-load.nt | 0 .../virtuoso-grant-write-sparql-access.sql | 3 + 12 files changed, 463 insertions(+), 157 deletions(-) create mode 100644 Dockerfile create mode 100644 dev/compose/linux/ag.yml create mode 100644 dev/compose/linux/fs.yml create mode 100644 dev/compose/linux/gd.yml create mode 100644 
dev/compose/linux/no-ports.yml create mode 100644 dev/compose/linux/vo.yml rename test/{data => fixtures/backends/graphdb}/graphdb-repo-config.ttl (95%) rename test/{data => fixtures/backends/graphdb}/graphdb-test-load.nt (100%) create mode 100644 test/fixtures/backends/virtuoso_initdb_d/virtuoso-grant-write-sparql-access.sql diff --git a/Dockerfile b/Dockerfile new file mode 100644 index 00000000..1dd2150b --- /dev/null +++ b/Dockerfile @@ -0,0 +1,34 @@ +ARG RUBY_VERSION=3.2 +ARG DISTRO=bullseye + +FROM ruby:$RUBY_VERSION-$DISTRO + +RUN apt-get update && apt-get install -y --no-install-recommends \ + build-essential \ + git \ + libxml2 \ + libxslt-dev \ + libxslt1-dev zlib1g-dev \ + # openjdk-11-jre-headless \ + raptor2-utils \ + && rm -rf /var/lib/apt/lists/* + +WORKDIR /app + +COPY Gemfile* *.gemspec ./ + +#Install the exact Bundler version from Gemfile.lock (if it exists) +RUN gem update --system && \ + if [ -f Gemfile.lock ]; then \ + BUNDLER_VERSION=$(grep -A 1 "BUNDLED WITH" Gemfile.lock | tail -n 1 | tr -d ' '); \ + gem install bundler -v "$BUNDLER_VERSION"; \ + else \ + gem install bundler; \ + fi + +RUN bundle config set --global no-document 'true' +RUN bundle install --jobs 4 --retry 3 + +COPY . 
./ + +CMD ["bundle", "exec", "rake"] diff --git a/README.md b/README.md index b1553b79..3ea16b6e 100644 --- a/README.md +++ b/README.md @@ -13,6 +13,42 @@ To see Goo in action browse to the following links: - [BioPortal New API Documentation](http://stagedata.bioontology.org/documentation) - [BioPortal New API Access](http://stagedata.bioontology.org/) +## Tests + +Run the unit tests: + +``` +bundle exec rake test +``` + +Run docker-backed tests (host Ruby): + +``` +bundle exec rake test:docker:fs +bundle exec rake test:docker:ag +bundle exec rake test:docker:vo +bundle exec rake test:docker:gd +``` + +Run docker-backed tests inside the Linux container: + +``` +bundle exec rake test:docker:fs:linux +bundle exec rake test:docker:ag:linux +bundle exec rake test:docker:vo:linux +bundle exec rake test:docker:gd:linux +``` + +Start a shell in the Linux test container (default backend `fs`): + +``` +bundle exec rake test:docker:shell +bundle exec rake test:docker:shell[ag] +``` + +Set `OP_KEEP_CONTAINERS=1` to keep services up after tests or shell exit. +Set `OP_TEST_DOCKER_BACKEND=ag` (or `fs`, `vo`, `gd`) to change the default backend for `test:docker:shell`, `test:docker:up`, and `test:docker:down`. 
+ ## Schema Definitions (DSL) diff --git a/dev/compose/linux/ag.yml b/dev/compose/linux/ag.yml new file mode 100644 index 00000000..89ca5090 --- /dev/null +++ b/dev/compose/linux/ag.yml @@ -0,0 +1,18 @@ +services: + test-linux: + environment: + GOO_BACKEND_NAME: allegrograph + GOO_PORT: 10035 + GOO_HOST: agraph-ut + GOO_PATH_QUERY: /repositories/ontoportal_test + GOO_PATH_DATA: /repositories/ontoportal_test/statements + GOO_PATH_UPDATE: /repositories/ontoportal_test/statements + # profiles: + # - ag + depends_on: + solr-ut: + condition: service_healthy + redis-ut: + condition: service_healthy + agraph-ut: + condition: service_healthy diff --git a/dev/compose/linux/fs.yml b/dev/compose/linux/fs.yml new file mode 100644 index 00000000..c2bade88 --- /dev/null +++ b/dev/compose/linux/fs.yml @@ -0,0 +1,15 @@ +services: + test-linux: + environment: + GOO_BACKEND_NAME: '4store' + GOO_HOST: 4store-ut + GOO_PORT: 9000 + # profiles: + # - fs + depends_on: + solr-ut: + condition: service_healthy + redis-ut: + condition: service_healthy + 4store-ut: + condition: service_healthy diff --git a/dev/compose/linux/gd.yml b/dev/compose/linux/gd.yml new file mode 100644 index 00000000..f83a32f6 --- /dev/null +++ b/dev/compose/linux/gd.yml @@ -0,0 +1,19 @@ +services: + test-linux: + environment: + GOO_BACKEND_NAME: graphdb + GOO_PORT: 7200 + GOO_HOST: graphdb-ut + GOO_PATH_QUERY: /repositories/ontoportal_test + GOO_PATH_DATA: /repositories/ontoportal_test/statements + GOO_PATH_UPDATE: /repositories/ontoportal_test/statements + depends_on: + solr-ut: + condition: service_healthy + redis-ut: + condition: service_healthy + graphdb-ut: + condition: service_healthy + + graphdb-ut: + ports: diff --git a/dev/compose/linux/no-ports.yml b/dev/compose/linux/no-ports.yml new file mode 100644 index 00000000..f42191b9 --- /dev/null +++ b/dev/compose/linux/no-ports.yml @@ -0,0 +1,13 @@ +services: + redis-ut: + ports: [] + solr-ut: + ports: [] + agraph-ut: + ports: [] + 4store-ut: + ports: [] + 
virtuoso-ut: + ports: [] + graphdb-ut: + ports: [] diff --git a/dev/compose/linux/vo.yml b/dev/compose/linux/vo.yml new file mode 100644 index 00000000..37374616 --- /dev/null +++ b/dev/compose/linux/vo.yml @@ -0,0 +1,18 @@ +services: + test-linux: + environment: + GOO_BACKEND_NAME: 'virtuoso' + GOO_HOST: virtuoso-ut + GOO_PORT: 8890 + GOO_PATH_QUERY: /sparql + GOO_PATH_DATA: /sparql + GOO_PATH_UPDATE: /sparql + # profiles: + # - vo + depends_on: + solr-ut: + condition: service_healthy + redis-ut: + condition: service_healthy + virtuoso-ut: + condition: service_healthy diff --git a/docker-compose.yml b/docker-compose.yml index 4ddd6807..2b1ae3b0 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -1,75 +1,110 @@ +# unit tests in containerased env services: + test-linux: + build: + context: . + args: + RUBY_VERSION: '3.2' + command: ["bash", "-lc", "bundle exec rake test"] + environment: + COVERAGE: 'true' # enable simplecov code coverage + REDIS_HOST: redis-ut + SEARCH_SERVER_URL: http://solr-ut:8983/solr + depends_on: + solr-ut: + condition: service_healthy + redis-ut: + condition: service_healthy + profiles: + - linux + redis-ut: image: redis ports: - 6379:6379 + command: ["redis-server", "--save", "", "--appendonly", "no"] healthcheck: test: redis-cli ping - interval: 1s + interval: 10s timeout: 3s - retries: 30 + retries: 10 solr-ut: - image: solr:8.11.2 + image: solr:9 + command: bin/solr start -cloud -f ports: - 8983:8983 - command: bin/solr start -cloud -f + healthcheck: + test: ["CMD", "curl", "-sf", "http://localhost:8983/solr/admin/info/system?wt=json"] + start_period: 5s + interval: 10s + timeout: 5s + retries: 5 - agraph-ut: - image: franzinc/agraph:v8.3.1 + 4store-ut: + image: bde2020/4store platform: linux/amd64 + ports: + - 9000:9000 + command: > + bash -c "4s-backend-setup --segments 4 ontoportal_test + && 4s-backend ontoportal_test + && 4s-httpd -D -s-1 -p 9000 ontoportal_test" + healthcheck: + test: ["CMD", "4s-backend-info", 
"ontoportal_test"] + start_period: 5s + interval: 10s + timeout: 10s + retries: 5 + profiles: + - fs + + agraph-ut: + image: franzinc/agraph:v8.4.3 + platform: linux/amd64 #agraph doesn't provide arm platform environment: - AGRAPH_SUPER_USER=test - AGRAPH_SUPER_PASSWORD=xyzzy shm_size: 1g ports: - # - 10035:10035 - - 10000-10035:10000-10035 - volumes: - - agdata:/agraph/data - # - ./agraph/etc:/agraph/etc + - 10035:10035 command: > - bash -c "/agraph/bin/agraph-control --config /agraph/etc/agraph.cfg start - ; agtool repos create ontoportal_test --supersede - ; agtool users add anonymous - ; agtool users grant anonymous root:ontoportal_test:rw - ; tail -f /agraph/data/agraph.log" - # healthcheck: - # test: ["CMD-SHELL", "curl -sf http://127.0.0.1:10035/repositories/ontoportal_test/status | grep -iqE '(^running|^lingering)' || exit 1"] - # start_period: 10s - # interval: 10s - # timeout: 5s - # retries: 5 + bash -c "/agraph/bin/agraph-control --config /agraph/etc/agraph.cfg start + ; agtool repos create --supersede ontoportal_test + ; agtool users add anonymous + ; agtool users grant anonymous root:ontoportal_test:rw + ; tail -f /agraph/data/agraph.log" + healthcheck: + test: ["CMD", "agtool", "storage-report", "ontoportal_test"] + start_period: 30s #AllegroGraph can take a loooooong time to start + interval: 20s + timeout: 10s + retries: 20 profiles: - ag - 4store-ut: - image: bde2020/4store - platform: linux/amd64 - ports: - - 9000:9000 - command: > - bash -c "4s-backend-setup --segments 4 ontoportal_kb - && 4s-backend ontoportal_kb - && 4s-httpd -D -s-1 -p 9000 ontoportal_kb" - profiles: - - fs virtuoso-ut: - image: tenforce/virtuoso:virtuoso7.2.5 - platform: linux/amd64 + image: openlink/virtuoso-opensource-7:7.2.16 environment: - SPARQL_UPDATE=true + - DBA_PASSWORD=dba + - DAV_PASSWORD=dba ports: - 1111:1111 - 8890:8890 - + volumes: + - ./test/fixtures/backends/virtuoso_initdb_d:/initdb.d + healthcheck: + test: [ "CMD-SHELL", "echo 'status();' | isql 
localhost:1111 dba dba || exit 1" ] + start_period: 10s + interval: 10s + timeout: 5s + retries: 3 profiles: - vo - graphdb: - image: ontotext/graphdb:10.3.3 - platform: linux/amd64 - privileged: true + graphdb-ut: + image: ontotext/graphdb:10.8.12 environment: GDB_HEAP_SIZE: 5G GDB_JAVA_OPTS: >- @@ -77,18 +112,14 @@ services: ports: - 7200:7200 - 7300:7300 + healthcheck: + test: [ "CMD", "curl", "-sf", "http://localhost:7200/repositories/ontoportal_test/health" ] + start_period: 10s + interval: 10s volumes: - - ./test/data/graphdb-repo-config.ttl:/opt/graphdb/dist/configs/templates/data/graphdb-repo-config.ttl - - ./test/data/graphdb-test-load.nt:/opt/graphdb/dist/configs/templates/data/graphdb-test-load.nt - + - ./test/fixtures/backends/graphdb:/opt/graphdb/dist/configs/templates/data entrypoint: > - bash -c " importrdf load -f -c /opt/graphdb/dist/configs/templates/data/graphdb-repo-config.ttl -m parallel /opt/graphdb/dist/configs/templates/data/graphdb-test-load.nt ; graphdb -Ddefault.min.distinct.threshold=3000 " + bash -c " importrdf load -f -c /opt/graphdb/dist/configs/templates/data/graphdb-repo-config.ttl -m parallel /opt/graphdb/dist/configs/templates/data/graphdb-test-load.nt + ; graphdb -Ddefault.min.distinct.threshold=3000 " profiles: - - gb - -volumes: - agdata: - - - - + - gd diff --git a/rakelib/docker_based_test.rake b/rakelib/docker_based_test.rake index c84879a9..c3cd32aa 100644 --- a/rakelib/docker_based_test.rake +++ b/rakelib/docker_based_test.rake @@ -1,121 +1,240 @@ -# Rake tasks for running unit tests with backend services running as docker containers - -desc 'Run unit tests with docker based backend' +# Docker compose driven unit test orchestration +# +# Notes: +# - Backend names match compose profile names (ag, fs, vo, gd). +# - Hostnames are NOT set here. The app defaults them (localhost for host runs). 
+# - Linux container env is provided via compose override files: +# dev/compose/linux/ag.yml +# dev/compose/linux/fs.yml +# dev/compose/linux/vo.yml +# dev/compose/linux/gd.yml namespace :test do namespace :docker do - task :up do - system("docker compose up -d") || abort("Unable to start docker containers") - unless system("curl -sf http://localhost:8983/solr || exit 1") - printf("waiting for Solr container to initialize") - sec = 0 - until system("curl -sf http://localhost:8983/solr || exit 1") do - sleep(1) - printf(".") - sec += 1 - if sec > 30 - abort(" Solr container hasn't initialized properly") - end - end - printf("\n") - end + BASE_COMPOSE = 'docker-compose.yml' + LINUX_OVERRIDE_DIR = 'dev/compose/linux' + LINUX_NO_PORTS_OVERRIDE = "#{LINUX_OVERRIDE_DIR}/no-ports.yml" + TIMEOUT = (ENV['OP_TEST_DOCKER_TIMEOUT'] || '600').to_i + DEFAULT_BACKEND = (ENV['OP_TEST_DOCKER_BACKEND'] || 'fs').to_sym + + # Minimal per-backend config for host runs only. + # Do not set hostnames here. The app defaults them. 
+ BACKENDS = { + ag: { + host_env: { + 'GOO_BACKEND_NAME' => 'allegrograph', + 'GOO_PORT' => '10035', + 'GOO_PATH_QUERY' => '/repositories/ontoportal_test', + 'GOO_PATH_DATA' => '/repositories/ontoportal_test/statements', + 'GOO_PATH_UPDATE' => '/repositories/ontoportal_test/statements' + } + }, + fs: { + host_env: { + 'GOO_BACKEND_NAME' => '4store', + 'GOO_PORT' => '9000', + 'GOO_PATH_QUERY' => '/sparql/', + 'GOO_PATH_DATA' => '/data/', + 'GOO_PATH_UPDATE' => '/update/' + } + }, + vo: { + host_env: { + 'GOO_BACKEND_NAME' => 'virtuoso', + 'GOO_PORT' => '8890', + 'GOO_PATH_QUERY' => '/sparql', + 'GOO_PATH_DATA' => '/sparql', + 'GOO_PATH_UPDATE' => '/sparql' + } + }, + gd: { + host_env: { + 'GOO_BACKEND_NAME' => 'graphdb', + 'GOO_PORT' => '7200', + 'GOO_PATH_QUERY' => '/repositories/ontoportal_test', + 'GOO_PATH_DATA' => '/repositories/ontoportal_test/statements', + 'GOO_PATH_UPDATE' => '/repositories/ontoportal_test/statements' + } + } + }.freeze + + def abort_with(msg) + warn(msg) + exit(1) + end + + def shell!(cmd) + system(cmd) || abort_with("Command failed: #{cmd}") + end + + def cfg!(key) + cfg = BACKENDS[key] + abort_with("Unknown backend key: #{key}. Supported: #{BACKENDS.keys.join(', ')}") unless cfg + cfg + end + + def compose_files(*files) + files.flatten.map { |f| "-f #{f}" }.join(' ') + end + + def linux_override_for(key) + "#{LINUX_OVERRIDE_DIR}/#{key}.yml" + end + + def compose_up(key, files:) + # Host tests use only the backend profile. Linux tests add the linux profile. + # `docker compose up --wait` only applies to services started by `up`, + # so linux runs still call `run` separately after this wait completes. 
+ shell!("docker compose #{compose_files(files)} --profile #{key} up -d --wait --wait-timeout #{TIMEOUT}") + end + + def compose_down(files:) + return puts('OP_KEEP_CONTAINERS=1 set, skipping docker compose down') if ENV['OP_KEEP_CONTAINERS'] == '1' + + shell!( + "docker compose #{compose_files(files)} " \ + '--profile ag --profile fs --profile vo --profile gd --profile linux down' + ) + end + def apply_host_env(key) + cfg!(key)[:host_env].each { |k, v| ENV[k] = v } end - task :down do - #system("docker compose --profile fs --profile ag stop") - #system("docker compose --profile fs --profile ag kill") + + def run_host_tests(key) + apply_host_env(key) + files = [BASE_COMPOSE] + + compose_up(key, files: files) + Rake::Task['test'].invoke + end + + def run_linux_tests(key) + override = linux_override_for(key) + abort_with("Missing compose override file: #{override}") unless File.exist?(override) + abort_with("Missing compose override file: #{LINUX_NO_PORTS_OVERRIDE}") unless File.exist?(LINUX_NO_PORTS_OVERRIDE) + + files = [BASE_COMPOSE, override, LINUX_NO_PORTS_OVERRIDE] + # docker compose is handleling wait_for_healthy + compose_up(key, files: files) + + shell!( + "docker compose #{compose_files(files)} --profile linux --profile #{key} " \ + 'run --rm --build test-linux bundle exec rake test TESTOPTS="-v"' + ) end - desc "run tests with docker AG backend" + + def run_linux_shell(key) + override = linux_override_for(key) + abort_with("Missing compose override file: #{override}") unless File.exist?(override) + abort_with("Missing compose override file: #{LINUX_NO_PORTS_OVERRIDE}") unless File.exist?(LINUX_NO_PORTS_OVERRIDE) + + files = [BASE_COMPOSE, override, LINUX_NO_PORTS_OVERRIDE] + compose_up(key, files: files) + + shell!( + "docker compose #{compose_files(files)} --profile linux --profile #{key} " \ + 'run --rm --build test-linux bash' + ) + end + + # + # Public tasks + # + + desc 'Run unit tests with AllegroGraph backend (docker deps, host Ruby)' task :ag do - 
ENV["GOO_BACKEND_NAME"]="allegrograph" - ENV["GOO_PORT"]="10035" - ENV["GOO_PATH_QUERY"]="/repositories/ontoportal_test" - ENV["GOO_PATH_DATA"]="/repositories/ontoportal_test/statements" - ENV["GOO_PATH_UPDATE"]="/repositories/ontoportal_test/statements" - ENV["COMPOSE_PROFILES"]="ag" - Rake::Task["test:docker:up"].invoke - # AG takes some time to start and create databases/accounts - # TODO: replace system curl command with native ruby code - unless system("curl -sf http://127.0.0.1:10035/repositories/ontoportal_test/status | grep -iqE '(^running|^lingering)' || exit 1") - printf("waiting for AllegroGraph container to initialize") - sec = 0 - until system("curl -sf http://127.0.0.1:10035/repositories/ontoportal_test/status | grep -iqE '(^running|^lingering)' || exit 1") do - sleep(1) - printf(".") - sec += 1 - end + run_host_tests(:ag) + ensure + Rake::Task['test'].reenable + compose_down(files: [BASE_COMPOSE]) + end + + desc 'Run unit tests with AllegroGraph backend (docker deps, Linux container)' + task 'ag:linux' do + files = [BASE_COMPOSE, linux_override_for(:ag)] + begin + run_linux_tests(:ag) + ensure + compose_down(files: files) end - puts - system("docker compose ps") # TODO: remove after GH actions troubleshooting is complete - Rake::Task["test"].invoke - Rake::Task["test:docker:down"].invoke end - desc "run tests with docker 4store backend" + desc 'Run unit tests with 4store backend (docker deps, host Ruby)' task :fs do - ENV["GOO_PORT"]="9000" - ENV["COMPOSE_PROFILES"]='fs' - Rake::Task["test:docker:up"].invoke - Rake::Task["test"].invoke - Rake::Task["test:docker:down"].invoke + run_host_tests(:fs) + ensure + Rake::Task['test'].reenable + compose_down(files: [BASE_COMPOSE]) + end + + desc 'Run unit tests with 4store backend (docker deps, Linux container)' + task 'fs:linux' do + files = [BASE_COMPOSE, linux_override_for(:fs)] + begin + run_linux_tests(:fs) + ensure + compose_down(files: files) + end end - desc "run tests with docker Virtuoso backend" + 
desc 'Run unit tests with Virtuoso backend (docker deps, host Ruby)' task :vo do - ENV["GOO_BACKEND_NAME"]="virtuoso" - ENV["GOO_PORT"]="8890" - ENV["GOO_PATH_QUERY"]="/sparql" - ENV["GOO_PATH_DATA"]="/sparql" - ENV["GOO_PATH_UPDATE"]="/sparql" - ENV["COMPOSE_PROFILES"]="vo" - Rake::Task["test:docker:up"].invoke - # - unless system("curl -sf http://localhost:8890/sparql || exit 1") - printf("waiting for Virtuoso container to initialize") - sec = 0 - until system("curl -sf http://localhost:8890/sparql || exit 1") do - sleep(1) - printf(".") - sec += 1 - if sec > 30 - system("docker compose logs virtuoso-ut") - abort(" Virtuoso container hasn't initialized properly") - end - end + run_host_tests(:vo) + ensure + Rake::Task['test'].reenable + compose_down(files: [BASE_COMPOSE]) + end + + desc 'Run unit tests with Virtuoso backend (docker deps, Linux container)' + task 'vo:linux' do + files = [BASE_COMPOSE, linux_override_for(:vo)] + begin + run_linux_tests(:vo) + ensure + compose_down(files: files) end - Rake::Task["test"].invoke - Rake::Task["test:docker:down"].invoke - end - - - desc "run tests with docker GraphDb backend" - task :gb do - ENV["GOO_BACKEND_NAME"]="graphdb" - ENV["GOO_PORT"]="7200" - ENV["GOO_PATH_QUERY"]="/repositories/ontoportal" - ENV["GOO_PATH_DATA"]="/repositories/ontoportal/statements" - ENV["GOO_PATH_UPDATE"]="/repositories/ontoportal/statements" - ENV["COMPOSE_PROFILES"]="gb" - Rake::Task["test:docker:up"].invoke - - #system("docker compose cp ./test/data/graphdb-repo-config.ttl graphdb:/opt/graphdb/dist/configs/templates/graphdb-repo-config.ttl") - #system("docker compose cp ./test/data/graphdb-test-load.nt graphdb:/opt/graphdb/dist/configs/templates/graphdb-test-load.nt") - #system('docker compose exec graphdb sh -c "importrdf load -f -c /opt/graphdb/dist/configs/templates/graphdb-repo-config.ttl -m parallel /opt/graphdb/dist/configs/templates/graphdb-test-load.nt ;"') - unless system("curl -sf http://localhost:7200/repositories || exit 1") - 
printf("waiting for Graphdb container to initialize") - sec = 0 - until system("curl -sf http://localhost:7200/repositories || exit 1") do - sleep(1) - printf(".") - sec += 1 - if sec > 30 - system("docker compose logs graphdb") - abort(" Graphdb container hasn't initialized properly") - end - end + end + + desc 'Run unit tests with GraphDB backend (docker deps, host Ruby)' + task :gd do + run_host_tests(:gd) + ensure + Rake::Task['test'].reenable + compose_down(files: [BASE_COMPOSE]) + end + + desc 'Run unit tests with GraphDB backend (docker deps, Linux container)' + task 'gd:linux' do + files = [BASE_COMPOSE, linux_override_for(:gd)] + begin + run_linux_tests(:gd) + ensure + compose_down(files: files) end - Rake::Task["test"].invoke - Rake::Task["test:docker:down"].invoke end + desc 'Start a shell in the Linux test container (default backend: fs)' + task :shell, [:backend] do |_t, args| + key = (args[:backend] || DEFAULT_BACKEND).to_sym + cfg!(key) + files = [BASE_COMPOSE, linux_override_for(key), LINUX_NO_PORTS_OVERRIDE] + begin + run_linux_shell(key) + ensure + compose_down(files: files) + end + end + + desc 'Start backend services for development (default backend: fs)' + task :up, [:backend] do |_t, args| + key = (args[:backend] || DEFAULT_BACKEND).to_sym + cfg!(key) + compose_up(key, files: [BASE_COMPOSE]) + end + + desc 'Stop backend services for development (default backend: fs)' + task :down, [:backend] do |_t, args| + compose_down(files: [BASE_COMPOSE]) + end end end diff --git a/test/data/graphdb-repo-config.ttl b/test/fixtures/backends/graphdb/graphdb-repo-config.ttl similarity index 95% rename from test/data/graphdb-repo-config.ttl rename to test/fixtures/backends/graphdb/graphdb-repo-config.ttl index 9200da9a..84032a0b 100644 --- a/test/data/graphdb-repo-config.ttl +++ b/test/fixtures/backends/graphdb/graphdb-repo-config.ttl @@ -3,8 +3,8 @@ @prefix sail: . @prefix xsd: . 
-<#ontoportal> a rep:Repository; - rep:repositoryID "ontoportal"; +<#ontoportal_test> a rep:Repository; + rep:repositoryID "ontoportal_test"; rep:repositoryImpl [ rep:repositoryType "graphdb:SailRepository"; [ @@ -30,4 +30,4 @@ sail:sailType "owlim:Sail" ] ]; - rdfs:label "" . \ No newline at end of file + rdfs:label "" . diff --git a/test/data/graphdb-test-load.nt b/test/fixtures/backends/graphdb/graphdb-test-load.nt similarity index 100% rename from test/data/graphdb-test-load.nt rename to test/fixtures/backends/graphdb/graphdb-test-load.nt diff --git a/test/fixtures/backends/virtuoso_initdb_d/virtuoso-grant-write-sparql-access.sql b/test/fixtures/backends/virtuoso_initdb_d/virtuoso-grant-write-sparql-access.sql new file mode 100644 index 00000000..d509c6fb --- /dev/null +++ b/test/fixtures/backends/virtuoso_initdb_d/virtuoso-grant-write-sparql-access.sql @@ -0,0 +1,3 @@ +GRANT EXECUTE ON DB.DBA.SPARQL_INSERT_DICT_CONTENT TO "SPARQL"; +GRANT SPARQL_UPDATE TO "SPARQL"; +DB.DBA.RDF_DEFAULT_USER_PERMS_SET ('nobody', 7); From b9b1437033a1815c4ca3a5659302801008370a06 Mon Sep 17 00:00:00 2001 From: Alex Skrenchuk Date: Wed, 11 Feb 2026 10:57:24 -0800 Subject: [PATCH 094/106] change triplestore name for GraphDB from gb to gd --- .github/workflows/ruby-unit-test.yml | 18 ++++++++++++------ 1 file changed, 12 insertions(+), 6 deletions(-) diff --git a/.github/workflows/ruby-unit-test.yml b/.github/workflows/ruby-unit-test.yml index 7c5c6153..85233f9c 100644 --- a/.github/workflows/ruby-unit-test.yml +++ b/.github/workflows/ruby-unit-test.yml @@ -1,6 +1,12 @@ name: Ruby Unit Test -on: [push, pull_request] +on: + push: + branches: + - '**' + tags-ignore: + - '**' # ignore all tag pushes + pull_request: permissions: contents: read @@ -13,11 +19,11 @@ jobs: fail-fast: false matrix: goo-slice: [ '100'] - ruby-version: [ '3.2.0' ] - triplestore: [ 'fs', 'ag', 'vo', 'gb' ] + ruby-version: [ '3.2' ] + triplestore: [ 'fs', 'ag', 'vo', 'gd' ] steps: - - uses: actions/checkout@v3 + 
- uses: actions/checkout@v4 - name: Install Dependencies run: sudo apt-get update && sudo apt-get -y install raptor2-utils - name: Set up Ruby @@ -29,8 +35,8 @@ jobs: # tempoaray workaround for the config.rb file requirement run: echo 'Goo.config do |config| end' > config/config.rb - name: List directory contents - run: ls -R ./test/data + run: ls -R ./test/fixtures - name: Run tests run: GOO_SLICES=${{ matrix.goo-slice }} bundle exec rake test:docker:${{ matrix.triplestore }} TESTOPTS="-v" - name: Upload coverage to Codecov - uses: codecov/codecov-action@v3 + uses: codecov/codecov-action@v5 From a8989fe876773a43cdfa5a50ac78d83099ffcba6 Mon Sep 17 00:00:00 2001 From: Alex Skrenchuk Date: Wed, 11 Feb 2026 11:43:00 -0800 Subject: [PATCH 095/106] fix: correct redis config rename to goo_redis - ensure redis host is read from goo_redis (not redis) - update sample config to match --- config/config.rb.sample | 8 ++++---- lib/goo/config/config.rb | 6 +++--- 2 files changed, 7 insertions(+), 7 deletions(-) diff --git a/config/config.rb.sample b/config/config.rb.sample index 0429e9cd..edac79df 100644 --- a/config/config.rb.sample +++ b/config/config.rb.sample @@ -1,14 +1,14 @@ Goo.config do |config| # 4store config.goo_backend_name = '4store' - config.goo_port = 8080 + config.goo_port = 9000 config.goo_host = 'localhost' config.goo_path_query = '/sparql/' config.goo_path_data = '/data/' config.goo_path_update = '/update/' # AllegroGraph - # config.goo_backend_name = 'AG' + # config.goo_backend_name = 'allegrograph' # config.goo_port = 10035 # config.goo_host = 'localhost' # config.goo_path_query = "/repositories/ontoportal" @@ -16,8 +16,8 @@ Goo.config do |config| # config.goo_path_update = "/repositories/ontoportal/statements/" config.search_server_url = 'http://localhost:8983/solr/' - config.redis_host = 'localhost' - config.redis_port = 6379 + config.goo_redis_host = 'localhost' + config.goo_redis_port = 6379 config.bioportal_namespace = 'http://data.bioontology.org/' 
config.queries_debug = false end diff --git a/lib/goo/config/config.rb b/lib/goo/config/config.rb index d842b152..06cafae4 100644 --- a/lib/goo/config/config.rb +++ b/lib/goo/config/config.rb @@ -21,8 +21,8 @@ def config(&block) @settings.goo_path_data ||= ENV['GOO_PATH_DATA'] || '/data/' @settings.goo_path_update ||= ENV['GOO_PATH_UPDATE'] || '/update/' @settings.search_server_url ||= ENV['SEARCH_SERVER_URL'] || 'http://localhost:8983/solr' - @settings.redis_host ||= ENV['REDIS_HOST'] || 'localhost' - @settings.redis_port ||= ENV['REDIS_PORT'] || 6379 + @settings.goo_redis_host ||= ENV['REDIS_HOST'] || 'localhost' + @settings.goo_redis_port ||= ENV['REDIS_PORT'] || 6379 @settings.bioportal_namespace ||= ENV['BIOPORTAL_NAMESPACE'] || 'http://data.bioontology.org/' @settings.query_logging ||= ENV['QUERIES_LOGGING'] || false @settings.query_logging_file ||= ENV['QUERIES_LOGGING_FILE'] || './sparql.log' @@ -30,7 +30,7 @@ def config(&block) @settings.slice_loading_size ||= ENV['GOO_SLICES']&.to_i || 500 puts "(GOO) >> Using RDF store (#{@settings.goo_backend_name}) #{@settings.goo_host}:#{@settings.goo_port}#{@settings.goo_path_query}" puts "(GOO) >> Using term search server at #{@settings.search_server_url}" - puts "(GOO) >> Using Redis instance at #{@settings.redis_host}:#{@settings.redis_port}" + puts "(GOO) >> Using Redis instance at #{@settings.goo_redis_host}:#{@settings.goo_redis_port}" puts "(GOO) >> Using Query logging: #{@settings.query_logging_file}" if @settings.query_logging connect_goo From d70fb4b92858e748199ed88ea02d30d16b4589cb Mon Sep 17 00:00:00 2001 From: Alex Skrenchuk Date: Wed, 11 Feb 2026 13:08:52 -0800 Subject: [PATCH 096/106] clear out unused settings --- dev/compose/linux/ag.yml | 2 -- dev/compose/linux/fs.yml | 2 -- dev/compose/linux/vo.yml | 2 -- 3 files changed, 6 deletions(-) diff --git a/dev/compose/linux/ag.yml b/dev/compose/linux/ag.yml index 89ca5090..b56d9c8e 100644 --- a/dev/compose/linux/ag.yml +++ b/dev/compose/linux/ag.yml @@ 
-7,8 +7,6 @@ services: GOO_PATH_QUERY: /repositories/ontoportal_test GOO_PATH_DATA: /repositories/ontoportal_test/statements GOO_PATH_UPDATE: /repositories/ontoportal_test/statements - # profiles: - # - ag depends_on: solr-ut: condition: service_healthy diff --git a/dev/compose/linux/fs.yml b/dev/compose/linux/fs.yml index c2bade88..27acf4b3 100644 --- a/dev/compose/linux/fs.yml +++ b/dev/compose/linux/fs.yml @@ -4,8 +4,6 @@ services: GOO_BACKEND_NAME: '4store' GOO_HOST: 4store-ut GOO_PORT: 9000 - # profiles: - # - fs depends_on: solr-ut: condition: service_healthy diff --git a/dev/compose/linux/vo.yml b/dev/compose/linux/vo.yml index 37374616..c47dd654 100644 --- a/dev/compose/linux/vo.yml +++ b/dev/compose/linux/vo.yml @@ -7,8 +7,6 @@ services: GOO_PATH_QUERY: /sparql GOO_PATH_DATA: /sparql GOO_PATH_UPDATE: /sparql - # profiles: - # - vo depends_on: solr-ut: condition: service_healthy From b1797df826d5c2d13b10b2a7cf7eceb1ac7d923e Mon Sep 17 00:00:00 2001 From: Alex Skrenchuk Date: Wed, 11 Feb 2026 13:09:17 -0800 Subject: [PATCH 097/106] test: centralize safety guard + move test_reset into test helper When adding rake tasks for running unit tests in linux containers, unit tests started failing due to unsafe target detection and state leakage. This refactor consolidates safety checks into TestSafety, moves test_reset into test helpers. It also adds a redis key-count guard to abort against non-test instances. --- lib/goo/config/config.rb | 13 ------- test/test_cache.rb | 2 +- test/test_case.rb | 76 +++++++++++++++++++++++++++++++++++++++- 3 files changed, 76 insertions(+), 15 deletions(-) diff --git a/lib/goo/config/config.rb b/lib/goo/config/config.rb index 06cafae4..37b59e01 100644 --- a/lib/goo/config/config.rb +++ b/lib/goo/config/config.rb @@ -69,17 +69,4 @@ def connect_goo end end - def self.test_reset - if @@sparql_backends[:main][:query].url.to_s["localhost"].nil? 
- raise Exception, "only for testing" - end - @@sparql_backends = {} - Goo.add_sparql_backend(:main, - backend_name: @settings.goo_backend_name, - query: "http://#{@settings.goo_host}:#{@settings.goo_port}#{@settings.goo_path_query}", - data: "http://#{@settings.goo_host}:#{@settings.goo_port}#{@settings.goo_path_data}", - update: "http://#{@settings.goo_host}:#{@settings.goo_port}#{@settings.goo_path_update}", - options: { rules: :NONE }) - end - end diff --git a/test/test_cache.rb b/test/test_cache.rb index 11dccf41..aff0c426 100644 --- a/test/test_cache.rb +++ b/test/test_cache.rb @@ -154,7 +154,7 @@ def x.response *args #different query programs = Program.where(name: "BioInformatics X", university: [ name: "Stanford" ]).all end - Goo.test_reset + TestHelpers.test_reset Goo.use_cache=false end diff --git a/test/test_case.rb b/test/test_case.rb index af7f2a84..22fd77bc 100644 --- a/test/test_case.rb +++ b/test/test_case.rb @@ -22,6 +22,81 @@ require_relative "../lib/goo.rb" require_relative '../config/config' +# Safety guard for destructive tests: ensure test targets are safe (localhost or -ut suffix) +module TestSafety + SAFE_HOSTS = Regexp.new(/localhost|-ut/) + MAX_REDIS_KEYS = 10 + + def self.safe_host?(value) + value = value.to_s + return false if value.empty? + !!(value =~ SAFE_HOSTS) + end + + def self.targets + { + triplestore: Goo.settings.goo_host.to_s, + search: Goo.settings.search_server_url.to_s, + redis: Goo.settings.goo_redis_host.to_s + } + end + + def self.unsafe_targets? + t = targets + unsafe = !safe_host?(t[:triplestore]) || !safe_host?(t[:search]) || !safe_host?(t[:redis]) + [unsafe, t] + end + + def self.ensure_safe_test_targets! + return if @safety_checked + unsafe, t = unsafe_targets? + return if !unsafe || ENV['CI'] == 'true' + + if $stdin.tty? 
+ puts "\n\n================================== WARNING ==================================\n" + puts "** TESTS CAN BE DESTRUCTIVE -- YOU ARE POINTING TO A POTENTIAL PRODUCTION/STAGE SERVER **" + puts "Servers:" + puts "triplestore -- #{t[:triplestore]}" + puts "search -- #{t[:search]}" + puts "redis -- #{t[:redis]}" + print "Type 'y' to continue: " + $stdout.flush + confirm = $stdin.gets + abort('Canceling tests...') unless confirm && confirm.strip == 'y' + puts 'Running tests...' + $stdout.flush + else + abort('Aborting tests: non-whitelisted targets and non-interactive session.') + end + ensure + @safety_checked = true + end + + def self.ensure_safe_redis_size! + redis = Goo.redis_client + return unless redis + count = redis.dbsize + return if count <= MAX_REDIS_KEYS + abort("Aborting tests: redis has #{count} keys, expected <= #{MAX_REDIS_KEYS} for a test instance.") + end +end + +TestSafety.ensure_safe_test_targets! + +module TestHelpers + def self.test_reset + TestSafety.ensure_safe_test_targets! + TestSafety.ensure_safe_redis_size! 
+ Goo.class_variable_set(:@@sparql_backends, {}) + Goo.add_sparql_backend(:main, + backend_name: Goo.settings.goo_backend_name, + query: "http://#{Goo.settings.goo_host}:#{Goo.settings.goo_port}#{Goo.settings.goo_path_query}", + data: "http://#{Goo.settings.goo_host}:#{Goo.settings.goo_port}#{Goo.settings.goo_path_data}", + update: "http://#{Goo.settings.goo_host}:#{Goo.settings.goo_port}#{Goo.settings.goo_path_update}", + options: { rules: :NONE }) + end +end + class GooTest class Unit < MiniTest::Unit @@ -79,4 +154,3 @@ def self.count_pattern(pattern) end end - From fb4e00f8928867fc6bce953b4aa8f946c793e7cd Mon Sep 17 00:00:00 2001 From: = Date: Fri, 27 Feb 2026 16:09:31 -0800 Subject: [PATCH 098/106] non-deterministic test failures fix #1 of several --- lib/goo/base/resource.rb | 2 +- test/models.rb | 34 +++++++++++++++++++++++++++++----- 2 files changed, 30 insertions(+), 6 deletions(-) diff --git a/lib/goo/base/resource.rb b/lib/goo/base/resource.rb index 8aeeb570..375ca32d 100644 --- a/lib/goo/base/resource.rb +++ b/lib/goo/base/resource.rb @@ -247,7 +247,7 @@ def save(*opts) if !batch_file return self if !modified? && persistent? - raise Goo::Base::NotValidException, "Object is not valid. Check errors." unless valid? + raise Goo::Base::NotValidException, "Object is not valid. Errors: #{errors}" unless valid? 
end #set default values before saving diff --git a/test/models.rb b/test/models.rb index 7d490a4a..78ec1393 100644 --- a/test/models.rb +++ b/test/models.rb @@ -64,11 +64,26 @@ class Student < Goo::Base::Resource end module GooTestData + TRACKED_FIXTURE_MODELS = [Student, University, Program, Category, Address].freeze + + def self.safe_model_count(model) + model.where.include(model.attributes).all.length + rescue StandardError => e + warn "[GooTestData] count failed for #{model.name}: #{e.class}: #{e.message}" + -1 + end + + def self.log_fixture_counts(stage) + summary = TRACKED_FIXTURE_MODELS.map { |m| "#{m.name}=#{safe_model_count(m)}" }.join(", ") + puts "[GooTestData] #{stage}: #{summary}" + end + def self.create_test_case_data + log_fixture_counts("before create_test_case_data") addresses = {} - addresses["Stanford"] = [ Address.new(line1: "bla", line2: "foo", country: "US").save ] - addresses["Southampton"] = [ Address.new(line1: "bla", line2: "foo", country: "UK").save ] - addresses["UPM"] = [ Address.new(line1: "bla", line2: "foo", country: "SP").save ] + addresses["Stanford"] = [Address.where(line1: "bla", line2: "foo", country: "US").first || Address.new(line1: "bla", line2: "foo", country: "US").save] + addresses["Southampton"] = [Address.where(line1: "bla", line2: "foo", country: "UK").first || Address.new(line1: "bla", line2: "foo", country: "UK").save] + addresses["UPM"] = [Address.where(line1: "bla", line2: "foo", country: "SP").first || Address.new(line1: "bla", line2: "foo", country: "SP").save] ["Stanford", "Southampton", "UPM"].each do |uni_name| if University.find(uni_name).nil? University.new(name: uni_name, address: addresses[uni_name]).save @@ -79,7 +94,9 @@ def self.create_test_case_data end prg = Program.new(name: p, category: categories, credits: credits, university: University.find(uni_name).include(:name).first ) - binding.pry if !prg.valid? + unless prg.valid? 
+ raise "Program fixture is invalid for university=#{uni_name.inspect}, program=#{p.inspect}. Errors: #{prg.errors.inspect}" + end prg.save if !prg.exist? end end @@ -96,12 +113,19 @@ def self.create_test_case_data programs << pr end st.enrolled= programs - st.save rescue binding.pry + begin + st.save + rescue StandardError => e + raise "#{e.class}: failed saving student fixture #{st_data[0].inspect}. #{e.message}" + end end + log_fixture_counts("after create_test_case_data") end def self.delete_test_case_data + log_fixture_counts("before delete_test_case_data") delete_all [Student, University, Program, Category, Address] + log_fixture_counts("after delete_test_case_data") end def self.delete_all(objects) From 71385a8db443e5fcdfe4910404208e49f49f83b3 Mon Sep 17 00:00:00 2001 From: Alex Skrenchuk Date: Sat, 28 Feb 2026 01:53:27 -0800 Subject: [PATCH 099/106] Infra/ontoportal testkit integration (#174) --- .github/workflows/ruby-unit-test.yml | 42 --- .github/workflows/testkit-unit-tests.yml | 80 ++++++ .ontoportal-testkit.yml | 8 + .ruby-version | 1 + Dockerfile | 22 +- Gemfile | 5 +- Gemfile.lock | 18 +- config/config.test.rb | 2 + docker-compose.yml | 125 --------- rakelib/docker_based_test.rake | 240 ------------------ rakelib/ontoportal_testkit.rake | 2 + .../backends/graphdb/graphdb-repo-config.ttl | 33 --- .../backends/graphdb/graphdb-test-load.nt | 0 .../virtuoso-grant-write-sparql-access.sql | 3 - test/test_case.rb | 2 +- 15 files changed, 112 insertions(+), 471 deletions(-) delete mode 100644 .github/workflows/ruby-unit-test.yml create mode 100644 .github/workflows/testkit-unit-tests.yml create mode 100644 .ontoportal-testkit.yml create mode 100644 .ruby-version create mode 100644 config/config.test.rb delete mode 100644 docker-compose.yml delete mode 100644 rakelib/docker_based_test.rake create mode 100644 rakelib/ontoportal_testkit.rake delete mode 100644 test/fixtures/backends/graphdb/graphdb-repo-config.ttl delete mode 100644 
test/fixtures/backends/graphdb/graphdb-test-load.nt delete mode 100644 test/fixtures/backends/virtuoso_initdb_d/virtuoso-grant-write-sparql-access.sql diff --git a/.github/workflows/ruby-unit-test.yml b/.github/workflows/ruby-unit-test.yml deleted file mode 100644 index 85233f9c..00000000 --- a/.github/workflows/ruby-unit-test.yml +++ /dev/null @@ -1,42 +0,0 @@ -name: Ruby Unit Test - -on: - push: - branches: - - '**' - tags-ignore: - - '**' # ignore all tag pushes - pull_request: - -permissions: - contents: read - -jobs: - test: - - runs-on: ubuntu-latest - strategy: - fail-fast: false - matrix: - goo-slice: [ '100'] - ruby-version: [ '3.2' ] - triplestore: [ 'fs', 'ag', 'vo', 'gd' ] - - steps: - - uses: actions/checkout@v4 - - name: Install Dependencies - run: sudo apt-get update && sudo apt-get -y install raptor2-utils - - name: Set up Ruby - uses: ruby/setup-ruby@v1 - with: - ruby-version: ${{ matrix.ruby-version }} - bundler-cache: true # runs 'bundle install' and caches installed gems automatically - - name: Add config file - # tempoaray workaround for the config.rb file requirement - run: echo 'Goo.config do |config| end' > config/config.rb - - name: List directory contents - run: ls -R ./test/fixtures - - name: Run tests - run: GOO_SLICES=${{ matrix.goo-slice }} bundle exec rake test:docker:${{ matrix.triplestore }} TESTOPTS="-v" - - name: Upload coverage to Codecov - uses: codecov/codecov-action@v5 diff --git a/.github/workflows/testkit-unit-tests.yml b/.github/workflows/testkit-unit-tests.yml new file mode 100644 index 00000000..b23760fc --- /dev/null +++ b/.github/workflows/testkit-unit-tests.yml @@ -0,0 +1,80 @@ +name: Docker Unit Tests + +on: + push: + branches: + - '**' + tags-ignore: + - '**' + pull_request: + +env: + # CI execution mode for backend tests: + # - container: run `test:docker::container` (default) + # - native: run `test:docker:` on host Ruby + # OPTK_CI_RUN_MODE: ${{ vars.OPTK_CI_RUN_MODE || 'container' }} + # Example override to force 
native mode in this workflow file: + OPTK_CI_RUN_MODE: native + +jobs: + prepare: + runs-on: ubuntu-latest + outputs: + backends: ${{ steps.cfg.outputs.backends }} + steps: + - uses: actions/checkout@v4 + + - id: cfg + name: Read backend matrix from .ontoportal-testkit.yml + run: | + BACKENDS=$(ruby -ryaml -rjson -e 'c=YAML.safe_load_file(".ontoportal-testkit.yml") || {}; b=c["backends"] || %w[fs ag vo gd]; puts JSON.generate(b)') + echo "backends=$BACKENDS" >> "$GITHUB_OUTPUT" + + test: + needs: prepare + runs-on: ubuntu-latest + timeout-minutes: 45 + strategy: + fail-fast: false + matrix: + backend: ${{ fromJson(needs.prepare.outputs.backends) }} + + steps: + - uses: actions/checkout@v4 + + - name: Install native system dependencies + if: env.OPTK_CI_RUN_MODE == 'native' + run: | + sudo apt-get update + sudo apt-get install -y raptor2-utils + + - name: Set up Ruby from .ruby-version + uses: ruby/setup-ruby@v1 + with: + ruby-version: .ruby-version + bundler-cache: true + + - name: Run unit tests + env: + CI: "true" + TESTOPTS: "-v" + BACKEND: ${{ matrix.backend }} + run: | + MODE="${OPTK_CI_RUN_MODE:-container}" + TASK="test:docker:${BACKEND}" + if [ "$MODE" = "container" ]; then + TASK="${TASK}:container" + elif [ "$MODE" != "native" ]; then + echo "Invalid OPTK_CI_RUN_MODE=$MODE (expected container or native)" + exit 1 + fi + + bundle exec rake "$TASK" + + - name: Upload coverage reports to Codecov + uses: codecov/codecov-action@v5 + with: + token: ${{ secrets.CODECOV_TOKEN }} + flags: unittests,${{ matrix.backend }} + verbose: true + fail_ci_if_error: false diff --git a/.ontoportal-testkit.yml b/.ontoportal-testkit.yml new file mode 100644 index 00000000..75588b98 --- /dev/null +++ b/.ontoportal-testkit.yml @@ -0,0 +1,8 @@ +component_name: goo +app_service: test-container +backends: + - fs + - ag + - vo + - gd +dependency_services: [] diff --git a/.ruby-version b/.ruby-version new file mode 100644 index 00000000..e650c01d --- /dev/null +++ b/.ruby-version @@ 
-0,0 +1 @@ +3.2.9 diff --git a/Dockerfile b/Dockerfile index 1dd2150b..6c566230 100644 --- a/Dockerfile +++ b/Dockerfile @@ -1,32 +1,18 @@ ARG RUBY_VERSION=3.2 ARG DISTRO=bullseye - -FROM ruby:$RUBY_VERSION-$DISTRO - -RUN apt-get update && apt-get install -y --no-install-recommends \ - build-essential \ - git \ - libxml2 \ - libxslt-dev \ - libxslt1-dev zlib1g-dev \ - # openjdk-11-jre-headless \ - raptor2-utils \ - && rm -rf /var/lib/apt/lists/* +ARG TESTKIT_BASE_IMAGE=ontoportal/testkit-base:ruby${RUBY_VERSION}-${DISTRO} +FROM ${TESTKIT_BASE_IMAGE} WORKDIR /app COPY Gemfile* *.gemspec ./ -#Install the exact Bundler version from Gemfile.lock (if it exists) -RUN gem update --system && \ - if [ -f Gemfile.lock ]; then \ +# Respect the project's Bundler lock when present. +RUN if [ -f Gemfile.lock ]; then \ BUNDLER_VERSION=$(grep -A 1 "BUNDLED WITH" Gemfile.lock | tail -n 1 | tr -d ' '); \ gem install bundler -v "$BUNDLER_VERSION"; \ - else \ - gem install bundler; \ fi -RUN bundle config set --global no-document 'true' RUN bundle install --jobs 4 --retry 3 COPY . 
./ diff --git a/Gemfile b/Gemfile index 31a4b42a..e7ee86c9 100644 --- a/Gemfile +++ b/Gemfile @@ -12,6 +12,7 @@ group :test do gem "pry" gem 'simplecov' gem 'simplecov-cobertura' # for submitting code coverage results to codecov.io + gem 'ontoportal_testkit', github: 'alexskr/ontoportal_testkit', branch: 'main' end group :profiling do @@ -24,7 +25,3 @@ end gem 'sparql-client', github: 'ncbo/sparql-client', branch: 'ontoportal-lirmm-development' gem "rdf-raptor", github: "ruby-rdf/rdf-raptor", ref: "6392ceabf71c3233b0f7f0172f662bd4a22cd534" # use version 3.3.0 when available gem 'net-ftp' - -# # to remove if no more supporting ruby 2.7 -# gem 'faraday', '2.7.11' #unpin if we no more support ruby 2.7 -# gem 'public_suffix', '~> 5.1.1' diff --git a/Gemfile.lock b/Gemfile.lock index 2817e9ba..3a135890 100644 --- a/Gemfile.lock +++ b/Gemfile.lock @@ -1,3 +1,11 @@ +GIT + remote: https://github.com/alexskr/ontoportal_testkit.git + revision: c154c17f2e8b1a7931b710febfcadb15e566f672 + branch: main + specs: + ontoportal_testkit (0.1.0) + rake (>= 13.0) + GIT remote: https://github.com/ncbo/sparql-client.git revision: 2ac20b217bb7ad2b11305befe0ee77d75e44eac5 @@ -105,7 +113,7 @@ GEM rack-protection (3.2.0) base64 (>= 0.1.0) rack (~> 2.2, >= 2.2.4) - rake (13.2.1) + rake (13.3.1) rdf (3.3.2) bcp47_spec (~> 0.2) bigdecimal (~> 3.1, >= 3.1.5) @@ -161,20 +169,20 @@ GEM date timeout (0.4.3) tzinfo (0.3.62) - uri (1.0.2) + uri (1.1.1) uuid (2.3.9) macaddr (~> 1.0) PLATFORMS - arm64-darwin-23 - arm64-darwin-24 - x86_64-linux + arm64-darwin + x86_64-linux-gnu DEPENDENCIES activesupport goo! minitest (< 5.0) net-ftp + ontoportal_testkit! 
pry rack-accept rack-post-body-to-params diff --git a/config/config.test.rb b/config/config.test.rb new file mode 100644 index 00000000..972ad2da --- /dev/null +++ b/config/config.test.rb @@ -0,0 +1,2 @@ +Goo.config do |config| +end diff --git a/docker-compose.yml b/docker-compose.yml deleted file mode 100644 index 2b1ae3b0..00000000 --- a/docker-compose.yml +++ /dev/null @@ -1,125 +0,0 @@ -# unit tests in containerased env -services: - test-linux: - build: - context: . - args: - RUBY_VERSION: '3.2' - command: ["bash", "-lc", "bundle exec rake test"] - environment: - COVERAGE: 'true' # enable simplecov code coverage - REDIS_HOST: redis-ut - SEARCH_SERVER_URL: http://solr-ut:8983/solr - depends_on: - solr-ut: - condition: service_healthy - redis-ut: - condition: service_healthy - profiles: - - linux - - redis-ut: - image: redis - ports: - - 6379:6379 - command: ["redis-server", "--save", "", "--appendonly", "no"] - healthcheck: - test: redis-cli ping - interval: 10s - timeout: 3s - retries: 10 - - solr-ut: - image: solr:9 - command: bin/solr start -cloud -f - ports: - - 8983:8983 - healthcheck: - test: ["CMD", "curl", "-sf", "http://localhost:8983/solr/admin/info/system?wt=json"] - start_period: 5s - interval: 10s - timeout: 5s - retries: 5 - - 4store-ut: - image: bde2020/4store - platform: linux/amd64 - ports: - - 9000:9000 - command: > - bash -c "4s-backend-setup --segments 4 ontoportal_test - && 4s-backend ontoportal_test - && 4s-httpd -D -s-1 -p 9000 ontoportal_test" - healthcheck: - test: ["CMD", "4s-backend-info", "ontoportal_test"] - start_period: 5s - interval: 10s - timeout: 10s - retries: 5 - profiles: - - fs - - agraph-ut: - image: franzinc/agraph:v8.4.3 - platform: linux/amd64 #agraph doesn't provide arm platform - environment: - - AGRAPH_SUPER_USER=test - - AGRAPH_SUPER_PASSWORD=xyzzy - shm_size: 1g - ports: - - 10035:10035 - command: > - bash -c "/agraph/bin/agraph-control --config /agraph/etc/agraph.cfg start - ; agtool repos create --supersede 
ontoportal_test - ; agtool users add anonymous - ; agtool users grant anonymous root:ontoportal_test:rw - ; tail -f /agraph/data/agraph.log" - healthcheck: - test: ["CMD", "agtool", "storage-report", "ontoportal_test"] - start_period: 30s #AllegroGraph can take a loooooong time to start - interval: 20s - timeout: 10s - retries: 20 - profiles: - - ag - - virtuoso-ut: - image: openlink/virtuoso-opensource-7:7.2.16 - environment: - - SPARQL_UPDATE=true - - DBA_PASSWORD=dba - - DAV_PASSWORD=dba - ports: - - 1111:1111 - - 8890:8890 - volumes: - - ./test/fixtures/backends/virtuoso_initdb_d:/initdb.d - healthcheck: - test: [ "CMD-SHELL", "echo 'status();' | isql localhost:1111 dba dba || exit 1" ] - start_period: 10s - interval: 10s - timeout: 5s - retries: 3 - profiles: - - vo - - graphdb-ut: - image: ontotext/graphdb:10.8.12 - environment: - GDB_HEAP_SIZE: 5G - GDB_JAVA_OPTS: >- - -Xms5g -Xmx5g - ports: - - 7200:7200 - - 7300:7300 - healthcheck: - test: [ "CMD", "curl", "-sf", "http://localhost:7200/repositories/ontoportal_test/health" ] - start_period: 10s - interval: 10s - volumes: - - ./test/fixtures/backends/graphdb:/opt/graphdb/dist/configs/templates/data - entrypoint: > - bash -c " importrdf load -f -c /opt/graphdb/dist/configs/templates/data/graphdb-repo-config.ttl -m parallel /opt/graphdb/dist/configs/templates/data/graphdb-test-load.nt - ; graphdb -Ddefault.min.distinct.threshold=3000 " - profiles: - - gd diff --git a/rakelib/docker_based_test.rake b/rakelib/docker_based_test.rake deleted file mode 100644 index c3cd32aa..00000000 --- a/rakelib/docker_based_test.rake +++ /dev/null @@ -1,240 +0,0 @@ -# Docker compose driven unit test orchestration -# -# Notes: -# - Backend names match compose profile names (ag, fs, vo, gd). -# - Hostnames are NOT set here. The app defaults them (localhost for host runs). 
-# - Linux container env is provided via compose override files: -# dev/compose/linux/ag.yml -# dev/compose/linux/fs.yml -# dev/compose/linux/vo.yml -# dev/compose/linux/gd.yml -namespace :test do - namespace :docker do - BASE_COMPOSE = 'docker-compose.yml' - LINUX_OVERRIDE_DIR = 'dev/compose/linux' - LINUX_NO_PORTS_OVERRIDE = "#{LINUX_OVERRIDE_DIR}/no-ports.yml" - TIMEOUT = (ENV['OP_TEST_DOCKER_TIMEOUT'] || '600').to_i - DEFAULT_BACKEND = (ENV['OP_TEST_DOCKER_BACKEND'] || 'fs').to_sym - - # Minimal per-backend config for host runs only. - # Do not set hostnames here. The app defaults them. - BACKENDS = { - ag: { - host_env: { - 'GOO_BACKEND_NAME' => 'allegrograph', - 'GOO_PORT' => '10035', - 'GOO_PATH_QUERY' => '/repositories/ontoportal_test', - 'GOO_PATH_DATA' => '/repositories/ontoportal_test/statements', - 'GOO_PATH_UPDATE' => '/repositories/ontoportal_test/statements' - } - }, - fs: { - host_env: { - 'GOO_BACKEND_NAME' => '4store', - 'GOO_PORT' => '9000', - 'GOO_PATH_QUERY' => '/sparql/', - 'GOO_PATH_DATA' => '/data/', - 'GOO_PATH_UPDATE' => '/update/' - } - }, - vo: { - host_env: { - 'GOO_BACKEND_NAME' => 'virtuoso', - 'GOO_PORT' => '8890', - 'GOO_PATH_QUERY' => '/sparql', - 'GOO_PATH_DATA' => '/sparql', - 'GOO_PATH_UPDATE' => '/sparql' - } - }, - gd: { - host_env: { - 'GOO_BACKEND_NAME' => 'graphdb', - 'GOO_PORT' => '7200', - 'GOO_PATH_QUERY' => '/repositories/ontoportal_test', - 'GOO_PATH_DATA' => '/repositories/ontoportal_test/statements', - 'GOO_PATH_UPDATE' => '/repositories/ontoportal_test/statements' - } - } - }.freeze - - def abort_with(msg) - warn(msg) - exit(1) - end - - def shell!(cmd) - system(cmd) || abort_with("Command failed: #{cmd}") - end - - def cfg!(key) - cfg = BACKENDS[key] - abort_with("Unknown backend key: #{key}. 
Supported: #{BACKENDS.keys.join(', ')}") unless cfg - cfg - end - - def compose_files(*files) - files.flatten.map { |f| "-f #{f}" }.join(' ') - end - - def linux_override_for(key) - "#{LINUX_OVERRIDE_DIR}/#{key}.yml" - end - - def compose_up(key, files:) - # Host tests use only the backend profile. Linux tests add the linux profile. - # `docker compose up --wait` only applies to services started by `up`, - # so linux runs still call `run` separately after this wait completes. - shell!("docker compose #{compose_files(files)} --profile #{key} up -d --wait --wait-timeout #{TIMEOUT}") - end - - def compose_down(files:) - return puts('OP_KEEP_CONTAINERS=1 set, skipping docker compose down') if ENV['OP_KEEP_CONTAINERS'] == '1' - - shell!( - "docker compose #{compose_files(files)} " \ - '--profile ag --profile fs --profile vo --profile gd --profile linux down' - ) - end - - def apply_host_env(key) - cfg!(key)[:host_env].each { |k, v| ENV[k] = v } - end - - def run_host_tests(key) - apply_host_env(key) - files = [BASE_COMPOSE] - - compose_up(key, files: files) - Rake::Task['test'].invoke - end - - def run_linux_tests(key) - override = linux_override_for(key) - abort_with("Missing compose override file: #{override}") unless File.exist?(override) - abort_with("Missing compose override file: #{LINUX_NO_PORTS_OVERRIDE}") unless File.exist?(LINUX_NO_PORTS_OVERRIDE) - - files = [BASE_COMPOSE, override, LINUX_NO_PORTS_OVERRIDE] - # docker compose is handleling wait_for_healthy - compose_up(key, files: files) - - shell!( - "docker compose #{compose_files(files)} --profile linux --profile #{key} " \ - 'run --rm --build test-linux bundle exec rake test TESTOPTS="-v"' - ) - end - - def run_linux_shell(key) - override = linux_override_for(key) - abort_with("Missing compose override file: #{override}") unless File.exist?(override) - abort_with("Missing compose override file: #{LINUX_NO_PORTS_OVERRIDE}") unless File.exist?(LINUX_NO_PORTS_OVERRIDE) - - files = [BASE_COMPOSE, override, 
LINUX_NO_PORTS_OVERRIDE] - compose_up(key, files: files) - - shell!( - "docker compose #{compose_files(files)} --profile linux --profile #{key} " \ - 'run --rm --build test-linux bash' - ) - end - - # - # Public tasks - # - - desc 'Run unit tests with AllegroGraph backend (docker deps, host Ruby)' - task :ag do - run_host_tests(:ag) - ensure - Rake::Task['test'].reenable - compose_down(files: [BASE_COMPOSE]) - end - - desc 'Run unit tests with AllegroGraph backend (docker deps, Linux container)' - task 'ag:linux' do - files = [BASE_COMPOSE, linux_override_for(:ag)] - begin - run_linux_tests(:ag) - ensure - compose_down(files: files) - end - end - - desc 'Run unit tests with 4store backend (docker deps, host Ruby)' - task :fs do - run_host_tests(:fs) - ensure - Rake::Task['test'].reenable - compose_down(files: [BASE_COMPOSE]) - end - - desc 'Run unit tests with 4store backend (docker deps, Linux container)' - task 'fs:linux' do - files = [BASE_COMPOSE, linux_override_for(:fs)] - begin - run_linux_tests(:fs) - ensure - compose_down(files: files) - end - end - - desc 'Run unit tests with Virtuoso backend (docker deps, host Ruby)' - task :vo do - run_host_tests(:vo) - ensure - Rake::Task['test'].reenable - compose_down(files: [BASE_COMPOSE]) - end - - desc 'Run unit tests with Virtuoso backend (docker deps, Linux container)' - task 'vo:linux' do - files = [BASE_COMPOSE, linux_override_for(:vo)] - begin - run_linux_tests(:vo) - ensure - compose_down(files: files) - end - end - - desc 'Run unit tests with GraphDB backend (docker deps, host Ruby)' - task :gd do - run_host_tests(:gd) - ensure - Rake::Task['test'].reenable - compose_down(files: [BASE_COMPOSE]) - end - - desc 'Run unit tests with GraphDB backend (docker deps, Linux container)' - task 'gd:linux' do - files = [BASE_COMPOSE, linux_override_for(:gd)] - begin - run_linux_tests(:gd) - ensure - compose_down(files: files) - end - end - - desc 'Start a shell in the Linux test container (default backend: fs)' - task 
:shell, [:backend] do |_t, args| - key = (args[:backend] || DEFAULT_BACKEND).to_sym - cfg!(key) - files = [BASE_COMPOSE, linux_override_for(key), LINUX_NO_PORTS_OVERRIDE] - begin - run_linux_shell(key) - ensure - compose_down(files: files) - end - end - - desc 'Start backend services for development (default backend: fs)' - task :up, [:backend] do |_t, args| - key = (args[:backend] || DEFAULT_BACKEND).to_sym - cfg!(key) - compose_up(key, files: [BASE_COMPOSE]) - end - - desc 'Stop backend services for development (default backend: fs)' - task :down, [:backend] do |_t, args| - compose_down(files: [BASE_COMPOSE]) - end - end -end diff --git a/rakelib/ontoportal_testkit.rake b/rakelib/ontoportal_testkit.rake new file mode 100644 index 00000000..7f04305c --- /dev/null +++ b/rakelib/ontoportal_testkit.rake @@ -0,0 +1,2 @@ +# Loads shared OntoPortal testkit rake tasks into this component. +require "ontoportal/testkit/tasks" diff --git a/test/fixtures/backends/graphdb/graphdb-repo-config.ttl b/test/fixtures/backends/graphdb/graphdb-repo-config.ttl deleted file mode 100644 index 84032a0b..00000000 --- a/test/fixtures/backends/graphdb/graphdb-repo-config.ttl +++ /dev/null @@ -1,33 +0,0 @@ -@prefix rdfs: . -@prefix rep: . -@prefix sail: . -@prefix xsd: . - -<#ontoportal_test> a rep:Repository; - rep:repositoryID "ontoportal_test"; - rep:repositoryImpl [ - rep:repositoryType "graphdb:SailRepository"; - [ - "http://example.org/owlim#"; - "false"; - ""; - "true"; - "false"; - "true"; - "true"; - "32"; - "10000000"; - ""; - "true"; - ""; - "0"; - "0"; - "false"; - "file-repository"; - "rdfsplus-optimized"; - "storage"; - "false"; - sail:sailType "owlim:Sail" - ] - ]; - rdfs:label "" . 
diff --git a/test/fixtures/backends/graphdb/graphdb-test-load.nt b/test/fixtures/backends/graphdb/graphdb-test-load.nt deleted file mode 100644 index e69de29b..00000000 diff --git a/test/fixtures/backends/virtuoso_initdb_d/virtuoso-grant-write-sparql-access.sql b/test/fixtures/backends/virtuoso_initdb_d/virtuoso-grant-write-sparql-access.sql deleted file mode 100644 index d509c6fb..00000000 --- a/test/fixtures/backends/virtuoso_initdb_d/virtuoso-grant-write-sparql-access.sql +++ /dev/null @@ -1,3 +0,0 @@ -GRANT EXECUTE ON DB.DBA.SPARQL_INSERT_DICT_CONTENT TO "SPARQL"; -GRANT SPARQL_UPDATE TO "SPARQL"; -DB.DBA.RDF_DEFAULT_USER_PERMS_SET ('nobody', 7); diff --git a/test/test_case.rb b/test/test_case.rb index 22fd77bc..a8eda0ab 100644 --- a/test/test_case.rb +++ b/test/test_case.rb @@ -20,7 +20,7 @@ MiniTest::Unit.autorun require_relative "../lib/goo.rb" -require_relative '../config/config' +require_relative '../config/config.test' # Safety guard for destructive tests: ensure test targets are safe (localhost or -ut suffix) module TestSafety From a9cfd6df638bd48be1fb3d3d337dd6c192cda12d Mon Sep 17 00:00:00 2001 From: = Date: Wed, 4 Mar 2026 11:23:59 -0800 Subject: [PATCH 100/106] Gemfile.lock update --- .gitignore | 1 + Gemfile.lock | 113 +++++++++++++++++++++++++++++---------------------- 2 files changed, 66 insertions(+), 48 deletions(-) diff --git a/.gitignore b/.gitignore index 3f0f3286..f828b66e 100644 --- a/.gitignore +++ b/.gitignore @@ -4,6 +4,7 @@ *.tags* .bundle .config +.serena coverage InstalledFiles lib/bundler/man diff --git a/Gemfile.lock b/Gemfile.lock index 2817e9ba..0f389531 100644 --- a/Gemfile.lock +++ b/Gemfile.lock @@ -41,89 +41,103 @@ GEM multi_json (~> 1.3) thread_safe (~> 0.1) tzinfo (~> 0.3.37) - addressable (2.8.7) - public_suffix (>= 2.0.2, < 7.0) - base64 (0.2.0) + addressable (2.8.9) + public_suffix (>= 2.0.2, < 8.0) + base64 (0.3.0) bcp47_spec (0.2.1) - bigdecimal (3.1.9) + bigdecimal (3.3.1) builder (3.3.0) coderay (1.1.3) - 
concurrent-ruby (1.3.5) - connection_pool (2.5.0) + concurrent-ruby (1.3.6) + connection_pool (3.0.2) daemons (1.4.1) - date (3.4.1) + date (3.5.1) docile (1.4.1) domain_name (0.6.20240107) eventmachine (1.2.7) - faraday (2.12.2) + faraday (2.14.1) faraday-net_http (>= 2.0, < 3.5) json logger - faraday-net_http (3.4.0) - net-http (>= 0.5.0) - ffi (1.17.1-arm64-darwin) - ffi (1.17.1-x86_64-linux-gnu) - htmlentities (4.3.4) + faraday-net_http (3.4.2) + net-http (~> 0.5) + ffi (1.17.3-arm64-darwin) + ffi (1.17.3-x86_64-linux-gnu) + htmlentities (4.4.2) http-accept (1.7.0) - http-cookie (1.0.8) + http-cookie (1.1.0) domain_name (~> 0.5) i18n (0.9.5) concurrent-ruby (~> 1.0) - json (2.9.1) + io-console (0.8.2) + json (2.18.1) link_header (0.0.8) - logger (1.6.5) + logger (1.7.0) macaddr (1.7.2) systemu (~> 2.6.5) method_source (1.1.0) - mime-types (3.6.0) + mime-types (3.7.0) logger - mime-types-data (~> 3.2015) - mime-types-data (3.2025.0204) + mime-types-data (~> 3.2025, >= 3.2025.0507) + mime-types-data (3.2026.0303) minitest (4.7.5) - multi_json (1.15.0) - mustermann (3.0.3) + multi_json (1.19.1) + mustermann (3.0.4) ruby2_keywords (~> 0.0.1) - net-ftp (0.3.8) + net-ftp (0.3.9) net-protocol time - net-http (0.6.0) - uri - net-http-persistent (4.0.6) - connection_pool (~> 2.2, >= 2.2.4) + net-http (0.9.1) + uri (>= 0.11.1) + net-http-persistent (4.0.8) + connection_pool (>= 2.2.4, < 4) net-protocol (0.2.2) timeout netrc (0.11.0) - pry (0.15.2) + ostruct (0.6.3) + pry (0.16.0) coderay (~> 1.1) method_source (~> 1.0) - public_suffix (6.0.1) - rack (2.2.10) + reline (>= 0.6.0) + public_suffix (7.0.5) + rack (3.2.5) rack-accept (0.4.5) rack (>= 0.4) rack-post-body-to-params (0.1.8) activesupport (>= 2.3) - rack-protection (3.2.0) + rack-protection (4.2.1) base64 (>= 0.1.0) - rack (~> 2.2, >= 2.2.4) - rake (13.2.1) - rdf (3.3.2) + logger (>= 1.6.0) + rack (>= 3.0.0, < 4) + rack-session (2.1.1) + base64 (>= 0.1.0) + rack (>= 3.0.0) + rake (13.3.1) + rdf (3.3.4) bcp47_spec 
(~> 0.2) bigdecimal (~> 3.1, >= 3.1.5) link_header (~> 0.0, >= 0.0.8) + logger (~> 1.5) + ostruct (~> 0.6) + readline (~> 0.0) rdf-rdfxml (3.3.0) builder (~> 3.2, >= 3.2.4) htmlentities (~> 4.3) rdf (~> 3.3) rdf-xsd (~> 3.3) - rdf-vocab (3.3.2) + rdf-vocab (3.3.3) rdf (~> 3.3) rdf-xsd (3.3.0) rdf (~> 3.3) rexml (~> 3.2) - redis (5.3.0) + readline (0.0.4) + reline + redis (5.4.1) redis-client (>= 0.22.0) - redis-client (0.23.2) + redis-client (0.26.4) connection_pool + reline (0.6.3) + io-console (~> 0.5) request_store (1.7.0) rack (>= 1.4) rest-client (2.1.0) @@ -131,7 +145,7 @@ GEM http-cookie (>= 1.0.2, < 2.0) mime-types (>= 1.16, < 4.0) netrc (~> 0.8) - rexml (3.4.0) + rexml (3.4.4) rsolr (2.6.0) builder (>= 2.1.2) faraday (>= 0.9, < 3, != 2.0.0) @@ -140,28 +154,31 @@ GEM docile (~> 1.1) simplecov-html (~> 0.11) simplecov_json_formatter (~> 0.1) - simplecov-cobertura (2.1.0) + simplecov-cobertura (3.1.0) rexml simplecov (~> 0.19) - simplecov-html (0.13.1) + simplecov-html (0.13.2) simplecov_json_formatter (0.1.4) - sinatra (3.2.0) + sinatra (4.2.1) + logger (>= 1.6.0) mustermann (~> 3.0) - rack (~> 2.2, >= 2.2.4) - rack-protection (= 3.2.0) + rack (>= 3.0.0, < 4) + rack-protection (= 4.2.1) + rack-session (>= 2.0.0, < 3) tilt (~> 2.0) systemu (2.6.5) - thin (1.8.2) + thin (2.0.1) daemons (~> 1.0, >= 1.0.9) eventmachine (~> 1.0, >= 1.0.4) - rack (>= 1, < 3) + logger + rack (>= 1, < 4) thread_safe (0.3.6) - tilt (2.6.0) - time (0.4.1) + tilt (2.7.0) + time (0.4.2) date - timeout (0.4.3) + timeout (0.6.0) tzinfo (0.3.62) - uri (1.0.2) + uri (1.1.1) uuid (2.3.9) macaddr (~> 1.0) From 93c8ade0f7ef56fa4ad4fbdee0f9c6c4553be287 Mon Sep 17 00:00:00 2001 From: Michael Dorf Date: Thu, 5 Mar 2026 14:08:15 -0800 Subject: [PATCH 101/106] improved submit_search_query to provide better error reporting --- lib/goo/search/solr/solr_query.rb | 16 +++++++++------- 1 file changed, 9 insertions(+), 7 deletions(-) diff --git a/lib/goo/search/solr/solr_query.rb 
b/lib/goo/search/solr/solr_query.rb index d9e342fc..c18a2cfb 100644 --- a/lib/goo/search/solr/solr_query.rb +++ b/lib/goo/search/solr/solr_query.rb @@ -85,23 +85,25 @@ def search(query, params = {}) def submit_search_query(query, params = {}) uri = ::URI.parse("#{collection_url}/select") - http = Net::HTTP.new(uri.host, uri.port) request = Net::HTTP::Post.new(uri.request_uri) + params = params.dup params[:q] = query + params[:wt] ||= "json" + + request["Content-Type"] = "application/x-www-form-urlencoded; charset=UTF-8" + request["X-Requested-With"] = "XMLHttpRequest" # helps when Solr CSRF filter is enabled request.set_form_data(params) response = http.request(request) - if response.is_a?(Net::HTTPSuccess) - JSON.parse(response.body) - else - puts "Error: #{response.code} - #{response.message}" - nil + unless response.is_a?(Net::HTTPSuccess) + raise "Solr query failed (HTTP #{response.code} #{response.message}): #{response.body}" end - end + JSON.parse(response.body) + end end end From 3ef0104cd9681c6d6fa1759ae736eee4ffb0ba72 Mon Sep 17 00:00:00 2001 From: Michael Dorf Date: Mon, 9 Mar 2026 13:11:45 -0700 Subject: [PATCH 102/106] changed Solr query from GET to POST to address URL-too-long errors --- lib/goo/search/solr/solr_query.rb | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/lib/goo/search/solr/solr_query.rb b/lib/goo/search/solr/solr_query.rb index c18a2cfb..b11dac3e 100644 --- a/lib/goo/search/solr/solr_query.rb +++ b/lib/goo/search/solr/solr_query.rb @@ -79,8 +79,9 @@ def delete_by_query(query) end def search(query, params = {}) - params[:q] = query - @solr.get('select', params: params) + body = params.dup + body[:q] = query + @solr.post('select', data: body) end def submit_search_query(query, params = {}) @@ -107,4 +108,3 @@ def submit_search_query(query, params = {}) end end - From f401adcae742b96a8f702314fdab1c400d714703 Mon Sep 17 00:00:00 2001 From: Michael Dorf Date: Tue, 10 Mar 2026 15:29:48 -0700 Subject: [PATCH 103/106] 
changed resource_id in solr schema to 'string', NOT 'string_ci' --- lib/goo/search/solr/solr_schema_generator.rb | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/goo/search/solr/solr_schema_generator.rb b/lib/goo/search/solr/solr_schema_generator.rb index fda215e6..c36e4f50 100644 --- a/lib/goo/search/solr/solr_schema_generator.rb +++ b/lib/goo/search/solr/solr_schema_generator.rb @@ -288,7 +288,7 @@ def init_fields_types def init_fields [ #{ name: "_version_", type: "plong", indexed: true, stored: true, multiValued: false }, - { name: "resource_id", type: "string_ci", indexed: true, multiValued: false, required: true, stored: true }, + { name: "resource_id", type: "string", indexed: true, multiValued: false, required: true, stored: true }, { name: "resource_model", type: "string", indexed: true, multiValued: false, required: true, stored: false }, { name: "_text_", type: "text_general", indexed: true, multiValued: true, stored: false }, ] From 6600a67325232be2d702f44708e6b2aa681e7acf Mon Sep 17 00:00:00 2001 From: Alex Skrenchuk Date: Wed, 11 Mar 2026 12:29:16 -0700 Subject: [PATCH 104/106] ontoportal testkit update --- .github/workflows/testkit-unit-tests.yml | 25 ++- .ruby-version | 2 +- Gemfile.lock | 111 +---------- dev/compose/linux/ag.yml | 16 -- dev/compose/linux/fs.yml | 13 -- dev/compose/linux/gd.yml | 19 -- dev/compose/linux/no-ports.yml | 13 -- dev/compose/linux/vo.yml | 16 -- rakelib/ontoportal_testkit.rake | 1 - test/data/virtuoso.init | 240 ----------------------- 10 files changed, 22 insertions(+), 434 deletions(-) delete mode 100644 dev/compose/linux/ag.yml delete mode 100644 dev/compose/linux/fs.yml delete mode 100644 dev/compose/linux/gd.yml delete mode 100644 dev/compose/linux/no-ports.yml delete mode 100644 dev/compose/linux/vo.yml delete mode 100644 test/data/virtuoso.init diff --git a/.github/workflows/testkit-unit-tests.yml b/.github/workflows/testkit-unit-tests.yml index b23760fc..358194a4 100644 --- 
a/.github/workflows/testkit-unit-tests.yml +++ b/.github/workflows/testkit-unit-tests.yml @@ -12,9 +12,9 @@ env: # CI execution mode for backend tests: # - container: run `test:docker::container` (default) # - native: run `test:docker:` on host Ruby - # OPTK_CI_RUN_MODE: ${{ vars.OPTK_CI_RUN_MODE || 'container' }} + OPTK_CI_RUN_MODE: ${{ vars.OPTK_CI_RUN_MODE || 'container' }} # Example override to force native mode in this workflow file: - OPTK_CI_RUN_MODE: native + # OPTK_CI_RUN_MODE: native jobs: prepare: @@ -42,22 +42,29 @@ jobs: steps: - uses: actions/checkout@v4 - - name: Install native system dependencies - if: env.OPTK_CI_RUN_MODE == 'native' - run: | - sudo apt-get update - sudo apt-get install -y raptor2-utils - - name: Set up Ruby from .ruby-version uses: ruby/setup-ruby@v1 with: ruby-version: .ruby-version bundler-cache: true + - name: Set up Java 11 (native mode) + if: env.OPTK_CI_RUN_MODE == 'native' + uses: actions/setup-java@v4 + with: + distribution: temurin + java-version: '11' + + - name: Install native system dependencies + if: env.OPTK_CI_RUN_MODE == 'native' + run: | + sudo apt-get update + sudo apt-get install -y raptor2-utils + - name: Run unit tests env: CI: "true" - TESTOPTS: "-v" + TESTOPTS: "--verbose" BACKEND: ${{ matrix.backend }} run: | MODE="${OPTK_CI_RUN_MODE:-container}" diff --git a/.ruby-version b/.ruby-version index e650c01d..f15386a5 100644 --- a/.ruby-version +++ b/.ruby-version @@ -1 +1 @@ -3.2.9 +3.2.10 diff --git a/Gemfile.lock b/Gemfile.lock index 4fa76735..d718a797 100644 --- a/Gemfile.lock +++ b/Gemfile.lock @@ -1,6 +1,6 @@ GIT remote: https://github.com/alexskr/ontoportal_testkit.git - revision: 741b25d35ed3e3bdbdcbc55f4672a5258215ecdb + revision: b930b550bc8c98ca566c5f71b4f91f6703721240 branch: main specs: ontoportal_testkit (0.1.0) @@ -71,15 +71,9 @@ GEM net-http (~> 0.5) ffi (1.17.3) ffi (1.17.3-aarch64-linux-gnu) - ffi (1.17.3-aarch64-linux-musl) - ffi (1.17.3-arm-linux-gnu) - ffi (1.17.3-arm-linux-musl) ffi 
(1.17.3-arm64-darwin) - ffi (1.17.3-x86-linux-gnu) - ffi (1.17.3-x86-linux-musl) ffi (1.17.3-x86_64-darwin) ffi (1.17.3-x86_64-linux-gnu) - ffi (1.17.3-x86_64-linux-musl) htmlentities (4.4.2) http-accept (1.7.0) http-cookie (1.1.0) @@ -87,7 +81,7 @@ GEM i18n (0.9.5) concurrent-ruby (~> 1.0) io-console (0.8.2) - json (2.18.1) + json (2.19.1) link_header (0.0.8) logger (1.7.0) macaddr (1.7.2) @@ -151,7 +145,7 @@ GEM reline redis (5.4.1) redis-client (>= 0.22.0) - redis-client (0.26.4) + redis-client (0.27.0) connection_pool reline (0.6.3) io-console (~> 0.5) @@ -193,7 +187,7 @@ GEM tilt (2.7.0) time (0.4.2) date - timeout (0.6.0) + timeout (0.6.1) tzinfo (0.3.62) uri (1.1.1) uuid (2.3.9) @@ -201,16 +195,10 @@ GEM PLATFORMS aarch64-linux-gnu - aarch64-linux-musl - arm-linux-gnu - arm-linux-musl arm64-darwin ruby - x86-linux-gnu - x86-linux-musl x86_64-darwin x86_64-linux-gnu - x86_64-linux-musl DEPENDENCIES activesupport @@ -231,94 +219,5 @@ DEPENDENCIES thin uuid -CHECKSUMS - activesupport (4.0.13) sha256=0fcd111ced80b99339371a869dd187996349ab8ab5dfc08ee63730bdf66276d8 - addressable (2.8.9) sha256=cc154fcbe689711808a43601dee7b980238ce54368d23e127421753e46895485 - base64 (0.3.0) sha256=27337aeabad6ffae05c265c450490628ef3ebd4b67be58257393227588f5a97b - bcp47_spec (0.2.1) sha256=3fd62edf96c126bd9624e4319ac74082a966081859d1ee0ef3c3041640a37810 - bigdecimal (3.3.1) sha256=eaa01e228be54c4f9f53bf3cc34fe3d5e845c31963e7fcc5bedb05a4e7d52218 - builder (3.3.0) sha256=497918d2f9dca528fdca4b88d84e4ef4387256d984b8154e9d5d3fe5a9c8835f - coderay (1.1.3) sha256=dc530018a4684512f8f38143cd2a096c9f02a1fc2459edcfe534787a7fc77d4b - concurrent-ruby (1.3.6) sha256=6b56837e1e7e5292f9864f34b69c5a2cbc75c0cf5338f1ce9903d10fa762d5ab - connection_pool (3.0.2) sha256=33fff5ba71a12d2aa26cb72b1db8bba2a1a01823559fb01d29eb74c286e62e0a - daemons (1.4.1) sha256=8fc76d76faec669feb5e455d72f35bd4c46dc6735e28c420afb822fac1fa9a1d - date (3.5.1) 
sha256=750d06384d7b9c15d562c76291407d89e368dda4d4fff957eb94962d325a0dc0 - docile (1.4.1) sha256=96159be799bfa73cdb721b840e9802126e4e03dfc26863db73647204c727f21e - domain_name (0.6.20240107) sha256=5f693b2215708476517479bf2b3802e49068ad82167bcd2286f899536a17d933 - eventmachine (1.2.7) sha256=994016e42aa041477ba9cff45cbe50de2047f25dd418eba003e84f0d16560972 - faraday (2.14.1) sha256=a43cceedc1e39d188f4d2cdd360a8aaa6a11da0c407052e426ba8d3fb42ef61c - faraday-net_http (3.4.2) sha256=f147758260d3526939bf57ecf911682f94926a3666502e24c69992765875906c - ffi (1.17.3) sha256=0e9f39f7bb3934f77ad6feab49662be77e87eedcdeb2a3f5c0234c2938563d4c - ffi (1.17.3-aarch64-linux-gnu) sha256=28ad573df26560f0aedd8a90c3371279a0b2bd0b4e834b16a2baa10bd7a97068 - ffi (1.17.3-aarch64-linux-musl) sha256=020b33b76775b1abacc3b7d86b287cef3251f66d747092deec592c7f5df764b2 - ffi (1.17.3-arm-linux-gnu) sha256=5bd4cea83b68b5ec0037f99c57d5ce2dd5aa438f35decc5ef68a7d085c785668 - ffi (1.17.3-arm-linux-musl) sha256=0d7626bb96265f9af78afa33e267d71cfef9d9a8eb8f5525344f8da6c7d76053 - ffi (1.17.3-arm64-darwin) sha256=0c690555d4cee17a7f07c04d59df39b2fba74ec440b19da1f685c6579bb0717f - ffi (1.17.3-x86-linux-gnu) sha256=868a88fcaf5186c3a46b7c7c2b2c34550e1e61a405670ab23f5b6c9971529089 - ffi (1.17.3-x86-linux-musl) sha256=f0286aa6ef40605cf586e61406c446de34397b85dbb08cc99fdaddaef8343945 - ffi (1.17.3-x86_64-darwin) sha256=1f211811eb5cfaa25998322cdd92ab104bfbd26d1c4c08471599c511f2c00bb5 - ffi (1.17.3-x86_64-linux-gnu) sha256=3746b01f677aae7b16dc1acb7cb3cc17b3e35bdae7676a3f568153fb0e2c887f - ffi (1.17.3-x86_64-linux-musl) sha256=086b221c3a68320b7564066f46fed23449a44f7a1935f1fe5a245bd89d9aea56 - goo (0.0.2) - htmlentities (4.4.2) sha256=bbafbdf69f2eca9262be4efef7e43e6a1de54c95eb600f26984f71d2fe96c5c3 - http-accept (1.7.0) sha256=c626860682bfbb3b46462f8c39cd470fd7b0584f61b3cc9df5b2e9eb9972a126 - http-cookie (1.1.0) sha256=38a5e60d1527eebc396831b8c4b9455440509881219273a6c99943d29eadbb19 - i18n (0.9.5) 
sha256=43a58b55056ef171cae9b35df8aa5dee22d3a782f8a9bdd0ec8e8d36cfdf180d - io-console (0.8.2) sha256=d6e3ae7a7cc7574f4b8893b4fca2162e57a825b223a177b7afa236c5ef9814cc - json (2.18.1) sha256=fe112755501b8d0466b5ada6cf50c8c3f41e897fa128ac5d263ec09eedc9f986 - link_header (0.0.8) sha256=15c65ce43b29f739b30d05e5f25c22c23797e89cf6f905dbb595fb4c70cb55f9 - logger (1.7.0) sha256=196edec7cc44b66cfb40f9755ce11b392f21f7967696af15d274dde7edff0203 - macaddr (1.7.2) sha256=da377809968bbc1160bf02a999e916bb3255000007291d9d1a49a93ceedadf82 - method_source (1.1.0) sha256=181301c9c45b731b4769bc81e8860e72f9161ad7d66dd99103c9ab84f560f5c5 - mime-types (3.7.0) sha256=dcebf61c246f08e15a4de34e386ebe8233791e868564a470c3fe77c00eed5e56 - mime-types-data (3.2026.0303) sha256=164af1de5824c5195d4b503b0a62062383b65c08671c792412450cd22d3bc224 - minitest (4.7.5) sha256=3e0ac720a6d0787b4c822514739319493e187400e993fba96397bd64d58ae60e - multi_json (1.19.1) sha256=7aefeff8f2c854bf739931a238e4aea64592845e0c0395c8a7d2eea7fdd631b7 - mustermann (3.0.4) sha256=85fadcb6b3c6493a8b511b42426f904b7f27b282835502233dd154daab13aa22 - net-ftp (0.3.9) sha256=307817ccf7f428f79d083f7e36dbb46a9d1d375e0d23027824de1866f0b13b65 - net-http (0.9.1) sha256=25ba0b67c63e89df626ed8fac771d0ad24ad151a858af2cc8e6a716ca4336996 - net-http-persistent (4.0.8) sha256=ef3de8319d691537b329053fae3a33195f8b070bbbfae8bf1a58c796081960e6 - net-protocol (0.2.2) sha256=aa73e0cba6a125369de9837b8d8ef82a61849360eba0521900e2c3713aa162a8 - netrc (0.11.0) sha256=de1ce33da8c99ab1d97871726cba75151113f117146becbe45aa85cb3dabee3f - ontoportal_testkit (0.1.0) - ostruct (0.6.3) sha256=95a2ed4a4bd1d190784e666b47b2d3f078e4a9efda2fccf18f84ddc6538ed912 - pry (0.16.0) sha256=d76c69065698ed1f85e717bd33d7942c38a50868f6b0673c636192b3d1b6054e - public_suffix (7.0.5) sha256=1a8bb08f1bbea19228d3bed6e5ed908d1cb4f7c2726d18bd9cadf60bc676f623 - rack (3.2.5) sha256=4cbd0974c0b79f7a139b4812004a62e4c60b145cba76422e288ee670601ed6d3 - rack-accept (0.4.5) 
sha256=66247b5449db64ebb93ae2ec4af4764b87d1ae8a7463c7c68893ac13fa8d4da2 - rack-post-body-to-params (0.1.8) sha256=d6007f48bee744caa6e755f46526fa57036b95374fa37c9921c3213f0b91dc38 - rack-protection (4.2.1) sha256=cf6e2842df8c55f5e4d1a4be015e603e19e9bc3a7178bae58949ccbb58558bac - rack-session (2.1.1) sha256=0b6dc07dea7e4b583f58a48e8b806d4c9f1c6c9214ebc202ec94562cbea2e4e9 - rake (13.3.1) sha256=8c9e89d09f66a26a01264e7e3480ec0607f0c497a861ef16063604b1b08eb19c - rdf (3.3.4) sha256=a77fa0821e5b4e2bea9fdbb7c9d980564c89d27e81979690ce5c9e6bc80859c1 - rdf-raptor (3.3.0) - rdf-rdfxml (3.3.0) sha256=11647f6111b97b6a9b82413bd9810d4bb5524aa7dd06b3c1330bf58ec3aa6a9a - rdf-vocab (3.3.3) sha256=d3b642edb37be7b37b73cafa9e01d55762f99292838e7b0868a3575bd297bf8b - rdf-xsd (3.3.0) sha256=fab51d27b20344237d9b622ef32e83e4c44940840bfc76a245ce6b6abba44772 - readline (0.0.4) sha256=6138eef17be2b98298b672c3ea63bf9cb5158d401324f26e1e84f235879c1d6a - redis (5.4.1) sha256=b5e675b57ad22b15c9bcc765d5ac26f60b675408af916d31527af9bd5a81faae - redis-client (0.26.4) sha256=3ad70beff5da2653e02dfeae996e7d8d7147a558da12b16b2282ad345e4c7120 - reline (0.6.3) sha256=1198b04973565b36ec0f11542ab3f5cfeeec34823f4e54cebde90968092b1835 - request_store (1.7.0) sha256=e1b75d5346a315f452242a68c937ef8e48b215b9453a77a6c0acdca2934c88cb - rest-client (2.1.0) sha256=35a6400bdb14fae28596618e312776c158f7ebbb0ccad752ff4fa142bf2747e3 - rexml (3.4.4) sha256=19e0a2c3425dfbf2d4fc1189747bdb2f849b6c5e74180401b15734bc97b5d142 - rsolr (2.6.0) sha256=4b3bcea772cac300562775c20eeddedf63a6b7516a070cb6fbde000b09cfe12b - ruby2_keywords (0.0.5) sha256=ffd13740c573b7301cf7a2e61fc857b2a8e3d3aff32545d6f8300d8bae10e3ef - simplecov (0.22.0) sha256=fe2622c7834ff23b98066bb0a854284b2729a569ac659f82621fc22ef36213a5 - simplecov-cobertura (3.1.0) sha256=6d7f38aa32c965ca2174b2e5bd88cb17138eaf629518854976ac50e628925dc5 - simplecov-html (0.13.2) sha256=bd0b8e54e7c2d7685927e8d6286466359b6f16b18cb0df47b508e8d73c777246 - simplecov_json_formatter (0.1.4) 
sha256=529418fbe8de1713ac2b2d612aa3daa56d316975d307244399fa4838c601b428 - sinatra (4.2.1) sha256=b7aeb9b11d046b552972ade834f1f9be98b185fa8444480688e3627625377080 - sparql-client (3.2.2) - systemu (2.6.5) sha256=01f7d014b1453b28e5781e15c4d7d63fc9221c29b174b7aae5253207a75ab33e - thin (2.0.1) sha256=5bbde5648377f5c3864b5da7cd89a23b5c2d8d8bb9435719f6db49644bcdade9 - thread_safe (0.3.6) sha256=9ed7072821b51c57e8d6b7011a8e282e25aeea3a4065eab326e43f66f063b05a - tilt (2.7.0) sha256=0d5b9ba69f6a36490c64b0eee9f6e9aad517e20dcc848800a06eb116f08c6ab3 - time (0.4.2) sha256=f324e498c3bde9471d45a7d18f874c27980e9867aa5cfca61bebf52262bc3dab - timeout (0.6.0) sha256=6d722ad619f96ee383a0c557ec6eb8c4ecb08af3af62098a0be5057bf00de1af - tzinfo (0.3.62) sha256=ea69564cb85d8318f89efced5ed7d117e64fa54dba1abce24d38c6c5dd3472a1 - uri (1.1.1) sha256=379fa58d27ffb1387eaada68c749d1426738bd0f654d812fcc07e7568f5c57c6 - uuid (2.3.9) sha256=aec0cf592053cd6e07c13c1ef94c440aba705f22eb1ee767b39631f2760124d7 - BUNDLED WITH - 4.0.7 + 4.0.8 diff --git a/dev/compose/linux/ag.yml b/dev/compose/linux/ag.yml deleted file mode 100644 index b56d9c8e..00000000 --- a/dev/compose/linux/ag.yml +++ /dev/null @@ -1,16 +0,0 @@ -services: - test-linux: - environment: - GOO_BACKEND_NAME: allegrograph - GOO_PORT: 10035 - GOO_HOST: agraph-ut - GOO_PATH_QUERY: /repositories/ontoportal_test - GOO_PATH_DATA: /repositories/ontoportal_test/statements - GOO_PATH_UPDATE: /repositories/ontoportal_test/statements - depends_on: - solr-ut: - condition: service_healthy - redis-ut: - condition: service_healthy - agraph-ut: - condition: service_healthy diff --git a/dev/compose/linux/fs.yml b/dev/compose/linux/fs.yml deleted file mode 100644 index 27acf4b3..00000000 --- a/dev/compose/linux/fs.yml +++ /dev/null @@ -1,13 +0,0 @@ -services: - test-linux: - environment: - GOO_BACKEND_NAME: '4store' - GOO_HOST: 4store-ut - GOO_PORT: 9000 - depends_on: - solr-ut: - condition: service_healthy - redis-ut: - condition: service_healthy - 
4store-ut: - condition: service_healthy diff --git a/dev/compose/linux/gd.yml b/dev/compose/linux/gd.yml deleted file mode 100644 index f83a32f6..00000000 --- a/dev/compose/linux/gd.yml +++ /dev/null @@ -1,19 +0,0 @@ -services: - test-linux: - environment: - GOO_BACKEND_NAME: graphdb - GOO_PORT: 7200 - GOO_HOST: graphdb-ut - GOO_PATH_QUERY: /repositories/ontoportal_test - GOO_PATH_DATA: /repositories/ontoportal_test/statements - GOO_PATH_UPDATE: /repositories/ontoportal_test/statements - depends_on: - solr-ut: - condition: service_healthy - redis-ut: - condition: service_healthy - graphdb-ut: - condition: service_healthy - - graphdb-ut: - ports: diff --git a/dev/compose/linux/no-ports.yml b/dev/compose/linux/no-ports.yml deleted file mode 100644 index f42191b9..00000000 --- a/dev/compose/linux/no-ports.yml +++ /dev/null @@ -1,13 +0,0 @@ -services: - redis-ut: - ports: [] - solr-ut: - ports: [] - agraph-ut: - ports: [] - 4store-ut: - ports: [] - virtuoso-ut: - ports: [] - graphdb-ut: - ports: [] diff --git a/dev/compose/linux/vo.yml b/dev/compose/linux/vo.yml deleted file mode 100644 index c47dd654..00000000 --- a/dev/compose/linux/vo.yml +++ /dev/null @@ -1,16 +0,0 @@ -services: - test-linux: - environment: - GOO_BACKEND_NAME: 'virtuoso' - GOO_HOST: virtuoso-ut - GOO_PORT: 8890 - GOO_PATH_QUERY: /sparql - GOO_PATH_DATA: /sparql - GOO_PATH_UPDATE: /sparql - depends_on: - solr-ut: - condition: service_healthy - redis-ut: - condition: service_healthy - virtuoso-ut: - condition: service_healthy diff --git a/rakelib/ontoportal_testkit.rake b/rakelib/ontoportal_testkit.rake index 7f04305c..39a3ec73 100644 --- a/rakelib/ontoportal_testkit.rake +++ b/rakelib/ontoportal_testkit.rake @@ -1,2 +1 @@ -# Loads shared OntoPortal testkit rake tasks into this component. 
require "ontoportal/testkit/tasks" diff --git a/test/data/virtuoso.init b/test/data/virtuoso.init deleted file mode 100644 index e5f4bd85..00000000 --- a/test/data/virtuoso.init +++ /dev/null @@ -1,240 +0,0 @@ - ; - ; virtuoso.ini - ; - ; Configuration file for the OpenLink Virtuoso VDBMS Server - ; - ; To learn more about this product, or any other product in our - ; portfolio, please check out our web site at: - ; - ; http://virtuoso.openlinksw.com/ - ; - ; or contact us at: - ; - ; general.information@openlinksw.com - ; - ; If you have any technical questions, please contact our support - ; staff at: - ; - ; technical.support@openlinksw.com - ; - ; - ; Database setup - ; - [Database] - DatabaseFile = ../database/virtuoso.db - ErrorLogFile = ../database/virtuoso.log - LockFile = ../database/virtuoso.lck - TransactionFile = ../database/virtuoso.trx - xa_persistent_file = ../database/virtuoso.pxa - ErrorLogLevel = 7 - FileExtend = 200 - MaxCheckpointRemap = 2000 - Striping = 0 - TempStorage = TempDatabase - - [TempDatabase] - DatabaseFile = ../database/virtuoso-temp.db - TransactionFile = ../database/virtuoso-temp.trx - MaxCheckpointRemap = 2000 - Striping = 0 - - ; - ; Server parameters - ; - [Parameters] - ServerPort = 1111 - LiteMode = 0 - DisableUnixSocket = 1 - DisableTcpSocket = 0 - ;SSLServerPort = 2111 - ;SSLCertificate = cert.pem - ;SSLPrivateKey = pk.pem - ;X509ClientVerify = 0 - ;X509ClientVerifyDepth = 0 - ;X509ClientVerifyCAFile = ca.pem - MaxClientConnections = 10 - CheckpointInterval = 60 - O_DIRECT = 0 - CaseMode = 2 - MaxStaticCursorRows = 5000 - CheckpointAuditTrail = 0 - AllowOSCalls = 0 - SchedulerInterval = 10 - DirsAllowed = ., ../vad, /usr/share/proj - ThreadCleanupInterval = 0 - ThreadThreshold = 10 - ResourcesCleanupInterval = 0 - FreeTextBatchSize = 100000 - SingleCPU = 0 - VADInstallDir = ../vad/ - PrefixResultNames = 0 - RdfFreeTextRulesSize = 100 - IndexTreeMaps = 64 - MaxMemPoolSize = 200000000 - PrefixResultNames = 0 - MacSpotlight = 
0 - MaxQueryMem = 2G ; memory allocated to query processor - VectorSize = 1000 ; initial parallel query vector (array of query operations) size - MaxVectorSize = 1000000 ; query vector size threshold. - AdjustVectorSize = 0 - ThreadsPerQuery = 4 - AsyncQueueMaxThreads = 10 - ;; - ;; When running with large data sets, one should configure the Virtuoso - ;; process to use between 2/3 to 3/5 of free system memory and to stripe - ;; storage on all available disks. - ;; - ;; Uncomment next two lines if there is 2 GB system memory free - ;NumberOfBuffers = 170000 - ;MaxDirtyBuffers = 130000 - ;; Uncomment next two lines if there is 4 GB system memory free - ;NumberOfBuffers = 340000 - ; MaxDirtyBuffers = 250000 - ;; Uncomment next two lines if there is 8 GB system memory free - ;NumberOfBuffers = 680000 - ;MaxDirtyBuffers = 500000 - ;; Uncomment next two lines if there is 16 GB system memory free - ;NumberOfBuffers = 1360000 - ;MaxDirtyBuffers = 1000000 - ;; Uncomment next two lines if there is 32 GB system memory free - ;NumberOfBuffers = 2720000 - ;MaxDirtyBuffers = 2000000 - ;; Uncomment next two lines if there is 48 GB system memory free - ;NumberOfBuffers = 4000000 - ;MaxDirtyBuffers = 3000000 - ;; Uncomment next two lines if there is 64 GB system memory free - ;NumberOfBuffers = 5450000 - ;MaxDirtyBuffers = 4000000 - ;; - ;; Note the default settings will take very little memory - ;; but will not result in very good performance - ;; - NumberOfBuffers = 10000 - MaxDirtyBuffers = 6000 - - [HTTPServer] - ServerPort = 8890 - ServerRoot = ../vsp - MaxClientConnections = 10 - DavRoot = DAV - EnabledDavVSP = 0 - HTTPProxyEnabled = 0 - TempASPXDir = 0 - DefaultMailServer = localhost:25 - MaxKeepAlives = 10 - KeepAliveTimeout = 10 - MaxCachedProxyConnections = 10 - ProxyConnectionCacheTimeout = 15 - HTTPThreadSize = 280000 - HttpPrintWarningsInOutput = 0 - Charset = UTF-8 - ;HTTPLogFile = logs/http.log - MaintenancePage = atomic.html - EnabledGzipContent = 1 - - 
[AutoRepair] - BadParentLinks = 0 - - [Client] - SQL_PREFETCH_ROWS = 100 - SQL_PREFETCH_BYTES = 16000 - SQL_QUERY_TIMEOUT = 0 - SQL_TXN_TIMEOUT = 0 - ;SQL_NO_CHAR_C_ESCAPE = 1 - ;SQL_UTF8_EXECS = 0 - ;SQL_NO_SYSTEM_TABLES = 0 - ;SQL_BINARY_TIMESTAMP = 1 - ;SQL_ENCRYPTION_ON_PASSWORD = -1 - - [VDB] - ArrayOptimization = 0 - NumArrayParameters = 10 - VDBDisconnectTimeout = 1000 - KeepConnectionOnFixedThread = 0 - - [Replication] - ServerName = db-BIONIC-PORT - ServerEnable = 1 - QueueMax = 50000 - - ; - ; Striping setup - ; - ; These parameters have only effect when Striping is set to 1 in the - ; [Database] section, in which case the DatabaseFile parameter is ignored. - ; - ; With striping, the database is spawned across multiple segments - ; where each segment can have multiple stripes. - ; - ; Format of the lines below: - ; Segment = , [, .. ] - ; - ; must be ordered from 1 up. - ; - ; The is the total size of the segment which is equally divided - ; across all stripes forming the segment. Its specification can be in - ; gigabytes (g), megabytes (m), kilobytes (k) or in database blocks - ; (b, the default) - ; - ; Note that the segment size must be a multiple of the database page size - ; which is currently 8k. Also, the segment size must be divisible by the - ; number of stripe files forming the segment. - ; - ; The example below creates a 200 meg database striped on two segments - ; with two stripes of 50 meg and one of 100 meg. - ; - ; You can always add more segments to the configuration, but once - ; added, do not change the setup. - ; - [Striping] - Segment1 = 100M, db-seg1-1.db, db-seg1-2.db - Segment2 = 100M, db-seg2-1.db - ;... - ;[TempStriping] - ;Segment1 = 100M, db-seg1-1.db, db-seg1-2.db - ;Segment2 = 100M, db-seg2-1.db - ;... - ;[Ucms] - ;UcmPath = - ;Ucm1 = - ;Ucm2 = - ;... 
- - [Zero Config] - ServerName = virtuoso (BIONIC-PORT) - ;ServerDSN = ZDSN - ;SSLServerName = - ;SSLServerDSN = - - [Mono] - ;MONO_TRACE = Off - ;MONO_PATH = - ;MONO_ROOT = - ;MONO_CFG_DIR = - ;virtclr.dll = - - [URIQA] - DynamicLocal = 0 - DefaultHost = localhost:8890 - - [SPARQL] - ;ExternalQuerySource = 1 - ;ExternalXsltSource = 1 - ;DefaultGraph = http://localhost:8890/dataspace - ;ImmutableGraphs = http://localhost:8890/dataspace - ResultSetMaxRows = 10000 - MaxConstructTriples = 10000 - MaxQueryCostEstimationTime = 400 ; in seconds - MaxQueryExecutionTime = 60 ; in seconds - DefaultQuery = select distinct ?Concept where {[] a ?Concept} LIMIT 100 - DeferInferenceRulesInit = 0 ; controls inference rules loading - MaxMemInUse = 0 ; limits the amount of memory for construct dict (0=unlimited) - ;LabelInferenceName = facets ; Only needed when using the Faceted Browser - ;PingService = http://rpc.pingthesemanticweb.com/ - - [Plugins] - LoadPath = ../hosting - Load1 = plain, geos - Load2 = plain, graphql - Load3 = plain, proj4 - Load4 = plain, shapefileio \ No newline at end of file From 414168e0d3559d5e78aae9d3adba440bf21c1ce1 Mon Sep 17 00:00:00 2001 From: Alex Skrenchuk Date: Wed, 11 Mar 2026 12:42:16 -0700 Subject: [PATCH 105/106] ontoportal testkit update --- .github/workflows/testkit-unit-tests.yml | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/.github/workflows/testkit-unit-tests.yml b/.github/workflows/testkit-unit-tests.yml index 358194a4..2f3e11ad 100644 --- a/.github/workflows/testkit-unit-tests.yml +++ b/.github/workflows/testkit-unit-tests.yml @@ -22,7 +22,7 @@ jobs: outputs: backends: ${{ steps.cfg.outputs.backends }} steps: - - uses: actions/checkout@v4 + - uses: actions/checkout@v6 - id: cfg name: Read backend matrix from .ontoportal-testkit.yml @@ -40,7 +40,7 @@ jobs: backend: ${{ fromJson(needs.prepare.outputs.backends) }} steps: - - uses: actions/checkout@v4 + - uses: actions/checkout@v6 - name: Set up Ruby from 
.ruby-version uses: ruby/setup-ruby@v1 @@ -50,7 +50,7 @@ jobs: - name: Set up Java 11 (native mode) if: env.OPTK_CI_RUN_MODE == 'native' - uses: actions/setup-java@v4 + uses: actions/setup-java@v5 with: distribution: temurin java-version: '11' From 6f7f5ec824e3bf9d6d6bc7f4dd6b8d8eb98fa130 Mon Sep 17 00:00:00 2001 From: Alex Skrenchuk Date: Wed, 11 Mar 2026 12:42:33 -0700 Subject: [PATCH 106/106] ontoportal testkit update --- Gemfile.lock | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Gemfile.lock b/Gemfile.lock index d718a797..67b81dae 100644 --- a/Gemfile.lock +++ b/Gemfile.lock @@ -1,6 +1,6 @@ GIT remote: https://github.com/alexskr/ontoportal_testkit.git - revision: b930b550bc8c98ca566c5f71b4f91f6703721240 + revision: 45ecfaf2a621b670dad373cd35c1c4b1b529fa96 branch: main specs: ontoportal_testkit (0.1.0) @@ -220,4 +220,4 @@ DEPENDENCIES uuid BUNDLED WITH - 4.0.8 + 4.0.7