From 408314c50f8e256f502cfbe559a465e0e919949c Mon Sep 17 00:00:00 2001 From: Syphax Bouazzouni Date: Tue, 1 Mar 2022 18:01:40 +0100 Subject: [PATCH 01/11] fix to use the unzipped file in the diff process --- .../models/ontology_submission.rb | 32 ++++++++++++------- 1 file changed, 20 insertions(+), 12 deletions(-) diff --git a/lib/ontologies_linked_data/models/ontology_submission.rb b/lib/ontologies_linked_data/models/ontology_submission.rb index 050958dd..bc05b460 100644 --- a/lib/ontologies_linked_data/models/ontology_submission.rb +++ b/lib/ontologies_linked_data/models/ontology_submission.rb @@ -205,7 +205,7 @@ def sanity_check return true end - zip = LinkedData::Utils::FileHelpers.zip?(self.uploadFilePath) + zip = zipped? files = LinkedData::Utils::FileHelpers.files_from_zip(self.uploadFilePath) if zip if not zip and self.masterFileName.nil? @@ -261,10 +261,19 @@ def data_folder self.submissionId.to_s) end + def zipped?(full_file_path: self.uploadFilePath) + LinkedData::Utils::FileHelpers.zip?(full_file_path) || LinkedData::Utils::FileHelpers.gzip?(full_file_path) + end + def zip_folder - return File.join([self.data_folder, "unzipped"]) + File.join([self.data_folder, "unzipped"]) end + def master_file_path + bring :uploadFilePath if bring? :uploadFilePath + bring :masterFileName if bring :masterFileName + File.expand_path(zipped? ? File.join(zip_folder, self.masterFileName) : self.uploadFilePath) + end def csv_path return File.join(self.data_folder, self.ontology.acronym.to_s + ".csv.gz") end @@ -286,17 +295,16 @@ def triples_file_path self.bring(:masterFileName) if self.bring?(:masterFileName) triples_file_name = File.basename(self.uploadFilePath.to_s) full_file_path = File.join(File.expand_path(self.data_folder.to_s), triples_file_name) - zip = LinkedData::Utils::FileHelpers.zip?(full_file_path) + zip = zipped? full_file_path triples_file_name = File.basename(self.masterFileName.to_s) if zip && self.masterFileName file_name = File.join(File.expand_path(self.data_folder.to_s), triples_file_name) File.expand_path(file_name) end def unzip_submission(logger) - zip = LinkedData::Utils::FileHelpers.zip?(self.uploadFilePath) zip_dst = nil - if zip + if zipped? zip_dst = self.zip_folder if Dir.exist? zip_dst @@ -328,17 +336,17 @@ def delete_old_submission_files # accepts another submission in 'older' (it should be an 'older' ontology version) def diff(logger, older) begin - self.bring_remaining - self.bring(:diffFilePath) - self.bring(:uploadFilePath) - older.bring(:uploadFilePath) + bring_remaining + bring :diffFilePath if bring? :diffFilePath + LinkedData::Diff.logger = logger bubastis = LinkedData::Diff::BubastisDiffCommand.new( - File.expand_path(older.uploadFilePath), - File.expand_path(self.uploadFilePath) + File.expand_path(older.master_file_path), + File.expand_path(self.master_file_path), + data_folder ) self.diffFilePath = bubastis.diff - self.save + save logger.info("Bubastis diff generated successfully for #{self.id}") logger.flush rescue Exception => e From b10b9bf292cd2f47ae8e087211d19bf117648804 Mon Sep 17 00:00:00 2001 From: Syphax Bouazzouni Date: Tue, 1 Mar 2022 17:55:55 +0100 Subject: [PATCH 02/11] fix and update the way the diff output folder is computed --- lib/ontologies_linked_data/diff/bubastis_diff.rb | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/lib/ontologies_linked_data/diff/bubastis_diff.rb b/lib/ontologies_linked_data/diff/bubastis_diff.rb index d5de6f65..4b013744 100644 --- a/lib/ontologies_linked_data/diff/bubastis_diff.rb +++ b/lib/ontologies_linked_data/diff/bubastis_diff.rb @@ -37,11 +37,11 @@ class BubastisDiffCommand # Loading one file locally and one from the web and outputting results to plain text: # java -jar bubastis_1_2.jar -ontology1 "H://disease_ontology_version_1.owl" -ontology2 "http://www.disease.org/diseaseontology_latest.owl" -output "C://my_diff.txt" - def initialize(input_fileOld, input_fileNew) + def initialize(input_fileOld, input_fileNew, output_repo) @bubastis_jar_path = LinkedData.bindir + "/bubastis.jar" @input_fileOld = input_fileOld @input_fileNew = input_fileNew - @output_repo = File.expand_path(@input_fileNew).gsub(File.basename(@input_fileNew),'') + @output_repo = output_repo @file_diff_path = nil end From 3ea86a97086296c4964c90596779e3a8d1a55319 Mon Sep 17 00:00:00 2001 From: Syphax Bouazzouni Date: Wed, 2 Mar 2022 10:37:18 +0100 Subject: [PATCH 03/11] adding gzip support --- lib/ontologies_linked_data/utils/file.rb | 64 ++++++++++++++---------- 1 file changed, 38 insertions(+), 26 deletions(-) diff --git a/lib/ontologies_linked_data/utils/file.rb b/lib/ontologies_linked_data/utils/file.rb index 02cfae14..ea7a8902 100644 --- a/lib/ontologies_linked_data/utils/file.rb +++ b/lib/ontologies_linked_data/utils/file.rb @@ -9,11 +9,18 @@ module FileHelpers def self.zip?(file_path) file_path = file_path.to_s - unless File.exist? file_path - raise ArgumentError, "File path #{file_path} not found" - end + raise ArgumentError, "File path #{file_path} not found" unless File.exist? file_path + + file_type = `file --mime -b #{Shellwords.escape(file_path)}` + file_type.split(';')[0] == 'application/zip' + end + + def self.gzip?(file_path) + file_path = file_path.to_s + raise ArgumentError, "File path #{file_path} not found" unless File.exist? file_path + file_type = `file --mime -b #{Shellwords.escape(file_path)}` - return file_type.split(";")[0] == "application/zip" + file_type.split(';')[0] == 'application/x-gzip' end def self.files_from_zip(file_path) @@ -21,11 +28,12 @@ def self.files_from_zip(file_path) unless File.exist? file_path raise ArgumentError, "File path #{file_path} not found" end + files = [] Zip::File.open(file_path) do |zipfile| zipfile.each do |file| if not file.directory? - if not file.name.split("/")[-1].start_with? "." #a hidden file in __MACOSX or .DS_Store + if not file.name.split('/')[-1].start_with? '.' #a hidden file in __MACOSX or .DS_Store files << file.name end end @@ -37,26 +45,30 @@ def self.files_from_zip(file_path) def self.unzip(file_path, dst_folder) file_path = file_path.to_s dst_folder = dst_folder.to_s - unless File.exist? file_path - raise ArgumentError, "File path #{file_path} not found" - end - unless Dir.exist? dst_folder - raise ArgumentError, "Folder path #{dst_folder} not found" - end + raise ArgumentError, "File path #{file_path} not found" unless File.exist? file_path + raise ArgumentError, "Folder path #{dst_folder} not found" unless Dir.exist? dst_folder + extracted_files = [] - Zip::File.open(file_path) do |zipfile| - zipfile.each do |file| - if file.name.split("/").length > 1 - sub_folder = File.join(dst_folder, - file.name.split("/")[0..-2].join("/")) - unless Dir.exist?(sub_folder) - FileUtils.mkdir_p sub_folder + if gzip?(file_path) + Zlib::GzipReader.open(file_path) do |gz| + File.open([dst_folder, gz.orig_name].join('/'), "w") { |file| file.puts(gz.read) } + extracted_files << GzipFile.new(gz) + end + else + Zip::File.open(file_path) do |zipfile| + zipfile.each do |file| + if file.name.split('/').length > 1 + sub_folder = File.join(dst_folder, + file.name.split('/')[0..-2].join('/')) + unless Dir.exist?(sub_folder) + FileUtils.mkdir_p sub_folder + end end + extracted_files << file.extract(File.join(dst_folder,file.name)) end - extracted_files << file.extract(File.join(dst_folder,file.name)) end end - return extracted_files + extracted_files end def self.automaster?(path, format) @@ -65,13 +77,13 @@ def self.automaster?(path, format) def self.automaster(path, format) files = self.files_from_zip(path) - basename = File.basename(path, ".zip") + basename = File.basename(path, '.zip') basename = File.basename(basename, format) files.select {|f| File.basename(f, format).downcase.eql?(basename.downcase)}.first end def self.repeated_names_in_file_list(file_list) - return file_list.group_by {|x| x.split("/")[-1]}.select { |k,v| v.length > 1} + return file_list.group_by {|x| x.split('/')[-1]}.select { |k,v| v.length > 1} end def self.exists_and_file(path) @@ -95,7 +107,7 @@ def self.download_file(uri, limit = 10) http_session.use_ssl = (uri.scheme == 'https') http_session.start do |http| http.read_timeout = 1800 - http.request_get(uri.request_uri, {"Accept-Encoding" => "gzip"}) do |res| + http.request_get(uri.request_uri, {'Accept-Encoding' => 'gzip'}) do |res| if res.kind_of?(Net::HTTPRedirection) new_loc = res['location'] if new_loc.match(/^(http:\/\/|https:\/\/)/) @@ -108,9 +120,9 @@ def self.download_file(uri, limit = 10) raise Net::HTTPBadResponse.new("#{uri.request_uri}: #{res.code}") if res.code.to_i >= 400 - file_size = res.read_header["content-length"].to_i + file_size = res.read_header['content-length'].to_i begin - filename = res.read_header["content-disposition"].match(/filename=\"(.*)\"/)[1] if filename.nil? + filename = res.read_header['content-disposition'].match(/filename=\"(.*)\"/)[1] if filename.nil? rescue filename = LinkedData::Utils::Triples.last_iri_fragment(uri.request_uri) if filename.nil? end @@ -118,7 +130,7 @@ def self.download_file(uri, limit = 10) file.write(res.body) if res.header['Content-Encoding'].eql?('gzip') - uncompressed_file = Tempfile.new("uncompressed-ont-rest-file") + uncompressed_file = Tempfile.new('uncompressed-ont-rest-file') file.rewind sio = StringIO.new(file.read) gz = Zlib::GzipReader.new(sio) From 302462020ac647a9d8f7f461b0f0c7682e431a1d Mon Sep 17 00:00:00 2001 From: Syphax Bouazzouni Date: Thu, 3 Mar 2022 18:19:14 +0100 Subject: [PATCH 04/11] fix function argument default --- lib/ontologies_linked_data/models/ontology_submission.rb | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/lib/ontologies_linked_data/models/ontology_submission.rb b/lib/ontologies_linked_data/models/ontology_submission.rb index bc05b460..b724e412 100644 --- a/lib/ontologies_linked_data/models/ontology_submission.rb +++ b/lib/ontologies_linked_data/models/ontology_submission.rb @@ -261,12 +261,12 @@ def data_folder self.submissionId.to_s) end - def zipped?(full_file_path: self.uploadFilePath) + def zipped?(full_file_path = uploadFilePath) LinkedData::Utils::FileHelpers.zip?(full_file_path) || LinkedData::Utils::FileHelpers.gzip?(full_file_path) end def zip_folder - File.join([self.data_folder, "unzipped"]) + File.join([data_folder, "unzipped"]) end def master_file_path From a8711f58d6d1de5785b77e3c952350d86a9e256f Mon Sep 17 00:00:00 2001 From: Syphax Bouazzouni Date: Thu, 19 May 2022 15:01:30 +0200 Subject: [PATCH 05/11] add the owlapi_parser builder --- .../models/ontology_submission.rb | 34 +++++++++++++------ 1 file changed, 24 insertions(+), 10 deletions(-) diff --git a/lib/ontologies_linked_data/models/ontology_submission.rb b/lib/ontologies_linked_data/models/ontology_submission.rb index b724e412..a4d6e931 100644 --- a/lib/ontologies_linked_data/models/ontology_submission.rb +++ b/lib/ontologies_linked_data/models/ontology_submission.rb @@ -261,19 +261,14 @@ def data_folder self.submissionId.to_s) end - def zipped?(full_file_path = uploadFilePath) + def zipped?(full_file_path = uploadFilePath) LinkedData::Utils::FileHelpers.zip?(full_file_path) || LinkedData::Utils::FileHelpers.gzip?(full_file_path) end def zip_folder - File.join([data_folder, "unzipped"]) + File.join([data_folder, 'unzipped']) end - def master_file_path - bring :uploadFilePath if bring? :uploadFilePath - bring :masterFileName if bring :masterFileName - File.expand_path(zipped? ? File.join(zip_folder, self.masterFileName) : self.uploadFilePath) - end def csv_path return File.join(self.data_folder, self.ontology.acronym.to_s + ".csv.gz") end @@ -319,10 +314,12 @@ def unzip_submission(logger) self.save end - logger.info("Files extracted from zip #{extracted}") - logger.flush + if logger + logger.info("Files extracted from zip #{extracted}") + logger.flush + end end - return zip_dst + zip_dst end def delete_old_submission_files @@ -1537,6 +1534,23 @@ def delete_classes_graph Goo.sparql_data_client.delete_graph(self.id) end + def owlapi_parser(logger: Logger.new($stdout)) + unzip_submission(logger) + LinkedData::Parser::OWLAPICommand.new( + master_file_path, + File.expand_path(self.data_folder.to_s), + master_file: self.masterFileName, + logger: logger) + end + + def master_file_path + path = if zip? + File.join(self.zip_folder, self.masterFileName) + else + self.uploadFilePath + end + File.expand_path(path) + end private def delete_and_append(triples_file_path, logger, mime_type = nil) From 6a1dcba5b2f3614b205b8aa9747f60855aba63b1 Mon Sep 17 00:00:00 2001 From: Syphax Bouazzouni Date: Thu, 19 May 2022 14:19:51 +0200 Subject: [PATCH 06/11] use the owlapi_parser builder in the submission generate_rdf --- .../models/ontology_submission.rb | 13 ++++--------- 1 file changed, 4 insertions(+), 9 deletions(-) diff --git a/lib/ontologies_linked_data/models/ontology_submission.rb b/lib/ontologies_linked_data/models/ontology_submission.rb index a4d6e931..f9dfbee9 100644 --- a/lib/ontologies_linked_data/models/ontology_submission.rb +++ b/lib/ontologies_linked_data/models/ontology_submission.rb @@ -434,7 +434,7 @@ def generate_umls_metrics_file(tr_file_path=nil) self.generate_metrics_file(class_count, indiv_count, prop_count) end - def generate_rdf(logger, file_path, reasoning=true) + def generate_rdf(logger, reasoning: true) mime_type = nil if self.hasOntologyLanguage.umls? @@ -456,10 +456,7 @@ def generate_rdf(logger, file_path, reasoning=true) logger.info("error deleting owlapi.rdf") end end - owlapi = LinkedData::Parser::OWLAPICommand.new( - File.expand_path(file_path), - File.expand_path(self.data_folder.to_s), - master_file: self.masterFileName) + owlapi = owlapi_parser(logger: nil) if !reasoning owlapi.disable_reasoner @@ -961,7 +958,7 @@ def process_submission(logger, options={}) self.save # Parse RDF - file_path = nil + file_path = master_file_path begin if not self.valid? error = "Submission is not valid, it cannot be processed. Check errors." @@ -973,9 +970,7 @@ def process_submission(logger, options={}) end status = LinkedData::Models::SubmissionStatus.find("RDF").first remove_submission_status(status) #remove RDF status before starting - zip_dst = unzip_submission(logger) - file_path = zip_dst ? zip_dst.to_s : self.uploadFilePath.to_s - generate_rdf(logger, file_path, reasoning=reasoning) + generate_rdf(logger, reasoning: reasoning) add_submission_status(status) self.save rescue Exception => e From 1cec803916513fcb010b221a1b036ff3fce3497e Mon Sep 17 00:00:00 2001 From: Syphax Bouazzouni Date: Thu, 19 May 2022 15:10:11 +0200 Subject: [PATCH 07/11] add the parsable? method to the submissions --- .../models/ontology_submission.rb | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/lib/ontologies_linked_data/models/ontology_submission.rb b/lib/ontologies_linked_data/models/ontology_submission.rb index f9dfbee9..d6205253 100644 --- a/lib/ontologies_linked_data/models/ontology_submission.rb +++ b/lib/ontologies_linked_data/models/ontology_submission.rb @@ -1546,6 +1546,17 @@ def master_file_path end File.expand_path(path) end + def parsable?(logger: Logger.new($stdout)) + owlapi = owlapi_parser(logger: logger) + owlapi.disable_reasoner + parsable = true + begin + owlapi.parse + rescue StandardError => e + parsable = false + end + parsable + end private def delete_and_append(triples_file_path, logger, mime_type = nil) From 5360eb56b025d4d9262a413d7505f6ea365c449b Mon Sep 17 00:00:00 2001 From: Syphax Bouazzouni Date: Thu, 19 May 2022 15:11:41 +0200 Subject: [PATCH 08/11] use zipped? in the master_file_path method --- lib/ontologies_linked_data/models/ontology_submission.rb | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/lib/ontologies_linked_data/models/ontology_submission.rb b/lib/ontologies_linked_data/models/ontology_submission.rb index d6205253..1458f617 100644 --- a/lib/ontologies_linked_data/models/ontology_submission.rb +++ b/lib/ontologies_linked_data/models/ontology_submission.rb @@ -1539,13 +1539,14 @@ def owlapi_parser(logger: Logger.new($stdout)) end def master_file_path - path = if zip? + path = if zipped? File.join(self.zip_folder, self.masterFileName) - else + else self.uploadFilePath end File.expand_path(path) end + def parsable?(logger: Logger.new($stdout)) owlapi = owlapi_parser(logger: logger) owlapi.disable_reasoner @@ -1557,6 +1558,7 @@ def parsable?(logger: Logger.new($stdout)) end parsable end + private def delete_and_append(triples_file_path, logger, mime_type = nil) From 32552814c08697afb8349bf39a63c536d7bd82e8 Mon Sep 17 00:00:00 2001 From: Syphax Bouazzouni Date: Thu, 19 May 2022 16:41:17 +0200 Subject: [PATCH 09/11] make the owlapi_parser builder private --- .../models/ontology_submission.rb | 32 +++++++++++++------ 1 file changed, 23 insertions(+), 9 deletions(-) diff --git a/lib/ontologies_linked_data/models/ontology_submission.rb b/lib/ontologies_linked_data/models/ontology_submission.rb index 1458f617..704ff85e 100644 --- a/lib/ontologies_linked_data/models/ontology_submission.rb +++ b/lib/ontologies_linked_data/models/ontology_submission.rb @@ -1529,14 +1529,6 @@ def delete_classes_graph Goo.sparql_data_client.delete_graph(self.id) end - def owlapi_parser(logger: Logger.new($stdout)) - unzip_submission(logger) - LinkedData::Parser::OWLAPICommand.new( - master_file_path, - File.expand_path(self.data_folder.to_s), - master_file: self.masterFileName, - logger: logger) - end def master_file_path path = if zipped? @@ -1558,9 +1550,31 @@ def parsable?(logger: Logger.new($stdout)) end parsable end - + + private + + def owlapi_parser_input + path = if zipped? + self.zip_folder + else + self.uploadFilePath + end + File.expand_path(path) + end + + + def owlapi_parser(logger: Logger.new($stdout)) + unzip_submission(logger) + LinkedData::Parser::OWLAPICommand.new( + owlapi_parser_input, + File.expand_path(self.data_folder.to_s), + master_file: self.masterFileName, + logger: logger) + end + + def delete_and_append(triples_file_path, logger, mime_type = nil) Goo.sparql_data_client.delete_graph(self.id) Goo.sparql_data_client.put_triples(self.id, triples_file_path, mime_type) From de1a2079310d36eca7fa97a78dad5c03bbc813ca Mon Sep 17 00:00:00 2001 From: Syphax Bouazzouni Date: Thu, 19 May 2022 16:43:03 +0200 Subject: [PATCH 10/11] use the uploadFilePath as path for missing_labels and obsolete_classes --- lib/ontologies_linked_data/models/ontology_submission.rb | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/ontologies_linked_data/models/ontology_submission.rb b/lib/ontologies_linked_data/models/ontology_submission.rb index 704ff85e..fe78e3fa 100644 --- a/lib/ontologies_linked_data/models/ontology_submission.rb +++ b/lib/ontologies_linked_data/models/ontology_submission.rb @@ -958,7 +958,6 @@ def process_submission(logger, options={}) self.save # Parse RDF - file_path = master_file_path begin if not self.valid? error = "Submission is not valid, it cannot be processed. Check errors." @@ -982,6 +981,7 @@ def process_submission(logger, options={}) raise e end + file_path = self.uploadFilePath callbacks = { missing_labels: { op_name: "Missing Labels Generation", From 4b464644a1d41ff45886358e90eddd9954bd5dc0 Mon Sep 17 00:00:00 2001 From: Syphax Bouazzouni Date: Thu, 19 May 2022 16:43:44 +0200 Subject: [PATCH 11/11] bring the uploadFilePath before doing the diff --- lib/ontologies_linked_data/models/ontology_submission.rb | 1 + 1 file changed, 1 insertion(+) diff --git a/lib/ontologies_linked_data/models/ontology_submission.rb b/lib/ontologies_linked_data/models/ontology_submission.rb index fe78e3fa..76f82e56 100644 --- a/lib/ontologies_linked_data/models/ontology_submission.rb +++ b/lib/ontologies_linked_data/models/ontology_submission.rb @@ -335,6 +335,7 @@ def diff(logger, older) begin bring_remaining bring :diffFilePath if bring? :diffFilePath + older.bring :uploadFilePath if older.bring? :uploadFilePath LinkedData::Diff.logger = logger bubastis = LinkedData::Diff::BubastisDiffCommand.new(