# Reviewer summary of this patch. These '#' lines sit ahead of the first
# "diff --git" header and belong to no hunk.
#
# lib/ontologies_linked_data/utils/file.rb
#   * adds `require 'zlib'`.
#   * gzip?          - now also accepts the MIME type 'application/gzip'
#                      (previously only 'application/x-gzip').
#   * files_from_zip - branches on gzip?/zip?. For gzip input it collects the
#                      reader's orig_name; for zip input it collects entry
#                      names. Directories and names whose last path component
#                      starts with '.' are skipped. Input matching neither
#                      check raises StandardError
#                      "Unsupported file format: <extname>".
#   * unzip          - gzip branch now pushes the GzipReader itself onto
#                      extracted_files (was GzipFile.new(gz)); the former bare
#                      `else` becomes `elsif zip?(file_path)`; anything else
#                      raises the same StandardError. The sub_folder creation
#                      is only re-wrapped onto fewer lines.
# test/data/ontology_files/BRO_v3.2.owl.gz
#   * new binary fixture.
# test/models/test_ontology_submission.rb
#   * test_is_gzip               - gzip? is truthy for the .owl.gz fixture and
#                                  falsy for evoc_v2.9.zip.
#   * test_submission_parse_gzip - skipped when BP_SKIP_HEAVY_TESTS == "1";
#                                  processes a submission built from the .gz
#                                  fixture and asserts the ready? statuses and
#                                  a non-empty read-only class list.
#
# NOTE(review): Zlib::GzipFile#orig_name is documented to return nil when the
#   gzip header carries no original file name. Both gzip branches use it
#   without a nil check (`.split` in files_from_zip, path join in unzip).
#   Confirm inputs always carry a name, or add a fallback.
# NOTE(review): unzip's gzip branch (unchanged context) writes with
#   File.open(..., "w") and `puts(gz.read)`. IO#puts appends a newline when
#   the data does not already end with one, so the written file can differ
#   from the compressed payload. Confirm this is acceptable.
# NOTE(review): `extracted_files << gz` runs inside the block form of
#   Zlib::GzipReader.open, which closes the reader when the block returns, so
#   callers receive a closed reader. Callers are not visible here - confirm
#   what they read from these entries.
# NOTE(review): in files_from_zip, `File.directory?(file)` is passed the
#   GzipReader rather than a path. Its purpose is unclear from here - confirm
#   it is needed.
# NOTE(review): `zip?` is called but not defined in the visible hunks;
#   presumably it is defined elsewhere in file.rb - verify.
# NOTE(review): indentation and blank context lines below are reconstructed
#   from the hunk line counts - confirm against the target files before
#   applying.
diff --git a/lib/ontologies_linked_data/utils/file.rb b/lib/ontologies_linked_data/utils/file.rb
index b7cc1b5f..f740300d 100644
--- a/lib/ontologies_linked_data/utils/file.rb
+++ b/lib/ontologies_linked_data/utils/file.rb
@@ -1,6 +1,7 @@
 require 'net/http'
 require 'uri'
 require 'zip'
+require 'zlib'
 require 'tmpdir'
 
 module LinkedData
@@ -20,7 +21,7 @@ def self.gzip?(file_path)
         raise ArgumentError, "File path #{file_path} not found" unless File.exist? file_path
 
         file_type = `file --mime -b #{Shellwords.escape(file_path)}`
-        file_type.split(';')[0] == 'application/x-gzip'
+        file_type.split(';')[0] == 'application/gzip' || file_type.split(';')[0] == 'application/x-gzip'
       end
 
       def self.files_from_zip(file_path)
@@ -30,15 +31,22 @@ def self.files_from_zip(file_path)
         end
 
         files = []
-        Zip::File.open(file_path) do |zipfile|
-          zipfile.each do |file|
-            if not file.directory?
-              if not file.name.split('/')[-1].start_with? '.' #a hidden file in __MACOSX or .DS_Store
+        if gzip?(file_path)
+          Zlib::GzipReader.open(file_path) do |file|
+            files << file.orig_name unless File.directory?(file) || file.orig_name.split('/')[-1].start_with?('.') # a hidden file in __MACOSX or .DS_Store
+          end
+        elsif zip?(file_path)
+          Zip::File.open(file_path) do |zip_files|
+            zip_files.each do |file|
+              unless file.directory? || file.name.split('/')[-1].start_with?('.') # a hidden file in __MACOSX or .DS_Store
                 files << file.name
               end
             end
           end
+        else
+          raise StandardError, "Unsupported file format: #{File.extname(file_path)}"
         end
+
         return files
       end
 
@@ -52,21 +60,20 @@ def self.unzip(file_path, dst_folder)
         if gzip?(file_path)
           Zlib::GzipReader.open(file_path) do |gz|
             File.open([dst_folder, gz.orig_name].join('/'), "w") { |file| file.puts(gz.read) }
-            extracted_files << GzipFile.new(gz)
+            extracted_files << gz
           end
-        else
+        elsif zip?(file_path)
           Zip::File.open(file_path) do |zipfile|
             zipfile.each do |file|
               if file.name.split('/').length > 1
-                sub_folder = File.join(dst_folder,
-                                       file.name.split('/')[0..-2].join('/'))
-                unless Dir.exist?(sub_folder)
-                  FileUtils.mkdir_p sub_folder
-                end
+                sub_folder = File.join(dst_folder, file.name.split('/')[0..-2].join('/'))
+                FileUtils.mkdir_p sub_folder unless Dir.exist?(sub_folder)
               end
-              extracted_files << file.extract(File.join(dst_folder,file.name))
+              extracted_files << file.extract(File.join(dst_folder, file.name))
             end
           end
+        else
+          raise StandardError, "Unsupported file format: #{File.extname(file_path)}"
         end
         extracted_files
       end
diff --git a/test/data/ontology_files/BRO_v3.2.owl.gz b/test/data/ontology_files/BRO_v3.2.owl.gz
new file mode 100644
index 00000000..9e1abfd6
Binary files /dev/null and b/test/data/ontology_files/BRO_v3.2.owl.gz differ
diff --git a/test/models/test_ontology_submission.rb b/test/models/test_ontology_submission.rb
index d1ea17cf..dffab3c5 100644
--- a/test/models/test_ontology_submission.rb
+++ b/test/models/test_ontology_submission.rb
@@ -81,6 +81,13 @@ def test_automaster_from_zip
     assert_equal nil, LinkedData::Utils::FileHelpers.automaster(zipfile, ".obo")
   end
 
+  def test_is_gzip
+    gzipfile = "./test/data/ontology_files/BRO_v3.2.owl.gz"
+    zipfile = "./test/data/ontology_files/evoc_v2.9.zip"
+    assert LinkedData::Utils::FileHelpers.gzip?(gzipfile)
+    refute LinkedData::Utils::FileHelpers.gzip?(zipfile)
+  end
+
   def test_duplicated_file_names
 
     acronym = "DUPTEST"
@@ -501,6 +508,43 @@ def test_submission_parse_zip
     puts "#{ctr} classes with no label"
   end
 
+  def test_submission_parse_gzip
+    skip if ENV["BP_SKIP_HEAVY_TESTS"] == "1"
+
+    acronym = "BROGZ"
+    name = "BRO GZIPPED"
+    ontologyFile = "./test/data/ontology_files/BRO_v3.2.owl.gz"
+    id = 11
+
+    LinkedData::TestCase.backend_4s_delete
+
+    ont_submission = LinkedData::Models::OntologySubmission.new({submissionId: id})
+    refute ont_submission.valid?
+    assert_equal 4, ont_submission.errors.length
+    upload_file_path = LinkedData::Models::OntologySubmission.copy_file_repository(acronym, id, ontologyFile)
+    ont_submission.uploadFilePath = upload_file_path
+    owl, bro, user, contact = submission_dependent_objects("OWL", acronym, "test_linked_models", name)
+    ont_submission.released = DateTime.now - 4
+    ont_submission.hasOntologyLanguage = owl
+    ont_submission.prefLabelProperty = RDF::URI.new("http://bioontology.org/projects/ontologies/radlex/radlexOwl#Preferred_name")
+    ont_submission.ontology = bro
+    ont_submission.contact = [contact]
+    assert ont_submission.valid?
+    ont_submission.save
+    parse_options = {process_rdf: true, reasoning: true, index_search: false, run_metrics: false, diff: false}
+    begin
+      tmp_log = Logger.new(TestLogFile.new)
+      ont_submission.process_submission(tmp_log, parse_options)
+    rescue StandardError => e
+      puts "Error, logged in #{tmp_log.instance_variable_get("@logdev").dev.path}"
+      raise e
+    end
+
+    assert ont_submission.ready?({status: [:uploaded, :rdf, :rdf_labels]})
+    read_only_classes = LinkedData::Models::Class.in(ont_submission).include(:prefLabel).read_only
+    refute read_only_classes.empty?
+  end
+
   def test_download_ontology_file
     begin
       server_port = Random.rand(55000..65535) # http://en.wikipedia.org/wiki/List_of_TCP_and_UDP_port_numbers#Dynamic.2C_private_or_ephemeral_ports