Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
33 changes: 20 additions & 13 deletions lib/ontologies_linked_data/utils/file.rb
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
require 'net/http'
require 'uri'
require 'zip'
require 'zlib'
require 'tmpdir'

module LinkedData
Expand All @@ -20,7 +21,7 @@ def self.gzip?(file_path)
raise ArgumentError, "File path #{file_path} not found" unless File.exist? file_path

file_type = `file --mime -b #{Shellwords.escape(file_path)}`
file_type.split(';')[0] == 'application/x-gzip'
file_type.split(';')[0] == 'application/gzip' || file_type.split(';')[0] == 'application/x-gzip'
end

def self.files_from_zip(file_path)
Expand All @@ -30,15 +31,22 @@ def self.files_from_zip(file_path)
end

files = []
Zip::File.open(file_path) do |zipfile|
zipfile.each do |file|
if not file.directory?
if not file.name.split('/')[-1].start_with? '.' #a hidden file in __MACOSX or .DS_Store
if gzip?(file_path)
Zlib::GzipReader.open(file_path) do |file|
files << file.orig_name unless File.directory?(file) || file.orig_name.split('/')[-1].start_with?('.') # a hidden file in __MACOSX or .DS_Store
end
elsif zip?(file_path)
Zip::File.open(file_path) do |zip_files|
zip_files.each do |file|
unless file.directory? || file.name.split('/')[-1].start_with?('.') # a hidden file in __MACOSX or .DS_Store
files << file.name
end
end
end
else
raise StandardError, "Unsupported file format: #{File.extname(file_path)}"
end

return files
end

Expand All @@ -52,21 +60,20 @@ def self.unzip(file_path, dst_folder)
if gzip?(file_path)
Zlib::GzipReader.open(file_path) do |gz|
File.open([dst_folder, gz.orig_name].join('/'), "w") { |file| file.puts(gz.read) }
extracted_files << GzipFile.new(gz)
extracted_files << gz
end
else
elsif zip?(file_path)
Zip::File.open(file_path) do |zipfile|
zipfile.each do |file|
if file.name.split('/').length > 1
sub_folder = File.join(dst_folder,
file.name.split('/')[0..-2].join('/'))
unless Dir.exist?(sub_folder)
FileUtils.mkdir_p sub_folder
end
sub_folder = File.join(dst_folder, file.name.split('/')[0..-2].join('/'))
FileUtils.mkdir_p sub_folder unless Dir.exist?(sub_folder)
end
extracted_files << file.extract(File.join(dst_folder,file.name))
extracted_files << file.extract(File.join(dst_folder, file.name))
end
end
else
raise StandardError, "Unsupported file format: #{File.extname(file_path)}"
end
extracted_files
end
Expand Down
Binary file added test/data/ontology_files/BRO_v3.2.owl.gz
Binary file not shown.
44 changes: 44 additions & 0 deletions test/models/test_ontology_submission.rb
Original file line number Diff line number Diff line change
Expand Up @@ -81,6 +81,13 @@ def test_automaster_from_zip
assert_equal nil, LinkedData::Utils::FileHelpers.automaster(zipfile, ".obo")
end

# FileHelpers.gzip? must report true for the gzip fixture and false for the zip fixture.
def test_is_gzip
  helpers = LinkedData::Utils::FileHelpers
  gz_fixture = "./test/data/ontology_files/BRO_v3.2.owl.gz"
  zip_fixture = "./test/data/ontology_files/evoc_v2.9.zip"

  assert helpers.gzip?(gz_fixture)
  refute helpers.gzip?(zip_fixture)
end

def test_duplicated_file_names

acronym = "DUPTEST"
Expand Down Expand Up @@ -501,6 +508,43 @@ def test_submission_parse_zip
puts "#{ctr} classes with no label"
end

# End-to-end test: creates an ontology submission from a gzipped OWL fixture,
# processes it, and checks that it reaches the expected statuses and yields classes.
# Skipped entirely when the BP_SKIP_HEAVY_TESTS environment variable is "1".
def test_submission_parse_gzip
skip if ENV["BP_SKIP_HEAVY_TESTS"] == "1"

acronym = "BROGZ"
name = "BRO GZIPPED"
ontologyFile = "./test/data/ontology_files/BRO_v3.2.owl.gz"
id = 11

# NOTE(review): presumably wipes the test triple store so the run starts clean — confirm.
LinkedData::TestCase.backend_4s_delete

# A submission built with only a submissionId is expected to be invalid with exactly 4 errors.
ont_submission = LinkedData::Models::OntologySubmission.new({submissionId: id})
refute ont_submission.valid?
assert_equal 4, ont_submission.errors.length
# Copy the gzipped fixture via copy_file_repository and point the submission at the returned path.
upload_file_path = LinkedData::Models::OntologySubmission.copy_file_repository(acronym, id, ontologyFile)
ont_submission.uploadFilePath = upload_file_path
# The helper returns four objects; `user` is not used further in this test.
owl, bro, user, contact = submission_dependent_objects("OWL", acronym, "test_linked_models", name)
ont_submission.released = DateTime.now - 4
ont_submission.hasOntologyLanguage = owl
ont_submission.prefLabelProperty = RDF::URI.new("http://bioontology.org/projects/ontologies/radlex/radlexOwl#Preferred_name")
ont_submission.ontology = bro
ont_submission.contact = [contact]
# With the attributes above filled in, the submission must now validate.
assert ont_submission.valid?
ont_submission.save
parse_options = {process_rdf: true, reasoning: true, index_search: false, run_metrics: false, diff: false}
begin
tmp_log = Logger.new(TestLogFile.new)
ont_submission.process_submission(tmp_log, parse_options)
rescue StandardError => e
# Print where the processing log was written before re-raising, to aid debugging.
puts "Error, logged in #{tmp_log.instance_variable_get("@logdev").dev.path}"
raise e
end

# Processing must leave the submission in the uploaded, rdf and rdf_labels statuses.
assert ont_submission.ready?({status: [:uploaded, :rdf, :rdf_labels]})
# At least one class (queried with prefLabel included) must exist for the submission.
read_only_classes = LinkedData::Models::Class.in(ont_submission).include(:prefLabel).read_only
refute read_only_classes.empty?
end

def test_download_ontology_file
begin
server_port = Random.rand(55000..65535) # http://en.wikipedia.org/wiki/List_of_TCP_and_UDP_port_numbers#Dynamic.2C_private_or_ephemeral_ports
Expand Down