Patch summary
  * FileHelpers: add gzip? next to zip?, and let unzip extract gzip archives.
  * OntologySubmission: add zipped? (zip or gzip) and master_file_path, and use
    zipped? in sanity_check, triples_file_path and unzip_submission.
  * OntologySubmission#diff: diff the master files of both submissions and pass
    data_folder to BubastisDiffCommand as the output directory.
  * Remaining hunks in utils/file.rb only switch string literals to single quotes.

Changes made during review (relative to the submitted patch)
  * master_file_path: guard the masterFileName load with bring? (was bring).
  * FileHelpers.unzip: write the gunzipped payload in binary mode with write,
    so no newline is appended and no text-mode translation is applied.
  * FileHelpers.gzip?: accept application/gzip as well as application/x-gzip.
  * BubastisDiffCommand#initialize: output_repo defaults to nil and falls back to
    the previous directory expression, so two-argument callers keep working.
  * Blank line added between master_file_path and csv_path (hunk offsets adjusted).

NOTE(review): sanity_check now hands gzip uploads to files_from_zip, which, as
far as this patch shows, still opens them with Zip::File. Please confirm.
NOTE(review): GzipFile is referenced in unzip but not defined in this patch.
Please confirm it exists elsewhere.
NOTE(review): indentation and blank context lines were reconstructed from the
hunk counts, and the index hashes predate the review changes. Verify before applying.

diff --git a/lib/ontologies_linked_data/diff/bubastis_diff.rb b/lib/ontologies_linked_data/diff/bubastis_diff.rb
index d5de6f65..4b013744 100644
--- a/lib/ontologies_linked_data/diff/bubastis_diff.rb
+++ b/lib/ontologies_linked_data/diff/bubastis_diff.rb
@@ -37,11 +37,11 @@ class BubastisDiffCommand
       # Loading one file locally and one from the web and outputting results to plain text:
       # java -jar bubastis_1_2.jar -ontology1 "H://disease_ontology_version_1.owl" -ontology2 "http://www.disease.org/diseaseontology_latest.owl" -output "C://my_diff.txt"
 
-      def initialize(input_fileOld, input_fileNew)
+      def initialize(input_fileOld, input_fileNew, output_repo = nil)
         @bubastis_jar_path = LinkedData.bindir + "/bubastis.jar"
         @input_fileOld = input_fileOld
         @input_fileNew = input_fileNew
-        @output_repo = File.expand_path(@input_fileNew).gsub(File.basename(@input_fileNew),'')
+        @output_repo = output_repo || File.expand_path(@input_fileNew).gsub(File.basename(@input_fileNew),'')
         @file_diff_path = nil
       end
 
diff --git a/lib/ontologies_linked_data/models/ontology_submission.rb b/lib/ontologies_linked_data/models/ontology_submission.rb
index 050958dd..b724e412 100644
--- a/lib/ontologies_linked_data/models/ontology_submission.rb
+++ b/lib/ontologies_linked_data/models/ontology_submission.rb
@@ -205,7 +205,7 @@ def sanity_check
           return true
         end
 
-        zip = LinkedData::Utils::FileHelpers.zip?(self.uploadFilePath)
+        zip = zipped?
         files = LinkedData::Utils::FileHelpers.files_from_zip(self.uploadFilePath) if zip
 
         if not zip and self.masterFileName.nil?
@@ -261,10 +261,20 @@ def data_folder
                          self.submissionId.to_s)
       end
 
+      def zipped?(full_file_path = uploadFilePath)
+        LinkedData::Utils::FileHelpers.zip?(full_file_path) || LinkedData::Utils::FileHelpers.gzip?(full_file_path)
+      end
+
       def zip_folder
-        return File.join([self.data_folder, "unzipped"])
+        File.join([data_folder, "unzipped"])
       end
 
+      def master_file_path
+        bring :uploadFilePath if bring? :uploadFilePath
+        bring :masterFileName if bring? :masterFileName
+        File.expand_path(zipped? ? File.join(zip_folder, self.masterFileName) : self.uploadFilePath)
+      end
+
       def csv_path
         return File.join(self.data_folder, self.ontology.acronym.to_s + ".csv.gz")
       end
@@ -286,17 +296,16 @@ def triples_file_path
         self.bring(:masterFileName) if self.bring?(:masterFileName)
         triples_file_name = File.basename(self.uploadFilePath.to_s)
         full_file_path = File.join(File.expand_path(self.data_folder.to_s), triples_file_name)
-        zip = LinkedData::Utils::FileHelpers.zip?(full_file_path)
+        zip = zipped? full_file_path
         triples_file_name = File.basename(self.masterFileName.to_s) if zip && self.masterFileName
         file_name = File.join(File.expand_path(self.data_folder.to_s), triples_file_name)
         File.expand_path(file_name)
       end
 
       def unzip_submission(logger)
-        zip = LinkedData::Utils::FileHelpers.zip?(self.uploadFilePath)
         zip_dst = nil
 
-        if zip
+        if zipped?
           zip_dst = self.zip_folder
 
           if Dir.exist? zip_dst
@@ -328,17 +337,17 @@ def delete_old_submission_files
       # accepts another submission in 'older' (it should be an 'older' ontology version)
       def diff(logger, older)
         begin
-          self.bring_remaining
-          self.bring(:diffFilePath)
-          self.bring(:uploadFilePath)
-          older.bring(:uploadFilePath)
+          bring_remaining
+          bring :diffFilePath if bring? :diffFilePath
+
           LinkedData::Diff.logger = logger
           bubastis = LinkedData::Diff::BubastisDiffCommand.new(
-            File.expand_path(older.uploadFilePath),
-            File.expand_path(self.uploadFilePath)
+            File.expand_path(older.master_file_path),
+            File.expand_path(self.master_file_path),
+            data_folder
           )
           self.diffFilePath = bubastis.diff
-          self.save
+          save
           logger.info("Bubastis diff generated successfully for #{self.id}")
           logger.flush
         rescue Exception => e
diff --git a/lib/ontologies_linked_data/utils/file.rb b/lib/ontologies_linked_data/utils/file.rb
index 02cfae14..ea7a8902 100644
--- a/lib/ontologies_linked_data/utils/file.rb
+++ b/lib/ontologies_linked_data/utils/file.rb
@@ -9,11 +9,18 @@ module FileHelpers
 
       def self.zip?(file_path)
         file_path = file_path.to_s
-        unless File.exist? file_path
-          raise ArgumentError, "File path #{file_path} not found"
-        end
+        raise ArgumentError, "File path #{file_path} not found" unless File.exist? file_path
+
+        file_type = `file --mime -b #{Shellwords.escape(file_path)}`
+        file_type.split(';')[0] == 'application/zip'
+      end
+
+      def self.gzip?(file_path)
+        file_path = file_path.to_s
+        raise ArgumentError, "File path #{file_path} not found" unless File.exist? file_path
+
         file_type = `file --mime -b #{Shellwords.escape(file_path)}`
-        return file_type.split(";")[0] == "application/zip"
+        %w[application/gzip application/x-gzip].include?(file_type.split(';')[0])
       end
 
       def self.files_from_zip(file_path)
@@ -21,11 +28,12 @@ def self.files_from_zip(file_path)
         unless File.exist? file_path
           raise ArgumentError, "File path #{file_path} not found"
         end
+
         files = []
         Zip::File.open(file_path) do |zipfile|
           zipfile.each do |file|
             if not file.directory?
-              if not file.name.split("/")[-1].start_with? "." #a hidden file in __MACOSX or .DS_Store
+              if not file.name.split('/')[-1].start_with? '.' #a hidden file in __MACOSX or .DS_Store
                 files << file.name
               end
             end
@@ -37,26 +45,30 @@ def self.files_from_zip(file_path)
       def self.unzip(file_path, dst_folder)
         file_path = file_path.to_s
         dst_folder = dst_folder.to_s
-        unless File.exist? file_path
-          raise ArgumentError, "File path #{file_path} not found"
-        end
-        unless Dir.exist? dst_folder
-          raise ArgumentError, "Folder path #{dst_folder} not found"
-        end
+        raise ArgumentError, "File path #{file_path} not found" unless File.exist? file_path
+        raise ArgumentError, "Folder path #{dst_folder} not found" unless Dir.exist? dst_folder
+
         extracted_files = []
-        Zip::File.open(file_path) do |zipfile|
-          zipfile.each do |file|
-            if file.name.split("/").length > 1
-              sub_folder = File.join(dst_folder,
-                                     file.name.split("/")[0..-2].join("/"))
-              unless Dir.exist?(sub_folder)
-                FileUtils.mkdir_p sub_folder
+        if gzip?(file_path)
+          Zlib::GzipReader.open(file_path) do |gz|
+            File.open(File.join(dst_folder, gz.orig_name), 'wb') { |file| file.write(gz.read) }
+            extracted_files << GzipFile.new(gz)
+          end
+        else
+          Zip::File.open(file_path) do |zipfile|
+            zipfile.each do |file|
+              if file.name.split('/').length > 1
+                sub_folder = File.join(dst_folder,
+                                       file.name.split('/')[0..-2].join('/'))
+                unless Dir.exist?(sub_folder)
+                  FileUtils.mkdir_p sub_folder
+                end
               end
+              extracted_files << file.extract(File.join(dst_folder,file.name))
             end
-            extracted_files << file.extract(File.join(dst_folder,file.name))
           end
         end
-        return extracted_files
+        extracted_files
       end
 
       def self.automaster?(path, format)
@@ -65,13 +77,13 @@ def self.automaster?(path, format)
 
       def self.automaster(path, format)
         files = self.files_from_zip(path)
-        basename = File.basename(path, ".zip")
+        basename = File.basename(path, '.zip')
         basename = File.basename(basename, format)
         files.select {|f| File.basename(f, format).downcase.eql?(basename.downcase)}.first
       end
 
       def self.repeated_names_in_file_list(file_list)
-        return file_list.group_by {|x| x.split("/")[-1]}.select { |k,v| v.length > 1}
+        return file_list.group_by {|x| x.split('/')[-1]}.select { |k,v| v.length > 1}
       end
 
       def self.exists_and_file(path)
@@ -95,7 +107,7 @@ def self.download_file(uri, limit = 10)
         http_session.use_ssl = (uri.scheme == 'https')
         http_session.start do |http|
           http.read_timeout = 1800
-          http.request_get(uri.request_uri, {"Accept-Encoding" => "gzip"}) do |res|
+          http.request_get(uri.request_uri, {'Accept-Encoding' => 'gzip'}) do |res|
             if res.kind_of?(Net::HTTPRedirection)
               new_loc = res['location']
               if new_loc.match(/^(http:\/\/|https:\/\/)/)
@@ -108,9 +120,9 @@ def self.download_file(uri, limit = 10)
 
             raise Net::HTTPBadResponse.new("#{uri.request_uri}: #{res.code}") if res.code.to_i >= 400
 
-            file_size = res.read_header["content-length"].to_i
+            file_size = res.read_header['content-length'].to_i
             begin
-              filename = res.read_header["content-disposition"].match(/filename=\"(.*)\"/)[1] if filename.nil?
+              filename = res.read_header['content-disposition'].match(/filename=\"(.*)\"/)[1] if filename.nil?
             rescue
               filename = LinkedData::Utils::Triples.last_iri_fragment(uri.request_uri) if filename.nil?
             end
@@ -118,7 +130,7 @@ def self.download_file(uri, limit = 10)
             file.write(res.body)
 
             if res.header['Content-Encoding'].eql?('gzip')
-              uncompressed_file = Tempfile.new("uncompressed-ont-rest-file")
+              uncompressed_file = Tempfile.new('uncompressed-ont-rest-file')
               file.rewind
               sio = StringIO.new(file.read)
               gz = Zlib::GzipReader.new(sio)