Skip to content

Commit 7fc7bbb

Browse files
committed
Towards reading XLSX files
1 parent 8ffbf4e commit 7fc7bbb

3 files changed

Lines changed: 84 additions & 32 deletions

File tree

app/models/register.rb

Lines changed: 33 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -420,6 +420,39 @@ def pending_genomes?
420420
names.any? { |n| n.type_genome.try(:pending?) }
421421
end
422422

423+
# ============ --- FILES --- ============
424+
# Per-kind file accessors. For each kind, +<kind>_file+ returns whatever the
# matching +<kind>_pdf+ reader returns, and +<kind>_file?+ reports whether
# that object is attached
%i[publication supplementary].each do |kind|
  reader = :"#{kind}_file"
  source = :"#{kind}_pdf"

  define_method(reader) { send(source) }

  define_method(:"#{reader}?") { send(reader).attached? }
end
433+
434+
# Is there an attached file of the given kind?
#
# +file+ is the kind of file, either +publication+ or +supplementary+, as
# Symbol or String. Any other value (including +nil+ and objects that cannot
# be turned into a Symbol) returns false instead of raising
def file?(file)
  return false unless file.respond_to?(:to_sym)

  kind = file.to_sym
  # The kind is checked against the known list before it ever reaches +send+
  %i[publication supplementary].include?(kind) && send(:"#{kind}_file?")
end
438+
439+
# Attached file of the given kind
#
# +file+ is the kind of file (+publication+ or +supplementary+). Returns
# +nil+ when +file?+ is not satisfied for that kind, and otherwise the
# object returned by the +<kind>_file+ accessor, so the mapping between a
# kind and its underlying attachment is resolved in a single place
def file(file)
  return unless file?(file)

  send(:"#{file}_file")
end
443+
444+
# Is the file of the given kind a PDF?
#
# +file+ is the kind of file (+publication+ or +supplementary+). Returns
# false if there is no such file. Otherwise returns true if the filename
# extension is +pdf+ (in any letter case) or the content type is
# +application/pdf+
def file_is_pdf?(file)
  attachment = file(file)
  return false unless attachment

  # Extensions are compared case-insensitively so that "PAPER.PDF" counts
  attachment.filename.extension.to_s.downcase == 'pdf' ||
    attachment.content_type == 'application/pdf'
end
448+
449+
# Is the file of the given kind an XLSX spreadsheet?
#
# +file+ is the kind of file (+publication+ or +supplementary+). Returns
# false if there is no such file. Otherwise returns true if the filename
# extension is +xlsx+ (in any letter case) or the content type is the
# OpenXML spreadsheet media type
def file_is_xlsx?(file)
  attachment = file(file)
  return false unless attachment

  # Extensions are compared case-insensitively so that "TABLE.XLSX" counts
  attachment.filename.extension.to_s.downcase == 'xlsx' ||
    attachment.content_type ==
      'application/vnd.openxmlformats-officedocument.spreadsheetml.sheet'
end
455+
423456
private
424457

425458
def assign_accession

app/models/register/status.rb

Lines changed: 38 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -272,55 +272,68 @@ def automated_validation
272272
end
273273

274274
##
# Check if the PDF and XLSX file(s) include accession and all list names, and
# report results as register list notes
#
# PDF files are scanned page by page and XLSX files row by row across every
# worksheet (not only the first one). In both cases the text is
# NFKC-normalized before searching, and scanning stops as soon as everything
# has been found
#
# Returns boolean, with true indicating all checks passed and false otherwise
#
# Any error raised while parsing is re-raised after an error note is added
#
# IMPORTANT: Notes are soft-registered, remember to +save+ to make them
# persistent, but the individual +Check+ entries are actually saved
def check_pdf_files
  # Per name: [accession or name URL found, name variant found]
  xnames = names.map { |n| [n, [false, false]] }.to_h
  %i[publication supplementary].each do |file|
    next unless file?(file)
    break if xnames.values.flatten.all?

    file(file).open do |fh|
      if file_is_pdf?(file)
        PDF::Reader.new(fh.path).pages.each do |page|
          break if _search_names_in_text(
            xnames, names, accession,
            page.text.unicode_normalize(:nfkc)
          )
        end
      elsif file_is_xlsx?(file)
        # The format is passed explicitly because the file may have been
        # recognized by content type only, while Roo otherwise guesses the
        # format from the extension of the path
        xlsx = Roo::Spreadsheet.open(fh.path, extension: :xlsx)
        done = false
        xlsx.sheets.each do |sheet|
          xlsx.sheet(sheet).each do |row|
            # Rows made only of numbers join into a non-UTF-8 string, so the
            # text is transcoded before it is normalized
            txt = row.select(&:present?).join(' ').encode(Encoding::UTF_8)
            done = _search_names_in_text(
              xnames, names, accession, txt.unicode_normalize(:nfkc)
            )
            break if done
          end
          break if done
        end
      end
    end
  end

  names.each do |n|
    par = { pass: xnames[n][0], user: nil }
    Check.create_with(par).find_or_create_by(
      name: n, kind: :effective_publication_missing_accession
    ).update(par)

    par = { pass: xnames[n][1], user: nil }
    Check.create_with(par).find_or_create_by(
      name: n, kind: :name_missing_in_effective_publication
    ).update(par)
  end

  add_note('The effective publication files have been parsed')
  xnames.values.flatten.all?
rescue StandardError
  add_note('ERROR: The effective publication files could not be parsed')
  raise
end
327+
328+
private
329+
330+
# Search +txt+ for the register accession and for each of +names+, updating
# +xnames+ in place
#
# +xnames+ is a Hash with names as keys and two-element Arrays as values:
# the first element is set to true once +accession+ or the value of
# +seqcode_url(false)+ for that name is found in the text, and the second
# once any of the name's +pdf_variants+ is found. Entries that are already
# true are never reset
#
# A blank +accession+ is not searched for, since +nil+ cannot be searched
# and an empty string would be found in any text
#
# Returns true once every entry of +xnames+ is true, false otherwise
def _search_names_in_text(xnames, names, accession, txt)
  if accession.present? && txt.index(accession)
    xnames.each_value { |found| found[0] = true }
  end
  names.each do |n|
    found = xnames[n]
    found[0] ||= txt.index(n.seqcode_url(false)).present?
    found[1] ||= n.pdf_variants.find { |i| txt.index(i) }.present?
  end
  xnames.values.flatten.all?
end
326338
end
339+

app/views/registers/_citation.html.erb

Lines changed: 13 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -104,15 +104,21 @@
104104
<%= @register.publication.long_citation_html %>
105105
</p>
106106
<% if @register.can_view_publication?(current_user) %>
107-
<% %i[publication_pdf supplementary_pdf].each do |attach| %>
108-
<% as = @register.send(attach) %>
109-
<% if as.attached? %>
107+
<% %i[publication supplementary].each do |attach| %>
108+
<% if @register.file?(attach) %>
110109
<%= link_to(
111-
rails_blob_path(as, disposition: 'attachment'),
112-
class: 'btn btn-secondary btn-sm'
110+
rails_blob_path(
111+
@register.file(attach), disposition: 'attachment'
112+
), class: 'btn btn-secondary btn-sm'
113113
) do %>
114-
<%= fa_icon('file-pdf') %>
115-
<%= attach.to_s.sub(/_.*/, '') %>
114+
<% if @register.file_is_pdf?(attach) %>
115+
<%= fa_icon('file-pdf') %>
116+
<% elsif @register.file_is_xlsx?(attach) %>
117+
<%= fa_icon('file-excel') %>
118+
<% else %>
119+
<%= fa_icon('file-alt') %>
120+
<% end %>
121+
<%= attach %>
116122
<% end %>
117123
<% end %>
118124
<% end %>

0 commit comments

Comments
 (0)