Skip to content

Commit 4e6c8c7

Browse files
committed
Preserve local curation when importing LPSN
- Local changes are now preserved between LPSN imports by marking fields for protection (`protect_from_lpsn`). - To use, execute the `lpsn:protect` task with the last imported version of LPSN. Any fields that differ from that version are assumed to have been manually updated and will be protected.
1 parent 1f85313 commit 4e6c8c7

4 files changed

Lines changed: 96 additions & 3 deletions

File tree

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
# Adds the nullable string column +protect_from_lpsn+ to the +names+ table.
class AddProtectFromLpsnToNames < ActiveRecord::Migration[6.1]
  # Reversible schema change: rolling back drops the column again.
  def change
    change_table :names do |t|
      # No explicit default is given, so existing and new rows start as NULL
      t.string :protect_from_lpsn
    end
  end
end

db/schema.rb

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,7 @@
1010
#
1111
# It's strongly recommended that you check this file into your version control system.
1212

13-
ActiveRecord::Schema.define(version: 2025_05_20_204421) do
13+
ActiveRecord::Schema.define(version: 2025_06_23_185140) do
1414

1515
create_table "action_text_rich_texts", force: :cascade do |t|
1616
t.string "name", null: false
@@ -234,6 +234,7 @@
234234
t.string "nomenclatural_type_type"
235235
t.integer "nomenclatural_type_id"
236236
t.datetime "claimed_at"
237+
t.string "protect_from_lpsn"
237238
t.index ["genome_id"], name: "index_names_on_genome_id"
238239
t.index ["name"], name: "index_names_on_name", unique: true
239240
t.index ["name_order"], name: "index_names_on_name_order"

lib/tasks/import_lpsn.rake

Lines changed: 13 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -33,7 +33,7 @@ namespace :lpsn do
3333
type_name = nil
3434
if pars[:rank] == 'genus'
3535
type_name = row['nomenclatural_type']
36-
else
36+
else # LPSN only has genera, species, and subspecies
3737
pars[:nomenclatural_type_type] = 'Strain'
3838
pars[:nomenclatural_type_entry] =
3939
row['nomenclatural_type'].gsub('; ', ' = ')
@@ -48,7 +48,7 @@ namespace :lpsn do
4848
pars[:lpsn_url] = row['address']
4949
pars[:status] = 20
5050

51-
# Save data
51+
# Check first the current name record
5252
name = Name.find_or_create_by(name: pars[:name])
5353
if name.status > 5 && name.status < 20
5454
warn "- Name in SeqCode, bypassing: #{name.name}"
@@ -58,6 +58,17 @@ namespace :lpsn do
5858
warn "- Name in a different code, bypassing: #{name.name}"
5959
next
6060
end
61+
if name.redirect_id.present?
62+
warn "- Name is deprecated, bypassing: #{name.name}"
63+
next
64+
end
65+
name.protect_from_lpsn.to_s.split(',').each do |i|
66+
pars.delete(i.to_sym)
67+
parent = nil if i == 'parent'
68+
row['record_lnk'] = nil if i == 'correct_name'
69+
end
70+
71+
# Save data
6172
name.update!(pars)
6273
parsed_names[row['record_no']] = {
6374
name_id: name.id, parent: parent, correct_name: row['record_lnk'],

lib/tasks/protect_lpsn.rake

Lines changed: 76 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,76 @@
1+
2+
require 'csv'
3+
4+
namespace :lpsn do
  desc 'Marks changes as protected from LPSN for future updates'
  # Compares every entry of an LPSN CSV dump against the matching Name record
  # and registers the fields whose current value differs from the CSV in the
  # comma-separated +protect_from_lpsn+ column of that record.
  #
  # Arguments:
  #   csv - path to the LPSN CSV file (with headers). When missing, the usage
  #         line is printed and the process exits with status 0.
  #
  # Entries repeated in the CSV are only evaluated once, using their last
  # occurrence. Names not found in the database are ignored.
  task :protect, [:csv] => :environment do |t, args|
    unless args[:csv]
      puts "Usage: rake #{t}[lpsn_gss.csv]"
      exit 0
    end

    # Columns that together identify one LPSN entry
    entry_key = lambda do |row|
      [row['genus_name'], row['sp_epithet'], row['subsp_epithet']]
    end
    # Name attributes compared directly against the values parsed from the CSV
    compared = %i[
      proposal_kind nomenclatural_status taxonomic_status authority
    ]

    # First pass, register the index of the last row of each entry
    $stderr.puts "Parsing CSV: #{args[:csv]}"
    last_index = {}
    CSV.foreach(args[:csv], headers: true).each_with_index do |row, k|
      last_index[entry_key.call(row)] = k
    end

    # Second pass, actually do work
    CSV.foreach(args[:csv], headers: true).each_with_index do |row, k|
      $stderr.print "- #{k} \r"

      # Discard repeats: only the last occurrence of an entry is evaluated
      next unless last_index[entry_key.call(row)] == k

      # Build the full name and the name of the expected parent taxon
      # (genus for species, species for subspecies, none for genera)
      full_name = row['genus_name']
      parent_name = nil
      if row['sp_epithet'].present?
        parent_name = full_name
        full_name = "#{full_name} #{row['sp_epithet']}"
        if row['subsp_epithet'].present?
          parent_name = full_name
          full_name = "#{full_name} subsp. #{row['subsp_epithet']}"
        end
      end

      # The status cell is a '; '-separated list. An empty cell is parsed as
      # nil by CSV, hence the +to_s+ so such rows simply carry no status data
      status = row['status'].to_s.split('; ')
      lpsn = { authority: row['authors'] }
      lpsn[:proposal_kind] = status[1] if status[1].present?
      lpsn[:nomenclatural_status] = status[2] if status[2].present?
      lpsn[:taxonomic_status] = status[3] if status[3].present?

      # Find the current record, applying the same status filters used before
      # overwriting a name on import (presumably names handled under SeqCode
      # or under a different code - confirm against the LPSN import task)
      name = Name.find_by(name: full_name)
      next unless name.present?
      next if name.status > 5 && name.status < 20
      next if name.status > 20

      # Start from the fields already protected and add those that differ;
      # nil and empty string are considered equal
      protect = Set.new(name.protect_from_lpsn.to_s.split(','))
      compared.each do |field|
        protect << field.to_s if name.public_send(field).to_s != lpsn[field].to_s
      end

      # A curated nomenclatural or taxonomic status also protects the link to
      # the correct name, whenever LPSN provides one
      if row['record_lnk'].present? && (
           protect.include?('nomenclatural_status') ||
           protect.include?('taxonomic_status')
         )
        protect << 'correct_name'
      end

      # Protect the parent if the current one is not a variant of the LPSN one
      if parent_name.present? && name.parent.present? &&
         !name.parent.is_variant?(parent_name)
        protect << 'parent'
      end

      next if protect.empty?

      warn "- Protecting: #{name.name}: #{protect.join(', ')}"
      name.update_column(:protect_from_lpsn, protect.join(','))
    end # CSV.foreach
    $stderr.puts
  end
end

0 commit comments

Comments
 (0)