Skip to content

Commit b28af6b

Browse files
committed
Implement recommendation 9.2
Close #6
1 parent 16062c6 commit b28af6b

2 files changed

Lines changed: 71 additions & 2 deletions

File tree

app/models/name.rb

Lines changed: 43 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -218,14 +218,14 @@ def fuzzy_match(
218218
when :similarity
219219
threshold ||= 0.7
220220
selection
221-
.select("name, similarity(name, #{clean_query}) AS score")
221+
.select("id, name, similarity(name, #{clean_query}) AS score")
222222
.where('similarity(name, ?) > ?', query, threshold)
223223
.order('score DESC')
224224
.limit(limit)
225225
when :levenshtein
226226
threshold ||= 3
227227
selection
228-
.select("name, levenshtein(name, #{clean_query}) AS score")
228+
.select("id, name, levenshtein(name, #{clean_query}) AS score")
229229
.where('levenshtein(name, ?) <= ?', query, threshold)
230230
.order('score ASC')
231231
.limit(limit)
@@ -650,6 +650,47 @@ def is_variant?(alt_spelling)
650650
false
651651
end
652652

653+
##
# Find names similar to the current one (using the canonical spelling from
# +base_name+) with Levenshtein ≤ 3, considering a search space defined by
# the taxonomic rank and +among+:
# - valid: All validly published names of genera or species of the same genus
# - public: All publicly visible names of genera or species of the same genus
# - register: All names of genera or species of the same genus in the same
#   register list as this name (if any)
#
# The search space can be given as the keyword +among+ or as a single
# positional argument (+similar_names(:register)+); when both are given, the
# positional value takes precedence.
#
# Returns the result of +fuzzy_match+ over the restricted selection, or +nil+
# when:
# - the name is not at (inferred) rank of genus or species,
# - the name is a species without a parent, or
# - +:register+ was requested and the name has no register list.
#
# Raises +ArgumentError+ for any unsupported search space.
def similar_names(space = nil, among: :valid)
  # Accept the positional form too, so that both `similar_names(:valid)` and
  # `similar_names(among: :valid)` select the same search space
  among = space unless space.nil?

  selection =
    case among.to_sym
    when :valid
      self.class.all_valid
    when :public
      self.class.all_public
    when :register
      return unless register.present?

      register.names
    else
      raise ArgumentError, "Unsupported search space (among): #{among}"
    end

  # A missing inferred rank falls through to the +else+ branch (returns nil)
  case inferred_rank&.to_sym
  when :genus
    selection = selection.where(rank: :genus)
  when :species
    return unless parent.present?

    # Only compare against names sharing this name's parent
    selection = selection.where(parent_id: parent.id)
  else
    return
  end

  # Never report the name as similar to itself
  selection = selection.where.not(id: id)
  self.class.fuzzy_match(
    base_name, method: :levenshtein, selection: selection
  )
end
693+
653694
# ============ --- OUTLINKS --- ============
654695

655696
def ncbi_search_url

app/models/name/quality_checks.rb

Lines changed: 28 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -176,6 +176,26 @@ class QcWarning
176176
}.merge(@@link_to_edit_spelling),
177177
# - Recommendation 9.1 covered in § Rule 9b
178178
# - Recommendation 9.2:
179+
similar_names_validly_published: {
180+
message: lambda { |w|
181+
similar = w.name.similar_names(:valid).limit(5).map(&:name)
182+
<<~MSG.html_safe
183+
Name is similar in spelling to: #{similar.to_sentence}.
184+
Consider variations that are less prone to confusion
185+
MSG
186+
},
187+
recommendations: %w[9.2]
188+
}.merge(@@link_to_edit_spelling),
189+
similar_names_in_register_list: {
190+
message: lambda { |w|
191+
similar = w.name.similar_names(:register).limit(5).map(&:name)
192+
<<~MSG.html_safe
193+
Name is similar in spelling to: #{similar.to_sentence}.
194+
Consider variations that are less prone to confusion
195+
MSG
196+
},
197+
recommendations: %w[9.2]
198+
}.merge(@@link_to_edit_spelling),
179199
# Names should differ by at least three characters from existing names
180200
# of genera or species within the same genus.
181201
# - Recommendation 9.3 [Checklist-N]
@@ -1006,6 +1026,14 @@ def qc_warnings
10061026
@qc_warnings.add(:long_name) if long_word?
10071027
@qc_warnings.add(:difficult_to_pronounce) if hard_to_pronounce?
10081028

1029+
if similar_names(:valid).present?
1030+
@qc_warnings.add(:similar_names_validly_published)
1031+
end
1032+
1033+
if similar_names(:register).present?
1034+
@qc_warnings.add(:similar_names_in_register_list)
1035+
end
1036+
10091037
if rank? && %w[species subspecies].include?(rank)
10101038
unless consistent_species_name?
10111039
@qc_warnings.add(:inconsistent_species_name)

0 commit comments

Comments
 (0)