@@ -31,6 +31,7 @@ class Genome < ApplicationRecord
3131
3232 include HasExternalResources
3333 include Genome ::ExternalResources
34+ include Genome ::SampleSet
3435
3536 attr_accessor :queue_for_source_update
3637
@@ -77,34 +78,6 @@ def required
7778 kind? source_database? source_accession?
7879 ]
7980 end
80-
##
# Sample attribute names considered important, grouped by theme. Returns a
# Hash mapping each group (+:date+, +:location+, +:toponym+, +:environment+,
# +:other+, +:package+) to an Array of attribute names as Symbols. A new Hash
# is built on every call.
def important_sample_attributes
  date = %i[collection_date event_date_time_start event_date_time_end]
  location = %i[
    lat_lon lat lon
    geographic_location_latitude geographic_location_longitude
    latitude_start latitude_end longitude_start longitude_end
  ]
  toponym = %i[
    geo_loc_name geographic_location_country_and_or_sea marine_region
  ]
  environment = %i[
    env_material sample_type env_biome isolation_source analyte_type
    env_broad_scale env_local_scale env_medium
    environment_biome environment_feature gold_ecosystem_classification
    broad_scale_environmental_context local_environmental_context
    environmental_medium
  ]
  other = %i[
    host ph depth temp temperature rel_to_oxygen geographic_location_depth
    chlorophyll isol_growth_condt
  ]
  package = %i[
    ncbi_package ena_checklist ncbi_submission_package biosamplemodel
  ]

  # Insertion order is the group order seen by callers iterating the Hash
  {
    date: date,
    location: location,
    toponym: toponym,
    environment: environment,
    other: other,
    package: package
  }
end
10881 end
10982
11083 @@FIELDS_WITH_AUTO = %i[
@@ -218,174 +191,6 @@ def source_links
218191 end
219192 end
220193
##
# Lists the sample identifiers mentioned in the +derived_from+ and
# +sample_derived_from+ source attributes that are neither keys of
# +source_hash[:samples]+ nor included in +source_accessions+. Returns an
# Array of Strings (empty if there is no +source_hash+). The result is
# memoized.
def source_extra_biosamples
  return [] unless source_hash
  return @source_extra_biosamples if @source_extra_biosamples

  mentioned =
    %i[derived_from sample_derived_from].flat_map do |attribute|
      (source_attributes[attribute] || []).flat_map do |statement|
        # Drop any "label: " prefix and all dots, then split the list on
        # commas (optionally followed by " and")
        statement.gsub(/.*: */, '').gsub(/[\.]/, '').split(/ *,(?: and)? */)
      end
    end

  already_known = source_hash[:samples].keys.map(&:to_s) + source_accessions
  @source_extra_biosamples = mentioned.uniq - already_known
end
239-
##
# Arranges the source attributes by the groups defined in
# +self.class.important_sample_attributes+. Returns a Hash with one entry per
# group, each holding a Hash of attribute name => value for the attributes
# with a present value in +source_attributes+ (possibly empty). Returns an
# empty Hash if there is no +source_hash+. The result is memoized.
def source_attribute_groups
  return {} unless source_hash
  return @source_attribute_groups if @source_attribute_groups

  grouped = self.class.important_sample_attributes
  @source_attribute_groups =
    grouped.each_with_object({}) do |(group, attributes), by_group|
      by_group[group] =
        attributes.each_with_object({}) do |attribute, found|
          values = source_attributes[attribute]
          found[attribute] = values if values.present?
        end
    end
end
254-
##
# Finds the locations of all source samples associated to this genome, and
# returns them as an Array with one entry per sample in
# +source_cannonical_samples+: a 2-element Array ([lat, lon], as signed
# decimal Floats) or +nil+ when no coordinates could be parsed for that
# sample. The result is memoized.
#
# For each sample, the joint +lat_lon+ attribute is tried first; if it is
# missing or unparseable, the individual latitude/longitude attributes are
# tried in order. Values may be decimal (with "." or "," as separator) or
# degrees/minutes/seconds (e.g. 45°30'15"), with an optional sign and an
# optional hemisphere letter (N/S/E/W, any case).
def source_sample_locations
  return @_source_sample_locations if @_source_sample_locations

  # The +i+ flag must live on this Regexp itself: an interpolated Regexp
  # keeps its own options and ignores the flags of the enclosing literal
  coord = /([-+] *)?(\d+(?:[\.\,]\d+)?|\d+°(?:\d+['"])*)( *[NSEW])?/i
  keys = {
    lat: %i[lat geographic_location_latitude latitude_start latitude_end],
    lon: %i[lon geographic_location_longitude longitude_start longitude_end]
  }

  @_source_sample_locations =
    source_cannonical_samples.map do |sample|
      # Fresh state per sample, so coordinates never leak between samples
      coords = { lat: nil, lon: nil }

      # Try joint keys
      # NOTE(review): the separator class includes "-" and is greedy, so in
      # values like "12.5, -45.3" the minus sign is consumed as separator
      # and the second coordinate is read as positive — confirm intent
      if sample[:lat_lon]
        m = sample[:lat_lon].match(/^ *(#{coord})[ ,;\/\-]+(#{coord}) *$/i)
        if m
          coords[:lat] = m[2..4]
          coords[:lon] = m[6..8]
        end
      end

      # Try individual keys, stopping at the first parseable one
      if coords.values.any?(&:nil?)
        keys.each do |dim, list|
          list.each do |key|
            next unless sample[key]

            m = sample[key].match(/^#{coord}$/i)
            coords[dim] = m[1..3] if m
            break unless coords[dim].nil?
          end
        end
      end

      next nil if coords.values.any?(&:nil?)

      # Parse each coordinate from its [sign, value, hemisphere] captures
      coords.values.map do |parts|
        sign, value, hemisphere = parts.map { |part| part.to_s.strip }
        dms = value.match(/^(\d+) *°(?: *(\d+) *'(?: *(\d+) *(?:"|''))?)?/)
        decimal =
          if dms
            # degrees + (minutes + seconds / 60) / 60
            dms[1].to_f + (dms[2].to_f + dms[3].to_f / 60) / 60
          else
            value.tr(',', '.').to_f
          end

        sign == '-' || %w[S W].include?(hemisphere.upcase) ? -decimal : decimal
      end
    end
end
311-
##
# Computes the rectangular bounds enclosing all sample locations reported by
# +source_sample_locations+ (entries without coordinates are ignored).
#
# Each range (latitude and longitude) is first widened by the multiplicative
# factor +pad+ (half on each side), and then, if still narrower than +min+,
# widened symmetrically up to +min+. Because +pad+ is multiplicative, a
# single location gets no padding from it, but +min+ still applies.
#
# Returns the bounds as an Array in the [south, west, north, east] order, or
# +nil+ if no locations are available.
def source_sample_area(min = 0.1, pad = 0.5)
  points = source_sample_locations.compact
  return if points.empty?

  # Index 0 holds latitudes, index 1 holds longitudes
  bounds =
    [0, 1].map do |axis|
      low, high = points.map { |point| point[axis] }.minmax

      span = (low - high).abs
      low -= span * pad / 2
      high += span * pad / 2

      span = (low - high).abs
      if span < min
        missing = (min - span) / 2
        low -= missing
        high += missing
      end

      [low, high]
    end

  (south, north), (west, east) = bounds
  [south, west, north, east]
end
341-
##
# Pools the attributes of all source samples into a single Hash. Keys are
# the attribute names normalized to lowercase Symbols with every run of
# non-alphanumeric characters collapsed to a single "_" (and trimmed from
# both ends); values are Arrays of the unique, whitespace-stripped values
# found across samples. Empty values and placeholders such as "not provided"
# or "n/a" (case-insensitive) are skipped, as are samples without
# attributes. Returns +nil+ if there is no +source_hash+. The result is
# memoized.
#
# TODO
# Use source_cannonical_samples instead!
def source_attributes
  return unless source_hash
  return @source_attributes if @source_attributes

  not_provided = [
    'not provided', 'not collected', 'unavailable', 'not applicable',
    'missing', '-', 'n/a', 'null'
  ]
  pooled = {}
  source_hash[:samples].each_value do |sample|
    attributes = sample[:attributes]
    next if attributes.nil? || attributes.empty?

    attributes.each do |key, value|
      # Strip a copy: the original belongs to +source_hash+, may be frozen,
      # and is not guaranteed to be a String
      clean = value.to_s.strip
      next if clean.empty? || not_provided.include?(clean.downcase)

      nice_key = key.to_s.downcase.gsub(/[^A-Za-z0-9]/, '_')
                    .gsub(/_+/, '_').gsub(/^_|_$/, '').to_sym
      (pooled[nice_key] ||= []) << clean
    end
  end
  pooled.each_value(&:uniq!)
  @source_attributes = pooled
end
368-
##
# Returns the source samples as an Array of Hashes, one per sample in
# +source_hash[:samples]+ and in the same order. In each Hash, keys are the
# attribute names normalized to lowercase Symbols with every run of
# non-alphanumeric characters collapsed to a single "_" (and trimmed from
# both ends), and values are the whitespace-stripped attribute values. Empty
# values and placeholders such as "not provided" or "n/a" (case-insensitive)
# are dropped; a sample without attributes yields an empty Hash. Returns an
# empty Array if there is no +source_hash+. The result is memoized.
def source_cannonical_samples
  return @_source_cannonical_samples if @_source_cannonical_samples
  return [] unless source_hash

  # Same placeholders as ignored by +source_attributes+
  not_provided = [
    'not provided', 'not collected', 'unavailable', 'not applicable',
    'missing', '-', 'n/a', 'null'
  ]
  @_source_cannonical_samples =
    source_hash[:samples].each_value.map do |sample|
      (sample[:attributes] || {}).each_with_object({}) do |(key, value), clean|
        # Strip a copy: the original belongs to +source_hash+, may be
        # frozen, and is not guaranteed to be a String
        stripped = value.to_s.strip
        next if stripped.empty? || not_provided.include?(stripped.downcase)

        nice_key = key.to_s.downcase.gsub(/[^A-Za-z0-9]/, '_')
                      .gsub(/_+/, '_').gsub(/^_|_$/, '').to_sym
        clean[nice_key] = stripped
      end
    end
end
388-
389194 ##
390195 # Returns registered BioSample accessions, directly from the database
391196 # if the source database is +:biosample+, or through the external links
0 commit comments