Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
30 changes: 27 additions & 3 deletions cancermuts/datasources.py
Original file line number Diff line number Diff line change
Expand Up @@ -1846,12 +1846,12 @@ def __init__(self, database_dir, database_files=None):
self._ptm_types = ['acetylation', 'methylation', 'O-GalNAc', 'O-GlcNAc', 'phosphorylation', 'sumoylation', 'ubiquitination']
self._ptm_types_to_classes = { 'acetylation' : 'ptm_acetylation',
'methylation' : 'ptm_methylation',
'O-GalNAc' : 'ptm_ogalnac',
'O-GlcNAc' : 'ptm_oglcnac',
'O-GalNAc' : 'ptm_glycosylation',
'O-GlcNAc' : 'ptm_glycosylation',
'phosphorylation' : 'ptm_phosphorylation',
'sumoylation' : 'ptm_sumoylation',
'ubiquitination' : 'ptm_ubiquitination' }
self._ptm_suffixes = ['ac', 'm[0-9]', 'ga', 'gl', 'p', 'sm', 'ub']
self._ptm_suffixes = ['ac', 'm[0-9]', 'gly', 'gly', 'p', 'sm', 'ub']
self._ptm_suffix_offsets = [-3, -3, -3, -3, -2, -3, -3]

self._database_dir = database_dir
Expand Down Expand Up @@ -1931,6 +1931,30 @@ def add_position_properties(self, sequence, properties=None):
)
position.add_property(property_obj)
self.log.info("adding %s to site %s" % (m, property_obj.name))

is_gly = False
has_gly_site = False
has_gly_sub = False
for prop in position.properties.values():
if prop.category == 'ptm_glycosylation':
is_gly = True
break

has_gly_site = any(prop.category == 'ptm_glycosylation' for prop in position.properties.values())
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think this might be the source of your issue

here you use lines 1935-1941 to check which one of your properties is a Gly (ok)

However, you don't handle the case in which there are no Glys. This part of the code just uses `prop`, which is whatever property came last out of the previous for loop, whether it is a glycosylation or not (i.e. whether the break was triggered or not).

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

mind that this section of the code is bound to be changed - read all my comments before starting to change things :)

if not has_gly_site:
gly_site = position_properties_classes['ptm_glycosylation'](
position=position,
sources=[self])
position.add_property(gly_site)
Comment on lines +1935 to +1948
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This code assumes we will have only one glycosylation site property per position.

This might be the case - but do we also need to consider cases where there might be different glycosylation subtypes for the same site, e.g. if our different sources have different or complementary subtypes?


has_gly_subtype = any(prop.header == 'glycosylation_subtype' for prop in position.properties.values())
if not has_gly_subtype:
gly_sub = position_properties_classes['glycosylation_subtype'](
position=position,
sources=[self],
subtype=""
)
position.add_property(gly_sub)

class MyVariant(DynamicSource, object):
@logger_init
Expand Down
48 changes: 27 additions & 21 deletions cancermuts/properties.py
Original file line number Diff line number Diff line change
Expand Up @@ -173,47 +173,53 @@ def __init__(self, position, sources):

def get_value_str(self):
return self.code

class OGalNAcSite(PositionProperty):
description = "OGalNAc site"
header = "ogalnac_site"
category='ptm_ogalnac'
code = "O-GalNAc"
class GlycosylationSite(PositionProperty):
description = "Glycosylation site"
header = "glycosylation_site"
category='ptm_glycosylation'
code = "Gly"

def __init__(self, position, sources):
super(OGalNAcSite, self).__init__( name="o-GalNAc Site",
super(GlycosylationSite, self).__init__( name="Glycosylation Site",
position=position,
sources=sources,
values={},
metadata={} )

def get_value_str(self):
return self.code

class GlycosylationSubtype(PositionProperty):
description = "Glycosylation Subtype"
header = "glycosylation_subtype"
category="glycosylation_subtype"
#code =
Comment on lines +193 to +197
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I don't think this should be a PositionProperty

This is not a feature of the protein position; it's a feature of the glycosylation itself.

I think the constructor of `GlycosylationSite` should accept a subtype argument, which would be a list of the subtypes that can be at that site (as strings), so that they can be provided when creating the object and tracked as `GlycosylationSite.subtypes`.


class OGlcNAcSite(PositionProperty):
description = "OGlcNAc site"
header = "oglcnac_site"
category='ptm_oglcnac'
code = "O-GlcNAc"

subtype = ""

def __init__(self, position, sources):
super(OGlcNAcSite, self).__init__( name="o-GlcNAc Site",
def __init__(self, position, sources, subtype):
super(GlycosylationSubtype, self).__init__( name="GlycosylationSubtype",
position=position,
sources=sources,
values={},
values={"subtype": subtype},
metadata={} )

def get_value_str(self):
return self.code

return self.values.get("subtype", "")
#return self.values.get("subtype")
#val = self.values.get("subtype")
#if val is None:
#return ""
#else:
#return str(val)

class SumoylationSite(PositionProperty):
description = "Sumoylation site"
header = "sumoyylation_site"
category='ptm_sumoylation'
code = "Sumo"


def __init__(self, position, sources):
super(SumoylationSite, self).__init__( name="Sumoylation Site",
position=position,
Expand Down Expand Up @@ -306,8 +312,8 @@ def __init__(self, position, sources):
'ptm_methylation' : MethylationSite,
'ptm_acetylation' : AcetylationSite,
'ptm_nitrosylation' : SNitrosylationSite,
'ptm_ogalnac' : OGalNAcSite,
'ptm_oglcnac' : OGlcNAcSite,
'ptm_glycosylation' : GlycosylationSite,
'glycosylation_subtype' : GlycosylationSubtype,
'ptm_sumoylation' : SumoylationSite,
'ptm_ubiquitination' : UbiquitinationSite,
'ptm_cleavage' : CleavageSite,
Expand Down
6 changes: 3 additions & 3 deletions cancermuts/table.py
Original file line number Diff line number Diff line change
Expand Up @@ -125,8 +125,8 @@ class Table:
ptm_colors = defaultdict(lambda: 'black',
{ 'ptm_acetylation' : 'grey',
'ptm_methylation' : 'darkgreen',
'ptm_ogalnac' : 'orange',
'ptm_oglcnac' : 'darkorange',
'ptm_glycosylation' : 'orange',
'ptm_gly_subtype' : 'darkorange',
'ptm_phosphorylation' : 'red',
'ptm_ubiquitination' : 'blue',
'ptm_sumoylation' : 'lightblue',
Expand Down Expand Up @@ -181,7 +181,7 @@ def __init__(self, labels=None, ptm_colors=None, y_ptm=1.02):
def to_dataframe(self, sequence, mutation_metadata=["cancer_study", "cancer_type", "genomic_coordinates", "genomic_mutations", "revel_score", "cancer_site", "cancer_histology",'gnomad_exome_allele_frequency', 'gnomad_genome_allele_frequency',
'gnomad_popmax_exome_allele_frequency', 'gnomad_popmax_genome_allele_frequency', 'clinvar_variant_id', 'clinvar_germline_classification', 'clinvar_germline_condition', 'clinvar_germline_review_status',
'clinvar_oncogenicity_classification', 'clinvar_oncogenicity_condition', 'clinvar_oncogenicity_review_status', 'clinvar_clinical_impact_classification', 'clinvar_clinical_impact_condition', 'clinvar_clinical_impact_review_status'],
position_properties=['ptm_phosphorylation','ptm_methylation','ptm_ubiquitination','ptm_cleavage', 'ptm_nitrosylation','ptm_acetylation', 'ptm_sumoylation', 'ptm_ogalnac', 'ptm_oglcnac', 'mobidb_disorder_propensity'],
position_properties=['ptm_phosphorylation','ptm_methylation','ptm_ubiquitination','ptm_cleavage', 'ptm_nitrosylation','ptm_acetylation', 'ptm_sumoylation', 'ptm_glycosylation', 'glycosylation_subtype', 'mobidb_disorder_propensity'],
sequence_properties=['linear_motif', 'structure']):

rows = []
Expand Down
Loading