Skip to content

Lots of syntax warnings with Python 3.12+ #147

@jonhoo

Description

@jonhoo

Describe the bug
With newer Python versions, the first time feedvalidator is run, it produces a large number of warnings:

/home/runner/work/pod-misc/pod-misc/feedvalidator/src/feedvalidator/__init__.py:50: SyntaxWarning: invalid escape sequence '\s'
  if re.match("^\s+<\?xml",aString) and re.search("<generator.*wordpress.*</generator>",aString):
/home/runner/work/pod-misc/pod-misc/feedvalidator/src/feedvalidator/__init__.py:75: SyntaxWarning: invalid escape sequence '\?'
  xmlver = re.match("^<\?\s*xml\s+version\s*=\s*['\"]([-a-zA-Z0-9_.:]*)['\"]",aString)
/home/runner/work/pod-misc/pod-misc/feedvalidator/src/feedvalidator/base.py:98: SyntaxWarning: invalid escape sequence '\w'
  return re.match(".*\w", ns).group().lower()
/home/runner/work/pod-misc/pod-misc/feedvalidator/src/feedvalidator/validators.py:55: SyntaxWarning: invalid escape sequence '\s'
  mime_re = re.compile('[^\s()<>,;:\\"/[\]?=]+/[^\s()<>,;:\\"/[\]?=]+(\s*;\s*[^\s()<>,;:\\"/[\]?=]+=("(\\"|[^"])*"|[^\s()<>,;:\\"/[\]?=]+))*$')
/home/runner/work/pod-misc/pod-misc/feedvalidator/src/feedvalidator/validators.py:164: SyntaxWarning: invalid escape sequence '\('
  valid_css_values = re.compile('^(#[0-9a-f]+|rgb\(\d+%?,\d*%?,?\d*%?\)?|' +
/home/runner/work/pod-misc/pod-misc/feedvalidator/src/feedvalidator/validators.py:165: SyntaxWarning: invalid escape sequence '\d'
  '\d?\.?\d?\d(cm|em|ex|in|mm|pc|pt|px|%|,|\))?)$')
/home/runner/work/pod-misc/pod-misc/feedvalidator/src/feedvalidator/validators.py:245: SyntaxWarning: invalid escape sequence '\d'
  match = re.search(', at line (\d+), column (\d+)',str(msg))
/home/runner/work/pod-misc/pod-misc/feedvalidator/src/feedvalidator/validators.py:268: SyntaxWarning: invalid escape sequence '\s'
  if not re.match("""^([:,;#%.\sa-zA-Z0-9!]|\w-\w|'[\s\w]+'|"[\s\w]+"|\([\d,\s]+\))*$""", style):
/home/runner/work/pod-misc/pod-misc/feedvalidator/src/feedvalidator/validators.py:270: SyntaxWarning: invalid escape sequence '\s'
  if not re.match("^(\s*[-\w]+\s*:\s*[^:;]*(;|$))*$", style):
/home/runner/work/pod-misc/pod-misc/feedvalidator/src/feedvalidator/validators.py:274: SyntaxWarning: invalid escape sequence '\w'
  for prop,value in re.findall("([-\w]+)\s*:\s*([^:;]*)",style.lower()):
/home/runner/work/pod-misc/pod-misc/feedvalidator/src/feedvalidator/validators.py:361: SyntaxWarning: invalid escape sequence '\.'
  domain_re = '''(([0-9]{1,3}\.[0-9]{1,3}\.[0-9]{1,3}\.)|(([A-Z0-9\-]+\.)+))([A-Z0-9][-A-Z0-9]*)'''
/home/runner/work/pod-misc/pod-misc/feedvalidator/src/feedvalidator/validators.py:362: SyntaxWarning: invalid escape sequence '\-'
  email_re = re.compile("([A-Z0-9_\-\+\.\']+)@" + domain_re + "$", re.I)
/home/runner/work/pod-misc/pod-misc/feedvalidator/src/feedvalidator/validators.py:363: SyntaxWarning: invalid escape sequence '\w'
  simple_email_re = re.compile('^[\w._%+-]+@[A-Za-z][\w.-]+$')
/home/runner/work/pod-misc/pod-misc/feedvalidator/src/feedvalidator/validators.py:429: SyntaxWarning: invalid escape sequence '\d'
  iso8601_re = re.compile("^\d\d\d\d(-\d\d(-\d\d(T\d\d:\d\d(:\d\d(\.\d*)?)?" +
/home/runner/work/pod-misc/pod-misc/feedvalidator/src/feedvalidator/validators.py:430: SyntaxWarning: invalid escape sequence '\d'
  "(Z|([+-]\d\d:\d\d))?)?)?)?$")
/home/runner/work/pod-misc/pod-misc/feedvalidator/src/feedvalidator/validators.py:477: SyntaxWarning: invalid escape sequence '\d'
  iso8601_re = re.compile("^\d\d\d\d(-\d\d(-\d\d(T\d\d:\d\d(:\d\d(\.\d*)?)?" +
/home/runner/work/pod-misc/pod-misc/feedvalidator/src/feedvalidator/validators.py:478: SyntaxWarning: invalid escape sequence '\d'
  "(Z|([+-]\d\d:\d\d)))?)?)?$")
/home/runner/work/pod-misc/pod-misc/feedvalidator/src/feedvalidator/validators.py:487: SyntaxWarning: invalid escape sequence '\d'
  iso8601_re = re.compile("^\d\d\d\d-\d\d-\d\dT\d\d:\d\d:\d\d(\.\d*)?" +
/home/runner/work/pod-misc/pod-misc/feedvalidator/src/feedvalidator/validators.py:488: SyntaxWarning: invalid escape sequence '\d'
  "(Z|([+-]\d\d:\d\d))$")
/home/runner/work/pod-misc/pod-misc/feedvalidator/src/feedvalidator/validators.py:492: SyntaxWarning: invalid escape sequence '\d'
  date_re = re.compile("^\d\d\d\d-\d\d-\d\d$")
/home/runner/work/pod-misc/pod-misc/feedvalidator/src/feedvalidator/validators.py:617: SyntaxWarning: invalid escape sequence '\s'
  rfc822_re = re.compile("(((mon)|(tue)|(wed)|(thu)|(fri)|(sat)|(sun))\s*,\s*)?" +
/home/runner/work/pod-misc/pod-misc/feedvalidator/src/feedvalidator/validators.py:618: SyntaxWarning: invalid escape sequence '\d'
  "\d\d?\s+((jan)|(feb)|(mar)|(apr)|(may)|(jun)|(jul)|(aug)|(sep)|(oct)|" +
/home/runner/work/pod-misc/pod-misc/feedvalidator/src/feedvalidator/validators.py:619: SyntaxWarning: invalid escape sequence '\s'
  "(nov)|(dec))\s+\d\d(\d\d)?\s+\d\d:\d\d(:\d\d)?\s+(([+-]\d\d\d\d)|" +
/home/runner/work/pod-misc/pod-misc/feedvalidator/src/feedvalidator/validators.py:623: SyntaxWarning: invalid escape sequence '\d'
  "\d\d? ((Jan)|(Feb)|(Mar)|(Apr)|(May)|(Jun)|(Jul)|(Aug)|(Sep)|(Oct)|" +
/home/runner/work/pod-misc/pod-misc/feedvalidator/src/feedvalidator/validators.py:624: SyntaxWarning: invalid escape sequence '\d'
  "(Nov)|(Dec)) \d\d\d\d \d\d:\d\d(:\d\d)? (([+-]?\d\d[03]0)|" +
/home/runner/work/pod-misc/pod-misc/feedvalidator/src/feedvalidator/validators.py:649: SyntaxWarning: invalid escape sequence '\('
  while value1!=value2: value1,value2=value2,re.sub('\([^(]*?\)',' ',value2)
/home/runner/work/pod-misc/pod-misc/feedvalidator/src/feedvalidator/validators.py:660: SyntaxWarning: invalid escape sequence '\w'
  chunks=re.split('&#?(\w+);',data)
/home/runner/work/pod-misc/pod-misc/feedvalidator/src/feedvalidator/validators.py:678: SyntaxWarning: invalid escape sequence '\s'
  anchor_re = re.compile('<a\s+href=(?:"(.*?)"|\'(.*?)\'|([\w-]+))[\s>]', re.IGNORECASE)
/home/runner/work/pod-misc/pod-misc/feedvalidator/src/feedvalidator/validators.py:679: SyntaxWarning: invalid escape sequence '\s'
  img_re = re.compile('<img\s+[^>]*src=(?:"(.*?)"|\'(.*?)\'|([\w-]+))[\s>]', re.IGNORECASE)
/home/runner/work/pod-misc/pod-misc/feedvalidator/src/feedvalidator/validators.py:680: SyntaxWarning: invalid escape sequence '\w'
  absref_re = re.compile("\w+:")
/home/runner/work/pod-misc/pod-misc/feedvalidator/src/feedvalidator/validators.py:720: SyntaxWarning: invalid escape sequence '\w'
  htmlEndTag_re = re.compile("</(\w+)>")
/home/runner/work/pod-misc/pod-misc/feedvalidator/src/feedvalidator/validators.py:721: SyntaxWarning: invalid escape sequence '\w'
  htmlEntity_re = re.compile("&(#?\w+)")
/home/runner/work/pod-misc/pod-misc/feedvalidator/src/feedvalidator/validators.py:829: SyntaxWarning: invalid escape sequence '\d'
  if not re.match('\d+\.?\d*$', self.value):
/home/runner/work/pod-misc/pod-misc/feedvalidator/src/feedvalidator/validators.py:834: SyntaxWarning: invalid escape sequence '\s'
  if not re.match('^\s*[A-Za-z0-9]+\s*$', self.value):
/home/runner/work/pod-misc/pod-misc/feedvalidator/src/feedvalidator/validators.py:871: SyntaxWarning: invalid escape sequence '\d'
  http_re = re.compile("(http|https)://" + addr_spec.domain_re + '(?::\d+)?' + '(/|$)', re.IGNORECASE)
/home/runner/work/pod-misc/pod-misc/feedvalidator/src/feedvalidator/validators.py:973: SyntaxWarning: invalid escape sequence '\d'
  duration_re = re.compile("\d+(:[0-5][0-9](:[0-5][0-9])?)?$")
/home/runner/work/pod-misc/pod-misc/feedvalidator/src/feedvalidator/validators.py:996: SyntaxWarning: invalid escape sequence '\d'
  if not re.match("^\d+(,\s*\d+)*$", self.value):
/home/runner/work/pod-misc/pod-misc/feedvalidator/src/feedvalidator/uri.py:83: SyntaxWarning: invalid escape sequence '\d'
  portRe = re.compile(':(\d*)$')
/home/runner/work/pod-misc/pod-misc/feedvalidator/src/feedvalidator/uri.py:149: SyntaxWarning: invalid escape sequence '\?'
  uriRe = re.compile('^(([^:/?#]+):)?(//([^/?#]*))?([^?#]*)(\?([^#]*))?(#(.*))?$')
/home/runner/work/pod-misc/pod-misc/feedvalidator/src/feedvalidator/extension.py:766: SyntaxWarning: invalid escape sequence '\d'
  if not re.match('^[-+]?\d+\.?\d*[ ,][-+]?\d+\.?\d*$', self.value):
/home/runner/work/pod-misc/pod-misc/feedvalidator/src/feedvalidator/extension.py:788: SyntaxWarning: invalid escape sequence '\d'
  if not re.match('^[-+]?\d+\.?\d*$', value):
/home/runner/work/pod-misc/pod-misc/feedvalidator/src/feedvalidator/extension.py:1160: SyntaxWarning: invalid escape sequence '\d'
  if not re.match('\d+\.?\d*\s*\w*', self.value):
/home/runner/work/pod-misc/pod-misc/feedvalidator/src/feedvalidator/extension.py:1167: SyntaxWarning: invalid escape sequence '\d'
  if not re.match('\d+\.?\d*\s*\w*$', self.value):
/home/runner/work/pod-misc/pod-misc/feedvalidator/src/feedvalidator/extension.py:1174: SyntaxWarning: invalid escape sequence '\d'
  if not re.match('[-+]?\d+\.?\d*\s*$', self.value):
/home/runner/work/pod-misc/pod-misc/feedvalidator/src/feedvalidator/media.py:250: SyntaxWarning: invalid escape sequence '\d'
  npt_re = re.compile("^(now)|(\d+(\.\d+)?)|(\d+:\d\d:\d\d(\.\d+)?)$")
/home/runner/work/pod-misc/pod-misc/feedvalidator/src/feedvalidator/media.py:298: SyntaxWarning: invalid escape sequence '\d'
  if self.value and not re.match('\d+\.?\d*', self.value):
/home/runner/work/pod-misc/pod-misc/feedvalidator/src/feedvalidator/media.py:307: SyntaxWarning: invalid escape sequence '\d'
  if self.value and not re.match('\d+\.?\d*', self.value):
/home/runner/work/pod-misc/pod-misc/feedvalidator/src/feedvalidator/media.py:320: SyntaxWarning: invalid escape sequence '\d'
  if self.value and not re.match('\d+\.?\d*', self.value):
/home/runner/work/pod-misc/pod-misc/feedvalidator/src/feedvalidator/media.py:339: SyntaxWarning: invalid escape sequence '\d'
  if self.value and not re.match('\d+\.?\d*', self.value):
/home/runner/work/pod-misc/pod-misc/feedvalidator/src/feedvalidator/image.py:73: SyntaxWarning: invalid escape sequence '\w'
  if re.match("^\w+$", ext) and ext not in ['jpg','jpeg','gif','png']:

To Reproduce

  • git clone https://github.com/w3c/feedvalidator.git
  • python src/demo.py "https://rustacean-station.org/podcast.rss"

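Interestingly, if the command is then re-run, it produces no warnings or errors.
However, if one runs git clean -d -f -x and then re-runs, the warnings are printed again. (This is because SyntaxWarnings are emitted only when a module is compiled; later runs load the cached bytecode from __pycache__ until git clean removes it.)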

Expected behavior
No warnings are produced

Additional context
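This changed from a DeprecationWarning to a SyntaxWarning in Python 3.12 (see the second bullet under "Other Language Changes" in "What's New in Python 3.12"). These will become syntax errors in a future Python version. The solution here is to make these raw strings (e.g., r"\d"), taking care with literals that also contain valid escapes such as \\ or \', since Python no longer processes those inside a raw string.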

Metadata

Metadata

Assignees

No one assigned

    Labels

    bug — Something isn't working

    Type

    No type

    Projects

    No projects

    Milestone

    No milestone

    Relationships

    None yet

    Development

    No branches or pull requests

    Issue actions