From dbb0069db83f4bd6eed824dd7db8c78b367af196 Mon Sep 17 00:00:00 2001 From: Hugo van Kemenade <1324225+hugovk@users.noreply.github.com> Date: Fri, 4 Apr 2025 18:22:37 +0300 Subject: [PATCH 1/2] Lint with Ruff via pre-commit on GitHub Actions --- .github/workflows/lint.yml | 22 ++++++++++++++++++++++ .pre-commit-config.yaml | 6 ++++++ .ruff.toml | 9 +++++++++ 3 files changed, 37 insertions(+) create mode 100644 .github/workflows/lint.yml create mode 100644 .pre-commit-config.yaml create mode 100644 .ruff.toml diff --git a/.github/workflows/lint.yml b/.github/workflows/lint.yml new file mode 100644 index 00000000..e25d314c --- /dev/null +++ b/.github/workflows/lint.yml @@ -0,0 +1,22 @@ +name: Lint + +on: [push, pull_request, workflow_dispatch] + +permissions: {} + +env: + FORCE_COLOR: 1 + RUFF_OUTPUT_FORMAT: github + +jobs: + lint: + runs-on: ubuntu-latest + + steps: + - uses: actions/checkout@v4 + with: + persist-credentials: false + - uses: actions/setup-python@v5 + with: + python-version: "3.x" + - uses: tox-dev/action-pre-commit-uv@v1 diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml new file mode 100644 index 00000000..ae05443e --- /dev/null +++ b/.pre-commit-config.yaml @@ -0,0 +1,6 @@ +repos: + - repo: https://github.com/astral-sh/ruff-pre-commit + rev: v0.11.3 + hooks: + - id: ruff + args: [--exit-non-zero-on-fix] diff --git a/.ruff.toml b/.ruff.toml new file mode 100644 index 00000000..af15c969 --- /dev/null +++ b/.ruff.toml @@ -0,0 +1,9 @@ +fix = true +extend-exclude = [ + "*.cgi", # Python 2 code +] +target-version = "py310" + +lint.select = [ + "W605", # invalid-escape-sequence +] From 2e70417236aacd93332a4c09ba0b6edaad2c70db Mon Sep 17 00:00:00 2001 From: Hugo van Kemenade <1324225+hugovk@users.noreply.github.com> Date: Fri, 4 Apr 2025 18:23:38 +0300 Subject: [PATCH 2/2] Fix invalid escape sequences --- docs-xml/build-html-docs.py | 6 +-- docs-xml/mkmsgs.py | 2 +- src/feedvalidator/__init__.py | 4 +- src/feedvalidator/base.py | 2 +- src/feedvalidator/extension.py | 10 ++--- src/feedvalidator/image.py | 2 +- src/feedvalidator/kml.py | 4 +- src/feedvalidator/media.py | 10 ++--- src/feedvalidator/opensearch.py | 4 +- src/feedvalidator/opml.py | 2 +- src/feedvalidator/uri.py | 4 +- src/feedvalidator/validators.py | 68 ++++++++++++++++----------------- src/validtest.py | 4 +- 13 files changed, 61 insertions(+), 61 deletions(-) diff --git a/docs-xml/build-html-docs.py b/docs-xml/build-html-docs.py index daee6220..fe537211 100755 --- a/docs-xml/build-html-docs.py +++ b/docs-xml/build-html-docs.py @@ -32,7 +32,7 @@ def asText(x): import re -wsRE = re.compile('\s+') +wsRE = re.compile(r'\s+') def trimWS(s): s = wsRE.sub(' ', s) @@ -43,7 +43,7 @@ def trimWS(s): return s -secRe = re.compile("
\n(.*?\n)
\n", re.DOTALL) +secRe = re.compile("
\n(.*?\n)
\n", re.DOTALL) import codecs @@ -67,7 +67,7 @@ def writeDoc(x, h): for (sec, txt) in secRe.findall(t): - r = re.compile('

' + sec + '

\s*
\s*()
', re.IGNORECASE) + r = re.compile('

' + sec + r'

\s*
\s*()
', re.IGNORECASE) idx = r.search(doc).start(1) doc = doc[:idx] + txt + doc[idx:] diff --git a/docs-xml/mkmsgs.py b/docs-xml/mkmsgs.py index a8383d67..39c30ecd 100644 --- a/docs-xml/mkmsgs.py +++ b/docs-xml/mkmsgs.py @@ -94,7 +94,7 @@ def buildTestSuite(): if __name__ == '__main__': import re for dir, id, msg, html, xml in missing(): - msg = re.sub("%\(\w+\)\w?", "foo", msg) + msg = re.sub(r"%\(\w+\)\w?", "foo", msg) if not path.exists(html): pass if not path.exists(xml): diff --git a/src/feedvalidator/__init__.py b/src/feedvalidator/__init__.py index 8ca3ba32..ecdc6f98 100644 --- a/src/feedvalidator/__init__.py +++ b/src/feedvalidator/__init__.py @@ -47,7 +47,7 @@ def _validate(aString, firstOccurrenceOnly, loggedEvents, base, encoding, selfUR from .base import SAXDispatcher from io import StringIO - if re.match("^\s+<\?xml",aString) and re.search("",aString): + if re.match(r"^\s+<\?xml",aString) and re.search("",aString): lt = aString.find('<'); gt = aString.find('>') if lt > 0 and gt > 0 and lt < gt: loggedEvents.append(logging.WPBlankLine({'line':1,'column':1})) @@ -72,7 +72,7 @@ def _validate(aString, firstOccurrenceOnly, loggedEvents, base, encoding, selfUR # experimental RSS-Profile support validator.rssCharData = [s.find('&#x')>=0 for s in aString.split('\n')] - xmlver = re.match("^<\?\s*xml\s+version\s*=\s*['\"]([-a-zA-Z0-9_.:]*)['\"]",aString) + xmlver = re.match("^<\\?\\s*xml\\s+version\\s*=\\s*['\"]([-a-zA-Z0-9_.:]*)['\"]",aString) if xmlver and xmlver.group(1) != '1.0': validator.log(logging.BadXmlVersion({"version":xmlver.group(1)})) diff --git a/src/feedvalidator/base.py b/src/feedvalidator/base.py index ca323471..66af1601 100644 --- a/src/feedvalidator/base.py +++ b/src/feedvalidator/base.py @@ -95,7 +95,7 @@ def near_miss(ns): try: - return re.match(".*\w", ns).group().lower() + return re.match(r".*\w", ns).group().lower() except: return ns diff --git a/src/feedvalidator/extension.py b/src/feedvalidator/extension.py index 6c5f6a4b..4596ab9f 100644 --- a/src/feedvalidator/extension.py +++ b/src/feedvalidator/extension.py @@ -763,7 +763,7 @@ def validate(self): class gml_pos(text): def validate(self): - if not re.match('^[-+]?\d+\.?\d*[ ,][-+]?\d+\.?\d*$', self.value): + if not re.match(r'^[-+]?\d+\.?\d*[ ,][-+]?\d+\.?\d*$', self.value): return self.log(InvalidCoord({'value':self.value})) if self.value.find(',')>=0: self.log(CoordComma({'value':self.value})) @@ -785,7 +785,7 @@ def validate(self): if len(values)<3 or len(values)%2 == 1: return self.log(InvalidCoordList({'value':self.value})) for value in values: - if not re.match('^[-+]?\d+\.?\d*$', value): + if not re.match(r'^[-+]?\d+\.?\d*$', value): return self.log(InvalidCoordList({'value':value})) class gml_polygon(geo_srsName): @@ -1157,21 +1157,21 @@ class g_serviceTypeEnumeration(enumeration): class g_float(text): def validate(self): import re - if not re.match('\d+\.?\d*\s*\w*', self.value): + if not re.match(r'\d+\.?\d*\s*\w*', self.value): self.log(InvalidFloat({"parent":self.parent.name, "element":self.name, "attr": ':'.join(self.name.split('_',1)), "value":self.value})) class floatUnit(text): def validate(self): import re - if not re.match('\d+\.?\d*\s*\w*$', self.value): + if not re.match(r'\d+\.?\d*\s*\w*$', self.value): self.log(InvalidFloatUnit({"parent":self.parent.name, "element":self.name, "attr": ':'.join(self.name.split('_',1)), "value":self.value})) class decimal(text): def validate(self): import re - if not re.match('[-+]?\d+\.?\d*\s*$', self.value): + if not re.match(r'[-+]?\d+\.?\d*\s*$', self.value): self.log(InvalidFloatUnit({"parent":self.parent.name, "element":self.name, "attr": ':'.join(self.name.split('_',1)), "value":self.value})) diff --git a/src/feedvalidator/image.py b/src/feedvalidator/image.py index 01a6920f..1bc6480d 100644 --- a/src/feedvalidator/image.py +++ b/src/feedvalidator/image.py @@ -70,7 +70,7 @@ def validate(self): rfc2396_full.validate(self) import re ext = self.value.split('.')[-1].lower() - if re.match("^\w+$", ext) and ext not in ['jpg','jpeg','gif','png']: + if re.match(r"^\w+$", ext) and ext not in ['jpg','jpeg','gif','png']: self.log(ImageUrlFormat({"parent":self.parent.name, "element":self.name})) class title(nonhtml): diff --git a/src/feedvalidator/kml.py b/src/feedvalidator/kml.py index f46b7192..d47a423f 100644 --- a/src/feedvalidator/kml.py +++ b/src/feedvalidator/kml.py @@ -1063,7 +1063,7 @@ def validate(self): self.log(InvalidKmlLatitude({"parent":self.parent.name, "element":self.name, "value":lat})) # Third coordinate value (altitude) has to be float, if present if len(point) == 3: - if not re.match('\d+\.?\d*$', point[2]): + if not re.match(r'\d+\.?\d*$', point[2]): self.log(InvalidFloat({"attr":self.name, "value":point[2]})) class angle360(text): @@ -1079,5 +1079,5 @@ def validate(self): class FloatWithNegative(text): def validate(self, name=None): - if not re.match('-?\d+\.?\d*$', self.value): + if not re.match(r'-?\d+\.?\d*$', self.value): self.log(InvalidFloat({"attr":name or self.name, "value":self.value})) diff --git a/src/feedvalidator/media.py b/src/feedvalidator/media.py index ccf9e62f..6d05ee56 100644 --- a/src/feedvalidator/media.py +++ b/src/feedvalidator/media.py @@ -247,7 +247,7 @@ def validate(self): nonhtml.validate(self, ContainsUndeclaredHTML) class media_thumbnail(positiveInteger,rfc2396_full): - npt_re = re.compile("^(now)|(\d+(\.\d+)?)|(\d+:\d\d:\d\d(\.\d+)?)$") + npt_re = re.compile(r"^(now)|(\d+(\.\d+)?)|(\d+:\d\d:\d\d(\.\d+)?)$") def getExpectedAttrNames(self): return [(None,'height'),(None,'time'),(None,'url'),(None, 'width')] def validate(self): @@ -295,7 +295,7 @@ def getExpectedAttrNames(self): ] def validate(self): self.value = self.attrs.get((None,'bitrate')) - if self.value and not re.match('\d+\.?\d*', self.value): + if self.value and not re.match(r'\d+\.?\d*', self.value): self.log(InvalidFloat({"parent":self.parent.name, "element":self.name, "attr": 'bitrate', "value":self.value})) @@ -304,7 +304,7 @@ def validate(self): if self.value: nonNegativeInteger.validate(self) self.value = self.attrs.get((None,'duration')) - if self.value and not re.match('\d+\.?\d*', self.value): + if self.value and not re.match(r'\d+\.?\d*', self.value): self.log(InvalidFloat({"parent":self.parent.name, "element":self.name, "attr": 'duration', "value":self.value})) @@ -317,7 +317,7 @@ def validate(self): if self.value: positiveInteger.validate(self) self.value = self.attrs.get((None,'framerate')) - if self.value and not re.match('\d+\.?\d*', self.value): + if self.value and not re.match(r'\d+\.?\d*', self.value): self.log(InvalidFloat({"parent":self.parent.name, "element":self.name, "attr": 'framerate', "value":self.value})) @@ -336,7 +336,7 @@ def validate(self): self.log(InvalidMediaMedium({"parent":self.parent.name, "element":self.name, "value": self.value})) self.value = self.attrs.get((None,'samplingrate')) - if self.value and not re.match('\d+\.?\d*', self.value): + if self.value and not re.match(r'\d+\.?\d*', self.value): self.log(InvalidFloat({"parent":self.parent.name, "element":self.name, "attr": 'samplingrate', "value":self.value})) diff --git a/src/feedvalidator/opensearch.py b/src/feedvalidator/opensearch.py index bd3aa204..4da9f007 100644 --- a/src/feedvalidator/opensearch.py +++ b/src/feedvalidator/opensearch.py @@ -63,7 +63,7 @@ def prevalidate(self): # self.validate_optional_attribute((None,'rel'), text) class Template(rfc2396_full): - tparam = re.compile("{((?:[-a-zA-Z0-9._~]|%[a-fA-F0-9]{2})+:?(?:[-a-zA-Z0-9._~]|%[a-fA-F0-9]{2})*)\??}") + tparam = re.compile(r"{((?:[-a-zA-Z0-9._~]|%[a-fA-F0-9]{2})+:?(?:[-a-zA-Z0-9._~]|%[a-fA-F0-9]{2})*)\??}") valuelist = ['searchTerms', 'count', 'startIndex', 'startPage', 'language', 'inputEncoding', 'outputEncoding'] @@ -127,7 +127,7 @@ def validate(self): from urllib.parse import quote, unquote import re for value in self.value.split(): - value = re.sub('%\w\w', lambda x: x.group(0).upper(), value) + value = re.sub(r'%\w\w', lambda x: x.group(0).upper(), value) if value != quote(unquote(value)): self.log(NotURLEncoded({})) break diff --git a/src/feedvalidator/opml.py b/src/feedvalidator/opml.py index 6b7dcc0b..05656a22 100644 --- a/src/feedvalidator/opml.py +++ b/src/feedvalidator/opml.py @@ -76,7 +76,7 @@ def do_windowRight(self): return positiveInteger(), nonblank(), noduplicates() class commaSeparatedLines(text): - linenumbers_re=re.compile('^(\d+(,\s*\d+)*)?$') + linenumbers_re=re.compile(r'^(\d+(,\s*\d+)*)?$') def validate(self): if not self.linenumbers_re.match(self.value): self.log(InvalidExpansionState({"parent":self.parent.name, "element":self.name, "value":self.value})) diff --git a/src/feedvalidator/uri.py b/src/feedvalidator/uri.py index 57906063..4c42a38e 100644 --- a/src/feedvalidator/uri.py +++ b/src/feedvalidator/uri.py @@ -80,7 +80,7 @@ def _qnu(s,safe=''): return res # Match an optional port specification -portRe = re.compile(':(\d*)$') +portRe = re.compile(r':(\d*)$') def _normPort(netloc,defPort): nl = netloc.lower() @@ -146,7 +146,7 @@ def _normPath(p): return '/'.join([_qnu(c, PCHAR) for c in l]) # From RFC 2396bis, with added end-of-string marker -uriRe = re.compile('^(([^:/?#]+):)?(//([^/?#]*))?([^?#]*)(\?([^#]*))?(#(.*))?$') +uriRe = re.compile(r'^(([^:/?#]+):)?(//([^/?#]*))?([^?#]*)(\?([^#]*))?(#(.*))?$') def _canonical(s): m = uriRe.match(s) diff --git a/src/feedvalidator/validators.py b/src/feedvalidator/validators.py index 49d790c2..60aad2f2 100644 --- a/src/feedvalidator/validators.py +++ b/src/feedvalidator/validators.py @@ -52,7 +52,7 @@ def implausible_8601(value): # # Valid mime type # -mime_re = re.compile('[^\s()<>,;:\\"/[\]?=]+/[^\s()<>,;:\\"/[\]?=]+(\s*;\s*[^\s()<>,;:\\"/[\]?=]+=("(\\"|[^"])*"|[^\s()<>,;:\\"/[\]?=]+))*$') +mime_re = re.compile('[^\\s()<>,;:\\"/[\\]?=]+/[^\\s()<>,;:\\"/[\\]?=]+(\\s*;\\s*[^\\s()<>,;:\\"/[\\]?=]+=("(\\"|[^"])*"|[^\\s()<>,;:\\"/[\\]?=]+))*$') # # Extensibility hook: logic varies based on type of feed @@ -161,8 +161,8 @@ class HTMLValidator: 'pointer', 'purple', 'red', 'right', 'solid', 'silver', 'teal', 'top', 'transparent', 'underline', 'white', 'yellow'] - valid_css_values = re.compile('^(#[0-9a-f]+|rgb\(\d+%?,\d*%?,?\d*%?\)?|' + - '\d?\.?\d?\d(cm|em|ex|in|mm|pc|pt|px|%|,|\))?)$') + valid_css_values = re.compile(r'^(#[0-9a-f]+|rgb\(\d+%?,\d*%?,?\d*%?\)?|' + + r'\d?\.?\d?\d(cm|em|ex|in|mm|pc|pt|px|%|,|\))?)$') mathml_elements = ['annotation', 'annotation-xml', 'maction', 'math', 'merror', 'mfrac', 'mi', 'mmultiscripts', 'mn', 'mo', 'mover', 'mpadded', @@ -242,7 +242,7 @@ def __init__(self,value,element): element = self.element offset = [element.line - element.dispatcher.locator.getLineNumber(), - element.dispatcher.locator.getColumnNumber()] - match = re.search(', at line (\d+), column (\d+)',str(msg)) + match = re.search(r', at line (\d+), column (\d+)',str(msg)) if match: offset[0] += int(match.group(1))-1 element.log(NotHtml({"parent":element.parent.name, "element":element.name, "message":"Invalid HTML", "value": str(msg)}),offset) @@ -265,13 +265,13 @@ def handle_tag(self, tag, attributes, text): # Scub CSS properties for potentially evil intent # def checkStyle(style): - if not re.match("""^([:,;#%.\sa-zA-Z0-9!]|\w-\w|'[\s\w]+'|"[\s\w]+"|\([\d,\s]+\))*$""", style): + if not re.match(r"""^([:,;#%.\sa-zA-Z0-9!]|\w-\w|'[\s\w]+'|"[\s\w]+"|\([\d,\s]+\))*$""", style): return [style] - if not re.match("^(\s*[-\w]+\s*:\s*[^:;]*(;|$))*$", style): + if not re.match(r"^(\s*[-\w]+\s*:\s*[^:;]*(;|$))*$", style): return [style] unsafe = [] - for prop,value in re.findall("([-\w]+)\s*:\s*([^:;]*)",style.lower()): + for prop,value in re.findall(r"([-\w]+)\s*:\s*([^:;]*)",style.lower()): if prop not in HTMLValidator.acceptable_css_properties: if prop not in unsafe: unsafe.append(prop) elif prop.split('-')[0] in ['background','border','margin','padding']: @@ -358,9 +358,9 @@ def prevalidate(self): # valid e-mail addr-spec # class addr_spec(text): - domain_re = '''(([0-9]{1,3}\.[0-9]{1,3}\.[0-9]{1,3}\.)|(([A-Z0-9\-]+\.)+))([A-Z0-9][-A-Z0-9]*)''' - email_re = re.compile("([A-Z0-9_\-\+\.\']+)@" + domain_re + "$", re.I) - simple_email_re = re.compile('^[\w._%+-]+@[A-Za-z][\w.-]+$') + domain_re = r'''(([0-9]{1,3}\.[0-9]{1,3}\.[0-9]{1,3}\.)|(([A-Z0-9\-]+\.)+))([A-Z0-9][-A-Z0-9]*)''' + email_re = re.compile("([A-Z0-9_\\-\\+\\.\']+)@" + domain_re + "$", re.I) + simple_email_re = re.compile(r'^[\w._%+-]+@[A-Za-z][\w.-]+$') message = InvalidAddrSpec def validate(self, value=None): if not value: value=self.value @@ -426,8 +426,8 @@ def validate(self): # iso8601 dateTime # class unbounded_iso8601(text): - iso8601_re = re.compile("^\d\d\d\d(-\d\d(-\d\d(T\d\d:\d\d(:\d\d(\.\d*)?)?" + - "(Z|([+-]\d\d:\d\d))?)?)?)?$") + iso8601_re = re.compile(r"^\d\d\d\d(-\d\d(-\d\d(T\d\d:\d\d(:\d\d(\.\d*)?)?" + + r"(Z|([+-]\d\d:\d\d))?)?)?)?$") message = InvalidISO8601DateTime def validate(self): @@ -474,8 +474,8 @@ def validate(self): class w3cdtf(iso8601): # The same as in iso8601, except a timezone is not optional when # a time is present - iso8601_re = re.compile("^\d\d\d\d(-\d\d(-\d\d(T\d\d:\d\d(:\d\d(\.\d*)?)?" + - "(Z|([+-]\d\d:\d\d)))?)?)?$") + iso8601_re = re.compile(r"^\d\d\d\d(-\d\d(-\d\d(T\d\d:\d\d(:\d\d(\.\d*)?)?" + + r"(Z|([+-]\d\d:\d\d)))?)?)?$") message = InvalidW3CDTFDate class unbounded_w3cdtf(w3cdtf): @@ -484,12 +484,12 @@ class unbounded_w3cdtf(w3cdtf): class rfc3339(iso8601): # The same as in iso8601, except that the only thing that is optional # is the seconds - iso8601_re = re.compile("^\d\d\d\d-\d\d-\d\dT\d\d:\d\d:\d\d(\.\d*)?" + - "(Z|([+-]\d\d:\d\d))$") + iso8601_re = re.compile(r"^\d\d\d\d-\d\d-\d\dT\d\d:\d\d:\d\d(\.\d*)?" + + r"(Z|([+-]\d\d:\d\d))$") message = InvalidRFC3339Date class iso8601_date(iso8601): - date_re = re.compile("^\d\d\d\d-\d\d-\d\d$") + date_re = re.compile(r"^\d\d\d\d-\d\d-\d\d$") def validate(self): if iso8601.validate(self): if not self.date_re.search(self.value): @@ -614,14 +614,14 @@ def validate(self, errorClass=InvalidIRI, successClass=ValidURI, extraParams={}) # rfc822 dateTime (+Y2K extension) # class rfc822(text): - rfc822_re = re.compile("(((mon)|(tue)|(wed)|(thu)|(fri)|(sat)|(sun))\s*,\s*)?" + - "\d\d?\s+((jan)|(feb)|(mar)|(apr)|(may)|(jun)|(jul)|(aug)|(sep)|(oct)|" + - "(nov)|(dec))\s+\d\d(\d\d)?\s+\d\d:\d\d(:\d\d)?\s+(([+-]\d\d\d\d)|" + + rfc822_re = re.compile(r"(((mon)|(tue)|(wed)|(thu)|(fri)|(sat)|(sun))\s*,\s*)?" + + r"\d\d?\s+((jan)|(feb)|(mar)|(apr)|(may)|(jun)|(jul)|(aug)|(sep)|(oct)|" + + r"(nov)|(dec))\s+\d\d(\d\d)?\s+\d\d:\d\d(:\d\d)?\s+(([+-]\d\d\d\d)|" + "(ut)|(gmt)|(est)|(edt)|(cst)|(cdt)|(mst)|(mdt)|(pst)|(pdt)|[a-ik-z])?$", re.UNICODE) rfc2822_re = re.compile("(((Mon)|(Tue)|(Wed)|(Thu)|(Fri)|(Sat)|(Sun)), )?" + - "\d\d? ((Jan)|(Feb)|(Mar)|(Apr)|(May)|(Jun)|(Jul)|(Aug)|(Sep)|(Oct)|" + - "(Nov)|(Dec)) \d\d\d\d \d\d:\d\d(:\d\d)? (([+-]?\d\d[03]0)|" + + r"\d\d? ((Jan)|(Feb)|(Mar)|(Apr)|(May)|(Jun)|(Jul)|(Aug)|(Sep)|(Oct)|" + + r"(Nov)|(Dec)) \d\d\d\d \d\d:\d\d(:\d\d)? (([+-]?\d\d[03]0)|" + "(UT)|(GMT)|(EST)|(EDT)|(CST)|(CDT)|(MST)|(MDT)|(PST)|(PDT)|Z)$") def validate(self): if self.rfc2822_re.match(self.value): @@ -646,7 +646,7 @@ def validate(self): else: value1,value2 = '', self.value value2 = re.sub(r'[\\](.)','',value2) - while value1!=value2: value1,value2=value2,re.sub('\([^(]*?\)',' ',value2) + while value1!=value2: value1,value2=value2,re.sub(r'\([^(]*?\)',' ',value2) if not self.rfc822_re.match(value2.strip().lower()): self.log(InvalidRFC2822Date({"parent":self.parent.name, "element":self.name, "value":self.value})) else: @@ -657,7 +657,7 @@ def validate(self): # from html.entities import name2codepoint def decodehtml(data): - chunks=re.split('&#?(\w+);',data) + chunks=re.split(r'&#?(\w+);',data) for i in range(1,len(chunks),2): if chunks[i].isdigit(): @@ -675,9 +675,9 @@ def decodehtml(data): # Scan HTML for relative URLs # class absUrlMixin: - anchor_re = re.compile(']', re.IGNORECASE) - img_re = re.compile(']*src=(?:"(.*?)"|\'(.*?)\'|([\w-]+))[\s>]', re.IGNORECASE) - absref_re = re.compile("\w+:") + anchor_re = re.compile(']', re.IGNORECASE) + img_re = re.compile(']*src=(?:"(.*?)"|\'(.*?)\'|([\\w-]+))[\\s>]', re.IGNORECASE) + absref_re = re.compile(r"\w+:") def validateAbsUrl(self,value): refs = self.img_re.findall(self.value) + self.anchor_re.findall(self.value) for ref in [reduce(lambda a,b: a or b, x) for x in refs]: @@ -717,8 +717,8 @@ def validate(self): # Elements for which html is discouraged, also checks for relative URLs # class nonhtml(text,safeHtmlMixin):#,absUrlMixin): - htmlEndTag_re = re.compile("") - htmlEntity_re = re.compile("&(#?\w+)") + htmlEndTag_re = re.compile(r"") + htmlEntity_re = re.compile(r"&(#?\w+)") def start(self): nonhtml.startline = self.__dict__['startline'] = self.line def prevalidate(self): @@ -826,12 +826,12 @@ def validate(self): class Float(text): def validate(self, name=None): - if not re.match('\d+\.?\d*$', self.value): + if not re.match(r'\d+\.?\d*$', self.value): self.log(InvalidFloat({"attr":name or self.name, "value":self.value})) class alphanumeric(text): def validate(self): - if not re.match('^\s*[A-Za-z0-9]+\s*$', self.value): + if not re.match(r'^\s*[A-Za-z0-9]+\s*$', self.value): self.log(InvalidAlphanum({"attr":self.name, "value":self.value})) class percentType(text): @@ -868,7 +868,7 @@ def validate(self): self.log(InvalidLongitude({"parent":self.parent.name, "element":self.name, "value":self.value})) class httpURL(text): - http_re = re.compile("(http|https)://" + addr_spec.domain_re + '(?::\d+)?' + '(/|$)', re.IGNORECASE) + http_re = re.compile("(http|https)://" + addr_spec.domain_re + r'(?::\d+)?' + '(/|$)', re.IGNORECASE) def validate(self): if not self.http_re.match(self.value): self.log(InvalidURLAttribute({"parent":self.parent.name, "element":self.name, "value":self.value})) @@ -970,7 +970,7 @@ def validate(self): self.log(InvalidTrueFalse({"parent":self.parent.name, "element":self.name,"value":self.value})) class duration(text): - duration_re = re.compile("\d+(:[0-5][0-9](:[0-5][0-9])?)?$") + duration_re = re.compile(r"\d+(:[0-5][0-9](:[0-5][0-9])?)?$") def validate(self): if not self.duration_re.match(self.value): self.log(InvalidDuration({"parent":self.parent.name, "element":self.name @@ -993,7 +993,7 @@ def validate(self): class commaSeparatedIntegers(text): def validate(self): - if not re.match("^\d+(,\s*\d+)*$", self.value): + if not re.match(r"^\d+(,\s*\d+)*$", self.value): self.log(InvalidCommaSeparatedIntegers({"parent":self.parent.name, "element":self.name})) diff --git a/src/validtest.py b/src/validtest.py index 7418e38d..9ed7d3ec 100755 --- a/src/validtest.py +++ b/src/validtest.py @@ -57,9 +57,9 @@ def failIfContainsInstanceOf(self, theClass, params, theList, msg=None): raise self.failureException("unexpected %s.%s with a value of %s" % \ (theClass.__name__, k, v)) -desc_re = re.compile("") +desc_re = re.compile(r"") -validome_re = re.compile("", re.S) +validome_re = re.compile(r"", re.S) def getDescription(xmlfile): """Extract description and exception from XML file