From dbb0069db83f4bd6eed824dd7db8c78b367af196 Mon Sep 17 00:00:00 2001
From: Hugo van Kemenade <1324225+hugovk@users.noreply.github.com>
Date: Fri, 4 Apr 2025 18:22:37 +0300
Subject: [PATCH 1/2] Lint with Ruff via pre-commit on GitHub Actions
---
.github/workflows/lint.yml | 22 ++++++++++++++++++++++
.pre-commit-config.yaml | 6 ++++++
.ruff.toml | 9 +++++++++
3 files changed, 37 insertions(+)
create mode 100644 .github/workflows/lint.yml
create mode 100644 .pre-commit-config.yaml
create mode 100644 .ruff.toml
diff --git a/.github/workflows/lint.yml b/.github/workflows/lint.yml
new file mode 100644
index 00000000..e25d314c
--- /dev/null
+++ b/.github/workflows/lint.yml
@@ -0,0 +1,22 @@
+name: Lint
+
+on: [push, pull_request, workflow_dispatch]
+
+permissions: {}
+
+env:
+ FORCE_COLOR: 1
+ RUFF_OUTPUT_FORMAT: github
+
+jobs:
+ lint:
+ runs-on: ubuntu-latest
+
+ steps:
+ - uses: actions/checkout@v4
+ with:
+ persist-credentials: false
+ - uses: actions/setup-python@v5
+ with:
+ python-version: "3.x"
+ - uses: tox-dev/action-pre-commit-uv@v1
diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
new file mode 100644
index 00000000..ae05443e
--- /dev/null
+++ b/.pre-commit-config.yaml
@@ -0,0 +1,6 @@
+repos:
+ - repo: https://github.com/astral-sh/ruff-pre-commit
+ rev: v0.11.3
+ hooks:
+ - id: ruff
+ args: [--exit-non-zero-on-fix]
diff --git a/.ruff.toml b/.ruff.toml
new file mode 100644
index 00000000..af15c969
--- /dev/null
+++ b/.ruff.toml
@@ -0,0 +1,9 @@
+fix = true
+extend-exclude = [
+ "*.cgi", # Python 2 code
+]
+target-version = "py310"
+
+lint.select = [
+ "W605", # invalid-escape-sequence
+]
From 2e70417236aacd93332a4c09ba0b6edaad2c70db Mon Sep 17 00:00:00 2001
From: Hugo van Kemenade <1324225+hugovk@users.noreply.github.com>
Date: Fri, 4 Apr 2025 18:23:38 +0300
Subject: [PATCH 2/2] Fix invalid escape sequences
---
docs-xml/build-html-docs.py | 6 +--
docs-xml/mkmsgs.py | 2 +-
src/feedvalidator/__init__.py | 4 +-
src/feedvalidator/base.py | 2 +-
src/feedvalidator/extension.py | 10 ++---
src/feedvalidator/image.py | 2 +-
src/feedvalidator/kml.py | 4 +-
src/feedvalidator/media.py | 10 ++---
src/feedvalidator/opensearch.py | 4 +-
src/feedvalidator/opml.py | 2 +-
src/feedvalidator/uri.py | 4 +-
src/feedvalidator/validators.py | 68 ++++++++++++++++-----------------
src/validtest.py | 4 +-
13 files changed, 61 insertions(+), 61 deletions(-)
diff --git a/docs-xml/build-html-docs.py b/docs-xml/build-html-docs.py
index daee6220..fe537211 100755
--- a/docs-xml/build-html-docs.py
+++ b/docs-xml/build-html-docs.py
@@ -32,7 +32,7 @@ def asText(x):
import re
-wsRE = re.compile('\s+')
+wsRE = re.compile(r'\s+')
def trimWS(s):
s = wsRE.sub(' ', s)
@@ -43,7 +43,7 @@ def trimWS(s):
return s
-secRe = re.compile("
\n(.*?\n)
\n", re.DOTALL)
+secRe = re.compile("\n(.*?\n)
\n", re.DOTALL)
import codecs
@@ -67,7 +67,7 @@ def writeDoc(x, h):
for (sec, txt) in secRe.findall(t):
- r = re.compile('' + sec + '
\s*\s*()
', re.IGNORECASE)
+ r = re.compile('' + sec + r'
\s*\s*()
', re.IGNORECASE)
idx = r.search(doc).start(1)
doc = doc[:idx] + txt + doc[idx:]
diff --git a/docs-xml/mkmsgs.py b/docs-xml/mkmsgs.py
index a8383d67..39c30ecd 100644
--- a/docs-xml/mkmsgs.py
+++ b/docs-xml/mkmsgs.py
@@ -94,7 +94,7 @@ def buildTestSuite():
if __name__ == '__main__':
import re
for dir, id, msg, html, xml in missing():
- msg = re.sub("%\(\w+\)\w?", "foo", msg)
+ msg = re.sub(r"%\(\w+\)\w?", "foo", msg)
if not path.exists(html):
pass
if not path.exists(xml):
diff --git a/src/feedvalidator/__init__.py b/src/feedvalidator/__init__.py
index 8ca3ba32..ecdc6f98 100644
--- a/src/feedvalidator/__init__.py
+++ b/src/feedvalidator/__init__.py
@@ -47,7 +47,7 @@ def _validate(aString, firstOccurrenceOnly, loggedEvents, base, encoding, selfUR
from .base import SAXDispatcher
from io import StringIO
- if re.match("^\s+<\?xml",aString) and re.search("",aString):
+ if re.match(r"^\s+<\?xml",aString) and re.search("",aString):
lt = aString.find('<'); gt = aString.find('>')
if lt > 0 and gt > 0 and lt < gt:
loggedEvents.append(logging.WPBlankLine({'line':1,'column':1}))
@@ -72,7 +72,7 @@ def _validate(aString, firstOccurrenceOnly, loggedEvents, base, encoding, selfUR
# experimental RSS-Profile support
validator.rssCharData = [s.find('<![CDATA[')>=0 for s in aString.split('\n')]
- xmlver = re.match("^<\?\s*xml\s+version\s*=\s*['\"]([-a-zA-Z0-9_.:]*)['\"]",aString)
+ xmlver = re.match("^<\\?\\s*xml\\s+version\\s*=\\s*['\"]([-a-zA-Z0-9_.:]*)['\"]",aString)
if xmlver and xmlver.group(1) != '1.0':
validator.log(logging.BadXmlVersion({"version":xmlver.group(1)}))
diff --git a/src/feedvalidator/base.py b/src/feedvalidator/base.py
index ca323471..66af1601 100644
--- a/src/feedvalidator/base.py
+++ b/src/feedvalidator/base.py
@@ -95,7 +95,7 @@
def near_miss(ns):
try:
- return re.match(".*\w", ns).group().lower()
+ return re.match(r".*\w", ns).group().lower()
except:
return ns
diff --git a/src/feedvalidator/extension.py b/src/feedvalidator/extension.py
index 6c5f6a4b..4596ab9f 100644
--- a/src/feedvalidator/extension.py
+++ b/src/feedvalidator/extension.py
@@ -763,7 +763,7 @@ def validate(self):
class gml_pos(text):
def validate(self):
- if not re.match('^[-+]?\d+\.?\d*[ ,][-+]?\d+\.?\d*$', self.value):
+ if not re.match(r'^[-+]?\d+\.?\d*[ ,][-+]?\d+\.?\d*$', self.value):
return self.log(InvalidCoord({'value':self.value}))
if self.value.find(',')>=0:
self.log(CoordComma({'value':self.value}))
@@ -785,7 +785,7 @@ def validate(self):
if len(values)<3 or len(values)%2 == 1:
return self.log(InvalidCoordList({'value':self.value}))
for value in values:
- if not re.match('^[-+]?\d+\.?\d*$', value):
+ if not re.match(r'^[-+]?\d+\.?\d*$', value):
return self.log(InvalidCoordList({'value':value}))
class gml_polygon(geo_srsName):
@@ -1157,21 +1157,21 @@ class g_serviceTypeEnumeration(enumeration):
class g_float(text):
def validate(self):
import re
- if not re.match('\d+\.?\d*\s*\w*', self.value):
+ if not re.match(r'\d+\.?\d*\s*\w*', self.value):
self.log(InvalidFloat({"parent":self.parent.name, "element":self.name,
"attr": ':'.join(self.name.split('_',1)), "value":self.value}))
class floatUnit(text):
def validate(self):
import re
- if not re.match('\d+\.?\d*\s*\w*$', self.value):
+ if not re.match(r'\d+\.?\d*\s*\w*$', self.value):
self.log(InvalidFloatUnit({"parent":self.parent.name, "element":self.name,
"attr": ':'.join(self.name.split('_',1)), "value":self.value}))
class decimal(text):
def validate(self):
import re
- if not re.match('[-+]?\d+\.?\d*\s*$', self.value):
+ if not re.match(r'[-+]?\d+\.?\d*\s*$', self.value):
self.log(InvalidFloatUnit({"parent":self.parent.name, "element":self.name,
"attr": ':'.join(self.name.split('_',1)), "value":self.value}))
diff --git a/src/feedvalidator/image.py b/src/feedvalidator/image.py
index 01a6920f..1bc6480d 100644
--- a/src/feedvalidator/image.py
+++ b/src/feedvalidator/image.py
@@ -70,7 +70,7 @@ def validate(self):
rfc2396_full.validate(self)
import re
ext = self.value.split('.')[-1].lower()
- if re.match("^\w+$", ext) and ext not in ['jpg','jpeg','gif','png']:
+ if re.match(r"^\w+$", ext) and ext not in ['jpg','jpeg','gif','png']:
self.log(ImageUrlFormat({"parent":self.parent.name, "element":self.name}))
class title(nonhtml):
diff --git a/src/feedvalidator/kml.py b/src/feedvalidator/kml.py
index f46b7192..d47a423f 100644
--- a/src/feedvalidator/kml.py
+++ b/src/feedvalidator/kml.py
@@ -1063,7 +1063,7 @@ def validate(self):
self.log(InvalidKmlLatitude({"parent":self.parent.name, "element":self.name, "value":lat}))
# Third coordinate value (altitude) has to be float, if present
if len(point) == 3:
- if not re.match('\d+\.?\d*$', point[2]):
+ if not re.match(r'\d+\.?\d*$', point[2]):
self.log(InvalidFloat({"attr":self.name, "value":point[2]}))
class angle360(text):
@@ -1079,5 +1079,5 @@ def validate(self):
class FloatWithNegative(text):
def validate(self, name=None):
- if not re.match('-?\d+\.?\d*$', self.value):
+ if not re.match(r'-?\d+\.?\d*$', self.value):
self.log(InvalidFloat({"attr":name or self.name, "value":self.value}))
diff --git a/src/feedvalidator/media.py b/src/feedvalidator/media.py
index ccf9e62f..6d05ee56 100644
--- a/src/feedvalidator/media.py
+++ b/src/feedvalidator/media.py
@@ -247,7 +247,7 @@ def validate(self):
nonhtml.validate(self, ContainsUndeclaredHTML)
class media_thumbnail(positiveInteger,rfc2396_full):
- npt_re = re.compile("^(now)|(\d+(\.\d+)?)|(\d+:\d\d:\d\d(\.\d+)?)$")
+ npt_re = re.compile(r"^(now)|(\d+(\.\d+)?)|(\d+:\d\d:\d\d(\.\d+)?)$")
def getExpectedAttrNames(self):
return [(None,'height'),(None,'time'),(None,'url'),(None, 'width')]
def validate(self):
@@ -295,7 +295,7 @@ def getExpectedAttrNames(self):
]
def validate(self):
self.value = self.attrs.get((None,'bitrate'))
- if self.value and not re.match('\d+\.?\d*', self.value):
+ if self.value and not re.match(r'\d+\.?\d*', self.value):
self.log(InvalidFloat({"parent":self.parent.name, "element":self.name,
"attr": 'bitrate', "value":self.value}))
@@ -304,7 +304,7 @@ def validate(self):
if self.value: nonNegativeInteger.validate(self)
self.value = self.attrs.get((None,'duration'))
- if self.value and not re.match('\d+\.?\d*', self.value):
+ if self.value and not re.match(r'\d+\.?\d*', self.value):
self.log(InvalidFloat({"parent":self.parent.name, "element":self.name,
"attr": 'duration', "value":self.value}))
@@ -317,7 +317,7 @@ def validate(self):
if self.value: positiveInteger.validate(self)
self.value = self.attrs.get((None,'framerate'))
- if self.value and not re.match('\d+\.?\d*', self.value):
+ if self.value and not re.match(r'\d+\.?\d*', self.value):
self.log(InvalidFloat({"parent":self.parent.name, "element":self.name,
"attr": 'framerate', "value":self.value}))
@@ -336,7 +336,7 @@ def validate(self):
self.log(InvalidMediaMedium({"parent":self.parent.name, "element":self.name, "value": self.value}))
self.value = self.attrs.get((None,'samplingrate'))
- if self.value and not re.match('\d+\.?\d*', self.value):
+ if self.value and not re.match(r'\d+\.?\d*', self.value):
self.log(InvalidFloat({"parent":self.parent.name, "element":self.name,
"attr": 'samplingrate', "value":self.value}))
diff --git a/src/feedvalidator/opensearch.py b/src/feedvalidator/opensearch.py
index bd3aa204..4da9f007 100644
--- a/src/feedvalidator/opensearch.py
+++ b/src/feedvalidator/opensearch.py
@@ -63,7 +63,7 @@ def prevalidate(self):
# self.validate_optional_attribute((None,'rel'), text)
class Template(rfc2396_full):
- tparam = re.compile("{((?:[-a-zA-Z0-9._~]|%[a-fA-F0-9]{2})+:?(?:[-a-zA-Z0-9._~]|%[a-fA-F0-9]{2})*)\??}")
+ tparam = re.compile(r"{((?:[-a-zA-Z0-9._~]|%[a-fA-F0-9]{2})+:?(?:[-a-zA-Z0-9._~]|%[a-fA-F0-9]{2})*)\??}")
valuelist = ['searchTerms', 'count', 'startIndex', 'startPage', 'language',
'inputEncoding', 'outputEncoding']
@@ -127,7 +127,7 @@ def validate(self):
from urllib.parse import quote, unquote
import re
for value in self.value.split():
- value = re.sub('%\w\w', lambda x: x.group(0).upper(), value)
+ value = re.sub(r'%\w\w', lambda x: x.group(0).upper(), value)
if value != quote(unquote(value)):
self.log(NotURLEncoded({}))
break
diff --git a/src/feedvalidator/opml.py b/src/feedvalidator/opml.py
index 6b7dcc0b..05656a22 100644
--- a/src/feedvalidator/opml.py
+++ b/src/feedvalidator/opml.py
@@ -76,7 +76,7 @@ def do_windowRight(self):
return positiveInteger(), nonblank(), noduplicates()
class commaSeparatedLines(text):
- linenumbers_re=re.compile('^(\d+(,\s*\d+)*)?$')
+ linenumbers_re=re.compile(r'^(\d+(,\s*\d+)*)?$')
def validate(self):
if not self.linenumbers_re.match(self.value):
self.log(InvalidExpansionState({"parent":self.parent.name, "element":self.name, "value":self.value}))
diff --git a/src/feedvalidator/uri.py b/src/feedvalidator/uri.py
index 57906063..4c42a38e 100644
--- a/src/feedvalidator/uri.py
+++ b/src/feedvalidator/uri.py
@@ -80,7 +80,7 @@ def _qnu(s,safe=''):
return res
# Match an optional port specification
-portRe = re.compile(':(\d*)$')
+portRe = re.compile(r':(\d*)$')
def _normPort(netloc,defPort):
nl = netloc.lower()
@@ -146,7 +146,7 @@ def _normPath(p):
return '/'.join([_qnu(c, PCHAR) for c in l])
# From RFC 2396bis, with added end-of-string marker
-uriRe = re.compile('^(([^:/?#]+):)?(//([^/?#]*))?([^?#]*)(\?([^#]*))?(#(.*))?$')
+uriRe = re.compile(r'^(([^:/?#]+):)?(//([^/?#]*))?([^?#]*)(\?([^#]*))?(#(.*))?$')
def _canonical(s):
m = uriRe.match(s)
diff --git a/src/feedvalidator/validators.py b/src/feedvalidator/validators.py
index 49d790c2..60aad2f2 100644
--- a/src/feedvalidator/validators.py
+++ b/src/feedvalidator/validators.py
@@ -52,7 +52,7 @@ def implausible_8601(value):
#
# Valid mime type
#
-mime_re = re.compile('[^\s()<>,;:\\"/[\]?=]+/[^\s()<>,;:\\"/[\]?=]+(\s*;\s*[^\s()<>,;:\\"/[\]?=]+=("(\\"|[^"])*"|[^\s()<>,;:\\"/[\]?=]+))*$')
+mime_re = re.compile('[^\\s()<>,;:\\"/[\\]?=]+/[^\\s()<>,;:\\"/[\\]?=]+(\\s*;\\s*[^\\s()<>,;:\\"/[\\]?=]+=("(\\"|[^"])*"|[^\\s()<>,;:\\"/[\\]?=]+))*$')
#
# Extensibility hook: logic varies based on type of feed
@@ -161,8 +161,8 @@ class HTMLValidator:
'pointer', 'purple', 'red', 'right', 'solid', 'silver', 'teal', 'top',
'transparent', 'underline', 'white', 'yellow']
- valid_css_values = re.compile('^(#[0-9a-f]+|rgb\(\d+%?,\d*%?,?\d*%?\)?|' +
- '\d?\.?\d?\d(cm|em|ex|in|mm|pc|pt|px|%|,|\))?)$')
+ valid_css_values = re.compile(r'^(#[0-9a-f]+|rgb\(\d+%?,\d*%?,?\d*%?\)?|' +
+ r'\d?\.?\d?\d(cm|em|ex|in|mm|pc|pt|px|%|,|\))?)$')
mathml_elements = ['annotation', 'annotation-xml', 'maction', 'math',
'merror', 'mfrac', 'mi', 'mmultiscripts', 'mn', 'mo', 'mover', 'mpadded',
@@ -242,7 +242,7 @@ def __init__(self,value,element):
element = self.element
offset = [element.line - element.dispatcher.locator.getLineNumber(),
- element.dispatcher.locator.getColumnNumber()]
- match = re.search(', at line (\d+), column (\d+)',str(msg))
+ match = re.search(r', at line (\d+), column (\d+)',str(msg))
if match: offset[0] += int(match.group(1))-1
element.log(NotHtml({"parent":element.parent.name, "element":element.name, "message":"Invalid HTML", "value": str(msg)}),offset)
@@ -265,13 +265,13 @@ def handle_tag(self, tag, attributes, text):
# Scub CSS properties for potentially evil intent
#
def checkStyle(style):
- if not re.match("""^([:,;#%.\sa-zA-Z0-9!]|\w-\w|'[\s\w]+'|"[\s\w]+"|\([\d,\s]+\))*$""", style):
+ if not re.match(r"""^([:,;#%.\sa-zA-Z0-9!]|\w-\w|'[\s\w]+'|"[\s\w]+"|\([\d,\s]+\))*$""", style):
return [style]
- if not re.match("^(\s*[-\w]+\s*:\s*[^:;]*(;|$))*$", style):
+ if not re.match(r"^(\s*[-\w]+\s*:\s*[^:;]*(;|$))*$", style):
return [style]
unsafe = []
- for prop,value in re.findall("([-\w]+)\s*:\s*([^:;]*)",style.lower()):
+ for prop,value in re.findall(r"([-\w]+)\s*:\s*([^:;]*)",style.lower()):
if prop not in HTMLValidator.acceptable_css_properties:
if prop not in unsafe: unsafe.append(prop)
elif prop.split('-')[0] in ['background','border','margin','padding']:
@@ -358,9 +358,9 @@ def prevalidate(self):
# valid e-mail addr-spec
#
class addr_spec(text):
- domain_re = '''(([0-9]{1,3}\.[0-9]{1,3}\.[0-9]{1,3}\.)|(([A-Z0-9\-]+\.)+))([A-Z0-9][-A-Z0-9]*)'''
- email_re = re.compile("([A-Z0-9_\-\+\.\']+)@" + domain_re + "$", re.I)
- simple_email_re = re.compile('^[\w._%+-]+@[A-Za-z][\w.-]+$')
+ domain_re = r'''(([0-9]{1,3}\.[0-9]{1,3}\.[0-9]{1,3}\.)|(([A-Z0-9\-]+\.)+))([A-Z0-9][-A-Z0-9]*)'''
+ email_re = re.compile("([A-Z0-9_\\-\\+\\.\']+)@" + domain_re + "$", re.I)
+ simple_email_re = re.compile(r'^[\w._%+-]+@[A-Za-z][\w.-]+$')
message = InvalidAddrSpec
def validate(self, value=None):
if not value: value=self.value
@@ -426,8 +426,8 @@ def validate(self):
# iso8601 dateTime
#
class unbounded_iso8601(text):
- iso8601_re = re.compile("^\d\d\d\d(-\d\d(-\d\d(T\d\d:\d\d(:\d\d(\.\d*)?)?" +
- "(Z|([+-]\d\d:\d\d))?)?)?)?$")
+ iso8601_re = re.compile(r"^\d\d\d\d(-\d\d(-\d\d(T\d\d:\d\d(:\d\d(\.\d*)?)?" +
+ r"(Z|([+-]\d\d:\d\d))?)?)?)?$")
message = InvalidISO8601DateTime
def validate(self):
@@ -474,8 +474,8 @@ def validate(self):
class w3cdtf(iso8601):
# The same as in iso8601, except a timezone is not optional when
# a time is present
- iso8601_re = re.compile("^\d\d\d\d(-\d\d(-\d\d(T\d\d:\d\d(:\d\d(\.\d*)?)?" +
- "(Z|([+-]\d\d:\d\d)))?)?)?$")
+ iso8601_re = re.compile(r"^\d\d\d\d(-\d\d(-\d\d(T\d\d:\d\d(:\d\d(\.\d*)?)?" +
+ r"(Z|([+-]\d\d:\d\d)))?)?)?$")
message = InvalidW3CDTFDate
class unbounded_w3cdtf(w3cdtf):
@@ -484,12 +484,12 @@ class unbounded_w3cdtf(w3cdtf):
class rfc3339(iso8601):
# The same as in iso8601, except that the only thing that is optional
# is the seconds
- iso8601_re = re.compile("^\d\d\d\d-\d\d-\d\dT\d\d:\d\d:\d\d(\.\d*)?" +
- "(Z|([+-]\d\d:\d\d))$")
+ iso8601_re = re.compile(r"^\d\d\d\d-\d\d-\d\dT\d\d:\d\d:\d\d(\.\d*)?" +
+ r"(Z|([+-]\d\d:\d\d))$")
message = InvalidRFC3339Date
class iso8601_date(iso8601):
- date_re = re.compile("^\d\d\d\d-\d\d-\d\d$")
+ date_re = re.compile(r"^\d\d\d\d-\d\d-\d\d$")
def validate(self):
if iso8601.validate(self):
if not self.date_re.search(self.value):
@@ -614,14 +614,14 @@ def validate(self, errorClass=InvalidIRI, successClass=ValidURI, extraParams={})
# rfc822 dateTime (+Y2K extension)
#
class rfc822(text):
- rfc822_re = re.compile("(((mon)|(tue)|(wed)|(thu)|(fri)|(sat)|(sun))\s*,\s*)?" +
- "\d\d?\s+((jan)|(feb)|(mar)|(apr)|(may)|(jun)|(jul)|(aug)|(sep)|(oct)|" +
- "(nov)|(dec))\s+\d\d(\d\d)?\s+\d\d:\d\d(:\d\d)?\s+(([+-]\d\d\d\d)|" +
+ rfc822_re = re.compile(r"(((mon)|(tue)|(wed)|(thu)|(fri)|(sat)|(sun))\s*,\s*)?" +
+ r"\d\d?\s+((jan)|(feb)|(mar)|(apr)|(may)|(jun)|(jul)|(aug)|(sep)|(oct)|" +
+ r"(nov)|(dec))\s+\d\d(\d\d)?\s+\d\d:\d\d(:\d\d)?\s+(([+-]\d\d\d\d)|" +
"(ut)|(gmt)|(est)|(edt)|(cst)|(cdt)|(mst)|(mdt)|(pst)|(pdt)|[a-ik-z])?$",
re.UNICODE)
rfc2822_re = re.compile("(((Mon)|(Tue)|(Wed)|(Thu)|(Fri)|(Sat)|(Sun)), )?" +
- "\d\d? ((Jan)|(Feb)|(Mar)|(Apr)|(May)|(Jun)|(Jul)|(Aug)|(Sep)|(Oct)|" +
- "(Nov)|(Dec)) \d\d\d\d \d\d:\d\d(:\d\d)? (([+-]?\d\d[03]0)|" +
+ r"\d\d? ((Jan)|(Feb)|(Mar)|(Apr)|(May)|(Jun)|(Jul)|(Aug)|(Sep)|(Oct)|" +
+ r"(Nov)|(Dec)) \d\d\d\d \d\d:\d\d(:\d\d)? (([+-]?\d\d[03]0)|" +
"(UT)|(GMT)|(EST)|(EDT)|(CST)|(CDT)|(MST)|(MDT)|(PST)|(PDT)|Z)$")
def validate(self):
if self.rfc2822_re.match(self.value):
@@ -646,7 +646,7 @@ def validate(self):
else:
value1,value2 = '', self.value
value2 = re.sub(r'[\\](.)','',value2)
- while value1!=value2: value1,value2=value2,re.sub('\([^(]*?\)',' ',value2)
+ while value1!=value2: value1,value2=value2,re.sub(r'\([^(]*?\)',' ',value2)
if not self.rfc822_re.match(value2.strip().lower()):
self.log(InvalidRFC2822Date({"parent":self.parent.name, "element":self.name, "value":self.value}))
else:
@@ -657,7 +657,7 @@ def validate(self):
#
from html.entities import name2codepoint
def decodehtml(data):
- chunks=re.split('&#?(\w+);',data)
+ chunks=re.split(r'&#?(\w+);',data)
for i in range(1,len(chunks),2):
if chunks[i].isdigit():
@@ -675,9 +675,9 @@ def decodehtml(data):
# Scan HTML for relative URLs
#
class absUrlMixin:
- anchor_re = re.compile(']', re.IGNORECASE)
- img_re = re.compile('
]*src=(?:"(.*?)"|\'(.*?)\'|([\w-]+))[\s>]', re.IGNORECASE)
- absref_re = re.compile("\w+:")
+ anchor_re = re.compile(']', re.IGNORECASE)
+ img_re = re.compile('
]*src=(?:"(.*?)"|\'(.*?)\'|([\\w-]+))[\\s>]', re.IGNORECASE)
+ absref_re = re.compile(r"\w+:")
def validateAbsUrl(self,value):
refs = self.img_re.findall(self.value) + self.anchor_re.findall(self.value)
for ref in [reduce(lambda a,b: a or b, x) for x in refs]:
@@ -717,8 +717,8 @@ def validate(self):
# Elements for which html is discouraged, also checks for relative URLs
#
class nonhtml(text,safeHtmlMixin):#,absUrlMixin):
- htmlEndTag_re = re.compile("(\w+)>")
- htmlEntity_re = re.compile("&(#?\w+)")
+ htmlEndTag_re = re.compile(r"(\w+)>")
+ htmlEntity_re = re.compile(r"&(#?\w+)")
def start(self):
nonhtml.startline = self.__dict__['startline'] = self.line
def prevalidate(self):
@@ -826,12 +826,12 @@ def validate(self):
class Float(text):
def validate(self, name=None):
- if not re.match('\d+\.?\d*$', self.value):
+ if not re.match(r'\d+\.?\d*$', self.value):
self.log(InvalidFloat({"attr":name or self.name, "value":self.value}))
class alphanumeric(text):
def validate(self):
- if not re.match('^\s*[A-Za-z0-9]+\s*$', self.value):
+ if not re.match(r'^\s*[A-Za-z0-9]+\s*$', self.value):
self.log(InvalidAlphanum({"attr":self.name, "value":self.value}))
class percentType(text):
@@ -868,7 +868,7 @@ def validate(self):
self.log(InvalidLongitude({"parent":self.parent.name, "element":self.name, "value":self.value}))
class httpURL(text):
- http_re = re.compile("(http|https)://" + addr_spec.domain_re + '(?::\d+)?' + '(/|$)', re.IGNORECASE)
+ http_re = re.compile("(http|https)://" + addr_spec.domain_re + r'(?::\d+)?' + '(/|$)', re.IGNORECASE)
def validate(self):
if not self.http_re.match(self.value):
self.log(InvalidURLAttribute({"parent":self.parent.name, "element":self.name, "value":self.value}))
@@ -970,7 +970,7 @@ def validate(self):
self.log(InvalidTrueFalse({"parent":self.parent.name, "element":self.name,"value":self.value}))
class duration(text):
- duration_re = re.compile("\d+(:[0-5][0-9](:[0-5][0-9])?)?$")
+ duration_re = re.compile(r"\d+(:[0-5][0-9](:[0-5][0-9])?)?$")
def validate(self):
if not self.duration_re.match(self.value):
self.log(InvalidDuration({"parent":self.parent.name, "element":self.name
@@ -993,7 +993,7 @@ def validate(self):
class commaSeparatedIntegers(text):
def validate(self):
- if not re.match("^\d+(,\s*\d+)*$", self.value):
+ if not re.match(r"^\d+(,\s*\d+)*$", self.value):
self.log(InvalidCommaSeparatedIntegers({"parent":self.parent.name,
"element":self.name}))
diff --git a/src/validtest.py b/src/validtest.py
index 7418e38d..9ed7d3ec 100755
--- a/src/validtest.py
+++ b/src/validtest.py
@@ -57,9 +57,9 @@ def failIfContainsInstanceOf(self, theClass, params, theList, msg=None):
raise self.failureException("unexpected %s.%s with a value of %s" % \
(theClass.__name__, k, v))
-desc_re = re.compile("")
+desc_re = re.compile(r"")
-validome_re = re.compile("", re.S)
+validome_re = re.compile(r"", re.S)
def getDescription(xmlfile):
"""Extract description and exception from XML file