From 9342cf651f7a8eafe5c3d6456aa3ca7344e07d08 Mon Sep 17 00:00:00 2001 From: Dominique Hazael-Massieux Date: Thu, 11 Dec 2025 16:16:01 +0100 Subject: [PATCH 1/2] Provide error message when hitting 403 from Cloudflare WAF since it's a frequent source of unactionable issues --- docs-xml/error/CloudflareHttpError.xml | 13 ++++++ docs-xml/error/HttpError.xml | 2 + docs/error/ClouldflareHttpError.html | 55 ++++++++++++++++++++++++++ docs/error/HttpError.html | 2 +- src/feedvalidator/__init__.py | 2 + src/feedvalidator/logging.py | 1 + 6 files changed, 74 insertions(+), 1 deletion(-) create mode 100644 docs-xml/error/CloudflareHttpError.xml create mode 100644 docs/error/ClouldflareHttpError.html diff --git a/docs-xml/error/CloudflareHttpError.xml b/docs-xml/error/CloudflareHttpError.xml new file mode 100644 index 00000000..84f29dd6 --- /dev/null +++ b/docs-xml/error/CloudflareHttpError.xml @@ -0,0 +1,13 @@ + +
+
+

HTTP Error

+
+
+

The feed couldn't be validated because the validator was blocked by CloudFlare's Web App Firewall. To specifically authorize the validator to access that resource, identify it by its User-Agent string, FeedValidator/1.3.

+
+
+

Deactivate the Web App Firewall or allow-list the feedvalidator user agent FeedValidator/1.3.

+
+
+
diff --git a/docs-xml/error/HttpError.xml b/docs-xml/error/HttpError.xml index 1ebf26e0..56625015 100644 --- a/docs-xml/error/HttpError.xml +++ b/docs-xml/error/HttpError.xml @@ -10,6 +10,8 @@ and make sure that the URL you supplied resolves to an accessible file.

This usually means that the URL was wrong, or that permissions on the server don't allow us to fetch that file. The error shows the message that the server sent, which may help.

+ +

If the error is "403 Forbidden" and your Web site uses a Web App Firewall, you may need to allow-list the feedvalidator User-Agent: FeedValidator/1.3.

Make sure this URL can be downloaded with a browser, then try again.

diff --git a/docs/error/ClouldflareHttpError.html b/docs/error/ClouldflareHttpError.html new file mode 100644 index 00000000..97598e73 --- /dev/null +++ b/docs/error/ClouldflareHttpError.html @@ -0,0 +1,55 @@ + + + + Blocked by CloudFlare Web Apps Firewall + + + + +
+

Message

+
+

Blocked by CloudFlare Web Apps Firewall

+
+

Explanation

+ +
+

The feed couldn't be validated because the validator was blocked by CloudFlare's Web App Firewall. To specifically authorize the validator to access that resource, identify it by its User-Agent string, FeedValidator/1.3. + +

+

Solution

+
+

Deactivate the Web App Firewall or allow-list the feedvalidator user agent FeedValidator/1.3.

+
+

Not clear? Disagree?

+
+

You might be able to find help in one of these fine resources.

+
+ +
+ + + + + + diff --git a/docs/error/HttpError.html b/docs/error/HttpError.html index 7233b11e..6d709188 100644 --- a/docs/error/HttpError.html +++ b/docs/error/HttpError.html @@ -31,7 +31,7 @@

Explanation

the server don't allow us to fetch that file. The error shows the message that the server sent, which may help.

-

If the error is "403 Forbidden" and your Web site uses a Web App Firewall, you may need to allow-list the feedvalidator User-Agent: "FeedValidator/1.3". +

If the error is "403 Forbidden" and your Web site uses a Web App Firewall, you may need to allow-list the feedvalidator User-Agent: FeedValidator/1.3.

Solution

diff --git a/src/feedvalidator/__init__.py b/src/feedvalidator/__init__.py index ecdc6f98..ae25cc8f 100644 --- a/src/feedvalidator/__init__.py +++ b/src/feedvalidator/__init__.py @@ -240,6 +240,8 @@ def validateURL(url, firstOccurrenceOnly=1, wantRawData=0, groupEvents=0): raise except urllib.error.HTTPError as status: + if status.code == 403 and status.headers.get("cf-mitigated") == "challenge": + raise ValidationFailure(logging.CloudflareHttpError({'status': status})) raise ValidationFailure(logging.HttpError({'status': status})) except urllib.error.URLError as x: raise ValidationFailure(logging.HttpError({'status': x.reason})) diff --git a/src/feedvalidator/logging.py b/src/feedvalidator/logging.py index 36980f7c..d1ea7e9a 100644 --- a/src/feedvalidator/logging.py +++ b/src/feedvalidator/logging.py @@ -134,6 +134,7 @@ class AtomLinkNotEmpty(Warning): pass class UnregisteredAtomLinkRel(Warning): pass class HttpError(Error): pass +class CloudflareHttpError(HttpError): pass class IOError(Error): pass class UnknownEncoding(Error): pass From 327f82e2cd021598a6359fc71d761cdb72943590 Mon Sep 17 00:00:00 2001 From: Dominique Hazael-Massieux Date: Thu, 11 Dec 2025 16:40:11 +0100 Subject: [PATCH 2/2] Fix filename --- .../error/{ClouldflareHttpError.html => CloudflareHttpError.html} | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename docs/error/{ClouldflareHttpError.html => CloudflareHttpError.html} (100%) diff --git a/docs/error/ClouldflareHttpError.html b/docs/error/CloudflareHttpError.html similarity index 100% rename from docs/error/ClouldflareHttpError.html rename to docs/error/CloudflareHttpError.html