diff --git a/docs-xml/error/CloudflareHttpError.xml b/docs-xml/error/CloudflareHttpError.xml new file mode 100644 index 00000000..84f29dd6 --- /dev/null +++ b/docs-xml/error/CloudflareHttpError.xml @@ -0,0 +1,13 @@ + +
+
+

HTTP Error

+
+
+

The feed couldn't be validated because the validator was blocked by CloudFlare WebApp's Firewall. The validator can be identified by its User-Agent string FeedValidator/1.3 so that it can be specifically authorized to access that resource.

+
+
+

Deactivate the WebApp Firewall or allow-list the feedvalidator user agent FeedValidator/1.3.

+
+
+
diff --git a/docs-xml/error/HttpError.xml b/docs-xml/error/HttpError.xml index 1ebf26e0..56625015 100644 --- a/docs-xml/error/HttpError.xml +++ b/docs-xml/error/HttpError.xml @@ -10,6 +10,8 @@ and make sure that the URL you supplied resolves to an accessible file.

This usually means that the URL was wrong, or that permissions on the server don't allow us to fetch that file. The error shows the message that the server sent, which may help.

+ +

If the error is "403 Forbidden" and your Web site uses a Web App Firewall, you may need to allow-list the feedvalidator User-Agent: FeedValidator/1.3.

Make sure this URL can be downloaded with a browser, then try again.

diff --git a/docs/error/CloudflareHttpError.html b/docs/error/CloudflareHttpError.html new file mode 100644 index 00000000..97598e73 --- /dev/null +++ b/docs/error/CloudflareHttpError.html @@ -0,0 +1,55 @@ + + + + Blocked by CloudFlare Web Apps Firewall + + + + +
+

Message

+
+

Blocked by CloudFlare Web Apps Firewall

+
+

Explanation

+ +
+

The feed couldn't be validated because the validator was blocked by CloudFlare WebApp's Firewall. The validator can be identified by its User-Agent string FeedValidator/1.3 so that it can be specifically authorized to access that resource. + +

+

Solution

+
+

Deactivate the WebApp Firewall or allow-list the feedvalidator user agent FeedValidator/1.3.

+
+

Not clear? Disagree?

+
+

You might be able to find help in one of these fine resources.

+
+ +
+ + + + + + diff --git a/docs/error/HttpError.html b/docs/error/HttpError.html index 7233b11e..6d709188 100644 --- a/docs/error/HttpError.html +++ b/docs/error/HttpError.html @@ -31,7 +31,7 @@

Explanation

the server don't allow us to fetch that file. The error shows the message that the server sent, which may help.

-

If the error is "403 Forbidden" and your Web site uses a Web App Firewall, you may need to allow-list the feedvalidator User-Agent: "FeedValidator/1.3". +

If the error is "403 Forbidden" and your Web site uses a Web App Firewall, you may need to allow-list the feedvalidator User-Agent: FeedValidator/1.3.

Solution

diff --git a/src/feedvalidator/__init__.py b/src/feedvalidator/__init__.py index ecdc6f98..ae25cc8f 100644 --- a/src/feedvalidator/__init__.py +++ b/src/feedvalidator/__init__.py @@ -240,6 +240,8 @@ def validateURL(url, firstOccurrenceOnly=1, wantRawData=0, groupEvents=0): raise except urllib.error.HTTPError as status: + if status.code == 403 and status.headers.get("cf-mitigated") == "challenge": + raise ValidationFailure(logging.CloudflareHttpError({'status': status})) raise ValidationFailure(logging.HttpError({'status': status})) except urllib.error.URLError as x: raise ValidationFailure(logging.HttpError({'status': x.reason})) diff --git a/src/feedvalidator/logging.py b/src/feedvalidator/logging.py index 36980f7c..d1ea7e9a 100644 --- a/src/feedvalidator/logging.py +++ b/src/feedvalidator/logging.py @@ -134,6 +134,7 @@ class AtomLinkNotEmpty(Warning): pass class UnregisteredAtomLinkRel(Warning): pass class HttpError(Error): pass +class CloudflareHttpError(HttpError): pass class IOError(Error): pass class UnknownEncoding(Error): pass