From f426a4c5e985be5c2394c99d16bc94f442e65d42 Mon Sep 17 00:00:00 2001 From: ephphatha Date: Tue, 25 Apr 2023 00:03:29 +1000 Subject: [PATCH] Include none directive if either noindex or nofollow is specified Matches the expectations of how noindex is treated by clients which respect robots meta directives --- img2dataset/main.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/img2dataset/main.py b/img2dataset/main.py index 11d9c9a..a03fea6 100644 --- a/img2dataset/main.py +++ b/img2dataset/main.py @@ -110,6 +110,10 @@ def download( disallowed_header_directives = ["noai", "noimageai", "noindex", "noimageindex"] if len(disallowed_header_directives) == 0: disallowed_header_directives = None + elif "none" not in disallowed_header_directives and any( + x in disallowed_header_directives for x in ("noindex", "nofollow") + ): + disallowed_header_directives.append("none") config_parameters = dict(locals()) arguments_validator(config_parameters)