From b7c36ac16ad45e2c9545f4dc10106f7e44b53ea2 Mon Sep 17 00:00:00 2001 From: Ian Wienand Date: Mon, 24 Jun 2024 11:03:14 +1000 Subject: [PATCH] URL validator: accept user/password, use urllib.parse.urlparse I was hitting an issue in a build tool that was not letting me specify a URL to clone a git tree with a personal access token (e.g. [1]) in a WTForms URL field. I started looking at expanding the original regex, but there are tricks like multiple "@"'s in passwords that are hard to get right. I think that for this purpose, urllib.parse.urlparse (urlparse/urlsplit doesn't seem to matter here) will just "do the right thing". The test-cases are expanded with some coverage of username/passwords. [1] https://docs.gitlab.com/ee/user/profile/personal_access_tokens.html#clone-repository-using-personal-access-token --- CHANGES.rst | 8 ++++++++ src/wtforms/validators.py | 28 ++++++++++++++++------------ tests/validators/test_url.py | 5 +++++ 3 files changed, 29 insertions(+), 12 deletions(-) diff --git a/CHANGES.rst b/CHANGES.rst index 75661e659..9427a0952 100644 --- a/CHANGES.rst +++ b/CHANGES.rst @@ -1,5 +1,13 @@ .. currentmodule:: wtforms +Version 3.X.X +------------- + +Unreleased + +- :class:`~validators.URL` validator is based on `urllib.parse.urlparse` and + allows `username:password@` values. + Version 3.1.2 ------------- diff --git a/src/wtforms/validators.py b/src/wtforms/validators.py index d5b3c39a9..15c8433dc 100644 --- a/src/wtforms/validators.py +++ b/src/wtforms/validators.py @@ -2,6 +2,7 @@ import math import re import uuid +from urllib.parse import urlparse __all__ = ( "DataRequired", @@ -505,9 +506,9 @@ def __call__(self, form, field): super().__call__(form, field, message) -class URL(Regexp): +class URL: """ - Simple regexp based url validation. Much like the email validator, you + Simple url validation. Much like the email validator, you probably want to validate the url later by other means if the url must resolve. 
@@ -522,14 +523,7 @@ class URL(Regexp): """ def __init__(self, require_tld=True, allow_ip=True, message=None): - regex = ( - r"^[a-z]+://" - r"(?P<host>[^\/\?:]+)" - r"(?P<port>:[0-9]+)?" - r"(?P<path>\/.*?)?" - r"(?P<query>\?.*)?$" - ) - super().__init__(regex, re.IGNORECASE, message) + self.message = message self.validate_hostname = HostnameValidation( require_tld=require_tld, allow_ip=allow_ip ) @@ -539,8 +533,18 @@ def __call__(self, form, field): if message is None: message = field.gettext("Invalid URL.") - match = super().__call__(form, field, message) - if not self.validate_hostname(match.group("host")): + try: + r = urlparse(field.data) + except ValueError as exc: + raise ValidationError(message) from exc + + if not r.scheme: + raise ValidationError(message) + + if not r.hostname: + raise ValidationError(message) + + if not self.validate_hostname(r.hostname): raise ValidationError(message) diff --git a/tests/validators/test_url.py b/tests/validators/test_url.py index 30d9f8ce0..26e317479 100644 --- a/tests/validators/test_url.py +++ b/tests/validators/test_url.py @@ -22,6 +22,10 @@ "\u0625\u062e\u062a\u0628\u0627\u0631/foo.com", # Arabic "http://उदाहरण.परीक्षा/", # Hindi "http://실례.테스트", # Hangul + "http://username:password@foobar.dk", + "http://username@foobar.dk", + "http://usern@me:p@ssword@foobar.dk", + "http://username:password@foobar.dk:1234/path?query=parm", ], ) def test_valid_url_passes(url_val, dummy_form, dummy_field): @@ -42,6 +46,7 @@ def test_valid_url_passes(url_val, dummy_form, dummy_field): "http://foobar:5000?query=param&foo=faa", "http://foobar/path?query=param&foo=faa", "http://foobar:1234/path?query=param&foo=faa", + "http://user:password@foobar:1234/path?query=param&foo=faa", ], ) def test_valid_url_notld_passes(url_val, dummy_form, dummy_field):