diff --git a/src/sentry/api/serializers/models/organization.py b/src/sentry/api/serializers/models/organization.py index d1310c7c784530..96c39a2c9739c6 100644 --- a/src/sentry/api/serializers/models/organization.py +++ b/src/sentry/api/serializers/models/organization.py @@ -89,6 +89,7 @@ from sentry.users.models.user import User from sentry.users.services.user.model import RpcUser from sentry.users.services.user.service import user_service +from sentry.utils.display_name_filter import is_spam_display_name if TYPE_CHECKING: from sentry.api.serializers.models.project import OrganizationProjectResponse @@ -175,6 +176,12 @@ def validate_name(self, value: str) -> str: raise serializers.ValidationError( "Organization name cannot contain URL schemes (e.g. http:// or https://)." ) + + if is_spam_display_name(value): + raise serializers.ValidationError( + "This name contains disallowed content. Please choose a different name." + ) + return value def validate_slug(self, value: str) -> str: diff --git a/src/sentry/sentry_apps/api/parsers/sentry_app.py b/src/sentry/sentry_apps/api/parsers/sentry_app.py index 64f473eed51e46..5c121d7b1d6f4e 100644 --- a/src/sentry/sentry_apps/api/parsers/sentry_app.py +++ b/src/sentry/sentry_apps/api/parsers/sentry_app.py @@ -11,6 +11,7 @@ from sentry.sentry_apps.api.parsers.schema import validate_ui_element_schema from sentry.sentry_apps.models.sentry_app import REQUIRED_EVENT_PERMISSIONS, UUID_CHARS_IN_SLUG from sentry.sentry_apps.utils.webhooks import VALID_EVENT_RESOURCES +from sentry.utils.display_name_filter import is_spam_display_name @extend_schema_field(build_typed_list(OpenApiTypes.STR)) @@ -164,6 +165,12 @@ def validate_name(self, value): max_length = 64 - UUID_CHARS_IN_SLUG - 1 # -1 comes from the - before the UUID bit if len(value) > max_length: raise ValidationError("Cannot exceed %d characters" % max_length) + + if is_spam_display_name(value): + raise ValidationError( + "This name contains disallowed content. 
from __future__ import annotations

from collections.abc import Callable

# Currency / crypto vocabulary. Purely alphabetic entries are matched as whole
# words (so "sol" does not fire inside "solutions"); symbols and emoji are
# matched anywhere in the name.
CURRENCY_SIGNALS: list[str] = [
    "$",
    "\U0001f4b2",  # 💲 Heavy Dollar Sign
    "\U0001f4b0",  # 💰 Money Bag
    "\U0001f4b5",  # 💵 Dollar Banknote
    "\U0001f48e",  # 💎 Gem Stone
    "\U0001fa99",  # 🪙 Coin
    "btc",
    "eth",
    "usdt",
    "crypto",
    "compensation",
    "bitcoin",
    "ethereum",
    "litecoin",
    "ltc",
    "xrp",
    "doge",
    "dogecoin",
    "bnb",
    "solana",
    "sol",
    "airdrop",
]

# A call-to-action only counts when one verb AND one urgency word are present.
CTA_VERBS: list[str] = ["click", "claim", "collect", "withdraw", "act", "pay"]
CTA_URGENCY: list[str] = ["now", "here", "your", "link"]

# URL-shortener hosts. The trailing slash is deliberate: a bare domain mention
# without a path ("bit.ly") is not treated as a signal.
SHORT_URL_SIGNALS: list[str] = [
    "2g.tel/",
    "bit.ly/",
    "t.co/",
    "tinyurl.com/",
    "rb.gy/",
    "cutt.ly/",
    "shorturl.at/",
    "is.gd/",
    "v.gd/",
    "ow.ly/",
    "bl.ink/",
]


def _is_word_at(text: str, pos: int, length: int) -> bool:
    """Return True if text[pos:pos + length] has no alphanumeric neighbour on either side."""
    if pos > 0 and text[pos - 1].isalnum():
        return False
    end = pos + length
    return not (end < len(text) and text[end].isalnum())


def _starts_hostname_at(text: str, pos: int) -> bool:
    """Return True if *pos* is the first character of a hostname label.

    Letters, digits and "-" all continue a label, so a match preceded by one of
    them is the tail of a longer, different host ("support.co/" is not "t.co/").
    A preceding "." is accepted so that sub-domains such as "www.bit.ly/" match.
    """
    if pos == 0:
        return True
    previous = text[pos - 1]
    return not (previous.isalnum() or previous == "-")


def _has_substring(lowered: str, signals: list[str]) -> bool:
    """Return True if any signal occurs anywhere in *lowered*."""
    return any(signal in lowered for signal in signals)


def _has_word(lowered: str, signals: list[str]) -> bool:
    """Return True if any signal occurs in *lowered* as a stand-alone word."""
    for signal in signals:
        pos = lowered.find(signal)
        while pos != -1:
            if _is_word_at(lowered, pos, len(signal)):
                return True
            # An earlier hit may be embedded in a longer word; keep scanning.
            pos = lowered.find(signal, pos + 1)
    return False


def _has_host(lowered: str, signals: list[str]) -> bool:
    """Return True if any host-like signal occurs at the start of a hostname label."""
    for signal in signals:
        pos = lowered.find(signal)
        while pos != -1:
            if _starts_hostname_at(lowered, pos):
                return True
            pos = lowered.find(signal, pos + 1)
    return False


def _has_signal(lowered: str, signals: list[str]) -> bool:
    """Return True if *lowered* contains any of *signals*.

    The matching strategy depends on the shape of each signal:

    * purely alphabetic ("btc", "click")       -> whole-word match
    * starts with a letter/digit but contains
      other characters ("bit.ly/", "2g.tel/")  -> must start a hostname label
    * anything else ("$", emoji)               -> plain substring match
    """
    words = [s for s in signals if s.isalpha()]
    hosts = [s for s in signals if not s.isalpha() and s[:1].isalnum()]
    symbols = [s for s in signals if not s.isalpha() and not s[:1].isalnum()]
    return (
        _has_word(lowered, words)
        or _has_host(lowered, hosts)
        or _has_substring(lowered, symbols)
    )


def _has_cta(lowered: str) -> bool:
    """Return True if *lowered* contains both a CTA verb and a CTA urgency word."""
    return _has_signal(lowered, CTA_VERBS) and _has_signal(lowered, CTA_URGENCY)


# (human-readable category label, predicate over the lower-cased name)
_CATEGORIES: list[tuple[str, Callable[[str], bool]]] = [
    ("cryptocurrency terminology", lambda val: _has_signal(val, CURRENCY_SIGNALS)),
    ("call-to-action phrases", _has_cta),
    ("URL shortener domains", lambda val: _has_signal(val, SHORT_URL_SIGNALS)),
]


def is_spam_display_name(name: str) -> bool:
    """Return True if the name matches 2+ spam signal categories.

    Matching is case-insensitive. A single category on its own (for example a
    company legitimately called "BTC Analytics") is never enough to reject.
    """
    lowered = name.lower()
    matched = sum(1 for _, check in _CATEGORIES if check(lowered))
    return matched >= 2
from sentry.utils.display_name_filter import is_spam_display_name


class TestIsSpamDisplayName:
    """Behavioural tests for ``is_spam_display_name``.

    A name is spam only when at least two of the three signal categories
    (currency, call-to-action, URL shortener) match. The ``test_substring_*``
    cases each carry a second assertion in which another category is genuinely
    present, so the verdict depends solely on the embedded substring being
    ignored.
    """

    def test_clean_name_passes(self) -> None:
        assert not is_spam_display_name("My Company Inc.")

    def test_single_currency_signal_passes(self) -> None:
        assert not is_spam_display_name("BTC Analytics")

    def test_single_cta_verb_passes(self) -> None:
        assert not is_spam_display_name("Click Studios")

    def test_single_cta_urgency_passes(self) -> None:
        assert not is_spam_display_name("Do It Now Labs")

    def test_cta_verb_plus_urgency_alone_passes(self) -> None:
        assert not is_spam_display_name("Click Here Studios")

    def test_single_shorturl_signal_passes(self) -> None:
        assert not is_spam_display_name("bit.ly/promo team")

    def test_currency_plus_cta_rejected(self) -> None:
        assert is_spam_display_name("Free BTC - Click Here")

    def test_currency_plus_shorturl_rejected(self) -> None:
        assert is_spam_display_name("Earn $100 via 2g.tel/promo")

    def test_shorturl_without_slash_not_matched(self) -> None:
        assert not is_spam_display_name("support.com Solutions")

    def test_cta_plus_shorturl_rejected(self) -> None:
        assert is_spam_display_name("Click Here: bit.ly/free")

    def test_bare_shorturl_domain_without_path_passes(self) -> None:
        assert not is_spam_display_name("Free BTC bit.ly")

    def test_all_three_categories_rejected(self) -> None:
        assert is_spam_display_name("Win $50 ETH bit.ly/offer Claim Now")

    def test_case_insensitive(self) -> None:
        assert is_spam_display_name("FREE BTC - CLICK HERE")

    def test_currency_emoji_detected(self) -> None:
        assert is_spam_display_name("\U0001f4b2Compensation Btc: 2g.tel/x Click Your Pay Link.")

    def test_single_currency_emoji_passes(self) -> None:
        assert not is_spam_display_name("My \U0001f4b0 Company")

    def test_cta_novel_combo_rejected(self) -> None:
        assert is_spam_display_name("Withdraw Now - Free BTC")

    def test_substring_sol_in_solutions_not_matched(self) -> None:
        assert not is_spam_display_name("Impactful Solutions")
        # A real CTA is present, so this fails if "sol" matched inside "solutions".
        assert not is_spam_display_name("Click Here - Impactful Solutions")

    def test_substring_eth_in_method_not_matched(self) -> None:
        assert not is_spam_display_name("Method Analytics")
        # A real CTA is present, so this fails if "eth" matched inside "method".
        assert not is_spam_display_name("Click Here - Method Analytics")

    def test_substring_act_in_contact_not_matched(self) -> None:
        assert not is_spam_display_name("Contact Knowledge Solutions")
        # A real currency word is present; "act"/"now" must not be picked out
        # of "contact"/"knowledge" to form a call-to-action.
        assert not is_spam_display_name("BTC Contact Knowledge Solutions")

    def test_substring_now_in_knowledge_not_matched(self) -> None:
        assert not is_spam_display_name("Knowledge Now Platform")
        # Currency plus a CTA verb are present; only "now" inside "knowledge"
        # could complete the call-to-action, and it must not.
        assert not is_spam_display_name("BTC Click Knowledge Base")

    def test_substring_here_in_where_not_matched(self) -> None:
        assert not is_spam_display_name("Where We Shine")
        # Currency plus a CTA verb are present; only "here" inside "where"
        # could complete the call-to-action, and it must not.
        assert not is_spam_display_name("BTC Click Where We Shine")

    def test_empty_string_passes(self) -> None:
        assert not is_spam_display_name("")