Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 7 additions & 0 deletions src/sentry/api/serializers/models/organization.py
Original file line number Diff line number Diff line change
Expand Up @@ -89,6 +89,7 @@
from sentry.users.models.user import User
from sentry.users.services.user.model import RpcUser
from sentry.users.services.user.service import user_service
from sentry.utils.display_name_filter import is_spam_display_name

if TYPE_CHECKING:
from sentry.api.serializers.models.project import OrganizationProjectResponse
Expand Down Expand Up @@ -175,6 +176,12 @@ def validate_name(self, value: str) -> str:
raise serializers.ValidationError(
"Organization name cannot contain URL schemes (e.g. http:// or https://)."
)

if is_spam_display_name(value):
raise serializers.ValidationError(
"This name contains disallowed content. Please choose a different name."
)

return value

def validate_slug(self, value: str) -> str:
Expand Down
7 changes: 7 additions & 0 deletions src/sentry/sentry_apps/api/parsers/sentry_app.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@
from sentry.sentry_apps.api.parsers.schema import validate_ui_element_schema
from sentry.sentry_apps.models.sentry_app import REQUIRED_EVENT_PERMISSIONS, UUID_CHARS_IN_SLUG
from sentry.sentry_apps.utils.webhooks import VALID_EVENT_RESOURCES
from sentry.utils.display_name_filter import is_spam_display_name


@extend_schema_field(build_typed_list(OpenApiTypes.STR))
Expand Down Expand Up @@ -164,6 +165,12 @@ def validate_name(self, value):
max_length = 64 - UUID_CHARS_IN_SLUG - 1 # -1 comes from the - before the UUID bit
if len(value) > max_length:
raise ValidationError("Cannot exceed %d characters" % max_length)

if is_spam_display_name(value):
raise ValidationError(
"This name contains disallowed content. Please choose a different name."
)

return value

def validate_allowedOrigins(self, value):
Expand Down
94 changes: 94 additions & 0 deletions src/sentry/utils/display_name_filter.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,94 @@
from __future__ import annotations

from collections.abc import Callable

# Tokens that suggest a money / cryptocurrency pitch. Every entry is compared
# against the lowercased display name, so alphabetic entries are lowercase.
# `_has_signal` matches purely alphabetic entries as whole words (so "sol" does
# not fire inside "solutions") and everything else ("$", emoji) as substrings.
CURRENCY_SIGNALS: list[str] = [
    # Symbols and emoji.
    "$",
    "\U0001f4b2",  # 💲 Heavy Dollar Sign
    "\U0001f4b0",  # 💰 Money Bag
    "\U0001f4b5",  # 💵 Dollar Banknote
    "\U0001f48e",  # 💎 Gem Stone
    "\U0001fa99",  # 🪙 Coin
    # Tickers, coin names and related bait words.
    "btc",
    "eth",
    "usdt",
    "crypto",
    "compensation",
    "bitcoin",
    "ethereum",
    "litecoin",
    "ltc",
    "xrp",
    "doge",
    "dogecoin",
    "bnb",
    "solana",
    "sol",
    "airdrop",
]

# A call-to-action needs one word from each list below: an imperative verb
# plus an urgency/target word (e.g. "claim" + "now"). See `_has_cta`.
CTA_VERBS: list[str] = [
    "click",
    "claim",
    "collect",
    "withdraw",
    "act",
    "pay",
]
CTA_URGENCY: list[str] = [
    "now",
    "here",
    "your",
    "link",
]

# Hosts of link-shortening services.
_SHORTENER_HOSTS: tuple[str, ...] = (
    "2g.tel",
    "bit.ly",
    "t.co",
    "tinyurl.com",
    "rb.gy",
    "cutt.ly",
    "shorturl.at",
    "is.gd",
    "v.gd",
    "ow.ly",
    "bl.ink",
)

# Each signal is "<host>/": the trailing slash is part of the signal, so a
# bare host with no path (e.g. "bit.ly") is not a match.
SHORT_URL_SIGNALS: list[str] = [f"{host}/" for host in _SHORTENER_HOSTS]


def _is_word_at(text: str, pos: int, length: int) -> bool:
"""Check that the match at text[pos:pos+length] is bounded by non-alphanumeric chars."""
if pos > 0 and text[pos - 1].isalnum():
return False
end = pos + length
if end < len(text) and text[end].isalnum():
return False
return True


def _has_substring(lowered: str, signals: list[str]) -> bool:
return any(s in lowered for s in signals)
Comment thread
geoffg-sentry marked this conversation as resolved.


def _has_word(lowered: str, signals: list[str]) -> bool:
    """Return True if any signal occurs in ``lowered`` as a standalone word.

    Every occurrence of every signal is examined, because an early occurrence
    may be embedded in a longer word while a later one stands alone.
    """

    def _occurrences(needle: str):
        # Yield each index at which `needle` starts, including overlaps.
        idx = lowered.find(needle)
        while idx != -1:
            yield idx
            idx = lowered.find(needle, idx + 1)

    return any(
        _is_word_at(lowered, idx, len(needle))
        for needle in signals
        for idx in _occurrences(needle)
    )


def _has_signal(lowered: str, signals: list[str]) -> bool:
    """Check ``lowered`` against ``signals``, choosing the matcher per signal.

    Purely alphabetic signals are routed to `_has_word`; all remaining
    signals (symbols, emoji, host-style strings) go to `_has_substring`.
    """
    word_signals: list[str] = []
    raw_signals: list[str] = []
    for signal in signals:
        bucket = word_signals if signal.isalpha() else raw_signals
        bucket.append(signal)

    if _has_word(lowered, word_signals):
        return True
    return _has_substring(lowered, raw_signals)


def _has_cta(lowered: str) -> bool:
    """Return True when ``lowered`` has both a CTA verb and an urgency word."""
    if not _has_signal(lowered, CTA_VERBS):
        return False
    return _has_signal(lowered, CTA_URGENCY)

Comment thread
geoffg-sentry marked this conversation as resolved.

def _has_currency_signal(val: str) -> bool:
    """Predicate for the cryptocurrency/money category."""
    return _has_signal(val, CURRENCY_SIGNALS)


def _has_short_url_signal(val: str) -> bool:
    """Predicate for the URL-shortener category."""
    return _has_signal(val, SHORT_URL_SIGNALS)


# (label, predicate) pairs, one per spam-signal category. Each predicate
# receives the lowercased display name.
_CATEGORIES: list[tuple[str, Callable[[str], bool]]] = [
    ("cryptocurrency terminology", _has_currency_signal),
    ("call-to-action phrases", _has_cta),
    ("URL shortener domains", _has_short_url_signal),
]


def is_spam_display_name(name: str) -> bool:
    """Decide whether a display name looks like spam.

    The name is lowercased and run through every predicate in `_CATEGORIES`.
    It is flagged only when at least two categories match; a single category
    on its own is allowed.
    """
    lowered = name.lower()
    hits = 0
    for _label, detector in _CATEGORIES:
        if detector(lowered):
            hits += 1
    return hits >= 2
13 changes: 13 additions & 0 deletions tests/sentry/core/endpoints/test_organization_index.py
Original file line number Diff line number Diff line change
Expand Up @@ -272,6 +272,19 @@ def test_name_with_url_scheme_rejected(self) -> None:
)
self.get_error_response(name="http://evil.com", slug="legit-slug-2", status_code=400)

def test_name_with_spam_signals_rejected(self) -> None:
    """An organization name with several spam signals is rejected with a 400."""
    # Currency ("$", "ETH"), shortener link ("bit.ly/") and CTA ("Claim Now").
    spam_name = "Win $50 ETH bit.ly/offer Claim Now"
    response = self.get_error_response(name=spam_name, slug="spam-org", status_code=400)
    assert "disallowed content" in str(response.data)

def test_name_with_single_signal_allowed(self) -> None:
    """A name that matches only one spam category is accepted and stored."""
    response = self.get_success_response(name="BTC Analytics", slug="btc-analytics")
    created = Organization.objects.get(id=response.data["id"])
    assert created.name == "BTC Analytics"

def test_name_with_periods_allowed(self) -> None:
response = self.get_success_response(name="Acme Inc.", slug="acme-inc")
org = Organization.objects.get(id=response.data["id"])
Expand Down
69 changes: 69 additions & 0 deletions tests/sentry/utils/test_display_name_filter.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,69 @@
from sentry.utils.display_name_filter import is_spam_display_name


class TestIsSpamDisplayName:
    """Behavioral tests for `is_spam_display_name`.

    A name is spam only when two or more signal categories match, so a test
    that targets one matcher (word boundaries, the trailing slash on shortener
    hosts) also supplies an independent second category. Otherwise a
    regression in that matcher could not change the outcome.
    """

    # --- names that trigger at most one category ---------------------------

    def test_clean_name_passes(self) -> None:
        assert not is_spam_display_name("My Company Inc.")

    def test_single_currency_signal_passes(self) -> None:
        assert not is_spam_display_name("BTC Analytics")

    def test_single_cta_verb_passes(self) -> None:
        assert not is_spam_display_name("Click Studios")

    def test_single_cta_urgency_passes(self) -> None:
        assert not is_spam_display_name("Do It Now Labs")

    def test_cta_verb_plus_urgency_alone_passes(self) -> None:
        # Verb + urgency together make up ONE category (call-to-action).
        assert not is_spam_display_name("Click Here Studios")

    def test_single_shorturl_signal_passes(self) -> None:
        assert not is_spam_display_name("bit.ly/promo team")

    # --- two or more categories ---------------------------------------------

    def test_currency_plus_cta_rejected(self) -> None:
        assert is_spam_display_name("Free BTC - Click Here")

    def test_currency_plus_shorturl_rejected(self) -> None:
        assert is_spam_display_name("Earn $100 via 2g.tel/promo")

    def test_shorturl_without_slash_not_matched(self) -> None:
        # "support.com" contains "t.co" but not "t.co/".
        assert not is_spam_display_name("support.com Solutions")
        # Paired with a real call-to-action so a slash-less match would flip
        # the result to spam.
        assert not is_spam_display_name("Click Here support.com")

    def test_cta_plus_shorturl_rejected(self) -> None:
        assert is_spam_display_name("Click Here: bit.ly/free")

    def test_bare_shorturl_domain_without_path_passes(self) -> None:
        # Currency matches; "bit.ly" without a path must not.
        assert not is_spam_display_name("Free BTC bit.ly")

    def test_all_three_categories_rejected(self) -> None:
        assert is_spam_display_name("Win $50 ETH bit.ly/offer Claim Now")

    def test_case_insensitive(self) -> None:
        assert is_spam_display_name("FREE BTC - CLICK HERE")

    def test_currency_emoji_detected(self) -> None:
        assert is_spam_display_name("\U0001f4b2Compensation Btc: 2g.tel/x Click Your Pay Link.")

    def test_single_currency_emoji_passes(self) -> None:
        assert not is_spam_display_name("My \U0001f4b0 Company")

    def test_cta_novel_combo_rejected(self) -> None:
        assert is_spam_display_name("Withdraw Now - Free BTC")

    # --- word-boundary matching ---------------------------------------------

    def test_substring_sol_in_solutions_not_matched(self) -> None:
        assert not is_spam_display_name("Impactful Solutions")
        # With a real CTA present, "sol" inside "solutions" must still not
        # count as a currency signal.
        assert not is_spam_display_name("Click Here Solutions")

    def test_substring_eth_in_method_not_matched(self) -> None:
        assert not is_spam_display_name("Method Analytics")
        # With a real CTA present, "eth" inside "method" must still not count.
        assert not is_spam_display_name("Click Here Method")

    def test_substring_act_in_contact_not_matched(self) -> None:
        # Substring matching would see "act" (contact), "now" (knowledge) and
        # "sol" (solutions), i.e. two categories.
        assert not is_spam_display_name("Contact Knowledge Solutions")

    def test_substring_now_in_knowledge_not_matched(self) -> None:
        assert not is_spam_display_name("Knowledge Now Platform")
        # Currency + CTA verb are present; only "now" inside "knowledge"
        # could complete the call-to-action, and it must not.
        assert not is_spam_display_name("BTC Click Knowledge")

    def test_substring_here_in_where_not_matched(self) -> None:
        assert not is_spam_display_name("Where We Shine")
        # Currency + CTA verb are present; only "here" inside "where" could
        # complete the call-to-action, and it must not.
        assert not is_spam_display_name("BTC Click Where")

    def test_empty_string_passes(self) -> None:
        assert not is_spam_display_name("")
Loading