Skip to content

Commit ca77ff5

Browse files
Display name validation and filtering for apps and orgs (#113106)
Adds some display name validation and filtering for org and app names during org creation and edits. Reduces abuse potential; a follow-up to #113052.
1 parent d4c66aa commit ca77ff5

File tree

5 files changed

+190
-0
lines changed

5 files changed

+190
-0
lines changed

src/sentry/api/serializers/models/organization.py

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -89,6 +89,7 @@
8989
from sentry.users.models.user import User
9090
from sentry.users.services.user.model import RpcUser
9191
from sentry.users.services.user.service import user_service
92+
from sentry.utils.display_name_filter import is_spam_display_name
9293

9394
if TYPE_CHECKING:
9495
from sentry.api.serializers.models.project import OrganizationProjectResponse
@@ -175,6 +176,12 @@ def validate_name(self, value: str) -> str:
175176
raise serializers.ValidationError(
176177
"Organization name cannot contain URL schemes (e.g. http:// or https://)."
177178
)
179+
180+
if is_spam_display_name(value):
181+
raise serializers.ValidationError(
182+
"This name contains disallowed content. Please choose a different name."
183+
)
184+
178185
return value
179186

180187
def validate_slug(self, value: str) -> str:

src/sentry/sentry_apps/api/parsers/sentry_app.py

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,7 @@
1111
from sentry.sentry_apps.api.parsers.schema import validate_ui_element_schema
1212
from sentry.sentry_apps.models.sentry_app import REQUIRED_EVENT_PERMISSIONS, UUID_CHARS_IN_SLUG
1313
from sentry.sentry_apps.utils.webhooks import VALID_EVENT_RESOURCES
14+
from sentry.utils.display_name_filter import is_spam_display_name
1415

1516

1617
@extend_schema_field(build_typed_list(OpenApiTypes.STR))
@@ -164,6 +165,12 @@ def validate_name(self, value):
164165
max_length = 64 - UUID_CHARS_IN_SLUG - 1 # -1 comes from the - before the UUID bit
165166
if len(value) > max_length:
166167
raise ValidationError("Cannot exceed %d characters" % max_length)
168+
169+
if is_spam_display_name(value):
170+
raise ValidationError(
171+
"This name contains disallowed content. Please choose a different name."
172+
)
173+
167174
return value
168175

169176
def validate_allowedOrigins(self, value):
Lines changed: 94 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,94 @@
1+
from __future__ import annotations
2+
3+
from collections.abc import Callable
4+
5+
# Money-related signals for the "cryptocurrency terminology" category.
# Purely alphabetic entries are matched as whole words by `_has_signal`;
# symbols and emoji are matched wherever they occur.
CURRENCY_SIGNALS: list[str] = [
    # --- symbols and emoji ---
    "$",
    "\U0001f4b2",  # 💲 Heavy Dollar Sign
    "\U0001f4b0",  # 💰 Money Bag
    "\U0001f4b5",  # 💵 Dollar Banknote
    "\U0001f48e",  # 💎 Gem Stone
    "\U0001fa99",  # 🪙 Coin
    # --- tickers, coin names and related vocabulary ---
    "btc",
    "eth",
    "usdt",
    "crypto",
    "compensation",
    "bitcoin",
    "ethereum",
    "litecoin",
    "ltc",
    "xrp",
    "doge",
    "dogecoin",
    "bnb",
    "solana",
    "sol",
    "airdrop",
]
29+
30+
# Call-to-action vocabulary. `_has_cta` only reports a match when the text
# contains at least one verb AND at least one urgency word.
CTA_VERBS: list[str] = [
    "click",
    "claim",
    "collect",
    "withdraw",
    "act",
    "pay",
]
CTA_URGENCY: list[str] = [
    "now",
    "here",
    "your",
    "link",
]
32+
33+
# URL-shortener hosts. Every entry ends with "/" so that only a link with a
# path matches; a bare mention of the domain (e.g. "bit.ly") does not.
SHORT_URL_SIGNALS: list[str] = [
    "2g.tel/",
    "bit.ly/",
    "t.co/",
    "tinyurl.com/",
    "rb.gy/",
    "cutt.ly/",
    "shorturl.at/",
    "is.gd/",
    "v.gd/",
    "ow.ly/",
    "bl.ink/",
]
46+
47+
48+
def _is_word_at(text: str, pos: int, length: int) -> bool:
49+
"""Check that the match at text[pos:pos+length] is bounded by non-alphanumeric chars."""
50+
if pos > 0 and text[pos - 1].isalnum():
51+
return False
52+
end = pos + length
53+
if end < len(text) and text[end].isalnum():
54+
return False
55+
return True
56+
57+
58+
def _has_substring(lowered: str, signals: list[str]) -> bool:
59+
return any(s in lowered for s in signals)
60+
61+
62+
def _has_word(lowered: str, signals: list[str]) -> bool:
63+
for signal in signals:
64+
pos = lowered.find(signal)
65+
while pos != -1:
66+
if _is_word_at(lowered, pos, len(signal)):
67+
return True
68+
pos = lowered.find(signal, pos + 1)
69+
return False
70+
71+
72+
def _has_signal(lowered: str, signals: list[str]) -> bool:
73+
"""Use word-boundary matching for alphabetic signals, substring for the rest."""
74+
alpha = [s for s in signals if s.isalpha()]
75+
other = [s for s in signals if not s.isalpha()]
76+
return _has_word(lowered, alpha) or _has_substring(lowered, other)
77+
78+
79+
def _has_cta(lowered: str) -> bool:
    """Return True when the text contains both a CTA verb and a CTA urgency word.

    Either kind of word on its own is not treated as a call to action.
    """
    if not _has_signal(lowered, CTA_VERBS):
        return False
    return _has_signal(lowered, CTA_URGENCY)
81+
82+
83+
def _has_currency_signal(lowered: str) -> bool:
    """Category predicate: the text contains at least one CURRENCY_SIGNALS entry."""
    return _has_signal(lowered, CURRENCY_SIGNALS)


def _has_short_url_signal(lowered: str) -> bool:
    """Category predicate: the text contains at least one SHORT_URL_SIGNALS entry."""
    return _has_signal(lowered, SHORT_URL_SIGNALS)


# (label, predicate) pairs, one per spam-signal category. Each predicate takes
# the already-lowercased name and reports whether that category is present.
_CATEGORIES: list[tuple[str, Callable[[str], bool]]] = [
    ("cryptocurrency terminology", _has_currency_signal),
    ("call-to-action phrases", _has_cta),
    ("URL shortener domains", _has_short_url_signal),
]
88+
89+
90+
def is_spam_display_name(name: str) -> bool:
    """Return True if the name matches 2+ spam signal categories.

    A single category (for example just a coin ticker, or just a shortened
    link) is allowed; a combination of two or more is treated as spam.

    The name is checked in two forms: plainly lowercased, and NFKC-normalised
    plus case-folded. The second form catches compatibility spellings such as
    full-width "ＢＴＣ" / "＄" or styled mathematical letters, which would
    otherwise bypass the filter. The first form is kept because normalisation
    can glue a symbol onto a word ("BTC™" becomes "btctm") and so hide a
    whole-word match. A category counts once if either form triggers it.
    """
    # Function-scope import: this is the only use of the module in this file.
    import unicodedata

    lowered = name.lower()
    folded = unicodedata.normalize("NFKC", name).casefold()
    variants = (lowered,) if folded == lowered else (lowered, folded)

    matched = sum(
        1 for _, check in _CATEGORIES if any(check(variant) for variant in variants)
    )
    return matched >= 2

tests/sentry/core/endpoints/test_organization_index.py

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -272,6 +272,19 @@ def test_name_with_url_scheme_rejected(self) -> None:
272272
)
273273
self.get_error_response(name="http://evil.com", slug="legit-slug-2", status_code=400)
274274

275+
def test_name_with_spam_signals_rejected(self) -> None:
    """A name combining currency, short-link and call-to-action signals gets a 400."""
    error_response = self.get_error_response(
        status_code=400,
        name="Win $50 ETH bit.ly/offer Claim Now",
        slug="spam-org",
    )
    # The validation message should be surfaced in the response payload.
    assert "disallowed content" in str(error_response.data)
282+
283+
def test_name_with_single_signal_allowed(self) -> None:
    """A name with only one spam category (a coin ticker) is still accepted."""
    created = self.get_success_response(name="BTC Analytics", slug="btc-analytics")
    organization = Organization.objects.get(id=created.data["id"])
    assert organization.name == "BTC Analytics"
287+
275288
def test_name_with_periods_allowed(self) -> None:
276289
response = self.get_success_response(name="Acme Inc.", slug="acme-inc")
277290
org = Organization.objects.get(id=response.data["id"])
Lines changed: 69 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,69 @@
1+
from sentry.utils.display_name_filter import is_spam_display_name
2+
3+
4+
class TestIsSpamDisplayName:
    """Unit tests for ``is_spam_display_name``.

    A name is spam only when it matches at least two of the three categories
    (currency, call to action, short URL); most cases below pin one side of
    that threshold or guard against substring false positives.
    """

    # --- zero or one category: allowed ---

    def test_clean_name_passes(self) -> None:
        name = "My Company Inc."
        assert not is_spam_display_name(name)

    def test_single_currency_signal_passes(self) -> None:
        name = "BTC Analytics"
        assert not is_spam_display_name(name)

    def test_single_cta_verb_passes(self) -> None:
        # A verb without an urgency word is not a call to action.
        name = "Click Studios"
        assert not is_spam_display_name(name)

    def test_single_cta_urgency_passes(self) -> None:
        # An urgency word without a verb is not a call to action.
        name = "Do It Now Labs"
        assert not is_spam_display_name(name)

    def test_cta_verb_plus_urgency_alone_passes(self) -> None:
        # A full call to action is still only one category.
        name = "Click Here Studios"
        assert not is_spam_display_name(name)

    def test_single_shorturl_signal_passes(self) -> None:
        name = "bit.ly/promo team"
        assert not is_spam_display_name(name)

    # --- two or more categories: rejected ---

    def test_currency_plus_cta_rejected(self) -> None:
        name = "Free BTC - Click Here"
        assert is_spam_display_name(name)

    def test_currency_plus_shorturl_rejected(self) -> None:
        name = "Earn $100 via 2g.tel/promo"
        assert is_spam_display_name(name)

    def test_shorturl_without_slash_not_matched(self) -> None:
        # "t.co" appears inside "support.com" but is not followed by "/".
        name = "support.com Solutions"
        assert not is_spam_display_name(name)

    def test_cta_plus_shorturl_rejected(self) -> None:
        name = "Click Here: bit.ly/free"
        assert is_spam_display_name(name)

    def test_bare_shorturl_domain_without_path_passes(self) -> None:
        # Without the trailing "/" the domain is not a short-URL signal,
        # leaving only the currency category.
        name = "Free BTC bit.ly"
        assert not is_spam_display_name(name)

    def test_all_three_categories_rejected(self) -> None:
        name = "Win $50 ETH bit.ly/offer Claim Now"
        assert is_spam_display_name(name)

    def test_case_insensitive(self) -> None:
        name = "FREE BTC - CLICK HERE"
        assert is_spam_display_name(name)

    def test_currency_emoji_detected(self) -> None:
        name = "\U0001f4b2Compensation Btc: 2g.tel/x Click Your Pay Link."
        assert is_spam_display_name(name)

    def test_single_currency_emoji_passes(self) -> None:
        name = "My \U0001f4b0 Company"
        assert not is_spam_display_name(name)

    def test_cta_novel_combo_rejected(self) -> None:
        name = "Withdraw Now - Free BTC"
        assert is_spam_display_name(name)

    # --- alphabetic signals embedded in longer words must not match ---

    def test_substring_sol_in_solutions_not_matched(self) -> None:
        name = "Impactful Solutions"
        assert not is_spam_display_name(name)

    def test_substring_eth_in_method_not_matched(self) -> None:
        name = "Method Analytics"
        assert not is_spam_display_name(name)

    def test_substring_act_in_contact_not_matched(self) -> None:
        name = "Contact Knowledge Solutions"
        assert not is_spam_display_name(name)

    def test_substring_now_in_knowledge_not_matched(self) -> None:
        name = "Knowledge Now Platform"
        assert not is_spam_display_name(name)

    def test_substring_here_in_where_not_matched(self) -> None:
        name = "Where We Shine"
        assert not is_spam_display_name(name)

    # --- degenerate input ---

    def test_empty_string_passes(self) -> None:
        name = ""
        assert not is_spam_display_name(name)

0 commit comments

Comments
 (0)