-
-
Notifications
You must be signed in to change notification settings - Fork 4.7k
Display name validation and filtering for apps and orgs #113106
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Merged
geoffg-sentry
merged 5 commits into
master
from
geoffg-sentry/display-name-app-name-filtering
Apr 15, 2026
Merged
Changes from all commits
Commits
Show all changes
5 commits
Select commit
Hold shift + click to select a range
036279c
Add filtering to app and org display names
geoffg-sentry 49788c7
comment brevity
geoffg-sentry 9c70b12
better word boundaries and trailing slashes
geoffg-sentry 0119ca1
simplify error messaging
geoffg-sentry 1cba7e1
move import, fix return
geoffg-sentry File filter
Filter by extension
Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
There are no files selected for viewing
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,94 @@ | ||
| from __future__ import annotations | ||
|
|
||
| from collections.abc import Callable | ||
|
|
||
# Money/cryptocurrency markers for the first spam category.
# Entries are lowercase because is_spam_display_name() lowercases the name
# before matching. _has_signal() matches purely alphabetic entries as whole
# words (so "sol" does not fire on "solutions"); symbols and emoji are matched
# wherever they appear in the name.
CURRENCY_SIGNALS: list[str] = [
    "$",
    "\U0001f4b2",  # 💲 Heavy Dollar Sign
    "\U0001f4b0",  # 💰 Money Bag
    "\U0001f4b5",  # 💵 Dollar Banknote
    "\U0001f48e",  # 💎 Gem Stone
    "\U0001fa99",  # 🪙 Coin
    "btc",
    "eth",
    "usdt",
    "crypto",
    "compensation",
    "bitcoin",
    "ethereum",
    "litecoin",
    "ltc",
    "xrp",
    "doge",
    "dogecoin",
    "bnb",
    "solana",
    "sol",
    "airdrop",
]
|
|
||
# Call-to-action vocabulary. _has_cta() only fires when the name contains at
# least one verb AND at least one urgency word, each as a whole word.
CTA_VERBS: list[str] = ["click", "claim", "collect", "withdraw", "act", "pay"]
CTA_URGENCY: list[str] = ["now", "here", "your", "link"]
|
|
||
# URL-shortener hosts. Every entry keeps its trailing "/" so that a bare
# domain mention without a path (e.g. "bit.ly") does not count as a signal.
SHORT_URL_SIGNALS: list[str] = [
    "2g.tel/",
    "bit.ly/",
    "t.co/",
    "tinyurl.com/",
    "rb.gy/",
    "cutt.ly/",
    "shorturl.at/",
    "is.gd/",
    "v.gd/",
    "ow.ly/",
    "bl.ink/",
]
|
|
||
|
|
||
def _is_word_at(text: str, pos: int, length: int) -> bool:
    """Tell whether ``text[pos:pos + length]`` stands alone as a word.

    The slice counts as a word when the character immediately before it and
    the character immediately after it are both either absent (start/end of
    ``text``) or non-alphanumeric.
    """
    stop = pos + length
    touches_left = pos > 0 and text[pos - 1].isalnum()
    # Short-circuit keeps the right-hand lookup from running when the left
    # neighbour has already disqualified the match.
    return not (touches_left or (stop < len(text) and text[stop].isalnum()))
|
|
||
|
|
||
def _has_substring(lowered: str, signals: list[str]) -> bool:
    """Return True if any signal occurs in ``lowered``.

    Signals that start with a non-alphanumeric character (``$``, emoji) match
    anywhere in the string. Signals that start with a letter or digit -- in
    practice host names such as ``t.co/`` -- must additionally not be glued
    to a preceding alphanumeric character: ``pivot.co/x`` names a different
    host than ``t.co/x`` and must not be reported as the shortener.

    Args:
        lowered: The already lowercased text to search.
        signals: Lowercase needles to look for.

    Returns:
        True as soon as one signal matches, otherwise False.
    """
    for signal in signals:
        if not signal[:1].isalnum():
            # Symbol-like (or empty) signal: plain containment is enough.
            if signal in lowered:
                return True
            continue

        # Host-like signal: walk every occurrence and accept the first one
        # that starts the string or follows a non-alphanumeric character.
        pos = lowered.find(signal)
        while pos != -1:
            if pos == 0 or not lowered[pos - 1].isalnum():
                return True
            pos = lowered.find(signal, pos + 1)
    return False
|
geoffg-sentry marked this conversation as resolved.
|
||
|
|
||
|
|
||
def _has_word(lowered: str, signals: list[str]) -> bool:
    """Return True if any signal appears in ``lowered`` as a standalone word.

    Every occurrence of every signal is examined, so a signal that first
    shows up inside a longer word can still match at a later position.
    """

    def _occurrences(word: str):
        # Lazily yield each start index of ``word``, overlapping hits included.
        start = lowered.find(word)
        while start != -1:
            yield start
            start = lowered.find(word, start + 1)

    return any(
        _is_word_at(lowered, start, len(word))
        for word in signals
        for start in _occurrences(word)
    )
|
|
||
|
|
||
def _has_signal(lowered: str, signals: list[str]) -> bool:
    """Check ``lowered`` against ``signals`` using the right matcher for each.

    Purely alphabetic signals go through ``_has_word`` (whole-word match);
    everything else (symbols, emoji, host names) goes through
    ``_has_substring``.
    """
    words: list[str] = []
    fragments: list[str] = []
    for candidate in signals:
        bucket = words if candidate.isalpha() else fragments
        bucket.append(candidate)

    if _has_word(lowered, words):
        return True
    return _has_substring(lowered, fragments)
|
|
||
|
|
||
def _has_cta(lowered: str) -> bool:
    """Return True when ``lowered`` has both a CTA verb and an urgency word."""
    if not _has_signal(lowered, CTA_VERBS):
        # No verb: skip the urgency lookup entirely.
        return False
    return _has_signal(lowered, CTA_URGENCY)
|
|
||
|
geoffg-sentry marked this conversation as resolved.
|
||
|
|
||
def _has_currency(val: str) -> bool:
    """Return True if ``val`` contains any entry of CURRENCY_SIGNALS."""
    return _has_signal(val, CURRENCY_SIGNALS)


def _has_short_url(val: str) -> bool:
    """Return True if ``val`` contains any entry of SHORT_URL_SIGNALS."""
    return _has_signal(val, SHORT_URL_SIGNALS)


# (label, predicate) pairs; each predicate receives the lowercased name.
_CATEGORIES: list[tuple[str, Callable[[str], bool]]] = [
    ("cryptocurrency terminology", _has_currency),
    ("call-to-action phrases", _has_cta),
    ("URL shortener domains", _has_short_url),
]
|
|
||
|
|
||
def is_spam_display_name(name: str) -> bool:
    """Return True if the name matches 2+ spam signal categories.

    The name is NFKC-normalized and lowercased before matching so that
    compatibility look-alikes (fullwidth ``ＢＴＣ``, ``＄``, mathematical or
    circled letters) are folded to the plain characters the signal lists
    contain instead of slipping past them.

    Args:
        name: The raw display name to evaluate.

    Returns:
        True when at least two entries of ``_CATEGORIES`` report a match.
    """
    # Function-scope import keeps this change self-contained.
    import unicodedata

    lowered = unicodedata.normalize("NFKC", name).lower()
    matched = sum(1 for _, check in _CATEGORIES if check(lowered))
    return matched >= 2
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,69 @@ | ||
| from sentry.utils.display_name_filter import is_spam_display_name | ||
|
|
||
|
|
||
class TestIsSpamDisplayName:
    """Behavioral tests for is_spam_display_name.

    A name is only rejected when it trips at least two of the three signal
    categories (currency, call-to-action, URL shortener), so tests that
    target one matching rule pair it with exactly one other real category.
    """

    def test_clean_name_passes(self) -> None:
        assert not is_spam_display_name("My Company Inc.")

    def test_single_currency_signal_passes(self) -> None:
        assert not is_spam_display_name("BTC Analytics")

    def test_single_cta_verb_passes(self) -> None:
        assert not is_spam_display_name("Click Studios")

    def test_single_cta_urgency_passes(self) -> None:
        assert not is_spam_display_name("Do It Now Labs")

    def test_cta_verb_plus_urgency_alone_passes(self) -> None:
        assert not is_spam_display_name("Click Here Studios")

    def test_single_shorturl_signal_passes(self) -> None:
        assert not is_spam_display_name("bit.ly/promo team")

    def test_currency_plus_cta_rejected(self) -> None:
        assert is_spam_display_name("Free BTC - Click Here")

    def test_currency_plus_shorturl_rejected(self) -> None:
        assert is_spam_display_name("Earn $100 via 2g.tel/promo")

    def test_shorturl_without_slash_not_matched(self) -> None:
        assert not is_spam_display_name("support.com Solutions")

    def test_cta_plus_shorturl_rejected(self) -> None:
        assert is_spam_display_name("Click Here: bit.ly/free")

    def test_bare_shorturl_domain_without_path_passes(self) -> None:
        assert not is_spam_display_name("Free BTC bit.ly")

    def test_all_three_categories_rejected(self) -> None:
        assert is_spam_display_name("Win $50 ETH bit.ly/offer Claim Now")

    def test_case_insensitive(self) -> None:
        assert is_spam_display_name("FREE BTC - CLICK HERE")

    def test_currency_emoji_detected(self) -> None:
        assert is_spam_display_name("\U0001f4b2Compensation Btc: 2g.tel/x Click Your Pay Link.")

    def test_currency_emoji_is_only_currency_signal(self) -> None:
        # The emoji is the sole currency marker here, so this fails if emoji
        # detection breaks (the sample above also contains "Btc").
        assert is_spam_display_name("\U0001f4b0 Click Here")

    def test_single_currency_emoji_passes(self) -> None:
        assert not is_spam_display_name("My \U0001f4b0 Company")

    def test_cta_novel_combo_rejected(self) -> None:
        assert is_spam_display_name("Withdraw Now - Free BTC")

    def test_substring_sol_in_solutions_not_matched(self) -> None:
        assert not is_spam_display_name("Impactful Solutions")

    def test_substring_eth_in_method_not_matched(self) -> None:
        assert not is_spam_display_name("Method Analytics")

    def test_substring_eth_in_method_with_real_cta_passes(self) -> None:
        # A genuine CTA is present, so a substring hit on "eth" inside
        # "method" would push this to two categories.
        assert not is_spam_display_name("Method Analytics - Click Here")

    def test_substring_act_in_contact_not_matched(self) -> None:
        assert not is_spam_display_name("Contact Knowledge Solutions")

    def test_substring_now_in_knowledge_not_matched(self) -> None:
        assert not is_spam_display_name("Knowledge Now Platform")

    def test_substring_here_in_where_not_matched(self) -> None:
        assert not is_spam_display_name("Where We Shine")

    def test_substring_here_in_where_with_verb_and_currency_passes(self) -> None:
        # Currency and a CTA verb are real; only a substring hit on "here"
        # inside "where" could complete the CTA category.
        assert not is_spam_display_name("Where We Click BTC")

    def test_empty_string_passes(self) -> None:
        assert not is_spam_display_name("")
Oops, something went wrong.
Add this suggestion to a batch that can be applied as a single commit.
This suggestion is invalid because no changes were made to the code.
Suggestions cannot be applied while the pull request is closed.
Suggestions cannot be applied while viewing a subset of changes.
Only one suggestion per line can be applied in a batch.
Add this suggestion to a batch that can be applied as a single commit.
Applying suggestions on deleted lines is not supported.
You must change the existing code in this line in order to create a valid suggestion.
Outdated suggestions cannot be applied.
This suggestion has been applied or marked resolved.
Suggestions cannot be applied from pending reviews.
Suggestions cannot be applied on multi-line comments.
Suggestions cannot be applied while the pull request is queued to merge.
Suggestion cannot be applied right now. Please check back later.
Uh oh!
There was an error while loading. Please reload this page.