Skip to content

Commit f40fa2f

Browse files
authored
Merge pull request #81 from Block-Guard/feat/#76/fraud-type-news-api
[Refactor] 뉴스 api 정확도 개선
2 parents f4b8693 + 73ae7b0 commit f40fa2f

File tree

2 files changed

+12
-19
lines changed

2 files changed

+12
-19
lines changed

src/main/java/com/blockguard/server/domain/news/domain/enums/Category.java

Lines changed: 0 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -51,23 +51,4 @@ public static Category from(String input) {
5151
return Optional.ofNullable(MAPPING.get(input.trim()))
5252
.orElse(ETC);
5353
}
54-
/*return switch (input.trim()) {
55-
case "보이스피싱", "보이스 피싱" -> VOICE_PHISHING;
56-
case "스미싱" -> SMISHING;
57-
case "메신저 피싱", "메신저피싱" -> MESSAGE_VOICE_PHISHING;
58-
59-
case "기관 사칭형" -> INSTITUTION_IMPERSONATION;
60-
case "대출 사기형" -> LOAN_FRAUD;
61-
case "카드사 사칭형" -> CARD_IMPERSONATION;
62-
case "가족/지인 사칭형" -> FAMILY_IMPERSONATION;
63-
case "경조사 사칭형" -> EVENT_IMPERSONATION;
64-
case "공공기관 사칭형" -> PUBLIC_IMPERSONATION;
65-
case "알바/부업 사기형" -> PART_TIME_SCAM;
66-
case "정부지원금 위장형" -> GOVERNMENT_GRANT_SCAM;
67-
case "택배 사기형" -> DELIVERY_SCAM;
68-
case "투자 사기형" -> INVESTMENT_SCAM;
69-
case "허위결제 사기형" -> FALSE_PAYMENT_SCAM;
70-
default -> ETC;
71-
};*/
72-
7354
}

src/main/java/com/blockguard/server/infra/crawler/DaumNewsCrawler.java

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,7 @@
2020
import java.time.LocalDateTime;
2121
import java.time.format.DateTimeFormatter;
2222
import java.util.Optional;
23+
import java.util.regex.Pattern;
2324

2425
@Service
2526
@RequiredArgsConstructor
@@ -29,6 +30,16 @@ public class DaumNewsCrawler {
2930
private static final int ARTICLE_RETENTION_DAYS = 365;
3031
private static final long CRAWL_DELAY_MS = 500L;
3132

33+
private static final Pattern NON_KO_ALNUM = Pattern.compile("[^가-힣a-zA-Z0-9]");
34+
private String norm(String s) {
35+
if (s == null) return "";
36+
return NON_KO_ALNUM.matcher(s).replaceAll("").toLowerCase();
37+
}
38+
39+
private boolean titleContains(String title, String keyword) {
40+
return norm(title).contains(norm(keyword));
41+
}
42+
3243
public void fetchNewsFromDaum(String keyword, Category forceCategory) {
3344
Category category = (forceCategory != null) ? forceCategory : Category.from(keyword);
3445
int savedCount = 0;
@@ -53,6 +64,7 @@ public void fetchNewsFromDaum(String keyword, Category forceCategory) {
5364
String title = titleEl.text();
5465
String url = titleEl.attr("href");
5566

67+
if (!titleContains(title, keyword)) continue;
5668
if (newsRepository.existsByUrl(url)) continue;
5769

5870
Element imageEl = item.selectFirst("a.thumb_bf img");

0 commit comments

Comments
 (0)