From 1e335813a28332fc71289357158b9747e858f139 Mon Sep 17 00:00:00 2001 From: Ogunaru Date: Tue, 9 Dec 2025 08:50:31 +0900 Subject: [PATCH 1/5] =?UTF-8?q?del:=20BOOTH.pm=20=ED=95=98=ED=8A=B8?= =?UTF-8?q?=EB=B9=84=ED=8A=B8=20=EC=B2=B4=ED=81=AC=20=EC=BD=94=EB=93=9C=20?= =?UTF-8?q?=EC=A0=9C=EA=B1=B0?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- booth_checker/__main__.py | 9 --------- 1 file changed, 9 deletions(-) diff --git a/booth_checker/__main__.py b/booth_checker/__main__.py index ca83332..2e91691 100644 --- a/booth_checker/__main__.py +++ b/booth_checker/__main__.py @@ -874,15 +874,6 @@ def strftime_now(): while True: logger.info("BoothChecker cycle started") - # BOOTH Heartbeat check once per cycle - try: - logger.info('Checking BOOTH heartbeat') - requests.get("https://booth.pm", timeout=10) - except requests.RequestException as e: - logger.error(f'BOOTH heartbeat failed: {e}. Skipping this cycle.') - sleep(refresh_interval) - continue - # Recreate temporary folders recreate_folder("./download") recreate_folder("./process") From a9cf82ed337a8bd221468ac491e404e7f451568a Mon Sep 17 00:00:00 2001 From: Ogunaru Date: Wed, 7 Jan 2026 15:39:18 +0900 Subject: [PATCH 2/5] =?UTF-8?q?fix:=20=EC=B2=B4=EC=9D=B8=EC=A7=80=EB=A1=9C?= =?UTF-8?q?=EA=B7=B8=20=EA=B2=BD=EB=A1=9C=EA=B0=80=20=EC=95=84=EB=8B=8C=20?= =?UTF-8?q?=ED=8C=8C=EC=9D=BC=EB=AA=85=20=EA=B8=B0=EC=A4=80=EC=9C=BC?= =?UTF-8?q?=EB=A1=9C=20=EB=B3=80=EA=B2=BD?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- booth_checker/__main__.py | 112 +++++++++++++++++++++++++++----------- 1 file changed, 81 insertions(+), 31 deletions(-) diff --git a/booth_checker/__main__.py b/booth_checker/__main__.py index 2e91691..389333e 100644 --- a/booth_checker/__main__.py +++ b/booth_checker/__main__.py @@ -8,6 +8,7 @@ import uuid import logging import threading +from collections import defaultdict from datetime import datetime, timedelta 
from time import sleep from concurrent.futures import ThreadPoolExecutor @@ -255,6 +256,80 @@ def generate_changelog_and_summary(item_data, download_url_list, version_json): return changelog_html_path, s3_object_url, summary_result, diff_found, None +def _normalize_fbx_entries(fbx_records): + entries = [] + for path_str, file_hash in fbx_records.items(): + entries.append({'basename': os.path.basename(path_str), 'hash': file_hash}) + return entries + + +def _calculate_fbx_diff_by_name_hash(previous_fbx, current_fbx): + previous_entries = _normalize_fbx_entries(previous_fbx) + current_entries = _normalize_fbx_entries(current_fbx) + + previous_by_key = defaultdict(list) + current_by_key = defaultdict(list) + for entry in previous_entries: + previous_by_key[(entry['basename'], entry['hash'])].append(entry) + for entry in current_entries: + current_by_key[(entry['basename'], entry['hash'])].append(entry) + + remaining_previous = [] + for key, entries in previous_by_key.items(): + current_matches = current_by_key.get(key, []) + match_count = min(len(entries), len(current_matches)) + if match_count < len(entries): + remaining_previous.extend(entries[match_count:]) + if match_count < len(current_matches): + current_by_key[key] = current_matches[match_count:] + else: + current_by_key[key] = [] + + remaining_current = [] + for entries in current_by_key.values(): + remaining_current.extend(entries) + + previous_by_name = defaultdict(list) + current_by_name = defaultdict(list) + for entry in remaining_previous: + previous_by_name[entry['basename']].append(entry) + for entry in remaining_current: + current_by_name[entry['basename']].append(entry) + + added = [] + changed = [] + deleted = [] + + for name in sorted(set(previous_by_name) | set(current_by_name)): + previous_list = sorted(previous_by_name.get(name, []), key=lambda e: e['hash']) + current_list = sorted(current_by_name.get(name, []), key=lambda e: e['hash']) + if previous_list and current_list: + change_count = 
min(len(previous_list), len(current_list)) + changed.extend(current_list[:change_count]) + added.extend(current_list[change_count:]) + deleted.extend(previous_list[change_count:]) + elif current_list: + added.extend(current_list) + elif previous_list: + deleted.extend(previous_list) + + return added, changed, deleted + + +def _format_fbx_display_names(entries, used_name_counts): + names = [] + for entry in sorted(entries, key=lambda e: (e['basename'], e['hash'])): + base = entry['basename'] + index = used_name_counts.get(base, 0) + if index == 0: + display_name = base + else: + display_name = f'{base}({index})' + used_name_counts[base] = index + 1 + names.append(display_name) + return names + + def generate_fbx_changelog_and_summary(item_data, download_url_list, version_json): """Generates changelog information for FBX-only tracking.""" previous_fbx = version_json.get('fbx-files', {}) or {} @@ -269,44 +344,19 @@ def generate_fbx_changelog_and_summary(item_data, download_url_list, version_jso logger.error(f'An error occurred while parsing {filename}: {e}') logger.debug(traceback.format_exc()) - previous_hashes = {file_hash for file_hash in previous_fbx.values()} - current_hashes = {file_hash for file_hash in current_fbx.values()} - - added = [] - changed = [] - deleted = [] - - previous_remaining = dict(previous_fbx) - current_remaining = dict(current_fbx) - - for name in set(previous_fbx.keys()) & set(current_fbx.keys()): - old_hash = previous_fbx[name] - new_hash = current_fbx[name] - if old_hash != new_hash: - changed.append(name) - previous_remaining.pop(name, None) - current_remaining.pop(name, None) - - for name, new_hash in current_remaining.items(): - if new_hash in previous_hashes: - continue - added.append(name) - - for name, old_hash in previous_remaining.items(): - if old_hash in current_hashes: - continue - deleted.append(name) + added_entries, changed_entries, deleted_entries = _calculate_fbx_diff_by_name_hash(previous_fbx, current_fbx) - if not added 
and not changed and not deleted: + if not added_entries and not changed_entries and not deleted_entries: logger.info('No FBX hash differences detected; skipping changelog generation.') return None, None, None, False, current_fbx path_list = [] - for name in sorted(added): + used_name_counts = {} + for name in _format_fbx_display_names(added_entries, used_name_counts): path_list.append({'line_str': name, 'status': 1}) - for name in sorted(changed): + for name in _format_fbx_display_names(changed_entries, used_name_counts): path_list.append({'line_str': name, 'status': 3}) - for name in sorted(deleted): + for name in _format_fbx_display_names(deleted_entries, used_name_counts): path_list.append({'line_str': name, 'status': 2}) tree = build_tree(path_list) From 074362ce9b7cfec14d43cb0f911e06d56b58ca02 Mon Sep 17 00:00:00 2001 From: Ogunaru Date: Mon, 23 Feb 2026 13:42:21 +0900 Subject: [PATCH 3/5] =?UTF-8?q?fix:=20postgres=20=EC=9D=98=EC=A1=B4?= =?UTF-8?q?=EC=84=B1=20=EC=B6=94=EA=B0=80?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- docker-compose.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/docker-compose.yml b/docker-compose.yml index 780972d..ff76307 100755 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -22,6 +22,7 @@ services: - ./changelog:/root/boothchecker/changelog - ./config.json:/root/boothchecker/config.json depends_on: + - postgres - chrome restart: unless-stopped logging: From 78fec0b0ac348b7b50df267dde3f74ffb704c526 Mon Sep 17 00:00:00 2001 From: Ogunaru Date: Mon, 23 Feb 2026 15:10:41 +0900 Subject: [PATCH 4/5] =?UTF-8?q?fix:=20=EC=95=84=EC=9D=B4=ED=85=9C=20?= =?UTF-8?q?=ED=8C=8C=EC=8B=B1=200=EB=B2=88=EC=A7=B8=20=EC=9D=B4=ED=9B=84?= =?UTF-8?q?=EB=A1=9C=20=EB=90=98=EC=A7=80=20=EC=95=8A=EC=9D=80=20=EB=AC=B8?= =?UTF-8?q?=EC=A0=9C=20=EC=88=98=EC=A0=95?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- booth_checker/booth.py | 35
++++++++++++++++++++++++----------- 1 file changed, 24 insertions(+), 11 deletions(-) diff --git a/booth_checker/booth.py b/booth_checker/booth.py index 3ee1d2e..8d2cb96 100644 --- a/booth_checker/booth.py +++ b/booth_checker/booth.py @@ -9,14 +9,17 @@ def _extract_download_info(div, link_selector, filename_selector): if not download_link or not filename_div: return None - href = download_link.get("data-href") - filename = filename_div.get_text() + href = download_link.get("data-href") or download_link.get("href") + filename = filename_div.get_text(strip=True) - if not href: + if not href or not filename: return None - href = re.sub(r'[^0-9]', '', href) - return [href, filename] + match = re.search(r'/downloadables/(\d+)', href) + if not match: + return None + + return [match.group(1), filename] def _crawling_base(url, cookie, selectors, shortlist, thumblist, product_only_filter=None): response = requests.get(url=url, cookies=cookie) @@ -72,9 +75,19 @@ def crawling(order_num, product_only, cookie, shortlist=None, thumblist=None): 'product_info_selector': 'a', 'product_info_index': 1, 'thumb_selector': 'img', - 'download_item_selector': 'div.legacy-list-item__center, div[data-test="downloadable"]', - 'download_link_selector': 'a.nav-reverse, div.js-download-button', - 'filename_selector': 'div.flex-\\[1\\] b' + 'download_item_selector': ( + 'div.legacy-list-item__center, ' + 'div.mt-16.desktop\\:flex.desktop\\:justify-between.desktop\\:items-center' + ), + 'download_link_selector': ( + 'div.js-download-button[data-test="downloadable"][data-href*="/downloadables/"], ' + 'a.nav-reverse[href*="/downloadables/"]' + ), + 'filename_selector': ( + 'div.min-w-0.u-text-wrap b, ' + 'div.min-w-0.break-words.whitespace-pre-line, ' + 'div.flex-\\[1\\] b' + ) } return _crawling_base(url, cookie, selectors, shortlist, thumblist, product_only_filter=product_only) @@ -85,9 +98,9 @@ def crawling_gift(order_num, cookie, shortlist=None, thumblist=None): 'product_div_class': 'rounded-16 
bg-white p-40 mobile:px-16 mobile:pt-24 mobile:pb-40 mobile:rounded-none', 'product_info_selector': 'div.mt-24.text-left a', 'thumb_selector': 'img', - 'download_item_selector': 'div.w-full.text-left, div[data-test="downloadable"]', - 'download_link_selector': 'a.no-underline.flex.items-center.flex.gap-4, div.js-download-button', - 'filename_selector': "div[class='min-w-0 break-words whitespace-pre-line']" + 'download_item_selector': 'div.mt-16.desktop\\:flex.desktop\\:justify-between.desktop\\:items-center', + 'download_link_selector': 'div.js-download-button[data-test="downloadable"][data-href*="/downloadables/"]', + 'filename_selector': 'div.min-w-0.break-words.whitespace-pre-line, div.min-w-0.u-text-wrap b' } return _crawling_base(url, cookie, selectors, shortlist, thumblist) From 0d58a76600748be73fb8b3ba2ba017049cc960df Mon Sep 17 00:00:00 2001 From: Ogunaru Date: Mon, 23 Feb 2026 15:21:14 +0900 Subject: [PATCH 5/5] =?UTF-8?q?fix:=20=EC=95=84=EC=9D=B4=ED=85=9C=EC=9D=B4?= =?UTF-8?q?=20=EC=A0=95=EC=83=81=EC=A0=81=EC=9C=BC=EB=A1=9C=20=EB=93=B1?= =?UTF-8?q?=EB=A1=9D=EB=90=98=EC=A7=80=20=EC=95=8A=EB=8A=94=20=EB=AC=B8?= =?UTF-8?q?=EC=A0=9C=20=EC=88=98=EC=A0=95?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- booth_discord/booth.py | 40 ++++++++++++++++++++++++++-------------- 1 file changed, 26 insertions(+), 14 deletions(-) diff --git a/booth_discord/booth.py b/booth_discord/booth.py index fded0a5..c56a97a 100644 --- a/booth_discord/booth.py +++ b/booth_discord/booth.py @@ -4,6 +4,7 @@ from selenium.webdriver.chrome.options import Options from selenium.webdriver.support.ui import WebDriverWait from selenium.webdriver.support import expected_conditions as EC +from selenium.common.exceptions import TimeoutException from bs4 import BeautifulSoup class BoothCrawler(): @@ -11,6 +12,8 @@ def __init__(self, selenium_url): self.selenium_url = selenium_url def get_booth_order_info(self, item_number, cookie): + 
wait_timeout_seconds = 30 + chrome_options = Options() chrome_options.add_argument("--headless") chrome_options.add_argument("--disable-gpu") @@ -26,32 +29,41 @@ def get_booth_order_info(self, item_number, cookie): driver.refresh() try: - WebDriverWait(driver, 10).until( - EC.presence_of_element_located((By.CLASS_NAME, "flex.desktop\\:flex-row.mobile\\:flex-col")) + WebDriverWait(driver, wait_timeout_seconds).until( + EC.presence_of_element_located( + ( + By.CSS_SELECTOR, + "#js-item-order a[href*='/orders/'], #js-item-gift a[href*='/gifts/']" + ) + ) ) html = driver.page_source soup = BeautifulSoup(html, "html.parser") - - product_div = soup.find("div", class_="flex desktop:flex-row mobile:flex-col") - if not product_div: - raise Exception("상품이 존재하지 않거나, 구매하지 않은 상품입니다.") - - order_page = product_div.find("a").get("href") - order_parse = self.parse_url(order_page) + + # Prefer direct purchase order when both order/gift sections are present. + order_link = soup.select_one("#js-item-order a[href*='/orders/']") + if order_link is None: + order_link = soup.select_one("#js-item-gift a[href*='/gifts/']") + if order_link is None: + raise Exception("주문/기프트 링크를 찾지 못했습니다. 쿠키 만료 또는 미구매 상품일 수 있습니다.") + + order_parse = self.parse_url(order_link.get("href", "")) return order_parse - + except TimeoutException as exc: + raise Exception( + f"페이지 로딩이 지연되어 주문 정보를 찾지 못했습니다. ({wait_timeout_seconds}초 대기)" + ) from exc finally: driver.quit() def parse_url(self, url): - # 정규식 정의 - pattern = r"https://(?:accounts\.)?booth\.pm/(orders|gifts)/([\w-]+)" - match = re.match(pattern, url) + pattern = r"(?:https://(?:accounts\.)?booth\.pm)?/(orders|gifts)/([\w-]+)" + match = re.search(pattern, url) if match: gift_flag = match.group(1) == "gifts" # gifts이면 True, orders이면 False order_number = match.group(2) return gift_flag, order_number else: - raise ValueError("URL 형식이 잘못되었습니다.") \ No newline at end of file + raise ValueError("URL 형식이 잘못되었습니다.")