121 changes: 81 additions & 40 deletions booth_checker/__main__.py
@@ -8,6 +8,7 @@
 import uuid
 import logging
 import threading
+from collections import defaultdict
 from datetime import datetime, timedelta
 from time import sleep
 from concurrent.futures import ThreadPoolExecutor
@@ -255,6 +256,80 @@ def generate_changelog_and_summary(item_data, download_url_list, version_json):
     return changelog_html_path, s3_object_url, summary_result, diff_found, None
 
 
+def _normalize_fbx_entries(fbx_records):
+    entries = []
+    for path_str, file_hash in fbx_records.items():
+        entries.append({'basename': os.path.basename(path_str), 'hash': file_hash})
+    return entries
+
+
+def _calculate_fbx_diff_by_name_hash(previous_fbx, current_fbx):
+    previous_entries = _normalize_fbx_entries(previous_fbx)
+    current_entries = _normalize_fbx_entries(current_fbx)
+
+    previous_by_key = defaultdict(list)
+    current_by_key = defaultdict(list)
+    for entry in previous_entries:
+        previous_by_key[(entry['basename'], entry['hash'])].append(entry)
+    for entry in current_entries:
+        current_by_key[(entry['basename'], entry['hash'])].append(entry)
+
+    remaining_previous = []
+    for key, entries in previous_by_key.items():
+        current_matches = current_by_key.get(key, [])
+        match_count = min(len(entries), len(current_matches))
+        if match_count < len(entries):
+            remaining_previous.extend(entries[match_count:])
+        if match_count < len(current_matches):
+            current_by_key[key] = current_matches[match_count:]
+        else:
+            current_by_key[key] = []
+
+    remaining_current = []
+    for entries in current_by_key.values():
+        remaining_current.extend(entries)
+
+    previous_by_name = defaultdict(list)
+    current_by_name = defaultdict(list)
+    for entry in remaining_previous:
+        previous_by_name[entry['basename']].append(entry)
+    for entry in remaining_current:
+        current_by_name[entry['basename']].append(entry)
+
+    added = []
+    changed = []
+    deleted = []
+
+    for name in sorted(set(previous_by_name) | set(current_by_name)):
+        previous_list = sorted(previous_by_name.get(name, []), key=lambda e: e['hash'])
+        current_list = sorted(current_by_name.get(name, []), key=lambda e: e['hash'])
+        if previous_list and current_list:
+            change_count = min(len(previous_list), len(current_list))
+            changed.extend(current_list[:change_count])
+            added.extend(current_list[change_count:])
+            deleted.extend(previous_list[change_count:])
+        elif current_list:
+            added.extend(current_list)
+        elif previous_list:
+            deleted.extend(previous_list)
+
+    return added, changed, deleted
+
+
+def _format_fbx_display_names(entries, used_name_counts):
+    names = []
+    for entry in sorted(entries, key=lambda e: (e['basename'], e['hash'])):
+        base = entry['basename']
+        index = used_name_counts.get(base, 0)
+        if index == 0:
+            display_name = base
+        else:
+            display_name = f'{base}({index})'
+        used_name_counts[base] = index + 1
+        names.append(display_name)
+    return names
+
+
 def generate_fbx_changelog_and_summary(item_data, download_url_list, version_json):
     """Generates changelog information for FBX-only tracking."""
     previous_fbx = version_json.get('fbx-files', {}) or {}
@@ -269,44 +344,19 @@ def generate_fbx_changelog_and_summary(item_data, download_url_list, version_json):
             logger.error(f'An error occurred while parsing {filename}: {e}')
             logger.debug(traceback.format_exc())
 
-    previous_hashes = {file_hash for file_hash in previous_fbx.values()}
-    current_hashes = {file_hash for file_hash in current_fbx.values()}
-
-    added = []
-    changed = []
-    deleted = []
-
-    previous_remaining = dict(previous_fbx)
-    current_remaining = dict(current_fbx)
-
-    for name in set(previous_fbx.keys()) & set(current_fbx.keys()):
-        old_hash = previous_fbx[name]
-        new_hash = current_fbx[name]
-        if old_hash != new_hash:
-            changed.append(name)
-            previous_remaining.pop(name, None)
-            current_remaining.pop(name, None)
-
-    for name, new_hash in current_remaining.items():
-        if new_hash in previous_hashes:
-            continue
-        added.append(name)
-
-    for name, old_hash in previous_remaining.items():
-        if old_hash in current_hashes:
-            continue
-        deleted.append(name)
+    added_entries, changed_entries, deleted_entries = _calculate_fbx_diff_by_name_hash(previous_fbx, current_fbx)
 
-    if not added and not changed and not deleted:
+    if not added_entries and not changed_entries and not deleted_entries:
         logger.info('No FBX hash differences detected; skipping changelog generation.')
         return None, None, None, False, current_fbx
 
     path_list = []
-    for name in sorted(added):
+    used_name_counts = {}
+    for name in _format_fbx_display_names(added_entries, used_name_counts):
         path_list.append({'line_str': name, 'status': 1})
-    for name in sorted(changed):
+    for name in _format_fbx_display_names(changed_entries, used_name_counts):
         path_list.append({'line_str': name, 'status': 3})
-    for name in sorted(deleted):
+    for name in _format_fbx_display_names(deleted_entries, used_name_counts):
         path_list.append({'line_str': name, 'status': 2})
 
     tree = build_tree(path_list)
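
Reviewer note: a minimal sketch (not part of the diff) of how the new name+hash matching behaves, assuming `_calculate_fbx_diff_by_name_hash` and `_format_fbx_display_names` from above are in scope; the paths and hashes are made up.

```python
# Hypothetical 'fbx-files' records: path -> content hash.
previous = {'avatar/Body.fbx': 'aaa', 'avatar/Hair.fbx': 'bbb'}
current = {'v2/Body.fbx': 'aaa', 'v2/Hair.fbx': 'ccc', 'v2/Tail.fbx': 'ddd'}

added, changed, deleted = _calculate_fbx_diff_by_name_hash(previous, current)
# Body.fbx pairs by (basename, hash) despite the folder rename, so it drops
# out of the diff entirely; Hair.fbx pairs by basename with a new hash
# (changed); Tail.fbx has no counterpart (added). 'deleted' is empty here.

used_name_counts = {}
print(_format_fbx_display_names(added, used_name_counts))    # ['Tail.fbx']
print(_format_fbx_display_names(changed, used_name_counts))  # ['Hair.fbx']
# A second occurrence of the same basename would render as 'Hair.fbx(1)'.
```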
@@ -874,15 +924,6 @@ def strftime_now():
 while True:
     logger.info("BoothChecker cycle started")
 
-    # BOOTH Heartbeat check once per cycle
-    try:
-        logger.info('Checking BOOTH heartbeat')
-        requests.get("https://booth.pm", timeout=10)
-    except requests.RequestException as e:
-        logger.error(f'BOOTH heartbeat failed: {e}. Skipping this cycle.')
-        sleep(refresh_interval)
-        continue
-
     # Recreate temporary folders
     recreate_folder("./download")
     recreate_folder("./process")
35 changes: 24 additions & 11 deletions booth_checker/booth.py
@@ -9,14 +9,17 @@ def _extract_download_info(div, link_selector, filename_selector):
     if not download_link or not filename_div:
         return None
 
-    href = download_link.get("data-href")
-    filename = filename_div.get_text()
+    href = download_link.get("data-href") or download_link.get("href")
+    filename = filename_div.get_text(strip=True)
 
-    if not href:
+    if not href or not filename:
         return None
 
-    href = re.sub(r'[^0-9]', '', href)
-    return [href, filename]
+    match = re.search(r'/downloadables/(\d+)', href)
+    if not match:
+        return None
+
+    return [match.group(1), filename]
 
 def _crawling_base(url, cookie, selectors, shortlist, thumblist, product_only_filter=None):
     response = requests.get(url=url, cookies=cookie)
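
Reviewer note: the old `re.sub(r'[^0-9]', '', href)` concatenated every digit in the URL, so any other numeric path segment corrupted the id; the anchored capture takes only the digits after `/downloadables/`. A quick sketch with a hypothetical href:

```python
import re

href = "/items/123/downloadables/456"  # made-up href with two numeric segments

print(re.sub(r'[^0-9]', '', href))                         # '123456' -- old, wrong id
print(re.search(r'/downloadables/(\d+)', href).group(1))   # '456' -- new, correct
```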
@@ -72,9 +75,19 @@ def crawling(order_num, product_only, cookie, shortlist=None, thumblist=None):
         'product_info_selector': 'a',
         'product_info_index': 1,
         'thumb_selector': 'img',
-        'download_item_selector': 'div.legacy-list-item__center, div[data-test="downloadable"]',
-        'download_link_selector': 'a.nav-reverse, div.js-download-button',
-        'filename_selector': 'div.flex-\\[1\\] b'
+        'download_item_selector': (
+            'div.legacy-list-item__center, '
+            'div.mt-16.desktop\\:flex.desktop\\:justify-between.desktop\\:items-center'
+        ),
+        'download_link_selector': (
+            'div.js-download-button[data-test="downloadable"][data-href*="/downloadables/"], '
+            'a.nav-reverse[href*="/downloadables/"]'
+        ),
+        'filename_selector': (
+            'div.min-w-0.u-text-wrap b, '
+            'div.min-w-0.break-words.whitespace-pre-line, '
+            'div.flex-\\[1\\] b'
+        )
     }
     return _crawling_base(url, cookie, selectors, shortlist, thumblist, product_only_filter=product_only)
 
@@ -85,9 +98,9 @@ def crawling_gift(order_num, cookie, shortlist=None, thumblist=None):
         'product_div_class': 'rounded-16 bg-white p-40 mobile:px-16 mobile:pt-24 mobile:pb-40 mobile:rounded-none',
         'product_info_selector': 'div.mt-24.text-left a',
         'thumb_selector': 'img',
-        'download_item_selector': 'div.w-full.text-left, div[data-test="downloadable"]',
-        'download_link_selector': 'a.no-underline.flex.items-center.flex.gap-4, div.js-download-button',
-        'filename_selector': "div[class='min-w-0 break-words whitespace-pre-line']"
+        'download_item_selector': 'div.mt-16.desktop\\:flex.desktop\\:justify-between.desktop\\:items-center',
+        'download_link_selector': 'div.js-download-button[data-test="downloadable"][data-href*="/downloadables/"]',
+        'filename_selector': 'div.min-w-0.break-words.whitespace-pre-line, div.min-w-0.u-text-wrap b'
     }
     return _crawling_base(url, cookie, selectors, shortlist, thumblist)
 
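Reviewer note: the doubled backslashes in these selectors escape Tailwind-style class names (`desktop:flex`, `flex-[1]`), since `:` and `[` are CSS metacharacters. A small check against made-up markup, assuming BeautifulSoup with its default soupsieve backend:

```python
from bs4 import BeautifulSoup

html = '<div class="mt-16 desktop:flex"><div class="flex-[1]"><b>file.zip</b></div></div>'
soup = BeautifulSoup(html, 'html.parser')

# Unescaped, 'desktop:flex' would parse as a pseudo-class and '[1]' as an
# attribute selector; escaping makes them match the literal class names.
print(soup.select_one('div.desktop\\:flex') is not None)   # True
print(soup.select_one('div.flex-\\[1\\] b').get_text())    # file.zip
```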
40 changes: 26 additions & 14 deletions booth_discord/booth.py
@@ -4,13 +4,16 @@
 from selenium.webdriver.chrome.options import Options
 from selenium.webdriver.support.ui import WebDriverWait
 from selenium.webdriver.support import expected_conditions as EC
+from selenium.common.exceptions import TimeoutException
 from bs4 import BeautifulSoup
 
 class BoothCrawler():
     def __init__(self, selenium_url):
         self.selenium_url = selenium_url
 
     def get_booth_order_info(self, item_number, cookie):
+        wait_timeout_seconds = 30
+
         chrome_options = Options()
         chrome_options.add_argument("--headless")
         chrome_options.add_argument("--disable-gpu")
@@ -26,32 +29,41 @@ def get_booth_order_info(self, item_number, cookie):
         driver.refresh()
 
         try:
-            WebDriverWait(driver, 10).until(
-                EC.presence_of_element_located((By.CLASS_NAME, "flex.desktop\\:flex-row.mobile\\:flex-col"))
+            WebDriverWait(driver, wait_timeout_seconds).until(
+                EC.presence_of_element_located(
+                    (
+                        By.CSS_SELECTOR,
+                        "#js-item-order a[href*='/orders/'], #js-item-gift a[href*='/gifts/']"
+                    )
+                )
             )
 
             html = driver.page_source
             soup = BeautifulSoup(html, "html.parser")
 
-            product_div = soup.find("div", class_="flex desktop:flex-row mobile:flex-col")
-            if not product_div:
-                raise Exception("The item does not exist, or it has not been purchased.")
-
-            order_page = product_div.find("a").get("href")
-            order_parse = self.parse_url(order_page)
-
+            # Prefer direct purchase order when both order/gift sections are present.
+            order_link = soup.select_one("#js-item-order a[href*='/orders/']")
+            if order_link is None:
+                order_link = soup.select_one("#js-item-gift a[href*='/gifts/']")
+            if order_link is None:
+                raise Exception("Order/gift link not found; the cookie may have expired or the item may not have been purchased.")
+
+            order_parse = self.parse_url(order_link.get("href", ""))
             return order_parse
+
+        except TimeoutException as exc:
+            raise Exception(
+                f"Page loading timed out and order information could not be found (waited {wait_timeout_seconds} seconds)."
+            ) from exc
         finally:
             driver.quit()
 
     def parse_url(self, url):
-        # Define the URL pattern
-        pattern = r"https://(?:accounts\.)?booth\.pm/(orders|gifts)/([\w-]+)"
-        match = re.match(pattern, url)
+        pattern = r"(?:https://(?:accounts\.)?booth\.pm)?/(orders|gifts)/([\w-]+)"
+        match = re.search(pattern, url)
 
         if match:
             gift_flag = match.group(1) == "gifts"  # True for gifts, False for orders
             order_number = match.group(2)
             return gift_flag, order_number
-        else:
-            raise ValueError("Invalid URL format.")
+        raise ValueError("Invalid URL format.")
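
Reviewer note: with the host made optional and `re.match` replaced by `re.search`, `parse_url` now accepts the root-relative hrefs that `select_one` returns as well as absolute URLs. A short sketch, assuming the class above (the Selenium URL is a placeholder and unused by `parse_url`):

```python
crawler = BoothCrawler("http://chrome:4444")  # placeholder endpoint

print(crawler.parse_url("/orders/12345"))                           # (False, '12345')
print(crawler.parse_url("https://accounts.booth.pm/gifts/a1-b2"))   # (True, 'a1-b2')
```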
1 change: 1 addition & 0 deletions docker-compose.yml
@@ -22,6 +22,7 @@ services:
       - ./changelog:/root/boothchecker/changelog
       - ./config.json:/root/boothchecker/config.json
     depends_on:
+      - postgres
       - chrome
     restart: unless-stopped
     logging: