From f96fbb45ea1044a16d83b3c5542db2b2b2e6218d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Alfred=20T=C3=A5ng?= Date: Sun, 29 Jun 2025 14:20:08 +0200 Subject: [PATCH] feat: integrate Puppeteer for enhanced web scraping in UntappdModule - Added Puppeteer dependency for improved browser automation. - Refactored _getLatestCheckin method to utilize Puppeteer for navigating and scraping Untappd user pages. - Enhanced error handling for Cloudflare challenges during page loading. - Updated beer rating display logic in BeerizerModule for better clarity. --- bin/modules/beerizer/beerizer_module.dart | 5 ++- bin/modules/beerizer/beerizer_service.dart | 7 +++- bin/modules/untappd/untapped_module.dart | 48 ++++++++++++++++++++-- pubspec.yaml | 1 + 4 files changed, 56 insertions(+), 5 deletions(-) diff --git a/bin/modules/beerizer/beerizer_module.dart b/bin/modules/beerizer/beerizer_module.dart index 4a3f8ba..a0a4d5e 100644 --- a/bin/modules/beerizer/beerizer_module.dart +++ b/bin/modules/beerizer/beerizer_module.dart @@ -96,9 +96,12 @@ class BeerizerModule extends BotModule { beerString = 'Woho! New beers are releasing today! :beers:\n\n'; } for (var beer in beers) { + final rating = + beer.untappdRating == 'N/A' ? 'N/A' : '${beer.untappdRating} :star:'; + beerString += '**${beer.name}**\n' '${beer.brewery}\n' - '<:untappd:1333124979386220604> ${beer.untappdRating} :star:\n' + '<:untappd:1333124979386220604> $rating\n' '*${beer.style}*\n' '\n'; } diff --git a/bin/modules/beerizer/beerizer_service.dart b/bin/modules/beerizer/beerizer_service.dart index ba6902e..e4279e6 100644 --- a/bin/modules/beerizer/beerizer_service.dart +++ b/bin/modules/beerizer/beerizer_service.dart @@ -308,7 +308,12 @@ class BeerizerService { String _cleanUpUntappdRating(String rating) { try { - if (rating.length < 5) return 'N/A'; + if (rating.length < 5 || + rating.contains('Missi') || + rating.contains('?') || + rating.contains('N/A')) { + return 'N/A'; + } return rating.trim().substring(0, 5).trim(); } catch (e) { e.recordError( diff --git a/bin/modules/untappd/untapped_module.dart b/bin/modules/untappd/untapped_module.dart index dd88a36..ee083ae 100644 --- a/bin/modules/untappd/untapped_module.dart +++ b/bin/modules/untappd/untapped_module.dart @@ -3,6 +3,7 @@ import 'dart:async'; // import 'package:hive/hive.dart'; import 'package:nyxx/nyxx.dart'; import 'package:nyxx_commands/nyxx_commands.dart'; +import 'package:puppeteer/puppeteer.dart'; import 'package:web_scraper/web_scraper.dart'; import '../../utils/error_monitor.dart'; @@ -444,16 +445,57 @@ class UntappdModule extends BotModule { /// Get latest checkin for given untapped username Future _getLatestCheckin(String untappdUsername) async { + Browser? browser; try { - final webScraper = WebScraper('https://untappd.com'); + print('Untappd: Launching browser...'); + // Launch puppeteer browser + browser = await puppeteer.launch( + headless: true, + args: [ + '--no-sandbox', + '--disable-setuid-sandbox', + '--disable-dev-shm-usage', + '--disable-accelerated-2d-canvas', + '--no-first-run', + '--no-zygote', + '--disable-gpu', + ], + ); - var loadSuccess = await webScraper.loadWebPage('/user/$untappdUsername'); + final page = await browser.newPage(); - if (!loadSuccess) { + // Set user agent to mimic a real browser + await page.setUserAgent( + 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/124.0.0.0 Safari/537.36'); + + print('Untappd: Navigating to Untappd page for $untappdUsername...'); + // Navigate to the untappd user page + final url = 'https://untappd.com/user/$untappdUsername'; + await page.goto(url, wait: Until.networkIdle); + + print('Untappd: Waiting for page to load completely...'); + // Wait a bit longer to ensure Cloudflare challenge completes + await Future.delayed(Duration(seconds: 5)); + + // Check if we're still on a Cloudflare challenge page + final title = await page.title; + if (title?.contains('Just a moment') == true) { + print('Untappd: Still on Cloudflare challenge page, waiting longer...'); + await Future.delayed(Duration(seconds: 10)); + } + + print('Untappd: Getting page content for $untappdUsername...'); + // Get the HTML content after Cloudflare challenge is completed + final body = await page.content ?? ''; + + if (body.isEmpty) { throw Exception( 'Failed to load Untappd page for user $untappdUsername'); } + final webScraper = WebScraper(); + webScraper.loadFromString(body); + final checkins = webScraper.getElementAttribute( 'div#main-stream > *', 'data-checkin-id'); diff --git a/pubspec.yaml b/pubspec.yaml index 685f47e..4769200 100644 --- a/pubspec.yaml +++ b/pubspec.yaml @@ -16,6 +16,7 @@ dependencies: web_scraper: git: https://github.com/oelburk/web_scraper.git sentry: ^9.0.0 + puppeteer: ^3.18.0 dev_dependencies: lints: ^5.1.1