diff --git a/.gitignore b/.gitignore index dbef116..baf0baa 100644 --- a/.gitignore +++ b/.gitignore @@ -19,3 +19,4 @@ doc/api/ *.js_ *.js.deps *.js.map +.flutter-plugins-dependencies diff --git a/bin/config/web_scraping_config.dart b/bin/config/web_scraping_config.dart new file mode 100644 index 0000000..8cd5af6 --- /dev/null +++ b/bin/config/web_scraping_config.dart @@ -0,0 +1,54 @@ +/// Configuration for web scraping operations +class WebScrapingConfig { + /// Maximum number of retry attempts for web scraping operations + static const int maxRetries = 3; + + /// Base delay between retries (will be exponentially increased) + static const Duration baseRetryDelay = Duration(seconds: 2); + + /// Rate limiting delay between requests to avoid being blocked + static const Duration rateLimitDelay = Duration(seconds: 10); + + /// Timeout for web scraping operations + static const Duration timeout = Duration(seconds: 30); + + /// User agent string to use for requests - looks like a regular browser + static const String userAgent = + 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36'; + + /// Maximum number of concurrent scraping operations + static const int maxConcurrentOperations = 2; + + /// Whether to enable detailed logging + static const bool enableDetailedLogging = true; + + /// Retry delays for different types of failures + static const Map retryDelays = { + 'network': Duration(seconds: 5), + 'rate_limit': Duration(seconds: 30), + 'server_error': Duration(seconds: 10), + 'timeout': Duration(seconds: 15), + }; + + /// URLs that should be treated as rate-limited + static const List rateLimitedDomains = [ + 'untappd.com', + 'beerizer.com', + ]; + + /// Custom delays for specific domains + static const Map domainDelays = { + 'untappd.com': Duration(seconds: 15), + 'beerizer.com': Duration(seconds: 10), + }; + + /// Get delay for a specific domain + static Duration getDelayForDomain(String domain) { + return domainDelays[domain] ?? rateLimitDelay; + } + + /// Check if a domain should be rate-limited + static bool isRateLimitedDomain(String domain) { + return rateLimitedDomains.contains(domain); + } +} diff --git a/bin/main.dart b/bin/main.dart index dbc16ed..3af6422 100644 --- a/bin/main.dart +++ b/bin/main.dart @@ -1,10 +1,17 @@ import 'dart:io'; +import 'package:sentry/sentry.dart'; + import 'beer_bot.dart'; String BOT_TOKEN = Platform.environment['DISCORD_TOKEN'] ?? ''; +String SENTRY_DSN = Platform.environment['SENTRY_DSN'] ?? ''; -void main(List arguments) async { - // Initialize the bot - BeerBot().init(BOT_TOKEN); +Future main(List arguments) async { + await Sentry.init( + (options) { + options.dsn = SENTRY_DSN; + }, + appRunner: () => BeerBot().init(BOT_TOKEN), + ); } diff --git a/bin/modules/beerizer/beerizer_service.dart b/bin/modules/beerizer/beerizer_service.dart index bd3986c..ba6902e 100644 --- a/bin/modules/beerizer/beerizer_service.dart +++ b/bin/modules/beerizer/beerizer_service.dart @@ -1,5 +1,8 @@ +import 'dart:async'; + import 'package:web_scraper/web_scraper.dart'; +import '../../utils/error_monitor.dart'; import 'models/beerizer_beer.dart'; class BeerizerService { @@ -18,111 +21,304 @@ class BeerizerService { /// List of latest beers scraped from Beerizer List get beers => _beers; + /// Maximum number of retry attempts for web scraping operations + static const int _maxRetries = 3; + + /// Base delay between retries (will be exponentially increased) + static const Duration _baseRetryDelay = Duration(seconds: 2); + + /// Scrape beers with retry logic and error handling Future> _scrape(DateTime date) async { var formattedDate = date.toIso8601String().substring(0, 10); - var webScraper = WebScraper(); - await webScraper - .loadFullURL('https://beerizer.com/shop/systembolaget/$formattedDate'); + var url = 'https://beerizer.com/shop/systembolaget/$formattedDate'; + + // Start performance transaction + final transaction = startPerformanceTransaction( + name: 'beerizer_scrape', + operation: 'web_scraping', + description: 'Scrape beers from Beerizer for date $formattedDate', + data: {'date': formattedDate, 'url': url}, + ); + + try { + // Add breadcrumb for context + ErrorMonitor().addBreadcrumb( + message: 'Starting Beerizer scrape', + category: 'scraping', + data: {'date': formattedDate, 'url': url}, + ); + + for (var attempt = 1; attempt <= _maxRetries; attempt++) { + try { + print( + 'Beerizer: Attempting to scrape $url (attempt $attempt/$_maxRetries)'); + + var webScraper = WebScraper(); + var loadSuccess = await webScraper.loadFullURL(url); + + if (!loadSuccess) { + throw Exception('Failed to load URL: $url'); + } + + final checkins = webScraper.getElementAttribute( + 'div.beers > div.beer-table > *', 'data-id'); + + print('Beerizer: Found ${checkins.length} checkins'); + + if (checkins.isEmpty) { + print('Beerizer: No beers found for date $formattedDate'); + return []; + } + + var beers = []; + + for (var latestCheckin in checkins) { + try { + if (latestCheckin == null) continue; + var beer = await _scrapeBeerDetails(webScraper, latestCheckin); + if (beer != null) { + beers.add(beer); + } + } catch (e) { + e.recordError( + source: 'Beerizer', + message: 'Error scraping beer $latestCheckin', + severity: ErrorSeverity.medium, + context: { + 'checkinId': latestCheckin, + 'url': url, + 'attempt': attempt + }, + ); + // Continue with other beers even if one fails + continue; + } + } + + print( + 'Beerizer: Successfully scraped ${beers.length} beers from Beerizer'); + + // Add success breadcrumb + ErrorMonitor().addBreadcrumb( + message: 'Beerizer scrape completed successfully', + category: 'scraping', + data: {'beers_count': beers.length, 'date': formattedDate}, + ); + + return beers; + } catch (e) { + print('Beerizer: Attempt $attempt failed: $e'); + + e.recordError( + source: 'Beerizer', + message: 'Scraping attempt $attempt failed', + severity: attempt == _maxRetries + ? ErrorSeverity.high + : ErrorSeverity.medium, + context: {'attempt': attempt, 'url': url, 'date': formattedDate}, + ); - final checkins = webScraper.getElementAttribute( - 'div.beers > div.beer-table > *', 'data-id'); + if (attempt == _maxRetries) { + print( + 'Beerizer: All retry attempts failed for date $formattedDate'); + return []; + } - print(checkins); + // Exponential backoff + var delay = Duration( + milliseconds: + _baseRetryDelay.inMilliseconds * (1 << (attempt - 1))); + print('Beerizer: Retrying in ${delay.inSeconds} seconds...'); + await Future.delayed(delay); + } + } - if (checkins.isEmpty) { return []; + } finally { + // Finish performance transaction + await transaction?.finish(); } + } - var beers = []; - - for (var latestCheckin in checkins) { + /// Scrape individual beer details with error handling + Future _scrapeBeerDetails( + WebScraper webScraper, String checkinId) async { + try { // Get the name of the beer var beerTitleAddress = - 'div.beers > div.beer-table > div#beer-$latestCheckin > div.beer-inner-top > div.left-col > div.left-col-inner > div.left-col-topper > div.left-top > div.beer-name > a.beer-title > span.title'; + 'div.beers > div.beer-table > div#beer-$checkinId > div.beer-inner-top > div.left-col > div.left-col-inner > div.left-col-topper > div.left-top > div.beer-name > a.beer-title > span.title'; final scrapedName = webScraper.getElementTitle(beerTitleAddress); + if (scrapedName.isEmpty) { + print('Beerizer: Could not find beer name for checkin $checkinId'); + return null; + } final beerName = _cleanUpName(scrapedName.first); // Get the brewery of the beer var beerBreweryAddress = - 'div.beers > div.beer-table > div#beer-$latestCheckin > div.beer-inner-top > div.left-col > div.left-col-inner > div.left-col-topper > div.left-top > div.beer-name > a.beer-title > span.brewery-title'; + 'div.beers > div.beer-table > div#beer-$checkinId > div.beer-inner-top > div.left-col > div.left-col-inner > div.left-col-topper > div.left-top > div.beer-name > a.beer-title > span.brewery-title'; final scrapedBrewery = webScraper.getElementTitle(beerBreweryAddress); - final beerBrewery = _cleanUpName(scrapedBrewery.first); + final beerBrewery = scrapedBrewery.isEmpty + ? 'Unknown Brewery' + : _cleanUpName(scrapedBrewery.first); // Get the price of the beer var beerPriceAdress = - 'div.beers > div.beer-table > div#beer-$latestCheckin > div.beer-inner-top > div.left-col > div.left-col-inner > div.mid-col > div.mid-price-col'; + 'div.beers > div.beer-table > div#beer-$checkinId > div.beer-inner-top > div.left-col > div.left-col-inner > div.mid-col > div.mid-price-col'; final scrapedTitle = webScraper.getElementTitle(beerPriceAdress); - final beerPrice = _cleanUpPrice(scrapedTitle.first); + final beerPrice = + scrapedTitle.isEmpty ? 'N/A' : _cleanUpPrice(scrapedTitle.first); // Get Untappd rating var untappdRatingAddress = - 'div.beers > div.beer-table > div#beer-$latestCheckin > div.beer-inner-top > div.right-col'; + 'div.beers > div.beer-table > div#beer-$checkinId > div.beer-inner-top > div.right-col'; final scrapedUntappdRating = webScraper.getElementTitle(untappdRatingAddress); - final untappdRating = _cleanUpUntappdRating(scrapedUntappdRating.first); + final untappdRating = scrapedUntappdRating.isEmpty + ? 'N/A' + : _cleanUpUntappdRating(scrapedUntappdRating.first); // Get style of the beer var beerStyleAddress = - 'div.beers > div.beer-table > div#beer-$latestCheckin > div.beer-inner-top > div.right-col'; + 'div.beers > div.beer-table > div#beer-$checkinId > div.beer-inner-top > div.right-col'; final scrapedStyle = webScraper.getElementTitle(beerStyleAddress); - final beerStyle = _cleanUpStyle(scrapedStyle.first); + final beerStyle = scrapedStyle.isEmpty + ? 'Unknown Style' + : _cleanUpStyle(scrapedStyle.first); - var value = BeerizerBeer( + return BeerizerBeer( name: beerName, brewery: beerBrewery, price: beerPrice, untappdRating: untappdRating, style: beerStyle, ); - beers.add(value); + } catch (e) { + e.recordError( + source: 'Beerizer', + message: 'Error scraping beer details for $checkinId', + severity: ErrorSeverity.medium, + context: {'checkinId': checkinId}, + ); + return null; } - print('Scraped ${beers.length} beers from Beerizer'); - return beers; } /// Scrape the given date's beers from Beerizer Future scrapeBeer(DateTime date) async { - _beers = await _scrape(date); + try { + _beers = await _scrape(date); + } catch (e) { + e.recordError( + source: 'Beerizer', + message: 'Error in scrapeBeer', + severity: ErrorSeverity.high, + context: {'date': date.toIso8601String()}, + ); + print('Beerizer: Error in scrapeBeer: $e'); + _beers = []; + } } Future> quickScrape(String date) async { - return await _scrape(DateTime.parse(date)); + try { + return await _scrape(DateTime.parse(date)); + } catch (e) { + e.recordError( + source: 'Beerizer', + message: 'Error in quickScrape', + severity: ErrorSeverity.high, + context: {'date': date}, + ); + print('Beerizer: Error in quickScrape: $e'); + return []; + } } String _cleanUpStyle(String style) { - var onlyStyle = style.trim(); - final stringlist = onlyStyle.split('\n'); + try { + var onlyStyle = style.trim(); + final stringlist = onlyStyle.split('\n'); - onlyStyle = stringlist[17].trimLeft(); - if (onlyStyle.isEmpty) { - onlyStyle = stringlist[14].trimLeft(); - } + if (stringlist.length > 17) { + onlyStyle = stringlist[17].trimLeft(); + } else if (stringlist.length > 14) { + onlyStyle = stringlist[14].trimLeft(); + } else { + onlyStyle = 'Unknown Style'; + } - return onlyStyle; + return onlyStyle.isEmpty ? 'Unknown Style' : onlyStyle; + } catch (e) { + e.recordError( + source: 'Beerizer', + message: 'Error cleaning up style', + severity: ErrorSeverity.low, + context: {'style': style}, + ); + print('Beerizer: Error cleaning up style: $e'); + return 'Unknown Style'; + } } String _cleanUpName(String name) { - var onlyPrice = name.trim(); - onlyPrice = onlyPrice.replaceAll('\n', '').trim(); + try { + var onlyPrice = name.trim(); + onlyPrice = onlyPrice.replaceAll('\n', '').trim(); - final firstWhitespace = onlyPrice.indexOf(' '); - if (firstWhitespace != -1) { - onlyPrice = onlyPrice.substring(0, firstWhitespace + 1) + - onlyPrice.substring(firstWhitespace + 1).replaceAll(' ', ''); - } + final firstWhitespace = onlyPrice.indexOf(' '); + if (firstWhitespace != -1) { + onlyPrice = onlyPrice.substring(0, firstWhitespace + 1) + + onlyPrice.substring(firstWhitespace + 1).replaceAll(' ', ''); + } - return onlyPrice; + return onlyPrice.isEmpty ? 'Unknown Beer' : onlyPrice; + } catch (e) { + e.recordError( + source: 'Beerizer', + message: 'Error cleaning up name', + severity: ErrorSeverity.low, + context: {'name': name}, + ); + print('Beerizer: Error cleaning up name: $e'); + return 'Unknown Beer'; + } } String _cleanUpPrice(String price) { - final onlyPrice = price.trim().substring(3); - final firstWhitespace = onlyPrice.indexOf(' '); + try { + if (price.length < 3) return 'N/A'; + final onlyPrice = price.trim().substring(3); + final firstWhitespace = onlyPrice.indexOf(' '); - return onlyPrice.substring(0, firstWhitespace - 1).trim(); + if (firstWhitespace == -1) return onlyPrice.trim(); + return onlyPrice.substring(0, firstWhitespace - 1).trim(); + } catch (e) { + e.recordError( + source: 'Beerizer', + message: 'Error cleaning up price', + severity: ErrorSeverity.low, + context: {'price': price}, + ); + print('Beerizer: Error cleaning up price: $e'); + return 'N/A'; + } } String _cleanUpUntappdRating(String rating) { - return rating.trim().substring(0, 5).trim(); + try { + if (rating.length < 5) return 'N/A'; + return rating.trim().substring(0, 5).trim(); + } catch (e) { + e.recordError( + source: 'Beerizer', + message: 'Error cleaning up Untappd rating', + severity: ErrorSeverity.low, + context: {'rating': rating}, + ); + print('Beerizer: Error cleaning up Untappd rating: $e'); + return 'N/A'; + } } } diff --git a/bin/modules/untappd/untapped_module.dart b/bin/modules/untappd/untapped_module.dart index ed68c1b..dd88a36 100644 --- a/bin/modules/untappd/untapped_module.dart +++ b/bin/modules/untappd/untapped_module.dart @@ -5,6 +5,7 @@ import 'package:nyxx/nyxx.dart'; import 'package:nyxx_commands/nyxx_commands.dart'; import 'package:web_scraper/web_scraper.dart'; +import '../../utils/error_monitor.dart'; import '../bot_module.dart'; import 'models/untappd_checkin.dart'; import 'repository/data_repository.dart'; @@ -23,6 +24,15 @@ class UntappdModule extends BotModule { bool persistData = true; + /// Maximum number of retry attempts for web scraping operations + static const int _maxRetries = 3; + + /// Base delay between retries (will be exponentially increased) + static const Duration _baseRetryDelay = Duration(seconds: 5); + + /// Rate limiting delay between requests to avoid being blocked + static const Duration _rateLimitDelay = Duration(seconds: 10); + factory UntappdModule() { return _singleton; } @@ -44,6 +54,16 @@ class UntappdModule extends BotModule { // Set module as initialized _isInitialized = true; + + // Add breadcrumb for module initialization + ErrorMonitor().addBreadcrumb( + message: 'Untappd module initialized', + category: 'module', + data: { + 'updateInterval': updateInterval.inMinutes, + 'persistData': persistData + }, + ); } @override @@ -100,84 +120,242 @@ class UntappdModule extends BotModule { /// Fetches and updates untappd checkins for all users void _checkUntappd() async { if (!_isInitialized) { - print('Untappd module not initialized!'); - throw Exception('Untappd module not initialized!'); + print('Untappd: Module not initialized!'); + return; } - final listOfUsers = await _repository.getUserList(); - final latestCheckins = await _repository.getLatestCheckins(); - final updateChannelId = await _repository.getUpdateChannelId(); + // Start performance transaction + final transaction = startPerformanceTransaction( + name: 'untappd_check', + operation: 'web_scraping', + description: 'Check for new Untappd checkins', + ); - if (updateChannelId == null) { - print('No channel available for updates!'); - return; - } + try { + final listOfUsers = await _repository.getUserList(); + final latestCheckins = await _repository.getLatestCheckins(); + final updateChannelId = await _repository.getUpdateChannelId(); - if (listOfUsers.isEmpty) { - print('No users available to scrape!'); - return; - } + if (updateChannelId == null) { + print('Untappd: No channel available for updates!'); + return; + } - listOfUsers.forEach((untappdUsername, userSnowflake) async { - var latestCheckinDisk = latestCheckins[untappdUsername]; - try { - var latestCheckinUntappd = await _getLatestCheckin(untappdUsername); - - // If a new ID is available, post update! - if (latestCheckinUntappd != null && - latestCheckinDisk != latestCheckinUntappd.id) { - // Update latest saved checkin - latestCheckins[untappdUsername] = latestCheckinUntappd.id; - await _repository.setLatestCheckins(latestCheckins); - - // Build update message with info from untappd checkin - var user = await _bot.users.fetch(Snowflake(userSnowflake)); - var embedBuilder = EmbedBuilder(); - embedBuilder.title = '${user.username} is drinking beer!'; - embedBuilder.url = Uri.parse( - _getCheckinUrl(latestCheckinUntappd.id, untappdUsername)); - embedBuilder.description = latestCheckinUntappd.title; - embedBuilder.fields = []; - - if (latestCheckinUntappd.comment.isNotEmpty) { - embedBuilder.fields?.add(EmbedFieldBuilder( - name: 'Comment', - value: latestCheckinUntappd.comment, - isInline: false)); - } + if (listOfUsers.isEmpty) { + print('Untappd: No users available to scrape!'); + return; + } - if (latestCheckinUntappd.rating.isNotEmpty) { - final rating = double.parse(latestCheckinUntappd.rating); - embedBuilder.fields!.add( - EmbedFieldBuilder( - name: 'Rating', - value: _buildRatingEmoji( - rating, - ), - isInline: true, - ), - ); - } + print('Untappd: Starting check for ${listOfUsers.length} users'); + + // Add breadcrumb for context + ErrorMonitor().addBreadcrumb( + message: 'Starting Untappd check', + category: 'scraping', + data: {'users_count': listOfUsers.length}, + ); + + for (var entry in listOfUsers.entries) { + var untappdUsername = entry.key; + var userSnowflake = entry.value; + var latestCheckinDisk = latestCheckins[untappdUsername]; + + try { + print('Untappd: Checking user $untappdUsername'); + var latestCheckinUntappd = + await _getLatestCheckinWithRetry(untappdUsername); - if (latestCheckinUntappd.photoAddress != null) { - embedBuilder.image = EmbedImageBuilder( - url: Uri.parse(latestCheckinUntappd.photoAddress!)); + // If a new ID is available, post update! + if (latestCheckinUntappd != null && + latestCheckinDisk != latestCheckinUntappd.id) { + print('Untappd: New checkin found for $untappdUsername'); + + // Update latest saved checkin + latestCheckins[untappdUsername] = latestCheckinUntappd.id; + await _repository.setLatestCheckins(latestCheckins); + + await _postCheckinUpdate(latestCheckinUntappd, untappdUsername, + userSnowflake, updateChannelId); } - // Get channel used for untappd updates, previously set by discord admin. - var updateChannel = await _bot.channels - .fetch(Snowflake(updateChannelId)) as PartialTextChannel; + // Rate limiting delay between users + await Future.delayed(_rateLimitDelay); + } catch (e) { + e.recordError( + source: 'Untappd', + message: 'Error processing user $untappdUsername', + severity: ErrorSeverity.medium, + context: { + 'username': untappdUsername, + 'userSnowflake': userSnowflake + }, + userId: userSnowflake.toString(), + ); + // Continue with other users even if one fails + continue; + } + } + + print('Untappd: Completed check for all users'); + + // Add success breadcrumb + ErrorMonitor().addBreadcrumb( + message: 'Untappd check completed successfully', + category: 'scraping', + data: {'users_processed': listOfUsers.length}, + ); + } catch (e) { + e.recordError( + source: 'Untappd', + message: 'Error in _checkUntappd', + severity: ErrorSeverity.high, + ); + print('Untappd: Error in _checkUntappd: $e'); + } finally { + // Finish performance transaction + await transaction?.finish(); + } + } - // Send update message - await updateChannel - .sendMessage(MessageBuilder(embeds: [embedBuilder])); + /// Post checkin update to Discord channel + Future _postCheckinUpdate(UntappdCheckin checkin, String username, + int userSnowflake, int updateChannelId) async { + try { + // Build update message with info from untappd checkin + var user = await _bot.users.fetch(Snowflake(userSnowflake)); + var embedBuilder = EmbedBuilder(); + embedBuilder.title = '${user.username} is drinking beer!'; + embedBuilder.url = Uri.parse(_getCheckinUrl(checkin.id, username)); + embedBuilder.description = checkin.title; + embedBuilder.fields = []; + + if (checkin.comment.isNotEmpty) { + embedBuilder.fields?.add(EmbedFieldBuilder( + name: 'Comment', value: checkin.comment, isInline: false)); + } + + if (checkin.rating.isNotEmpty && checkin.rating != '0') { + try { + final rating = double.parse(checkin.rating); + embedBuilder.fields!.add( + EmbedFieldBuilder( + name: 'Rating', + value: _buildRatingEmoji(rating), + isInline: true, + ), + ); + } catch (e) { + e.recordError( + source: 'Untappd', + message: 'Error parsing rating', + severity: ErrorSeverity.low, + context: {'rating': checkin.rating, 'username': username}, + ); + print('Untappd: Error parsing rating: $e'); } - // Sleep 5 seconds per user to avoid suspicious requests to untappd server - await Future.delayed(Duration(seconds: 5)); + } + + if (checkin.photoAddress != null) { + embedBuilder.image = + EmbedImageBuilder(url: Uri.parse(checkin.photoAddress!)); + } + + // Get channel used for untappd updates, previously set by discord admin. + var updateChannel = await _bot.channels.fetch(Snowflake(updateChannelId)) + as PartialTextChannel; + + // Send update message + await updateChannel.sendMessage(MessageBuilder(embeds: [embedBuilder])); + + print('Untappd: Posted update for $username'); + + // Add breadcrumb for successful post + ErrorMonitor().addBreadcrumb( + message: 'Posted Untappd update', + category: 'discord', + data: {'username': username, 'checkinId': checkin.id}, + ); + } catch (e) { + e.recordError( + source: 'Untappd', + message: 'Error posting update for $username', + severity: ErrorSeverity.medium, + context: {'username': username, 'checkinId': checkin.id}, + userId: userSnowflake.toString(), + ); + print('Untappd: Error posting update for $username: $e'); + } + } + + /// Get latest checkin with retry logic + Future _getLatestCheckinWithRetry( + String untappdUsername) async { + for (var attempt = 1; attempt <= _maxRetries; attempt++) { + try { + print( + 'Untappd: Attempting to get latest checkin for $untappdUsername (attempt $attempt/$_maxRetries)'); + return await _getLatestCheckin(untappdUsername); } catch (e) { - print(e.toString()); + print('Untappd: Attempt $attempt failed for $untappdUsername: $e'); + + e.recordError( + source: 'Untappd', + message: 'Get latest checkin attempt $attempt failed', + severity: attempt == _maxRetries + ? ErrorSeverity.high + : ErrorSeverity.medium, + context: {'username': untappdUsername, 'attempt': attempt}, + ); + + if (attempt == _maxRetries) { + print('Untappd: All retry attempts failed for $untappdUsername'); + return null; + } + + // Exponential backoff + var delay = Duration( + milliseconds: + _baseRetryDelay.inMilliseconds * (1 << (attempt - 1))); + print('Untappd: Retrying in ${delay.inSeconds} seconds...'); + await Future.delayed(delay); + } + } + return null; + } + + /// Check validity of the username provided with retry logic + Future _isValidUsernameWithRetry(String untappdUsername) async { + for (var attempt = 1; attempt <= _maxRetries; attempt++) { + try { + print( + 'Untappd: Validating username $untappdUsername (attempt $attempt/$_maxRetries)'); + return await _isValidUsername(untappdUsername); + } catch (e) { + print( + 'Untappd: Username validation attempt $attempt failed for $untappdUsername: $e'); + + e.recordError( + source: 'Untappd', + message: 'Username validation attempt $attempt failed', + severity: attempt == _maxRetries + ? ErrorSeverity.high + : ErrorSeverity.medium, + context: {'username': untappdUsername, 'attempt': attempt}, + ); + + if (attempt == _maxRetries) { + print('Untappd: All validation attempts failed for $untappdUsername'); + return false; + } + + // Exponential backoff + var delay = Duration( + milliseconds: + _baseRetryDelay.inMilliseconds * (1 << (attempt - 1))); + await Future.delayed(delay); } - }); + } + return false; } /// Builds the rating emoji string @@ -193,17 +371,37 @@ class UntappdModule extends BotModule { Future _regUntappdUser( Snowflake userSnowflake, String untappdUsername) async { try { - if (!await _isValidUsername(untappdUsername)) { - print('No checkins available for user, ignoring add.'); + if (!await _isValidUsernameWithRetry(untappdUsername)) { + print( + 'Untappd: No checkins available for user $untappdUsername, ignoring add.'); return false; } var currentList = await _repository.getUserList(); currentList[untappdUsername] = userSnowflake.value; await _repository.setUserList(currentList); - print('Saved ${currentList.toString()} to Hive box!'); + print('Untappd: Saved user $untappdUsername to repository!'); + + // Add breadcrumb for successful registration + ErrorMonitor().addBreadcrumb( + message: 'User registered successfully', + category: 'user', + data: {'username': untappdUsername, 'userId': userSnowflake.value}, + ); + return true; } catch (e) { + e.recordError( + source: 'Untappd', + message: 'Error registering user $untappdUsername', + severity: ErrorSeverity.medium, + context: { + 'username': untappdUsername, + 'userSnowflake': userSnowflake.value + }, + userId: userSnowflake.value.toString(), + ); + print('Untappd: Error registering user $untappdUsername: $e'); return false; } } @@ -212,67 +410,105 @@ class UntappdModule extends BotModule { /// /// Will return **true** if given username has at least one checkin on Untappd. Future _isValidUsername(String untappdUsername) async { - final webScraper = WebScraper('https://untappd.com'); - if (await webScraper.loadWebPage('/user/$untappdUsername')) { + try { + final webScraper = WebScraper('https://untappd.com'); + var loadSuccess = await webScraper.loadWebPage('/user/$untappdUsername'); + + if (!loadSuccess) { + throw Exception( + 'Failed to load Untappd page for user $untappdUsername'); + } + final checkins = webScraper.getElementAttribute( 'div#main-stream > *', 'data-checkin-id'); if (checkins.isEmpty) { + print('Untappd: No checkins found for user $untappdUsername'); return false; } + + print( + 'Untappd: Found ${checkins.length} checkins for user $untappdUsername'); return true; - } else { - throw 'Error during fetching of Untappd data'; + } catch (e) { + e.recordError( + source: 'Untappd', + message: 'Error validating username $untappdUsername', + severity: ErrorSeverity.medium, + context: {'username': untappdUsername}, + ); + print('Untappd: Error validating username $untappdUsername: $e'); + rethrow; } } /// Get latest checkin for given untapped username Future _getLatestCheckin(String untappdUsername) async { - final webScraper = WebScraper('https://untappd.com'); + try { + final webScraper = WebScraper('https://untappd.com'); + + var loadSuccess = await webScraper.loadWebPage('/user/$untappdUsername'); + + if (!loadSuccess) { + throw Exception( + 'Failed to load Untappd page for user $untappdUsername'); + } - if (await webScraper.loadWebPage('/user/$untappdUsername')) { final checkins = webScraper.getElementAttribute( 'div#main-stream > *', 'data-checkin-id'); if (checkins.isEmpty) { - throw 'No checkins are available for $untappdUsername'; + print('Untappd: No checkins are available for $untappdUsername'); + return null; } - var latestCheckin = checkins.first!; + var latestCheckin = checkins.first; + if (latestCheckin == null) { + print('Untappd: Latest checkin is null for $untappdUsername'); + return null; + } - var baseCheckinAddress = - 'div#main-stream > #checkin_$latestCheckin > div.checkin > div.top'; + var baseCheckinAddress = 'div#main-stream > #checkin_$latestCheckin'; - final checkinTitleElement = - webScraper.getElementTitle('$baseCheckinAddress > p.text'); + final checkinTitleElement = webScraper.getElementAttribute( + '$baseCheckinAddress > div > div.checkin > div.top > a > img', 'alt'); final checkinTitle = - checkinTitleElement.isEmpty ? '' : checkinTitleElement.first.trim(); + checkinTitleElement.isEmpty ? '' : checkinTitleElement.first; final checkinRatingElement = webScraper.getElement( - '$baseCheckinAddress > div.checkin-comment > div.rating-serving > div.caps ', + '$baseCheckinAddress > div > div.checkin > div.top > div > div.rating-serving > div', ['data-rating']); final String checkinRating = checkinRatingElement.isEmpty ? '0' - : checkinRatingElement.first['attributes']['data-rating']; + : checkinRatingElement.first['attributes']['data-rating'] ?? '0'; - final checkinCommentElement = webScraper.getElementTitle( - '$baseCheckinAddress > div.checkin-comment > p.comment-text'); + final checkinCommentElement = + webScraper.getElementTitle('#translate_$latestCheckin'); final checkinComment = checkinCommentElement.isEmpty ? '' : checkinCommentElement.first.trim(); final photo = webScraper.getElementAttribute( - '$baseCheckinAddress > p.photo > a > img', 'src'); + '$baseCheckinAddress > div > div.checkin > div.top > p.photo > a > img', + 'src'); final checkinPhotoAddress = photo.isNotEmpty ? photo.first : null; return UntappdCheckin( id: latestCheckin, - title: checkinTitle, + title: checkinTitle ?? '', rating: checkinRating, comment: checkinComment, photoAddress: checkinPhotoAddress); + } catch (e) { + e.recordError( + source: 'Untappd', + message: 'Error getting latest checkin for $untappdUsername', + severity: ErrorSeverity.medium, + context: {'username': untappdUsername}, + ); + print('Untappd: Error getting latest checkin for $untappdUsername: $e'); + rethrow; } - return null; } /// Get untappd detailed checkin URL diff --git a/bin/utils/error_monitor.dart b/bin/utils/error_monitor.dart new file mode 100644 index 0000000..cfa9c08 --- /dev/null +++ b/bin/utils/error_monitor.dart @@ -0,0 +1,463 @@ +import 'dart:async'; + +import 'package:sentry/sentry.dart'; + +/// Error severity levels +enum ErrorSeverity { low, medium, high, critical } + +/// Error monitoring and tracking system with Sentry integration +class ErrorMonitor { + static final ErrorMonitor _instance = ErrorMonitor._internal(); + factory ErrorMonitor() => _instance; + ErrorMonitor._internal(); + + /// Track errors by type and source + final Map> _errorHistory = {}; + + /// Maximum number of errors to keep in history + static const int maxErrorHistory = 100; + + /// Whether Sentry is available + bool get isSentryAvailable => Sentry.isEnabled; + + /// Record an error with Sentry integration + void recordError({ + required String source, + required String message, + required dynamic error, + ErrorSeverity severity = ErrorSeverity.medium, + Map? context, + String? userId, + String? transactionName, + }) { + final record = ErrorRecord( + timestamp: DateTime.now(), + source: source, + message: message, + error: error, + severity: severity, + context: context ?? {}, + ); + + // Store in local history + _storeErrorRecord(record); + + // Send to Sentry if available + _sendToSentry(record, userId: userId, transactionName: transactionName); + + // Log locally + _logError(record); + } + + /// Store error record in local history + void _storeErrorRecord(ErrorRecord record) { + if (!_errorHistory.containsKey(record.source)) { + _errorHistory[record.source] = []; + } + + _errorHistory[record.source]!.add(record); + + // Keep only the latest errors + if (_errorHistory[record.source]!.length > maxErrorHistory) { + _errorHistory[record.source]!.removeAt(0); + } + } + + /// Send error to Sentry + void _sendToSentry(ErrorRecord record, + {String? userId, String? transactionName}) { + if (!isSentryAvailable) { + print('Sentry: Not available, skipping Sentry reporting'); + return; + } + + try { + // Create Sentry event + final event = SentryEvent( + message: SentryMessage(record.message), + level: _mapSeverityToSentryLevel(record.severity), + tags: { + 'source': record.source, + 'severity': record.severity.name, + }, + timestamp: record.timestamp, + // ignore: deprecated_member_use + extra: { + 'context': record.context, + }, + user: userId != null ? SentryUser(id: userId) : null, + ); + + // Add breadcrumb for better context + Sentry.addBreadcrumb( + Breadcrumb( + message: 'Error in ${record.source}', + category: 'error', + level: _mapSeverityToSentryLevel(record.severity), + data: { + 'message': record.message, + 'context': record.context, + }, + ), + ); + + // Capture the event + Sentry.captureEvent(event); + + print('Sentry: Error sent to Sentry successfully'); + } catch (e) { + print('Sentry: Failed to send error to Sentry: $e'); + } + } + + /// Map internal severity to Sentry level + SentryLevel _mapSeverityToSentryLevel(ErrorSeverity severity) { + switch (severity) { + case ErrorSeverity.low: + return SentryLevel.info; + case ErrorSeverity.medium: + return SentryLevel.warning; + case ErrorSeverity.high: + return SentryLevel.error; + case ErrorSeverity.critical: + return SentryLevel.fatal; + } + } + + /// Start a performance transaction + ISentrySpan? startTransaction({ + required String name, + required String operation, + String? description, + Map? data, + }) { + if (!isSentryAvailable) { + print('Sentry: Not available, skipping transaction'); + return null; + } + + try { + final transaction = Sentry.startTransaction( + name, + operation, + description: description, + bindToScope: true, + ); + + if (data != null) { + transaction.setData('custom_data', data); + } + + print('Sentry: Started transaction: $name'); + return transaction; + } catch (e) { + print('Sentry: Failed to start transaction: $e'); + return null; + } + } + + /// Add breadcrumb for better context + void addBreadcrumb({ + required String message, + required String category, + ErrorSeverity severity = ErrorSeverity.low, + Map? data, + }) { + if (!isSentryAvailable) return; + + try { + Sentry.addBreadcrumb( + Breadcrumb( + message: message, + category: category, + level: _mapSeverityToSentryLevel(severity), + data: data, + ), + ); + } catch (e) { + print('Sentry: Failed to add breadcrumb: $e'); + } + } + + /// Set user context for better error tracking + void setUserContext(String userId, {String? username, String? email}) { + if (!isSentryAvailable) return; + + try { + Sentry.configureScope((scope) { + scope.setUser(SentryUser( + id: userId, + username: username, + email: email, + )); + }); + print('Sentry: User context set for $userId'); + } catch (e) { + print('Sentry: Failed to set user context: $e'); + } + } + + /// Set tag for filtering and grouping + void setTag(String key, String value) { + if (!isSentryAvailable) return; + + try { + Sentry.configureScope((scope) { + scope.setTag(key, value); + }); + } catch (e) { + print('Sentry: Failed to set tag: $e'); + } + } + + /// Set extra data for additional context + void setExtra(String key, dynamic value) { + if (!isSentryAvailable) return; + + try { + Sentry.configureScope((scope) { + // ignore: deprecated_member_use + scope.setExtra(key, value); + }); + } catch (e) { + print('Sentry: Failed to set extra data: $e'); + } + } + + /// Log error with appropriate formatting + void _logError(ErrorRecord record) { + final severityEmoji = _getSeverityEmoji(record.severity); + final timestamp = record.timestamp.toIso8601String(); + + print('$severityEmoji [${record.source}] $timestamp: ${record.message}'); + + if (record.error != null) { + print(' Error: ${record.error}'); + } + + if (record.context.isNotEmpty) { + print(' Context: ${record.context}'); + } + } + + /// Get emoji for severity level + String _getSeverityEmoji(ErrorSeverity severity) { + switch (severity) { + case ErrorSeverity.low: + return 'ā„¹ļø'; + case ErrorSeverity.medium: + return 'āš ļø'; + case ErrorSeverity.high: + return '🚨'; + case ErrorSeverity.critical: + return 'šŸ’„'; + } + } + + /// Get error statistics for a source + Map getErrorStats(String source) { + final errors = _errorHistory[source] ?? []; + + if (errors.isEmpty) { + return { + 'total_errors': 0, + 'recent_errors': 0, + 'severity_breakdown': {}, + 'most_common_errors': [], + 'sentry_enabled': isSentryAvailable, + }; + } + + final now = DateTime.now(); + final recentErrors = + errors.where((e) => now.difference(e.timestamp).inHours < 24).length; + + final severityBreakdown = {}; + final errorMessages = {}; + + for (final error in errors) { + severityBreakdown[error.severity.name] = + (severityBreakdown[error.severity.name] ?? 0) + 1; + + errorMessages[error.message] = (errorMessages[error.message] ?? 0) + 1; + } + + final mostCommonErrors = errorMessages.entries.toList() + ..sort((a, b) => b.value.compareTo(a.value)); + + return { + 'total_errors': errors.length, + 'recent_errors': recentErrors, + 'severity_breakdown': severityBreakdown, + 'most_common_errors': mostCommonErrors + .take(5) + .map((e) => { + 'message': e.key, + 'count': e.value, + }) + .toList(), + 'sentry_enabled': isSentryAvailable, + }; + } + + /// Get all error statistics + Map> getAllErrorStats() { + final stats = >{}; + + for (final source in _errorHistory.keys) { + stats[source] = getErrorStats(source); + } + + return stats; + } + + /// Clear error history for a source + void clearErrorHistory(String source) { + _errorHistory.remove(source); + } + + /// Clear all error history + void clearAllErrorHistory() { + _errorHistory.clear(); + } + + /// Check if a source has too many recent errors + bool hasTooManyRecentErrors( + String source, { + int threshold = 10, + Duration window = const Duration(hours: 1), + }) { + final errors = _errorHistory[source] ?? []; + final now = DateTime.now(); + + final recentErrors = + errors.where((e) => now.difference(e.timestamp) < window).length; + + return recentErrors >= threshold; + } + + /// Get recommendations based on error patterns + List getRecommendations(String source) { + final stats = getErrorStats(source); + final recommendations = []; + + if (stats['recent_errors'] > 5) { + recommendations.add( + 'High error rate detected. Consider implementing circuit breaker pattern.'); + } + + final severityBreakdown = + stats['severity_breakdown'] as Map; + if ((severityBreakdown['critical'] ?? 0) > 0) { + recommendations + .add('Critical errors detected. Immediate attention required.'); + } + + if ((severityBreakdown['high'] ?? 0) > 3) { + recommendations.add( + 'Multiple high-severity errors. Review error handling and retry logic.'); + } + + if (!stats['sentry_enabled']) { + recommendations.add( + 'Sentry is not enabled. Consider enabling for better error tracking.'); + } + + return recommendations; + } + + /// Print error summary + void printErrorSummary() { + print('\n=== Error Summary ==='); + print('Sentry Status: ${isSentryAvailable ? "āœ… Enabled" : "āŒ Disabled"}'); + + for (final entry in _errorHistory.entries) { + final source = entry.key; + final stats = getErrorStats(source); + final recommendations = getRecommendations(source); + + print('\nšŸ“Š $source:'); + print(' Total errors: ${stats['total_errors']}'); + print(' Recent errors (24h): ${stats['recent_errors']}'); + + if (recommendations.isNotEmpty) { + print(' šŸ’” Recommendations:'); + for (final rec in recommendations) { + print(' - $rec'); + } + } + } + + print('\n===================\n'); + } + + /// Flush Sentry events (useful before shutdown) + Future flush() async { + if (!isSentryAvailable) return; + + try { + await Sentry.close(); + print('Sentry: Events flushed successfully'); + } catch (e) { + print('Sentry: Failed to flush events: $e'); + } + } +} + +/// Error record for tracking +class ErrorRecord { + final DateTime timestamp; + final String source; + final String message; + final dynamic error; + final ErrorSeverity severity; + final Map context; + + ErrorRecord({ + required this.timestamp, + required this.source, + required this.message, + required this.error, + required this.severity, + required this.context, + }); +} + +/// Extension to easily record errors with Sentry +extension ErrorRecording on Object { + void recordError({ + required String source, + required String message, + ErrorSeverity severity = ErrorSeverity.medium, + Map? context, + String? userId, + String? transactionName, + }) { + ErrorMonitor().recordError( + source: source, + message: message, + error: this, + severity: severity, + context: context, + userId: userId, + transactionName: transactionName, + ); + } +} + +/// Extension for performance monitoring +extension PerformanceMonitoring on Object { + ISentrySpan? startPerformanceTransaction({ + required String name, + required String operation, + String? description, + Map? data, + }) { + return ErrorMonitor().startTransaction( + name: name, + operation: operation, + description: description, + data: data, + ); + } +} diff --git a/bin/utils/web_scraper_utils.dart b/bin/utils/web_scraper_utils.dart new file mode 100644 index 0000000..bc40a35 --- /dev/null +++ b/bin/utils/web_scraper_utils.dart @@ -0,0 +1,192 @@ +import 'dart:async'; + +import 'package:web_scraper/web_scraper.dart'; + +/// Utility class for robust web scraping operations +class WebScraperUtils { + /// Maximum number of retry attempts for web scraping operations + static const int defaultMaxRetries = 3; + + /// Base delay between retries (will be exponentially increased) + static const Duration defaultBaseRetryDelay = Duration(seconds: 2); + + /// Default rate limiting delay between requests + static const Duration defaultRateLimitDelay = Duration(seconds: 5); + + /// Scrape a web page with retry logic and error handling + static Future scrapePageWithRetry( + String baseUrl, + String path, { + int maxRetries = defaultMaxRetries, + Duration baseRetryDelay = defaultBaseRetryDelay, + String? logPrefix, + }) async { + final prefix = logPrefix ?? 'WebScraper'; + + for (var attempt = 1; attempt <= maxRetries; attempt++) { + try { + print( + '$prefix: Attempting to scrape $baseUrl$path (attempt $attempt/$maxRetries)'); + + var webScraper = WebScraper(baseUrl); + var loadSuccess = await webScraper.loadWebPage(path); + + if (!loadSuccess) { + throw Exception('Failed to load URL: $baseUrl$path'); + } + + print('$prefix: Successfully loaded page'); + return webScraper; + } catch (e) { + print('$prefix: Attempt $attempt failed: $e'); + + if (attempt == maxRetries) { + print('$prefix: All retry attempts failed for $baseUrl$path'); + return null; + } + + // Exponential backoff + var delay = Duration( + milliseconds: baseRetryDelay.inMilliseconds * (1 << (attempt - 1))); + print('$prefix: Retrying in ${delay.inSeconds} seconds...'); + await Future.delayed(delay); + } + } + + return null; + } + + /// Scrape a full URL with retry logic and error handling + static Future scrapeFullUrlWithRetry( + String url, { + int maxRetries = defaultMaxRetries, + Duration baseRetryDelay = defaultBaseRetryDelay, + String? logPrefix, + }) async { + final prefix = logPrefix ?? 'WebScraper'; + + for (var attempt = 1; attempt <= maxRetries; attempt++) { + try { + print( + '$prefix: Attempting to scrape $url (attempt $attempt/$maxRetries)'); + + var webScraper = WebScraper(); + var loadSuccess = await webScraper.loadFullURL(url); + + if (!loadSuccess) { + throw Exception('Failed to load URL: $url'); + } + + print('$prefix: Successfully loaded page'); + return webScraper; + } catch (e) { + print('$prefix: Attempt $attempt failed: $e'); + + if (attempt == maxRetries) { + print('$prefix: All retry attempts failed for $url'); + return null; + } + + // Exponential backoff + var delay = Duration( + milliseconds: baseRetryDelay.inMilliseconds * (1 << (attempt - 1))); + print('$prefix: Retrying in ${delay.inSeconds} seconds...'); + await Future.delayed(delay); + } + } + + return null; + } + + /// Safely get element attribute with error handling + static List getElementAttributeSafely( + WebScraper webScraper, + String selector, + String attribute, { + String? logPrefix, + }) { + try { + final result = webScraper.getElementAttribute(selector, attribute); + final prefix = logPrefix ?? 'WebScraper'; + print( + '$prefix: Found ${result.length} elements with selector: $selector'); + return result; + } catch (e) { + print( + '${logPrefix ?? 'WebScraper'}: Error getting element attribute: $e'); + return []; + } + } + + /// Safely get element title with error handling + static List getElementTitleSafely( + WebScraper webScraper, + String selector, { + String? logPrefix, + }) { + try { + final result = webScraper.getElementTitle(selector); + final prefix = logPrefix ?? 'WebScraper'; + print('$prefix: Found ${result.length} titles with selector: $selector'); + return result; + } catch (e) { + print('${logPrefix ?? 'WebScraper'}: Error getting element title: $e'); + return []; + } + } + + /// Safely get element with error handling + static List> getElementSafely( + WebScraper webScraper, + String selector, + List attributes, { + String? logPrefix, + }) { + try { + final result = webScraper.getElement(selector, attributes); + final prefix = logPrefix ?? 'WebScraper'; + print( + '$prefix: Found ${result.length} elements with selector: $selector'); + return result; + } catch (e) { + print('${logPrefix ?? 'WebScraper'}: Error getting element: $e'); + return []; + } + } + + /// Rate limiting delay + static Future rateLimitDelay(Duration delay) async { + print('WebScraper: Rate limiting for ${delay.inSeconds} seconds...'); + await Future.delayed(delay); + } + + /// Validate that a list is not empty and contains valid data + static bool isValidData(List data, {String? logPrefix}) { + if (data.isEmpty) { + print('${logPrefix ?? 'WebScraper'}: No data found'); + return false; + } + return true; + } + + /// Clean up text data safely + static String cleanTextSafely(String text, {String defaultValue = 'N/A'}) { + try { + final cleaned = text.trim(); + return cleaned.isEmpty ? defaultValue : cleaned; + } catch (e) { + print('WebScraper: Error cleaning text: $e'); + return defaultValue; + } + } + + /// Parse number safely + static double parseNumberSafely(String text, {double defaultValue = 0.0}) { + try { + return double.parse(text); + } catch (e) { + print('WebScraper: Error parsing number "$text": $e'); + return defaultValue; + } + } +} diff --git a/pubspec.yaml b/pubspec.yaml index 1b45d9f..25661f5 100644 --- a/pubspec.yaml +++ b/pubspec.yaml @@ -8,15 +8,16 @@ environment: sdk: ">=3.0.0 <4.0.0" dependencies: - cron: ^0.6.1 + cron: ^0.6.2 hive: ^2.2.3 intl: ^0.20.2 - nyxx: ^6.5.2 - nyxx_commands: ^6.0.3 + nyxx: ^6.7.0 + nyxx_commands: ^6.0.4 web_scraper: git: https://github.com/oelburk/web_scraper.git + sentry: ^9.0.0 dev_dependencies: - lints: ^5.1.1 + lints: ^6.0.0 test: ^1.24.4 pedantic: ^1.11.1