From 836633637147ba4e2c1a3e9af3c6ab7a6bb0f8e7 Mon Sep 17 00:00:00 2001 From: BadgerHobbs Date: Sat, 3 Jan 2026 22:06:22 +0000 Subject: [PATCH 1/6] Improved GitHub action for automatic collector implementation with test summary. --- .agent/prompts/implement-collector.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.agent/prompts/implement-collector.md b/.agent/prompts/implement-collector.md index d3d879d9..126718ce 100644 --- a/.agent/prompts/implement-collector.md +++ b/.agent/prompts/implement-collector.md @@ -194,7 +194,7 @@ namespace BinDays.Api.IntegrationTests.Collectors.Councils ### 4.1 Run Tests ```bash -dotnet test --filter "FullyQualifiedName~{CouncilName}Tests.GetBinDaysTest" --logger "console;verbosity=detailed" BinDays.Api.IntegrationTests/BinDays.Api.IntegrationTests.csproj +dotnet test --no-restore --filter "FullyQualifiedName~{CouncilName}Tests.GetBinDaysTest" --logger "console;verbosity=detailed" BinDays.Api.IntegrationTests/BinDays.Api.IntegrationTests.csproj ``` ### 4.2 Debug Failures @@ -203,7 +203,7 @@ If tests fail, enable HTTP logging: ```bash export BINDAYS_ENABLE_HTTP_LOGGING=true -dotnet test --filter "FullyQualifiedName~{CouncilName}Tests.GetBinDaysTest" --logger "console;verbosity=detailed" BinDays.Api.IntegrationTests/BinDays.Api.IntegrationTests.csproj +dotnet test --no-restore --filter "FullyQualifiedName~{CouncilName}Tests.GetBinDaysTest" --logger "console;verbosity=detailed" BinDays.Api.IntegrationTests/BinDays.Api.IntegrationTests.csproj ``` Compare the logged requests against the HAR file: From e6cb547bf29d8ad18180b1f50e0fb9f56353556d Mon Sep 17 00:00:00 2001 From: "github-actions[bot]" Date: Sat, 3 Jan 2026 22:27:36 +0000 Subject: [PATCH 2/6] Add collector for TheMorayCouncil Closes #87 Generated with Codex CLI --- .../Collectors/Councils/TheMorayCouncil.cs | 407 ++++++++++++++++++ .../Councils/TheMorayCouncilTests.cs | 37 ++ 2 files changed, 444 insertions(+) create mode 100644 BinDays.Api.Collectors/Collectors/Councils/TheMorayCouncil.cs create mode 100644 BinDays.Api.IntegrationTests/Collectors/Councils/TheMorayCouncilTests.cs diff --git a/BinDays.Api.Collectors/Collectors/Councils/TheMorayCouncil.cs b/BinDays.Api.Collectors/Collectors/Councils/TheMorayCouncil.cs new file mode 100644 index 00000000..581d6b85 --- /dev/null +++ b/BinDays.Api.Collectors/Collectors/Councils/TheMorayCouncil.cs @@ -0,0 +1,407 @@ +namespace BinDays.Api.Collectors.Collectors.Councils +{ + using BinDays.Api.Collectors.Collectors.Vendors; + using BinDays.Api.Collectors.Models; + using BinDays.Api.Collectors.Utilities; + using System; + using System.Collections.Generic; + using System.Globalization; + using System.Linq; + using System.Text.RegularExpressions; + using System.Web; + + /// + /// Collector implementation for The Moray Council. + /// + internal sealed partial class TheMorayCouncil : GovUkCollectorBase, ICollector + { + private const string BinDaysMetadataKey = "binDays"; + private const string RemainingCalendarsMetadataKey = "remainingCalendars"; + private const string GeneralWasteName = "General Waste"; + private const string GardenWasteName = "Garden Waste"; + private const string PaperAndCardName = "Paper and Card"; + private const string PlasticsAndCansName = "Plastics and Cans"; + private const string GlassName = "Glass"; + + /// + public string Name => "The Moray Council"; + + /// + public Uri WebsiteUrl => new("http://www.moray.gov.uk/"); + + /// + public override string GovUkId => "moray"; + + /// + /// The list of bin types for this collector. + /// + private readonly IReadOnlyCollection _binTypes = new List() + { + new() + { + Name = GeneralWasteName, + Colour = BinColour.Green, + Type = BinType.Bin, + Keys = new List() { "Green bin", "General waste" }.AsReadOnly(), + }, + new() + { + Name = GardenWasteName, + Colour = BinColour.Brown, + Type = BinType.Bin, + Keys = new List() { "Brown bin", "Garden" }.AsReadOnly(), + }, + new() + { + Name = PaperAndCardName, + Colour = BinColour.Blue, + Type = BinType.Bin, + Keys = new List() { "Blue bin", "Paper", "Card" }.AsReadOnly(), + }, + new() + { + Name = PlasticsAndCansName, + Colour = BinColour.Purple, + Type = BinType.Bin, + Keys = new List() { "Purple bin", "Cans", "Plastic" }.AsReadOnly(), + }, + new() + { + Name = GlassName, + Colour = BinColour.Orange, + Type = BinType.Box, + Keys = new List() { "Orange box", "Glass" }.AsReadOnly(), + }, + }.AsReadOnly(); + + /// + /// Regex for extracting addresses from the results page. + /// + [GeneratedRegex("\\d+)\">(?
.*?)", RegexOptions.Singleline)] + private static partial Regex AddressesRegex(); + + /// + /// Regex for extracting calendar links. + /// + [GeneratedRegex("href=['\\\"](?(?:https?://bindayfinder\\.moray\\.gov\\.uk/)?cal_(?\\d{4})_view\\.php\\?id=(?\\d+))['\\\"]", RegexOptions.IgnoreCase | RegexOptions.Singleline)] + private static partial Regex CalendarLinksRegex(); + + /// + /// Regex for extracting month blocks from the calendar. + /// + [GeneratedRegex("

(?[^<]+)

.*?
(?.*?)
\\s*", RegexOptions.Singleline)] + private static partial Regex CalendarMonthRegex(); + + /// + /// Regex for extracting day entries from the calendar month. + /// + [GeneratedRegex("
[^\\\"']*)['\\\"]>(?[^<]+)
")] + private static partial Regex CalendarDayRegex(); + + /// + /// Regex for extracting the calendar year. + /// + [GeneratedRegex("Collections for (?\\d{4})")] + private static partial Regex CalendarYearRegex(); + + /// + public GetAddressesResponse GetAddresses(string postcode, ClientSideResponse? clientSideResponse) + { + // Prepare client-side request for getting addresses + if (clientSideResponse == null) + { + var formattedPostcode = HttpUtility.UrlEncode(ProcessingUtilities.FormatPostcode(postcode)); + + var clientSideRequest = new ClientSideRequest + { + RequestId = 1, + Url = $"https://bindayfinder.moray.gov.uk/refuse_roads.php?strname=&pcode={formattedPostcode}", + Method = "GET", + Headers = new() + { + { "user-agent", Constants.UserAgent }, + }, + }; + + var getAddressesResponse = new GetAddressesResponse + { + NextClientSideRequest = clientSideRequest + }; + + return getAddressesResponse; + } + // Process addresses from response + else if (clientSideResponse.RequestId == 1) + { + var addressMatches = AddressesRegex().Matches(clientSideResponse.Content); + var addresses = new List
(); + + foreach (Match addressMatch in addressMatches) + { + var property = Regex.Replace(addressMatch.Groups["Address"].Value, "\\s+", " ").Trim(); + + addresses.Add(new Address + { + Property = property, + Postcode = postcode, + Uid = addressMatch.Groups["Id"].Value, + }); + } + + var getAddressesResponse = new GetAddressesResponse + { + Addresses = addresses.AsReadOnly(), + }; + + return getAddressesResponse; + } + + // Throw exception for invalid request + throw new InvalidOperationException("Invalid client-side request."); + } + + /// + public GetBinDaysResponse GetBinDays(Address address, ClientSideResponse? clientSideResponse) + { + // Prepare client-side request for getting bin days + if (clientSideResponse == null) + { + var clientSideRequest = new ClientSideRequest + { + RequestId = 1, + Url = $"https://bindayfinder.moray.gov.uk/disp_bins.php?id={address.Uid}", + Method = "GET", + Headers = new() + { + { "user-agent", Constants.UserAgent }, + }, + }; + + var getBinDaysResponse = new GetBinDaysResponse + { + NextClientSideRequest = clientSideRequest + }; + + return getBinDaysResponse; + } + else if (clientSideResponse.RequestId == 1) + { + var calendarUrls = CalendarLinksRegex() + .Matches(clientSideResponse.Content) + .Select(match => NormalizeCalendarUrl(match.Groups["Url"].Value)) + .Distinct() + .OrderBy(url => url, StringComparer.OrdinalIgnoreCase) + .ToList(); + + if (calendarUrls.Count == 0) + { + throw new InvalidOperationException("No calendar links found for the selected address."); + } + + var remainingCalendars = calendarUrls.Skip(1).ToList(); + var metadata = new Dictionary(); + if (remainingCalendars.Count > 0) + { + metadata.Add(RemainingCalendarsMetadataKey, string.Join(",", remainingCalendars)); + } + + var clientSideRequest = new ClientSideRequest + { + RequestId = 2, + Url = calendarUrls.First(), + Method = "GET", + Headers = new() + { + { "user-agent", Constants.UserAgent }, + }, + Options = new ClientSideOptions + { + Metadata = metadata + }, + }; + + var getBinDaysResponse = new GetBinDaysResponse + { + NextClientSideRequest = clientSideRequest + }; + + return getBinDaysResponse; + } + else if (clientSideResponse.RequestId >= 2) + { + var metadata = clientSideResponse.Options.Metadata; + var binDays = new List<(DateOnly Date, string Code)>(); + + if (metadata.TryGetValue(BinDaysMetadataKey, out var existingBinDays)) + { + binDays.AddRange(ParseBinDaysMetadata(existingBinDays)); + } + + binDays.AddRange(ParseCalendarContent(clientSideResponse.Content)); + + var remainingCalendarMetadata = metadata.GetValueOrDefault(RemainingCalendarsMetadataKey); + var remainingCalendarUrls = string.IsNullOrWhiteSpace(remainingCalendarMetadata) + ? new List() + : remainingCalendarMetadata.Split(",", StringSplitOptions.RemoveEmptyEntries | StringSplitOptions.TrimEntries).ToList(); + + if (remainingCalendarUrls.Count > 0) + { + var nextCalendarUrl = remainingCalendarUrls.First(); + var nextRemainingCalendars = remainingCalendarUrls.Skip(1).ToList(); + var nextMetadata = new Dictionary + { + { BinDaysMetadataKey, SerialiseBinDaysMetadata(binDays) } + }; + + if (nextRemainingCalendars.Count > 0) + { + nextMetadata.Add(RemainingCalendarsMetadataKey, string.Join(",", nextRemainingCalendars)); + } + + var clientSideRequest = new ClientSideRequest + { + RequestId = clientSideResponse.RequestId + 1, + Url = nextCalendarUrl, + Method = "GET", + Headers = new() + { + { "user-agent", Constants.UserAgent }, + }, + Options = new ClientSideOptions + { + Metadata = nextMetadata + }, + }; + + var getBinDaysResponse = new GetBinDaysResponse + { + NextClientSideRequest = clientSideRequest + }; + + return getBinDaysResponse; + } + + var binDayResults = binDays.Select(binDay => new BinDay + { + Date = binDay.Date, + Address = address, + Bins = GetBinsForCode(binDay.Code) + }).ToList(); + + var getBinDaysResponseFinal = new GetBinDaysResponse + { + BinDays = ProcessingUtilities.ProcessBinDays(binDayResults), + }; + + return getBinDaysResponseFinal; + } + + // Throw exception for invalid request + throw new InvalidOperationException("Invalid client-side request."); + } + + private static string NormalizeCalendarUrl(string calendarUrl) + { + if (calendarUrl.StartsWith("http", StringComparison.OrdinalIgnoreCase)) + { + return calendarUrl; + } + + return $"https://bindayfinder.moray.gov.uk/{calendarUrl.TrimStart('/')}"; + } + + private static IReadOnlyCollection<(DateOnly Date, string Code)> ParseCalendarContent(string content) + { + var yearMatch = CalendarYearRegex().Match(content); + if (!yearMatch.Success) + { + throw new InvalidOperationException("Calendar year not found in response."); + } + + var year = int.Parse(yearMatch.Groups["Year"].Value, CultureInfo.InvariantCulture); + + var binDays = new List<(DateOnly Date, string Code)>(); + var monthMatches = CalendarMonthRegex().Matches(content); + + foreach (Match monthMatch in monthMatches) + { + var monthName = monthMatch.Groups["Month"].Value.Trim(); + var monthNumber = DateTime.ParseExact(monthName, "MMMM", CultureInfo.InvariantCulture).Month; + var daysHtml = monthMatch.Groups["Days"].Value; + + foreach (Match dayMatch in CalendarDayRegex().Matches(daysHtml)) + { + var className = dayMatch.Groups["Class"].Value.Trim(); + var dayText = dayMatch.Groups["Day"].Value.Trim(); + + if (string.IsNullOrWhiteSpace(dayText) || + string.IsNullOrWhiteSpace(className) || + string.Equals(className, "blank", StringComparison.OrdinalIgnoreCase)) + { + continue; + } + + var date = DateOnly.ParseExact( + $"{dayText}-{monthNumber}-{year}", + "d-M-yyyy", + CultureInfo.InvariantCulture + ); + + binDays.Add((date, className)); + } + } + + return binDays.AsReadOnly(); + } + + private static IReadOnlyCollection<(DateOnly Date, string Code)> ParseBinDaysMetadata(string metadata) + { + var binDays = new List<(DateOnly Date, string Code)>(); + var entries = metadata.Split("|", StringSplitOptions.RemoveEmptyEntries | StringSplitOptions.TrimEntries); + + foreach (var entry in entries) + { + var parts = entry.Split(":", StringSplitOptions.RemoveEmptyEntries | StringSplitOptions.TrimEntries); + if (parts.Length != 2) + { + continue; + } + + var date = DateOnly.ParseExact(parts[0], "yyyy-MM-dd", CultureInfo.InvariantCulture); + var code = parts[1]; + + binDays.Add((date, code)); + } + + return binDays.AsReadOnly(); + } + + private static string SerialiseBinDaysMetadata(IEnumerable<(DateOnly Date, string Code)> binDays) + { + return string.Join( + "|", + binDays.Select(binDay => $"{binDay.Date:yyyy-MM-dd}:{binDay.Code}") + ); + } + + private IReadOnlyCollection GetBinsForCode(string code) + { + var upperCode = code.ToUpperInvariant(); + + if (upperCode == "B") + { + return _binTypes.Where(bin => bin.Name == GardenWasteName).ToList().AsReadOnly(); + } + else if (upperCode == "GPOC") + { + return _binTypes.Where(bin => bin.Name != GardenWasteName).ToList().AsReadOnly(); + } + else if (upperCode == "GBPOC") + { + return _binTypes.ToList().AsReadOnly(); + } + + throw new InvalidOperationException($"Unknown bin code: {code}"); + } + } +} diff --git a/BinDays.Api.IntegrationTests/Collectors/Councils/TheMorayCouncilTests.cs b/BinDays.Api.IntegrationTests/Collectors/Councils/TheMorayCouncilTests.cs new file mode 100644 index 00000000..51d8666f --- /dev/null +++ b/BinDays.Api.IntegrationTests/Collectors/Councils/TheMorayCouncilTests.cs @@ -0,0 +1,37 @@ +namespace BinDays.Api.IntegrationTests.Collectors.Councils +{ + using BinDays.Api.Collectors.Collectors; + using BinDays.Api.Collectors.Collectors.Councils; + using BinDays.Api.Collectors.Services; + using BinDays.Api.IntegrationTests.Helpers; + using System.Threading.Tasks; + using Xunit; + using Xunit.Abstractions; + + public class TheMorayCouncilTests + { + private readonly IntegrationTestClient _client; + private static readonly ICollector _collector = new TheMorayCouncil(); + private readonly CollectorService _collectorService = new([_collector]); + private readonly ITestOutputHelper _outputHelper; + + public TheMorayCouncilTests(ITestOutputHelper outputHelper) + { + _outputHelper = outputHelper; + _client = new IntegrationTestClient(outputHelper); + } + + [Theory] + [InlineData("IV30 6LH")] + public async Task GetBinDaysTest(string postcode) + { + await TestSteps.EndToEnd( + _client, + _collectorService, + _collector, + postcode, + _outputHelper + ); + } + } +} From 0abbae56469bc32509dd3fd9817d1602aec6cdcf Mon Sep 17 00:00:00 2001 From: GitHub Action Date: Sat, 3 Jan 2026 22:28:20 +0000 Subject: [PATCH 3/6] Auto-format code with dotnet format --- .../Collectors/Councils/TheMorayCouncil.cs | 817 +++++++++--------- .../Councils/TheMorayCouncilTests.cs | 74 +- 2 files changed, 447 insertions(+), 444 deletions(-) diff --git a/BinDays.Api.Collectors/Collectors/Councils/TheMorayCouncil.cs b/BinDays.Api.Collectors/Collectors/Councils/TheMorayCouncil.cs index 581d6b85..dcb58014 100644 --- a/BinDays.Api.Collectors/Collectors/Councils/TheMorayCouncil.cs +++ b/BinDays.Api.Collectors/Collectors/Councils/TheMorayCouncil.cs @@ -1,407 +1,410 @@ -namespace BinDays.Api.Collectors.Collectors.Councils -{ - using BinDays.Api.Collectors.Collectors.Vendors; - using BinDays.Api.Collectors.Models; - using BinDays.Api.Collectors.Utilities; - using System; - using System.Collections.Generic; - using System.Globalization; - using System.Linq; - using System.Text.RegularExpressions; - using System.Web; - - /// - /// Collector implementation for The Moray Council. - /// - internal sealed partial class TheMorayCouncil : GovUkCollectorBase, ICollector - { - private const string BinDaysMetadataKey = "binDays"; - private const string RemainingCalendarsMetadataKey = "remainingCalendars"; - private const string GeneralWasteName = "General Waste"; - private const string GardenWasteName = "Garden Waste"; - private const string PaperAndCardName = "Paper and Card"; - private const string PlasticsAndCansName = "Plastics and Cans"; - private const string GlassName = "Glass"; - - /// - public string Name => "The Moray Council"; - - /// - public Uri WebsiteUrl => new("http://www.moray.gov.uk/"); - - /// - public override string GovUkId => "moray"; - - /// - /// The list of bin types for this collector. - /// - private readonly IReadOnlyCollection _binTypes = new List() - { - new() - { - Name = GeneralWasteName, - Colour = BinColour.Green, - Type = BinType.Bin, - Keys = new List() { "Green bin", "General waste" }.AsReadOnly(), - }, - new() - { - Name = GardenWasteName, - Colour = BinColour.Brown, - Type = BinType.Bin, - Keys = new List() { "Brown bin", "Garden" }.AsReadOnly(), - }, - new() - { - Name = PaperAndCardName, - Colour = BinColour.Blue, - Type = BinType.Bin, - Keys = new List() { "Blue bin", "Paper", "Card" }.AsReadOnly(), - }, - new() - { - Name = PlasticsAndCansName, - Colour = BinColour.Purple, - Type = BinType.Bin, - Keys = new List() { "Purple bin", "Cans", "Plastic" }.AsReadOnly(), - }, - new() - { - Name = GlassName, - Colour = BinColour.Orange, - Type = BinType.Box, - Keys = new List() { "Orange box", "Glass" }.AsReadOnly(), - }, - }.AsReadOnly(); - - /// - /// Regex for extracting addresses from the results page. - /// - [GeneratedRegex("\\d+)\">(?
.*?)", RegexOptions.Singleline)] - private static partial Regex AddressesRegex(); - - /// - /// Regex for extracting calendar links. - /// - [GeneratedRegex("href=['\\\"](?(?:https?://bindayfinder\\.moray\\.gov\\.uk/)?cal_(?\\d{4})_view\\.php\\?id=(?\\d+))['\\\"]", RegexOptions.IgnoreCase | RegexOptions.Singleline)] - private static partial Regex CalendarLinksRegex(); - - /// - /// Regex for extracting month blocks from the calendar. - /// - [GeneratedRegex("

(?[^<]+)

.*?
(?.*?)
\\s*", RegexOptions.Singleline)] - private static partial Regex CalendarMonthRegex(); - - /// - /// Regex for extracting day entries from the calendar month. - /// - [GeneratedRegex("
[^\\\"']*)['\\\"]>(?[^<]+)
")] - private static partial Regex CalendarDayRegex(); - - /// - /// Regex for extracting the calendar year. - /// - [GeneratedRegex("Collections for (?\\d{4})")] - private static partial Regex CalendarYearRegex(); - - /// - public GetAddressesResponse GetAddresses(string postcode, ClientSideResponse? clientSideResponse) - { - // Prepare client-side request for getting addresses - if (clientSideResponse == null) - { - var formattedPostcode = HttpUtility.UrlEncode(ProcessingUtilities.FormatPostcode(postcode)); - - var clientSideRequest = new ClientSideRequest - { - RequestId = 1, - Url = $"https://bindayfinder.moray.gov.uk/refuse_roads.php?strname=&pcode={formattedPostcode}", - Method = "GET", - Headers = new() - { - { "user-agent", Constants.UserAgent }, - }, - }; - - var getAddressesResponse = new GetAddressesResponse - { - NextClientSideRequest = clientSideRequest - }; - - return getAddressesResponse; - } - // Process addresses from response - else if (clientSideResponse.RequestId == 1) - { - var addressMatches = AddressesRegex().Matches(clientSideResponse.Content); - var addresses = new List
(); - - foreach (Match addressMatch in addressMatches) - { - var property = Regex.Replace(addressMatch.Groups["Address"].Value, "\\s+", " ").Trim(); - - addresses.Add(new Address - { - Property = property, - Postcode = postcode, - Uid = addressMatch.Groups["Id"].Value, - }); - } - - var getAddressesResponse = new GetAddressesResponse - { - Addresses = addresses.AsReadOnly(), - }; - - return getAddressesResponse; - } - - // Throw exception for invalid request - throw new InvalidOperationException("Invalid client-side request."); - } - - /// - public GetBinDaysResponse GetBinDays(Address address, ClientSideResponse? clientSideResponse) - { - // Prepare client-side request for getting bin days - if (clientSideResponse == null) - { - var clientSideRequest = new ClientSideRequest - { - RequestId = 1, - Url = $"https://bindayfinder.moray.gov.uk/disp_bins.php?id={address.Uid}", - Method = "GET", - Headers = new() - { - { "user-agent", Constants.UserAgent }, - }, - }; - - var getBinDaysResponse = new GetBinDaysResponse - { - NextClientSideRequest = clientSideRequest - }; - - return getBinDaysResponse; - } - else if (clientSideResponse.RequestId == 1) - { - var calendarUrls = CalendarLinksRegex() - .Matches(clientSideResponse.Content) - .Select(match => NormalizeCalendarUrl(match.Groups["Url"].Value)) - .Distinct() - .OrderBy(url => url, StringComparer.OrdinalIgnoreCase) - .ToList(); - - if (calendarUrls.Count == 0) - { - throw new InvalidOperationException("No calendar links found for the selected address."); - } - - var remainingCalendars = calendarUrls.Skip(1).ToList(); - var metadata = new Dictionary(); - if (remainingCalendars.Count > 0) - { - metadata.Add(RemainingCalendarsMetadataKey, string.Join(",", remainingCalendars)); - } - - var clientSideRequest = new ClientSideRequest - { - RequestId = 2, - Url = calendarUrls.First(), - Method = "GET", - Headers = new() - { - { "user-agent", Constants.UserAgent }, - }, - Options = new ClientSideOptions - { - Metadata = metadata - }, - }; - - var getBinDaysResponse = new GetBinDaysResponse - { - NextClientSideRequest = clientSideRequest - }; - - return getBinDaysResponse; - } - else if (clientSideResponse.RequestId >= 2) - { - var metadata = clientSideResponse.Options.Metadata; - var binDays = new List<(DateOnly Date, string Code)>(); - - if (metadata.TryGetValue(BinDaysMetadataKey, out var existingBinDays)) - { - binDays.AddRange(ParseBinDaysMetadata(existingBinDays)); - } - - binDays.AddRange(ParseCalendarContent(clientSideResponse.Content)); - - var remainingCalendarMetadata = metadata.GetValueOrDefault(RemainingCalendarsMetadataKey); - var remainingCalendarUrls = string.IsNullOrWhiteSpace(remainingCalendarMetadata) - ? new List() - : remainingCalendarMetadata.Split(",", StringSplitOptions.RemoveEmptyEntries | StringSplitOptions.TrimEntries).ToList(); - - if (remainingCalendarUrls.Count > 0) - { - var nextCalendarUrl = remainingCalendarUrls.First(); - var nextRemainingCalendars = remainingCalendarUrls.Skip(1).ToList(); - var nextMetadata = new Dictionary - { - { BinDaysMetadataKey, SerialiseBinDaysMetadata(binDays) } - }; - - if (nextRemainingCalendars.Count > 0) - { - nextMetadata.Add(RemainingCalendarsMetadataKey, string.Join(",", nextRemainingCalendars)); - } - - var clientSideRequest = new ClientSideRequest - { - RequestId = clientSideResponse.RequestId + 1, - Url = nextCalendarUrl, - Method = "GET", - Headers = new() - { - { "user-agent", Constants.UserAgent }, - }, - Options = new ClientSideOptions - { - Metadata = nextMetadata - }, - }; - - var getBinDaysResponse = new GetBinDaysResponse - { - NextClientSideRequest = clientSideRequest - }; - - return getBinDaysResponse; - } - - var binDayResults = binDays.Select(binDay => new BinDay - { - Date = binDay.Date, - Address = address, - Bins = GetBinsForCode(binDay.Code) - }).ToList(); - - var getBinDaysResponseFinal = new GetBinDaysResponse - { - BinDays = ProcessingUtilities.ProcessBinDays(binDayResults), - }; - - return getBinDaysResponseFinal; - } - - // Throw exception for invalid request - throw new InvalidOperationException("Invalid client-side request."); - } - - private static string NormalizeCalendarUrl(string calendarUrl) - { - if (calendarUrl.StartsWith("http", StringComparison.OrdinalIgnoreCase)) - { - return calendarUrl; - } - - return $"https://bindayfinder.moray.gov.uk/{calendarUrl.TrimStart('/')}"; - } - - private static IReadOnlyCollection<(DateOnly Date, string Code)> ParseCalendarContent(string content) - { - var yearMatch = CalendarYearRegex().Match(content); - if (!yearMatch.Success) - { - throw new InvalidOperationException("Calendar year not found in response."); - } - - var year = int.Parse(yearMatch.Groups["Year"].Value, CultureInfo.InvariantCulture); - - var binDays = new List<(DateOnly Date, string Code)>(); - var monthMatches = CalendarMonthRegex().Matches(content); - - foreach (Match monthMatch in monthMatches) - { - var monthName = monthMatch.Groups["Month"].Value.Trim(); - var monthNumber = DateTime.ParseExact(monthName, "MMMM", CultureInfo.InvariantCulture).Month; - var daysHtml = monthMatch.Groups["Days"].Value; - - foreach (Match dayMatch in CalendarDayRegex().Matches(daysHtml)) - { - var className = dayMatch.Groups["Class"].Value.Trim(); - var dayText = dayMatch.Groups["Day"].Value.Trim(); - - if (string.IsNullOrWhiteSpace(dayText) || - string.IsNullOrWhiteSpace(className) || - string.Equals(className, "blank", StringComparison.OrdinalIgnoreCase)) - { - continue; - } - - var date = DateOnly.ParseExact( - $"{dayText}-{monthNumber}-{year}", - "d-M-yyyy", - CultureInfo.InvariantCulture - ); - - binDays.Add((date, className)); - } - } - - return binDays.AsReadOnly(); - } - - private static IReadOnlyCollection<(DateOnly Date, string Code)> ParseBinDaysMetadata(string metadata) - { - var binDays = new List<(DateOnly Date, string Code)>(); - var entries = metadata.Split("|", StringSplitOptions.RemoveEmptyEntries | StringSplitOptions.TrimEntries); - - foreach (var entry in entries) - { - var parts = entry.Split(":", StringSplitOptions.RemoveEmptyEntries | StringSplitOptions.TrimEntries); - if (parts.Length != 2) - { - continue; - } - - var date = DateOnly.ParseExact(parts[0], "yyyy-MM-dd", CultureInfo.InvariantCulture); - var code = parts[1]; - - binDays.Add((date, code)); - } - - return binDays.AsReadOnly(); - } - - private static string SerialiseBinDaysMetadata(IEnumerable<(DateOnly Date, string Code)> binDays) - { - return string.Join( - "|", - binDays.Select(binDay => $"{binDay.Date:yyyy-MM-dd}:{binDay.Code}") - ); - } - - private IReadOnlyCollection GetBinsForCode(string code) - { - var upperCode = code.ToUpperInvariant(); - - if (upperCode == "B") - { - return _binTypes.Where(bin => bin.Name == GardenWasteName).ToList().AsReadOnly(); - } - else if (upperCode == "GPOC") - { - return _binTypes.Where(bin => bin.Name != GardenWasteName).ToList().AsReadOnly(); - } - else if (upperCode == "GBPOC") - { - return _binTypes.ToList().AsReadOnly(); - } - - throw new InvalidOperationException($"Unknown bin code: {code}"); - } - } -} +namespace BinDays.Api.Collectors.Collectors.Councils +{ + using BinDays.Api.Collectors.Collectors.Vendors; + using BinDays.Api.Collectors.Models; + using BinDays.Api.Collectors.Utilities; + using System; + using System.Collections.Generic; + using System.Globalization; + using System.Linq; + using System.Text.RegularExpressions; + using System.Web; + + /// + /// Collector implementation for The Moray Council. + /// + internal sealed partial class TheMorayCouncil : GovUkCollectorBase, ICollector + { + private const string BinDaysMetadataKey = "binDays"; + private const string RemainingCalendarsMetadataKey = "remainingCalendars"; + private const string GeneralWasteName = "General Waste"; + private const string GardenWasteName = "Garden Waste"; + private const string PaperAndCardName = "Paper and Card"; + private const string PlasticsAndCansName = "Plastics and Cans"; + private const string GlassName = "Glass"; + + /// + public string Name => "The Moray Council"; + + /// + public Uri WebsiteUrl => new("http://www.moray.gov.uk/"); + + /// + public override string GovUkId => "moray"; + + /// + /// The list of bin types for this collector. + /// + private readonly IReadOnlyCollection _binTypes = new List() + { + new() + { + Name = GeneralWasteName, + Colour = BinColour.Green, + Type = BinType.Bin, + Keys = new List() { "Green bin", "General waste" }.AsReadOnly(), + }, + new() + { + Name = GardenWasteName, + Colour = BinColour.Brown, + Type = BinType.Bin, + Keys = new List() { "Brown bin", "Garden" }.AsReadOnly(), + }, + new() + { + Name = PaperAndCardName, + Colour = BinColour.Blue, + Type = BinType.Bin, + Keys = new List() { "Blue bin", "Paper", "Card" }.AsReadOnly(), + }, + new() + { + Name = PlasticsAndCansName, + Colour = BinColour.Purple, + Type = BinType.Bin, + Keys = new List() { "Purple bin", "Cans", "Plastic" }.AsReadOnly(), + }, + new() + { + Name = GlassName, + Colour = BinColour.Orange, + Type = BinType.Box, + Keys = new List() { "Orange box", "Glass" }.AsReadOnly(), + }, + }.AsReadOnly(); + + /// + /// Regex for extracting addresses from the results page. + /// + [GeneratedRegex("\\d+)\">(?
.*?)", RegexOptions.Singleline)] + private static partial Regex AddressesRegex(); + + /// + /// Regex for extracting calendar links. + /// + [GeneratedRegex("href=['\\\"](?(?:https?://bindayfinder\\.moray\\.gov\\.uk/)?cal_(?\\d{4})_view\\.php\\?id=(?\\d+))['\\\"]", RegexOptions.IgnoreCase | RegexOptions.Singleline)] + private static partial Regex CalendarLinksRegex(); + + /// + /// Regex for extracting month blocks from the calendar. + /// + [GeneratedRegex("

(?[^<]+)

.*?
(?.*?)
\\s*", RegexOptions.Singleline)] + private static partial Regex CalendarMonthRegex(); + + /// + /// Regex for extracting day entries from the calendar month. + /// + [GeneratedRegex("
[^\\\"']*)['\\\"]>(?[^<]+)
")] + private static partial Regex CalendarDayRegex(); + + /// + /// Regex for extracting the calendar year. + /// + [GeneratedRegex("Collections for (?\\d{4})")] + private static partial Regex CalendarYearRegex(); + + /// + public GetAddressesResponse GetAddresses(string postcode, ClientSideResponse? clientSideResponse) + { + // Prepare client-side request for getting addresses + if (clientSideResponse == null) + { + var formattedPostcode = HttpUtility.UrlEncode(ProcessingUtilities.FormatPostcode(postcode)); + + var clientSideRequest = new ClientSideRequest + { + RequestId = 1, + Url = $"https://bindayfinder.moray.gov.uk/refuse_roads.php?strname=&pcode={formattedPostcode}", + Method = "GET", + Headers = new() + { + { "user-agent", Constants.UserAgent }, + }, + }; + + var getAddressesResponse = new GetAddressesResponse + { + NextClientSideRequest = clientSideRequest + }; + + return getAddressesResponse; + } + // Process addresses from response + else if (clientSideResponse.RequestId == 1) + { + var addressMatches = AddressesRegex().Matches(clientSideResponse.Content); + var addresses = new List
(); + + foreach (Match addressMatch in addressMatches) + { + var property = MyRegex().Replace(addressMatch.Groups["Address"].Value, " ").Trim(); + + addresses.Add(new Address + { + Property = property, + Postcode = postcode, + Uid = addressMatch.Groups["Id"].Value, + }); + } + + var getAddressesResponse = new GetAddressesResponse + { + Addresses = addresses.AsReadOnly(), + }; + + return getAddressesResponse; + } + + // Throw exception for invalid request + throw new InvalidOperationException("Invalid client-side request."); + } + + /// + public GetBinDaysResponse GetBinDays(Address address, ClientSideResponse? clientSideResponse) + { + // Prepare client-side request for getting bin days + if (clientSideResponse == null) + { + var clientSideRequest = new ClientSideRequest + { + RequestId = 1, + Url = $"https://bindayfinder.moray.gov.uk/disp_bins.php?id={address.Uid}", + Method = "GET", + Headers = new() + { + { "user-agent", Constants.UserAgent }, + }, + }; + + var getBinDaysResponse = new GetBinDaysResponse + { + NextClientSideRequest = clientSideRequest + }; + + return getBinDaysResponse; + } + else if (clientSideResponse.RequestId == 1) + { + var calendarUrls = CalendarLinksRegex() + .Matches(clientSideResponse.Content) + .Select(match => NormalizeCalendarUrl(match.Groups["Url"].Value)) + .Distinct() + .OrderBy(url => url, StringComparer.OrdinalIgnoreCase) + .ToList(); + + if (calendarUrls.Count == 0) + { + throw new InvalidOperationException("No calendar links found for the selected address."); + } + + var remainingCalendars = calendarUrls.Skip(1).ToList(); + var metadata = new Dictionary(); + if (remainingCalendars.Count > 0) + { + metadata.Add(RemainingCalendarsMetadataKey, string.Join(",", remainingCalendars)); + } + + var clientSideRequest = new ClientSideRequest + { + RequestId = 2, + Url = calendarUrls.First(), + Method = "GET", + Headers = new() + { + { "user-agent", Constants.UserAgent }, + }, + Options = new ClientSideOptions + { + Metadata = metadata + }, + }; + + var getBinDaysResponse = new GetBinDaysResponse + { + NextClientSideRequest = clientSideRequest + }; + + return getBinDaysResponse; + } + else if (clientSideResponse.RequestId >= 2) + { + var metadata = clientSideResponse.Options.Metadata; + var binDays = new List<(DateOnly Date, string Code)>(); + + if (metadata.TryGetValue(BinDaysMetadataKey, out var existingBinDays)) + { + binDays.AddRange(ParseBinDaysMetadata(existingBinDays)); + } + + binDays.AddRange(ParseCalendarContent(clientSideResponse.Content)); + + var remainingCalendarMetadata = metadata.GetValueOrDefault(RemainingCalendarsMetadataKey); + var remainingCalendarUrls = string.IsNullOrWhiteSpace(remainingCalendarMetadata) + ? [] + : remainingCalendarMetadata.Split(",", StringSplitOptions.RemoveEmptyEntries | StringSplitOptions.TrimEntries).ToList(); + + if (remainingCalendarUrls.Count > 0) + { + var nextCalendarUrl = remainingCalendarUrls.First(); + var nextRemainingCalendars = remainingCalendarUrls.Skip(1).ToList(); + var nextMetadata = new Dictionary + { + { BinDaysMetadataKey, SerialiseBinDaysMetadata(binDays) } + }; + + if (nextRemainingCalendars.Count > 0) + { + nextMetadata.Add(RemainingCalendarsMetadataKey, string.Join(",", nextRemainingCalendars)); + } + + var clientSideRequest = new ClientSideRequest + { + RequestId = clientSideResponse.RequestId + 1, + Url = nextCalendarUrl, + Method = "GET", + Headers = new() + { + { "user-agent", Constants.UserAgent }, + }, + Options = new ClientSideOptions + { + Metadata = nextMetadata + }, + }; + + var getBinDaysResponse = new GetBinDaysResponse + { + NextClientSideRequest = clientSideRequest + }; + + return getBinDaysResponse; + } + + var binDayResults = binDays.Select(binDay => new BinDay + { + Date = binDay.Date, + Address = address, + Bins = GetBinsForCode(binDay.Code) + }).ToList(); + + var getBinDaysResponseFinal = new GetBinDaysResponse + { + BinDays = ProcessingUtilities.ProcessBinDays(binDayResults), + }; + + return getBinDaysResponseFinal; + } + + // Throw exception for invalid request + throw new InvalidOperationException("Invalid client-side request."); + } + + private static string NormalizeCalendarUrl(string calendarUrl) + { + if (calendarUrl.StartsWith("http", StringComparison.OrdinalIgnoreCase)) + { + return calendarUrl; + } + + return $"https://bindayfinder.moray.gov.uk/{calendarUrl.TrimStart('/')}"; + } + + private static IReadOnlyCollection<(DateOnly Date, string Code)> ParseCalendarContent(string content) + { + var yearMatch = CalendarYearRegex().Match(content); + if (!yearMatch.Success) + { + throw new InvalidOperationException("Calendar year not found in response."); + } + + var year = int.Parse(yearMatch.Groups["Year"].Value, CultureInfo.InvariantCulture); + + var binDays = new List<(DateOnly Date, string Code)>(); + var monthMatches = CalendarMonthRegex().Matches(content); + + foreach (Match monthMatch in monthMatches) + { + var monthName = monthMatch.Groups["Month"].Value.Trim(); + var monthNumber = DateTime.ParseExact(monthName, "MMMM", CultureInfo.InvariantCulture).Month; + var daysHtml = monthMatch.Groups["Days"].Value; + + foreach (Match dayMatch in CalendarDayRegex().Matches(daysHtml)) + { + var className = dayMatch.Groups["Class"].Value.Trim(); + var dayText = dayMatch.Groups["Day"].Value.Trim(); + + if (string.IsNullOrWhiteSpace(dayText) || + string.IsNullOrWhiteSpace(className) || + string.Equals(className, "blank", StringComparison.OrdinalIgnoreCase)) + { + continue; + } + + var date = DateOnly.ParseExact( + $"{dayText}-{monthNumber}-{year}", + "d-M-yyyy", + CultureInfo.InvariantCulture + ); + + binDays.Add((date, className)); + } + } + + return binDays.AsReadOnly(); + } + + private static IReadOnlyCollection<(DateOnly Date, string Code)> ParseBinDaysMetadata(string metadata) + { + var binDays = new List<(DateOnly Date, string Code)>(); + var entries = metadata.Split("|", StringSplitOptions.RemoveEmptyEntries | StringSplitOptions.TrimEntries); + + foreach (var entry in entries) + { + var parts = entry.Split(":", StringSplitOptions.RemoveEmptyEntries | StringSplitOptions.TrimEntries); + if (parts.Length != 2) + { + continue; + } + + var date = DateOnly.ParseExact(parts[0], "yyyy-MM-dd", CultureInfo.InvariantCulture); + var code = parts[1]; + + binDays.Add((date, code)); + } + + return binDays.AsReadOnly(); + } + + private static string SerialiseBinDaysMetadata(IEnumerable<(DateOnly Date, string Code)> binDays) + { + return string.Join( + "|", + binDays.Select(binDay => $"{binDay.Date:yyyy-MM-dd}:{binDay.Code}") + ); + } + + private IReadOnlyCollection GetBinsForCode(string code) + { + var upperCode = code.ToUpperInvariant(); + + if (upperCode == "B") + { + return _binTypes.Where(bin => bin.Name == GardenWasteName).ToList().AsReadOnly(); + } + else if (upperCode == "GPOC") + { + return _binTypes.Where(bin => bin.Name != GardenWasteName).ToList().AsReadOnly(); + } + else if (upperCode == "GBPOC") + { + return _binTypes.ToList().AsReadOnly(); + } + + throw new InvalidOperationException($"Unknown bin code: {code}"); + } + + [GeneratedRegex("\\s+")] + private static partial Regex MyRegex(); + } +} diff --git a/BinDays.Api.IntegrationTests/Collectors/Councils/TheMorayCouncilTests.cs b/BinDays.Api.IntegrationTests/Collectors/Councils/TheMorayCouncilTests.cs index 51d8666f..68429c02 100644 --- a/BinDays.Api.IntegrationTests/Collectors/Councils/TheMorayCouncilTests.cs +++ b/BinDays.Api.IntegrationTests/Collectors/Councils/TheMorayCouncilTests.cs @@ -1,37 +1,37 @@ -namespace BinDays.Api.IntegrationTests.Collectors.Councils -{ - using BinDays.Api.Collectors.Collectors; - using BinDays.Api.Collectors.Collectors.Councils; - using BinDays.Api.Collectors.Services; - using BinDays.Api.IntegrationTests.Helpers; - using System.Threading.Tasks; - using Xunit; - using Xunit.Abstractions; - - public class TheMorayCouncilTests - { - private readonly IntegrationTestClient _client; - private static readonly ICollector _collector = new TheMorayCouncil(); - private readonly CollectorService _collectorService = new([_collector]); - private readonly ITestOutputHelper _outputHelper; - - public TheMorayCouncilTests(ITestOutputHelper outputHelper) - { - _outputHelper = outputHelper; - _client = new IntegrationTestClient(outputHelper); - } - - [Theory] - [InlineData("IV30 6LH")] - public async Task GetBinDaysTest(string postcode) - { - await TestSteps.EndToEnd( - _client, - _collectorService, - _collector, - postcode, - _outputHelper - ); - } - } -} +namespace BinDays.Api.IntegrationTests.Collectors.Councils +{ + using BinDays.Api.Collectors.Collectors; + using BinDays.Api.Collectors.Collectors.Councils; + using BinDays.Api.Collectors.Services; + using BinDays.Api.IntegrationTests.Helpers; + using System.Threading.Tasks; + using Xunit; + using Xunit.Abstractions; + + public class TheMorayCouncilTests + { + private readonly IntegrationTestClient _client; + private static readonly ICollector _collector = new TheMorayCouncil(); + private readonly CollectorService _collectorService = new([_collector]); + private readonly ITestOutputHelper _outputHelper; + + public TheMorayCouncilTests(ITestOutputHelper outputHelper) + { + _outputHelper = outputHelper; + _client = new IntegrationTestClient(outputHelper); + } + + [Theory] + [InlineData("IV30 6LH")] + public async Task GetBinDaysTest(string postcode) + { + await TestSteps.EndToEnd( + _client, + _collectorService, + _collector, + postcode, + _outputHelper + ); + } + } +} From ced1a32a1f90ff82cdabaa4ba8705ddbc4afffa9 Mon Sep 17 00:00:00 2001 From: BadgerHobbs Date: Sat, 10 Jan 2026 00:42:47 +0000 Subject: [PATCH 4/6] Format TheMorayCouncil --- .../Collectors/Councils/TheMorayCouncil.cs | 677 +++++++++--------- .../Councils/TheMorayCouncilTests.cs | 61 +- 2 files changed, 385 insertions(+), 353 deletions(-) diff --git a/BinDays.Api.Collectors/Collectors/Councils/TheMorayCouncil.cs b/BinDays.Api.Collectors/Collectors/Councils/TheMorayCouncil.cs index dcb58014..92f76d84 100644 --- a/BinDays.Api.Collectors/Collectors/Councils/TheMorayCouncil.cs +++ b/BinDays.Api.Collectors/Collectors/Councils/TheMorayCouncil.cs @@ -1,214 +1,304 @@ -namespace BinDays.Api.Collectors.Collectors.Councils +namespace BinDays.Api.Collectors.Collectors.Councils; + +using BinDays.Api.Collectors.Collectors.Vendors; +using BinDays.Api.Collectors.Models; +using BinDays.Api.Collectors.Utilities; +using System; +using System.Collections.Generic; +using System.Globalization; +using System.Linq; +using System.Text.RegularExpressions; +using System.Web; + +/// +/// Collector implementation for The Moray Council. +/// +internal sealed partial class TheMorayCouncil : GovUkCollectorBase, ICollector { - using BinDays.Api.Collectors.Collectors.Vendors; - using BinDays.Api.Collectors.Models; - using BinDays.Api.Collectors.Utilities; - using System; - using System.Collections.Generic; - using System.Globalization; - using System.Linq; - using System.Text.RegularExpressions; - using System.Web; + /// + public string Name => "The Moray Council"; + + /// + public Uri WebsiteUrl => new("http://www.moray.gov.uk/"); + + /// + public override string GovUkId => "moray"; + + /// + /// The list of bin types for this collector. + /// + private readonly IReadOnlyCollection _binTypes = + [ + new() + { + Name = _generalWasteName, + Colour = BinColour.Green, + Type = BinType.Bin, + Keys = [ "Green bin", "General waste" ], + }, + new() + { + Name = _gardenWasteName, + Colour = BinColour.Brown, + Type = BinType.Bin, + Keys = [ "Brown bin", "Garden" ], + }, + new() + { + Name = _paperAndCardName, + Colour = BinColour.Blue, + Type = BinType.Bin, + Keys = [ "Blue bin", "Paper", "Card" ], + }, + new() + { + Name = _plasticsAndCansName, + Colour = BinColour.Purple, + Type = BinType.Bin, + Keys = [ "Purple bin", "Cans", "Plastic" ], + }, + new() + { + Name = _glassName, + Colour = BinColour.Orange, + Type = BinType.Box, + Keys = [ "Orange box", "Glass" ], + }, + ]; /// - /// Collector implementation for The Moray Council. + /// Key for storing bin days in metadata. /// - internal sealed partial class TheMorayCouncil : GovUkCollectorBase, ICollector + private const string _binDaysMetadataKey = "binDays"; + + /// + /// Name for the garden waste bin. + /// + private const string _gardenWasteName = "Garden Waste"; + + /// + /// Name for the general waste bin. + /// + private const string _generalWasteName = "General Waste"; + + /// + /// Name for the glass bin. + /// + private const string _glassName = "Glass"; + + /// + /// Name for the paper and card bin. + /// + private const string _paperAndCardName = "Paper and Card"; + + /// + /// Name for the plastics and cans bin. + /// + private const string _plasticsAndCansName = "Plastics and Cans"; + + /// + /// Key for storing remaining calendars in metadata. + /// + private const string _remainingCalendarsMetadataKey = "remainingCalendars"; + + /// + /// Regex for extracting addresses from the results page. + /// + [GeneratedRegex("\\d+)\">(?
.*?)", RegexOptions.Singleline)] + private static partial Regex AddressesRegex(); + + /// + /// Regex for extracting day entries from the calendar month. + /// + [GeneratedRegex("
[^\\\"']*)['\\\"]>(?[^<]+)
")] + private static partial Regex CalendarDayRegex(); + + /// + /// Regex for extracting calendar links. + /// + [GeneratedRegex("href=['\\\"](?(?:https?://bindayfinder\\.moray\\.gov\\.uk/)?cal_(?\\d{4})_view\\.php\\?id=(?\\d+))['\\\"]", RegexOptions.IgnoreCase | RegexOptions.Singleline)] + private static partial Regex CalendarLinksRegex(); + + /// + /// Regex for extracting month blocks from the calendar. + /// + [GeneratedRegex("

(?[^<]+)

.*?
(?.*?)
\\s*", RegexOptions.Singleline)] + private static partial Regex CalendarMonthRegex(); + + /// + /// Regex for extracting the calendar year. + /// + [GeneratedRegex("Collections for (?\\d{4})")] + private static partial Regex CalendarYearRegex(); + + /// + /// Regex for matching whitespace. + /// + [GeneratedRegex("\\s+")] + private static partial Regex WhitespaceRegex(); + + /// + public GetAddressesResponse GetAddresses(string postcode, ClientSideResponse? clientSideResponse) { - private const string BinDaysMetadataKey = "binDays"; - private const string RemainingCalendarsMetadataKey = "remainingCalendars"; - private const string GeneralWasteName = "General Waste"; - private const string GardenWasteName = "Garden Waste"; - private const string PaperAndCardName = "Paper and Card"; - private const string PlasticsAndCansName = "Plastics and Cans"; - private const string GlassName = "Glass"; - - /// - public string Name => "The Moray Council"; - - /// - public Uri WebsiteUrl => new("http://www.moray.gov.uk/"); - - /// - public override string GovUkId => "moray"; - - /// - /// The list of bin types for this collector. - /// - private readonly IReadOnlyCollection _binTypes = new List() + // Prepare client-side request for getting addresses + if (clientSideResponse == null) { - new() - { - Name = GeneralWasteName, - Colour = BinColour.Green, - Type = BinType.Bin, - Keys = new List() { "Green bin", "General waste" }.AsReadOnly(), - }, - new() - { - Name = GardenWasteName, - Colour = BinColour.Brown, - Type = BinType.Bin, - Keys = new List() { "Brown bin", "Garden" }.AsReadOnly(), - }, - new() - { - Name = PaperAndCardName, - Colour = BinColour.Blue, - Type = BinType.Bin, - Keys = new List() { "Blue bin", "Paper", "Card" }.AsReadOnly(), - }, - new() + var formattedPostcode = HttpUtility.UrlEncode(ProcessingUtilities.FormatPostcode(postcode)); + + var clientSideRequest = new ClientSideRequest { - Name = PlasticsAndCansName, - Colour = BinColour.Purple, - Type = BinType.Bin, - Keys = new List() { "Purple bin", "Cans", "Plastic" }.AsReadOnly(), - }, - new() + RequestId = 1, + Url = $"https://bindayfinder.moray.gov.uk/refuse_roads.php?strname=&pcode={formattedPostcode}", + Method = "GET", + Headers = new() + { + { "user-agent", Constants.UserAgent }, + }, + }; + + var getAddressesResponse = new GetAddressesResponse { - Name = GlassName, - Colour = BinColour.Orange, - Type = BinType.Box, - Keys = new List() { "Orange box", "Glass" }.AsReadOnly(), - }, - }.AsReadOnly(); - - /// - /// Regex for extracting addresses from the results page. - /// - [GeneratedRegex("\\d+)\">(?
.*?)", RegexOptions.Singleline)] - private static partial Regex AddressesRegex(); - - /// - /// Regex for extracting calendar links. - /// - [GeneratedRegex("href=['\\\"](?(?:https?://bindayfinder\\.moray\\.gov\\.uk/)?cal_(?\\d{4})_view\\.php\\?id=(?\\d+))['\\\"]", RegexOptions.IgnoreCase | RegexOptions.Singleline)] - private static partial Regex CalendarLinksRegex(); - - /// - /// Regex for extracting month blocks from the calendar. - /// - [GeneratedRegex("

(?[^<]+)

.*?
(?.*?)
\\s*", RegexOptions.Singleline)] - private static partial Regex CalendarMonthRegex(); - - /// - /// Regex for extracting day entries from the calendar month. - /// - [GeneratedRegex("
[^\\\"']*)['\\\"]>(?[^<]+)
")] - private static partial Regex CalendarDayRegex(); - - /// - /// Regex for extracting the calendar year. - /// - [GeneratedRegex("Collections for (?\\d{4})")] - private static partial Regex CalendarYearRegex(); - - /// - public GetAddressesResponse GetAddresses(string postcode, ClientSideResponse? clientSideResponse) + NextClientSideRequest = clientSideRequest, + }; + + return getAddressesResponse; + } + // Process addresses from response + else if (clientSideResponse.RequestId == 1) { - // Prepare client-side request for getting addresses - if (clientSideResponse == null) - { - var formattedPostcode = HttpUtility.UrlEncode(ProcessingUtilities.FormatPostcode(postcode)); + var addressMatches = AddressesRegex().Matches(clientSideResponse.Content); + var addresses = new List
(); - var clientSideRequest = new ClientSideRequest - { - RequestId = 1, - Url = $"https://bindayfinder.moray.gov.uk/refuse_roads.php?strname=&pcode={formattedPostcode}", - Method = "GET", - Headers = new() - { - { "user-agent", Constants.UserAgent }, - }, - }; + foreach (Match addressMatch in addressMatches) + { + var property = WhitespaceRegex().Replace(addressMatch.Groups["address"].Value, " ").Trim(); - var getAddressesResponse = new GetAddressesResponse + var address = new Address { - NextClientSideRequest = clientSideRequest + Property = property, + Postcode = postcode, + Uid = addressMatch.Groups["id"].Value, }; - return getAddressesResponse; + addresses.Add(address); } - // Process addresses from response - else if (clientSideResponse.RequestId == 1) + + var getAddressesResponse = new GetAddressesResponse { - var addressMatches = AddressesRegex().Matches(clientSideResponse.Content); - var addresses = new List
(); + Addresses = [.. addresses], + }; - foreach (Match addressMatch in addressMatches) - { - var property = MyRegex().Replace(addressMatch.Groups["Address"].Value, " ").Trim(); + return getAddressesResponse; + } - addresses.Add(new Address - { - Property = property, - Postcode = postcode, - Uid = addressMatch.Groups["Id"].Value, - }); - } + // Throw exception for invalid request + throw new InvalidOperationException("Invalid client-side request."); + } - var getAddressesResponse = new GetAddressesResponse + /// + public GetBinDaysResponse GetBinDays(Address address, ClientSideResponse? clientSideResponse) + { + // Prepare client-side request for getting bin days + if (clientSideResponse == null) + { + var clientSideRequest = new ClientSideRequest + { + RequestId = 1, + Url = $"https://bindayfinder.moray.gov.uk/disp_bins.php?id={address.Uid}", + Method = "GET", + Headers = new() { - Addresses = addresses.AsReadOnly(), - }; + { "user-agent", Constants.UserAgent }, + }, + }; - return getAddressesResponse; - } + var getBinDaysResponse = new GetBinDaysResponse + { + NextClientSideRequest = clientSideRequest, + }; - // Throw exception for invalid request - throw new InvalidOperationException("Invalid client-side request."); + return getBinDaysResponse; } - - /// - public GetBinDaysResponse GetBinDays(Address address, ClientSideResponse? clientSideResponse) + // Process bin days from response + else if (clientSideResponse.RequestId == 1) { - // Prepare client-side request for getting bin days - if (clientSideResponse == null) + var calendarUrls = CalendarLinksRegex() + .Matches(clientSideResponse.Content) + .Select(match => NormalizeCalendarUrl(match.Groups["url"].Value)) + .Distinct() + .OrderBy(url => url, StringComparer.OrdinalIgnoreCase) + .ToList(); + + if (calendarUrls.Count == 0) { - var clientSideRequest = new ClientSideRequest - { - RequestId = 1, - Url = $"https://bindayfinder.moray.gov.uk/disp_bins.php?id={address.Uid}", - Method = "GET", - Headers = new() - { - { "user-agent", Constants.UserAgent }, - }, - }; + throw new InvalidOperationException("No calendar links found for the selected address."); + } - var getBinDaysResponse = new GetBinDaysResponse + var remainingCalendars = calendarUrls.Skip(1).ToList(); + var metadata = new Dictionary(); + if (remainingCalendars.Count > 0) + { + metadata.Add(_remainingCalendarsMetadataKey, string.Join(",", remainingCalendars)); + } + + var clientSideRequest = new ClientSideRequest + { + RequestId = 2, + Url = calendarUrls.First(), + Method = "GET", + Headers = new() { - NextClientSideRequest = clientSideRequest - }; + { "user-agent", Constants.UserAgent }, + }, + Options = new ClientSideOptions + { + Metadata = metadata, + }, + }; - return getBinDaysResponse; + var getBinDaysResponse = new GetBinDaysResponse + { + NextClientSideRequest = clientSideRequest, + }; + + return getBinDaysResponse; + } + // Process bin days from response + else if (clientSideResponse.RequestId >= 2) + { + var metadata = clientSideResponse.Options.Metadata; + var binDays = new List<(DateOnly Date, string Code)>(); + + if (metadata.TryGetValue(_binDaysMetadataKey, out var existingBinDays)) + { + binDays.AddRange(ParseBinDaysMetadata(existingBinDays)); } - else if (clientSideResponse.RequestId == 1) + + binDays.AddRange(ParseCalendarContent(clientSideResponse.Content)); + + var remainingCalendarMetadata = metadata.GetValueOrDefault(_remainingCalendarsMetadataKey); + var remainingCalendarUrls = string.IsNullOrWhiteSpace(remainingCalendarMetadata) + ? [] + : remainingCalendarMetadata.Split(",", StringSplitOptions.RemoveEmptyEntries | StringSplitOptions.TrimEntries).ToList(); + + if (remainingCalendarUrls.Count > 0) { - var calendarUrls = CalendarLinksRegex() - .Matches(clientSideResponse.Content) - .Select(match => NormalizeCalendarUrl(match.Groups["Url"].Value)) - .Distinct() - .OrderBy(url => url, StringComparer.OrdinalIgnoreCase) - .ToList(); - - if (calendarUrls.Count == 0) + var nextCalendarUrl = remainingCalendarUrls.First(); + var nextRemainingCalendars = remainingCalendarUrls.Skip(1).ToList(); + var nextMetadata = new Dictionary { - throw new InvalidOperationException("No calendar links found for the selected address."); - } + { _binDaysMetadataKey, SerialiseBinDaysMetadata(binDays) }, + }; - var remainingCalendars = calendarUrls.Skip(1).ToList(); - var metadata = new Dictionary(); - if (remainingCalendars.Count > 0) + if (nextRemainingCalendars.Count > 0) { - metadata.Add(RemainingCalendarsMetadataKey, string.Join(",", remainingCalendars)); + nextMetadata.Add(_remainingCalendarsMetadataKey, string.Join(",", nextRemainingCalendars)); } var clientSideRequest = new ClientSideRequest { - RequestId = 2, - Url = calendarUrls.First(), + RequestId = clientSideResponse.RequestId + 1, + Url = nextCalendarUrl, Method = "GET", Headers = new() { @@ -216,195 +306,138 @@ public GetBinDaysResponse GetBinDays(Address address, ClientSideResponse? client }, Options = new ClientSideOptions { - Metadata = metadata + Metadata = nextMetadata, }, }; var getBinDaysResponse = new GetBinDaysResponse { - NextClientSideRequest = clientSideRequest + NextClientSideRequest = clientSideRequest, }; return getBinDaysResponse; } - else if (clientSideResponse.RequestId >= 2) + + var binDayResults = binDays.Select(binDay => new BinDay { - var metadata = clientSideResponse.Options.Metadata; - var binDays = new List<(DateOnly Date, string Code)>(); + Date = binDay.Date, + Address = address, + Bins = GetBinsForCode(binDay.Code), + }).ToList(); - if (metadata.TryGetValue(BinDaysMetadataKey, out var existingBinDays)) - { - binDays.AddRange(ParseBinDaysMetadata(existingBinDays)); - } + var getBinDaysResponseFinal = new GetBinDaysResponse + { + BinDays = ProcessingUtilities.ProcessBinDays(binDayResults), + }; - binDays.AddRange(ParseCalendarContent(clientSideResponse.Content)); + return getBinDaysResponseFinal; + } - var remainingCalendarMetadata = metadata.GetValueOrDefault(RemainingCalendarsMetadataKey); - var remainingCalendarUrls = string.IsNullOrWhiteSpace(remainingCalendarMetadata) - ? [] - : remainingCalendarMetadata.Split(",", StringSplitOptions.RemoveEmptyEntries | StringSplitOptions.TrimEntries).ToList(); + // Throw exception for invalid request + throw new InvalidOperationException("Invalid client-side request."); + } - if (remainingCalendarUrls.Count > 0) - { - var nextCalendarUrl = remainingCalendarUrls.First(); - var nextRemainingCalendars = remainingCalendarUrls.Skip(1).ToList(); - var nextMetadata = new Dictionary - { - { BinDaysMetadataKey, SerialiseBinDaysMetadata(binDays) } - }; + private static string NormalizeCalendarUrl(string calendarUrl) + { + if (calendarUrl.StartsWith("http", StringComparison.OrdinalIgnoreCase)) + { + return calendarUrl; + } - if (nextRemainingCalendars.Count > 0) - { - nextMetadata.Add(RemainingCalendarsMetadataKey, string.Join(",", nextRemainingCalendars)); - } + return $"https://bindayfinder.moray.gov.uk/{calendarUrl.TrimStart('/')}"; + } - var clientSideRequest = new ClientSideRequest - { - RequestId = clientSideResponse.RequestId + 1, - Url = nextCalendarUrl, - Method = "GET", - Headers = new() - { - { "user-agent", Constants.UserAgent }, - }, - Options = new ClientSideOptions - { - Metadata = nextMetadata - }, - }; - - var getBinDaysResponse = new GetBinDaysResponse - { - NextClientSideRequest = clientSideRequest - }; + private static IReadOnlyCollection<(DateOnly Date, string Code)> ParseCalendarContent(string content) + { + var yearMatch = CalendarYearRegex().Match(content); + if (!yearMatch.Success) + { + throw new InvalidOperationException("Calendar year not found in response."); + } - return getBinDaysResponse; - } + var year = int.Parse(yearMatch.Groups["year"].Value, CultureInfo.InvariantCulture); - var binDayResults = binDays.Select(binDay => new BinDay - { - Date = binDay.Date, - Address = address, - Bins = GetBinsForCode(binDay.Code) - }).ToList(); + var binDays = new List<(DateOnly Date, string Code)>(); + var monthMatches = CalendarMonthRegex().Matches(content); - var getBinDaysResponseFinal = new GetBinDaysResponse + foreach (Match monthMatch in monthMatches) + { + var monthName = monthMatch.Groups["month"].Value.Trim(); + var monthNumber = DateTime.ParseExact(monthName, "MMMM", CultureInfo.InvariantCulture).Month; + var daysHtml = monthMatch.Groups["days"].Value; + + foreach (Match dayMatch in CalendarDayRegex().Matches(daysHtml)) + { + var className = dayMatch.Groups["class"].Value.Trim(); + var dayText = dayMatch.Groups["day"].Value.Trim(); + + if (string.IsNullOrWhiteSpace(dayText) || + string.IsNullOrWhiteSpace(className) || + string.Equals(className, "blank", StringComparison.OrdinalIgnoreCase)) { - BinDays = ProcessingUtilities.ProcessBinDays(binDayResults), - }; + continue; + } - return getBinDaysResponseFinal; - } + var date = DateOnly.ParseExact( + $"{dayText}-{monthNumber}-{year}", + "d-M-yyyy", + CultureInfo.InvariantCulture + ); - // Throw exception for invalid request - throw new InvalidOperationException("Invalid client-side request."); + binDays.Add((date, className)); + } } - private static string NormalizeCalendarUrl(string calendarUrl) - { - if (calendarUrl.StartsWith("http", StringComparison.OrdinalIgnoreCase)) - { - return calendarUrl; - } + return binDays.AsReadOnly(); + } - return $"https://bindayfinder.moray.gov.uk/{calendarUrl.TrimStart('/')}"; - } + private static IReadOnlyCollection<(DateOnly Date, string Code)> ParseBinDaysMetadata(string metadata) + { + var binDays = new List<(DateOnly Date, string Code)>(); + var entries = metadata.Split("|", StringSplitOptions.RemoveEmptyEntries | StringSplitOptions.TrimEntries); - private static IReadOnlyCollection<(DateOnly Date, string Code)> ParseCalendarContent(string content) + foreach (var entry in entries) { - var yearMatch = CalendarYearRegex().Match(content); - if (!yearMatch.Success) + var parts = entry.Split(":", StringSplitOptions.RemoveEmptyEntries | StringSplitOptions.TrimEntries); + if (parts.Length != 2) { - throw new InvalidOperationException("Calendar year not found in response."); + continue; } - var year = int.Parse(yearMatch.Groups["Year"].Value, CultureInfo.InvariantCulture); - - var binDays = new List<(DateOnly Date, string Code)>(); - var monthMatches = CalendarMonthRegex().Matches(content); - - foreach (Match monthMatch in monthMatches) - { - var monthName = monthMatch.Groups["Month"].Value.Trim(); - var monthNumber = DateTime.ParseExact(monthName, "MMMM", CultureInfo.InvariantCulture).Month; - var daysHtml = monthMatch.Groups["Days"].Value; - - foreach (Match dayMatch in CalendarDayRegex().Matches(daysHtml)) - { - var className = dayMatch.Groups["Class"].Value.Trim(); - var dayText = dayMatch.Groups["Day"].Value.Trim(); - - if (string.IsNullOrWhiteSpace(dayText) || - string.IsNullOrWhiteSpace(className) || - string.Equals(className, "blank", StringComparison.OrdinalIgnoreCase)) - { - continue; - } + var date = DateOnly.ParseExact(parts[0], "yyyy-MM-dd", CultureInfo.InvariantCulture); + var code = parts[1]; - var date = DateOnly.ParseExact( - $"{dayText}-{monthNumber}-{year}", - "d-M-yyyy", - CultureInfo.InvariantCulture - ); - - binDays.Add((date, className)); - } - } - - return binDays.AsReadOnly(); + binDays.Add((date, code)); } - private static IReadOnlyCollection<(DateOnly Date, string Code)> ParseBinDaysMetadata(string metadata) - { - var binDays = new List<(DateOnly Date, string Code)>(); - var entries = metadata.Split("|", StringSplitOptions.RemoveEmptyEntries | StringSplitOptions.TrimEntries); - - foreach (var entry in entries) - { - var parts = entry.Split(":", StringSplitOptions.RemoveEmptyEntries | StringSplitOptions.TrimEntries); - if (parts.Length != 2) - { - continue; - } + return binDays.AsReadOnly(); + } - var date = DateOnly.ParseExact(parts[0], "yyyy-MM-dd", CultureInfo.InvariantCulture); - var code = parts[1]; + private static string SerialiseBinDaysMetadata(IEnumerable<(DateOnly Date, string Code)> binDays) + { + return string.Join( + "|", + binDays.Select(binDay => $"{binDay.Date:yyyy-MM-dd}:{binDay.Code}") + ); + } - binDays.Add((date, code)); - } + private IReadOnlyCollection GetBinsForCode(string code) + { + var upperCode = code.ToUpperInvariant(); - return binDays.AsReadOnly(); + if (upperCode == "B") + { + return _binTypes.Where(bin => bin.Name == _gardenWasteName).ToList().AsReadOnly(); } - - private static string SerialiseBinDaysMetadata(IEnumerable<(DateOnly Date, string Code)> binDays) + else if (upperCode == "GPOC") { - return string.Join( - "|", - binDays.Select(binDay => $"{binDay.Date:yyyy-MM-dd}:{binDay.Code}") - ); + return _binTypes.Where(bin => bin.Name != _gardenWasteName).ToList().AsReadOnly(); } - - private IReadOnlyCollection GetBinsForCode(string code) + else if (upperCode == "GBPOC") { - var upperCode = code.ToUpperInvariant(); - - if (upperCode == "B") - { - return _binTypes.Where(bin => bin.Name == GardenWasteName).ToList().AsReadOnly(); - } - else if (upperCode == "GPOC") - { - return _binTypes.Where(bin => bin.Name != GardenWasteName).ToList().AsReadOnly(); - } - else if (upperCode == "GBPOC") - { - return _binTypes.ToList().AsReadOnly(); - } - - throw new InvalidOperationException($"Unknown bin code: {code}"); + return _binTypes.ToList().AsReadOnly(); } - [GeneratedRegex("\\s+")] - private static partial Regex MyRegex(); + throw new InvalidOperationException($"Unknown bin code: {code}"); } } diff --git a/BinDays.Api.IntegrationTests/Collectors/Councils/TheMorayCouncilTests.cs b/BinDays.Api.IntegrationTests/Collectors/Councils/TheMorayCouncilTests.cs index 68429c02..e885ae7a 100644 --- a/BinDays.Api.IntegrationTests/Collectors/Councils/TheMorayCouncilTests.cs +++ b/BinDays.Api.IntegrationTests/Collectors/Councils/TheMorayCouncilTests.cs @@ -1,37 +1,36 @@ -namespace BinDays.Api.IntegrationTests.Collectors.Councils +namespace BinDays.Api.IntegrationTests.Collectors.Councils; + +using BinDays.Api.Collectors.Collectors; +using BinDays.Api.Collectors.Collectors.Councils; +using BinDays.Api.Collectors.Services; +using BinDays.Api.IntegrationTests.Helpers; +using System.Threading.Tasks; +using Xunit; +using Xunit.Abstractions; + +public class TheMorayCouncilTests { - using BinDays.Api.Collectors.Collectors; - using BinDays.Api.Collectors.Collectors.Councils; - using BinDays.Api.Collectors.Services; - using BinDays.Api.IntegrationTests.Helpers; - using System.Threading.Tasks; - using Xunit; - using Xunit.Abstractions; + private readonly IntegrationTestClient _client; + private static readonly ICollector _collector = new TheMorayCouncil(); + private readonly CollectorService _collectorService = new([_collector]); + private readonly ITestOutputHelper _outputHelper; - public class TheMorayCouncilTests + public TheMorayCouncilTests(ITestOutputHelper outputHelper) { - private readonly IntegrationTestClient _client; - private static readonly ICollector _collector = new TheMorayCouncil(); - private readonly CollectorService _collectorService = new([_collector]); - private readonly ITestOutputHelper _outputHelper; - - public TheMorayCouncilTests(ITestOutputHelper outputHelper) - { - _outputHelper = outputHelper; - _client = new IntegrationTestClient(outputHelper); - } + _outputHelper = outputHelper; + _client = new IntegrationTestClient(outputHelper); + } - [Theory] - [InlineData("IV30 6LH")] - public async Task GetBinDaysTest(string postcode) - { - await TestSteps.EndToEnd( - _client, - _collectorService, - _collector, - postcode, - _outputHelper - ); - } + [Theory] + [InlineData("IV30 6LH")] + public async Task GetBinDaysTest(string postcode) + { + await TestSteps.EndToEnd( + _client, + _collectorService, + _collector, + postcode, + _outputHelper + ); } } From 582c38969c106d3775e12cbf3e9a2ad976389df4 Mon Sep 17 00:00:00 2001 From: BadgerHobbs Date: Sun, 18 Jan 2026 15:03:01 +0000 Subject: [PATCH 5/6] Address PR #93 code review feedback - Simplified bin Keys to single-letter codes only - Removed GetBinsForCode method in favor of inline matching - Removed URL normalization logic - Removed defensive conditionals for metadata - Limited calendar processing to 3 months - Improved code readability by breaking long lines - Extracted bins LINQ query to separate variable Co-Authored-By: Claude Sonnet 4.5 --- .../Collectors/Councils/TheMorayCouncil.cs | 314 +++++++----------- 1 file changed, 123 insertions(+), 191 deletions(-) diff --git a/BinDays.Api.Collectors/Collectors/Councils/TheMorayCouncil.cs b/BinDays.Api.Collectors/Collectors/Councils/TheMorayCouncil.cs index 92f76d84..b8818299 100644 --- a/BinDays.Api.Collectors/Collectors/Councils/TheMorayCouncil.cs +++ b/BinDays.Api.Collectors/Collectors/Councils/TheMorayCouncil.cs @@ -8,7 +8,6 @@ namespace BinDays.Api.Collectors.Collectors.Councils; using System.Globalization; using System.Linq; using System.Text.RegularExpressions; -using System.Web; /// /// Collector implementation for The Moray Council. @@ -19,7 +18,7 @@ internal sealed partial class TheMorayCouncil : GovUkCollectorBase, ICollector public string Name => "The Moray Council"; /// - public Uri WebsiteUrl => new("http://www.moray.gov.uk/"); + public Uri WebsiteUrl => new("https://www.moray.gov.uk/"); /// public override string GovUkId => "moray"; @@ -31,76 +30,41 @@ internal sealed partial class TheMorayCouncil : GovUkCollectorBase, ICollector [ new() { - Name = _generalWasteName, + Name = "General Waste", Colour = BinColour.Green, Type = BinType.Bin, - Keys = [ "Green bin", "General waste" ], + Keys = [ "G" ], }, new() { - Name = _gardenWasteName, + Name = "Garden Waste", Colour = BinColour.Brown, Type = BinType.Bin, - Keys = [ "Brown bin", "Garden" ], + Keys = [ "B" ], }, new() { - Name = _paperAndCardName, + Name = "Paper and Card", Colour = BinColour.Blue, Type = BinType.Bin, - Keys = [ "Blue bin", "Paper", "Card" ], + Keys = [ "P" ], }, new() { - Name = _plasticsAndCansName, + Name = "Plastics and Cans", Colour = BinColour.Purple, Type = BinType.Bin, - Keys = [ "Purple bin", "Cans", "Plastic" ], + Keys = [ "C" ], }, new() { - Name = _glassName, + Name = "Glass", Colour = BinColour.Orange, Type = BinType.Box, - Keys = [ "Orange box", "Glass" ], + Keys = [ "O" ], }, ]; - /// - /// Key for storing bin days in metadata. - /// - private const string _binDaysMetadataKey = "binDays"; - - /// - /// Name for the garden waste bin. - /// - private const string _gardenWasteName = "Garden Waste"; - - /// - /// Name for the general waste bin. - /// - private const string _generalWasteName = "General Waste"; - - /// - /// Name for the glass bin. - /// - private const string _glassName = "Glass"; - - /// - /// Name for the paper and card bin. - /// - private const string _paperAndCardName = "Paper and Card"; - - /// - /// Name for the plastics and cans bin. - /// - private const string _plasticsAndCansName = "Plastics and Cans"; - - /// - /// Key for storing remaining calendars in metadata. - /// - private const string _remainingCalendarsMetadataKey = "remainingCalendars"; - /// /// Regex for extracting addresses from the results page. /// @@ -116,13 +80,19 @@ internal sealed partial class TheMorayCouncil : GovUkCollectorBase, ICollector /// /// Regex for extracting calendar links. /// - [GeneratedRegex("href=['\\\"](?(?:https?://bindayfinder\\.moray\\.gov\\.uk/)?cal_(?\\d{4})_view\\.php\\?id=(?\\d+))['\\\"]", RegexOptions.IgnoreCase | RegexOptions.Singleline)] + [GeneratedRegex( + "href=['\\\"](?:https?://bindayfinder\\.moray\\.gov\\.uk/)?(?cal_(?\\d{4})_view\\.php\\?id=(?\\d+))['\\\"]", + RegexOptions.IgnoreCase | RegexOptions.Singleline + )] private static partial Regex CalendarLinksRegex(); /// /// Regex for extracting month blocks from the calendar. /// - [GeneratedRegex("

(?[^<]+)

.*?
(?.*?)
\\s*", RegexOptions.Singleline)] + [GeneratedRegex( + "

(?[^<]+)

.*?
(?.*?)
\\s*", + RegexOptions.Singleline + )] private static partial Regex CalendarMonthRegex(); /// @@ -143,12 +113,10 @@ public GetAddressesResponse GetAddresses(string postcode, ClientSideResponse? cl // Prepare client-side request for getting addresses if (clientSideResponse == null) { - var formattedPostcode = HttpUtility.UrlEncode(ProcessingUtilities.FormatPostcode(postcode)); - var clientSideRequest = new ClientSideRequest { RequestId = 1, - Url = $"https://bindayfinder.moray.gov.uk/refuse_roads.php?strname=&pcode={formattedPostcode}", + Url = $"https://bindayfinder.moray.gov.uk/refuse_roads.php?strname=&pcode={postcode}", Method = "GET", Headers = new() { @@ -166,9 +134,10 @@ public GetAddressesResponse GetAddresses(string postcode, ClientSideResponse? cl // Process addresses from response else if (clientSideResponse.RequestId == 1) { - var addressMatches = AddressesRegex().Matches(clientSideResponse.Content); + var addressMatches = AddressesRegex().Matches(clientSideResponse.Content)!; var addresses = new List
(); + // Iterate through each address, and create a new address object foreach (Match addressMatch in addressMatches) { var property = WhitespaceRegex().Replace(addressMatch.Groups["address"].Value, " ").Trim(); @@ -219,14 +188,15 @@ public GetBinDaysResponse GetBinDays(Address address, ClientSideResponse? client return getBinDaysResponse; } - // Process bin days from response + // Extract calendar links and prepare first calendar request else if (clientSideResponse.RequestId == 1) { - var calendarUrls = CalendarLinksRegex() - .Matches(clientSideResponse.Content) - .Select(match => NormalizeCalendarUrl(match.Groups["url"].Value)) - .Distinct() - .OrderBy(url => url, StringComparer.OrdinalIgnoreCase) + var calendarMatches = CalendarLinksRegex().Matches(clientSideResponse.Content)!; + var calendarUrls = calendarMatches + .Select(match => $"https://bindayfinder.moray.gov.uk/{match.Groups["url"].Value}") + .Distinct(StringComparer.OrdinalIgnoreCase) + .Order(StringComparer.OrdinalIgnoreCase) + .Take(3) .ToList(); if (calendarUrls.Count == 0) @@ -234,17 +204,19 @@ public GetBinDaysResponse GetBinDays(Address address, ClientSideResponse? client throw new InvalidOperationException("No calendar links found for the selected address."); } - var remainingCalendars = calendarUrls.Skip(1).ToList(); - var metadata = new Dictionary(); - if (remainingCalendars.Count > 0) + var metadata = new Dictionary { - metadata.Add(_remainingCalendarsMetadataKey, string.Join(",", remainingCalendars)); - } + { "binDays", string.Empty }, + { + "remainingCalendars", + calendarUrls.Count > 1 ? string.Join(",", calendarUrls.Skip(1)) : string.Empty + }, + }; var clientSideRequest = new ClientSideRequest { RequestId = 2, - Url = calendarUrls.First(), + Url = calendarUrls[0], Method = "GET", Headers = new() { @@ -263,42 +235,95 @@ public GetBinDaysResponse GetBinDays(Address address, ClientSideResponse? client return getBinDaysResponse; } - // Process bin days from response + // Parse calendar and either request next calendar or return bin days else if (clientSideResponse.RequestId >= 2) { var metadata = clientSideResponse.Options.Metadata; var binDays = new List<(DateOnly Date, string Code)>(); - if (metadata.TryGetValue(_binDaysMetadataKey, out var existingBinDays)) + // Parse any existing bin days from metadata + var entries = metadata["binDays"].Split( + "|", + StringSplitOptions.RemoveEmptyEntries | StringSplitOptions.TrimEntries + ); + + foreach (var entry in entries) { - binDays.AddRange(ParseBinDaysMetadata(existingBinDays)); + var parts = entry.Split( + ":", + StringSplitOptions.RemoveEmptyEntries | StringSplitOptions.TrimEntries + ); + var date = DateOnly.ParseExact(parts[0], "yyyy-MM-dd", CultureInfo.InvariantCulture); + var code = parts[1]; + + binDays.Add((date, code)); } - binDays.AddRange(ParseCalendarContent(clientSideResponse.Content)); + // Parse current calendar content + var yearMatch = CalendarYearRegex().Match(clientSideResponse.Content); + if (!yearMatch.Success) + { + throw new InvalidOperationException("Calendar year not found in response."); + } - var remainingCalendarMetadata = metadata.GetValueOrDefault(_remainingCalendarsMetadataKey); - var remainingCalendarUrls = string.IsNullOrWhiteSpace(remainingCalendarMetadata) - ? [] - : remainingCalendarMetadata.Split(",", StringSplitOptions.RemoveEmptyEntries | StringSplitOptions.TrimEntries).ToList(); + var year = int.Parse(yearMatch.Groups["year"].Value, CultureInfo.InvariantCulture); + var monthMatches = CalendarMonthRegex().Matches(clientSideResponse.Content)!; - if (remainingCalendarUrls.Count > 0) + foreach (Match monthMatch in monthMatches) { - var nextCalendarUrl = remainingCalendarUrls.First(); - var nextRemainingCalendars = remainingCalendarUrls.Skip(1).ToList(); - var nextMetadata = new Dictionary - { - { _binDaysMetadataKey, SerialiseBinDaysMetadata(binDays) }, - }; + var monthName = monthMatch.Groups["month"].Value.Trim(); + var monthNumber = DateTime.ParseExact(monthName, "MMMM", CultureInfo.InvariantCulture).Month; + var daysHtml = monthMatch.Groups["days"].Value; + var dayMatches = CalendarDayRegex().Matches(daysHtml)!; - if (nextRemainingCalendars.Count > 0) + foreach (Match dayMatch in dayMatches) { - nextMetadata.Add(_remainingCalendarsMetadataKey, string.Join(",", nextRemainingCalendars)); + var className = dayMatch.Groups["class"].Value.Trim(); + var dayText = dayMatch.Groups["day"].Value.Trim(); + + if (string.IsNullOrWhiteSpace(dayText) || + string.IsNullOrWhiteSpace(className) || + string.Equals(className, "blank", StringComparison.OrdinalIgnoreCase)) + { + continue; + } + + var date = DateOnly.ParseExact( + $"{dayText}-{monthNumber}-{year}", + "d-M-yyyy", + CultureInfo.InvariantCulture + ); + + binDays.Add((date, className)); } + } + + // Check if there are more calendars to process + var remainingCalendars = metadata["remainingCalendars"] + .Split( + ",", + StringSplitOptions.RemoveEmptyEntries | StringSplitOptions.TrimEntries + ) + .ToList(); + + if (remainingCalendars.Count > 0) + { + var nextMetadata = new Dictionary + { + { + "binDays", + string.Join("|", binDays.Select(bd => $"{bd.Date:yyyy-MM-dd}:{bd.Code}")) + }, + { + "remainingCalendars", + remainingCalendars.Count > 1 ? string.Join(",", remainingCalendars.Skip(1)) : string.Empty + }, + }; var clientSideRequest = new ClientSideRequest { RequestId = clientSideResponse.RequestId + 1, - Url = nextCalendarUrl, + Url = remainingCalendars[0], Method = "GET", Headers = new() { @@ -318,11 +343,22 @@ public GetBinDaysResponse GetBinDays(Address address, ClientSideResponse? client return getBinDaysResponse; } - var binDayResults = binDays.Select(binDay => new BinDay + // All calendars processed, return bin days + var binDayResults = binDays.Select(binDay => { - Date = binDay.Date, - Address = address, - Bins = GetBinsForCode(binDay.Code), + var bins = binDay.Code.ToCharArray() + .SelectMany(c => _binTypes.Where(bin => bin.Keys.Any(key => + string.Equals(key, c.ToString(), StringComparison.OrdinalIgnoreCase) + ))) + .Distinct() + .ToList(); + + return new BinDay + { + Date = binDay.Date, + Address = address, + Bins = bins, + }; }).ToList(); var getBinDaysResponseFinal = new GetBinDaysResponse @@ -336,108 +372,4 @@ public GetBinDaysResponse GetBinDays(Address address, ClientSideResponse? client // Throw exception for invalid request throw new InvalidOperationException("Invalid client-side request."); } - - private static string NormalizeCalendarUrl(string calendarUrl) - { - if (calendarUrl.StartsWith("http", StringComparison.OrdinalIgnoreCase)) - { - return calendarUrl; - } - - return $"https://bindayfinder.moray.gov.uk/{calendarUrl.TrimStart('/')}"; - } - - private static IReadOnlyCollection<(DateOnly Date, string Code)> ParseCalendarContent(string content) - { - var yearMatch = CalendarYearRegex().Match(content); - if (!yearMatch.Success) - { - throw new InvalidOperationException("Calendar year not found in response."); - } - - var year = int.Parse(yearMatch.Groups["year"].Value, CultureInfo.InvariantCulture); - - var binDays = new List<(DateOnly Date, string Code)>(); - var monthMatches = CalendarMonthRegex().Matches(content); - - foreach (Match monthMatch in monthMatches) - { - var monthName = monthMatch.Groups["month"].Value.Trim(); - var monthNumber = DateTime.ParseExact(monthName, "MMMM", CultureInfo.InvariantCulture).Month; - var daysHtml = monthMatch.Groups["days"].Value; - - foreach (Match dayMatch in CalendarDayRegex().Matches(daysHtml)) - { - var className = dayMatch.Groups["class"].Value.Trim(); - var dayText = dayMatch.Groups["day"].Value.Trim(); - - if (string.IsNullOrWhiteSpace(dayText) || - string.IsNullOrWhiteSpace(className) || - string.Equals(className, "blank", StringComparison.OrdinalIgnoreCase)) - { - continue; - } - - var date = DateOnly.ParseExact( - $"{dayText}-{monthNumber}-{year}", - "d-M-yyyy", - CultureInfo.InvariantCulture - ); - - binDays.Add((date, className)); - } - } - - return binDays.AsReadOnly(); - } - - private static IReadOnlyCollection<(DateOnly Date, string Code)> ParseBinDaysMetadata(string metadata) - { - var binDays = new List<(DateOnly Date, string Code)>(); - var entries = metadata.Split("|", StringSplitOptions.RemoveEmptyEntries | StringSplitOptions.TrimEntries); - - foreach (var entry in entries) - { - var parts = entry.Split(":", StringSplitOptions.RemoveEmptyEntries | StringSplitOptions.TrimEntries); - if (parts.Length != 2) - { - continue; - } - - var date = DateOnly.ParseExact(parts[0], "yyyy-MM-dd", CultureInfo.InvariantCulture); - var code = parts[1]; - - binDays.Add((date, code)); - } - - return binDays.AsReadOnly(); - } - - private static string SerialiseBinDaysMetadata(IEnumerable<(DateOnly Date, string Code)> binDays) - { - return string.Join( - "|", - binDays.Select(binDay => $"{binDay.Date:yyyy-MM-dd}:{binDay.Code}") - ); - } - - private IReadOnlyCollection GetBinsForCode(string code) - { - var upperCode = code.ToUpperInvariant(); - - if (upperCode == "B") - { - return _binTypes.Where(bin => bin.Name == _gardenWasteName).ToList().AsReadOnly(); - } - else if (upperCode == "GPOC") - { - return _binTypes.Where(bin => bin.Name != _gardenWasteName).ToList().AsReadOnly(); - } - else if (upperCode == "GBPOC") - { - return _binTypes.ToList().AsReadOnly(); - } - - throw new InvalidOperationException($"Unknown bin code: {code}"); - } } From b44cbc4c0c9278d2c0a04b1644687a1ddfbd9143 Mon Sep 17 00:00:00 2001 From: BadgerHobbs Date: Sun, 18 Jan 2026 15:06:35 +0000 Subject: [PATCH 6/6] Remove --no-restore from implement collector prompt. --- .agent/prompts/implement-collector.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.agent/prompts/implement-collector.md b/.agent/prompts/implement-collector.md index 126718ce..d3d879d9 100644 --- a/.agent/prompts/implement-collector.md +++ b/.agent/prompts/implement-collector.md @@ -194,7 +194,7 @@ namespace BinDays.Api.IntegrationTests.Collectors.Councils ### 4.1 Run Tests ```bash -dotnet test --no-restore --filter "FullyQualifiedName~{CouncilName}Tests.GetBinDaysTest" --logger "console;verbosity=detailed" BinDays.Api.IntegrationTests/BinDays.Api.IntegrationTests.csproj +dotnet test --filter "FullyQualifiedName~{CouncilName}Tests.GetBinDaysTest" --logger "console;verbosity=detailed" BinDays.Api.IntegrationTests/BinDays.Api.IntegrationTests.csproj ``` ### 4.2 Debug Failures @@ -203,7 +203,7 @@ If tests fail, enable HTTP logging: ```bash export BINDAYS_ENABLE_HTTP_LOGGING=true -dotnet test --no-restore --filter "FullyQualifiedName~{CouncilName}Tests.GetBinDaysTest" --logger "console;verbosity=detailed" BinDays.Api.IntegrationTests/BinDays.Api.IntegrationTests.csproj +dotnet test --filter "FullyQualifiedName~{CouncilName}Tests.GetBinDaysTest" --logger "console;verbosity=detailed" BinDays.Api.IntegrationTests/BinDays.Api.IntegrationTests.csproj ``` Compare the logged requests against the HAR file: