diff --git a/BinDays.Api.Collectors/Collectors/Councils/TamesideMetropolitanBoroughCouncil.cs b/BinDays.Api.Collectors/Collectors/Councils/TamesideMetropolitanBoroughCouncil.cs
new file mode 100644
index 00000000..5ee6213f
--- /dev/null
+++ b/BinDays.Api.Collectors/Collectors/Councils/TamesideMetropolitanBoroughCouncil.cs
@@ -0,0 +1,363 @@
+namespace BinDays.Api.Collectors.Collectors.Councils;
+
+using BinDays.Api.Collectors.Collectors.Vendors;
+using BinDays.Api.Collectors.Models;
+using BinDays.Api.Collectors.Utilities;
+using System;
+using System.Collections.Generic;
+using System.Globalization;
+using System.Text.RegularExpressions;
+
+/// <summary>
+/// Collector implementation for Tameside Metropolitan Borough Council.
+/// </summary>
+/// <remarks>
+/// Requests are chained via <c>RequestId</c>: 1 fetches the form to obtain a session cookie,
+/// 2 posts the postcode, and (for bin days only) 3 posts the selected address.
+/// </remarks>
+internal sealed partial class TamesideMetropolitanBoroughCouncil : GovUkCollectorBase, ICollector
+{
+    /// <inheritdoc/>
+    public string Name => "Tameside Metropolitan Borough Council";
+
+    /// <inheritdoc/>
+    public Uri WebsiteUrl => new("https://www.tameside.gov.uk/");
+
+    /// <inheritdoc/>
+    public override string GovUkId => "tameside";
+
+    /// <summary>
+    /// The list of bin types for this collector.
+    /// </summary>
+    private readonly IReadOnlyCollection<Bin> _binTypes =
+    [
+        new()
+        {
+            Name = "General Waste",
+            Colour = BinColour.Green,
+            Keys = [ "green_bin_icon" ],
+        },
+        new()
+        {
+            Name = "Recycling",
+            Colour = BinColour.Black,
+            Keys = [ "black_bin_icon" ],
+        },
+        new()
+        {
+            Name = "Paper",
+            Colour = BinColour.Blue,
+            Keys = [ "blue_bin_icon" ],
+        },
+        new()
+        {
+            Name = "Garden Waste",
+            Colour = BinColour.Brown,
+            Keys = [ "brown_bin_icon" ],
+        },
+    ];
+
+    /// <summary>
+    /// Regex for extracting addresses.
+    /// </summary>
+    // NOTE(review): the literal HTML tags around the named groups appear to have been stripped from this copy - restore them from the upstream file.
+    [GeneratedRegex(@"(?<uid>[^""]+)"">\s*(?<address>[^<]+)\s*")]
+    private static partial Regex AddressRegex();
+
+    /// <summary>
+    /// Regex for extracting year sections.
+    /// </summary>
+    // NOTE(review): the literal HTML tags around the named groups appear to have been stripped from this copy - restore them from the upstream file.
+    [GeneratedRegex(@"\s*(?<year>\d{4})(?<content>.*?)", RegexOptions.Singleline)]
+    private static partial Regex YearRegex();
+
+    /// <summary>
+    /// Regex for extracting month rows.
+    /// </summary>
+    // NOTE(review): the literal HTML tags around the named groups appear to have been stripped from this copy - restore them from the upstream file.
+    [GeneratedRegex(@"\s*(?<month>[^<]+)(?<cells>.*?)", RegexOptions.Singleline)]
+    private static partial Regex MonthRegex();
+
+    /// <summary>
+    /// Regex for extracting individual day cells.
+    /// </summary>
+    // NOTE(review): the literal HTML tags around the named group appear to have been stripped from this copy - restore them from the upstream file.
+    [GeneratedRegex(@"(?<cell>.*?)", RegexOptions.Singleline)]
+    private static partial Regex DayCellRegex();
+
+    /// <summary>
+    /// Regex for extracting the collection day.
+    /// </summary>
+    // NOTE(review): the literal HTML tag before the named group appears to have been stripped from this copy - restore it from the upstream file.
+    [GeneratedRegex(@"(?<day>\d+)", RegexOptions.Singleline)]
+    private static partial Regex DayRegex();
+
+    /// <summary>
+    /// Regex for extracting bin icons.
+    /// </summary>
+    [GeneratedRegex(@"alt=""(?<bin>[^""]+)""", RegexOptions.Singleline)]
+    private static partial Regex BinIconRegex();
+
+    /// <inheritdoc/>
+    public GetAddressesResponse GetAddresses(string postcode, ClientSideResponse? clientSideResponse)
+    {
+        // Prepare client-side request for getting session cookie
+        if (clientSideResponse == null)
+        {
+            var clientSideRequest = CreateSessionCookieRequest();
+
+            var getAddressesResponse = new GetAddressesResponse
+            {
+                NextClientSideRequest = clientSideRequest,
+            };
+
+            return getAddressesResponse;
+        }
+        // Prepare client-side request for getting addresses
+        else if (clientSideResponse.RequestId == 1)
+        {
+            var setCookieHeader = clientSideResponse.Headers["set-cookie"];
+            var sessionCookie = ProcessingUtilities.ParseSetCookieHeaderForRequestCookie(setCookieHeader);
+
+            var clientSideRequest = CreatePostcodeRequest(postcode, sessionCookie);
+
+            var getAddressesResponse = new GetAddressesResponse
+            {
+                NextClientSideRequest = clientSideRequest,
+            };
+
+            return getAddressesResponse;
+        }
+        // Process addresses from response
+        else if (clientSideResponse.RequestId == 2)
+        {
+            // Iterate through each address, and create a new address object
+            var addresses = new List<Address>();
+            foreach (Match rawAddress in AddressRegex().Matches(clientSideResponse.Content)!)
+            {
+                var uid = rawAddress.Groups["uid"].Value.Trim();
+                if (string.IsNullOrWhiteSpace(uid))
+                {
+                    continue;
+                }
+
+                var address = new Address
+                {
+                    Property = rawAddress.Groups["address"].Value.Trim(),
+                    Postcode = postcode,
+                    Uid = uid,
+                };
+
+                addresses.Add(address);
+            }
+
+            var getAddressesResponse = new GetAddressesResponse
+            {
+                Addresses = [.. addresses],
+            };
+
+            return getAddressesResponse;
+        }
+
+        // Throw exception for invalid request
+        throw new InvalidOperationException("Invalid client-side request.");
+    }
+
+    /// <inheritdoc/>
+    public GetBinDaysResponse GetBinDays(Address address, ClientSideResponse? clientSideResponse)
+    {
+        // Prepare client-side request for getting session cookie
+        if (clientSideResponse == null)
+        {
+            var clientSideRequest = CreateSessionCookieRequest();
+
+            var getBinDaysResponse = new GetBinDaysResponse
+            {
+                NextClientSideRequest = clientSideRequest,
+            };
+
+            return getBinDaysResponse;
+        }
+        // Prepare client-side request for confirming postcode
+        else if (clientSideResponse.RequestId == 1)
+        {
+            var setCookieHeader = clientSideResponse.Headers["set-cookie"];
+            var sessionCookie = ProcessingUtilities.ParseSetCookieHeaderForRequestCookie(setCookieHeader);
+
+            var clientSideRequest = CreatePostcodeRequest(address.Postcode!, sessionCookie);
+
+            var getBinDaysResponse = new GetBinDaysResponse
+            {
+                NextClientSideRequest = clientSideRequest,
+            };
+
+            return getBinDaysResponse;
+        }
+        // Prepare client-side request for getting bin days
+        else if (clientSideResponse.RequestId == 2)
+        {
+            var formattedPostcode = ProcessingUtilities.FormatPostcode(address.Postcode!);
+            var setCookieHeader = clientSideResponse.Headers["set-cookie"];
+            var sessionCookie = ProcessingUtilities.ParseSetCookieHeaderForRequestCookie(setCookieHeader);
+
+            Dictionary<string, string> requestHeaders = new()
+            {
+                { "content-type", "application/x-www-form-urlencoded" },
+                { "cookie", $"cookieconsent_dismissed=yes; {sessionCookie}" },
+                { "user-agent", Constants.UserAgent },
+            };
+
+            var requestBody = ProcessingUtilities.ConvertDictionaryToFormData(new()
+            {
+                { "F03_I01_SelectAddress", address.Uid! },
+                { "AdvanceSearch", "Continue" },
+                { "F01_I02_Postcode", formattedPostcode },
+                { "F01_I03_Street", string.Empty },
+                { "F01_I04_Town", string.Empty },
+                { "history", ",1,3," },
+            });
+
+            var clientSideRequest = new ClientSideRequest
+            {
+                RequestId = 3,
+                Url = "https://public.tameside.gov.uk/forms/bin-dates.asp",
+                Method = "POST",
+                Headers = requestHeaders,
+                Body = requestBody,
+            };
+
+            var getBinDaysResponse = new GetBinDaysResponse
+            {
+                NextClientSideRequest = clientSideRequest,
+            };
+
+            return getBinDaysResponse;
+        }
+        // Process bin days from response
+        else if (clientSideResponse.RequestId == 3)
+        {
+            var binDays = new List<BinDay>();
+
+            // Extract all year blocks from the response
+            var yearMatches = YearRegex().Matches(clientSideResponse.Content)!;
+            foreach (Match yearMatch in yearMatches)
+            {
+                var year = yearMatch.Groups["year"].Value;
+                var yearContent = yearMatch.Groups["content"].Value;
+
+                // Extract all month rows from the year block
+                var monthMatches = MonthRegex().Matches(yearContent)!;
+                foreach (Match monthMatch in monthMatches)
+                {
+                    var month = monthMatch.Groups["month"].Value.Trim();
+                    var cellsContent = monthMatch.Groups["cells"].Value;
+
+                    // Extract all day cells from the month row
+                    var dayCellMatches = DayCellRegex().Matches(cellsContent)!;
+                    foreach (Match dayMatch in dayCellMatches)
+                    {
+                        var cellContent = dayMatch.Groups["cell"].Value;
+                        var dayRegexMatch = DayRegex().Match(cellContent);
+                        var day = dayRegexMatch.Groups["day"].Value;
+
+                        if (string.IsNullOrWhiteSpace(day))
+                        {
+                            continue;
+                        }
+
+                        var date = DateOnly.ParseExact(
+                            $"{day} {month} {year}",
+                            "d MMMM yyyy",
+                            CultureInfo.InvariantCulture,
+                            DateTimeStyles.None
+                        );
+
+                        // Extract bin types from icons in the cell
+                        var bins = new List<Bin>();
+                        var binIconMatches = BinIconRegex().Matches(cellContent)!;
+                        foreach (Match binIcon in binIconMatches)
+                        {
+                            var binIconValue = binIcon.Groups["bin"].Value;
+                            var matchingBins = ProcessingUtilities.GetMatchingBins(_binTypes, binIconValue);
+                            bins.AddRange(matchingBins);
+                        }
+
+                        if (bins.Count == 0)
+                        {
+                            continue;
+                        }
+
+                        var binDay = new BinDay
+                        {
+                            Date = date,
+                            Address = address,
+                            Bins = [.. bins],
+                        };
+
+                        binDays.Add(binDay);
+                    }
+                }
+            }
+
+            var getBinDaysResponse = new GetBinDaysResponse
+            {
+                BinDays = ProcessingUtilities.ProcessBinDays(binDays),
+            };
+
+            return getBinDaysResponse;
+        }
+
+        // Throw exception for invalid request
+        throw new InvalidOperationException("Invalid client-side request.");
+    }
+
+    /// <summary>
+    /// Creates a client-side request for getting the initial session cookie.
+    /// </summary>
+    private static ClientSideRequest CreateSessionCookieRequest()
+    {
+        return new ClientSideRequest
+        {
+            RequestId = 1,
+            Url = "https://public.tameside.gov.uk/forms/bin-dates.asp",
+            Method = "GET",
+            Headers = new()
+            {
+                { "user-agent", Constants.UserAgent },
+            },
+        };
+    }
+
+    /// <summary>
+    /// Creates a client-side request for posting the postcode.
+    /// </summary>
+    private static ClientSideRequest CreatePostcodeRequest(string postcode, string sessionCookie)
+    {
+        var formattedPostcode = ProcessingUtilities.FormatPostcode(postcode);
+
+        Dictionary<string, string> requestHeaders = new()
+        {
+            { "content-type", "application/x-www-form-urlencoded" },
+            { "cookie", $"cookieconsent_dismissed=yes; {sessionCookie}" },
+            { "user-agent", Constants.UserAgent },
+        };
+
+        var requestBody = ProcessingUtilities.ConvertDictionaryToFormData(new()
+        {
+            { "F01_I02_Postcode", formattedPostcode },
+            { "F01_I03_Street", string.Empty },
+            { "F01_I04_Town", string.Empty },
+            { "Form_1", "Continue" },
+            { "history", ",1," },
+        });
+
+        return new ClientSideRequest
+        {
+            RequestId = 2,
+            Url = "https://public.tameside.gov.uk/forms/bin-dates.asp",
+            Method = "POST",
+            Headers = requestHeaders,
+            Body = requestBody,
+        };
+    }
+}
diff --git a/BinDays.Api.IntegrationTests/Collectors/Councils/TamesideMetropolitanBoroughCouncilTests.cs b/BinDays.Api.IntegrationTests/Collectors/Councils/TamesideMetropolitanBoroughCouncilTests.cs
new file mode 100644
index 00000000..89c48729
--- /dev/null
+++ b/BinDays.Api.IntegrationTests/Collectors/Councils/TamesideMetropolitanBoroughCouncilTests.cs
@@ -0,0 +1,36 @@
+namespace BinDays.Api.IntegrationTests.Collectors.Councils;
+
+using BinDays.Api.Collectors.Collectors;
+using BinDays.Api.Collectors.Collectors.Councils;
+using BinDays.Api.Collectors.Services;
+using BinDays.Api.IntegrationTests.Helpers;
+using System.Threading.Tasks;
+using Xunit;
+using Xunit.Abstractions;
+
+public class TamesideMetropolitanBoroughCouncilTests
+{
+    private readonly IntegrationTestClient _client;
+    private static readonly ICollector _collector = new TamesideMetropolitanBoroughCouncil();
+    private readonly CollectorService _collectorService = new([_collector]);
+    private readonly ITestOutputHelper _outputHelper;
+
+    public TamesideMetropolitanBoroughCouncilTests(ITestOutputHelper outputHelper)
+    {
+        _outputHelper = outputHelper;
+        _client = new IntegrationTestClient(outputHelper);
+    }
+
+    [Theory]
+    [InlineData("M34 7TQ")]
+    public async Task GetBinDaysTest(string postcode)
+    {
+        await TestSteps.EndToEnd(
+            _client,
+            _collectorService,
+            _collector,
+            postcode,
+            _outputHelper
+        );
+    }
+}