From 5ab840ad51c0f00da40db417d468c075cd5e8881 Mon Sep 17 00:00:00 2001 From: Moley-Bot Date: Fri, 16 Jan 2026 10:53:04 +0000 Subject: [PATCH 1/6] Add collector for BradfordCouncil Closes #60 Generated with Codex CLI by Moley-Bot --- .../Collectors/Councils/BradfordCouncil.cs | 807 ++++++++++++++++++ .../Councils/BradfordCouncilTests.cs | 36 + 2 files changed, 843 insertions(+) create mode 100644 BinDays.Api.Collectors/Collectors/Councils/BradfordCouncil.cs create mode 100644 BinDays.Api.IntegrationTests/Collectors/Councils/BradfordCouncilTests.cs diff --git a/BinDays.Api.Collectors/Collectors/Councils/BradfordCouncil.cs b/BinDays.Api.Collectors/Collectors/Councils/BradfordCouncil.cs new file mode 100644 index 0000000..fd1a35f --- /dev/null +++ b/BinDays.Api.Collectors/Collectors/Councils/BradfordCouncil.cs @@ -0,0 +1,807 @@ +namespace BinDays.Api.Collectors.Collectors.Councils; + +using BinDays.Api.Collectors.Collectors.Vendors; +using BinDays.Api.Collectors.Models; +using BinDays.Api.Collectors.Utilities; +using System; +using System.Collections.Generic; +using System.Globalization; +using System.Net; +using System.Text.Json; +using System.Text.RegularExpressions; + +/// +/// Collector implementation for Bradford Council. +/// +internal sealed partial class BradfordCouncil : GovUkCollectorBase, ICollector +{ + /// + public string Name => "Bradford Council"; + + /// + public Uri WebsiteUrl => new("https://www.bradford.gov.uk/"); + + /// + public override string GovUkId => "bradford"; + + private const string _initialUrl = "https://onlineforms.bradford.gov.uk/ufs/collectiondates.eb?ebd=0&ebp=20&ebz=1_1761729510565"; + private const string _formId = "/Forms/COLLECTIONDATES"; + private const string _postcodeField = "CTRL:Q2YAUZ5b:_:A"; + private const string _findButton = "CTRL:2eDPaBQA:_"; + private const string _addressPageField = "CTID-Go9IHRTP-1-A"; + private const string _showCollectionsField = "CTID-PieY14aw-_"; + private const string _showButton = "CTRL:PieY14aw:_"; + private const string _addressHidInputs = "ICTRL:Q2YAUZ5b:_:A,ACTRL:2eDPaBQA:_,APAGE:E.h,APAGE:B.h,APAGE:N.h,APAGE:P.h,APAGE:S.h,APAGE:R.h"; + private const string _showCollectionsHidInputs = "ACTRL:PieY14aw:_,ACTRL:EstZqKRj:_,APAGE:E.h,APAGE:B.h,APAGE:N.h,APAGE:P.h,APAGE:S.h,APAGE:R.h"; + + /// + /// The list of bin types for this collector. + /// + private readonly IReadOnlyCollection _binTypes = + [ + new() + { + Name = "General waste", + Colour = BinColour.Green, + Keys = [ "General waste" ], + }, + new() + { + Name = "Recycling waste", + Colour = BinColour.Grey, + Keys = [ "Recycling waste" ], + }, + new() + { + Name = "Garden waste", + Colour = BinColour.Brown, + Keys = [ "Garden waste" ], + }, + ]; + + /// + /// Regex for the formstack value from the HTML. + /// + [GeneratedRegex("name=\"formstack\" value=\"(?[^\"]+)\"", RegexOptions.IgnoreCase)] + private static partial Regex FormstackRegex(); + + /// + /// Regex for the original request url from the HTML. + /// + [GeneratedRegex("name=\"origrequrl\" value=\"(?[^\"]+)\"", RegexOptions.IgnoreCase)] + private static partial Regex OrigRequestUrlRegex(); + + /// + /// Regex for the ebs value from the HTML. + /// + [GeneratedRegex("name=\"ebs\" value=\"(?[^\"]+)\"", RegexOptions.IgnoreCase)] + private static partial Regex EbsRegex(); + + /// + /// Regex for the page sequence value from the HTML. + /// + [GeneratedRegex("name=\"pageSeq\" value=\"(?[^\"]+)\"", RegexOptions.IgnoreCase)] + private static partial Regex PageSequenceRegex(); + + /// + /// Regex for the page id value from the HTML. + /// + [GeneratedRegex("name=\"pageId\" value=\"(?[^\"]+)\"", RegexOptions.IgnoreCase)] + private static partial Regex PageIdRegex(); + + /// + /// Regex for the form state id value from the HTML. + /// + [GeneratedRegex("name=\"formStateId\" value=\"(?[^\"]+)\"", RegexOptions.IgnoreCase)] + private static partial Regex FormStateRegex(); + + /// + /// Regex for the addresses from the data. + /// + [GeneratedRegex("data-eb-input-name=\"(?CTRL:Go9IHRTP:\\d+:B\\.h)\"[^>]*>(?
[^<]+)", RegexOptions.IgnoreCase)] + private static partial Regex AddressesRegex(); + + /// + /// Regex for the bin sections on the collection page. + /// + [GeneratedRegex("

(?[^<]+)

(?.*?)((?=

)|\\z)", RegexOptions.Singleline)] + private static partial Regex BinSectionRegex(); + + /// + /// Regex for the bin day dates. + /// + [GeneratedRegex("Thu [A-Za-z]{3} \\d{2} \\d{4}")] + private static partial Regex DateRegex(); + + /// + public GetAddressesResponse GetAddresses(string postcode, ClientSideResponse? clientSideResponse) + { + // Prepare client-side request for getting the initial token + if (clientSideResponse == null) + { + var clientSideRequest = new ClientSideRequest + { + RequestId = 1, + Url = _initialUrl, + Method = "GET", + Headers = new() { + {"user-agent", Constants.UserAgent}, + }, + Options = new ClientSideOptions + { + FollowRedirects = false, + }, + }; + + var getAddressesResponse = new GetAddressesResponse + { + NextClientSideRequest = clientSideRequest + }; + + return getAddressesResponse; + } + // Prepare client-side request for loading the form + else if (clientSideResponse.RequestId == 1) + { + var cookie = ProcessingUtilities.ParseSetCookieHeaderForRequestCookie( + clientSideResponse.Headers["set-cookie"]); + var redirectUrl = BuildAbsoluteUrl(clientSideResponse.Headers["location"]); + + var clientSideRequest = new ClientSideRequest + { + RequestId = 2, + Url = redirectUrl, + Method = "GET", + Headers = new() { + {"user-agent", Constants.UserAgent}, + {"cookie", cookie}, + }, + Options = new ClientSideOptions + { + Metadata = { + { "cookie", cookie }, + } + }, + }; + + var getAddressesResponse = new GetAddressesResponse + { + NextClientSideRequest = clientSideRequest + }; + + return getAddressesResponse; + } + // Prepare client-side request for searching for addresses + else if (clientSideResponse.RequestId == 2) + { + var (ebs, formstack, origRequestUrl, pageSequence, pageId, formStateId) = ParseFormValues( + clientSideResponse.Content); + + var metadata = new Dictionary + { + { "cookie", clientSideResponse.Options.Metadata["cookie"] }, + { "ebs", ebs }, + { "formstack", formstack }, + { "origRequestUrl", origRequestUrl }, + { "pageSeq", pageSequence }, + { "pageId", pageId }, + { "formStateId", formStateId }, + }; + + var requestBody = ProcessingUtilities.ConvertDictionaryToFormData(new() + { + {"formid", _formId}, + {"ebs", ebs}, + {"origrequrl", origRequestUrl}, + {"formstack", formstack}, + {"PAGE:E.h", string.Empty}, + {"PAGE:B.h", string.Empty}, + {"PAGE:N.h", string.Empty}, + {"PAGE:P.h", string.Empty}, + {"PAGE:S.h", string.Empty}, + {"PAGE:R.h", string.Empty}, + {"PAGE:D", string.Empty}, + {"PAGE:H", string.Empty}, + {"PAGE:X", "0"}, + {"PAGE:Y", "0"}, + {"PAGE:F", "CTID-2eDPaBQA-_"}, + {"ufsEndUser*", "1"}, + {"pageSeq", pageSequence}, + {"pageId", pageId}, + {"formStateId", formStateId}, + {"$USERVAR1", string.Empty}, + {"$USERVAR2", string.Empty}, + {"$USERVAR3", string.Empty}, + {_postcodeField, postcode}, + {"HID:inputs", _addressHidInputs}, + {_findButton, "Find address"}, + }); + + var requestHeaders = new Dictionary { + {"user-agent", Constants.UserAgent}, + {"content-type", "application/x-www-form-urlencoded"}, + {"cookie", metadata["cookie"]}, + }; + + var clientSideRequest = new ClientSideRequest + { + RequestId = 3, + Url = $"https://onlineforms.bradford.gov.uk/ufs/ufsajax?ebz={ebs}", + Method = "POST", + Headers = requestHeaders, + Body = requestBody, + Options = new ClientSideOptions + { + Metadata = metadata, + }, + }; + + var getAddressesResponse = new GetAddressesResponse + { + NextClientSideRequest = clientSideRequest + }; + + return getAddressesResponse; + } + // Process addresses from response + else if (clientSideResponse.RequestId == 3) + { + var addressesHtml = ExtractUpdatedHtml(clientSideResponse.Content, "Go9IHRTP"); + + var rawAddresses = AddressesRegex().Matches(addressesHtml)!; + + // Iterate through each address, and create a new address object + var addresses = new List
(); + foreach (Match rawAddress in rawAddresses) + { + var address = new Address + { + Property = WebUtility.HtmlDecode(rawAddress.Groups["address"].Value.Trim()), + Postcode = postcode, + Uid = rawAddress.Groups["field"].Value, + }; + + addresses.Add(address); + } + + addresses.Sort((first, second) => + { + var firstNumber = GetAddressNumber(first); + var secondNumber = GetAddressNumber(second); + + var lengthComparison = secondNumber.Length.CompareTo(firstNumber.Length); + if (lengthComparison != 0) + { + return lengthComparison; + } + + var valueComparison = firstNumber.Value.CompareTo(secondNumber.Value); + if (valueComparison != 0) + { + return valueComparison; + } + + return string.Compare(first.Property, second.Property, StringComparison.Ordinal); + }); + + var getAddressesResponse = new GetAddressesResponse + { + Addresses = [.. addresses], + }; + + return getAddressesResponse; + } + + throw new InvalidOperationException("Invalid client-side request."); + } + + /// + public GetBinDaysResponse GetBinDays(Address address, ClientSideResponse? clientSideResponse) + { + // Prepare client-side request for getting the initial token + if (clientSideResponse == null) + { + var clientSideRequest = new ClientSideRequest + { + RequestId = 1, + Url = _initialUrl, + Method = "GET", + Headers = new() { + {"user-agent", Constants.UserAgent}, + }, + Options = new ClientSideOptions + { + FollowRedirects = false, + }, + }; + + var getBinDaysResponse = new GetBinDaysResponse + { + NextClientSideRequest = clientSideRequest + }; + + return getBinDaysResponse; + } + // Prepare client-side request for loading the form + else if (clientSideResponse.RequestId == 1) + { + var cookie = ProcessingUtilities.ParseSetCookieHeaderForRequestCookie( + clientSideResponse.Headers["set-cookie"]); + var redirectUrl = BuildAbsoluteUrl(clientSideResponse.Headers["location"]); + + var clientSideRequest = new ClientSideRequest + { + RequestId = 2, + Url = redirectUrl, + Method = "GET", + Headers = new() { + {"user-agent", Constants.UserAgent}, + {"cookie", cookie}, + }, + Options = new ClientSideOptions + { + Metadata = { + { "cookie", cookie }, + } + }, + }; + + var getBinDaysResponse = new GetBinDaysResponse + { + NextClientSideRequest = clientSideRequest + }; + + return getBinDaysResponse; + } + // Prepare client-side request for searching for addresses + else if (clientSideResponse.RequestId == 2) + { + var (ebs, formstack, origRequestUrl, pageSequence, pageId, formStateId) = ParseFormValues( + clientSideResponse.Content); + + var metadata = new Dictionary + { + { "cookie", clientSideResponse.Options.Metadata["cookie"] }, + { "ebs", ebs }, + { "formstack", formstack }, + { "origRequestUrl", origRequestUrl }, + { "pageSeq", pageSequence }, + { "pageId", pageId }, + { "formStateId", formStateId }, + }; + + var requestBody = ProcessingUtilities.ConvertDictionaryToFormData(new() + { + {"formid", _formId}, + {"ebs", ebs}, + {"origrequrl", origRequestUrl}, + {"formstack", formstack}, + {"PAGE:E.h", string.Empty}, + {"PAGE:B.h", string.Empty}, + {"PAGE:N.h", string.Empty}, + {"PAGE:P.h", string.Empty}, + {"PAGE:S.h", string.Empty}, + {"PAGE:R.h", string.Empty}, + {"PAGE:D", string.Empty}, + {"PAGE:H", string.Empty}, + {"PAGE:X", "0"}, + {"PAGE:Y", "0"}, + {"PAGE:F", "CTID-2eDPaBQA-_"}, + {"ufsEndUser*", "1"}, + {"pageSeq", pageSequence}, + {"pageId", pageId}, + {"formStateId", formStateId}, + {"$USERVAR1", string.Empty}, + {"$USERVAR2", string.Empty}, + {"$USERVAR3", string.Empty}, + {_postcodeField, address.Postcode!}, + {"HID:inputs", _addressHidInputs}, + {_findButton, "Find address"}, + }); + + var requestHeaders = new Dictionary { + {"user-agent", Constants.UserAgent}, + {"content-type", "application/x-www-form-urlencoded"}, + {"cookie", metadata["cookie"]}, + }; + + var clientSideRequest = new ClientSideRequest + { + RequestId = 3, + Url = $"https://onlineforms.bradford.gov.uk/ufs/ufsajax?ebz={ebs}", + Method = "POST", + Headers = requestHeaders, + Body = requestBody, + Options = new ClientSideOptions + { + Metadata = metadata, + }, + }; + + var getBinDaysResponse = new GetBinDaysResponse + { + NextClientSideRequest = clientSideRequest + }; + + return getBinDaysResponse; + } + // Prepare client-side request for selecting the address + else if (clientSideResponse.RequestId == 3) + { + var addressesHtml = ExtractUpdatedHtml(clientSideResponse.Content, "Go9IHRTP"); + var rawAddresses = AddressesRegex().Matches(addressesHtml)!; + + var addressFields = new List<(string Field, string Property)>(); + foreach (Match rawAddress in rawAddresses) + { + addressFields.Add((rawAddress.Groups["field"].Value, WebUtility.HtmlDecode(rawAddress.Groups["address"].Value.Trim()))); + } + + var selectedAddress = addressFields.Find(x => + string.Equals(x.Field, address.Uid, StringComparison.OrdinalIgnoreCase)); + + if (selectedAddress == default) + { + selectedAddress = addressFields.Find(x => + string.Equals(x.Property, address.Property, StringComparison.OrdinalIgnoreCase)); + } + + if (selectedAddress == default) + { + throw new InvalidOperationException("Selected address not found."); + } + + var metadata = clientSideResponse.Options.Metadata; + + var requestBodyDictionary = new Dictionary + { + {"formid", _formId}, + {"ebs", metadata["ebs"]}, + {"origrequrl", metadata["origRequestUrl"]}, + {"formstack", metadata["formstack"]}, + {"PAGE:E.h", string.Empty}, + {"PAGE:B.h", string.Empty}, + {"PAGE:N.h", string.Empty}, + {"PAGE:P.h", string.Empty}, + {"PAGE:S.h", string.Empty}, + {"PAGE:R.h", string.Empty}, + {"PAGE:D", string.Empty}, + {"PAGE:H", string.Empty}, + {"PAGE:X", "0"}, + {"PAGE:Y", "0"}, + {"PAGE:F", _addressPageField}, + {"ufsEndUser*", "1"}, + {"pageSeq", metadata["pageSeq"]}, + {"pageId", metadata["pageId"]}, + {"formStateId", metadata["formStateId"]}, + {"$USERVAR1", string.Empty}, + {"$USERVAR2", string.Empty}, + {"$USERVAR3", string.Empty}, + {_postcodeField, address.Postcode!}, + }; + + // Iterate through each address, and create the request fields + foreach (var rawAddress in addressFields) + { + requestBodyDictionary.Add(rawAddress.Field, rawAddress == selectedAddress + ? rawAddress.Property + : string.Empty); + } + + var hidInputs = new List + { + "ICTRL:Q2YAUZ5b:_:A", + "ACTRL:2eDPaBQA:_", + }; + + foreach (var rawAddress in addressFields) + { + hidInputs.Add(rawAddress.Field.Replace("CTRL:", "ACTRL:")); + } + + hidInputs.Add("APAGE:E.h"); + hidInputs.Add("APAGE:B.h"); + hidInputs.Add("APAGE:N.h"); + hidInputs.Add("APAGE:P.h"); + hidInputs.Add("APAGE:S.h"); + hidInputs.Add("APAGE:R.h"); + + requestBodyDictionary.Add("HID:inputs", string.Join(",", hidInputs)); + + var requestHeaders = new Dictionary { + {"user-agent", Constants.UserAgent}, + {"content-type", "application/x-www-form-urlencoded"}, + {"cookie", metadata["cookie"]}, + }; + + var requestBody = ProcessingUtilities.ConvertDictionaryToFormData(requestBodyDictionary); + + var clientSideRequest = new ClientSideRequest + { + RequestId = 4, + Url = $"https://onlineforms.bradford.gov.uk/ufs/ufsajax?ebz={metadata["ebs"]}", + Method = "POST", + Headers = requestHeaders, + Body = requestBody, + Options = new ClientSideOptions + { + Metadata = metadata, + }, + }; + + var getBinDaysResponse = new GetBinDaysResponse + { + NextClientSideRequest = clientSideRequest + }; + + return getBinDaysResponse; + } + // Prepare client-side request for showing the collection dates + else if (clientSideResponse.RequestId == 4) + { + var metadata = clientSideResponse.Options.Metadata; + + var requestBody = ProcessingUtilities.ConvertDictionaryToFormData(new() + { + {"formid", _formId}, + {"ebs", metadata["ebs"]}, + {"origrequrl", metadata["origRequestUrl"]}, + {"formstack", metadata["formstack"]}, + {"PAGE:E.h", string.Empty}, + {"PAGE:B.h", string.Empty}, + {"PAGE:N.h", string.Empty}, + {"PAGE:P.h", string.Empty}, + {"PAGE:S.h", string.Empty}, + {"PAGE:R.h", string.Empty}, + {"PAGE:D", string.Empty}, + {"PAGE:H", string.Empty}, + {"PAGE:X", "0"}, + {"PAGE:Y", "0"}, + {"PAGE:F", _showCollectionsField}, + {"ufsEndUser*", "1"}, + {"pageSeq", metadata["pageSeq"]}, + {"pageId", metadata["pageId"]}, + {"formStateId", metadata["formStateId"]}, + {"$USERVAR1", string.Empty}, + {"$USERVAR2", string.Empty}, + {"$USERVAR3", string.Empty}, + {"HID:inputs", _showCollectionsHidInputs}, + {_showButton, "Show collection dates"}, + }); + + var requestHeaders = new Dictionary { + {"user-agent", Constants.UserAgent}, + {"content-type", "application/x-www-form-urlencoded"}, + {"cookie", metadata["cookie"]}, + }; + + var clientSideRequest = new ClientSideRequest + { + RequestId = 5, + Url = $"https://onlineforms.bradford.gov.uk/ufs/ufsajax?ebz={metadata["ebs"]}", + Method = "POST", + Headers = requestHeaders, + Body = requestBody, + Options = new ClientSideOptions + { + Metadata = metadata, + }, + }; + + var getBinDaysResponse = new GetBinDaysResponse + { + NextClientSideRequest = clientSideRequest + }; + + return getBinDaysResponse; + } + // Prepare client-side request for navigating to the collection dates page + else if (clientSideResponse.RequestId == 5) + { + var metadata = clientSideResponse.Options.Metadata; + + var requestBody = ProcessingUtilities.ConvertDictionaryToFormData(new() + { + {"formid", _formId}, + {"ebs", metadata["ebs"]}, + {"origrequrl", metadata["origRequestUrl"]}, + {"formstack", metadata["formstack"]}, + {"PAGE:E.h", string.Empty}, + {"PAGE:B.h", string.Empty}, + {"PAGE:N.h", string.Empty}, + {"PAGE:P.h", string.Empty}, + {"PAGE:S.h", string.Empty}, + {"PAGE:R.h", string.Empty}, + {"PAGE:D", string.Empty}, + {"PAGE:H", string.Empty}, + {"PAGE:X", "0"}, + {"PAGE:Y", "0"}, + {"PAGE:F", _showCollectionsField}, + {"ufsEndUser*", "1"}, + {"pageSeq", metadata["pageSeq"]}, + {"pageId", metadata["pageId"]}, + {"formStateId", metadata["formStateId"]}, + {"$USERVAR1", string.Empty}, + {"$USERVAR2", string.Empty}, + {"$USERVAR3", string.Empty}, + {"HID:inputs", _showCollectionsHidInputs}, + {"ebReshow", "true"}, + }); + + var requestHeaders = new Dictionary { + {"user-agent", Constants.UserAgent}, + {"content-type", "application/x-www-form-urlencoded"}, + {"cookie", metadata["cookie"]}, + }; + + var clientSideRequest = new ClientSideRequest + { + RequestId = 6, + Url = $"https://onlineforms.bradford.gov.uk/ufs/collectiondates.eb?ebz={metadata["ebs"]}", + Method = "POST", + Headers = requestHeaders, + Body = requestBody, + Options = new ClientSideOptions + { + Metadata = metadata, + FollowRedirects = false, + }, + }; + + var getBinDaysResponse = new GetBinDaysResponse + { + NextClientSideRequest = clientSideRequest + }; + + return getBinDaysResponse; + } + // Prepare client-side request for loading the collection dates page + else if (clientSideResponse.RequestId == 6) + { + var metadata = clientSideResponse.Options.Metadata; + var redirectUrl = BuildAbsoluteUrl(clientSideResponse.Headers["location"]); + + var clientSideRequest = new ClientSideRequest + { + RequestId = 7, + Url = redirectUrl, + Method = "GET", + Headers = new() { + {"user-agent", Constants.UserAgent}, + {"cookie", metadata["cookie"]}, + }, + }; + + var getBinDaysResponse = new GetBinDaysResponse + { + NextClientSideRequest = clientSideRequest + }; + + return getBinDaysResponse; + } + // Process bin days from the response + else if (clientSideResponse.RequestId == 7) + { + var binSections = BinSectionRegex().Matches(clientSideResponse.Content)!; + + // Iterate through each bin section, and create a new bin day object + var binDays = new List(); + foreach (Match binSection in binSections) + { + var service = binSection.Groups["service"].Value.Trim(); + var datesText = binSection.Groups["content"].Value; + + foreach (Match dateMatch in DateRegex().Matches(datesText)!) + { + var date = DateOnly.ParseExact( + dateMatch.Value, + "ddd MMM dd yyyy", + CultureInfo.InvariantCulture, + DateTimeStyles.None + ); + + var matchingBins = ProcessingUtilities.GetMatchingBins(_binTypes, service); + + var binDay = new BinDay + { + Date = date, + Address = address, + Bins = matchingBins, + }; + + binDays.Add(binDay); + } + } + + var getBinDaysResponse = new GetBinDaysResponse + { + BinDays = ProcessingUtilities.ProcessBinDays(binDays), + }; + + return getBinDaysResponse; + } + + throw new InvalidOperationException("Invalid client-side request."); + } + + private static (int Length, int Value) GetAddressNumber(Address address) + { + var digits = GetLeadingNumber(address.Property!); + + if (digits == null) + { + return (0, 0); + } + + return (digits.Length, int.Parse(digits, CultureInfo.InvariantCulture)); + } + + private static string? GetLeadingNumber(string property) + { + var digits = string.Empty; + + foreach (var character in property) + { + if (character == ',') + { + break; + } + + if (char.IsDigit(character)) + { + digits += character; + continue; + } + + if (!string.IsNullOrWhiteSpace(digits)) + { + break; + } + } + + return string.IsNullOrWhiteSpace(digits) ? null : digits; + } + + private static string BuildAbsoluteUrl(string relativeUrl) + { + if (relativeUrl.StartsWith("http", StringComparison.OrdinalIgnoreCase)) + { + return relativeUrl; + } + + return $"https://onlineforms.bradford.gov.uk/ufs/{relativeUrl}"; + } + + private static (string Ebs, string Formstack, string OrigRequestUrl, string PageSequence, string PageId, string FormStateId) ParseFormValues( + string html) + { + var ebs = EbsRegex().Match(html).Groups["value"].Value; + var formstack = FormstackRegex().Match(html).Groups["value"].Value; + var origRequestUrl = OrigRequestUrlRegex().Match(html).Groups["url"].Value.Replace("&", "&"); + var pageSequence = PageSequenceRegex().Match(html).Groups["value"].Value; + var pageId = PageIdRegex().Match(html).Groups["value"].Value; + var formStateId = FormStateRegex().Match(html).Groups["value"].Value; + + return (ebs, formstack, origRequestUrl, pageSequence, pageId, formStateId); + } + + private static string ExtractUpdatedHtml(string content, string identifier) + { + using var jsonDoc = JsonDocument.Parse(content); + + if (jsonDoc.RootElement.TryGetProperty("updatedControls", out var updatedControls)) + { + foreach (var control in updatedControls.EnumerateArray()) + { + if (control.TryGetProperty("html", out var htmlProperty) && + htmlProperty.ValueKind == JsonValueKind.String && + htmlProperty.GetString()!.Contains(identifier, StringComparison.OrdinalIgnoreCase)) + { + return htmlProperty.GetString()!.Replace("\\\"", "\""); + } + } + } + + throw new InvalidOperationException("Could not find updated HTML content."); + } +} diff --git a/BinDays.Api.IntegrationTests/Collectors/Councils/BradfordCouncilTests.cs b/BinDays.Api.IntegrationTests/Collectors/Councils/BradfordCouncilTests.cs new file mode 100644 index 0000000..0825ac2 --- /dev/null +++ b/BinDays.Api.IntegrationTests/Collectors/Councils/BradfordCouncilTests.cs @@ -0,0 +1,36 @@ +namespace BinDays.Api.IntegrationTests.Collectors.Councils; + +using BinDays.Api.Collectors.Collectors; +using BinDays.Api.Collectors.Collectors.Councils; +using BinDays.Api.Collectors.Services; +using BinDays.Api.IntegrationTests.Helpers; +using System.Threading.Tasks; +using Xunit; +using Xunit.Abstractions; + +public class BradfordCouncilTests +{ + private readonly IntegrationTestClient _client; + private static readonly ICollector _collector = new BradfordCouncil(); + private readonly CollectorService _collectorService = new([_collector]); + private readonly ITestOutputHelper _outputHelper; + + public BradfordCouncilTests(ITestOutputHelper outputHelper) + { + _outputHelper = outputHelper; + _client = new IntegrationTestClient(outputHelper); + } + + [Theory] + [InlineData("BD5 9ND")] + public async Task GetBinDaysTest(string postcode) + { + await TestSteps.EndToEnd( + _client, + _collectorService, + _collector, + postcode, + _outputHelper + ); + } +} From 05abf70027fb463ed08199a2c9bfae56240a2c26 Mon Sep 17 00:00:00 2001 From: Moley-Bot Date: Fri, 16 Jan 2026 10:54:03 +0000 Subject: [PATCH 2/6] Auto-format code with dotnet format Formatted by Moley-Bot --- .../Collectors/Councils/BradfordCouncil.cs | 1614 ++++++++--------- .../Councils/BradfordCouncilTests.cs | 72 +- 2 files changed, 843 insertions(+), 843 deletions(-) diff --git a/BinDays.Api.Collectors/Collectors/Councils/BradfordCouncil.cs b/BinDays.Api.Collectors/Collectors/Councils/BradfordCouncil.cs index fd1a35f..01137c1 100644 --- a/BinDays.Api.Collectors/Collectors/Councils/BradfordCouncil.cs +++ b/BinDays.Api.Collectors/Collectors/Councils/BradfordCouncil.cs @@ -1,807 +1,807 @@ -namespace BinDays.Api.Collectors.Collectors.Councils; - -using BinDays.Api.Collectors.Collectors.Vendors; -using BinDays.Api.Collectors.Models; -using BinDays.Api.Collectors.Utilities; -using System; -using System.Collections.Generic; -using System.Globalization; -using System.Net; -using System.Text.Json; -using System.Text.RegularExpressions; - -/// -/// Collector implementation for Bradford Council. -/// -internal sealed partial class BradfordCouncil : GovUkCollectorBase, ICollector -{ - /// - public string Name => "Bradford Council"; - - /// - public Uri WebsiteUrl => new("https://www.bradford.gov.uk/"); - - /// - public override string GovUkId => "bradford"; - - private const string _initialUrl = "https://onlineforms.bradford.gov.uk/ufs/collectiondates.eb?ebd=0&ebp=20&ebz=1_1761729510565"; - private const string _formId = "/Forms/COLLECTIONDATES"; - private const string _postcodeField = "CTRL:Q2YAUZ5b:_:A"; - private const string _findButton = "CTRL:2eDPaBQA:_"; - private const string _addressPageField = "CTID-Go9IHRTP-1-A"; - private const string _showCollectionsField = "CTID-PieY14aw-_"; - private const string _showButton = "CTRL:PieY14aw:_"; - private const string _addressHidInputs = "ICTRL:Q2YAUZ5b:_:A,ACTRL:2eDPaBQA:_,APAGE:E.h,APAGE:B.h,APAGE:N.h,APAGE:P.h,APAGE:S.h,APAGE:R.h"; - private const string _showCollectionsHidInputs = "ACTRL:PieY14aw:_,ACTRL:EstZqKRj:_,APAGE:E.h,APAGE:B.h,APAGE:N.h,APAGE:P.h,APAGE:S.h,APAGE:R.h"; - - /// - /// The list of bin types for this collector. - /// - private readonly IReadOnlyCollection _binTypes = - [ - new() - { - Name = "General waste", - Colour = BinColour.Green, - Keys = [ "General waste" ], - }, - new() - { - Name = "Recycling waste", - Colour = BinColour.Grey, - Keys = [ "Recycling waste" ], - }, - new() - { - Name = "Garden waste", - Colour = BinColour.Brown, - Keys = [ "Garden waste" ], - }, - ]; - - /// - /// Regex for the formstack value from the HTML. - /// - [GeneratedRegex("name=\"formstack\" value=\"(?[^\"]+)\"", RegexOptions.IgnoreCase)] - private static partial Regex FormstackRegex(); - - /// - /// Regex for the original request url from the HTML. - /// - [GeneratedRegex("name=\"origrequrl\" value=\"(?[^\"]+)\"", RegexOptions.IgnoreCase)] - private static partial Regex OrigRequestUrlRegex(); - - /// - /// Regex for the ebs value from the HTML. - /// - [GeneratedRegex("name=\"ebs\" value=\"(?[^\"]+)\"", RegexOptions.IgnoreCase)] - private static partial Regex EbsRegex(); - - /// - /// Regex for the page sequence value from the HTML. - /// - [GeneratedRegex("name=\"pageSeq\" value=\"(?[^\"]+)\"", RegexOptions.IgnoreCase)] - private static partial Regex PageSequenceRegex(); - - /// - /// Regex for the page id value from the HTML. - /// - [GeneratedRegex("name=\"pageId\" value=\"(?[^\"]+)\"", RegexOptions.IgnoreCase)] - private static partial Regex PageIdRegex(); - - /// - /// Regex for the form state id value from the HTML. - /// - [GeneratedRegex("name=\"formStateId\" value=\"(?[^\"]+)\"", RegexOptions.IgnoreCase)] - private static partial Regex FormStateRegex(); - - /// - /// Regex for the addresses from the data. - /// - [GeneratedRegex("data-eb-input-name=\"(?CTRL:Go9IHRTP:\\d+:B\\.h)\"[^>]*>(?
[^<]+)", RegexOptions.IgnoreCase)] - private static partial Regex AddressesRegex(); - - /// - /// Regex for the bin sections on the collection page. - /// - [GeneratedRegex("

(?[^<]+)

(?.*?)((?=

)|\\z)", RegexOptions.Singleline)] - private static partial Regex BinSectionRegex(); - - /// - /// Regex for the bin day dates. - /// - [GeneratedRegex("Thu [A-Za-z]{3} \\d{2} \\d{4}")] - private static partial Regex DateRegex(); - - /// - public GetAddressesResponse GetAddresses(string postcode, ClientSideResponse? clientSideResponse) - { - // Prepare client-side request for getting the initial token - if (clientSideResponse == null) - { - var clientSideRequest = new ClientSideRequest - { - RequestId = 1, - Url = _initialUrl, - Method = "GET", - Headers = new() { - {"user-agent", Constants.UserAgent}, - }, - Options = new ClientSideOptions - { - FollowRedirects = false, - }, - }; - - var getAddressesResponse = new GetAddressesResponse - { - NextClientSideRequest = clientSideRequest - }; - - return getAddressesResponse; - } - // Prepare client-side request for loading the form - else if (clientSideResponse.RequestId == 1) - { - var cookie = ProcessingUtilities.ParseSetCookieHeaderForRequestCookie( - clientSideResponse.Headers["set-cookie"]); - var redirectUrl = BuildAbsoluteUrl(clientSideResponse.Headers["location"]); - - var clientSideRequest = new ClientSideRequest - { - RequestId = 2, - Url = redirectUrl, - Method = "GET", - Headers = new() { - {"user-agent", Constants.UserAgent}, - {"cookie", cookie}, - }, - Options = new ClientSideOptions - { - Metadata = { - { "cookie", cookie }, - } - }, - }; - - var getAddressesResponse = new GetAddressesResponse - { - NextClientSideRequest = clientSideRequest - }; - - return getAddressesResponse; - } - // Prepare client-side request for searching for addresses - else if (clientSideResponse.RequestId == 2) - { - var (ebs, formstack, origRequestUrl, pageSequence, pageId, formStateId) = ParseFormValues( - clientSideResponse.Content); - - var metadata = new Dictionary - { - { "cookie", clientSideResponse.Options.Metadata["cookie"] }, - { "ebs", ebs }, - { "formstack", formstack }, - { "origRequestUrl", origRequestUrl }, - { "pageSeq", pageSequence }, - { "pageId", pageId }, - { "formStateId", formStateId }, - }; - - var requestBody = ProcessingUtilities.ConvertDictionaryToFormData(new() - { - {"formid", _formId}, - {"ebs", ebs}, - {"origrequrl", origRequestUrl}, - {"formstack", formstack}, - {"PAGE:E.h", string.Empty}, - {"PAGE:B.h", string.Empty}, - {"PAGE:N.h", string.Empty}, - {"PAGE:P.h", string.Empty}, - {"PAGE:S.h", string.Empty}, - {"PAGE:R.h", string.Empty}, - {"PAGE:D", string.Empty}, - {"PAGE:H", string.Empty}, - {"PAGE:X", "0"}, - {"PAGE:Y", "0"}, - {"PAGE:F", "CTID-2eDPaBQA-_"}, - {"ufsEndUser*", "1"}, - {"pageSeq", pageSequence}, - {"pageId", pageId}, - {"formStateId", formStateId}, - {"$USERVAR1", string.Empty}, - {"$USERVAR2", string.Empty}, - {"$USERVAR3", string.Empty}, - {_postcodeField, postcode}, - {"HID:inputs", _addressHidInputs}, - {_findButton, "Find address"}, - }); - - var requestHeaders = new Dictionary { - {"user-agent", Constants.UserAgent}, - {"content-type", "application/x-www-form-urlencoded"}, - {"cookie", metadata["cookie"]}, - }; - - var clientSideRequest = new ClientSideRequest - { - RequestId = 3, - Url = $"https://onlineforms.bradford.gov.uk/ufs/ufsajax?ebz={ebs}", - Method = "POST", - Headers = requestHeaders, - Body = requestBody, - Options = new ClientSideOptions - { - Metadata = metadata, - }, - }; - - var getAddressesResponse = new GetAddressesResponse - { - NextClientSideRequest = clientSideRequest - }; - - return getAddressesResponse; - } - // Process addresses from response - else if (clientSideResponse.RequestId == 3) - { - var addressesHtml = ExtractUpdatedHtml(clientSideResponse.Content, "Go9IHRTP"); - - var rawAddresses = AddressesRegex().Matches(addressesHtml)!; - - // Iterate through each address, and create a new address object - var addresses = new List
(); - foreach (Match rawAddress in rawAddresses) - { - var address = new Address - { - Property = WebUtility.HtmlDecode(rawAddress.Groups["address"].Value.Trim()), - Postcode = postcode, - Uid = rawAddress.Groups["field"].Value, - }; - - addresses.Add(address); - } - - addresses.Sort((first, second) => - { - var firstNumber = GetAddressNumber(first); - var secondNumber = GetAddressNumber(second); - - var lengthComparison = secondNumber.Length.CompareTo(firstNumber.Length); - if (lengthComparison != 0) - { - return lengthComparison; - } - - var valueComparison = firstNumber.Value.CompareTo(secondNumber.Value); - if (valueComparison != 0) - { - return valueComparison; - } - - return string.Compare(first.Property, second.Property, StringComparison.Ordinal); - }); - - var getAddressesResponse = new GetAddressesResponse - { - Addresses = [.. addresses], - }; - - return getAddressesResponse; - } - - throw new InvalidOperationException("Invalid client-side request."); - } - - /// - public GetBinDaysResponse GetBinDays(Address address, ClientSideResponse? clientSideResponse) - { - // Prepare client-side request for getting the initial token - if (clientSideResponse == null) - { - var clientSideRequest = new ClientSideRequest - { - RequestId = 1, - Url = _initialUrl, - Method = "GET", - Headers = new() { - {"user-agent", Constants.UserAgent}, - }, - Options = new ClientSideOptions - { - FollowRedirects = false, - }, - }; - - var getBinDaysResponse = new GetBinDaysResponse - { - NextClientSideRequest = clientSideRequest - }; - - return getBinDaysResponse; - } - // Prepare client-side request for loading the form - else if (clientSideResponse.RequestId == 1) - { - var cookie = ProcessingUtilities.ParseSetCookieHeaderForRequestCookie( - clientSideResponse.Headers["set-cookie"]); - var redirectUrl = BuildAbsoluteUrl(clientSideResponse.Headers["location"]); - - var clientSideRequest = new ClientSideRequest - { - RequestId = 2, - Url = redirectUrl, - Method = "GET", - Headers = new() { - {"user-agent", Constants.UserAgent}, - {"cookie", cookie}, - }, - Options = new ClientSideOptions - { - Metadata = { - { "cookie", cookie }, - } - }, - }; - - var getBinDaysResponse = new GetBinDaysResponse - { - NextClientSideRequest = clientSideRequest - }; - - return getBinDaysResponse; - } - // Prepare client-side request for searching for addresses - else if (clientSideResponse.RequestId == 2) - { - var (ebs, formstack, origRequestUrl, pageSequence, pageId, formStateId) = ParseFormValues( - clientSideResponse.Content); - - var metadata = new Dictionary - { - { "cookie", clientSideResponse.Options.Metadata["cookie"] }, - { "ebs", ebs }, - { "formstack", formstack }, - { "origRequestUrl", origRequestUrl }, - { "pageSeq", pageSequence }, - { "pageId", pageId }, - { "formStateId", formStateId }, - }; - - var requestBody = ProcessingUtilities.ConvertDictionaryToFormData(new() - { - {"formid", _formId}, - {"ebs", ebs}, - {"origrequrl", origRequestUrl}, - {"formstack", formstack}, - {"PAGE:E.h", string.Empty}, - {"PAGE:B.h", string.Empty}, - {"PAGE:N.h", string.Empty}, - {"PAGE:P.h", string.Empty}, - {"PAGE:S.h", string.Empty}, - {"PAGE:R.h", string.Empty}, - {"PAGE:D", string.Empty}, - {"PAGE:H", string.Empty}, - {"PAGE:X", "0"}, - {"PAGE:Y", "0"}, - {"PAGE:F", "CTID-2eDPaBQA-_"}, - {"ufsEndUser*", "1"}, - {"pageSeq", pageSequence}, - {"pageId", pageId}, - {"formStateId", formStateId}, - {"$USERVAR1", string.Empty}, - {"$USERVAR2", string.Empty}, - {"$USERVAR3", string.Empty}, - {_postcodeField, address.Postcode!}, - {"HID:inputs", _addressHidInputs}, - {_findButton, "Find address"}, - }); - - var requestHeaders = new Dictionary { - {"user-agent", Constants.UserAgent}, - {"content-type", "application/x-www-form-urlencoded"}, - {"cookie", metadata["cookie"]}, - }; - - var clientSideRequest = new ClientSideRequest - { - RequestId = 3, - Url = $"https://onlineforms.bradford.gov.uk/ufs/ufsajax?ebz={ebs}", - Method = "POST", - Headers = requestHeaders, - Body = requestBody, - Options = new ClientSideOptions - { - Metadata = metadata, - }, - }; - - var getBinDaysResponse = new GetBinDaysResponse - { - NextClientSideRequest = clientSideRequest - }; - - return getBinDaysResponse; - } - // Prepare client-side request for selecting the address - else if (clientSideResponse.RequestId == 3) - { - var addressesHtml = ExtractUpdatedHtml(clientSideResponse.Content, "Go9IHRTP"); - var rawAddresses = AddressesRegex().Matches(addressesHtml)!; - - var addressFields = new List<(string Field, string Property)>(); - foreach (Match rawAddress in rawAddresses) - { - addressFields.Add((rawAddress.Groups["field"].Value, WebUtility.HtmlDecode(rawAddress.Groups["address"].Value.Trim()))); - } - - var selectedAddress = addressFields.Find(x => - string.Equals(x.Field, address.Uid, StringComparison.OrdinalIgnoreCase)); - - if (selectedAddress == default) - { - selectedAddress = addressFields.Find(x => - string.Equals(x.Property, address.Property, StringComparison.OrdinalIgnoreCase)); - } - - if (selectedAddress == default) - { - throw new InvalidOperationException("Selected address not found."); - } - - var metadata = clientSideResponse.Options.Metadata; - - var requestBodyDictionary = new Dictionary - { - {"formid", _formId}, - {"ebs", metadata["ebs"]}, - {"origrequrl", metadata["origRequestUrl"]}, - {"formstack", metadata["formstack"]}, - {"PAGE:E.h", string.Empty}, - {"PAGE:B.h", string.Empty}, - {"PAGE:N.h", string.Empty}, - {"PAGE:P.h", string.Empty}, - {"PAGE:S.h", string.Empty}, - {"PAGE:R.h", string.Empty}, - {"PAGE:D", string.Empty}, - {"PAGE:H", string.Empty}, - {"PAGE:X", "0"}, - {"PAGE:Y", "0"}, - {"PAGE:F", _addressPageField}, - {"ufsEndUser*", "1"}, - {"pageSeq", metadata["pageSeq"]}, - {"pageId", metadata["pageId"]}, - {"formStateId", metadata["formStateId"]}, - {"$USERVAR1", string.Empty}, - {"$USERVAR2", string.Empty}, - {"$USERVAR3", string.Empty}, - {_postcodeField, address.Postcode!}, - }; - - // Iterate through each address, and create the request fields - foreach (var rawAddress in addressFields) - { - requestBodyDictionary.Add(rawAddress.Field, rawAddress == selectedAddress - ? rawAddress.Property - : string.Empty); - } - - var hidInputs = new List - { - "ICTRL:Q2YAUZ5b:_:A", - "ACTRL:2eDPaBQA:_", - }; - - foreach (var rawAddress in addressFields) - { - hidInputs.Add(rawAddress.Field.Replace("CTRL:", "ACTRL:")); - } - - hidInputs.Add("APAGE:E.h"); - hidInputs.Add("APAGE:B.h"); - hidInputs.Add("APAGE:N.h"); - hidInputs.Add("APAGE:P.h"); - hidInputs.Add("APAGE:S.h"); - hidInputs.Add("APAGE:R.h"); - - requestBodyDictionary.Add("HID:inputs", string.Join(",", hidInputs)); - - var requestHeaders = new Dictionary { - {"user-agent", Constants.UserAgent}, - {"content-type", "application/x-www-form-urlencoded"}, - {"cookie", metadata["cookie"]}, - }; - - var requestBody = ProcessingUtilities.ConvertDictionaryToFormData(requestBodyDictionary); - - var clientSideRequest = new ClientSideRequest - { - RequestId = 4, - Url = $"https://onlineforms.bradford.gov.uk/ufs/ufsajax?ebz={metadata["ebs"]}", - Method = "POST", - Headers = requestHeaders, - Body = requestBody, - Options = new ClientSideOptions - { - Metadata = metadata, - }, - }; - - var getBinDaysResponse = new GetBinDaysResponse - { - NextClientSideRequest = clientSideRequest - }; - - return getBinDaysResponse; - } - // Prepare client-side request for showing the collection dates - else if (clientSideResponse.RequestId == 4) - { - var metadata = clientSideResponse.Options.Metadata; - - var requestBody = ProcessingUtilities.ConvertDictionaryToFormData(new() - { - {"formid", _formId}, - {"ebs", metadata["ebs"]}, - {"origrequrl", metadata["origRequestUrl"]}, - {"formstack", metadata["formstack"]}, - {"PAGE:E.h", string.Empty}, - {"PAGE:B.h", string.Empty}, - {"PAGE:N.h", string.Empty}, - {"PAGE:P.h", string.Empty}, - {"PAGE:S.h", string.Empty}, - {"PAGE:R.h", string.Empty}, - {"PAGE:D", string.Empty}, - {"PAGE:H", string.Empty}, - {"PAGE:X", "0"}, - {"PAGE:Y", "0"}, - {"PAGE:F", _showCollectionsField}, - {"ufsEndUser*", "1"}, - {"pageSeq", metadata["pageSeq"]}, - {"pageId", metadata["pageId"]}, - {"formStateId", metadata["formStateId"]}, - {"$USERVAR1", string.Empty}, - {"$USERVAR2", string.Empty}, - {"$USERVAR3", string.Empty}, - {"HID:inputs", _showCollectionsHidInputs}, - {_showButton, "Show collection dates"}, - }); - - var requestHeaders = new Dictionary { - {"user-agent", Constants.UserAgent}, - {"content-type", "application/x-www-form-urlencoded"}, - {"cookie", metadata["cookie"]}, - }; - - var clientSideRequest = new ClientSideRequest - { - RequestId = 5, - Url = $"https://onlineforms.bradford.gov.uk/ufs/ufsajax?ebz={metadata["ebs"]}", - Method = "POST", - Headers = requestHeaders, - Body = requestBody, - Options = new ClientSideOptions - { - Metadata = metadata, - }, - }; - - var getBinDaysResponse = new GetBinDaysResponse - { - NextClientSideRequest = clientSideRequest - }; - - return getBinDaysResponse; - } - // Prepare client-side request for navigating to the collection dates page - else if (clientSideResponse.RequestId == 5) - { - var metadata = clientSideResponse.Options.Metadata; - - var requestBody = ProcessingUtilities.ConvertDictionaryToFormData(new() - { - {"formid", _formId}, - {"ebs", metadata["ebs"]}, - {"origrequrl", metadata["origRequestUrl"]}, - {"formstack", metadata["formstack"]}, - {"PAGE:E.h", string.Empty}, - {"PAGE:B.h", string.Empty}, - {"PAGE:N.h", string.Empty}, - {"PAGE:P.h", string.Empty}, - {"PAGE:S.h", string.Empty}, - {"PAGE:R.h", string.Empty}, - {"PAGE:D", string.Empty}, - {"PAGE:H", string.Empty}, - {"PAGE:X", "0"}, - {"PAGE:Y", "0"}, - {"PAGE:F", _showCollectionsField}, - {"ufsEndUser*", "1"}, - {"pageSeq", metadata["pageSeq"]}, - {"pageId", metadata["pageId"]}, - {"formStateId", metadata["formStateId"]}, - {"$USERVAR1", string.Empty}, - {"$USERVAR2", string.Empty}, - {"$USERVAR3", string.Empty}, - {"HID:inputs", _showCollectionsHidInputs}, - {"ebReshow", "true"}, - }); - - var requestHeaders = new Dictionary { - {"user-agent", Constants.UserAgent}, - {"content-type", "application/x-www-form-urlencoded"}, - {"cookie", metadata["cookie"]}, - }; - - var clientSideRequest = new ClientSideRequest - { - RequestId = 6, - Url = $"https://onlineforms.bradford.gov.uk/ufs/collectiondates.eb?ebz={metadata["ebs"]}", - Method = "POST", - Headers = requestHeaders, - Body = requestBody, - Options = new ClientSideOptions - { - Metadata = metadata, - FollowRedirects = false, - }, - }; - - var getBinDaysResponse = new GetBinDaysResponse - { - NextClientSideRequest = clientSideRequest - }; - - return getBinDaysResponse; - } - // Prepare client-side request for loading the collection dates page - else if (clientSideResponse.RequestId == 6) - { - var metadata = clientSideResponse.Options.Metadata; - var redirectUrl = BuildAbsoluteUrl(clientSideResponse.Headers["location"]); - - var clientSideRequest = new ClientSideRequest - { - RequestId = 7, - Url = redirectUrl, - Method = "GET", - Headers = new() { - {"user-agent", Constants.UserAgent}, - {"cookie", metadata["cookie"]}, - }, - }; - - var getBinDaysResponse = new GetBinDaysResponse - { - NextClientSideRequest = clientSideRequest - }; - - return getBinDaysResponse; - } - // Process bin days from the response - else if (clientSideResponse.RequestId == 7) - { - var binSections = BinSectionRegex().Matches(clientSideResponse.Content)!; - - // Iterate through each bin section, and create a new bin day object - var binDays = new List(); - foreach (Match binSection in binSections) - { - var service = binSection.Groups["service"].Value.Trim(); - var datesText = binSection.Groups["content"].Value; - - foreach (Match dateMatch in DateRegex().Matches(datesText)!) - { - var date = DateOnly.ParseExact( - dateMatch.Value, - "ddd MMM dd yyyy", - CultureInfo.InvariantCulture, - DateTimeStyles.None - ); - - var matchingBins = ProcessingUtilities.GetMatchingBins(_binTypes, service); - - var binDay = new BinDay - { - Date = date, - Address = address, - Bins = matchingBins, - }; - - binDays.Add(binDay); - } - } - - var getBinDaysResponse = new GetBinDaysResponse - { - BinDays = ProcessingUtilities.ProcessBinDays(binDays), - }; - - return getBinDaysResponse; - } - - throw new InvalidOperationException("Invalid client-side request."); - } - - private static (int Length, int Value) GetAddressNumber(Address address) - { - var digits = GetLeadingNumber(address.Property!); - - if (digits == null) - { - return (0, 0); - } - - return (digits.Length, int.Parse(digits, CultureInfo.InvariantCulture)); - } - - private static string? GetLeadingNumber(string property) - { - var digits = string.Empty; - - foreach (var character in property) - { - if (character == ',') - { - break; - } - - if (char.IsDigit(character)) - { - digits += character; - continue; - } - - if (!string.IsNullOrWhiteSpace(digits)) - { - break; - } - } - - return string.IsNullOrWhiteSpace(digits) ? null : digits; - } - - private static string BuildAbsoluteUrl(string relativeUrl) - { - if (relativeUrl.StartsWith("http", StringComparison.OrdinalIgnoreCase)) - { - return relativeUrl; - } - - return $"https://onlineforms.bradford.gov.uk/ufs/{relativeUrl}"; - } - - private static (string Ebs, string Formstack, string OrigRequestUrl, string PageSequence, string PageId, string FormStateId) ParseFormValues( - string html) - { - var ebs = EbsRegex().Match(html).Groups["value"].Value; - var formstack = FormstackRegex().Match(html).Groups["value"].Value; - var origRequestUrl = OrigRequestUrlRegex().Match(html).Groups["url"].Value.Replace("&", "&"); - var pageSequence = PageSequenceRegex().Match(html).Groups["value"].Value; - var pageId = PageIdRegex().Match(html).Groups["value"].Value; - var formStateId = FormStateRegex().Match(html).Groups["value"].Value; - - return (ebs, formstack, origRequestUrl, pageSequence, pageId, formStateId); - } - - private static string ExtractUpdatedHtml(string content, string identifier) - { - using var jsonDoc = JsonDocument.Parse(content); - - if (jsonDoc.RootElement.TryGetProperty("updatedControls", out var updatedControls)) - { - foreach (var control in updatedControls.EnumerateArray()) - { - if (control.TryGetProperty("html", out var htmlProperty) && - htmlProperty.ValueKind == JsonValueKind.String && - htmlProperty.GetString()!.Contains(identifier, StringComparison.OrdinalIgnoreCase)) - { - return htmlProperty.GetString()!.Replace("\\\"", "\""); - } - } - } - - throw new InvalidOperationException("Could not find updated HTML content."); - } -} +namespace BinDays.Api.Collectors.Collectors.Councils; + +using BinDays.Api.Collectors.Collectors.Vendors; +using BinDays.Api.Collectors.Models; +using BinDays.Api.Collectors.Utilities; +using System; +using System.Collections.Generic; +using System.Globalization; +using System.Net; +using System.Text.Json; +using System.Text.RegularExpressions; + +/// +/// Collector implementation for Bradford Council. +/// +internal sealed partial class BradfordCouncil : GovUkCollectorBase, ICollector +{ + /// + public string Name => "Bradford Council"; + + /// + public Uri WebsiteUrl => new("https://www.bradford.gov.uk/"); + + /// + public override string GovUkId => "bradford"; + + private const string _initialUrl = "https://onlineforms.bradford.gov.uk/ufs/collectiondates.eb?ebd=0&ebp=20&ebz=1_1761729510565"; + private const string _formId = "/Forms/COLLECTIONDATES"; + private const string _postcodeField = "CTRL:Q2YAUZ5b:_:A"; + private const string _findButton = "CTRL:2eDPaBQA:_"; + private const string _addressPageField = "CTID-Go9IHRTP-1-A"; + private const string _showCollectionsField = "CTID-PieY14aw-_"; + private const string _showButton = "CTRL:PieY14aw:_"; + private const string _addressHidInputs = "ICTRL:Q2YAUZ5b:_:A,ACTRL:2eDPaBQA:_,APAGE:E.h,APAGE:B.h,APAGE:N.h,APAGE:P.h,APAGE:S.h,APAGE:R.h"; + private const string _showCollectionsHidInputs = "ACTRL:PieY14aw:_,ACTRL:EstZqKRj:_,APAGE:E.h,APAGE:B.h,APAGE:N.h,APAGE:P.h,APAGE:S.h,APAGE:R.h"; + + /// + /// The list of bin types for this collector. + /// + private readonly IReadOnlyCollection _binTypes = + [ + new() + { + Name = "General waste", + Colour = BinColour.Green, + Keys = [ "General waste" ], + }, + new() + { + Name = "Recycling waste", + Colour = BinColour.Grey, + Keys = [ "Recycling waste" ], + }, + new() + { + Name = "Garden waste", + Colour = BinColour.Brown, + Keys = [ "Garden waste" ], + }, + ]; + + /// + /// Regex for the formstack value from the HTML. + /// + [GeneratedRegex("name=\"formstack\" value=\"(?[^\"]+)\"", RegexOptions.IgnoreCase)] + private static partial Regex FormstackRegex(); + + /// + /// Regex for the original request url from the HTML. + /// + [GeneratedRegex("name=\"origrequrl\" value=\"(?[^\"]+)\"", RegexOptions.IgnoreCase)] + private static partial Regex OrigRequestUrlRegex(); + + /// + /// Regex for the ebs value from the HTML. + /// + [GeneratedRegex("name=\"ebs\" value=\"(?[^\"]+)\"", RegexOptions.IgnoreCase)] + private static partial Regex EbsRegex(); + + /// + /// Regex for the page sequence value from the HTML. + /// + [GeneratedRegex("name=\"pageSeq\" value=\"(?[^\"]+)\"", RegexOptions.IgnoreCase)] + private static partial Regex PageSequenceRegex(); + + /// + /// Regex for the page id value from the HTML. + /// + [GeneratedRegex("name=\"pageId\" value=\"(?[^\"]+)\"", RegexOptions.IgnoreCase)] + private static partial Regex PageIdRegex(); + + /// + /// Regex for the form state id value from the HTML. + /// + [GeneratedRegex("name=\"formStateId\" value=\"(?[^\"]+)\"", RegexOptions.IgnoreCase)] + private static partial Regex FormStateRegex(); + + /// + /// Regex for the addresses from the data. + /// + [GeneratedRegex("data-eb-input-name=\"(?CTRL:Go9IHRTP:\\d+:B\\.h)\"[^>]*>(?
[^<]+)", RegexOptions.IgnoreCase)] + private static partial Regex AddressesRegex(); + + /// + /// Regex for the bin sections on the collection page. + /// + [GeneratedRegex("

(?[^<]+)

(?.*?)((?=

)|\\z)", RegexOptions.Singleline)] + private static partial Regex BinSectionRegex(); + + /// + /// Regex for the bin day dates. + /// + [GeneratedRegex("Thu [A-Za-z]{3} \\d{2} \\d{4}")] + private static partial Regex DateRegex(); + + /// + public GetAddressesResponse GetAddresses(string postcode, ClientSideResponse? clientSideResponse) + { + // Prepare client-side request for getting the initial token + if (clientSideResponse == null) + { + var clientSideRequest = new ClientSideRequest + { + RequestId = 1, + Url = _initialUrl, + Method = "GET", + Headers = new() { + {"user-agent", Constants.UserAgent}, + }, + Options = new ClientSideOptions + { + FollowRedirects = false, + }, + }; + + var getAddressesResponse = new GetAddressesResponse + { + NextClientSideRequest = clientSideRequest + }; + + return getAddressesResponse; + } + // Prepare client-side request for loading the form + else if (clientSideResponse.RequestId == 1) + { + var cookie = ProcessingUtilities.ParseSetCookieHeaderForRequestCookie( + clientSideResponse.Headers["set-cookie"]); + var redirectUrl = BuildAbsoluteUrl(clientSideResponse.Headers["location"]); + + var clientSideRequest = new ClientSideRequest + { + RequestId = 2, + Url = redirectUrl, + Method = "GET", + Headers = new() { + {"user-agent", Constants.UserAgent}, + {"cookie", cookie}, + }, + Options = new ClientSideOptions + { + Metadata = { + { "cookie", cookie }, + } + }, + }; + + var getAddressesResponse = new GetAddressesResponse + { + NextClientSideRequest = clientSideRequest + }; + + return getAddressesResponse; + } + // Prepare client-side request for searching for addresses + else if (clientSideResponse.RequestId == 2) + { + var (ebs, formstack, origRequestUrl, pageSequence, pageId, formStateId) = ParseFormValues( + clientSideResponse.Content); + + var metadata = new Dictionary + { + { "cookie", clientSideResponse.Options.Metadata["cookie"] }, + { "ebs", ebs }, + { "formstack", formstack }, + { "origRequestUrl", origRequestUrl }, + { "pageSeq", pageSequence }, + { "pageId", pageId }, + { "formStateId", formStateId }, + }; + + var requestBody = ProcessingUtilities.ConvertDictionaryToFormData(new() + { + {"formid", _formId}, + {"ebs", ebs}, + {"origrequrl", origRequestUrl}, + {"formstack", formstack}, + {"PAGE:E.h", string.Empty}, + {"PAGE:B.h", string.Empty}, + {"PAGE:N.h", string.Empty}, + {"PAGE:P.h", string.Empty}, + {"PAGE:S.h", string.Empty}, + {"PAGE:R.h", string.Empty}, + {"PAGE:D", string.Empty}, + {"PAGE:H", string.Empty}, + {"PAGE:X", "0"}, + {"PAGE:Y", "0"}, + {"PAGE:F", "CTID-2eDPaBQA-_"}, + {"ufsEndUser*", "1"}, + {"pageSeq", pageSequence}, + {"pageId", pageId}, + {"formStateId", formStateId}, + {"$USERVAR1", string.Empty}, + {"$USERVAR2", string.Empty}, + {"$USERVAR3", string.Empty}, + {_postcodeField, postcode}, + {"HID:inputs", _addressHidInputs}, + {_findButton, "Find address"}, + }); + + var requestHeaders = new Dictionary { + {"user-agent", Constants.UserAgent}, + {"content-type", "application/x-www-form-urlencoded"}, + {"cookie", metadata["cookie"]}, + }; + + var clientSideRequest = new ClientSideRequest + { + RequestId = 3, + Url = $"https://onlineforms.bradford.gov.uk/ufs/ufsajax?ebz={ebs}", + Method = "POST", + Headers = requestHeaders, + Body = requestBody, + Options = new ClientSideOptions + { + Metadata = metadata, + }, + }; + + var getAddressesResponse = new GetAddressesResponse + { + NextClientSideRequest = clientSideRequest + }; + + return getAddressesResponse; + } + // Process addresses from response + else if (clientSideResponse.RequestId == 3) + { + var addressesHtml = ExtractUpdatedHtml(clientSideResponse.Content, "Go9IHRTP"); + + var rawAddresses = AddressesRegex().Matches(addressesHtml)!; + + // Iterate through each address, and create a new address object + var addresses = new List
(); + foreach (Match rawAddress in rawAddresses) + { + var address = new Address + { + Property = WebUtility.HtmlDecode(rawAddress.Groups["address"].Value.Trim()), + Postcode = postcode, + Uid = rawAddress.Groups["field"].Value, + }; + + addresses.Add(address); + } + + addresses.Sort((first, second) => + { + var (Length, Value) = GetAddressNumber(first); + var secondNumber = GetAddressNumber(second); + + var lengthComparison = secondNumber.Length.CompareTo(Length); + if (lengthComparison != 0) + { + return lengthComparison; + } + + var valueComparison = Value.CompareTo(secondNumber.Value); + if (valueComparison != 0) + { + return valueComparison; + } + + return string.Compare(first.Property, second.Property, StringComparison.Ordinal); + }); + + var getAddressesResponse = new GetAddressesResponse + { + Addresses = [.. addresses], + }; + + return getAddressesResponse; + } + + throw new InvalidOperationException("Invalid client-side request."); + } + + /// + public GetBinDaysResponse GetBinDays(Address address, ClientSideResponse? clientSideResponse) + { + // Prepare client-side request for getting the initial token + if (clientSideResponse == null) + { + var clientSideRequest = new ClientSideRequest + { + RequestId = 1, + Url = _initialUrl, + Method = "GET", + Headers = new() { + {"user-agent", Constants.UserAgent}, + }, + Options = new ClientSideOptions + { + FollowRedirects = false, + }, + }; + + var getBinDaysResponse = new GetBinDaysResponse + { + NextClientSideRequest = clientSideRequest + }; + + return getBinDaysResponse; + } + // Prepare client-side request for loading the form + else if (clientSideResponse.RequestId == 1) + { + var cookie = ProcessingUtilities.ParseSetCookieHeaderForRequestCookie( + clientSideResponse.Headers["set-cookie"]); + var redirectUrl = BuildAbsoluteUrl(clientSideResponse.Headers["location"]); + + var clientSideRequest = new ClientSideRequest + { + RequestId = 2, + Url = redirectUrl, + Method = "GET", + Headers = new() { + {"user-agent", Constants.UserAgent}, + {"cookie", cookie}, + }, + Options = new ClientSideOptions + { + Metadata = { + { "cookie", cookie }, + } + }, + }; + + var getBinDaysResponse = new GetBinDaysResponse + { + NextClientSideRequest = clientSideRequest + }; + + return getBinDaysResponse; + } + // Prepare client-side request for searching for addresses + else if (clientSideResponse.RequestId == 2) + { + var (ebs, formstack, origRequestUrl, pageSequence, pageId, formStateId) = ParseFormValues( + clientSideResponse.Content); + + var metadata = new Dictionary + { + { "cookie", clientSideResponse.Options.Metadata["cookie"] }, + { "ebs", ebs }, + { "formstack", formstack }, + { "origRequestUrl", origRequestUrl }, + { "pageSeq", pageSequence }, + { "pageId", pageId }, + { "formStateId", formStateId }, + }; + + var requestBody = ProcessingUtilities.ConvertDictionaryToFormData(new() + { + {"formid", _formId}, + {"ebs", ebs}, + {"origrequrl", origRequestUrl}, + {"formstack", formstack}, + {"PAGE:E.h", string.Empty}, + {"PAGE:B.h", string.Empty}, + {"PAGE:N.h", string.Empty}, + {"PAGE:P.h", string.Empty}, + {"PAGE:S.h", string.Empty}, + {"PAGE:R.h", string.Empty}, + {"PAGE:D", string.Empty}, + {"PAGE:H", string.Empty}, + {"PAGE:X", "0"}, + {"PAGE:Y", "0"}, + {"PAGE:F", "CTID-2eDPaBQA-_"}, + {"ufsEndUser*", "1"}, + {"pageSeq", pageSequence}, + {"pageId", pageId}, + {"formStateId", formStateId}, + {"$USERVAR1", string.Empty}, + {"$USERVAR2", string.Empty}, + {"$USERVAR3", string.Empty}, + {_postcodeField, address.Postcode!}, + {"HID:inputs", _addressHidInputs}, + {_findButton, "Find address"}, + }); + + var requestHeaders = new Dictionary { + {"user-agent", Constants.UserAgent}, + {"content-type", "application/x-www-form-urlencoded"}, + {"cookie", metadata["cookie"]}, + }; + + var clientSideRequest = new ClientSideRequest + { + RequestId = 3, + Url = $"https://onlineforms.bradford.gov.uk/ufs/ufsajax?ebz={ebs}", + Method = "POST", + Headers = requestHeaders, + Body = requestBody, + Options = new ClientSideOptions + { + Metadata = metadata, + }, + }; + + var getBinDaysResponse = new GetBinDaysResponse + { + NextClientSideRequest = clientSideRequest + }; + + return getBinDaysResponse; + } + // Prepare client-side request for selecting the address + else if (clientSideResponse.RequestId == 3) + { + var addressesHtml = ExtractUpdatedHtml(clientSideResponse.Content, "Go9IHRTP"); + var rawAddresses = AddressesRegex().Matches(addressesHtml)!; + + var addressFields = new List<(string Field, string Property)>(); + foreach (Match rawAddress in rawAddresses) + { + addressFields.Add((rawAddress.Groups["field"].Value, WebUtility.HtmlDecode(rawAddress.Groups["address"].Value.Trim()))); + } + + var selectedAddress = addressFields.Find(x => + string.Equals(x.Field, address.Uid, StringComparison.OrdinalIgnoreCase)); + + if (selectedAddress == default) + { + selectedAddress = addressFields.Find(x => + string.Equals(x.Property, address.Property, StringComparison.OrdinalIgnoreCase)); + } + + if (selectedAddress == default) + { + throw new InvalidOperationException("Selected address not found."); + } + + var metadata = clientSideResponse.Options.Metadata; + + var requestBodyDictionary = new Dictionary + { + {"formid", _formId}, + {"ebs", metadata["ebs"]}, + {"origrequrl", metadata["origRequestUrl"]}, + {"formstack", metadata["formstack"]}, + {"PAGE:E.h", string.Empty}, + {"PAGE:B.h", string.Empty}, + {"PAGE:N.h", string.Empty}, + {"PAGE:P.h", string.Empty}, + {"PAGE:S.h", string.Empty}, + {"PAGE:R.h", string.Empty}, + {"PAGE:D", string.Empty}, + {"PAGE:H", string.Empty}, + {"PAGE:X", "0"}, + {"PAGE:Y", "0"}, + {"PAGE:F", _addressPageField}, + {"ufsEndUser*", "1"}, + {"pageSeq", metadata["pageSeq"]}, + {"pageId", metadata["pageId"]}, + {"formStateId", metadata["formStateId"]}, + {"$USERVAR1", string.Empty}, + {"$USERVAR2", string.Empty}, + {"$USERVAR3", string.Empty}, + {_postcodeField, address.Postcode!}, + }; + + // Iterate through each address, and create the request fields + foreach (var rawAddress in addressFields) + { + requestBodyDictionary.Add(rawAddress.Field, rawAddress == selectedAddress + ? rawAddress.Property + : string.Empty); + } + + var hidInputs = new List + { + "ICTRL:Q2YAUZ5b:_:A", + "ACTRL:2eDPaBQA:_", + }; + + foreach (var (Field, Property) in addressFields) + { + hidInputs.Add(Field.Replace("CTRL:", "ACTRL:")); + } + + hidInputs.Add("APAGE:E.h"); + hidInputs.Add("APAGE:B.h"); + hidInputs.Add("APAGE:N.h"); + hidInputs.Add("APAGE:P.h"); + hidInputs.Add("APAGE:S.h"); + hidInputs.Add("APAGE:R.h"); + + requestBodyDictionary.Add("HID:inputs", string.Join(",", hidInputs)); + + var requestHeaders = new Dictionary { + {"user-agent", Constants.UserAgent}, + {"content-type", "application/x-www-form-urlencoded"}, + {"cookie", metadata["cookie"]}, + }; + + var requestBody = ProcessingUtilities.ConvertDictionaryToFormData(requestBodyDictionary); + + var clientSideRequest = new ClientSideRequest + { + RequestId = 4, + Url = $"https://onlineforms.bradford.gov.uk/ufs/ufsajax?ebz={metadata["ebs"]}", + Method = "POST", + Headers = requestHeaders, + Body = requestBody, + Options = new ClientSideOptions + { + Metadata = metadata, + }, + }; + + var getBinDaysResponse = new GetBinDaysResponse + { + NextClientSideRequest = clientSideRequest + }; + + return getBinDaysResponse; + } + // Prepare client-side request for showing the collection dates + else if (clientSideResponse.RequestId == 4) + { + var metadata = clientSideResponse.Options.Metadata; + + var requestBody = ProcessingUtilities.ConvertDictionaryToFormData(new() + { + {"formid", _formId}, + {"ebs", metadata["ebs"]}, + {"origrequrl", metadata["origRequestUrl"]}, + {"formstack", metadata["formstack"]}, + {"PAGE:E.h", string.Empty}, + {"PAGE:B.h", string.Empty}, + {"PAGE:N.h", string.Empty}, + {"PAGE:P.h", string.Empty}, + {"PAGE:S.h", string.Empty}, + {"PAGE:R.h", string.Empty}, + {"PAGE:D", string.Empty}, + {"PAGE:H", string.Empty}, + {"PAGE:X", "0"}, + {"PAGE:Y", "0"}, + {"PAGE:F", _showCollectionsField}, + {"ufsEndUser*", "1"}, + {"pageSeq", metadata["pageSeq"]}, + {"pageId", metadata["pageId"]}, + {"formStateId", metadata["formStateId"]}, + {"$USERVAR1", string.Empty}, + {"$USERVAR2", string.Empty}, + {"$USERVAR3", string.Empty}, + {"HID:inputs", _showCollectionsHidInputs}, + {_showButton, "Show collection dates"}, + }); + + var requestHeaders = new Dictionary { + {"user-agent", Constants.UserAgent}, + {"content-type", "application/x-www-form-urlencoded"}, + {"cookie", metadata["cookie"]}, + }; + + var clientSideRequest = new ClientSideRequest + { + RequestId = 5, + Url = $"https://onlineforms.bradford.gov.uk/ufs/ufsajax?ebz={metadata["ebs"]}", + Method = "POST", + Headers = requestHeaders, + Body = requestBody, + Options = new ClientSideOptions + { + Metadata = metadata, + }, + }; + + var getBinDaysResponse = new GetBinDaysResponse + { + NextClientSideRequest = clientSideRequest + }; + + return getBinDaysResponse; + } + // Prepare client-side request for navigating to the collection dates page + else if (clientSideResponse.RequestId == 5) + { + var metadata = clientSideResponse.Options.Metadata; + + var requestBody = ProcessingUtilities.ConvertDictionaryToFormData(new() + { + {"formid", _formId}, + {"ebs", metadata["ebs"]}, + {"origrequrl", metadata["origRequestUrl"]}, + {"formstack", metadata["formstack"]}, + {"PAGE:E.h", string.Empty}, + {"PAGE:B.h", string.Empty}, + {"PAGE:N.h", string.Empty}, + {"PAGE:P.h", string.Empty}, + {"PAGE:S.h", string.Empty}, + {"PAGE:R.h", string.Empty}, + {"PAGE:D", string.Empty}, + {"PAGE:H", string.Empty}, + {"PAGE:X", "0"}, + {"PAGE:Y", "0"}, + {"PAGE:F", _showCollectionsField}, + {"ufsEndUser*", "1"}, + {"pageSeq", metadata["pageSeq"]}, + {"pageId", metadata["pageId"]}, + {"formStateId", metadata["formStateId"]}, + {"$USERVAR1", string.Empty}, + {"$USERVAR2", string.Empty}, + {"$USERVAR3", string.Empty}, + {"HID:inputs", _showCollectionsHidInputs}, + {"ebReshow", "true"}, + }); + + var requestHeaders = new Dictionary { + {"user-agent", Constants.UserAgent}, + {"content-type", "application/x-www-form-urlencoded"}, + {"cookie", metadata["cookie"]}, + }; + + var clientSideRequest = new ClientSideRequest + { + RequestId = 6, + Url = $"https://onlineforms.bradford.gov.uk/ufs/collectiondates.eb?ebz={metadata["ebs"]}", + Method = "POST", + Headers = requestHeaders, + Body = requestBody, + Options = new ClientSideOptions + { + Metadata = metadata, + FollowRedirects = false, + }, + }; + + var getBinDaysResponse = new GetBinDaysResponse + { + NextClientSideRequest = clientSideRequest + }; + + return getBinDaysResponse; + } + // Prepare client-side request for loading the collection dates page + else if (clientSideResponse.RequestId == 6) + { + var metadata = clientSideResponse.Options.Metadata; + var redirectUrl = BuildAbsoluteUrl(clientSideResponse.Headers["location"]); + + var clientSideRequest = new ClientSideRequest + { + RequestId = 7, + Url = redirectUrl, + Method = "GET", + Headers = new() { + {"user-agent", Constants.UserAgent}, + {"cookie", metadata["cookie"]}, + }, + }; + + var getBinDaysResponse = new GetBinDaysResponse + { + NextClientSideRequest = clientSideRequest + }; + + return getBinDaysResponse; + } + // Process bin days from the response + else if (clientSideResponse.RequestId == 7) + { + var binSections = BinSectionRegex().Matches(clientSideResponse.Content)!; + + // Iterate through each bin section, and create a new bin day object + var binDays = new List(); + foreach (Match binSection in binSections) + { + var service = binSection.Groups["service"].Value.Trim(); + var datesText = binSection.Groups["content"].Value; + + foreach (Match dateMatch in DateRegex().Matches(datesText)!) + { + var date = DateOnly.ParseExact( + dateMatch.Value, + "ddd MMM dd yyyy", + CultureInfo.InvariantCulture, + DateTimeStyles.None + ); + + var matchingBins = ProcessingUtilities.GetMatchingBins(_binTypes, service); + + var binDay = new BinDay + { + Date = date, + Address = address, + Bins = matchingBins, + }; + + binDays.Add(binDay); + } + } + + var getBinDaysResponse = new GetBinDaysResponse + { + BinDays = ProcessingUtilities.ProcessBinDays(binDays), + }; + + return getBinDaysResponse; + } + + throw new InvalidOperationException("Invalid client-side request."); + } + + private static (int Length, int Value) GetAddressNumber(Address address) + { + var digits = GetLeadingNumber(address.Property!); + + if (digits == null) + { + return (0, 0); + } + + return (digits.Length, int.Parse(digits, CultureInfo.InvariantCulture)); + } + + private static string? GetLeadingNumber(string property) + { + var digits = string.Empty; + + foreach (var character in property) + { + if (character == ',') + { + break; + } + + if (char.IsDigit(character)) + { + digits += character; + continue; + } + + if (!string.IsNullOrWhiteSpace(digits)) + { + break; + } + } + + return string.IsNullOrWhiteSpace(digits) ? null : digits; + } + + private static string BuildAbsoluteUrl(string relativeUrl) + { + if (relativeUrl.StartsWith("http", StringComparison.OrdinalIgnoreCase)) + { + return relativeUrl; + } + + return $"https://onlineforms.bradford.gov.uk/ufs/{relativeUrl}"; + } + + private static (string Ebs, string Formstack, string OrigRequestUrl, string PageSequence, string PageId, string FormStateId) ParseFormValues( + string html) + { + var ebs = EbsRegex().Match(html).Groups["value"].Value; + var formstack = FormstackRegex().Match(html).Groups["value"].Value; + var origRequestUrl = OrigRequestUrlRegex().Match(html).Groups["url"].Value.Replace("&", "&"); + var pageSequence = PageSequenceRegex().Match(html).Groups["value"].Value; + var pageId = PageIdRegex().Match(html).Groups["value"].Value; + var formStateId = FormStateRegex().Match(html).Groups["value"].Value; + + return (ebs, formstack, origRequestUrl, pageSequence, pageId, formStateId); + } + + private static string ExtractUpdatedHtml(string content, string identifier) + { + using var jsonDoc = JsonDocument.Parse(content); + + if (jsonDoc.RootElement.TryGetProperty("updatedControls", out var updatedControls)) + { + foreach (var control in updatedControls.EnumerateArray()) + { + if (control.TryGetProperty("html", out var htmlProperty) && + htmlProperty.ValueKind == JsonValueKind.String && + htmlProperty.GetString()!.Contains(identifier, StringComparison.OrdinalIgnoreCase)) + { + return htmlProperty.GetString()!.Replace("\\\"", "\""); + } + } + } + + throw new InvalidOperationException("Could not find updated HTML content."); + } +} diff --git a/BinDays.Api.IntegrationTests/Collectors/Councils/BradfordCouncilTests.cs b/BinDays.Api.IntegrationTests/Collectors/Councils/BradfordCouncilTests.cs index 0825ac2..6061155 100644 --- a/BinDays.Api.IntegrationTests/Collectors/Councils/BradfordCouncilTests.cs +++ b/BinDays.Api.IntegrationTests/Collectors/Councils/BradfordCouncilTests.cs @@ -1,36 +1,36 @@ -namespace BinDays.Api.IntegrationTests.Collectors.Councils; - -using BinDays.Api.Collectors.Collectors; -using BinDays.Api.Collectors.Collectors.Councils; -using BinDays.Api.Collectors.Services; -using BinDays.Api.IntegrationTests.Helpers; -using System.Threading.Tasks; -using Xunit; -using Xunit.Abstractions; - -public class BradfordCouncilTests -{ - private readonly IntegrationTestClient _client; - private static readonly ICollector _collector = new BradfordCouncil(); - private readonly CollectorService _collectorService = new([_collector]); - private readonly ITestOutputHelper _outputHelper; - - public BradfordCouncilTests(ITestOutputHelper outputHelper) - { - _outputHelper = outputHelper; - _client = new IntegrationTestClient(outputHelper); - } - - [Theory] - [InlineData("BD5 9ND")] - public async Task GetBinDaysTest(string postcode) - { - await TestSteps.EndToEnd( - _client, - _collectorService, - _collector, - postcode, - _outputHelper - ); - } -} +namespace BinDays.Api.IntegrationTests.Collectors.Councils; + +using BinDays.Api.Collectors.Collectors; +using BinDays.Api.Collectors.Collectors.Councils; +using BinDays.Api.Collectors.Services; +using BinDays.Api.IntegrationTests.Helpers; +using System.Threading.Tasks; +using Xunit; +using Xunit.Abstractions; + +public class BradfordCouncilTests +{ + private readonly IntegrationTestClient _client; + private static readonly ICollector _collector = new BradfordCouncil(); + private readonly CollectorService _collectorService = new([_collector]); + private readonly ITestOutputHelper _outputHelper; + + public BradfordCouncilTests(ITestOutputHelper outputHelper) + { + _outputHelper = outputHelper; + _client = new IntegrationTestClient(outputHelper); + } + + [Theory] + [InlineData("BD5 9ND")] + public async Task GetBinDaysTest(string postcode) + { + await TestSteps.EndToEnd( + _client, + _collectorService, + _collector, + postcode, + _outputHelper + ); + } +} From 57081fb1678472bb69aa60c32236e1aca41bce34 Mon Sep 17 00:00:00 2001 From: BadgerHobbs Date: Wed, 4 Feb 2026 18:49:34 +0000 Subject: [PATCH 3/6] Removed null/empty/default form data. --- .../Collectors/Councils/BradfordCouncil.cs | 68 ------------------- 1 file changed, 68 deletions(-) diff --git a/BinDays.Api.Collectors/Collectors/Councils/BradfordCouncil.cs b/BinDays.Api.Collectors/Collectors/Councils/BradfordCouncil.cs index 01137c1..5d8ada6 100644 --- a/BinDays.Api.Collectors/Collectors/Councils/BradfordCouncil.cs +++ b/BinDays.Api.Collectors/Collectors/Councils/BradfordCouncil.cs @@ -194,24 +194,10 @@ public GetAddressesResponse GetAddresses(string postcode, ClientSideResponse? cl {"ebs", ebs}, {"origrequrl", origRequestUrl}, {"formstack", formstack}, - {"PAGE:E.h", string.Empty}, - {"PAGE:B.h", string.Empty}, - {"PAGE:N.h", string.Empty}, - {"PAGE:P.h", string.Empty}, - {"PAGE:S.h", string.Empty}, - {"PAGE:R.h", string.Empty}, - {"PAGE:D", string.Empty}, - {"PAGE:H", string.Empty}, - {"PAGE:X", "0"}, - {"PAGE:Y", "0"}, {"PAGE:F", "CTID-2eDPaBQA-_"}, - {"ufsEndUser*", "1"}, {"pageSeq", pageSequence}, {"pageId", pageId}, {"formStateId", formStateId}, - {"$USERVAR1", string.Empty}, - {"$USERVAR2", string.Empty}, - {"$USERVAR3", string.Empty}, {_postcodeField, postcode}, {"HID:inputs", _addressHidInputs}, {_findButton, "Find address"}, @@ -376,24 +362,10 @@ public GetBinDaysResponse GetBinDays(Address address, ClientSideResponse? client {"ebs", ebs}, {"origrequrl", origRequestUrl}, {"formstack", formstack}, - {"PAGE:E.h", string.Empty}, - {"PAGE:B.h", string.Empty}, - {"PAGE:N.h", string.Empty}, - {"PAGE:P.h", string.Empty}, - {"PAGE:S.h", string.Empty}, - {"PAGE:R.h", string.Empty}, - {"PAGE:D", string.Empty}, - {"PAGE:H", string.Empty}, - {"PAGE:X", "0"}, - {"PAGE:Y", "0"}, {"PAGE:F", "CTID-2eDPaBQA-_"}, - {"ufsEndUser*", "1"}, {"pageSeq", pageSequence}, {"pageId", pageId}, {"formStateId", formStateId}, - {"$USERVAR1", string.Empty}, - {"$USERVAR2", string.Empty}, - {"$USERVAR3", string.Empty}, {_postcodeField, address.Postcode!}, {"HID:inputs", _addressHidInputs}, {_findButton, "Find address"}, @@ -459,24 +431,10 @@ public GetBinDaysResponse GetBinDays(Address address, ClientSideResponse? client {"ebs", metadata["ebs"]}, {"origrequrl", metadata["origRequestUrl"]}, {"formstack", metadata["formstack"]}, - {"PAGE:E.h", string.Empty}, - {"PAGE:B.h", string.Empty}, - {"PAGE:N.h", string.Empty}, - {"PAGE:P.h", string.Empty}, - {"PAGE:S.h", string.Empty}, - {"PAGE:R.h", string.Empty}, - {"PAGE:D", string.Empty}, - {"PAGE:H", string.Empty}, - {"PAGE:X", "0"}, - {"PAGE:Y", "0"}, {"PAGE:F", _addressPageField}, - {"ufsEndUser*", "1"}, {"pageSeq", metadata["pageSeq"]}, {"pageId", metadata["pageId"]}, {"formStateId", metadata["formStateId"]}, - {"$USERVAR1", string.Empty}, - {"$USERVAR2", string.Empty}, - {"$USERVAR3", string.Empty}, {_postcodeField, address.Postcode!}, }; @@ -547,24 +505,11 @@ public GetBinDaysResponse GetBinDays(Address address, ClientSideResponse? client {"ebs", metadata["ebs"]}, {"origrequrl", metadata["origRequestUrl"]}, {"formstack", metadata["formstack"]}, - {"PAGE:E.h", string.Empty}, - {"PAGE:B.h", string.Empty}, - {"PAGE:N.h", string.Empty}, - {"PAGE:P.h", string.Empty}, - {"PAGE:S.h", string.Empty}, - {"PAGE:R.h", string.Empty}, - {"PAGE:D", string.Empty}, - {"PAGE:H", string.Empty}, - {"PAGE:X", "0"}, - {"PAGE:Y", "0"}, {"PAGE:F", _showCollectionsField}, {"ufsEndUser*", "1"}, {"pageSeq", metadata["pageSeq"]}, {"pageId", metadata["pageId"]}, {"formStateId", metadata["formStateId"]}, - {"$USERVAR1", string.Empty}, - {"$USERVAR2", string.Empty}, - {"$USERVAR3", string.Empty}, {"HID:inputs", _showCollectionsHidInputs}, {_showButton, "Show collection dates"}, }); @@ -606,24 +551,11 @@ public GetBinDaysResponse GetBinDays(Address address, ClientSideResponse? client {"ebs", metadata["ebs"]}, {"origrequrl", metadata["origRequestUrl"]}, {"formstack", metadata["formstack"]}, - {"PAGE:E.h", string.Empty}, - {"PAGE:B.h", string.Empty}, - {"PAGE:N.h", string.Empty}, - {"PAGE:P.h", string.Empty}, - {"PAGE:S.h", string.Empty}, - {"PAGE:R.h", string.Empty}, - {"PAGE:D", string.Empty}, - {"PAGE:H", string.Empty}, - {"PAGE:X", "0"}, - {"PAGE:Y", "0"}, {"PAGE:F", _showCollectionsField}, {"ufsEndUser*", "1"}, {"pageSeq", metadata["pageSeq"]}, {"pageId", metadata["pageId"]}, {"formStateId", metadata["formStateId"]}, - {"$USERVAR1", string.Empty}, - {"$USERVAR2", string.Empty}, - {"$USERVAR3", string.Empty}, {"HID:inputs", _showCollectionsHidInputs}, {"ebReshow", "true"}, }); From a38cdee21545ef35e5134c6e94e229e866bd5778 Mon Sep 17 00:00:00 2001 From: BadgerHobbs Date: Wed, 4 Feb 2026 19:31:32 +0000 Subject: [PATCH 4/6] Resolve PR #138 code review comments This commit addresses all unresolved PR review comments: - Add XML documentation to all constant fields - Inline single-use constants (_addressPageField, _showButton) - Move bin types field before constants (style guide ordering) - Add trailing commas to multi-line initializers - Convert dictionaries to target-typed new() syntax - Add XML documentation to all helper methods - Improve BuildAbsoluteUrl to use Uri.IsWellFormedUriString - Verify closing brace formatting with dotnet format Note: Address sorting logic was kept as removing it breaks functionality. Tests select the first address, and API order may not be valid without sorting. All tests passing. Co-Authored-By: Claude Sonnet 4.5 --- .../Collectors/Councils/BradfordCouncil.cs | 118 +++++++++++++----- 1 file changed, 85 insertions(+), 33 deletions(-) diff --git a/BinDays.Api.Collectors/Collectors/Councils/BradfordCouncil.cs b/BinDays.Api.Collectors/Collectors/Councils/BradfordCouncil.cs index 5d8ada6..8452c16 100644 --- a/BinDays.Api.Collectors/Collectors/Councils/BradfordCouncil.cs +++ b/BinDays.Api.Collectors/Collectors/Councils/BradfordCouncil.cs @@ -24,16 +24,6 @@ internal sealed partial class BradfordCouncil : GovUkCollectorBase, ICollector /// public override string GovUkId => "bradford"; - private const string _initialUrl = "https://onlineforms.bradford.gov.uk/ufs/collectiondates.eb?ebd=0&ebp=20&ebz=1_1761729510565"; - private const string _formId = "/Forms/COLLECTIONDATES"; - private const string _postcodeField = "CTRL:Q2YAUZ5b:_:A"; - private const string _findButton = "CTRL:2eDPaBQA:_"; - private const string _addressPageField = "CTID-Go9IHRTP-1-A"; - private const string _showCollectionsField = "CTID-PieY14aw-_"; - private const string _showButton = "CTRL:PieY14aw:_"; - private const string _addressHidInputs = "ICTRL:Q2YAUZ5b:_:A,ACTRL:2eDPaBQA:_,APAGE:E.h,APAGE:B.h,APAGE:N.h,APAGE:P.h,APAGE:S.h,APAGE:R.h"; - private const string _showCollectionsHidInputs = "ACTRL:PieY14aw:_,ACTRL:EstZqKRj:_,APAGE:E.h,APAGE:B.h,APAGE:N.h,APAGE:P.h,APAGE:S.h,APAGE:R.h"; - /// /// The list of bin types for this collector. /// @@ -59,6 +49,41 @@ internal sealed partial class BradfordCouncil : GovUkCollectorBase, ICollector }, ]; + /// + /// The initial URL for the bin collection dates form. + /// + private const string _initialUrl = "https://onlineforms.bradford.gov.uk/ufs/collectiondates.eb?ebd=0&ebp=20&ebz=1_1761729510565"; + + /// + /// The form identifier for the collection dates form. + /// + private const string _formId = "/Forms/COLLECTIONDATES"; + + /// + /// The field identifier for the postcode input. + /// + private const string _postcodeField = "CTRL:Q2YAUZ5b:_:A"; + + /// + /// The button identifier for finding addresses. + /// + private const string _findButton = "CTRL:2eDPaBQA:_"; + + /// + /// The field identifier for showing collections. + /// + private const string _showCollectionsField = "CTID-PieY14aw-_"; + + /// + /// The hidden inputs for the address search request. + /// + private const string _addressHidInputs = "ICTRL:Q2YAUZ5b:_:A,ACTRL:2eDPaBQA:_,APAGE:E.h,APAGE:B.h,APAGE:N.h,APAGE:P.h,APAGE:S.h,APAGE:R.h"; + + /// + /// The hidden inputs for the show collections request. + /// + private const string _showCollectionsHidInputs = "ACTRL:PieY14aw:_,ACTRL:EstZqKRj:_,APAGE:E.h,APAGE:B.h,APAGE:N.h,APAGE:P.h,APAGE:S.h,APAGE:R.h"; + /// /// Regex for the formstack value from the HTML. /// @@ -135,7 +160,7 @@ public GetAddressesResponse GetAddresses(string postcode, ClientSideResponse? cl var getAddressesResponse = new GetAddressesResponse { - NextClientSideRequest = clientSideRequest + NextClientSideRequest = clientSideRequest, }; return getAddressesResponse; @@ -160,13 +185,13 @@ public GetAddressesResponse GetAddresses(string postcode, ClientSideResponse? cl { Metadata = { { "cookie", cookie }, - } + }, }, }; var getAddressesResponse = new GetAddressesResponse { - NextClientSideRequest = clientSideRequest + NextClientSideRequest = clientSideRequest, }; return getAddressesResponse; @@ -177,7 +202,7 @@ public GetAddressesResponse GetAddresses(string postcode, ClientSideResponse? cl var (ebs, formstack, origRequestUrl, pageSequence, pageId, formStateId) = ParseFormValues( clientSideResponse.Content); - var metadata = new Dictionary + Dictionary metadata = new() { { "cookie", clientSideResponse.Options.Metadata["cookie"] }, { "ebs", ebs }, @@ -203,7 +228,7 @@ public GetAddressesResponse GetAddresses(string postcode, ClientSideResponse? cl {_findButton, "Find address"}, }); - var requestHeaders = new Dictionary { + Dictionary requestHeaders = new() { {"user-agent", Constants.UserAgent}, {"content-type", "application/x-www-form-urlencoded"}, {"cookie", metadata["cookie"]}, @@ -224,7 +249,7 @@ public GetAddressesResponse GetAddresses(string postcode, ClientSideResponse? cl var getAddressesResponse = new GetAddressesResponse { - NextClientSideRequest = clientSideRequest + NextClientSideRequest = clientSideRequest, }; return getAddressesResponse; @@ -303,7 +328,7 @@ public GetBinDaysResponse GetBinDays(Address address, ClientSideResponse? client var getBinDaysResponse = new GetBinDaysResponse { - NextClientSideRequest = clientSideRequest + NextClientSideRequest = clientSideRequest, }; return getBinDaysResponse; @@ -328,13 +353,13 @@ public GetBinDaysResponse GetBinDays(Address address, ClientSideResponse? client { Metadata = { { "cookie", cookie }, - } + }, }, }; var getBinDaysResponse = new GetBinDaysResponse { - NextClientSideRequest = clientSideRequest + NextClientSideRequest = clientSideRequest, }; return getBinDaysResponse; @@ -345,7 +370,7 @@ public GetBinDaysResponse GetBinDays(Address address, ClientSideResponse? client var (ebs, formstack, origRequestUrl, pageSequence, pageId, formStateId) = ParseFormValues( clientSideResponse.Content); - var metadata = new Dictionary + Dictionary metadata = new() { { "cookie", clientSideResponse.Options.Metadata["cookie"] }, { "ebs", ebs }, @@ -371,7 +396,7 @@ public GetBinDaysResponse GetBinDays(Address address, ClientSideResponse? client {_findButton, "Find address"}, }); - var requestHeaders = new Dictionary { + Dictionary requestHeaders = new() { {"user-agent", Constants.UserAgent}, {"content-type", "application/x-www-form-urlencoded"}, {"cookie", metadata["cookie"]}, @@ -392,7 +417,7 @@ public GetBinDaysResponse GetBinDays(Address address, ClientSideResponse? client var getBinDaysResponse = new GetBinDaysResponse { - NextClientSideRequest = clientSideRequest + NextClientSideRequest = clientSideRequest, }; return getBinDaysResponse; @@ -425,13 +450,13 @@ public GetBinDaysResponse GetBinDays(Address address, ClientSideResponse? client var metadata = clientSideResponse.Options.Metadata; - var requestBodyDictionary = new Dictionary + Dictionary requestBodyDictionary = new() { {"formid", _formId}, {"ebs", metadata["ebs"]}, {"origrequrl", metadata["origRequestUrl"]}, {"formstack", metadata["formstack"]}, - {"PAGE:F", _addressPageField}, + {"PAGE:F", "CTID-Go9IHRTP-1-A"}, {"pageSeq", metadata["pageSeq"]}, {"pageId", metadata["pageId"]}, {"formStateId", metadata["formStateId"]}, @@ -466,7 +491,7 @@ public GetBinDaysResponse GetBinDays(Address address, ClientSideResponse? client requestBodyDictionary.Add("HID:inputs", string.Join(",", hidInputs)); - var requestHeaders = new Dictionary { + Dictionary requestHeaders = new() { {"user-agent", Constants.UserAgent}, {"content-type", "application/x-www-form-urlencoded"}, {"cookie", metadata["cookie"]}, @@ -489,7 +514,7 @@ public GetBinDaysResponse GetBinDays(Address address, ClientSideResponse? client var getBinDaysResponse = new GetBinDaysResponse { - NextClientSideRequest = clientSideRequest + NextClientSideRequest = clientSideRequest, }; return getBinDaysResponse; @@ -511,10 +536,10 @@ public GetBinDaysResponse GetBinDays(Address address, ClientSideResponse? client {"pageId", metadata["pageId"]}, {"formStateId", metadata["formStateId"]}, {"HID:inputs", _showCollectionsHidInputs}, - {_showButton, "Show collection dates"}, + {"CTRL:PieY14aw:_", "Show collection dates"}, }); - var requestHeaders = new Dictionary { + Dictionary requestHeaders = new() { {"user-agent", Constants.UserAgent}, {"content-type", "application/x-www-form-urlencoded"}, {"cookie", metadata["cookie"]}, @@ -535,7 +560,7 @@ public GetBinDaysResponse GetBinDays(Address address, ClientSideResponse? client var getBinDaysResponse = new GetBinDaysResponse { - NextClientSideRequest = clientSideRequest + NextClientSideRequest = clientSideRequest, }; return getBinDaysResponse; @@ -560,7 +585,7 @@ public GetBinDaysResponse GetBinDays(Address address, ClientSideResponse? client {"ebReshow", "true"}, }); - var requestHeaders = new Dictionary { + Dictionary requestHeaders = new() { {"user-agent", Constants.UserAgent}, {"content-type", "application/x-www-form-urlencoded"}, {"cookie", metadata["cookie"]}, @@ -582,7 +607,7 @@ public GetBinDaysResponse GetBinDays(Address address, ClientSideResponse? client var getBinDaysResponse = new GetBinDaysResponse { - NextClientSideRequest = clientSideRequest + NextClientSideRequest = clientSideRequest, }; return getBinDaysResponse; @@ -606,7 +631,7 @@ public GetBinDaysResponse GetBinDays(Address address, ClientSideResponse? client var getBinDaysResponse = new GetBinDaysResponse { - NextClientSideRequest = clientSideRequest + NextClientSideRequest = clientSideRequest, }; return getBinDaysResponse; @@ -656,6 +681,11 @@ public GetBinDaysResponse GetBinDays(Address address, ClientSideResponse? client throw new InvalidOperationException("Invalid client-side request."); } + /// + /// Extracts the leading number from an address property for sorting purposes. + /// + /// The address to extract the number from. + /// A tuple containing the length and value of the leading number, or (0, 0) if no number is found. private static (int Length, int Value) GetAddressNumber(Address address) { var digits = GetLeadingNumber(address.Property!); @@ -668,6 +698,11 @@ private static (int Length, int Value) GetAddressNumber(Address address) return (digits.Length, int.Parse(digits, CultureInfo.InvariantCulture)); } + /// + /// Extracts the leading numeric digits from a property string. + /// + /// The property string to extract digits from. + /// The leading numeric digits as a string, or null if no digits are found. private static string? GetLeadingNumber(string property) { var digits = string.Empty; @@ -694,9 +729,14 @@ private static (int Length, int Value) GetAddressNumber(Address address) return string.IsNullOrWhiteSpace(digits) ? null : digits; } + /// + /// Converts a relative URL to an absolute URL by prepending the Bradford Council forms base URL if needed. + /// + /// The relative or absolute URL to process. + /// An absolute URL. private static string BuildAbsoluteUrl(string relativeUrl) { - if (relativeUrl.StartsWith("http", StringComparison.OrdinalIgnoreCase)) + if (Uri.IsWellFormedUriString(relativeUrl, UriKind.Absolute)) { return relativeUrl; } @@ -704,6 +744,11 @@ private static string BuildAbsoluteUrl(string relativeUrl) return $"https://onlineforms.bradford.gov.uk/ufs/{relativeUrl}"; } + /// + /// Parses form values from HTML content using regex patterns. + /// + /// The HTML content to parse. + /// A tuple containing the extracted form values (ebs, formstack, origRequestUrl, pageSequence, pageId, formStateId). private static (string Ebs, string Formstack, string OrigRequestUrl, string PageSequence, string PageId, string FormStateId) ParseFormValues( string html) { @@ -717,6 +762,13 @@ private static (string Ebs, string Formstack, string OrigRequestUrl, string Page return (ebs, formstack, origRequestUrl, pageSequence, pageId, formStateId); } + /// + /// Extracts HTML content from a JSON response containing updated controls. + /// + /// The JSON content to parse. + /// The identifier to search for in the HTML content. + /// The HTML content from the matching control. + /// Thrown when the updated HTML content cannot be found. private static string ExtractUpdatedHtml(string content, string identifier) { using var jsonDoc = JsonDocument.Parse(content); From 5e6bd4f5fa0a14cdbe0e91122b8886b758321c64 Mon Sep 17 00:00:00 2001 From: BadgerHobbs Date: Thu, 5 Feb 2026 01:25:39 +0000 Subject: [PATCH 5/6] Removed sorting for Bradford Council. --- .../Collectors/Councils/BradfordCouncil.cs | 405 +++++------------- .../Councils/BradfordCouncilTests.cs | 3 +- 2 files changed, 110 insertions(+), 298 deletions(-) diff --git a/BinDays.Api.Collectors/Collectors/Councils/BradfordCouncil.cs b/BinDays.Api.Collectors/Collectors/Councils/BradfordCouncil.cs index 8452c16..9ed4811 100644 --- a/BinDays.Api.Collectors/Collectors/Councils/BradfordCouncil.cs +++ b/BinDays.Api.Collectors/Collectors/Councils/BradfordCouncil.cs @@ -141,121 +141,14 @@ internal sealed partial class BradfordCouncil : GovUkCollectorBase, ICollector /// public GetAddressesResponse GetAddresses(string postcode, ClientSideResponse? clientSideResponse) { - // Prepare client-side request for getting the initial token - if (clientSideResponse == null) + // Handle session initialization (RequestIds null, 1, 2) + var sharedRequest = HandleSessionInitialization(postcode, clientSideResponse); + if (sharedRequest != null) { - var clientSideRequest = new ClientSideRequest - { - RequestId = 1, - Url = _initialUrl, - Method = "GET", - Headers = new() { - {"user-agent", Constants.UserAgent}, - }, - Options = new ClientSideOptions - { - FollowRedirects = false, - }, - }; - - var getAddressesResponse = new GetAddressesResponse - { - NextClientSideRequest = clientSideRequest, - }; - - return getAddressesResponse; - } - // Prepare client-side request for loading the form - else if (clientSideResponse.RequestId == 1) - { - var cookie = ProcessingUtilities.ParseSetCookieHeaderForRequestCookie( - clientSideResponse.Headers["set-cookie"]); - var redirectUrl = BuildAbsoluteUrl(clientSideResponse.Headers["location"]); - - var clientSideRequest = new ClientSideRequest - { - RequestId = 2, - Url = redirectUrl, - Method = "GET", - Headers = new() { - {"user-agent", Constants.UserAgent}, - {"cookie", cookie}, - }, - Options = new ClientSideOptions - { - Metadata = { - { "cookie", cookie }, - }, - }, - }; - - var getAddressesResponse = new GetAddressesResponse - { - NextClientSideRequest = clientSideRequest, - }; - - return getAddressesResponse; - } - // Prepare client-side request for searching for addresses - else if (clientSideResponse.RequestId == 2) - { - var (ebs, formstack, origRequestUrl, pageSequence, pageId, formStateId) = ParseFormValues( - clientSideResponse.Content); - - Dictionary metadata = new() - { - { "cookie", clientSideResponse.Options.Metadata["cookie"] }, - { "ebs", ebs }, - { "formstack", formstack }, - { "origRequestUrl", origRequestUrl }, - { "pageSeq", pageSequence }, - { "pageId", pageId }, - { "formStateId", formStateId }, - }; - - var requestBody = ProcessingUtilities.ConvertDictionaryToFormData(new() - { - {"formid", _formId}, - {"ebs", ebs}, - {"origrequrl", origRequestUrl}, - {"formstack", formstack}, - {"PAGE:F", "CTID-2eDPaBQA-_"}, - {"pageSeq", pageSequence}, - {"pageId", pageId}, - {"formStateId", formStateId}, - {_postcodeField, postcode}, - {"HID:inputs", _addressHidInputs}, - {_findButton, "Find address"}, - }); - - Dictionary requestHeaders = new() { - {"user-agent", Constants.UserAgent}, - {"content-type", "application/x-www-form-urlencoded"}, - {"cookie", metadata["cookie"]}, - }; - - var clientSideRequest = new ClientSideRequest - { - RequestId = 3, - Url = $"https://onlineforms.bradford.gov.uk/ufs/ufsajax?ebz={ebs}", - Method = "POST", - Headers = requestHeaders, - Body = requestBody, - Options = new ClientSideOptions - { - Metadata = metadata, - }, - }; - - var getAddressesResponse = new GetAddressesResponse - { - NextClientSideRequest = clientSideRequest, - }; - - return getAddressesResponse; + return new GetAddressesResponse { NextClientSideRequest = sharedRequest }; } // Process addresses from response - else if (clientSideResponse.RequestId == 3) + else if (clientSideResponse!.RequestId == 3) { var addressesHtml = ExtractUpdatedHtml(clientSideResponse.Content, "Go9IHRTP"); @@ -275,26 +168,6 @@ public GetAddressesResponse GetAddresses(string postcode, ClientSideResponse? cl addresses.Add(address); } - addresses.Sort((first, second) => - { - var (Length, Value) = GetAddressNumber(first); - var secondNumber = GetAddressNumber(second); - - var lengthComparison = secondNumber.Length.CompareTo(Length); - if (lengthComparison != 0) - { - return lengthComparison; - } - - var valueComparison = Value.CompareTo(secondNumber.Value); - if (valueComparison != 0) - { - return valueComparison; - } - - return string.Compare(first.Property, second.Property, StringComparison.Ordinal); - }); - var getAddressesResponse = new GetAddressesResponse { Addresses = [.. addresses], @@ -309,121 +182,14 @@ public GetAddressesResponse GetAddresses(string postcode, ClientSideResponse? cl /// public GetBinDaysResponse GetBinDays(Address address, ClientSideResponse? clientSideResponse) { - // Prepare client-side request for getting the initial token - if (clientSideResponse == null) - { - var clientSideRequest = new ClientSideRequest - { - RequestId = 1, - Url = _initialUrl, - Method = "GET", - Headers = new() { - {"user-agent", Constants.UserAgent}, - }, - Options = new ClientSideOptions - { - FollowRedirects = false, - }, - }; - - var getBinDaysResponse = new GetBinDaysResponse - { - NextClientSideRequest = clientSideRequest, - }; - - return getBinDaysResponse; - } - // Prepare client-side request for loading the form - else if (clientSideResponse.RequestId == 1) - { - var cookie = ProcessingUtilities.ParseSetCookieHeaderForRequestCookie( - clientSideResponse.Headers["set-cookie"]); - var redirectUrl = BuildAbsoluteUrl(clientSideResponse.Headers["location"]); - - var clientSideRequest = new ClientSideRequest - { - RequestId = 2, - Url = redirectUrl, - Method = "GET", - Headers = new() { - {"user-agent", Constants.UserAgent}, - {"cookie", cookie}, - }, - Options = new ClientSideOptions - { - Metadata = { - { "cookie", cookie }, - }, - }, - }; - - var getBinDaysResponse = new GetBinDaysResponse - { - NextClientSideRequest = clientSideRequest, - }; - - return getBinDaysResponse; - } - // Prepare client-side request for searching for addresses - else if (clientSideResponse.RequestId == 2) + // Handle session initialization (RequestIds null, 1, 2) + var sharedRequest = HandleSessionInitialization(address.Postcode!, clientSideResponse); + if (sharedRequest != null) { - var (ebs, formstack, origRequestUrl, pageSequence, pageId, formStateId) = ParseFormValues( - clientSideResponse.Content); - - Dictionary metadata = new() - { - { "cookie", clientSideResponse.Options.Metadata["cookie"] }, - { "ebs", ebs }, - { "formstack", formstack }, - { "origRequestUrl", origRequestUrl }, - { "pageSeq", pageSequence }, - { "pageId", pageId }, - { "formStateId", formStateId }, - }; - - var requestBody = ProcessingUtilities.ConvertDictionaryToFormData(new() - { - {"formid", _formId}, - {"ebs", ebs}, - {"origrequrl", origRequestUrl}, - {"formstack", formstack}, - {"PAGE:F", "CTID-2eDPaBQA-_"}, - {"pageSeq", pageSequence}, - {"pageId", pageId}, - {"formStateId", formStateId}, - {_postcodeField, address.Postcode!}, - {"HID:inputs", _addressHidInputs}, - {_findButton, "Find address"}, - }); - - Dictionary requestHeaders = new() { - {"user-agent", Constants.UserAgent}, - {"content-type", "application/x-www-form-urlencoded"}, - {"cookie", metadata["cookie"]}, - }; - - var clientSideRequest = new ClientSideRequest - { - RequestId = 3, - Url = $"https://onlineforms.bradford.gov.uk/ufs/ufsajax?ebz={ebs}", - Method = "POST", - Headers = requestHeaders, - Body = requestBody, - Options = new ClientSideOptions - { - Metadata = metadata, - }, - }; - - var getBinDaysResponse = new GetBinDaysResponse - { - NextClientSideRequest = clientSideRequest, - }; - - return getBinDaysResponse; + return new GetBinDaysResponse { NextClientSideRequest = sharedRequest }; } // Prepare client-side request for selecting the address - else if (clientSideResponse.RequestId == 3) + else if (clientSideResponse!.RequestId == 3) { var addressesHtml = ExtractUpdatedHtml(clientSideResponse.Content, "Go9IHRTP"); var rawAddresses = AddressesRegex().Matches(addressesHtml)!; @@ -491,12 +257,6 @@ public GetBinDaysResponse GetBinDays(Address address, ClientSideResponse? client requestBodyDictionary.Add("HID:inputs", string.Join(",", hidInputs)); - Dictionary requestHeaders = new() { - {"user-agent", Constants.UserAgent}, - {"content-type", "application/x-www-form-urlencoded"}, - {"cookie", metadata["cookie"]}, - }; - var requestBody = ProcessingUtilities.ConvertDictionaryToFormData(requestBodyDictionary); var clientSideRequest = new ClientSideRequest @@ -504,7 +264,7 @@ public GetBinDaysResponse GetBinDays(Address address, ClientSideResponse? client RequestId = 4, Url = $"https://onlineforms.bradford.gov.uk/ufs/ufsajax?ebz={metadata["ebs"]}", Method = "POST", - Headers = requestHeaders, + Headers = CreateFormHeaders(metadata["cookie"]), Body = requestBody, Options = new ClientSideOptions { @@ -539,18 +299,12 @@ public GetBinDaysResponse GetBinDays(Address address, ClientSideResponse? client {"CTRL:PieY14aw:_", "Show collection dates"}, }); - Dictionary requestHeaders = new() { - {"user-agent", Constants.UserAgent}, - {"content-type", "application/x-www-form-urlencoded"}, - {"cookie", metadata["cookie"]}, - }; - var clientSideRequest = new ClientSideRequest { RequestId = 5, Url = $"https://onlineforms.bradford.gov.uk/ufs/ufsajax?ebz={metadata["ebs"]}", Method = "POST", - Headers = requestHeaders, + Headers = CreateFormHeaders(metadata["cookie"]), Body = requestBody, Options = new ClientSideOptions { @@ -585,18 +339,12 @@ public GetBinDaysResponse GetBinDays(Address address, ClientSideResponse? client {"ebReshow", "true"}, }); - Dictionary requestHeaders = new() { - {"user-agent", Constants.UserAgent}, - {"content-type", "application/x-www-form-urlencoded"}, - {"cookie", metadata["cookie"]}, - }; - var clientSideRequest = new ClientSideRequest { RequestId = 6, Url = $"https://onlineforms.bradford.gov.uk/ufs/collectiondates.eb?ebz={metadata["ebs"]}", Method = "POST", - Headers = requestHeaders, + Headers = CreateFormHeaders(metadata["cookie"]), Body = requestBody, Options = new ClientSideOptions { @@ -682,53 +430,116 @@ public GetBinDaysResponse GetBinDays(Address address, ClientSideResponse? client } /// - /// Extracts the leading number from an address property for sorting purposes. + /// Handles the shared session initialization steps (RequestIds null, 1, 2) used by both + /// and . /// - /// The address to extract the number from. - /// A tuple containing the length and value of the leading number, or (0, 0) if no number is found. - private static (int Length, int Value) GetAddressNumber(Address address) + /// The postcode to search for. + /// The client-side response from the previous request, or null for the initial request. + /// A for RequestIds null/1/2, or null when shared steps are complete. + private static ClientSideRequest? HandleSessionInitialization(string postcode, ClientSideResponse? clientSideResponse) { - var digits = GetLeadingNumber(address.Property!); - - if (digits == null) + // Prepare client-side request for getting the initial token + if (clientSideResponse == null) { - return (0, 0); + return new ClientSideRequest + { + RequestId = 1, + Url = _initialUrl, + Method = "GET", + Headers = new() { + {"user-agent", Constants.UserAgent}, + }, + Options = new ClientSideOptions + { + FollowRedirects = false, + }, + }; } + // Prepare client-side request for loading the form + else if (clientSideResponse.RequestId == 1) + { + var cookie = ProcessingUtilities.ParseSetCookieHeaderForRequestCookie( + clientSideResponse.Headers["set-cookie"]); + var redirectUrl = BuildAbsoluteUrl(clientSideResponse.Headers["location"]); - return (digits.Length, int.Parse(digits, CultureInfo.InvariantCulture)); - } - - /// - /// Extracts the leading numeric digits from a property string. - /// - /// The property string to extract digits from. - /// The leading numeric digits as a string, or null if no digits are found. - private static string? GetLeadingNumber(string property) - { - var digits = string.Empty; - - foreach (var character in property) + return new ClientSideRequest + { + RequestId = 2, + Url = redirectUrl, + Method = "GET", + Headers = new() { + {"user-agent", Constants.UserAgent}, + {"cookie", cookie}, + }, + Options = new ClientSideOptions + { + Metadata = { + { "cookie", cookie }, + }, + }, + }; + } + // Prepare client-side request for searching for addresses + else if (clientSideResponse.RequestId == 2) { - if (character == ',') + var (ebs, formstack, origRequestUrl, pageSequence, pageId, formStateId) = ParseFormValues( + clientSideResponse.Content); + + Dictionary metadata = new() { - break; - } + { "cookie", clientSideResponse.Options.Metadata["cookie"] }, + { "ebs", ebs }, + { "formstack", formstack }, + { "origRequestUrl", origRequestUrl }, + { "pageSeq", pageSequence }, + { "pageId", pageId }, + { "formStateId", formStateId }, + }; - if (char.IsDigit(character)) + var requestBody = ProcessingUtilities.ConvertDictionaryToFormData(new() { - digits += character; - continue; - } + {"formid", _formId}, + {"ebs", ebs}, + {"origrequrl", origRequestUrl}, + {"formstack", formstack}, + {"PAGE:F", "CTID-2eDPaBQA-_"}, + {"pageSeq", pageSequence}, + {"pageId", pageId}, + {"formStateId", formStateId}, + {_postcodeField, postcode}, + {"HID:inputs", _addressHidInputs}, + {_findButton, "Find address"}, + }); - if (!string.IsNullOrWhiteSpace(digits)) + return new ClientSideRequest { - break; - } + RequestId = 3, + Url = $"https://onlineforms.bradford.gov.uk/ufs/ufsajax?ebz={ebs}", + Method = "POST", + Headers = CreateFormHeaders(metadata["cookie"]), + Body = requestBody, + Options = new ClientSideOptions + { + Metadata = metadata, + }, + }; } - return string.IsNullOrWhiteSpace(digits) ? null : digits; + return null; } + /// + /// Creates the standard form POST headers with the given cookie. + /// + /// The cookie value to include in the headers. + /// A dictionary of HTTP headers for form POST requests. + private static Dictionary CreateFormHeaders(string cookie) => new() + { + { "user-agent", Constants.UserAgent }, + { "content-type", "application/x-www-form-urlencoded" }, + { "cookie", cookie }, + }; + /// /// Converts a relative URL to an absolute URL by prepending the Bradford Council forms base URL if needed. /// diff --git a/BinDays.Api.IntegrationTests/Collectors/Councils/BradfordCouncilTests.cs b/BinDays.Api.IntegrationTests/Collectors/Councils/BradfordCouncilTests.cs index 6061155..efc7bfc 100644 --- a/BinDays.Api.IntegrationTests/Collectors/Councils/BradfordCouncilTests.cs +++ b/BinDays.Api.IntegrationTests/Collectors/Councils/BradfordCouncilTests.cs @@ -30,7 +30,8 @@ await TestSteps.EndToEnd( _collectorService, _collector, postcode, - _outputHelper + _outputHelper, + addressIndex: 1 ); } } From e2aa3f151ae2fa7e52560e039714f23002cd75fa Mon Sep 17 00:00:00 2001 From: BadgerHobbs Date: Thu, 5 Feb 2026 01:59:08 +0000 Subject: [PATCH 6/6] Resolve PR code review comments for BradfordCouncil - Inline single-use consts (_initialUrl, _findButton, _addressHidInputs) - Inline helper methods (CreateFormHeaders, BuildAbsoluteUrl, ParseFormValues) - Replace JSON parsing in ExtractUpdatedHtml with GeneratedRegex - Place closing ); on new lines for multi-line statements - Fix DateRegex to match any day of week, not just Thursday - Standardise OrigRequestUrlRegex capture group name to "value" Co-Authored-By: Claude Opus 4.5 --- .../Collectors/Councils/BradfordCouncil.cs | 150 +++++++----------- 1 file changed, 58 insertions(+), 92 deletions(-) diff --git a/BinDays.Api.Collectors/Collectors/Councils/BradfordCouncil.cs b/BinDays.Api.Collectors/Collectors/Councils/BradfordCouncil.cs index 9ed4811..f621193 100644 --- a/BinDays.Api.Collectors/Collectors/Councils/BradfordCouncil.cs +++ b/BinDays.Api.Collectors/Collectors/Councils/BradfordCouncil.cs @@ -7,7 +7,6 @@ namespace BinDays.Api.Collectors.Collectors.Councils; using System.Collections.Generic; using System.Globalization; using System.Net; -using System.Text.Json; using System.Text.RegularExpressions; /// @@ -49,11 +48,6 @@ internal sealed partial class BradfordCouncil : GovUkCollectorBase, ICollector }, ]; - /// - /// The initial URL for the bin collection dates form. - /// - private const string _initialUrl = "https://onlineforms.bradford.gov.uk/ufs/collectiondates.eb?ebd=0&ebp=20&ebz=1_1761729510565"; - /// /// The form identifier for the collection dates form. /// @@ -64,21 +58,11 @@ internal sealed partial class BradfordCouncil : GovUkCollectorBase, ICollector /// private const string _postcodeField = "CTRL:Q2YAUZ5b:_:A"; - /// - /// The button identifier for finding addresses. - /// - private const string _findButton = "CTRL:2eDPaBQA:_"; - /// /// The field identifier for showing collections. /// private const string _showCollectionsField = "CTID-PieY14aw-_"; - /// - /// The hidden inputs for the address search request. - /// - private const string _addressHidInputs = "ICTRL:Q2YAUZ5b:_:A,ACTRL:2eDPaBQA:_,APAGE:E.h,APAGE:B.h,APAGE:N.h,APAGE:P.h,APAGE:S.h,APAGE:R.h"; - /// /// The hidden inputs for the show collections request. /// @@ -93,7 +77,7 @@ internal sealed partial class BradfordCouncil : GovUkCollectorBase, ICollector /// /// Regex for the original request url from the HTML. /// - [GeneratedRegex("name=\"origrequrl\" value=\"(?[^\"]+)\"", RegexOptions.IgnoreCase)] + [GeneratedRegex("name=\"origrequrl\" value=\"(?[^\"]+)\"", RegexOptions.IgnoreCase)] private static partial Regex OrigRequestUrlRegex(); /// @@ -135,9 +119,15 @@ internal sealed partial class BradfordCouncil : GovUkCollectorBase, ICollector /// /// Regex for the bin day dates. /// - [GeneratedRegex("Thu [A-Za-z]{3} \\d{2} \\d{4}")] + [GeneratedRegex("[A-Za-z]{3} [A-Za-z]{3} \\d{2} \\d{4}")] private static partial Regex DateRegex(); + /// + /// Regex for the html property value from the JSON response. + /// + [GeneratedRegex(@"""html""\s*:\s*""((?:[^""\\]|\\.)*)""", RegexOptions.IgnoreCase)] + private static partial Regex HtmlPropertyRegex(); + /// public GetAddressesResponse GetAddresses(string postcode, ClientSideResponse? clientSideResponse) { @@ -201,12 +191,14 @@ public GetBinDaysResponse GetBinDays(Address address, ClientSideResponse? client } var selectedAddress = addressFields.Find(x => - string.Equals(x.Field, address.Uid, StringComparison.OrdinalIgnoreCase)); + string.Equals(x.Field, address.Uid, StringComparison.OrdinalIgnoreCase) + ); if (selectedAddress == default) { selectedAddress = addressFields.Find(x => - string.Equals(x.Property, address.Property, StringComparison.OrdinalIgnoreCase)); + string.Equals(x.Property, address.Property, StringComparison.OrdinalIgnoreCase) + ); } if (selectedAddress == default) @@ -264,7 +256,11 @@ public GetBinDaysResponse GetBinDays(Address address, ClientSideResponse? client RequestId = 4, Url = $"https://onlineforms.bradford.gov.uk/ufs/ufsajax?ebz={metadata["ebs"]}", Method = "POST", - Headers = CreateFormHeaders(metadata["cookie"]), + Headers = new() { + {"user-agent", Constants.UserAgent}, + {"content-type", "application/x-www-form-urlencoded"}, + {"cookie", metadata["cookie"]}, + }, Body = requestBody, Options = new ClientSideOptions { @@ -297,14 +293,19 @@ public GetBinDaysResponse GetBinDays(Address address, ClientSideResponse? client {"formStateId", metadata["formStateId"]}, {"HID:inputs", _showCollectionsHidInputs}, {"CTRL:PieY14aw:_", "Show collection dates"}, - }); + } + ); var clientSideRequest = new ClientSideRequest { RequestId = 5, Url = $"https://onlineforms.bradford.gov.uk/ufs/ufsajax?ebz={metadata["ebs"]}", Method = "POST", - Headers = CreateFormHeaders(metadata["cookie"]), + Headers = new() { + {"user-agent", Constants.UserAgent}, + {"content-type", "application/x-www-form-urlencoded"}, + {"cookie", metadata["cookie"]}, + }, Body = requestBody, Options = new ClientSideOptions { @@ -337,14 +338,19 @@ public GetBinDaysResponse GetBinDays(Address address, ClientSideResponse? client {"formStateId", metadata["formStateId"]}, {"HID:inputs", _showCollectionsHidInputs}, {"ebReshow", "true"}, - }); + } + ); var clientSideRequest = new ClientSideRequest { RequestId = 6, Url = $"https://onlineforms.bradford.gov.uk/ufs/collectiondates.eb?ebz={metadata["ebs"]}", Method = "POST", - Headers = CreateFormHeaders(metadata["cookie"]), + Headers = new() { + {"user-agent", Constants.UserAgent}, + {"content-type", "application/x-www-form-urlencoded"}, + {"cookie", metadata["cookie"]}, + }, Body = requestBody, Options = new ClientSideOptions { @@ -364,12 +370,12 @@ public GetBinDaysResponse GetBinDays(Address address, ClientSideResponse? client else if (clientSideResponse.RequestId == 6) { var metadata = clientSideResponse.Options.Metadata; - var redirectUrl = BuildAbsoluteUrl(clientSideResponse.Headers["location"]); + var location = clientSideResponse.Headers["location"]; var clientSideRequest = new ClientSideRequest { RequestId = 7, - Url = redirectUrl, + Url = $"https://onlineforms.bradford.gov.uk/ufs/{location}", Method = "GET", Headers = new() { {"user-agent", Constants.UserAgent}, @@ -444,7 +450,7 @@ public GetBinDaysResponse GetBinDays(Address address, ClientSideResponse? client return new ClientSideRequest { RequestId = 1, - Url = _initialUrl, + Url = "https://onlineforms.bradford.gov.uk/ufs/collectiondates.eb?ebd=0&ebp=20&ebz=1_1761729510565", Method = "GET", Headers = new() { {"user-agent", Constants.UserAgent}, @@ -459,13 +465,14 @@ public GetBinDaysResponse GetBinDays(Address address, ClientSideResponse? client else if (clientSideResponse.RequestId == 1) { var cookie = ProcessingUtilities.ParseSetCookieHeaderForRequestCookie( - clientSideResponse.Headers["set-cookie"]); - var redirectUrl = BuildAbsoluteUrl(clientSideResponse.Headers["location"]); + clientSideResponse.Headers["set-cookie"] + ); + var location = clientSideResponse.Headers["location"]; return new ClientSideRequest { RequestId = 2, - Url = redirectUrl, + Url = $"https://onlineforms.bradford.gov.uk/ufs/{location}", Method = "GET", Headers = new() { {"user-agent", Constants.UserAgent}, @@ -482,8 +489,13 @@ public GetBinDaysResponse GetBinDays(Address address, ClientSideResponse? client // Prepare client-side request for searching for addresses else if (clientSideResponse.RequestId == 2) { - var (ebs, formstack, origRequestUrl, pageSequence, pageId, formStateId) = ParseFormValues( - clientSideResponse.Content); + var html = clientSideResponse.Content; + var ebs = EbsRegex().Match(html).Groups["value"].Value; + var formstack = FormstackRegex().Match(html).Groups["value"].Value; + var origRequestUrl = OrigRequestUrlRegex().Match(html).Groups["value"].Value.Replace("&", "&"); + var pageSequence = PageSequenceRegex().Match(html).Groups["value"].Value; + var pageId = PageIdRegex().Match(html).Groups["value"].Value; + var formStateId = FormStateRegex().Match(html).Groups["value"].Value; Dictionary metadata = new() { @@ -507,16 +519,21 @@ public GetBinDaysResponse GetBinDays(Address address, ClientSideResponse? client {"pageId", pageId}, {"formStateId", formStateId}, {_postcodeField, postcode}, - {"HID:inputs", _addressHidInputs}, - {_findButton, "Find address"}, - }); + {"HID:inputs", "ICTRL:Q2YAUZ5b:_:A,ACTRL:2eDPaBQA:_,APAGE:E.h,APAGE:B.h,APAGE:N.h,APAGE:P.h,APAGE:S.h,APAGE:R.h"}, + {"CTRL:2eDPaBQA:_", "Find address"}, + } + ); return new ClientSideRequest { RequestId = 3, Url = $"https://onlineforms.bradford.gov.uk/ufs/ufsajax?ebz={ebs}", Method = "POST", - Headers = CreateFormHeaders(metadata["cookie"]), + Headers = new() { + {"user-agent", Constants.UserAgent}, + {"content-type", "application/x-www-form-urlencoded"}, + {"cookie", metadata["cookie"]}, + }, Body = requestBody, Options = new ClientSideOptions { @@ -528,51 +545,6 @@ public GetBinDaysResponse GetBinDays(Address address, ClientSideResponse? client return null; } - /// - /// Creates the standard form POST headers with the given cookie. - /// - /// The cookie value to include in the headers. - /// A dictionary of HTTP headers for form POST requests. - private static Dictionary CreateFormHeaders(string cookie) => new() - { - { "user-agent", Constants.UserAgent }, - { "content-type", "application/x-www-form-urlencoded" }, - { "cookie", cookie }, - }; - - /// - /// Converts a relative URL to an absolute URL by prepending the Bradford Council forms base URL if needed. - /// - /// The relative or absolute URL to process. - /// An absolute URL. - private static string BuildAbsoluteUrl(string relativeUrl) - { - if (Uri.IsWellFormedUriString(relativeUrl, UriKind.Absolute)) - { - return relativeUrl; - } - - return $"https://onlineforms.bradford.gov.uk/ufs/{relativeUrl}"; - } - - /// - /// Parses form values from HTML content using regex patterns. - /// - /// The HTML content to parse. - /// A tuple containing the extracted form values (ebs, formstack, origRequestUrl, pageSequence, pageId, formStateId). - private static (string Ebs, string Formstack, string OrigRequestUrl, string PageSequence, string PageId, string FormStateId) ParseFormValues( - string html) - { - var ebs = EbsRegex().Match(html).Groups["value"].Value; - var formstack = FormstackRegex().Match(html).Groups["value"].Value; - var origRequestUrl = OrigRequestUrlRegex().Match(html).Groups["url"].Value.Replace("&", "&"); - var pageSequence = PageSequenceRegex().Match(html).Groups["value"].Value; - var pageId = PageIdRegex().Match(html).Groups["value"].Value; - var formStateId = FormStateRegex().Match(html).Groups["value"].Value; - - return (ebs, formstack, origRequestUrl, pageSequence, pageId, formStateId); - } - /// /// Extracts HTML content from a JSON response containing updated controls. /// @@ -582,18 +554,12 @@ private static (string Ebs, string Formstack, string OrigRequestUrl, string Page /// Thrown when the updated HTML content cannot be found. private static string ExtractUpdatedHtml(string content, string identifier) { - using var jsonDoc = JsonDocument.Parse(content); - - if (jsonDoc.RootElement.TryGetProperty("updatedControls", out var updatedControls)) + foreach (Match match in HtmlPropertyRegex().Matches(content)) { - foreach (var control in updatedControls.EnumerateArray()) + var html = match.Groups[1].Value; + if (html.Contains(identifier, StringComparison.OrdinalIgnoreCase)) { - if (control.TryGetProperty("html", out var htmlProperty) && - htmlProperty.ValueKind == JsonValueKind.String && - htmlProperty.GetString()!.Contains(identifier, StringComparison.OrdinalIgnoreCase)) - { - return htmlProperty.GetString()!.Replace("\\\"", "\""); - } + return html.Replace("\\\"", "\""); } }