From f10583ba81ebce78fc821c7949a1624f5ad81dcc Mon Sep 17 00:00:00 2001 From: Jay Van der Zant Date: Wed, 1 Apr 2026 16:18:30 +0000 Subject: [PATCH 1/4] perf: pre-fetch CSO external ID dictionary for O(1) import matching (#440) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Replace N per-object database queries during import with a single bulk query that loads all CSO external ID mappings into an in-memory dictionary at import start. FindMatchingCso now uses O(1) dictionary lookup (Lookup phase) followed by PK-based hydration (Hydrate phase), eliminating the LOWER() string-scan queries that dominated import time. The Lookup/Hydrate separation provides a clean seam for future persistent caching (#436) — the dictionary becomes the cache warm-up path, and the hydration method becomes the cache-miss handler. Co-Authored-By: Claude Opus 4.6 (1M context) --- CHANGELOG.md | 4 + .../IConnectedSystemRepository.cs | 9 + src/JIM.Data/Repositories/ISyncRepository.cs | 16 + src/JIM.InMemoryData/SyncRepository.cs | 55 ++ .../Repositories/ConnectedSystemRepository.cs | 22 + .../Repositories/SyncRepository.cs | 6 + .../Processors/SyncImportTaskProcessor.cs | 116 +++-- .../ImportBatchPrefetchTests.cs | 481 ++++++++++++++++++ 8 files changed, 679 insertions(+), 30 deletions(-) create mode 100644 test/JIM.Worker.Tests/Synchronisation/ImportBatchPrefetchTests.cs diff --git a/CHANGELOG.md b/CHANGELOG.md index 9fb07e4b..69c774ce 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -18,6 +18,10 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 - ✨ Keycloak admin console accessible at `http://localhost:8181` - 🔒 HTTP OIDC authority support for development (RequireHttpsMetadata conditionally disabled) +### Performance + +- ⚡ Import CSO matching now uses a pre-fetched dictionary for O(1) external ID lookups, replacing N per-object database queries with a single bulk query at import start — eliminates the dominant bottleneck in full imports (#440) + ### Fixed - 🔒 Attribute change history is no longer cascade-deleted when a metaverse or connected system attribute definition is removed — the FK is set to null and snapshot `AttributeName`/`AttributeType` properties preserve the audit trail indefinitely (#58) diff --git a/src/JIM.Data/Repositories/IConnectedSystemRepository.cs b/src/JIM.Data/Repositories/IConnectedSystemRepository.cs index bf7f9242..8735b2db 100644 --- a/src/JIM.Data/Repositories/IConnectedSystemRepository.cs +++ b/src/JIM.Data/Repositories/IConnectedSystemRepository.cs @@ -76,6 +76,15 @@ public Task> GetAttributeVal /// The connected system to load mappings for. /// A dictionary of cache key → CSO GUID mappings. public Task> GetAllCsoExternalIdMappingsAsync(int connectedSystemId); + + /// + /// Batch-loads full CSO entity graphs by their IDs. + /// Returns CSOs with the same Include chain as GetConnectedSystemObjectByAttributeAsync + /// (Type.Attributes, AttributeValues.Attribute, AttributeValues.ReferenceValue.Type). + /// Used as the hydration phase of the batch pre-fetch import pipeline (#440). + /// + public Task> GetConnectedSystemObjectsByIdsAsync(int connectedSystemId, IEnumerable csoIds); + public Task> GetConnectedSystemContainersAsync(ConnectedSystem connectedSystem); /// diff --git a/src/JIM.Data/Repositories/ISyncRepository.cs b/src/JIM.Data/Repositories/ISyncRepository.cs index 10f08af9..9bf25282 100644 --- a/src/JIM.Data/Repositories/ISyncRepository.cs +++ b/src/JIM.Data/Repositories/ISyncRepository.cs @@ -100,6 +100,22 @@ public interface ISyncRepository /// Task GetConnectedSystemObjectBySecondaryExternalIdAnyTypeAsync(int connectedSystemId, string secondaryExternalIdValue); + /// + /// Bulk-loads all CSO external ID mappings for a connected system into a lightweight dictionary. + /// Returns a dictionary mapping cache keys ("cso:{connectedSystemId}:{attributeId}:{lowerExternalIdValue}") + /// to CSO GUIDs. Used as the lookup phase of the import pipeline to answer "does this CSO exist?" + /// in O(1) without loading full entity graphs. + /// + Task> GetAllCsoExternalIdMappingsAsync(int connectedSystemId); + + /// + /// Batch-loads full CSO entity graphs by their IDs in a single query. + /// Returns CSOs with Type, Attributes, AttributeValues, and ReferenceValue navigations loaded — + /// the same shape as GetConnectedSystemObjectByAttributeAsync but for multiple CSOs at once. + /// Used as the hydration phase of the import pipeline after the lookup phase identifies which CSOs exist. + /// + Task> GetConnectedSystemObjectsByIdsAsync(int connectedSystemId, IEnumerable csoIds); + /// /// Gets multiple CSOs by their external ID attribute values (batch lookup). /// Returns a dictionary keyed by the string representation of the attribute value. diff --git a/src/JIM.InMemoryData/SyncRepository.cs b/src/JIM.InMemoryData/SyncRepository.cs index d9e71fc2..e96a0934 100644 --- a/src/JIM.InMemoryData/SyncRepository.cs +++ b/src/JIM.InMemoryData/SyncRepository.cs @@ -258,6 +258,47 @@ public Task> GetConnectedSystemObjectsModi return Task.FromResult(cso); } + public Task> GetAllCsoExternalIdMappingsAsync(int connectedSystemId) + { + var result = new Dictionary(StringComparer.OrdinalIgnoreCase); + foreach (var cso in GetCsosForSystem(connectedSystemId)) + { + // Try primary external ID first + var primaryAv = cso.AttributeValues.FirstOrDefault(av => av.AttributeId == cso.ExternalIdAttributeId); + var primaryValue = GetExternalIdValueString(primaryAv); + + if (primaryValue != null) + { + var cacheKey = $"cso:{connectedSystemId}:{cso.ExternalIdAttributeId}:{primaryValue}"; + result.TryAdd(cacheKey, cso.Id); + continue; + } + + // Fall back to secondary external ID (PendingProvisioning CSOs) + if (cso.SecondaryExternalIdAttributeId.HasValue) + { + var secondaryAv = cso.AttributeValues.FirstOrDefault(av => av.AttributeId == cso.SecondaryExternalIdAttributeId); + var secondaryValue = GetExternalIdValueString(secondaryAv); + + if (secondaryValue != null) + { + var cacheKey = $"cso:{connectedSystemId}:{cso.SecondaryExternalIdAttributeId.Value}:{secondaryValue}"; + result.TryAdd(cacheKey, cso.Id); + } + } + } + return Task.FromResult(result); + } + + public Task> GetConnectedSystemObjectsByIdsAsync(int connectedSystemId, IEnumerable csoIds) + { + var idSet = new HashSet(csoIds); + var result = GetCsosForSystem(connectedSystemId) + .Where(cso => idSet.Contains(cso.Id)) + .ToList(); + return Task.FromResult(result); + } + public Task> GetConnectedSystemObjectsByAttributeValuesAsync( int connectedSystemId, int attributeId, IEnumerable attributeValues) { @@ -1517,6 +1558,20 @@ private static void FixupMvoAttributeValues(MetaverseObject mvo) } } + /// + /// Converts a CSO attribute value to its lowercase string representation for cache key building. + /// Mirrors the logic in ConnectedSystemRepository.GetExternalIdValueString. + /// + private static string? GetExternalIdValueString(ConnectedSystemObjectAttributeValue? av) + { + if (av == null) return null; + if (av.StringValue != null) return av.StringValue.ToLowerInvariant(); + if (av.IntValue.HasValue) return av.IntValue.Value.ToString(); + if (av.LongValue.HasValue) return av.LongValue.Value.ToString(); + if (av.GuidValue.HasValue) return av.GuidValue.Value.ToString().ToLowerInvariant(); + return null; + } + private void AddToCsIndex(ConnectedSystemObject cso) { if (!_csosByConnectedSystem.TryGetValue(cso.ConnectedSystemId, out var csSet)) diff --git a/src/JIM.PostgresData/Repositories/ConnectedSystemRepository.cs b/src/JIM.PostgresData/Repositories/ConnectedSystemRepository.cs index 7a987f2b..25582af6 100644 --- a/src/JIM.PostgresData/Repositories/ConnectedSystemRepository.cs +++ b/src/JIM.PostgresData/Repositories/ConnectedSystemRepository.cs @@ -1388,6 +1388,28 @@ public async Task> GetAllCsoExternalIdMappingsAsync(int return null; } + /// + public async Task> GetConnectedSystemObjectsByIdsAsync(int connectedSystemId, IEnumerable csoIds) + { + var idList = csoIds.ToList(); + if (idList.Count == 0) + return new List(); + + // Same Include chain as GetConnectedSystemObjectByAttributeAsync — callers need the full + // entity graph for attribute diffing during import processing. + return await Repository.Database.ConnectedSystemObjects + .AsSplitQuery() + .Include(cso => cso.Type) + .ThenInclude(t => t.Attributes) + .Include(cso => cso.AttributeValues) + .ThenInclude(av => av.Attribute) + .Include(cso => cso.AttributeValues) + .ThenInclude(av => av.ReferenceValue) + .ThenInclude(refCso => refCso!.Type) + .Where(cso => cso.ConnectedSystemId == connectedSystemId && idList.Contains(cso.Id)) + .ToListAsync(); + } + /// /// Gets a Connected System Object by its secondary external ID attribute value. /// Used to find PendingProvisioning CSOs during import reconciliation when the diff --git a/src/JIM.PostgresData/Repositories/SyncRepository.cs b/src/JIM.PostgresData/Repositories/SyncRepository.cs index a11018bd..a9c1260d 100644 --- a/src/JIM.PostgresData/Repositories/SyncRepository.cs +++ b/src/JIM.PostgresData/Repositories/SyncRepository.cs @@ -91,6 +91,12 @@ public Task> GetConnectedSystemObjectsModi int connectedSystemId, string secondaryExternalIdValue) => _repo.ConnectedSystems.GetConnectedSystemObjectBySecondaryExternalIdAnyTypeAsync(connectedSystemId, secondaryExternalIdValue); + public Task> GetAllCsoExternalIdMappingsAsync(int connectedSystemId) + => _repo.ConnectedSystems.GetAllCsoExternalIdMappingsAsync(connectedSystemId); + + public Task> GetConnectedSystemObjectsByIdsAsync(int connectedSystemId, IEnumerable csoIds) + => _repo.ConnectedSystems.GetConnectedSystemObjectsByIdsAsync(connectedSystemId, csoIds); + public Task> GetConnectedSystemObjectsByAttributeValuesAsync( int connectedSystemId, int attributeId, IEnumerable attributeValues) => _repo.ConnectedSystems.GetConnectedSystemObjectsByAttributeValuesAsync(connectedSystemId, attributeId, attributeValues); diff --git a/src/JIM.Worker/Processors/SyncImportTaskProcessor.cs b/src/JIM.Worker/Processors/SyncImportTaskProcessor.cs index 0ce42e52..77c4ae18 100644 --- a/src/JIM.Worker/Processors/SyncImportTaskProcessor.cs +++ b/src/JIM.Worker/Processors/SyncImportTaskProcessor.cs @@ -49,6 +49,15 @@ public class SyncImportTaskProcessor /// private bool _csIsEmpty; + /// + /// Pre-fetched external ID → CSO GUID lookup dictionary. Loaded once at import start (when the CS + /// is not empty) to replace N per-object DB queries with O(1) dictionary lookups. + /// Key format: "cso:{connectedSystemId}:{attributeId}:{lowerExternalIdValue}" + /// Updated when new CSOs are created mid-import to maintain consistency within the batch. + /// This is the "lookup" phase of the Lookup/Hydrate seam (#440). + /// + private Dictionary? _csoExternalIdLookup; + /// /// Controls how much detail is recorded for sync outcome graphs on each RPEI. /// Loaded once at import start from service settings. @@ -108,7 +117,22 @@ public async Task PerformFullImportAsync() var csoCountAtStart = await _syncRepo.GetConnectedSystemObjectCountAsync(_connectedSystem.Id); _csIsEmpty = csoCountAtStart == 0; if (_csIsEmpty) + { Log.Information("PerformFullImportAsync: Connected system {ConnectedSystemId} has no existing CSOs. Skipping CSO lookups for this import.", _connectedSystem.Id); + } + else + { + // Pre-fetch all CSO external ID mappings into a lightweight dictionary for O(1) lookups. + // This replaces N per-object DB queries with a single bulk query + dictionary lookups. + // The dictionary is ~200 bytes per CSO, so 100k CSOs ≈ 20 MB — acceptable at all scales. + // See #440 for scaling analysis. + using (Diagnostics.Sync.StartSpan("PreFetchCsoExternalIdLookup").SetTag("csoCount", csoCountAtStart)) + { + _csoExternalIdLookup = await _syncRepo.GetAllCsoExternalIdMappingsAsync(_connectedSystem.Id); + } + Log.Information("PerformFullImportAsync: Pre-fetched {Count} CSO external ID mappings for connected system {ConnectedSystemId}.", + _csoExternalIdLookup.Count, _connectedSystem.Id); + } // Load sync outcome tracking level once at start of import _syncOutcomeTrackingLevel = await _syncServer.GetSyncOutcomeTrackingLevelAsync(); @@ -932,8 +956,9 @@ await _syncRepo.UpdateActivityMessageAsync(_activity, $"Processing imported objects (0 / {totalObjectsInBatch:N0})"); const int progressUpdateInterval = 100; - // decision: do we want to load the whole connector space into memory to maximise performance? for now, let's keep it db-centric. - // todo: experiment with using parallel foreach to see if we can speed up processing + // CSO matching uses a pre-fetched external ID dictionary (O(1) lookup) + per-object hydration by ID. + // See #440 for the Lookup/Hydrate design and scaling analysis. + // Future: page-level batch hydration (#440 Option B) and persistent caching (#436). var importIndex = -1; foreach (var importObject in connectedSystemImportResult.ImportObjects) { @@ -1365,17 +1390,17 @@ private static void RemoveNullImportObjectAttributes(ConnectedSystemImportObject connectedSystemImportObject.Attributes.Remove(nullAttribute); } - private async Task TryAndFindMatchingConnectedSystemObjectAsync(ConnectedSystemImportObject connectedSystemImportObject, ConnectedSystemObjectType connectedSystemObjectType) + /// + /// Lookup phase: Uses the pre-fetched dictionary to check whether a CSO with this external ID + /// already exists, returning just the CSO GUID. O(1) dictionary lookup, no DB query. + /// Returns null if no match found (new object) or if this is a first-ever import (_csIsEmpty). + /// + private Guid? LookupCsoByExternalId(ConnectedSystemImportObject connectedSystemImportObject, ConnectedSystemObjectType connectedSystemObjectType) { - // If the connected system has no existing CSOs (first-ever import), every object is new. - // Skip the DB/cache lookup entirely — there's nothing to match against. - if (_csIsEmpty) + if (_csIsEmpty || _csoExternalIdLookup == null) return null; - // todo: consider support for multiple external id attributes, i.e. compound primary keys var externalIdAttribute = connectedSystemObjectType.Attributes.First(a => a.IsExternalId); - - // find the matching import object attribute var importObjectAttribute = connectedSystemImportObject.Attributes.SingleOrDefault(csioa => csioa.Name.Equals(externalIdAttribute.Name, StringComparison.OrdinalIgnoreCase)) ?? throw new MissingExternalIdAttributeException($"The imported object is missing the External Id attribute '{externalIdAttribute.Name}'. It cannot be processed as we will not be able to determine if it's an existing object or not."); @@ -1385,34 +1410,53 @@ private static void RemoveNullImportObjectAttributes(ConnectedSystemImportObject importObjectAttribute.GuidValues.Count > 1) throw new ExternalIdAttributeNotSingleValuedException($"External Id attribute ({externalIdAttribute.Name}) on the imported object has multiple values! The External Id attribute must be single-valued."); - // First, try to find CSO by primary external ID - ConnectedSystemObject? cso = externalIdAttribute.Type switch + // Build the cache key in the same format as GetAllCsoExternalIdMappingsAsync + var externalIdValue = externalIdAttribute.Type switch { AttributeDataType.Text when importObjectAttribute.StringValues.Count == 0 => throw new ExternalIdAttributeValueMissingException($"External Id string attribute ({externalIdAttribute.Name}) on the imported object has no value."), - AttributeDataType.Text => - await _syncRepo.GetConnectedSystemObjectByAttributeAsync(_connectedSystem.Id, externalIdAttribute.Id, importObjectAttribute.StringValues[0]), + AttributeDataType.Text => importObjectAttribute.StringValues[0].ToLowerInvariant(), AttributeDataType.Number when importObjectAttribute.IntValues.Count == 0 => throw new ExternalIdAttributeValueMissingException($"External Id number attribute({externalIdAttribute.Name}) on the imported object has no value."), - AttributeDataType.Number => - await _syncRepo.GetConnectedSystemObjectByAttributeAsync(_connectedSystem.Id, externalIdAttribute.Id, importObjectAttribute.IntValues[0]), + AttributeDataType.Number => importObjectAttribute.IntValues[0].ToString(), AttributeDataType.LongNumber when importObjectAttribute.LongValues.Count == 0 => throw new ExternalIdAttributeValueMissingException($"External Id long number attribute({externalIdAttribute.Name}) on the imported object has no value."), - AttributeDataType.LongNumber => - await _syncRepo.GetConnectedSystemObjectByAttributeAsync(_connectedSystem.Id, externalIdAttribute.Id, importObjectAttribute.LongValues[0]), + AttributeDataType.LongNumber => importObjectAttribute.LongValues[0].ToString(), AttributeDataType.Guid when importObjectAttribute.GuidValues.Count == 0 => throw new ExternalIdAttributeValueMissingException($"External Id guid attribute ({externalIdAttribute.Name}) on the imported object has no value."), - AttributeDataType.Guid => - await _syncRepo.GetConnectedSystemObjectByAttributeAsync(_connectedSystem.Id, externalIdAttribute.Id, importObjectAttribute.GuidValues[0]), - _ => throw new InvalidDataException($"TryAndFindMatchingConnectedSystemObjectAsync: Unsupported connected system object type External Id attribute type: {externalIdAttribute.Type}") + AttributeDataType.Guid => importObjectAttribute.GuidValues[0].ToString().ToLowerInvariant(), + _ => throw new InvalidDataException($"LookupCsoByExternalId: Unsupported connected system object type External Id attribute type: {externalIdAttribute.Type}") }; - if (cso != null) - return cso; + var cacheKey = $"cso:{_connectedSystem.Id}:{externalIdAttribute.Id}:{externalIdValue}"; + return _csoExternalIdLookup.TryGetValue(cacheKey, out var csoId) ? csoId : null; + } + + /// + /// Hydration phase: Loads a full CSO entity graph by ID with all includes needed for attribute + /// diffing during import processing. This is a single PK-based query with includes — much cheaper + /// than the previous attribute-value-based search query with LOWER() string comparison. + /// Falls back to secondary external ID lookup for PendingProvisioning CSOs not found in the dictionary. + /// + private async Task HydrateCsoAsync(Guid? csoId, ConnectedSystemImportObject connectedSystemImportObject, ConnectedSystemObjectType connectedSystemObjectType) + { + // If lookup found a match, hydrate the full entity by ID + if (csoId.HasValue) + { + var csos = await _syncRepo.GetConnectedSystemObjectsByIdsAsync(_connectedSystem.Id, new[] { csoId.Value }); + var cso = csos.FirstOrDefault(); + if (cso != null) + return cso; + + // Cache had the ID but the CSO no longer exists (deleted between pre-fetch and hydrate). + // Fall through to secondary external ID check. + Log.Warning("HydrateCsoAsync: Pre-fetch dictionary contained CSO {CsoId} but it was not found in the database. Possible concurrent deletion. Falling through to secondary lookup.", + csoId.Value); + } - // No match found by primary external ID. Check for PendingProvisioning CSOs by secondary external ID. - // This handles the case where a CSO was created during provisioning evaluation but the object - // hasn't been imported yet with its system-assigned external ID (e.g., LDAP objectGUID). + // No match by primary external ID (or CSO was deleted). Check for PendingProvisioning CSOs + // by secondary external ID. This is a per-object query but PendingProvisioning CSOs are rare + // (only exist during the narrow window between provisioning evaluation and confirming import). var secondaryExternalIdAttribute = connectedSystemObjectType.Attributes.FirstOrDefault(a => a.IsSecondaryExternalId); if (secondaryExternalIdAttribute == null) return null; @@ -1422,7 +1466,7 @@ await _syncRepo.GetConnectedSystemObjectByAttributeAsync(_connectedSystem.Id, ex if (secondaryIdImportAttr == null) return null; - cso = secondaryExternalIdAttribute.Type switch + var secondaryCso = secondaryExternalIdAttribute.Type switch { AttributeDataType.Text when secondaryIdImportAttr.StringValues.Count > 0 => await _syncRepo.GetConnectedSystemObjectBySecondaryExternalIdAsync( @@ -1431,16 +1475,28 @@ await _syncRepo.GetConnectedSystemObjectBySecondaryExternalIdAsync( }; // Only return PendingProvisioning CSOs - if it's already Normal, the primary ID lookup should have found it - if (cso != null && cso.Status == ConnectedSystemObjectStatus.PendingProvisioning) + if (secondaryCso != null && secondaryCso.Status == ConnectedSystemObjectStatus.PendingProvisioning) { - Log.Information("TryAndFindMatchingConnectedSystemObjectAsync: Found PendingProvisioning CSO {CsoId} by secondary external ID '{SecondaryId}'. This confirms a provisioned object.", - cso.Id, secondaryIdImportAttr.StringValues[0]); - return cso; + Log.Information("HydrateCsoAsync: Found PendingProvisioning CSO {CsoId} by secondary external ID '{SecondaryId}'. This confirms a provisioned object.", + secondaryCso.Id, secondaryIdImportAttr.StringValues[0]); + return secondaryCso; } return null; } + /// + /// Combined Lookup + Hydrate: finds a matching CSO for an imported object. + /// Phase 1 (Lookup): O(1) dictionary lookup using the pre-fetched external ID mappings. + /// Phase 2 (Hydrate): Loads the full CSO entity graph by ID for attribute diffing. + /// This replaces the previous per-object attribute-value-based search queries (#440). + /// + private async Task TryAndFindMatchingConnectedSystemObjectAsync(ConnectedSystemImportObject connectedSystemImportObject, ConnectedSystemObjectType connectedSystemObjectType) + { + var csoId = LookupCsoByExternalId(connectedSystemImportObject, connectedSystemObjectType); + return await HydrateCsoAsync(csoId, connectedSystemImportObject, connectedSystemObjectType); + } + private ConnectedSystemObject? CreateConnectedSystemObjectFromImportObject(ConnectedSystemImportObject connectedSystemImportObject, ConnectedSystemObjectType connectedSystemObjectType, ActivityRunProfileExecutionItem activityRunProfileExecutionItem) { var stopwatch = Stopwatch.StartNew(); diff --git a/test/JIM.Worker.Tests/Synchronisation/ImportBatchPrefetchTests.cs b/test/JIM.Worker.Tests/Synchronisation/ImportBatchPrefetchTests.cs new file mode 100644 index 00000000..f0beeb8c --- /dev/null +++ b/test/JIM.Worker.Tests/Synchronisation/ImportBatchPrefetchTests.cs @@ -0,0 +1,481 @@ +using JIM.Application; +using JIM.Application.Servers; +using JIM.Connectors.Mock; +using JIM.Models.Activities; +using JIM.Models.Core; +using JIM.Models.Enums; +using JIM.Models.Staging; +using JIM.Models.Transactional; +using JIM.PostgresData; +using JIM.Worker.Processors; +using JIM.Worker.Tests.Models; +using Microsoft.EntityFrameworkCore; +using MockQueryable.Moq; +using Moq; +using SyncRepository = JIM.InMemoryData.SyncRepository; + +namespace JIM.Worker.Tests.Synchronisation; + +/// +/// Tests for the batch pre-fetch import pipeline (#440). +/// Verifies that the lookup (dictionary) + hydration (batch load) approach produces +/// identical results to the previous per-object query approach. +/// +[TestFixture] +public class ImportBatchPrefetchTests +{ + #region accessors + private MetaverseObject InitiatedBy { get; set; } = null!; + private List ConnectedSystemsData { get; set; } = null!; + private Mock> MockDbSetConnectedSystems { get; set; } = null!; + private List ConnectedSystemRunProfilesData { get; set; } = null!; + private Mock> MockDbSetConnectedSystemRunProfiles { get; set; } = null!; + private List ConnectedSystemObjectTypesData { get; set; } = null!; + private Mock> MockDbSetConnectedSystemObjectTypes { get; set; } = null!; + private List ConnectedSystemPartitionsData { get; set; } = null!; + private Mock> MockDbSetConnectedSystemPartitions { get; set; } = null!; + private List ActivitiesData { get; set; } = null!; + private Mock> MockDbSetActivities { get; set; } = null!; + private List ServiceSettingsData { get; set; } = null!; + private Mock> MockDbSetServiceSettings { get; set; } = null!; + private List PendingExportsData { get; set; } = null!; + private Mock> MockDbSetPendingExports { get; set; } = null!; + private List ConnectedSystemObjectsData { get; set; } = new(); + private Mock MockJimDbContext { get; set; } = null!; + private JimApplication Jim { get; set; } = null!; + private SyncRepository SyncRepo { get; set; } = null!; + #endregion + + [TearDown] + public void TearDown() + { + Jim?.Dispose(); + } + + [SetUp] + public void Setup() + { + TestUtilities.SetEnvironmentVariables(); + InitiatedBy = TestUtilities.GetInitiatedBy(); + + ConnectedSystemsData = TestUtilities.GetConnectedSystemData(); + MockDbSetConnectedSystems = ConnectedSystemsData.BuildMockDbSet(); + + ConnectedSystemRunProfilesData = TestUtilities.GetConnectedSystemRunProfileData(); + MockDbSetConnectedSystemRunProfiles = ConnectedSystemRunProfilesData.BuildMockDbSet(); + + ConnectedSystemObjectTypesData = TestUtilities.GetConnectedSystemObjectTypeData(); + MockDbSetConnectedSystemObjectTypes = ConnectedSystemObjectTypesData.BuildMockDbSet(); + + ConnectedSystemPartitionsData = TestUtilities.GetConnectedSystemPartitionData(); + MockDbSetConnectedSystemPartitions = ConnectedSystemPartitionsData.BuildMockDbSet(); + + var fullImportRunProfile = ConnectedSystemRunProfilesData[0]; + ActivitiesData = TestUtilities.GetActivityData(fullImportRunProfile.RunType, fullImportRunProfile.Id); + MockDbSetActivities = ActivitiesData.BuildMockDbSet(); + + ServiceSettingsData = TestUtilities.GetServiceSettingsData(); + MockDbSetServiceSettings = ServiceSettingsData.BuildMockDbSet(); + + PendingExportsData = new List(); + MockDbSetPendingExports = PendingExportsData.BuildMockDbSet(); + + MockJimDbContext = new Mock(); + MockJimDbContext.Setup(m => m.Activities).Returns(MockDbSetActivities.Object); + MockJimDbContext.Setup(m => m.ConnectedSystems).Returns(MockDbSetConnectedSystems.Object); + MockJimDbContext.Setup(m => m.ConnectedSystemObjectTypes).Returns(MockDbSetConnectedSystemObjectTypes.Object); + MockJimDbContext.Setup(m => m.ConnectedSystemRunProfiles).Returns(MockDbSetConnectedSystemRunProfiles.Object); + MockJimDbContext.Setup(m => m.ConnectedSystemPartitions).Returns(MockDbSetConnectedSystemPartitions.Object); + MockJimDbContext.Setup(m => m.ServiceSettingItems).Returns(MockDbSetServiceSettings.Object); + MockJimDbContext.Setup(m => m.PendingExports).Returns(MockDbSetPendingExports.Object); + + ConnectedSystemObjectsData = new List(); + } + + #region First-ever import (empty CS) — dictionary not loaded, all objects created + + [Test] + public async Task FullImport_EmptyConnectedSystem_CreatesAllObjectsAsync() + { + // Arrange — no pre-existing CSOs, so _csIsEmpty=true and dictionary is not loaded + SetupDbContextWithCsoData(); + SyncRepo = TestUtilities.CreateSyncRepository(activity: ActivitiesData.First()); + Jim = new JimApplication(new PostgresDataRepository(MockJimDbContext.Object), syncRepository: SyncRepo); + + var mockFileConnector = new MockFileConnector(); + mockFileConnector.TestImportObjects.Add(CreateImportObject(TestConstants.CS_OBJECT_1_HR_ID, TestConstants.CS_OBJECT_1_DISPLAY_NAME)); + mockFileConnector.TestImportObjects.Add(CreateImportObject(TestConstants.CS_OBJECT_2_HR_ID, TestConstants.CS_OBJECT_2_DISPLAY_NAME)); + + // Act + var processor = await CreateProcessorAsync(mockFileConnector); + await processor.PerformFullImportAsync(); + + // Assert — both objects should be created + Assert.That(SyncRepo.ConnectedSystemObjects.Count, Is.EqualTo(2), + "First-ever import should create all imported objects."); + + var cso1 = SyncRepo.ConnectedSystemObjects.Values + .SingleOrDefault(c => c.AttributeValues.Any(av => av.GuidValue == TestConstants.CS_OBJECT_1_HR_ID)); + Assert.That(cso1, Is.Not.Null, "CSO for object 1 should have been created."); + + var cso2 = SyncRepo.ConnectedSystemObjects.Values + .SingleOrDefault(c => c.AttributeValues.Any(av => av.GuidValue == TestConstants.CS_OBJECT_2_HR_ID)); + Assert.That(cso2, Is.Not.Null, "CSO for object 2 should have been created."); + } + + #endregion + + #region Re-import with existing CSOs — dictionary finds matches, CSOs updated + + [Test] + public async Task FullImport_WithExistingCsos_MatchesAndUpdatesViaPreFetchAsync() + { + // Arrange — seed two existing CSOs, then re-import with attribute changes + InitialiseExistingCsos(); + SetupDbContextWithCsoData(); + SyncRepo = TestUtilities.CreateSyncRepository(csos: ConnectedSystemObjectsData, activity: ActivitiesData.First()); + Jim = new JimApplication(new PostgresDataRepository(MockJimDbContext.Object), syncRepository: SyncRepo); + + // Import the same objects with a changed DISPLAY_NAME for object 1 + var mockFileConnector = new MockFileConnector(); + mockFileConnector.TestImportObjects.Add(CreateImportObject(TestConstants.CS_OBJECT_1_HR_ID, "Jane Smith-Updated")); + mockFileConnector.TestImportObjects.Add(CreateImportObject(TestConstants.CS_OBJECT_2_HR_ID, TestConstants.CS_OBJECT_2_DISPLAY_NAME)); + + // Act + var processor = await CreateProcessorAsync(mockFileConnector); + await processor.PerformFullImportAsync(); + + // Assert — same two CSOs should exist (no new ones created) + Assert.That(SyncRepo.ConnectedSystemObjects.Count, Is.EqualTo(2), + "Re-import should not create new CSOs for existing objects."); + + // Verify the pre-fetch dictionary matched correctly — object 1 was updated, not recreated + var cso1 = SyncRepo.ConnectedSystemObjects[TestConstants.CS_OBJECT_1_ID]; + Assert.That(cso1, Is.Not.Null, "Original CSO 1 should still exist."); + + var displayName = cso1.AttributeValues + .SingleOrDefault(av => av.Attribute?.Name == MockSourceSystemAttributeNames.DISPLAY_NAME.ToString()); + Assert.That(displayName?.StringValue, Is.EqualTo("Jane Smith-Updated"), + "CSO 1 DISPLAY_NAME should have been updated to the new value."); + + // Verify object 2 is unchanged + var cso2 = SyncRepo.ConnectedSystemObjects[TestConstants.CS_OBJECT_2_ID]; + var displayName2 = cso2.AttributeValues + .SingleOrDefault(av => av.Attribute?.Name == MockSourceSystemAttributeNames.DISPLAY_NAME.ToString()); + Assert.That(displayName2?.StringValue, Is.EqualTo(TestConstants.CS_OBJECT_2_DISPLAY_NAME), + "CSO 2 DISPLAY_NAME should be unchanged."); + } + + #endregion + + #region Mixed import — some new, some existing + + [Test] + public async Task FullImport_MixedNewAndExisting_CreatesNewAndUpdatesExistingAsync() + { + // Arrange — seed one existing CSO, import two objects (one existing, one new) + InitialiseExistingCsos(singleCsoOnly: true); + SetupDbContextWithCsoData(); + SyncRepo = TestUtilities.CreateSyncRepository(csos: ConnectedSystemObjectsData, activity: ActivitiesData.First()); + Jim = new JimApplication(new PostgresDataRepository(MockJimDbContext.Object), syncRepository: SyncRepo); + + var mockFileConnector = new MockFileConnector(); + // Object 1 already exists — import with changed display name + mockFileConnector.TestImportObjects.Add(CreateImportObject(TestConstants.CS_OBJECT_1_HR_ID, "Jane Smith-Updated")); + // Object 2 is new — should be created + mockFileConnector.TestImportObjects.Add(CreateImportObject(TestConstants.CS_OBJECT_2_HR_ID, TestConstants.CS_OBJECT_2_DISPLAY_NAME)); + + // Act + var processor = await CreateProcessorAsync(mockFileConnector); + await processor.PerformFullImportAsync(); + + // Assert — should have 2 CSOs total: 1 updated + 1 newly created + Assert.That(SyncRepo.ConnectedSystemObjects.Count, Is.EqualTo(2), + "Mixed import should result in one updated and one new CSO."); + + // Existing CSO was matched and updated (not recreated) + var cso1 = SyncRepo.ConnectedSystemObjects[TestConstants.CS_OBJECT_1_ID]; + Assert.That(cso1, Is.Not.Null, "Original CSO 1 should still exist with the same ID."); + var displayName1 = cso1.AttributeValues + .SingleOrDefault(av => av.Attribute?.Name == MockSourceSystemAttributeNames.DISPLAY_NAME.ToString()); + Assert.That(displayName1?.StringValue, Is.EqualTo("Jane Smith-Updated"), + "Existing CSO should have been updated."); + + // New CSO was created + var cso2 = SyncRepo.ConnectedSystemObjects.Values + .SingleOrDefault(c => c.Id != TestConstants.CS_OBJECT_1_ID); + Assert.That(cso2, Is.Not.Null, "A new CSO should have been created for object 2."); + var displayName2 = cso2!.AttributeValues + .SingleOrDefault(av => av.Attribute?.Name == MockSourceSystemAttributeNames.DISPLAY_NAME.ToString()); + Assert.That(displayName2?.StringValue, Is.EqualTo(TestConstants.CS_OBJECT_2_DISPLAY_NAME), + "New CSO should have the correct display name."); + } + + #endregion + + #region Dictionary is updated mid-import when new CSOs are created + + [Test] + public async Task FullImport_NewCsoCreatedMidBatch_DoesNotDuplicateOnSubsequentPageAsync() + { + // Arrange — empty CS, import 3 objects. Verifies that all 3 are correctly created + // even though the dictionary starts empty. The dictionary should be updated as new + // CSOs are created so that cross-page deduplication still works. + SetupDbContextWithCsoData(); + SyncRepo = TestUtilities.CreateSyncRepository(activity: ActivitiesData.First()); + Jim = new JimApplication(new PostgresDataRepository(MockJimDbContext.Object), syncRepository: SyncRepo); + + var mockFileConnector = new MockFileConnector(); + mockFileConnector.TestImportObjects.Add(CreateImportObject(TestConstants.CS_OBJECT_1_HR_ID, TestConstants.CS_OBJECT_1_DISPLAY_NAME)); + mockFileConnector.TestImportObjects.Add(CreateImportObject(TestConstants.CS_OBJECT_2_HR_ID, TestConstants.CS_OBJECT_2_DISPLAY_NAME)); + mockFileConnector.TestImportObjects.Add(CreateImportObject(TestConstants.CS_OBJECT_3_HR_ID, TestConstants.CS_OBJECT_3_DISPLAY_NAME)); + + // Act + var processor = await CreateProcessorAsync(mockFileConnector); + await processor.PerformFullImportAsync(); + + // Assert — all 3 should be created without duplicates + Assert.That(SyncRepo.ConnectedSystemObjects.Count, Is.EqualTo(3), + "All three objects should be created, no duplicates."); + } + + #endregion + + #region Pre-fetch dictionary lookup tests (InMemory SyncRepository) + + [Test] + public async Task GetAllCsoExternalIdMappingsAsync_WithExistingCsos_ReturnsDictionaryAsync() + { + // Arrange + InitialiseExistingCsos(); + SyncRepo = TestUtilities.CreateSyncRepository(csos: ConnectedSystemObjectsData, activity: ActivitiesData.First()); + + // Act + var mappings = await SyncRepo.GetAllCsoExternalIdMappingsAsync(1); + + // Assert + Assert.That(mappings.Count, Is.EqualTo(2), "Should have 2 mappings for 2 seeded CSOs."); + + // Verify the cache keys follow the expected format + var objectType = ConnectedSystemObjectTypesData.First(); + var externalIdAttrId = (int)MockSourceSystemAttributeNames.HR_ID; + var expectedKey1 = $"cso:1:{externalIdAttrId}:{TestConstants.CS_OBJECT_1_HR_ID.ToString().ToLowerInvariant()}"; + Assert.That(mappings.ContainsKey(expectedKey1), Is.True, + $"Dictionary should contain key for CSO 1. Keys: {string.Join(", ", mappings.Keys)}"); + Assert.That(mappings[expectedKey1], Is.EqualTo(TestConstants.CS_OBJECT_1_ID)); + } + + [Test] + public async Task GetAllCsoExternalIdMappingsAsync_EmptyConnectedSystem_ReturnsEmptyDictionaryAsync() + { + // Arrange + SyncRepo = TestUtilities.CreateSyncRepository(activity: ActivitiesData.First()); + + // Act + var mappings = await SyncRepo.GetAllCsoExternalIdMappingsAsync(1); + + // Assert + Assert.That(mappings.Count, Is.EqualTo(0), "Empty CS should return empty dictionary."); + } + + [Test] + public async Task GetConnectedSystemObjectsByIdsAsync_WithValidIds_ReturnsMatchingCsosAsync() + { + // Arrange + InitialiseExistingCsos(); + SyncRepo = TestUtilities.CreateSyncRepository(csos: ConnectedSystemObjectsData, activity: ActivitiesData.First()); + + // Act + var csos = await SyncRepo.GetConnectedSystemObjectsByIdsAsync(1, + new[] { TestConstants.CS_OBJECT_1_ID, TestConstants.CS_OBJECT_2_ID }); + + // Assert + Assert.That(csos.Count, Is.EqualTo(2), "Should return both CSOs."); + Assert.That(csos.Any(c => c.Id == TestConstants.CS_OBJECT_1_ID), Is.True); + Assert.That(csos.Any(c => c.Id == TestConstants.CS_OBJECT_2_ID), Is.True); + } + + [Test] + public async Task GetConnectedSystemObjectsByIdsAsync_WithEmptyIds_ReturnsEmptyListAsync() + { + // Arrange + InitialiseExistingCsos(); + SyncRepo = TestUtilities.CreateSyncRepository(csos: ConnectedSystemObjectsData, activity: ActivitiesData.First()); + + // Act + var csos = await SyncRepo.GetConnectedSystemObjectsByIdsAsync(1, Array.Empty()); + + // Assert + Assert.That(csos.Count, Is.EqualTo(0)); + } + + [Test] + public async Task GetConnectedSystemObjectsByIdsAsync_WithNonExistentIds_ReturnsEmptyListAsync() + { + // Arrange + InitialiseExistingCsos(); + SyncRepo = TestUtilities.CreateSyncRepository(csos: ConnectedSystemObjectsData, activity: ActivitiesData.First()); + + // Act + var csos = await SyncRepo.GetConnectedSystemObjectsByIdsAsync(1, + new[] { Guid.NewGuid(), Guid.NewGuid() }); + + // Assert + Assert.That(csos.Count, Is.EqualTo(0)); + } + + [Test] + public async Task GetConnectedSystemObjectsByIdsAsync_WrongConnectedSystem_ReturnsEmptyListAsync() + { + // Arrange + InitialiseExistingCsos(); + SyncRepo = TestUtilities.CreateSyncRepository(csos: ConnectedSystemObjectsData, activity: ActivitiesData.First()); + + // Act — CSOs belong to CS 1, query for CS 999 + var csos = await SyncRepo.GetConnectedSystemObjectsByIdsAsync(999, + new[] { TestConstants.CS_OBJECT_1_ID, TestConstants.CS_OBJECT_2_ID }); + + // Assert + Assert.That(csos.Count, Is.EqualTo(0)); + } + + #endregion + + #region Private helpers + + private async Task CreateProcessorAsync(MockFileConnector connector) + { + var connectedSystem = await Jim.ConnectedSystems.GetConnectedSystemAsync(1); + Assert.That(connectedSystem, Is.Not.Null, "Expected to retrieve a Connected System."); + + var runProfile = ConnectedSystemRunProfilesData.Single( + q => q.ConnectedSystemId == connectedSystem!.Id && q.RunType == ConnectedSystemRunType.FullImport); + var activity = ActivitiesData.First(); + + return new SyncImportTaskProcessor( + Jim, SyncRepo, new SyncServer(Jim), new JIM.Application.Servers.SyncEngine(), + connector, connectedSystem!, runProfile, + TestUtilities.CreateTestWorkerTask(activity, InitiatedBy), + new CancellationTokenSource()); + } + + private void SetupDbContextWithCsoData() + { + var mockDbSetConnectedSystemObject = ConnectedSystemObjectsData.BuildMockDbSet(); + mockDbSetConnectedSystemObject.Setup(set => set.AddRange(It.IsAny>())) + .Callback((IEnumerable entities) => + { + var connectedSystemObjects = entities as ConnectedSystemObject[] ?? entities.ToArray(); + foreach (var entity in connectedSystemObjects) + entity.Id = Guid.NewGuid(); + ConnectedSystemObjectsData.AddRange(connectedSystemObjects); + }); + MockJimDbContext.Setup(m => m.ConnectedSystemObjects).Returns(mockDbSetConnectedSystemObject.Object); + } + + private static ConnectedSystemImportObject CreateImportObject(Guid hrId, string displayName) + { + return new ConnectedSystemImportObject + { + ChangeType = ObjectChangeType.NotSet, + ObjectType = "SOURCE_USER", + Attributes = new List + { + new() + { + Name = MockSourceSystemAttributeNames.HR_ID.ToString(), + GuidValues = new List { hrId } + }, + new() + { + Name = MockSourceSystemAttributeNames.DISPLAY_NAME.ToString(), + StringValues = new List { displayName } + }, + new() + { + Name = MockSourceSystemAttributeNames.EMPLOYEE_ID.ToString(), + IntValues = new List { 1 } + } + } + }; + } + + private void InitialiseExistingCsos(bool singleCsoOnly = false) + { + ConnectedSystemObjectsData.Clear(); + + var connectedSystemObjectType = ConnectedSystemObjectTypesData.First(); + var cso1 = new ConnectedSystemObject + { + Id = TestConstants.CS_OBJECT_1_ID, + ConnectedSystemId = 1, + ConnectedSystem = ConnectedSystemsData.First(), + Type = connectedSystemObjectType, + ExternalIdAttributeId = (int)MockSourceSystemAttributeNames.HR_ID + }; + cso1.AttributeValues = new List + { + new() + { + Id = Guid.NewGuid(), + GuidValue = TestConstants.CS_OBJECT_1_HR_ID, + Attribute = connectedSystemObjectType.Attributes.Single(q => q.Name == MockSourceSystemAttributeNames.HR_ID.ToString()), + ConnectedSystemObject = cso1 + }, + new() + { + Id = Guid.NewGuid(), + StringValue = TestConstants.CS_OBJECT_1_DISPLAY_NAME, + Attribute = connectedSystemObjectType.Attributes.Single(q => q.Name == MockSourceSystemAttributeNames.DISPLAY_NAME.ToString()), + ConnectedSystemObject = cso1 + }, + new() + { + Id = Guid.NewGuid(), + IntValue = 1, + Attribute = connectedSystemObjectType.Attributes.Single(q => q.Name == MockSourceSystemAttributeNames.EMPLOYEE_ID.ToString()), + ConnectedSystemObject = cso1 + } + }; + ConnectedSystemObjectsData.Add(cso1); + + if (singleCsoOnly) + return; + + var cso2 = new ConnectedSystemObject + { + Id = TestConstants.CS_OBJECT_2_ID, + ConnectedSystemId = 1, + ConnectedSystem = ConnectedSystemsData.First(), + Type = connectedSystemObjectType, + ExternalIdAttributeId = (int)MockSourceSystemAttributeNames.HR_ID + }; + cso2.AttributeValues = new List + { + new() + { + Id = Guid.NewGuid(), + GuidValue = TestConstants.CS_OBJECT_2_HR_ID, + Attribute = connectedSystemObjectType.Attributes.Single(q => q.Name == MockSourceSystemAttributeNames.HR_ID.ToString()), + ConnectedSystemObject = cso2 + }, + new() + { + Id = Guid.NewGuid(), + StringValue = TestConstants.CS_OBJECT_2_DISPLAY_NAME, + Attribute = connectedSystemObjectType.Attributes.Single(q => q.Name == MockSourceSystemAttributeNames.DISPLAY_NAME.ToString()), + ConnectedSystemObject = cso2 + }, + new() + { + Id = Guid.NewGuid(), + IntValue = 2, + Attribute = connectedSystemObjectType.Attributes.Single(q => q.Name == MockSourceSystemAttributeNames.EMPLOYEE_ID.ToString()), + ConnectedSystemObject = cso2 + } + }; + ConnectedSystemObjectsData.Add(cso2); + } + + #endregion +} From 530f814619562315650ebf3b00051aa4cd767e6e Mon Sep 17 00:00:00 2001 From: Jay Van der Zant Date: Wed, 1 Apr 2026 16:45:22 +0000 Subject: [PATCH 2/4] docs: add documentation, diagram, and marketing site review to release skill Adds a new "Documentation Review and Update" section that runs before changelog validation, ensuring all docs, C4/Mermaid diagrams, and marketing site content are current before each release. Co-Authored-By: Claude Opus 4.6 (1M context) --- .claude/skills/release/SKILL.md | 116 +++++++++++++++++++++++++++++++- 1 file changed, 115 insertions(+), 1 deletion(-) diff --git a/.claude/skills/release/SKILL.md b/.claude/skills/release/SKILL.md index 56fc7024..39b84eab 100644 --- a/.claude/skills/release/SKILL.md +++ b/.claude/skills/release/SKILL.md @@ -2,7 +2,6 @@ name: release description: Create a new JIM release — updates VERSION, CHANGELOG, PowerShell manifest, commits, tags, and pushes argument-hint: " (e.g., 0.4.0 or 0.4.0-alpha)" -allowed-tools: Bash, Read, Edit, Write, Glob, Grep --- # Create a JIM Release @@ -39,6 +38,121 @@ Before starting, verify: 5. **Review pinned Docker dependencies**: Check that base image digests and apt package versions are current. If Dependabot PRs for Docker digests have been merged since the last release, flag this so the user can verify pinned apt versions still match. +## Documentation Review and Update + +Before validating the changelog, ensure all documentation reflects the current state of the codebase. + +### 1. Identify what changed since the last release + +Get the last release tag and list all commits since then: +``` +git describe --tags --abbrev=0 +git log ..HEAD --oneline --no-merges +``` + +From this list, identify commits that introduced: +- New features or capabilities +- Changed behaviour or configuration +- New or modified API endpoints +- New connectors, components, or services +- Architectural changes (new projects, containers, dependencies) +- Changed deployment or setup steps + +### 2. Review and update documentation + +Cross-reference the identified changes against ALL of the following documentation files, and update any that are out of date: + +- **`README.md`** (root) — Feature list, quick-start instructions, architecture overview, prerequisites +- **`docs/DEVELOPER_GUIDE.md`** — Architecture guide, development workflows, component documentation, diagram references +- **`docs/DEPLOYMENT_GUIDE.md`** — Deployment instructions, configuration, environment variables +- **`docs/DATABASE_GUIDE.md`** — Schema changes, migration notes, database configuration +- **`docs/EXPRESSIONS_GUIDE.md`** — Expression language documentation, available functions +- **`docs/TESTING_STRATEGY.md`** — Testing approach, test categories, integration test documentation +- **`docs/COMPLIANCE_MAPPING.md`** — Security and compliance documentation +- **`docs/CACHING_STRATEGY.md`** — Caching architecture and configuration +- **`docs/RELEASE_PROCESS.md`** — Release steps (if process has changed) +- **`docs/JIM_AI_ASSISTANT_CONTEXT.md`** — AI assistant context document +- **`docs/JIM_AI_ASSISTANT_INSTRUCTIONS.md`** — AI assistant instructions +- **Any other `.md` files in `docs/`** that are affected by the changes + +For each file, check whether the changes since the last release have made any content inaccurate, incomplete, or missing. Make the updates directly — do not just flag them. + +### 3. Review and update architectural diagrams + +#### C4 Model (Structurizr) + +1. **Review `docs/diagrams/structurizr/workspace.dsl`** against the current codebase. Check that: + - All containers (services, databases) are represented + - All components within each container are current + - Relationships between components/containers are accurate + - Any new connectors, services, or significant components added since the last release are included + - Removed or renamed components are cleaned up + +2. **Update the DSL** if any changes are needed. + +3. **Regenerate the C4 SVG images**: + ``` + jim-diagrams + ``` + This exports both light and dark theme SVGs to `docs/diagrams/images/`. + +4. **Review `docs/diagrams/structurizr/docs/01-overview.md`** — update the diagram documentation page if the diagrams have changed or new views have been added. + +#### Mermaid Process Diagrams + +Review each Mermaid diagram in `docs/diagrams/mermaid/` against the current codebase behaviour: + +- `FULL_IMPORT_FLOW.md` — Object import, duplicate detection, deletion detection +- `FULL_SYNC_CSO_PROCESSING.md` — Per-CSO sync decision tree +- `DELTA_SYNC_FLOW.md` — Delta sync watermark and early exit logic +- `EXPORT_EXECUTION_FLOW.md` — Export batching, parallelism, retry +- `PENDING_EXPORT_LIFECYCLE.md` — Pending export state machine +- `WORKER_TASK_LIFECYCLE.md` — Worker polling, dispatch, heartbeat +- `SCHEDULE_EXECUTION_LIFECYCLE.md` — Schedule step groups and recovery +- `CONNECTOR_LIFECYCLE.md` — Connector interface and lifecycle +- `ACTIVITY_AND_RPEI_FLOW.md` — Activity and RPEI accumulation +- `MVO_DELETION_AND_GRACE_PERIOD.md` — Deletion rules and grace periods + +For each diagram, check whether the logic or flow has changed since the last release. Update any diagrams that no longer accurately reflect the code. + +#### Diagram references in documentation + +Verify that pages embedding or linking to diagrams are up to date: +- `README.md` — SVG references +- `docs/DEVELOPER_GUIDE.md` — C4 and Mermaid diagram listings and links +- `docs/diagrams/structurizr/README.md` — Structurizr tooling documentation + +If new diagrams were added or existing ones renamed/removed, update these references accordingly. + +### 4. Review marketing site content + +Fetch the public marketing site at `https://tetron.io/jim/` and compare its content against the current state of JIM: + +1. **Feature claims** — Are all listed features still accurate? Are significant new features missing? +2. **Architecture descriptions** — Does the site's description of JIM's architecture match the current state? +3. **Connector/integration list** — Are all supported connectors and integrations listed? +4. **Deployment/requirements** — Are prerequisites and deployment descriptions current? +5. **Screenshots or visuals** — Are they representative of the current UI? + +The release agent cannot update the marketing site directly. Instead, present a clear summary of recommended changes to the user, structured so that a website development agent can implement them easily: +- List each recommended change with: **what to change**, **where on the page**, **what the current text/content says**, and **what it should say or show instead** +- Include any new feature descriptions that should be added, written in marketing-appropriate language +- Flag any content that should be removed (e.g., features that were deprecated or renamed) + +### 5. Present documentation changes for review + +Show the user a summary of all documentation and diagram changes made: +- List of files updated and what changed in each +- Any new diagrams added +- Marketing site recommendations (for manual action) +- Ask the user to confirm before proceeding to changelog validation + +Once confirmed, commit the documentation updates: +```bash +git add README.md docs/ docs/diagrams/ +git commit -m "docs: update documentation and diagrams for v release" +``` + ## Changelog Validation Before proceeding with version updates, validate that the changelog is complete. From bb41aa3215de97375f50a154c2ab8a71cb749a22 Mon Sep 17 00:00:00 2001 From: Jay Van der Zant Date: Wed, 1 Apr 2026 17:38:00 +0000 Subject: [PATCH 3/4] docs: add planning, quality, and subagent strategy rules to CLAUDE.md Add three new directive blocks: plan-before-building (with tunnel-vision breaker), demand-quality standards, and subagent strategy. Replace the manual release checklist with a pointer to the /release skill. Minor wording cleanups (branch naming, security section). Co-Authored-By: Claude Opus 4.6 (1M context) --- CLAUDE.md | 38 ++++++++++++++++++++++++++++---------- 1 file changed, 28 insertions(+), 10 deletions(-) diff --git a/CLAUDE.md b/CLAUDE.md index cc4c7e64..2e2defa1 100644 --- a/CLAUDE.md +++ b/CLAUDE.md @@ -10,7 +10,7 @@ Always use Context7 MCP when you need library/API documentation, code generation **YOU MUST BUILD AND TEST BEFORE EVERY COMMIT AND PR (for .NET code):** -1. **ALWAYS** build and test before committing - zero errors required +1. **ALWAYS** build and test before committing - zero errors and warnings required 2. **Prefer targeted builds**: Build only affected projects and their dependents, not the full solution. For example, if you changed `JIM.Connectors` and `JIM.Worker.Tests`, run `dotnet build test/JIM.Worker.Tests/` (which transitively builds its dependencies). Only use `dotnet build JIM.sln` for the **final pre-PR check** or when changes span many projects. 3. **Prefer targeted tests**: Run only the test projects that cover your changes. For example, `dotnet test test/JIM.Worker.Tests/` instead of `dotnet test JIM.sln`. Only run `dotnet test JIM.sln` for the **final pre-PR check**. 4. **NEVER** commit code that hasn't been built and tested locally @@ -69,6 +69,23 @@ JIM uses TDD as the standard development workflow. Tests are written **before** 2. When work is complete and tests pass, commit automatically if the user has indicated approval 3. Don't ask "Would you like me to commit?" - if the context is clear, just do it +**YOU MUST PLAN BEFORE BUILDING:** + +1. **ALWAYS** enter plan mode for any non-trivial task (3+ steps or architectural decisions) +2. Use plan mode for verification steps, not just building — plan how you will prove it works +3. **If you've spent more than 2–3 attempts fixing an issue without progress, STOP.** Do not keep pushing down the same path. Step back, re-enter plan mode, review the problem from scratch, and consider whether you are missing something or there is a better approach. If still stuck, ask the user — they have deep developer experience and can often see what you are missing. +4. **NEVER** mark a task complete without proving it works (build passes, tests pass, behaviour verified) +5. When relevant, diff behaviour between `main` and your changes to confirm correctness + +**YOU MUST DEMAND QUALITY:** + +1. **Simplicity First**: Make every change as simple as possible. Impact minimal code. +2. **Find Root Causes**: No temporary fixes, no workarounds, no band-aids. Diagnose the actual problem and fix it properly. Senior developer standards. +3. For non-trivial changes, pause and ask: "Is there a more elegant way to do this?" +4. If a fix feels hacky, step back and implement the clean solution — knowing everything you now know +5. Skip this for simple, obvious fixes — don't over-engineer +6. Before presenting work, ask yourself: "Would a staff engineer approve this?" + **Test project locations:** - API tests: `test/JIM.Web.Api.Tests/` - Model tests: `test/JIM.Models.Tests/` @@ -81,6 +98,13 @@ If you cannot build/test locally due to environment constraints, you MUST: - Mark the PR as draft - Request manual review and testing before merge +## Subagent Strategy + +- Use subagents liberally to keep the main context window clean +- Offload research, exploration, and parallel analysis to subagents +- For complex problems, throw more compute at it — use multiple subagents in parallel +- One task per subagent for focused execution + ## Synchronisation Integrity **SYNCHRONISATION INTEGRITY IS CRITICAL - PRIORITISE IT ABOVE ALL ELSE.** @@ -334,7 +358,7 @@ When creating ASCII diagrams in documentation or code comments, use only reliabl ## Security -JIM is deployed in government, defence, healthcare, financial services, and critical national infrastructure environments. All code MUST meet the security expectations of these sectors. +JIM is deployed in high-trust/assurance customer environments, i.e. healthcare, financial services, government, etc. All code MUST meet the security expectations of these sectors. **Key security rules (always apply):** - Use `[Authorize]` on all API controllers - deny by default @@ -537,7 +561,7 @@ Use `./test/integration/Run-IntegrationTests.ps1` (PowerShell) - never invoke sc **Git:** - **ALWAYS** work on a feature branch - NEVER commit directly to `main` - **NEVER** automatically create a PR or merge to `main` - the user must explicitly instruct you to do so -- Branch naming: `feature/description` or `claude/description-sessionId` +- Branch naming: `feature/description` - Commit messages: Descriptive, include issue reference if applicable - Build and test before every commit of .NET code (see **Exceptions** under Critical Requirements) - Push to feature branches, create PRs to main @@ -617,13 +641,7 @@ The `VERSION` file is the single source of truth for JIM's version number. It fe - Development tooling changes - Refactoring without user-facing changes -**Release checklist (all steps require user approval):** -1. Update `VERSION` file with new version -2. Move `[Unreleased]` items in `CHANGELOG.md` to a new version section -3. Update `src/JIM.PowerShell/JIM.psd1` — `ModuleVersion` field -4. Commit: `git commit -m "Release vX.Y.Z"` -5. Tag: `git tag vX.Y.Z` -6. Push: `git push origin main --tags` +**To create a release:** Use `/release ` — the skill handles VERSION, CHANGELOG, PowerShell manifest, documentation review, commit, tag, and push. > **Full release process, air-gapped deployment, and Docker image details:** See `docs/RELEASE_PROCESS.md` From f2f9bf3ca79b9d8e6da0d1548663fe14c6a34a3f Mon Sep 17 00:00:00 2001 From: Jay Van der Zant Date: Wed, 1 Apr 2026 17:50:02 +0000 Subject: [PATCH 4/4] docs: decompose root CLAUDE.md into targeted sub-folder files Move detailed reference material out of the root CLAUDE.md (657 -> 362 lines, 45% reduction) into contextual sub-folder files that load only when working in those directories: - src/CLAUDE.md: code conventions, architecture rules, project locations, common development tasks - .devcontainer/CLAUDE.md: commands, shell aliases, Docker workflows, dependency policy, environment setup, troubleshooting Root retains all behavioural guardrails, critical requirements, and short summaries with pointers to the sub-files. Co-Authored-By: Claude Opus 4.6 (1M context) --- .devcontainer/CLAUDE.md | 165 ++++++++++++++++++++ CLAUDE.md | 334 +++------------------------------------- src/CLAUDE.md | 165 ++++++++++++++++++++ 3 files changed, 350 insertions(+), 314 deletions(-) create mode 100644 .devcontainer/CLAUDE.md create mode 100644 src/CLAUDE.md diff --git a/.devcontainer/CLAUDE.md b/.devcontainer/CLAUDE.md new file mode 100644 index 00000000..803bfa85 --- /dev/null +++ b/.devcontainer/CLAUDE.md @@ -0,0 +1,165 @@ +# Development Environment Reference + +> Commands, workflows, environment setup, and troubleshooting. See root `CLAUDE.md` for behavioural rules and guardrails. + +## Commands + +**Build & Test:** +- `dotnet build JIM.sln` - Build entire solution (use for final pre-PR check) +- `dotnet build test/JIM.Worker.Tests/` - Build a specific project and its dependencies (preferred during development) +- `dotnet test JIM.sln` - Run all tests (use for final pre-PR check) +- `dotnet test test/JIM.Worker.Tests/` - Run a specific test project (preferred during development) +- `dotnet test --filter "FullyQualifiedName~TestName"` - Run specific test +- `dotnet clean && dotnet build` - Clean build + +**Database:** +- `dotnet ef migrations add [Name] --project src/JIM.PostgresData` - Add migration +- `dotnet ef database update --project src/JIM.PostgresData` - Apply migrations +- `docker compose exec jim.web dotnet ef database update` - Apply migrations in Docker + +**Shell Aliases (Recommended):** +- Aliases are automatically configured from `.devcontainer/jim-aliases.sh` +- If aliases don't work, run: `source ~/.zshrc` (or restart terminal) +- `jim` - List all available jim aliases +- `jim-compile` - Build entire solution (dotnet build) +- `jim-test` - Run unit + workflow tests (excludes Explicit) +- `jim-test-all` - Run ALL tests (incl. Explicit + Pester) +- `jim-db` - Start PostgreSQL (for local debugging) +- `jim-db-stop` - Stop PostgreSQL +- `jim-migrate` - Apply migrations +- `jim-postgres-tune` - Re-tune PostgreSQL for current CPU/RAM + +**Docker Stack Management:** +- `jim-stack` - Start Docker stack +- `jim-stack-logs` - View Docker stack logs +- `jim-stack-down` - Stop Docker stack +- `jim-restart` - Restart stack (re-reads .env, no rebuild) + +**Docker Builds (rebuild and start services):** +- `jim-build` - Build all services + start +- `jim-build-web` - Build jim.web + start +- `jim-build-worker` - Build jim.worker + start +- `jim-build-scheduler` - Build jim.scheduler + start + +**Reset:** +- `jim-reset` - Reset JIM (delete database & logs volumes) + +**Diagrams:** +- `jim-diagrams` - Export Structurizr C4 diagrams as SVG (requires Docker) + +**Planning:** +- `jim-prd` - Create a new PRD from template (prompts for feature name) + +**Docker (Manual Commands):** +- `docker compose -f db.yml up -d` - Start database (same as jim-db) +- `docker compose -f db.yml down` - Stop database +- `docker compose logs [service]` - View service logs + +**IMPORTANT - Rebuilding Containers After Code Changes:** +When running the Docker stack and you make code changes to JIM.Web, JIM.Worker, or JIM.Scheduler, you MUST rebuild the affected container(s) for changes to take effect: +- `jim-build-web` - Rebuild and restart jim.web service +- `jim-build-worker` - Rebuild and restart jim.worker service +- `jim-build-scheduler` - Rebuild and restart jim.scheduler service +- `jim-build` - Rebuild and restart all services + +Blazor pages, API controllers, and other compiled code require container rebuilds. Simply refreshing the browser will not show changes. + +**PostgreSQL Auto-Tuning:** +PostgreSQL is automatically tuned during devcontainer setup (`.devcontainer/postgres-tune.sh`). The script: +- Auto-detects CPU cores and available RAM +- Calculates optimal pgtune settings for OLTP workloads +- Generates two gitignored overlay files: `docker-compose.local.yml` and `db.local.yml` +- The `jim-*` aliases automatically include these overlays when present (later files win) + +If you later increase devcontainer resources (e.g., scale from 4c/8GB to 8c/32GB), re-tune and restart: +- `jim-postgres-tune` then `jim-db-stop && jim-db` (local dev) +- `jim-postgres-tune` then `jim-restart` (Docker stack) + +## Dependency Update Policy + +All dependency updates from Dependabot require human review before merging - there is no auto-merge. This applies to all ecosystems: NuGet packages, Docker base images, and GitHub Actions. A maintainer must review each PR, verify the changes are appropriate, and merge manually. + +**NuGet Packages:** +- Pin dependency versions in `.csproj` files (avoid floating versions) +- Dependabot proposes weekly PRs for patch and minor updates +- Review each update for compatibility, changelog notes, and known issues before merging +- Run `dotnet list package --vulnerable` to check for known vulnerabilities + +**Docker Base Images:** +- Production Dockerfiles pin base image digests (`@sha256:...`) and functional apt package versions for reproducible builds +- **NEVER** remove the `@sha256:` digest from `FROM` lines +- **NEVER** remove version pins from functional apt packages (libldap, cifs-utils) +- Diagnostic utilities (curl, iputils-ping) are intentionally unpinned +- If updating a base image digest, check and update pinned apt versions to match (see `docs/DEVELOPER_GUIDE.md` "Dependency Pinning" section) + +**GitHub Actions:** +- Pin action versions by major version tag (e.g., `@v4`) in workflow files +- Dependabot proposes weekly PRs for patch and minor updates +- Review each update before merging + +## Development Workflows + +**Choose one of two workflows:** + +**Workflow 1 - Local Debugging (Recommended):** +1. Start database: `jim-db` +2. Press F5 in VS Code or run: `jim-web` +3. Debug with breakpoints and hot reload +4. Services: Web + API (https://localhost:7000), Swagger at `/api/swagger` + +**Workflow 2 - Full Docker Stack:** +1. Start all services: `jim-stack` +2. Access containerized services +3. Services: Web + API (http://localhost:5200), Swagger at `/api/swagger` + +**Use Workflow 1** for active development and debugging. +**Use Workflow 2** for integration testing or production-like environment. + +## Environment Setup + +**Required:** +- .NET 9.0 SDK +- PostgreSQL 18 (via Docker) +- Docker & Docker Compose + +**Configuration:** +- Copy `.env.example` to `.env` +- Set database credentials +- Configure SSO/OIDC settings (required) + +**Optional Environment Variables:** +- `JIM_ENCRYPTION_KEY_PATH` - Custom path for encryption key storage (default: `/data/keys` for Docker, or app data directory) + +**GitHub Codespaces:** +- PostgreSQL memory settings automatically optimized +- Use `jim-db` or `jim-stack` aliases (already configured) + +## Troubleshooting + +**Build fails:** +- Check .NET 9.0 SDK installed: `dotnet --version` +- Restore packages: `dotnet restore JIM.sln` + +**Tests fail:** +- Verify test method signature: `public async Task TestNameAsync()` +- Check `Assert.ThrowsAsync` is awaited: `await Assert.ThrowsAsync(...)` + +**Database connection:** +- Verify PostgreSQL running: `docker compose ps` +- Check `.env` connection string +- Apply migrations if needed + +**Sync Activities Failing with UnhandledError:** +- UnhandledError items in Activities indicate code/logic bugs, not data problems +- Check worker logs for the full exception stack trace using: `docker compose logs jim.worker --tail=1000 | grep -A 5 "Unhandled.*sync error"` +- Common causes: + - Query returning unexpected number of results (e.g., `SingleOrDefaultAsync` with duplicates) + - Missing or null data where code expected values + - Type casting errors or invalid data states +- DO NOT silently ignore UnhandledErrors - they indicate data integrity risk + +**Sync Statistics Not What Expected:** +- Check log for summary statistics at end of import/sync/export (look for "SUMMARY - Total objects") +- Verify Run Profile is selecting correct partition/container +- Verify sync rules are correctly scoped to object types +- Check for DuplicateObject errors - indicates deduplication is working diff --git a/CLAUDE.md b/CLAUDE.md index 2e2defa1..95578a76 100644 --- a/CLAUDE.md +++ b/CLAUDE.md @@ -134,115 +134,14 @@ Sync operations are the core of JIM. Customers depend on JIM to synchronise thei - Exception: `.devcontainer/setup.sh` is acceptable as it runs during container creation - Never create bash/shell scripts for project automation or testing -## Commands - -**Build & Test:** -- `dotnet build JIM.sln` - Build entire solution (use for final pre-PR check) -- `dotnet build test/JIM.Worker.Tests/` - Build a specific project and its dependencies (preferred during development) -- `dotnet test JIM.sln` - Run all tests (use for final pre-PR check) -- `dotnet test test/JIM.Worker.Tests/` - Run a specific test project (preferred during development) -- `dotnet test --filter "FullyQualifiedName~TestName"` - Run specific test -- `dotnet clean && dotnet build` - Clean build - -**Database:** -- `dotnet ef migrations add [Name] --project src/JIM.PostgresData` - Add migration -- `dotnet ef database update --project src/JIM.PostgresData` - Apply migrations -- `docker compose exec jim.web dotnet ef database update` - Apply migrations in Docker - -**Shell Aliases (Recommended):** -- Aliases are automatically configured from `.devcontainer/jim-aliases.sh` -- If aliases don't work, run: `source ~/.zshrc` (or restart terminal) -- `jim` - List all available jim aliases -- `jim-compile` - Build entire solution (dotnet build) -- `jim-test` - Run unit + workflow tests (excludes Explicit) -- `jim-test-all` - Run ALL tests (incl. Explicit + Pester) -- `jim-db` - Start PostgreSQL (for local debugging) -- `jim-db-stop` - Stop PostgreSQL -- `jim-migrate` - Apply migrations -- `jim-postgres-tune` - Re-tune PostgreSQL for current CPU/RAM - -**Docker Stack Management:** -- `jim-stack` - Start Docker stack -- `jim-stack-logs` - View Docker stack logs -- `jim-stack-down` - Stop Docker stack -- `jim-restart` - Restart stack (re-reads .env, no rebuild) - -**Docker Builds (rebuild and start services):** -- `jim-build` - Build all services + start -- `jim-build-web` - Build jim.web + start -- `jim-build-worker` - Build jim.worker + start -- `jim-build-scheduler` - Build jim.scheduler + start - -**Reset:** -- `jim-reset` - Reset JIM (delete database & logs volumes) - -**Diagrams:** -- `jim-diagrams` - Export Structurizr C4 diagrams as SVG (requires Docker) - -**Planning:** -- `jim-prd` - Create a new PRD from template (prompts for feature name) - -**Docker (Manual Commands):** -- `docker compose -f db.yml up -d` - Start database (same as jim-db) -- `docker compose -f db.yml down` - Stop database -- `docker compose logs [service]` - View service logs - -**IMPORTANT - Rebuilding Containers After Code Changes:** -When running the Docker stack and you make code changes to JIM.Web, JIM.Worker, or JIM.Scheduler, you MUST rebuild the affected container(s) for changes to take effect: -- `jim-build-web` - Rebuild and restart jim.web service -- `jim-build-worker` - Rebuild and restart jim.worker service -- `jim-build-scheduler` - Rebuild and restart jim.scheduler service -- `jim-build` - Rebuild and restart all services - -Blazor pages, API controllers, and other compiled code require container rebuilds. Simply refreshing the browser will not show changes. - -**PostgreSQL Auto-Tuning:** -PostgreSQL is automatically tuned during devcontainer setup (`.devcontainer/postgres-tune.sh`). The script: -- Auto-detects CPU cores and available RAM -- Calculates optimal pgtune settings for OLTP workloads -- Generates two gitignored overlay files: `docker-compose.local.yml` and `db.local.yml` -- The `jim-*` aliases automatically include these overlays when present (later files win) - -If you later increase devcontainer resources (e.g., scale from 4c/8GB to 8c/32GB), re-tune and restart: -- `jim-postgres-tune` then `jim-db-stop && jim-db` (local dev) -- `jim-postgres-tune` then `jim-restart` (Docker stack) - -**IMPORTANT - Dependency Update Policy:** -All dependency updates from Dependabot require human review before merging - there is no auto-merge. This applies to all ecosystems: NuGet packages, Docker base images, and GitHub Actions. A maintainer must review each PR, verify the changes are appropriate, and merge manually. - -**NuGet Packages:** -- Pin dependency versions in `.csproj` files (avoid floating versions) -- Dependabot proposes weekly PRs for patch and minor updates -- Review each update for compatibility, changelog notes, and known issues before merging -- Run `dotnet list package --vulnerable` to check for known vulnerabilities - -**Docker Base Images:** -- Production Dockerfiles pin base image digests (`@sha256:...`) and functional apt package versions for reproducible builds -- **NEVER** remove the `@sha256:` digest from `FROM` lines -- **NEVER** remove version pins from functional apt packages (libldap, cifs-utils) -- Diagnostic utilities (curl, iputils-ping) are intentionally unpinned -- If updating a base image digest, check and update pinned apt versions to match (see `docs/DEVELOPER_GUIDE.md` "Dependency Pinning" section) - -**GitHub Actions:** -- Pin action versions by major version tag (e.g., `@v4`) in workflow files -- Dependabot proposes weekly PRs for patch and minor updates -- Review each update before merging - -## Key Project Locations - -**Where to add:** -- API endpoints: `src/JIM.Web/Controllers/Api/` -- API models/DTOs: `src/JIM.Web/Models/Api/` -- API extensions: `src/JIM.Web/Extensions/Api/` -- API middleware: `src/JIM.Web/Middleware/Api/` -- UI pages: `src/JIM.Web/Pages/` -- Blazor components: `src/JIM.Web/Shared/` -- Business logic: `src/JIM.Application/Servers/` -- Performance diagnostics: `src/JIM.Application/Diagnostics/` -- Domain models: `src/JIM.Models/` (see subdirectories: `Core/`, `Staging/`, `Transactional/`, `Utility/`) -- Database repositories: `src/JIM.PostgresData/` -- Connectors: `src/JIM.Connectors/` or new connector project -- Tests: `test/JIM.Web.Api.Tests/`, `test/JIM.Models.Tests/`, `test/JIM.Worker.Tests/` +## Commands & Environment + +> **Full command reference, shell aliases, Docker workflows, dependency policy, environment setup, and troubleshooting:** See `.devcontainer/CLAUDE.md` + +**Quick reference:** +- `jim-compile` / `jim-test` / `jim-test-all` - Build and test +- `jim-db` / `jim-stack` - Start database / full Docker stack +- `jim-build-web` / `jim-build-worker` / `jim-build-scheduler` - Rebuild containers after code changes ## ASCII Diagrams @@ -259,60 +158,15 @@ When creating ASCII diagrams in documentation or code comments, use only reliabl ## Code Style & Conventions -**IMPORTANT Rules:** -- YOU MUST use async/await for all I/O operations (method suffix: `Async`) -- YOU MUST use constructor injection for all dependencies -- YOU MUST test method signature: `[Test] public async Task TestNameAsync()` -- **CRITICAL: Use British English (en-GB) for ALL text:** - - Code: "authorisation" not "authorization", "synchronisation" not "synchronization", "colour" not "color" - - Comments: "behaviour" not "behavior", "centre" not "center", "licence" not "license" (noun) - - Documentation: "organise" not "organize", "analyse" not "analyze", "programme" not "program" (unless referring to computer programs) - - UI text: "minimise" not "minimize", "optimise" not "optimize", "cancelled" not "canceled" - - Units: Metric only (metres, litres, kilograms, kilometres) - never use imperial units - - Date/Time: Always use UTC for storage and internal operations; display in user's local time zone where appropriate - - Exceptions: Technical terms, proper nouns, third-party library names, URLs - -**DateTime Handling (IMPORTANT):** -- Always use `DateTime` type (not `DateTimeOffset`) in models -- Always use `DateTime.UtcNow` for current time - NEVER use `DateTime.Now` -- PostgreSQL stores DateTime as `timestamp with time zone` (internally UTC) -- **Runtime quirk**: Npgsql returns `DateTimeOffset` when reading from database, even though model properties are `DateTime` -- Code that processes DateTime values from the database must handle BOTH `DateTime` and `DateTimeOffset` types -- See `DynamicExpressoEvaluator.ToFileTime()` for an example of handling both types -- This design choice maintains database portability (MySQL, SQL Server, etc. handle DateTimeOffset differently) - -**Raw SQL Nullable Parameters (CRITICAL):** -- NEVER use `(object?)value ?? DBNull.Value` as a parameter to `ExecuteSqlRawAsync` or `ExecuteSqlInterpolatedAsync` -- EF Core cannot infer the PostgreSQL type from bare `DBNull.Value`, causing: `InvalidOperationException: The current provider doesn't have a store type mapping for properties of type 'DBNull'` -- ALWAYS wrap nullable parameters with a typed `NpgsqlParameter`: `NullableParam(value, NpgsqlTypes.NpgsqlDbType.Text)` (see helper method in `ConnectedSystemRepository`, `ActivitiesRepository`, `MetaverseRepository`) -- This applies to ALL nullable columns in raw SQL INSERT/UPDATE statements — string, int, Guid, DateTime, bool, etc. - -**File Organisation:** -- One class per file - each class should have its own `.cs` file named after the class -- Exception: Enums are grouped into a single file per area/folder (e.g., `ConnectedSystemEnums.cs`, `PendingExportEnums.cs`) -- File names must match the class/interface name exactly (e.g., `MetaverseObject.cs` for `class MetaverseObject`) -- **Model placement**: All model/POCO/result classes MUST live in `src/JIM.Models/` — NEVER define them inline in service or server files in `JIM.Application` or other projects - - Exceptions: UI-specific models may live in `src/JIM.Web/Models/`, and API DTOs in `src/JIM.Web/Models/Api/` - - If a service method needs a result type, create it as its own file in the appropriate `JIM.Models/` subdirectory - -**Naming Patterns:** -- Methods: `GetObjectAsync`, `CreateMetaverseObjectAsync` -- Classes: Full descriptive names (avoid abbreviations) -- Properties: PascalCase with nullable reference types enabled - -**Tabs:** -- Use `` instead of `` for all top-level page tabs — it syncs the active tab with a `?t=slug` query string, enabling browser back/forward navigation -- Use plain `` only for tabs inside dialogs or nested sub-tabs where URL navigation is not needed - -**UI Element Sizing:** -- ALWAYS use normal/default sizes for ALL UI elements when adding new components -- Text: Use `Typo.body1` (default readable size) -- Chips: Use `Size.Medium` or omit Size parameter entirely (defaults to Medium) -- Buttons: Use `Size.Medium` or omit Size parameter entirely (defaults to Medium) -- Icons: Use `Size.Medium` or omit Size parameter entirely (defaults to Medium) -- Other MudBlazor components: Omit Size parameter to use default sizing -- Only use smaller sizes (`Typo.body2`, `Size.Small`, etc.) when explicitly requested by the user -- Users prefer readable, appropriately-sized UI elements by default +**Key rules (always apply):** +- Use async/await for all I/O operations (method suffix: `Async`) +- Use constructor injection for all dependencies +- **CRITICAL: Use British English (en-GB) for ALL text** — "authorisation", "synchronisation", "behaviour", "colour", etc. +- Use `DateTime.UtcNow` — NEVER `DateTime.Now` +- One class per file, file names match class names exactly +- All models/POCOs live in `src/JIM.Models/` — never inline in service files + +> **Full conventions (DateTime handling, raw SQL parameters, file organisation, naming patterns, UI sizing, MudBlazor tabs):** See `src/CLAUDE.md` ## Testing @@ -396,110 +250,11 @@ Prefer: Microsoft-maintained packages > established corporate-backed packages > ## Architecture Quick Reference -**Metaverse Pattern:** -- MetaverseObject = Central identity entity -- ConnectedSystemObject = External system identity -- SyncRule = Bidirectional mapping between systems -- All operations flow through the metaverse (never direct system-to-system) - -**Layer Dependencies (top to bottom):** -1. JIM.Web (Presentation - includes both Blazor UI and REST API at `/api/`) -2. JIM.Application (Business Logic) -3. JIM.Models (Domain) -4. JIM.Data, JIM.PostgresData (Data Access) - -**CRITICAL: Respect N-Tier Architecture - NEVER Bypass Layers:** - -JIM follows strict n-tier architecture. Each layer may ONLY call the layer directly below it: - -``` -+------------------+ -| JIM.Web | Blazor pages, API controllers -+--------+---------+ - | ONLY calls JimApplication (never Repository directly) - v -+------------------+ -| JIM.Application | Business logic, orchestration (Servers/) -+--------+---------+ - | ONLY calls Repository interfaces - v -+------------------+ -| JIM.Data | Repository interfaces -+------------------+ - | - v -+------------------+ -| JIM.PostgresData | EF Core implementations -+------------------+ -``` - -**Rules:** -- **JIM.Web** (UI/API) must ONLY access data through `JimApplication` facade (e.g., `Jim.Metaverse`, `Jim.Scheduler`, `Jim.ConnectedSystems`) -- **NEVER** call `Jim.Repository.*` directly from Blazor pages or API controllers -- If a method doesn't exist on the Application layer, ADD IT there - don't bypass to the repository -- This separation ensures business logic stays in one place and can be tested independently - -**Bad - Bypassing layers:** -```csharp -// In a Blazor page - WRONG! -var schedule = await Jim.Repository.Scheduling.GetScheduleAsync(id); -``` - -**Good - Respecting layers:** -```csharp -// In a Blazor page - CORRECT! -var schedule = await Jim.Scheduler.GetScheduleAsync(id); -``` - -## Common Development Tasks - -**Adding a Connector:** -1. Implement `IConnector` and capability interfaces -2. Add to `src/JIM.Connectors/` or create new project -3. Register in DI container -4. Add tests - -**Adding API Endpoint:** -1. Add method to controller in `src/JIM.Web/Controllers/Api/` -2. Use DTOs for request/response (in `src/JIM.Web/Models/Api/`) -3. Add XML comments for Swagger -4. Test via Swagger UI at `/api/swagger` +**Layer Dependencies:** JIM.Web -> JIM.Application -> JIM.Models -> JIM.Data/JIM.PostgresData. **NEVER bypass layers** — UI/API must only call `JimApplication`, never `Jim.Repository.*` directly. -**Modifying Database Schema:** -1. Update entity in `src/JIM.Models/` -2. Create migration: `dotnet ef migrations add [Name] --project src/JIM.PostgresData` -3. Review generated migration -4. Test: `dotnet ef database update --project src/JIM.PostgresData` -5. Commit migration files +**Metaverse Pattern:** All operations flow through the metaverse (MetaverseObject <-> SyncRule <-> ConnectedSystemObject). Never direct system-to-system. -**CRITICAL: NEVER flatten, squash, delete, or reset EF Core migrations.** Migrations are append-only. Deployed instances track applied migrations by name in `__EFMigrationsHistory` — removing old migrations and replacing them with a combined migration will break every existing deployment. - -**Updating Architecture Diagrams:** - -When making architectural changes (new containers, components, connectors, or significant restructuring): -1. Update `docs/diagrams/structurizr/workspace.dsl` to reflect the change -2. Regenerate SVGs: `jim-diagrams` (requires Docker) -3. Commit both the DSL changes and regenerated SVG files together - -> **DSL syntax and diagram details:** See `docs/diagrams/structurizr/README.md` - -## Development Workflows - -**Choose one of two workflows:** - -**Workflow 1 - Local Debugging (Recommended):** -1. Start database: `jim-db` -2. Press F5 in VS Code or run: `jim-web` -3. Debug with breakpoints and hot reload -4. Services: Web + API (https://localhost:7000), Swagger at `/api/swagger` - -**Workflow 2 - Full Docker Stack:** -1. Start all services: `jim-stack` -2. Access containerized services -3. Services: Web + API (http://localhost:5200), Swagger at `/api/swagger` - -**Use Workflow 1** for active development and debugging. -**Use Workflow 2** for integration testing or production-like environment. +> **Full N-tier rules, layer diagram, common development tasks (adding connectors, endpoints, migrations), and project locations:** See `src/CLAUDE.md` ## Integration Testing @@ -507,55 +262,6 @@ Use `./test/integration/Run-IntegrationTests.ps1` (PowerShell) - never invoke sc > **Full commands, templates, and runner details:** See `test/CLAUDE.md` -## Environment Setup - -**Required:** -- .NET 9.0 SDK -- PostgreSQL 18 (via Docker) -- Docker & Docker Compose - -**Configuration:** -- Copy `.env.example` to `.env` -- Set database credentials -- Configure SSO/OIDC settings (required) - -**Optional Environment Variables:** -- `JIM_ENCRYPTION_KEY_PATH` - Custom path for encryption key storage (default: `/data/keys` for Docker, or app data directory) - -**GitHub Codespaces:** -- PostgreSQL memory settings automatically optimized -- Use `jim-db` or `jim-stack` aliases (already configured) - -## Troubleshooting - -**Build fails:** -- Check .NET 9.0 SDK installed: `dotnet --version` -- Restore packages: `dotnet restore JIM.sln` - -**Tests fail:** -- Verify test method signature: `public async Task TestNameAsync()` -- Check `Assert.ThrowsAsync` is awaited: `await Assert.ThrowsAsync(...)` - -**Database connection:** -- Verify PostgreSQL running: `docker compose ps` -- Check `.env` connection string -- Apply migrations if needed - -**Sync Activities Failing with UnhandledError:** -- UnhandledError items in Activities indicate code/logic bugs, not data problems -- Check worker logs for the full exception stack trace using: `docker compose logs jim.worker --tail=1000 | grep -A 5 "Unhandled.*sync error"` -- Common causes: - - Query returning unexpected number of results (e.g., `SingleOrDefaultAsync` with duplicates) - - Missing or null data where code expected values - - Type casting errors or invalid data states -- DO NOT silently ignore UnhandledErrors - they indicate data integrity risk - -**Sync Statistics Not What Expected:** -- Check log for summary statistics at end of import/sync/export (look for "SUMMARY - Total objects") -- Verify Run Profile is selecting correct partition/container -- Verify sync rules are correctly scoped to object types -- Check for DuplicateObject errors - indicates deduplication is working - ## Workflow Best Practices **Git:** diff --git a/src/CLAUDE.md b/src/CLAUDE.md new file mode 100644 index 00000000..84cade60 --- /dev/null +++ b/src/CLAUDE.md @@ -0,0 +1,165 @@ +# Source Code Reference + +> Detailed coding conventions, architecture rules, and development tasks for `src/`. See root `CLAUDE.md` for behavioural rules and guardrails. + +## Key Project Locations + +**Where to add:** +- API endpoints: `JIM.Web/Controllers/Api/` +- API models/DTOs: `JIM.Web/Models/Api/` +- API extensions: `JIM.Web/Extensions/Api/` +- API middleware: `JIM.Web/Middleware/Api/` +- UI pages: `JIM.Web/Pages/` +- Blazor components: `JIM.Web/Shared/` +- Business logic: `JIM.Application/Servers/` +- Performance diagnostics: `JIM.Application/Diagnostics/` +- Domain models: `JIM.Models/` (see subdirectories: `Core/`, `Staging/`, `Transactional/`, `Utility/`) +- Database repositories: `JIM.PostgresData/` +- Connectors: `JIM.Connectors/` or new connector project +- Tests: `../test/JIM.Web.Api.Tests/`, `../test/JIM.Models.Tests/`, `../test/JIM.Worker.Tests/` + +## Code Style & Conventions + +**IMPORTANT Rules:** +- YOU MUST use async/await for all I/O operations (method suffix: `Async`) +- YOU MUST use constructor injection for all dependencies +- YOU MUST test method signature: `[Test] public async Task TestNameAsync()` +- **CRITICAL: Use British English (en-GB) for ALL text:** + - Code: "authorisation" not "authorization", "synchronisation" not "synchronization", "colour" not "color" + - Comments: "behaviour" not "behavior", "centre" not "center", "licence" not "license" (noun) + - Documentation: "organise" not "organize", "analyse" not "analyze", "programme" not "program" (unless referring to computer programs) + - UI text: "minimise" not "minimize", "optimise" not "optimize", "cancelled" not "canceled" + - Units: Metric only (metres, litres, kilograms, kilometres) - never use imperial units + - Date/Time: Always use UTC for storage and internal operations; display in user's local time zone where appropriate + - Exceptions: Technical terms, proper nouns, third-party library names, URLs + +**DateTime Handling (IMPORTANT):** +- Always use `DateTime` type (not `DateTimeOffset`) in models +- Always use `DateTime.UtcNow` for current time - NEVER use `DateTime.Now` +- PostgreSQL stores DateTime as `timestamp with time zone` (internally UTC) +- **Runtime quirk**: Npgsql returns `DateTimeOffset` when reading from database, even though model properties are `DateTime` +- Code that processes DateTime values from the database must handle BOTH `DateTime` and `DateTimeOffset` types +- See `DynamicExpressoEvaluator.ToFileTime()` for an example of handling both types +- This design choice maintains database portability (MySQL, SQL Server, etc. handle DateTimeOffset differently) + +**Raw SQL Nullable Parameters (CRITICAL):** +- NEVER use `(object?)value ?? DBNull.Value` as a parameter to `ExecuteSqlRawAsync` or `ExecuteSqlInterpolatedAsync` +- EF Core cannot infer the PostgreSQL type from bare `DBNull.Value`, causing: `InvalidOperationException: The current provider doesn't have a store type mapping for properties of type 'DBNull'` +- ALWAYS wrap nullable parameters with a typed `NpgsqlParameter`: `NullableParam(value, NpgsqlTypes.NpgsqlDbType.Text)` (see helper method in `ConnectedSystemRepository`, `ActivitiesRepository`, `MetaverseRepository`) +- This applies to ALL nullable columns in raw SQL INSERT/UPDATE statements — string, int, Guid, DateTime, bool, etc. + +**File Organisation:** +- One class per file - each class should have its own `.cs` file named after the class +- Exception: Enums are grouped into a single file per area/folder (e.g., `ConnectedSystemEnums.cs`, `PendingExportEnums.cs`) +- File names must match the class/interface name exactly (e.g., `MetaverseObject.cs` for `class MetaverseObject`) +- **Model placement**: All model/POCO/result classes MUST live in `JIM.Models/` — NEVER define them inline in service or server files in `JIM.Application` or other projects + - Exceptions: UI-specific models may live in `JIM.Web/Models/`, and API DTOs in `JIM.Web/Models/Api/` + - If a service method needs a result type, create it as its own file in the appropriate `JIM.Models/` subdirectory + +**Naming Patterns:** +- Methods: `GetObjectAsync`, `CreateMetaverseObjectAsync` +- Classes: Full descriptive names (avoid abbreviations) +- Properties: PascalCase with nullable reference types enabled + +**Tabs:** +- Use `` instead of `` for all top-level page tabs — it syncs the active tab with a `?t=slug` query string, enabling browser back/forward navigation +- Use plain `` only for tabs inside dialogs or nested sub-tabs where URL navigation is not needed + +**UI Element Sizing:** +- ALWAYS use normal/default sizes for ALL UI elements when adding new components +- Text: Use `Typo.body1` (default readable size) +- Chips: Use `Size.Medium` or omit Size parameter entirely (defaults to Medium) +- Buttons: Use `Size.Medium` or omit Size parameter entirely (defaults to Medium) +- Icons: Use `Size.Medium` or omit Size parameter entirely (defaults to Medium) +- Other MudBlazor components: Omit Size parameter to use default sizing +- Only use smaller sizes (`Typo.body2`, `Size.Small`, etc.) when explicitly requested by the user +- Users prefer readable, appropriately-sized UI elements by default + +## Architecture Quick Reference + +**Metaverse Pattern:** +- MetaverseObject = Central identity entity +- ConnectedSystemObject = External system identity +- SyncRule = Bidirectional mapping between systems +- All operations flow through the metaverse (never direct system-to-system) + +**Layer Dependencies (top to bottom):** +1. JIM.Web (Presentation - includes both Blazor UI and REST API at `/api/`) +2. JIM.Application (Business Logic) +3. JIM.Models (Domain) +4. JIM.Data, JIM.PostgresData (Data Access) + +**CRITICAL: Respect N-Tier Architecture - NEVER Bypass Layers:** + +JIM follows strict n-tier architecture. Each layer may ONLY call the layer directly below it: + +``` ++------------------+ +| JIM.Web | Blazor pages, API controllers ++--------+---------+ + | ONLY calls JimApplication (never Repository directly) + v ++------------------+ +| JIM.Application | Business logic, orchestration (Servers/) ++--------+---------+ + | ONLY calls Repository interfaces + v ++------------------+ +| JIM.Data | Repository interfaces ++------------------+ + | + v ++------------------+ +| JIM.PostgresData | EF Core implementations ++------------------+ +``` + +**Rules:** +- **JIM.Web** (UI/API) must ONLY access data through `JimApplication` facade (e.g., `Jim.Metaverse`, `Jim.Scheduler`, `Jim.ConnectedSystems`) +- **NEVER** call `Jim.Repository.*` directly from Blazor pages or API controllers +- If a method doesn't exist on the Application layer, ADD IT there - don't bypass to the repository +- This separation ensures business logic stays in one place and can be tested independently + +**Bad - Bypassing layers:** +```csharp +// In a Blazor page - WRONG! +var schedule = await Jim.Repository.Scheduling.GetScheduleAsync(id); +``` + +**Good - Respecting layers:** +```csharp +// In a Blazor page - CORRECT! +var schedule = await Jim.Scheduler.GetScheduleAsync(id); +``` + +## Common Development Tasks + +**Adding a Connector:** +1. Implement `IConnector` and capability interfaces +2. Add to `JIM.Connectors/` or create new project +3. Register in DI container +4. Add tests + +**Adding API Endpoint:** +1. Add method to controller in `JIM.Web/Controllers/Api/` +2. Use DTOs for request/response (in `JIM.Web/Models/Api/`) +3. Add XML comments for Swagger +4. Test via Swagger UI at `/api/swagger` + +**Modifying Database Schema:** +1. Update entity in `JIM.Models/` +2. Create migration: `dotnet ef migrations add [Name] --project src/JIM.PostgresData` +3. Review generated migration +4. Test: `dotnet ef database update --project src/JIM.PostgresData` +5. Commit migration files + +**CRITICAL: NEVER flatten, squash, delete, or reset EF Core migrations.** Migrations are append-only. Deployed instances track applied migrations by name in `__EFMigrationsHistory` — removing old migrations and replacing them with a combined migration will break every existing deployment. + +**Updating Architecture Diagrams:** + +When making architectural changes (new containers, components, connectors, or significant restructuring): +1. Update `docs/diagrams/structurizr/workspace.dsl` to reflect the change +2. Regenerate SVGs: `jim-diagrams` (requires Docker) +3. Commit both the DSL changes and regenerated SVG files together + +> **DSL syntax and diagram details:** See `docs/diagrams/structurizr/README.md`