diff --git a/PAX_Purview_Audit_Log_Processor_v1.10.5.ps1 b/PAX_Purview_Audit_Log_Processor_v1.10.6.ps1
similarity index 95%
rename from PAX_Purview_Audit_Log_Processor_v1.10.5.ps1
rename to PAX_Purview_Audit_Log_Processor_v1.10.6.ps1
index a9b0b89..80049e8 100644
--- a/PAX_Purview_Audit_Log_Processor_v1.10.5.ps1
+++ b/PAX_Purview_Audit_Log_Processor_v1.10.6.ps1
@@ -1,5 +1,5 @@
-# Portable Audit eXporter (PAX) - Purview Audit Log Processor
-# Version: v1.10.5
+# Portable Audit eXporter (PAX) - Purview Audit Log Processor
+# Version: v1.10.6
# Default Activity Type: CopilotInteraction (captures ALL M365 Copilot usage including all M365 apps and Teams meetings)
# DSPM for AI: Microsoft Purview Data Security Posture Management integration
# MIXED FREE/PAYG Activity Types: AIInteraction (currently Microsoft platforms only), ConnectedAIAppInteraction (Microsoft + third-party)
@@ -633,6 +633,12 @@
Working set (MB) threshold to trigger concurrency reduction.
Range: 256 to 32768. Default: 1500
+.PARAMETER MaxMemoryMB
+ Maximum process memory (MB) before flushing in-memory records to disk.
+ When exceeded, clears $allLogs after confirmed JSONL save to free memory.
+ Not compatible with -ExplodeDeep or -ExplodeArrays (ignored when explosion enabled).
+ Range: -1 to 65536. Default: -1 (auto = 75% of system RAM). Use 0 to disable.
+
.PARAMETER LowLatencyMs
Sustained low latency threshold to consider concurrency step-up.
Range: 100 to 600000. Default: 20000
@@ -1146,6 +1152,9 @@ param(
[ValidateRange(256,32768)]
[int]$MemoryPressureMB = 1500, # Working set (MB) threshold to trigger mild concurrency reduction
[Parameter(Mandatory = $false)]
+ [ValidateRange(-1,65536)]
+ [int]$MaxMemoryMB = -1, # Max process memory (MB) before flushing $allLogs to disk (-1 = auto 75%, 0 = disabled)
+ [Parameter(Mandatory = $false)]
[ValidateRange(100,600000)]
[int]$LowLatencyMs = 20000, # Sustained low latency threshold to consider concurrency step-up
[Parameter(Mandatory = $false)]
@@ -1744,7 +1753,7 @@ $m365UsageActivityBundle = @(
) | Select-Object -Unique
# Script version constant (must appear after param/help to keep param() valid as first executable block)
-$ScriptVersion = '1.10.5'
+$ScriptVersion = '1.10.6'
# --- Initialize/Clear persistent script variables to prevent cross-run contamination ---
# Note: Script-scoped variables persist across multiple script invocations in the same PowerShell session
@@ -2518,6 +2527,9 @@ $script:GraphAuditApiVersion_Previous = 'beta' # Fallback to this version if cu
$script:GraphAuditApiVersion = $null # Runtime-detected version (do not edit)
# ============================================================================
+# Suppress PowerShell's web request progress bar (prevents "Reading web response stream" noise)
+$ProgressPreference = 'SilentlyContinue'
+
# Telemetry tracking for Graph API parallel queries (per-slice lifecycle data)
$script:telemetryData = @()
@@ -3656,6 +3668,7 @@ function Invoke-TokenRefresh {
$script:Auth401MessageShown = $false # Reset for next auth failure cycle
Write-LogHost " [TOKEN-REFRESH] Successfully obtained fresh access token" -ForegroundColor Green
+ Write-LogHost " [TOKEN-REFRESH] Token acquired at $(Get-Date -Format 'HH:mm:ss') - proactive refresh at 30-minute age" -ForegroundColor DarkGray
}
else {
$result.Message = "Reconnected but could not extract access token"
@@ -3718,7 +3731,20 @@ function Refresh-GraphTokenIfNeeded {
$expiresOn = $script:SharedAuthState.ExpiresOn
$minutesRemaining = ($expiresOn - $now).TotalMinutes
- if ($minutesRemaining -gt $BufferMinutes) {
+ # PROACTIVE REFRESH FOR APPREG: Refresh at 30-minute token age (not just near expiry)
+ # AppRegistration can refresh silently, so we do this proactively to avoid 401s
+ $needsProactiveRefresh = $false
+ if ($script:AuthConfig.Method -eq 'AppRegistration' -and $script:AuthConfig.CanReauthenticate) {
+ if ($script:AuthConfig.TokenIssueTime) {
+ $tokenAge = (Get-Date) - $script:AuthConfig.TokenIssueTime
+ if ($tokenAge.TotalMinutes -gt 30) {
+ $needsProactiveRefresh = $true
+ Write-LogHost " [TOKEN] Token age: $([Math]::Round($tokenAge.TotalMinutes, 1)) minutes - proactive refresh triggered" -ForegroundColor Yellow
+ }
+ }
+ }
+
+ if ($minutesRemaining -gt $BufferMinutes -and -not $needsProactiveRefresh) {
return $false # Token still valid, no refresh needed
}
@@ -3732,7 +3758,10 @@ function Refresh-GraphTokenIfNeeded {
}
$script:LastProactiveRefreshAttempt = Get-Date
- Write-LogHost " [TOKEN] Token expires in $([Math]::Round($minutesRemaining, 1)) minutes - attempting proactive refresh..." -ForegroundColor Yellow
+ # Log appropriate message based on trigger reason
+ if (-not $needsProactiveRefresh) {
+ Write-LogHost " [TOKEN] Token expires in $([Math]::Round($minutesRemaining, 1)) minutes - attempting proactive refresh..." -ForegroundColor Yellow
+ }
# Try to refresh using Azure.Identity (uses cached MSAL tokens, may prompt if needed)
$tokenInfo = Get-GraphAccessTokenWithExpiry
@@ -3756,16 +3785,27 @@ function Refresh-GraphTokenIfNeeded {
$script:SharedAuthState.ExpiresOn = (Get-Date).ToUniversalTime().AddMinutes(50)
$script:SharedAuthState.LastRefresh = Get-Date
$script:SharedAuthState.RefreshCount++
+ $script:AuthConfig.TokenIssueTime = Get-Date # Reset age timer for proactive refresh
Write-LogHost " [TOKEN] Token refreshed via AppRegistration (refresh #$($script:SharedAuthState.RefreshCount))" -ForegroundColor Green
return $true
}
}
- # SILENT REFRESH FAILED - Immediately invoke interactive re-auth prompt
- # This is the bulletproof path: user can walk away for hours, script waits at prompt
+ # SILENT REFRESH FAILED
+ # For AppRegistration + Force: FATAL exit (true headless operation)
+ # For AppRegistration without Force: Fall back to interactive prompt
+ # For interactive modes: Prompt user for re-authentication
Write-LogHost " [TOKEN] [!] Silent token refresh failed - interactive re-authentication required" -ForegroundColor Red
+ # AppRegistration mode with -Force: Silent refresh failure is fatal (no interactive fallback for headless runs)
+ if ($script:AuthConfig.Method -eq 'AppRegistration' -and $Force) {
+ Write-LogHost " [TOKEN] FATAL: AppRegistration token refresh failed. Cannot continue headless (-Force mode)." -ForegroundColor Red
+ Write-LogHost " [TOKEN] Check: client secret expiration, certificate validity, or API permissions." -ForegroundColor Yellow
+ return 'Quit'
+ }
+
+ # Interactive modes OR AppRegistration without -Force: prompt user for re-authentication
$refreshResult = Invoke-TokenRefreshPrompt
if ($refreshResult -eq 'Quit') {
# User chose to quit - return special value for callers to handle
@@ -3879,6 +3919,7 @@ function Initialize-CheckpointForNewRun {
# Other settings
resultSize = if ($AllParameters.ResultSize) { $AllParameters.ResultSize } else { 10000 }
maxConcurrency = if ($AllParameters.MaxConcurrency) { $AllParameters.MaxConcurrency } else { 10 }
+ maxMemoryMB = if ($AllParameters.MaxMemoryMB) { $AllParameters.MaxMemoryMB } else { 0 }
useEOM = [bool]$AllParameters.UseEOM
autoCompleteness = [bool]$AllParameters.AutoCompleteness
includeTelemetry = [bool]$AllParameters.IncludeTelemetry
@@ -4526,7 +4567,7 @@ function Merge-IncrementalSaves {
foreach ($file in $incrementalFiles) {
try {
# If filtering by partition indices, check if this file matches
- # Filename format: Part{N}_timestamp_Nrecords.jsonl
+ # Filename format: Part{N}_timestamp_qid-{QueryId}_Nrecords.jsonl (recovery files use qid-recovery)
if ($OnlyPartitionIndices) {
$partMatch = [regex]::Match($file.Name, '^Part(\d+)_')
if ($partMatch.Success) {
@@ -4614,7 +4655,13 @@ function Merge-IncrementalSaves-Streaming {
[int[]]$OnlyPartitionIndices = $null,
[Parameter(Mandatory = $false)]
- [string[]]$Columns = $null
+ [string[]]$Columns = $null,
+
+ [Parameter(Mandatory = $false)]
+ [System.Collections.Generic.HashSet[string]]$ExcludeRecordIds = $null,
+
+ [Parameter(Mandatory = $false)]
+ [ref]$ActivityCounts = $null
)
$incrementalDir = Join-Path $OutputDirectory ".pax_incremental"
@@ -4667,8 +4714,8 @@ function Merge-IncrementalSaves-Streaming {
$startTime = Get-Date
$lastProgressTime = Get-Date
- # Track seen RecordIds for deduplication
- $seenIds = New-Object System.Collections.Generic.HashSet[string]
+ # Track seen RecordIds for deduplication (seed with any in-memory RecordIds already written to CSV)
+ $seenIds = if ($ExcludeRecordIds) { New-Object System.Collections.Generic.HashSet[string] ($ExcludeRecordIds) } else { New-Object System.Collections.Generic.HashSet[string] }
$duplicatesSkipped = 0
foreach ($file in $files) {
@@ -4712,6 +4759,12 @@ function Merge-IncrementalSaves-Streaming {
}
$opValue = if ($parsedAudit -and $parsedAudit.Operation) { $parsedAudit.Operation } else { $record.Operations }
+ # Track per-activity counts for Activity Type Breakdown
+ if ($ActivityCounts -and $opValue) {
+ if (-not $ActivityCounts.Value.ContainsKey($opValue)) { $ActivityCounts.Value[$opValue] = 0 }
+ $ActivityCounts.Value[$opValue]++
+ }
+
# Create normalized record matching expected schema
$normalizedRecord = [pscustomobject]@{
RecordId = if ($record.RecordId) { $record.RecordId } elseif ($record.Identity) { $record.Identity } elseif ($record.Id) { $record.Id } elseif ($parsedAudit -and $parsedAudit.Id) { $parsedAudit.Id } else { $null }
@@ -6916,6 +6969,28 @@ function script:GetArrayFast($parent, [string]$name) {
}
$effectiveExplodeForProgress = ($ExplodeDeep -or $ExplodeArrays -or $ForcedRawInputCsvExplosion)
+
+# MEMORY MANAGEMENT: Resolve MaxMemoryMB (-1 = auto 75% of system RAM, 0 = disabled, >0 = explicit limit)
+$script:ResolvedMaxMemoryMB = $MaxMemoryMB
+if ($MaxMemoryMB -eq -1) {
+ # Auto-detect: use 75% of total physical memory
+ try {
+ $totalRAM = [math]::Round((Get-CimInstance -ClassName Win32_ComputerSystem -ErrorAction SilentlyContinue).TotalPhysicalMemory / 1MB, 0)
+ $script:ResolvedMaxMemoryMB = [math]::Round($totalRAM * 0.75, 0)
+ Write-LogHost "Memory management: Auto-detected ${totalRAM}MB total RAM -> limit set to $($script:ResolvedMaxMemoryMB)MB (75%)" -ForegroundColor Cyan
+ } catch {
+ # Fallback if CIM fails (e.g., Linux/macOS)
+ $script:ResolvedMaxMemoryMB = 4096
+ Write-LogHost "Memory management: Could not detect system RAM, defaulting to 4096MB limit" -ForegroundColor Yellow
+ }
+} elseif ($MaxMemoryMB -eq 0) {
+ $script:ResolvedMaxMemoryMB = 0
+ Write-LogHost "Memory management: DISABLED (-MaxMemoryMB 0)" -ForegroundColor DarkGray
+}
+
+# Memory flush mode: enabled when ResolvedMaxMemoryMB > 0 AND explosion is disabled (explosion needs full $allLogs in memory)
+$script:memoryFlushEnabled = ($script:ResolvedMaxMemoryMB -gt 0) -and (-not $ExplodeDeep) -and (-not $ExplodeArrays) -and (-not $ForcedRawInputCsvExplosion)
+$script:memoryFlushed = $false # Track if we've flushed $allLogs during this run (affects export path)
$enableParallelSwitchUsed = $EnableParallel.IsPresent
if ($enableParallelSwitchUsed) { $ParallelMode = 'On' }
@@ -8053,6 +8128,13 @@ Write-LogHost "=============================================" -ForegroundColor C
Write-LogHost ""
if ($ExplodeDeep -and $ExplodeArrays) { Write-LogHost "Note: -ExplodeDeep takes precedence over -ExplodeArrays (arrays will still explode, plus deep flatten)." -ForegroundColor DarkYellow }
if ($ForcedRawInputCsvExplosion -and -not $ExplodeDeep -and -not $ExplodeArrays.IsPresent) { Write-LogHost "RAWInputCSV provided -> forcing Purview array explosion (non-exploded mode disabled)." -ForegroundColor Yellow }
+if ($script:memoryFlushEnabled) {
+ $memSource = if ($MaxMemoryMB -eq -1) { "auto-detected" } else { "user-specified" }
+ Write-LogHost "Memory management: $($script:ResolvedMaxMemoryMB)MB limit ($memSource) - will flush to disk when exceeded" -ForegroundColor Cyan
+ Write-LogHost " Note: Not compatible with explosion modes (-ExplodeDeep/-ExplodeArrays) - those modes require in-memory processing." -ForegroundColor DarkGray
+} elseif ($script:ResolvedMaxMemoryMB -gt 0 -and ($ExplodeDeep -or $ExplodeArrays -or $ForcedRawInputCsvExplosion)) {
+ Write-LogHost "Note: Memory limit ($($script:ResolvedMaxMemoryMB)MB) ignored because explosion mode is active" -ForegroundColor DarkYellow
+}
if ($RAWInputCSV) {
# Build snapshot then optionally inject EntraUsersOutput immediately after OutputFile
@@ -8066,6 +8148,7 @@ if ($RAWInputCSV) {
ExplodeArrays = $ForcedRawInputCsvExplosion
ExplodeDeep = $ExplodeDeep.IsPresent
UseEOM = $UseEOM.IsPresent
+ MaxMemoryMB = $(if ($script:ResolvedMaxMemoryMB -eq 0) { 'Off' } else { "$($script:ResolvedMaxMemoryMB)MB" + $(if ($MaxMemoryMB -eq -1) { ' (auto)' } else { '' }) })
MaxPartitions = $MaxPartitions
ResultSize = $ResultSize
PacingMs = $PacingMs
@@ -8154,6 +8237,7 @@ else {
$paramSnapshot['MaxPartitions'] = $MaxPartitions
$paramSnapshot['ResultSize'] = $ResultSize
$paramSnapshot['PacingMs'] = $PacingMs
+ $paramSnapshot['MaxMemoryMB'] = $(if ($script:ResolvedMaxMemoryMB -eq 0) { 'Off' } else { "$($script:ResolvedMaxMemoryMB)MB" + $(if ($MaxMemoryMB -eq -1) { ' (auto)' } else { '' }) })
}
# Common toggles and output options
@@ -9691,6 +9775,11 @@ try {
# Other settings
if ($cp.resultSize) { $ResultSize = $cp.resultSize }
if ($cp.maxConcurrency) { $MaxConcurrency = $cp.maxConcurrency }
+ # MaxMemoryMB: Allow user override on resume (different machine may have different RAM)
+ if (-not $PSBoundParameters.ContainsKey('MaxMemoryMB') -and $null -ne $cp.maxMemoryMB) {
+ $MaxMemoryMB = $cp.maxMemoryMB
+ Write-LogHost " Restored MaxMemoryMB from checkpoint: $MaxMemoryMB" -ForegroundColor DarkGray
+ }
if ($cp.useEOM) { $UseEOM = [switch]$true }
if ($cp.autoCompleteness) { $AutoCompleteness = [switch]$true }
if ($cp.includeTelemetry) { $IncludeTelemetry = [switch]$true }
@@ -9789,6 +9878,25 @@ $(if (-not $logFileExisted) { "=== Portable Audit eXporter (PAX) - Purview Audit
Write-LogHost "Resume mode initialized. Will continue from last checkpoint." -ForegroundColor Cyan
Write-LogHost ""
+
+ # Re-run MaxMemoryMB resolution after checkpoint restore (may have restored -1 for auto-detect)
+ $script:ResolvedMaxMemoryMB = $MaxMemoryMB
+ if ($MaxMemoryMB -eq -1) {
+ try {
+ $totalRAM = [math]::Round((Get-CimInstance -ClassName Win32_ComputerSystem -ErrorAction SilentlyContinue).TotalPhysicalMemory / 1MB, 0)
+ $script:ResolvedMaxMemoryMB = [math]::Round($totalRAM * 0.75, 0)
+ Write-LogHost "Memory management (resume): Auto-detected ${totalRAM}MB total RAM -> limit $($script:ResolvedMaxMemoryMB)MB (75%)" -ForegroundColor Cyan
+ } catch {
+ $script:ResolvedMaxMemoryMB = 4096
+ Write-LogHost "Memory management (resume): Could not detect system RAM, defaulting to 4096MB limit" -ForegroundColor Yellow
+ }
+ } elseif ($MaxMemoryMB -eq 0) {
+ Write-LogHost "Memory management (resume): DISABLED (-MaxMemoryMB 0)" -ForegroundColor DarkGray
+ } else {
+ Write-LogHost "Memory management (resume): Using $($script:ResolvedMaxMemoryMB)MB limit" -ForegroundColor DarkGray
+ }
+ # Re-evaluate memoryFlushEnabled with restored/resolved value
+ $script:memoryFlushEnabled = ($script:ResolvedMaxMemoryMB -gt 0) -and (-not $ExplodeDeep) -and (-not $ExplodeArrays) -and (-not $ForcedRawInputCsvExplosion)
}
# Authentication and Entra data collection (live mode only)
@@ -10633,19 +10741,20 @@ Write-LogHost "" # Output mode display with format-specific defaults
try {
# For AppRegistration auth ONLY: proactively refresh if token is approaching expiration
# AppRegistration can refresh automatically without user interaction
- # Token lifetime is typically 60-90 minutes; refresh proactively at 45 minutes
+ # Token lifetime is typically 60-90 minutes; refresh proactively at 30 minutes for safety buffer
if ($script:AuthConfig.CanReauthenticate -and $script:AuthConfig.Method -eq 'AppRegistration') {
$tokenAge = $null
if ($script:AuthConfig.TokenIssueTime) {
$tokenAge = (Get-Date) - $script:AuthConfig.TokenIssueTime
}
- # Refresh if token is older than 45 minutes (proactive before ~60 min expiry)
- if ($tokenAge -and $tokenAge.TotalMinutes -gt 45) {
+ # Refresh if token is older than 30 minutes (proactive, well before ~60 min expiry)
+ if ($tokenAge -and $tokenAge.TotalMinutes -gt 30) {
Write-LogHost " [TOKEN] Token age: $([Math]::Round($tokenAge.TotalMinutes, 1)) minutes - proactively refreshing..." -ForegroundColor Yellow
$refreshResult = Invoke-TokenRefresh -Force
if ($refreshResult.Success -and $refreshResult.NewToken) {
$accessToken = $refreshResult.NewToken
+ $script:AuthConfig.TokenIssueTime = Get-Date # Reset age timer
Write-LogHost " [TOKEN] Fresh token obtained for partition launch" -ForegroundColor Cyan
}
else {
@@ -10720,6 +10829,9 @@ Write-LogHost "" # Output mode display with format-specific defaults
# Define the ThreadJob scriptblock once for reuse in both initial and retry attempts
$queryJobScriptBlock = {
param($pStart, $pEnd, [array]$activity, $resultSize, $userIds, $idx, $tot, $sharedAuthState, $partition, $maxOutageMinutes, $apiVersion, $logPath, $existingQueryId)
+ # Suppress web request progress bar in job runspace
+ $ProgressPreference = 'SilentlyContinue'
+
# Helper function to build audit API URIs with correct version
function Get-AuditUri { param($path) return "https://graph.microsoft.com/$apiVersion/security/auditLog/$path" }
@@ -10898,7 +11010,7 @@ Write-LogHost "" # Output mode display with format-specific defaults
while (-not $createSuccess) {
try {
$queryUri = "https://graph.microsoft.com/$apiVersion/security/auditLog/queries"
- $createResponse = Invoke-RestMethod -Method POST -Uri $queryUri -Headers $headers -Body $queryBodyJson -ErrorAction Stop
+ $createResponse = Invoke-RestMethod -Method POST -Uri $queryUri -Headers (Get-CurrentHeaders -ClientRequestId $clientRequestId) -Body $queryBodyJson -ErrorAction Stop
$queryId = $createResponse.id
$telemetry.QueryCreatedAt = (Get-Date).ToString('yyyy-MM-dd HH:mm:ss')
@@ -11125,7 +11237,7 @@ Write-LogHost "" # Output mode display with format-specific defaults
else {
# Network outage exceeded tolerance
$outageMinutes = [Math]::Round($elapsedOutageSeconds / 60, 1)
- Write-Output "[ERROR] Partition $idx/$tot - Query creation failed: Network outage exceeded $maxOutageMinutes minute tolerance (${outageMinutes}m elapsed)"
+ Write-Output "[CREATE-FAILED] Partition $idx/$tot - Network outage exceeded $maxOutageMinutes minute tolerance (${outageMinutes}m elapsed) - will retry at end of run"
throw "Network outage exceeded $maxOutageMinutes minute tolerance during query creation"
}
}
@@ -11155,8 +11267,8 @@ Write-LogHost "" # Output mode display with format-specific defaults
# No automatic filter fallback allowed – capture diagnostics only
$errorDetails = "StatusCode: $(if ([string]::IsNullOrEmpty($statusCode)) { $_.Exception.Response.StatusCode } else { $statusCode }), Message: $($_.Exception.Message)"
- Write-Host "[ERROR] Partition $idx/$tot - Query creation FAILED: $errorDetails" -ForegroundColor Red
- Write-Output "[ERROR] Partition $idx/$tot - Query creation failed (non-retriable): $errorDetails"
+ Write-Host "[CREATE-FAILED] Partition $idx/$tot - Query creation failed: $errorDetails" -ForegroundColor Red
+ Write-Output "[ERROR] Partition $idx/$tot - Query creation failed (will retry at end of run): $errorDetails"
if ($bodyText) {
Write-Output "[GRAPH-ERROR] Partition $idx/$tot - Response body: $bodyText"
}
@@ -11230,7 +11342,7 @@ Write-LogHost "" # Output mode display with format-specific defaults
try {
$deleteUri = "https://graph.microsoft.com/$apiVersion/security/auditLog/queries/$queryId"
Invoke-RestMethod -Method DELETE -Uri $deleteUri `
- -Headers $headers -ErrorAction SilentlyContinue | Out-Null
+ -Headers (Get-CurrentHeaders -ClientRequestId $clientRequestId) -ErrorAction SilentlyContinue | Out-Null
} catch {
# Silently continue - cleanup failure shouldn't block error reporting
}
@@ -11243,7 +11355,7 @@ Write-LogHost "" # Output mode display with format-specific defaults
try {
$statusUri = "https://graph.microsoft.com/$apiVersion/security/auditLog/queries/$queryId"
$statusCheckResponse = Invoke-RestMethod -Method GET -Uri $statusUri `
- -Headers $headers -ErrorAction Stop
+ -Headers (Get-CurrentHeaders -ClientRequestId $clientRequestId) -ErrorAction Stop
} catch {
# If we can't check status, continue with normal flow
}
@@ -11252,7 +11364,7 @@ Write-LogHost "" # Output mode display with format-specific defaults
# DELETE query and return SplitRequired signal
try {
Invoke-RestMethod -Method DELETE -Uri (Get-AuditUri -path "queries/$queryId") `
- -Headers $headers -ErrorAction SilentlyContinue | Out-Null
+ -Headers (Get-CurrentHeaders -ClientRequestId $clientRequestId) -ErrorAction SilentlyContinue | Out-Null
} catch {
# Silently continue
}
@@ -11273,7 +11385,7 @@ Write-LogHost "" # Output mode display with format-specific defaults
# Poll query status with 429 throttling detection
try {
$statusResponse = Invoke-RestMethod -Method GET -Uri (Get-AuditUri -path "queries/$queryId") `
- -Headers $headers -ErrorAction Stop
+ -Headers (Get-CurrentHeaders -ClientRequestId $clientRequestId) -ErrorAction Stop
# Reset outage tracking on success
if ($netOutageStart) {
$duration = (Get-Date) - $netOutageStart
@@ -11455,7 +11567,7 @@ Write-Output "[403-MAX] Partition $idx/$tot - Max transient 403 poll retries exc
# RECORD COUNT PREVIEW: Get exact count before fetching data (enables preemptive subdivision)
try {
$countUri = "https://graph.microsoft.com/$apiVersion/security/auditLog/queries?`$count=true&`$filter=queryId eq '$queryId'"
- $countResponse = Invoke-RestMethod -Method GET -Uri $countUri -Headers $headers -TimeoutSec 10 -ErrorAction Stop
+ $countResponse = Invoke-RestMethod -Method GET -Uri $countUri -Headers (Get-CurrentHeaders -ClientRequestId $clientRequestId) -TimeoutSec 10 -ErrorAction Stop
$previewCount = $countResponse.'@odata.count'
Write-Output "[COUNT] Query $queryId succeeded - Actual record count: $previewCount"
$telemetry.PreviewRecordCount = $previewCount
@@ -11501,11 +11613,25 @@ Write-Output "[403-MAX] Partition $idx/$tot - Max transient 403 poll retries exc
'failed' {
$telemetry.Status = 'failed'
Write-Host "✗ Query failed - Partition $idx/$tot - Query ID: $queryId" -ForegroundColor Red
+ # Delete failed query from Purview to free server slot
+ if ($queryId) {
+ try {
+ Invoke-RestMethod -Method DELETE -Uri (Get-AuditUri -path "queries/$queryId") `
+ -Headers (Get-CurrentHeaders -ClientRequestId $clientRequestId) -ErrorAction SilentlyContinue | Out-Null
+ } catch {}
+ }
throw "Query failed"
}
'cancelled' {
$telemetry.Status = 'cancelled'
Write-Host "✗ Query cancelled - Partition $idx/$tot - Query ID: $queryId" -ForegroundColor Red
+ # Delete cancelled query from Purview to free server slot
+ if ($queryId) {
+ try {
+ Invoke-RestMethod -Method DELETE -Uri (Get-AuditUri -path "queries/$queryId") `
+ -Headers (Get-CurrentHeaders -ClientRequestId $clientRequestId) -ErrorAction SilentlyContinue | Out-Null
+ } catch {}
+ }
throw "Query cancelled"
}
'queued' {
@@ -11543,6 +11669,16 @@ Write-Output "[403-MAX] Partition $idx/$tot - Max transient 403 poll retries exc
}
}
} if (-not $queryComplete) {
+ # Clean up orphaned query from Purview before failing
+ if ($queryId) {
+ try {
+ Invoke-RestMethod -Method DELETE -Uri (Get-AuditUri -path "queries/$queryId") `
+ -Headers (Get-CurrentHeaders -ClientRequestId $clientRequestId) -ErrorAction SilentlyContinue | Out-Null
+ Write-Output "[CLEANUP] Partition $idx/$tot - Deleted query $queryId from Purview (poll exhausted)"
+ } catch {
+ # Silently continue - cleanup failure shouldn't block retry
+ }
+ }
throw "Query timed out"
}
@@ -11551,6 +11687,8 @@ Write-Output "[403-MAX] Partition $idx/$tot - Max transient 403 poll retries exc
$fetchNetworkErrorStart = $null
$unexpectedProcessingError = $false
$unexpectedProcessingMessage = $null
+ $fetchErrorRetryCount = 0
+ $maxFetchErrorRetries = 3 # Retry unexpected errors 3 times before giving up
# CRITICAL: When resultSize=0, fetch unlimited records (don't check count)
# When resultSize>0, stop when we reach the limit (EOM mode behavior)
@@ -11562,7 +11700,7 @@ Write-Output "[403-MAX] Partition $idx/$tot - Max transient 403 poll retries exc
while ($fetchRetries -lt $maxFetchRetries -and -not $fetchSuccess) {
try {
- $recordsResponse = Invoke-RestMethod -Method GET -Uri $recordsUri -Headers $headers -ErrorAction Stop
+ $recordsResponse = Invoke-RestMethod -Method GET -Uri $recordsUri -Headers (Get-CurrentHeaders -ClientRequestId $clientRequestId) -ErrorAction Stop
$fetchSuccess = $true
# Reset network error tracking on success
$fetchNetworkErrorStart = $null
@@ -11832,24 +11970,40 @@ Write-Output "[403-MAX] Partition $idx/$tot - Max transient 403 poll retries exc
# Examples: JSON parsing failures, unexpected response format, etc.
$unexpectedError = $errorMsg
$unexpectedStack = $_.ScriptStackTrace
+
+ # Increment retry counter and check if retries remain
+ $fetchErrorRetryCount++
+
+ if ($fetchErrorRetryCount -lt $maxFetchErrorRetries) {
+ # Retries remain - log and retry the same page
+ Write-Output "[FETCH-RETRY] Partition $idx/$tot - Unexpected error ($fetchErrorRetryCount/$maxFetchErrorRetries) - Retrying in 30s: $unexpectedError"
+ try {
+ $logMsg = "[$(Get-Date -Format 'yyyy-MM-dd HH:mm:ss')] [FETCH-RETRY] Partition $idx/$tot (Query $queryId) - Retry $fetchErrorRetryCount/$maxFetchErrorRetries for: $unexpectedError"
+ $logMsg | Add-Content -Path $using:LogFile -Encoding UTF8 -ErrorAction SilentlyContinue
+ } catch {}
+ Start-Sleep -Seconds 30
+ continue # Retry pagination loop with same $recordsUri
+ }
+
+ # All retries exhausted - fail the partition
$unexpectedProcessingError = $true
$unexpectedProcessingMessage = $unexpectedError
# Output error message (will be deduplicated by parent)
- Write-Output "[ERROR] Partition $idx/$tot - Unexpected error during record processing (will retry if attempts remain)"
+ Write-Output "[ERROR] Partition $idx/$tot - Unexpected error during record processing after $fetchErrorRetryCount retries - will retry at end of run"
# Full error details to ERROR stream (will be captured by main thread)
Write-Error "[ERROR] Partition $idx/$tot (Query $queryId) - Unexpected record processing error: $unexpectedError`n Stack: $unexpectedStack" -ErrorAction Continue
# Thread-safe file logging
try {
- $logMsg = "[$(Get-Date -Format 'yyyy-MM-dd HH:mm:ss')] [ERROR] Partition $idx/$tot (Query $queryId) - Unexpected record processing error: $unexpectedError"
+ $logMsg = "[$(Get-Date -Format 'yyyy-MM-dd HH:mm:ss')] [ERROR] Partition $idx/$tot (Query $queryId) - Unexpected record processing error after $fetchErrorRetryCount retries: $unexpectedError"
$logMsg | Add-Content -Path $using:LogFile -Encoding UTF8 -ErrorAction SilentlyContinue
" Stack trace: $unexpectedStack" | Add-Content -Path $using:LogFile -Encoding UTF8 -ErrorAction SilentlyContinue
} catch {
# Ignore logging errors in job
}
- # Break pagination loop to avoid infinite errors
+ # Break pagination loop - retries exhausted
break
}
$t1 = Get-Date
@@ -11923,6 +12077,16 @@ Write-Output "[403-MAX] Partition $idx/$tot - Max transient 403 poll retries exc
if ($unexpectedProcessingError) {
$telemetry.Status = 'failed'
+ # Clean up Purview query to free server slot after all fetch retries exhausted
+ if ($queryId) {
+ try {
+ Invoke-RestMethod -Method DELETE -Uri (Get-AuditUri -path "queries/$queryId") `
+ -Headers (Get-CurrentHeaders -ClientRequestId $clientRequestId) -ErrorAction SilentlyContinue | Out-Null
+ Write-Output "[CLEANUP] Partition $idx/$tot - Deleted query $queryId after $fetchErrorRetryCount failed fetch retries"
+ } catch {
+ # Silently continue - cleanup failure shouldn't block retry
+ }
+ }
throw [System.Exception]::new("Unexpected record processing error: $unexpectedProcessingMessage")
}
@@ -11933,7 +12097,7 @@ Write-Output "[403-MAX] Partition $idx/$tot - Max transient 403 poll retries exc
if ($queryId) {
try {
Invoke-RestMethod -Method DELETE -Uri (Get-AuditUri -path "queries/$queryId") `
- -Headers $headers -ErrorAction SilentlyContinue | Out-Null
+ -Headers (Get-CurrentHeaders -ClientRequestId $clientRequestId) -ErrorAction SilentlyContinue | Out-Null
} catch {
# Silently continue - cleanup failure shouldn't block results
}
@@ -12019,17 +12183,44 @@ Write-Output "[403-MAX] Partition $idx/$tot - Max transient 403 poll retries exc
# ============================================================
# CHECKPOINT: Reactive token refresh for auth failures (401s)
# This ALWAYS runs when AuthFailureDetected is true (not gated by CheckpointEnabled)
- # User can wait at the R/Q prompt indefinitely without script failure
+ # AppRegistration: automatic silent refresh (headless)
+ # Interactive modes: user can wait at R/Q prompt indefinitely
# ============================================================
if (Test-ShouldPromptTokenRefresh) {
- $refreshResult = Invoke-TokenRefreshPrompt
- if ($refreshResult -eq 'Quit') {
- # User chose to quit - save checkpoint if enabled and exit
- if ($script:CheckpointEnabled) { Save-Checkpoint -Force }
- Show-CheckpointExitMessage
- exit 0
+ # AppRegistration: Try automatic silent refresh first
+ if ($script:AuthConfig.Method -eq 'AppRegistration' -and $script:AuthConfig.CanReauthenticate) {
+ $refreshResult = Invoke-TokenRefresh -Force
+ if ($refreshResult.Success -and $refreshResult.NewToken) {
+ $script:AuthFailureDetected = $false
+ $script:Auth401MessageShown = $false
+ Write-LogHost " [AUTH] Token refreshed automatically (AppRegistration)" -ForegroundColor Green
+ } elseif ($Force) {
+ # -Force mode: FATAL exit (true headless operation)
+ Write-LogHost " [AUTH] FATAL: AppRegistration token refresh failed (-Force mode)" -ForegroundColor Red
+ if ($script:CheckpointEnabled) { Save-Checkpoint -Force }
+ Show-CheckpointExitMessage
+ exit 0
+ } else {
+ # No -Force: fall back to interactive prompt
+ Write-LogHost " [AUTH] Silent refresh failed - falling back to interactive prompt" -ForegroundColor Yellow
+ $refreshResult = Invoke-TokenRefreshPrompt
+ if ($refreshResult -eq 'Quit') {
+ if ($script:CheckpointEnabled) { Save-Checkpoint -Force }
+ Show-CheckpointExitMessage
+ exit 0
+ }
+ }
+ } else {
+ # Interactive modes: prompt user
+ $refreshResult = Invoke-TokenRefreshPrompt
+ if ($refreshResult -eq 'Quit') {
+ # User chose to quit - save checkpoint if enabled and exit
+ if ($script:CheckpointEnabled) { Save-Checkpoint -Force }
+ Show-CheckpointExitMessage
+ exit 0
+ }
}
- # If 'Refreshed', we proceed with fresh token
+ # Proceed with fresh token
}
# Backpressure: Wait for a slot if we've reached MaxConcurrency
@@ -12083,21 +12274,61 @@ Write-Output "[403-MAX] Partition $idx/$tot - Max transient 403 poll retries exc
}
# REACTIVE AUTH CHECK: Handle 401 errors during backpressure wait
+ # AppRegistration: automatic silent refresh (headless)
+ # Interactive modes: prompt user for re-authentication
if ($script:AuthFailureDetected) {
Write-LogHost "" -ForegroundColor Red
Write-LogHost " [AUTH] 401 detected during job launch - initiating token refresh..." -ForegroundColor Red
- $refreshResult = Invoke-TokenRefreshPrompt
- if ($refreshResult -eq 'Quit') {
- if ($script:CheckpointEnabled) { Save-Checkpoint -Force }
- Write-LogHost " Exiting due to user request. Use -Resume to continue later." -ForegroundColor Yellow
- return
- }
- # Update shared auth state for thread jobs
- $tokenInfo = Get-GraphAccessTokenWithExpiry
- if ($tokenInfo) {
- $script:SharedAuthState.Token = $tokenInfo.Token
- $script:SharedAuthState.ExpiresOn = $tokenInfo.ExpiresOn
- $script:SharedAuthState.LastRefresh = Get-Date
+
+ # AppRegistration: Use automatic silent refresh (no user interaction)
+ if ($script:AuthConfig.Method -eq 'AppRegistration' -and $script:AuthConfig.CanReauthenticate) {
+ $refreshResult = Invoke-TokenRefresh -Force
+ if ($refreshResult.Success -and $refreshResult.NewToken) {
+ $script:AuthFailureDetected = $false
+ $script:Auth401MessageShown = $false
+ # Update shared auth state for thread jobs
+ $script:SharedAuthState.Token = $refreshResult.NewToken
+                    $script:SharedAuthState.ExpiresOn = (Get-Date).ToUniversalTime().AddMinutes(50)  # NOTE(review): assumes a ~60-min token lifetime with 10-min headroom; prefer the actual expiry from the refresh result if Invoke-TokenRefresh exposes one - TODO confirm
+ $script:SharedAuthState.LastRefresh = Get-Date
+ Write-LogHost " [AUTH] Token refreshed automatically (AppRegistration)" -ForegroundColor Green
+ } elseif ($Force) {
+ # -Force mode: FATAL exit (true headless operation)
+ Write-LogHost " [AUTH] FATAL: AppRegistration token refresh failed (-Force mode)" -ForegroundColor Red
+ if ($script:CheckpointEnabled) { Save-Checkpoint -Force }
+ Write-LogHost " Exiting due to authentication failure. Use -Resume to continue later." -ForegroundColor Yellow
+ return
+ } else {
+ # No -Force: fall back to interactive prompt
+ Write-LogHost " [AUTH] Silent refresh failed - falling back to interactive prompt" -ForegroundColor Yellow
+ $refreshResult = Invoke-TokenRefreshPrompt
+ if ($refreshResult -eq 'Quit') {
+ if ($script:CheckpointEnabled) { Save-Checkpoint -Force }
+ Write-LogHost " Exiting due to user request. Use -Resume to continue later." -ForegroundColor Yellow
+ return
+ }
+ # Update shared auth state for thread jobs
+ $tokenInfo = Get-GraphAccessTokenWithExpiry
+ if ($tokenInfo) {
+ $script:SharedAuthState.Token = $tokenInfo.Token
+ $script:SharedAuthState.ExpiresOn = $tokenInfo.ExpiresOn
+ $script:SharedAuthState.LastRefresh = Get-Date
+ }
+ }
+ } else {
+ # Interactive modes: prompt user
+ $refreshResult = Invoke-TokenRefreshPrompt
+ if ($refreshResult -eq 'Quit') {
+ if ($script:CheckpointEnabled) { Save-Checkpoint -Force }
+ Write-LogHost " Exiting due to user request. Use -Resume to continue later." -ForegroundColor Yellow
+ return
+ }
+ # Update shared auth state for thread jobs
+ $tokenInfo = Get-GraphAccessTokenWithExpiry
+ if ($tokenInfo) {
+ $script:SharedAuthState.Token = $tokenInfo.Token
+ $script:SharedAuthState.ExpiresOn = $tokenInfo.ExpiresOn
+ $script:SharedAuthState.LastRefresh = Get-Date
+ }
}
Write-LogHost " [AUTH] Token refreshed - resuming job launch" -ForegroundColor Green
}
@@ -12171,6 +12402,15 @@ Write-Output "[403-MAX] Partition $idx/$tot - Max transient 403 poll retries exc
$script:partitionStatus[$jobPartition.Index].LastError = '401 Unauthorized - token expired'
}
break # IMMEDIATE EXIT - don't process more output
+ } else {
+ # FIX: Non-401 [ERROR] messages (e.g., "Unexpected error during record processing")
+ # must also mark the partition as Failed for retry
+ $jobPartition = $jobMeta[$activeJob.Id]
+ if ($jobPartition -and $script:partitionStatus.ContainsKey($jobPartition.Index)) {
+ $script:partitionStatus[$jobPartition.Index].Status = 'Failed'
+ $script:partitionStatus[$jobPartition.Index].LastError = $output
+ Write-LogHost " [RETRY-QUEUE] Partition $($jobPartition.Index)/$($jobPartition.Total) queued for retry at end of run" -ForegroundColor Yellow
+ }
}
}
elseif ($output -match '^\[403-(CREATE|POLL|FETCH)\]') {
@@ -12211,8 +12451,15 @@ Write-Output "[403-MAX] Partition $idx/$tot - Max transient 403 poll retries exc
$script:partitionStatus[$jobPartition.Index].QueryId = $output.QueryId
$script:partitionStatus[$jobPartition.Index].RecordCount = $output.RetrievedCount
- # Add logs to collection
- if ($output.Logs -and $output.Logs.Count -gt 0) {
+ # Fallback: emit SUCCESS message if the original [SUCCESS] string was already consumed
+ $successKey = "$($activeJob.Id):SUCCESS"
+ if (-not $script:shownJobMessages.ContainsKey($successKey)) {
+ $script:shownJobMessages[$successKey] = $true
+ Write-LogHost "Query succeeded - Partition $($jobPartition.Index)/$($jobPartition.Total) - Query ID: $($output.QueryId) - Retrieved $($output.RetrievedCount) records" -ForegroundColor Green
+ }
+
+ # Add logs to collection (skip when memory flush enabled - data goes to JSONL only)
+ if ($output.Logs -and $output.Logs.Count -gt 0 -and -not $script:memoryFlushEnabled) {
$allLogs.AddRange($output.Logs)
}
@@ -12234,9 +12481,19 @@ Write-Output "[403-MAX] Partition $idx/$tot - Max transient 403 poll retries exc
if (-not (Test-Path $incrementalDir)) {
New-Item -ItemType Directory -Path $incrementalDir -Force | Out-Null
}
- $incrementalFile = Join-Path $incrementalDir "Part$($jobPartition.Index)_${global:ScriptRunTimestamp}_$($output.RetrievedCount)records.jsonl"
+ $incrementalFile = Join-Path $incrementalDir "Part$($jobPartition.Index)_${global:ScriptRunTimestamp}_qid-$($output.QueryId)_$($output.RetrievedCount)records.jsonl"
$output.Logs | ForEach-Object { $_ | ConvertTo-Json -Depth 10 -Compress } | Out-File -FilePath $incrementalFile -Encoding utf8 -Force
Write-LogHost " [SAVE] Partition $($jobPartition.Index): $($output.RetrievedCount) records saved to disk" -ForegroundColor DarkGreen
+
+ # Release source reference to allow GC to reclaim memory
+ $output.Logs = $null
+
+ # MEMORY MANAGEMENT: When enabled, we skip AddRange and use JSONL-only path
+ # Set memoryFlushed flag to signal streaming export
+ if ($script:memoryFlushEnabled -and -not $script:memoryFlushed) {
+ $script:memoryFlushed = $true
+ Write-LogHost " [MEMORY] Memory management active - data written to JSONL only (limit: $($script:ResolvedMaxMemoryMB)MB)" -ForegroundColor Yellow
+ }
} catch {
Write-LogHost " [WARN] Failed to save incremental data for Partition $($jobPartition.Index): $($_.Exception.Message)" -ForegroundColor Yellow
}
@@ -12401,8 +12658,8 @@ Write-Output "[403-MAX] Partition $idx/$tot - Max transient 403 poll retries exc
$script:partitionStatus[$jobPartition.Index].QueryId = $output.QueryId
$script:partitionStatus[$jobPartition.Index].RecordCount = $output.RetrievedCount
- # Add logs to collection
- if ($output.Logs -and $output.Logs.Count -gt 0) {
+ # Add logs to collection (skip if memory flush enabled - using JSONL only)
+ if ($output.Logs -and $output.Logs.Count -gt 0 -and -not $script:memoryFlushEnabled) {
$allLogs.AddRange($output.Logs)
}
@@ -12424,9 +12681,18 @@ Write-Output "[403-MAX] Partition $idx/$tot - Max transient 403 poll retries exc
if (-not (Test-Path $incrementalDir)) {
New-Item -ItemType Directory -Path $incrementalDir -Force | Out-Null
}
- $incrementalFile = Join-Path $incrementalDir "Part$($jobPartition.Index)_${global:ScriptRunTimestamp}_$($output.RetrievedCount)records.jsonl"
+ $incrementalFile = Join-Path $incrementalDir "Part$($jobPartition.Index)_${global:ScriptRunTimestamp}_qid-$($output.QueryId)_$($output.RetrievedCount)records.jsonl"
$output.Logs | ForEach-Object { $_ | ConvertTo-Json -Depth 10 -Compress } | Out-File -FilePath $incrementalFile -Encoding utf8 -Force
Write-LogHost " [SAVE] Partition $($jobPartition.Index): $($output.RetrievedCount) records saved to disk" -ForegroundColor DarkGreen
+
+ # Clear reference to allow GC (critical for memory management)
+ $output.Logs = $null
+
+ # MEMORY MANAGEMENT: Mark if we're using JSONL-only mode
+ if ($script:memoryFlushEnabled -and -not $script:memoryFlushed) {
+ $script:memoryFlushed = $true
+ Write-LogHost " [MEMORY] Memory management active - data written to JSONL only (streaming export at end)" -ForegroundColor Yellow
+ }
} catch {
Write-LogHost " [WARN] Failed to save incremental data for Partition $($jobPartition.Index): $($_.Exception.Message)" -ForegroundColor Yellow
}
@@ -12524,6 +12790,13 @@ Write-Output "[403-MAX] Partition $idx/$tot - Max transient 403 poll retries exc
$script:shownJobMessages[$msgKey] = $true
}
}
+ elseif ($output -match '^\[SUCCESS\]') {
+ $msgKey = "$($existingJob.Id):SUCCESS"
+ if (-not $script:shownJobMessages.ContainsKey($msgKey)) {
+ Write-LogHost ($output -replace '^\[SUCCESS\]\s*','') -ForegroundColor Green
+ $script:shownJobMessages[$msgKey] = $true
+ }
+ }
}
elseif ($output -isnot [string] -and -not $script:processedJobIds.Contains($existingJob.Id)) {
# FULL RESULT PROCESSING: Handle result objects immediately to ensure JSONL save
@@ -12535,8 +12808,15 @@ Write-Output "[403-MAX] Partition $idx/$tot - Max transient 403 poll retries exc
$script:partitionStatus[$jobPartition.Index].QueryId = $output.QueryId
$script:partitionStatus[$jobPartition.Index].RecordCount = $output.RetrievedCount
- # Add logs to collection
- if ($output.Logs -and $output.Logs.Count -gt 0) {
+ # Fallback: emit SUCCESS message if the original [SUCCESS] string was already consumed
+ $successKey = "$($existingJob.Id):SUCCESS"
+ if (-not $script:shownJobMessages.ContainsKey($successKey)) {
+ $script:shownJobMessages[$successKey] = $true
+ Write-LogHost "Query succeeded - Partition $($jobPartition.Index)/$($jobPartition.Total) - Query ID: $($output.QueryId) - Retrieved $($output.RetrievedCount) records" -ForegroundColor Green
+ }
+
+ # Add logs to collection (skip if memory flush enabled - using JSONL only)
+ if ($output.Logs -and $output.Logs.Count -gt 0 -and -not $script:memoryFlushEnabled) {
$allLogs.AddRange($output.Logs)
}
@@ -12558,9 +12838,18 @@ Write-Output "[403-MAX] Partition $idx/$tot - Max transient 403 poll retries exc
if (-not (Test-Path $incrementalDir)) {
New-Item -ItemType Directory -Path $incrementalDir -Force | Out-Null
}
- $incrementalFile = Join-Path $incrementalDir "Part$($jobPartition.Index)_${global:ScriptRunTimestamp}_$($output.RetrievedCount)records.jsonl"
+ $incrementalFile = Join-Path $incrementalDir "Part$($jobPartition.Index)_${global:ScriptRunTimestamp}_qid-$($output.QueryId)_$($output.RetrievedCount)records.jsonl"
$output.Logs | ForEach-Object { $_ | ConvertTo-Json -Depth 10 -Compress } | Out-File -FilePath $incrementalFile -Encoding utf8 -Force
Write-LogHost " [SAVE] Partition $($jobPartition.Index): $($output.RetrievedCount) records saved to disk" -ForegroundColor DarkGreen
+
+ # Clear reference to allow GC (critical for memory management)
+ $output.Logs = $null
+
+ # MEMORY MANAGEMENT: Mark if we're using JSONL-only mode
+ if ($script:memoryFlushEnabled -and -not $script:memoryFlushed) {
+ $script:memoryFlushed = $true
+ Write-LogHost " [MEMORY] Memory management active - data written to JSONL only (streaming export at end)" -ForegroundColor Yellow
+ }
} catch {
Write-LogHost " [WARN] Failed to save incremental data for Partition $($jobPartition.Index): $($_.Exception.Message)" -ForegroundColor Yellow
}
@@ -12686,6 +12975,9 @@ Write-Output "[403-MAX] Partition $idx/$tot - Max transient 403 poll retries exc
elseif ($output -match '^\[STATUS\] Query running') {
$msgKey = "$($job.Id):STATUS"
}
+ elseif ($output -match '^\[SUCCESS\]') {
+ $msgKey = "$($job.Id):SUCCESS"
+ }
elseif ($output -match '^\[403-CREATE\]|^\[403-FETCH\]') {
# Use the full output as key to deduplicate identical 403 messages
# This prevents "Attempt 2/3" from repeating but allows different attempts to show
@@ -12765,9 +13057,16 @@ Write-Output "[403-MAX] Partition $idx/$tot - Max transient 403 poll retries exc
$script:partitionStatus[$pt.Index].QueryId = $output.QueryId
$script:partitionStatus[$pt.Index].RecordCount = $output.RetrievedCount
- # Add logs to collection immediately using AddRange (non-blocking, O(1) vs O(n))
+ # Fallback: emit SUCCESS message if the original [SUCCESS] string was already consumed
+ $successKey = "$($job.Id):SUCCESS"
+ if (-not $script:shownJobMessages.ContainsKey($successKey)) {
+ $script:shownJobMessages[$successKey] = $true
+ Write-LogHost "Query succeeded - Partition $($pt.Index)/$($pt.Total) - Query ID: $($output.QueryId) - Retrieved $($output.RetrievedCount) records" -ForegroundColor Green
+ }
+
+ # Add logs to collection (skip if memory flush enabled - using JSONL only)
# Receive-Job can only be called once, so we must collect now
- if ($output.Logs -and $output.Logs.Count -gt 0) {
+ if ($output.Logs -and $output.Logs.Count -gt 0 -and -not $script:memoryFlushEnabled) {
$allLogs.AddRange($output.Logs)
}
@@ -12790,10 +13089,19 @@ Write-Output "[403-MAX] Partition $idx/$tot - Max transient 403 poll retries exc
if (-not (Test-Path $incrementalDir)) {
New-Item -ItemType Directory -Path $incrementalDir -Force | Out-Null
}
- $incrementalFile = Join-Path $incrementalDir "Part$($pt.Index)_${global:ScriptRunTimestamp}_$($output.RetrievedCount)records.jsonl"
+ $incrementalFile = Join-Path $incrementalDir "Part$($pt.Index)_${global:ScriptRunTimestamp}_qid-$($output.QueryId)_$($output.RetrievedCount)records.jsonl"
# Write as JSON Lines (NDJSON) - one record per line for recoverability
$output.Logs | ForEach-Object { $_ | ConvertTo-Json -Depth 10 -Compress } | Out-File -FilePath $incrementalFile -Encoding utf8 -Force
Write-LogHost " [SAVE] Partition $($pt.Index): $($output.RetrievedCount) records saved to disk" -ForegroundColor DarkGreen
+
+ # Clear reference to allow GC (critical for memory management)
+ $output.Logs = $null
+
+ # MEMORY MANAGEMENT: Mark if we're using JSONL-only mode
+ if ($script:memoryFlushEnabled -and -not $script:memoryFlushed) {
+ $script:memoryFlushed = $true
+ Write-LogHost " [MEMORY] Memory management active - data written to JSONL only (streaming export at end)" -ForegroundColor Yellow
+ }
} catch {
Write-LogHost " [WARN] Failed to save incremental data for Partition $($pt.Index): $($_.Exception.Message)" -ForegroundColor Yellow
}
@@ -13172,8 +13480,8 @@ Write-Output "[403-MAX] Partition $idx/$tot - Max transient 403 poll retries exc
$dataAddedToCollection = $false
$recordsBeforeAdd = $allLogs.Count
- # Add logs to collection first
- if ($res.Logs -and $res.Logs.Count -gt 0) {
+ # Add logs to collection (skip when memory flush enabled - data goes to JSONL only)
+ if ($res.Logs -and $res.Logs.Count -gt 0 -and -not $script:memoryFlushEnabled) {
foreach ($log in $res.Logs) {
[void]$allLogs.Add($log)
}
@@ -13200,7 +13508,7 @@ Write-Output "[403-MAX] Partition $idx/$tot - Max transient 403 poll retries exc
if (-not (Test-Path $incrementalDir)) {
New-Item -ItemType Directory -Path $incrementalDir -Force | Out-Null
}
- $incrementalFile = Join-Path $incrementalDir "Part$($pt.Index)_${global:ScriptRunTimestamp}_$($res.RetrievedCount)records.jsonl"
+ $incrementalFile = Join-Path $incrementalDir "Part$($pt.Index)_${global:ScriptRunTimestamp}_qid-$($res.QueryId)_$($res.RetrievedCount)records.jsonl"
# Write as JSON Lines (NDJSON) - one record per line for recoverability
$res.Logs | ForEach-Object { $_ | ConvertTo-Json -Depth 10 -Compress } | Out-File -FilePath $incrementalFile -Encoding utf8 -Force
Write-LogHost " [SAVE] Partition $($pt.Index): $($res.RetrievedCount) records saved to disk" -ForegroundColor DarkGreen
@@ -13593,7 +13901,7 @@ Write-Output "[403-MAX] Partition $idx/$tot - Max transient 403 poll retries exc
if (-not (Test-Path $incrementalDir)) {
New-Item -ItemType Directory -Path $incrementalDir -Force | Out-Null
}
- $incrementalFile = Join-Path $incrementalDir "Part$($pt.Index)_${global:ScriptRunTimestamp}_$($res.RetrievedCount)records.jsonl"
+ $incrementalFile = Join-Path $incrementalDir "Part$($pt.Index)_${global:ScriptRunTimestamp}_qid-$($res.QueryId)_$($res.RetrievedCount)records.jsonl"
# Write as JSON Lines (NDJSON) - one record per line for recoverability
$res.Logs | ForEach-Object { $_ | ConvertTo-Json -Depth 10 -Compress } | Out-File -FilePath $incrementalFile -Encoding utf8 -Force
Write-LogHost " [SAVE] Retry Partition $($pt.Index): $($res.RetrievedCount) records saved to disk" -ForegroundColor DarkGreen
@@ -13839,6 +14147,178 @@ Write-Output "[403-MAX] Partition $idx/$tot - Max transient 403 poll retries exc
}
$script:CurrentServiceFilter = $null
+ # ============================================================================
+ # FINAL SAFETY NET: Ensure ALL partitions were completed before export phase
+ # This catches any partitions that slipped through all retry mechanisms
+ # ============================================================================
+ if ($script:partitionStatus -and $script:partitionStatus.Count -gt 0 -and -not $UseEOM) {
+ # Find partitions not in terminal success states
+ $incompletePartitions = @($script:partitionStatus.Values | Where-Object {
+ $_.Status -notin @('Complete', 'Subdivided')
+ })
+
+ if ($incompletePartitions.Count -gt 0) {
+ Write-LogHost "" -ForegroundColor Yellow
+ Write-LogHost "============================================================" -ForegroundColor Yellow
+ Write-LogHost "[FINAL-RECONCILE] $($incompletePartitions.Count) partition(s) incomplete - initiating final recovery" -ForegroundColor Yellow
+ Write-LogHost "============================================================" -ForegroundColor Yellow
+
+ # List each incomplete partition
+ foreach ($incomplete in $incompletePartitions) {
+            $lastErr = if ($incomplete.LastError) { $errText = [string]$incomplete.LastError; if ($errText.Length -gt 80) { " (Last error: $($errText.Substring(0, 80))...)" } else { " (Last error: $errText)" } } else { "" }  # append '...' only when actually truncated; [string] cast guards non-string LastError values
+                Write-LogHost "   Partition $($incomplete.Partition.Index): Status=$($incomplete.Status)$lastErr" -ForegroundColor DarkYellow  # Index lives on the Partition object, not the status hashtable (see streaming-merge note below in this patch)
+ }
+
+ $maxFinalAttempts = 5
+ $finalAttempt = 0
+ $recoveredCount = 0
+
+ while ($finalAttempt -lt $maxFinalAttempts) {
+ # Re-check which partitions still need recovery
+ $stillIncomplete = @($script:partitionStatus.Values | Where-Object {
+ $_.Status -notin @('Complete', 'Subdivided')
+ })
+
+ if ($stillIncomplete.Count -eq 0) {
+ Write-LogHost "[FINAL-RECONCILE] All partitions recovered successfully!" -ForegroundColor Green
+ break
+ }
+
+ $finalAttempt++
+ Write-LogHost "[FINAL-RECONCILE] Attempt $finalAttempt/$maxFinalAttempts - $($stillIncomplete.Count) partition(s) remaining" -ForegroundColor Yellow
+
+ # Refresh token before retry (critical for long-running exports)
+ $refreshResult = Refresh-GraphTokenIfNeeded -BufferMinutes 5
+ if ($refreshResult -is [string] -and $refreshResult -eq 'Quit') {
+ if ($script:CheckpointEnabled) { Save-Checkpoint -Force }
+ Show-CheckpointExitMessage
+ exit 0
+ }
+
+ # Get fresh token for recovery jobs
+ $recoveryToken = Get-GraphAccessToken
+ if (-not $recoveryToken) {
+ Write-LogHost "[FINAL-RECONCILE] Cannot obtain access token - saving checkpoint and exiting" -ForegroundColor Red
+ if ($script:CheckpointEnabled) { Save-Checkpoint -Force }
+ Show-CheckpointExitMessage
+ exit 0
+ }
+
+ # Brief cooldown between attempts
+ if ($finalAttempt -gt 1) {
+ $cooldown = Get-Random -Minimum 15 -Maximum 30
+ Write-LogHost " Waiting $cooldown seconds before retry..." -ForegroundColor Gray
+ Start-Sleep -Seconds $cooldown
+ }
+
+ # Process each incomplete partition sequentially (safer for final recovery)
+ foreach ($incomplete in $stillIncomplete) {
+ $pt = $incomplete.Partition
+ if (-not $pt) {
+ Write-LogHost " [WARN] Partition $($incomplete.Index) has no partition object - cannot recover" -ForegroundColor Red
+ continue
+ }
+
+ Write-LogHost " [RECOVER] Retrying Partition $($pt.Index)/$($pt.Total)..." -ForegroundColor Cyan
+
+ try {
+ # Reset status for retry
+ $script:partitionStatus[$pt.Index].Status = 'NotStarted'
+ $script:partitionStatus[$pt.Index].AttemptNumber++
+
+ # Use sequential processing for recovery (Invoke-ActivityTimeWindowProcessing)
+ $logs = Invoke-ActivityTimeWindowProcessing -ActivityType $pt.Activity -StartDate $pt.PStart -EndDate $pt.PEnd -PartitionIndex $pt.Index -TotalPartitions $pt.Total -UseEOMMode $false
+
+ if ($logs) {
+ $logArray = if ($logs -is [Array]) { $logs } else { @($logs) }
+
+ # Add to $allLogs
+                        if (-not $script:memoryFlushEnabled) { foreach ($item in $logArray) { [void]$allLogs.Add($item) } }  # match the memory-flush guard used at every other ingestion site; in flush mode the JSONL file written below is authoritative
+
+ # Save to JSONL for streaming export
+ $incrementalDir = Join-Path (Split-Path $script:PartialOutputPath -Parent) ".pax_incremental"
+ if (-not (Test-Path $incrementalDir)) { New-Item -ItemType Directory -Path $incrementalDir -Force | Out-Null }
+ $incrementalFile = Join-Path $incrementalDir "Part$($pt.Index)_${global:ScriptRunTimestamp}_qid-recovery_$($logArray.Count)records.jsonl"
+ $logArray | ForEach-Object { $_ | ConvertTo-Json -Depth 10 -Compress } | Out-File -FilePath $incrementalFile -Encoding utf8 -Force
+
+ # Mark complete
+ $script:partitionStatus[$pt.Index].Status = 'Complete'
+ $script:partitionStatus[$pt.Index].RecordCount = $logArray.Count
+ $recoveredCount++
+
+ Write-LogHost " [RECOVERED] Partition $($pt.Index): $($logArray.Count) records" -ForegroundColor Green
+
+ # Update metrics
+ $script:metrics.TotalRecordsFetched += $logArray.Count
+ } else {
+ # Zero records is valid - mark complete
+ $script:partitionStatus[$pt.Index].Status = 'Complete'
+ $script:partitionStatus[$pt.Index].RecordCount = 0
+ $recoveredCount++
+ Write-LogHost " [RECOVERED] Partition $($pt.Index): 0 records (empty time window)" -ForegroundColor Green
+ }
+
+ # Save checkpoint after each successful recovery
+ if ($script:CheckpointEnabled) {
+                        Save-Checkpoint -PartitionIndex $pt.Index -State 'Completed' -RecordCount $script:partitionStatus[$pt.Index].RecordCount  # was ($logArray.Count): $logArray is unset (or stale from a prior iteration) in the zero-record branch; status RecordCount is set in both branches
+ }
+ } catch {
+ $script:partitionStatus[$pt.Index].Status = 'Failed'
+ $script:partitionStatus[$pt.Index].LastError = $_.Exception.Message
+ Write-LogHost " [FAILED] Partition $($pt.Index): $($_.Exception.Message)" -ForegroundColor Red
+ }
+ }
+ }
+
+ # Final status report
+ $finalIncomplete = @($script:partitionStatus.Values | Where-Object {
+ $_.Status -notin @('Complete', 'Subdivided')
+ })
+
+ if ($finalIncomplete.Count -gt 0) {
+ Write-LogHost "" -ForegroundColor Red
+ Write-LogHost "============================================================" -ForegroundColor Red
+ Write-LogHost "[FINAL-RECONCILE] WARNING: $($finalIncomplete.Count) partition(s) could not be recovered after $maxFinalAttempts attempts" -ForegroundColor Red
+ Write-LogHost "============================================================" -ForegroundColor Red
+ foreach ($failed in $finalIncomplete) {
+                    Write-LogHost "   Partition $($failed.Partition.Index): $($failed.LastError)" -ForegroundColor Red  # Index lives on the Partition object, not the status hashtable (see streaming-merge note below in this patch)
+ }
+ Write-LogHost " These partitions will be missing from the export. Use -Resume to retry later." -ForegroundColor Yellow
+ } else {
+ Write-LogHost "[FINAL-RECONCILE] Complete: Recovered $recoveredCount partition(s) via final safety net" -ForegroundColor Green
+ }
+ Write-LogHost "" -ForegroundColor White
+ }
+ }
+
+ # MEMORY FLUSH MODE: If we flushed $allLogs during fetch, enable streaming merge from JSONL files
+ # This path is only for non-explosion mode (explosion is excluded from memory flush feature)
+ if ($script:memoryFlushed) {
+ Write-LogHost " [MEMORY] Memory flush occurred during fetch - enabling streaming export from JSONL files" -ForegroundColor Yellow
+ $script:UseStreamingMergeForExport = $true
+ $script:StreamingMergeDirectory = Split-Path $script:PartialOutputPath -Parent
+
+ # Get all completed partition indices from this run for streaming merge
+ # Note: partitionStatus values are hashtables with a 'Partition' key containing the partition object;
+ # the Index property lives on the partition object, not on the status hashtable itself
+ $completedPartitions = @($script:partitionStatus.Values | Where-Object { $_.Status -eq 'Complete' } | ForEach-Object { $_.Partition.Index })
+ $script:StreamingMergePartitions = $completedPartitions
+
+ # Count records from JSONL files for metrics
+ $incrementalDir = Join-Path $script:StreamingMergeDirectory ".pax_incremental"
+ $estimatedFromJSONL = 0
+ if (Test-Path $incrementalDir) {
+ $jsonlFiles = Get-ChildItem -Path $incrementalDir -Filter "*${global:ScriptRunTimestamp}*.jsonl" -ErrorAction SilentlyContinue
+ foreach ($f in $jsonlFiles) {
+ if ($f.Name -match '_(\d+)records\.jsonl$') {
+ $estimatedFromJSONL += [int]$Matches[1]
+ }
+ }
+ }
+ $script:StreamingMergeRecordCount = $estimatedFromJSONL
+ Write-LogHost " [MEMORY] Found $($estimatedFromJSONL.ToString('N0')) records across $($completedPartitions.Count) partitions for streaming export" -ForegroundColor DarkCyan
+ }
+
# MERGE INCREMENTAL SAVES: Only needed for Resume mode to recover data from skipped partitions
# Fresh runs already have all records in $allLogs (added when each partition completed)
# In resume mode, only merge data for partitions that were SKIPPED (already completed before this run)
@@ -13937,7 +14417,10 @@ Write-Output "[403-MAX] Partition $idx/$tot - Max transient 403 poll retries exc
# Cleanup happens at end of script (before exit) to allow recovery if explosion/export fails
} else {
$mergedFromIncremental = 0
- $script:UseStreamingMergeForExport = $false
+ # Preserve UseStreamingMergeForExport if memory flush already set it (fresh run with JSONL-only data)
+ if (-not $script:memoryFlushed) {
+ $script:UseStreamingMergeForExport = $false
+ }
# Note: Incremental files are retained until successful script completion for data safety
}
@@ -13947,6 +14430,9 @@ Write-Output "[403-MAX] Partition $idx/$tot - Max transient 403 poll retries exc
Write-LogHost "Records retrieved this run: $($allLogs.Count)" -ForegroundColor Cyan
Write-LogHost "Records from prior run (streaming): $($mergedFromIncremental.ToString('N0'))" -ForegroundColor Cyan
Write-LogHost "Total records for export: $(($allLogs.Count + $mergedFromIncremental).ToString('N0'))" -ForegroundColor Green
+ } elseif ($script:UseStreamingMergeForExport -and $script:StreamingMergeRecordCount -gt 0) {
+ # Fresh run with memory flush - records are in JSONL, not $allLogs
+ Write-LogHost "Total audit records retrieved: $($script:StreamingMergeRecordCount.ToString('N0')) (streaming from JSONL)" -ForegroundColor Cyan
} else {
Write-LogHost "Total audit records retrieved: $($allLogs.Count)" -ForegroundColor Cyan
}
@@ -14004,7 +14490,15 @@ Write-Output "[403-MAX] Partition $idx/$tot - Max transient 403 poll retries exc
}
}
- Write-LogHost "Unique audit records: $($allLogs.Count)" -ForegroundColor Cyan
+ # Show accurate record count — in streaming mode allLogs may be empty because records went JSONL→CSV directly
+ if ($script:UseStreamingMergeForExport) {
+ # StreamingMergeRecordCount = memory flush fresh run; mergedFromIncremental = deferred resume merge
+ $streamCount = if ($script:StreamingMergeRecordCount -gt 0) { $script:StreamingMergeRecordCount } else { $mergedFromIncremental }
+ $effectiveTotal = $allLogs.Count + $streamCount
+ Write-LogHost "Unique audit records: $($effectiveTotal.ToString('N0')) (streaming records deduplicated during export)" -ForegroundColor Cyan
+ } else {
+ Write-LogHost "Unique audit records: $($allLogs.Count)" -ForegroundColor Cyan
+ }
if ($script:Hit10KLimit -or $script:Hit1MLimit) {
Write-LogHost "";
$limitType = if ($script:Hit1MLimit) { "1M (Graph API)" } else { "10K (EOM)" }
@@ -14059,6 +14553,12 @@ Write-Output "[403-MAX] Partition $idx/$tot - Max transient 403 poll retries exc
Write-LogHost "Emitting header-only CSV (0 rows) for deterministic downstream processing..." -ForegroundColor Cyan
$headerColumns = if ($ExplodeDeep -or $ExplodeArrays -or $ForcedRawInputCsvExplosion) { if ($IncludeM365Usage -and $RAWInputCSV) { Get-M365UsageWideHeader -RawCsvPath $RAWInputCSV -BaseHeader $M365UsageBaseHeader } else { $PurviewExplodedHeader } } else { @('RecordType', 'CreationDate', 'UserIds', 'Operations', 'ResultStatus', 'ResultCount', 'Identity', 'IsValid', 'ObjectState', 'Id', 'CreationTime', 'Operation', 'OrganizationId', 'RecordTypeNum', 'ResultStatus_Audit', 'UserKey', 'UserType', 'Version', 'Workload', 'UserId', 'AppId', 'ClientAppId', 'CorrelationId', 'ModelId', 'ModelProvider', 'ModelFamily', 'TokensTotal', 'TokensInput', 'TokensOutput', 'DurationMs', 'OutcomeStatus', 'ConversationId', 'TurnNumber', 'RetryCount', 'ClientVersion', 'ClientPlatform', 'AgentId', 'AgentName', 'AgentVersion', 'AgentCategory', 'AppIdentity', 'ApplicationName', 'AuditData', 'CopilotEventData') }
try { $outputDirEmpty = Split-Path $OutputFile -Parent; if (-not (Test-Path $outputDirEmpty)) { New-Item -ItemType Directory -Path $outputDirEmpty -Force | Out-Null }; $enc = New-Object System.Text.UTF8Encoding($false); $sw = [System.IO.StreamWriter]::new($OutputFile, $false, $enc); $escapedCols = @(); foreach ($col in $headerColumns) { $c = [string]$col; $needsQuote = ($c -match '[",\r\n]') -or $c.StartsWith(' ') -or $c.EndsWith(' '); $escaped = $c -replace '"', '""'; if ($needsQuote) { $escaped = '"' + $escaped + '"' }; $escapedCols += , $escaped }; $sw.WriteLine(($escapedCols -join ',')); $sw.Flush(); $sw.Dispose() } catch { Write-LogHost "Failed to write header-only CSV: $($_.Exception.Message)" -ForegroundColor Red }
+ # Finalize checkpoint: rename _PARTIAL files and delete checkpoint (same pattern as normal completion)
+ if ($script:CheckpointEnabled -and $script:PartialOutputPath -and (Test-Path $script:PartialOutputPath)) {
+ Complete-CheckpointRun -FinalOutputPath $script:FinalOutputPath
+ $OutputFile = $script:FinalOutputPath
+ $LogFile = $script:LogFile
+ }
$script:metrics.TotalStructuredRows = 0; $script:metrics.EffectiveChunkSize = 0; Set-ProgressPhase -Phase 'Complete' -Status 'No data'; Complete-Progress; Write-LogHost "Header-only CSV created at: $OutputFile" -ForegroundColor Green; $script:ScriptCompleted = $true; return
}
@@ -14111,8 +14611,16 @@ Write-Output "[403-MAX] Partition $idx/$tot - Max transient 403 poll retries exc
$ExplosionThreads # User-specified value (already validated 2-32)
}
+ # Compute effective record count accounting for streaming merge (where $allLogs is empty, records are in JSONL)
+ $effectiveRecordCount = if ($script:UseStreamingMergeForExport) {
+ $sc = if ($script:StreamingMergeRecordCount -gt 0) { $script:StreamingMergeRecordCount } elseif ($mergedFromIncremental -gt 0) { $mergedFromIncremental } else { 0 }
+ $allLogs.Count + $sc
+ } else {
+ $allLogs.Count
+ }
+
# Enable parallel only if PS7+, more than 500 records, AND threads > 1
- $useParallelExplosion = $script:IsPS7 -and ($allLogs.Count -gt 500) -and ($actualExplosionThreads -gt 1)
+ $useParallelExplosion = $script:IsPS7 -and ($effectiveRecordCount -gt 500) -and ($actualExplosionThreads -gt 1)
$parallelBatchSize = 1000 # Records per parallel batch
$parallelThrottleLimit = $actualExplosionThreads
@@ -14123,8 +14631,8 @@ Write-Output "[403-MAX] Partition $idx/$tot - Max transient 403 poll retries exc
Write-LogHost "Parallel processing: DISABLED (requires PowerShell 7+)" -ForegroundColor Gray
} elseif ($ExplosionThreads -eq 1) {
Write-LogHost "Parallel processing: DISABLED (forced serial via -ExplosionThreads 1)" -ForegroundColor Gray
- } elseif ($allLogs.Count -le 500) {
- Write-LogHost "Parallel processing: DISABLED (only $($allLogs.Count) records - threshold is 500)" -ForegroundColor Gray
+ } elseif ($effectiveRecordCount -le 500) {
+ Write-LogHost "Parallel processing: DISABLED (only $effectiveRecordCount records - threshold is 500)" -ForegroundColor Gray
}
}
@@ -14156,18 +14664,26 @@ Write-Output "[403-MAX] Partition $idx/$tot - Max transient 403 poll retries exc
# First, write any in-memory records from THIS run's partitions (if any)
$inMemoryCount = $allLogs.Count
+ $streamingActivityCounts = @{}
if ($inMemoryCount -gt 0) {
Write-LogHost " Writing $($inMemoryCount.ToString('N0')) in-memory records from current run..." -ForegroundColor DarkCyan
Open-CsvWriter -Path $exportTemp -Columns $fastPathColumns
$csvWriter = $true
$batch = New-Object System.Collections.Generic.List[object]
+ $inMemoryRecordIds = New-Object System.Collections.Generic.HashSet[string]
$batchSize = 5000
foreach ($log in $allLogs) {
$auditData = $log.AuditData
$parsedAudit = if ($log.PSObject.Properties['_ParsedAuditData']) { $log._ParsedAuditData } else { try { $auditData | ConvertFrom-Json -ErrorAction SilentlyContinue } catch { $null } }
$opValue = if ($parsedAudit -and $parsedAudit.Operation) { $parsedAudit.Operation } else { $log.Operations }
+ # Track per-activity counts for Activity Type Breakdown
+ if ($opValue) {
+ if (-not $streamingActivityCounts.ContainsKey($opValue)) { $streamingActivityCounts[$opValue] = 0 }
+ $streamingActivityCounts[$opValue]++
+ }
+
$fastRecord = [pscustomobject]@{
RecordId = if ($log.RecordId) { $log.RecordId } elseif ($log.Identity) { $log.Identity } elseif ($log.Id) { $log.Id } elseif ($parsedAudit -and $parsedAudit.Id) { $parsedAudit.Id } else { $null }
CreationDate = if ($log.CreationDate) { $log.CreationDate.ToUniversalTime().ToString('yyyy-MM-ddTHH:mm:ss.fffZ') } else { '' }
@@ -14178,6 +14694,7 @@ Write-Output "[403-MAX] Partition $idx/$tot - Max transient 403 poll retries exc
AssociatedAdminUnitsNames = $null
}
$batch.Add($fastRecord)
+ if ($fastRecord.RecordId) { [void]$inMemoryRecordIds.Add($fastRecord.RecordId) }
if ($batch.Count -ge $batchSize) {
Write-CsvRows -Rows $batch -Columns $fastPathColumns
@@ -14204,7 +14721,7 @@ Write-Output "[403-MAX] Partition $idx/$tot - Max transient 403 poll retries exc
if ($inMemoryCount -gt 0) {
# Streaming merge needs to append to existing file - use a second temp file then combine
$streamingTemp = Join-Path ([System.IO.Path]::GetTempPath()) ("pax_streaming_" + [guid]::NewGuid().ToString() + ".tmp")
- $streamedCount = Merge-IncrementalSaves-Streaming -OutputFile $streamingTemp -OutputDirectory $script:StreamingMergeDirectory -OnlyPartitionIndices $script:StreamingMergePartitions -Columns $fastPathColumns
+ $streamedCount = Merge-IncrementalSaves-Streaming -OutputFile $streamingTemp -OutputDirectory $script:StreamingMergeDirectory -OnlyPartitionIndices $script:StreamingMergePartitions -Columns $fastPathColumns -ExcludeRecordIds $inMemoryRecordIds -ActivityCounts ([ref]$streamingActivityCounts)
# Append streaming temp to main temp (skip header line from streaming file)
if ((Test-Path $streamingTemp) -and $streamedCount -gt 0) {
@@ -14215,7 +14732,7 @@ Write-Output "[403-MAX] Partition $idx/$tot - Max transient 403 poll retries exc
$totalStreamedRecords = $inMemoryCount + $streamedCount
} else {
# No in-memory records - stream directly to final temp file
- $totalStreamedRecords = Merge-IncrementalSaves-Streaming -OutputFile $exportTemp -OutputDirectory $script:StreamingMergeDirectory -OnlyPartitionIndices $script:StreamingMergePartitions -Columns $fastPathColumns
+ $totalStreamedRecords = Merge-IncrementalSaves-Streaming -OutputFile $exportTemp -OutputDirectory $script:StreamingMergeDirectory -OnlyPartitionIndices $script:StreamingMergePartitions -Columns $fastPathColumns -ActivityCounts ([ref]$streamingActivityCounts)
}
# Move temp file to final output
@@ -14234,9 +14751,10 @@ Write-Output "[403-MAX] Partition $idx/$tot - Max transient 403 poll retries exc
$columnOrder = $fastPathColumns
$schemaFrozen = $true
- # In streaming merge (non-explosion), Structured equals Retrieved (1:1 mapping)
- foreach ($opKey in $script:metrics.Activities.Keys) {
- $script:metrics.Activities[$opKey].Structured = $script:metrics.Activities[$opKey].Retrieved
+ # Populate per-activity metrics from actual streaming counts (inline handlers don't track these reliably)
+ $script:metrics.Activities = @{}
+ foreach ($opKey in $streamingActivityCounts.Keys) {
+ $script:metrics.Activities[$opKey] = @{ Retrieved = $streamingActivityCounts[$opKey]; Structured = $streamingActivityCounts[$opKey] }
}
# Store original count for ratio comparisons (allLogs was cleared for RAM)
@@ -15008,7 +15526,8 @@ function Profile-AuditData { param([object]$AuditData) } # No-op stub for thread
# Final explosion progress update (100% completion) - SERIAL MODE ONLY
# Parallel mode has its own completion summary, skip this to avoid duplicate/confusing output
- if (-not $useParallelExplosion) {
+ # Also skip when fast path or streaming merge already handled export (skipToPostProcessing=true)
+ if (-not $useParallelExplosion -and -not $skipToPostProcessing) {
if ($allLogs.Count -ge $explosionProgressInterval) {
$elapsed = (Get-Date) - $te0
$rate = [int]($processedRecordCount / $elapsed.TotalSeconds)
diff --git a/README.md b/README.md
index 3057e84..2baa7df 100644
--- a/README.md
+++ b/README.md
@@ -1,5 +1,4 @@
# Portable Audit eXporter (PAX) Solution Set
-
**Portable Audit eXporter (PAX)** exports Copilot and AI usage data from Purview and Graph API audit logs via Graph API or EOM methods. All solutions export to CSV or Excel formats, ready for analysis in Power BI or your preferred data analysis tool.
@@ -67,7 +66,7 @@ This is an experimental script. On occasion, you may notice small deviations fro
---
-> **🔍 Purview Audit Log Processor:** Download the script → [`PAX_Purview_Audit_Log_Processor_v1.10.5.ps1`](https://github.com/microsoft/PAX/releases/download/purview-v1.10.5/PAX_Purview_Audit_Log_Processor_v1.10.5.ps1)
+> **🔍 Purview Audit Log Processor:** Download the script → [`PAX_Purview_Audit_Log_Processor_v1.10.6.ps1`](https://github.com/microsoft/PAX/releases/download/purview-v1.10.6/PAX_Purview_Audit_Log_Processor_v1.10.6.ps1)
>
> **📖 Resources:** [Latest Documentation](https://github.com/microsoft/PAX/blob/release/release_documentation/Purview_Audit_Log_Processor/PAX_Purview_Audit_Log_Processor_Documentation_v1.10.0.md) | [Latest Release Notes](https://github.com/microsoft/PAX/blob/release/release_notes/Purview_Audit_Log_Processor/PAX_Purview_Audit_Log_Processor_Release_Note_v1.10.0.md)
>
diff --git a/release_documentation/.gitkeep b/release_documentation/.gitkeep
index bf88538..8e7057d 100644
--- a/release_documentation/.gitkeep
+++ b/release_documentation/.gitkeep
@@ -1 +1 @@
-# Last updated: 2026-01-30 (PAX v1.0.17, Graph v1.0.1, Purview v1.10.5, CopilotInteractions v1.2.0)
\ No newline at end of file
+# Last updated: 2026-02-10 (PAX v1.0.18, Graph v1.0.1, Purview v1.10.6, CopilotInteractions v1.2.0)
\ No newline at end of file
diff --git a/release_documentation/Purview_Audit_Log_Processor/PAX_Purview_Audit_Log_Processor_Documentation_v1.10.0.md b/release_documentation/Purview_Audit_Log_Processor/PAX_Purview_Audit_Log_Processor_Documentation_v1.10.0.md
index c43589f..ee6270e 100644
--- a/release_documentation/Purview_Audit_Log_Processor/PAX_Purview_Audit_Log_Processor_Documentation_v1.10.0.md
+++ b/release_documentation/Purview_Audit_Log_Processor/PAX_Purview_Audit_Log_Processor_Documentation_v1.10.0.md
@@ -1,6 +1,6 @@
# Portable Audit eXporter (PAX) -
Purview Audit Log Processor
-> **📥 Quick Start:** Download the script → [`PAX_Purview_Audit_Log_Processor_v1.10.5.ps1`](https://github.com/microsoft/PAX/releases/download/purview-v1.10.5/PAX_Purview_Audit_Log_Processor_v1.10.5.ps1)
+> **📥 Quick Start:** Download the script → [`PAX_Purview_Audit_Log_Processor_v1.10.6.ps1`](https://github.com/microsoft/PAX/releases/download/purview-v1.10.6/PAX_Purview_Audit_Log_Processor_v1.10.6.ps1)
>
> **📋 Release Notes:** See what's new → [v1.10.x Release Notes](https://github.com/microsoft/PAX/blob/release/release_notes/Purview_Audit_Log_Processor/PAX_Purview_Audit_Log_Processor_Release_Note_v1.10.0.md) | [All Release Notes](https://github.com/microsoft/PAX/tree/release/release_notes/Purview_Audit_Log_Processor)
>
@@ -8,7 +8,7 @@
>
> **📚 Documentation Archive:** [v1.10.x Documentation](https://github.com/microsoft/PAX/blob/release/release_documentation/Purview_Audit_Log_Processor/PAX_Purview_Audit_Log_Processor_Documentation_v1.10.0.md) | [All Documentation](https://github.com/microsoft/PAX/tree/release/release_documentation/Purview_Audit_Log_Processor)
-**Script:** `PAX_Purview_Audit_Log_Processor_v1.10.5.ps1`
+**Script:** `PAX_Purview_Audit_Log_Processor_v1.10.6.ps1`
**Documentation Version:** 1.10.x
**Audience:** IT admins, security/compliance analysts, BI/data teams
**Runtime:** PowerShell 5.1 (compatible) / PowerShell 7+ (recommended)
@@ -51,7 +51,7 @@ This is an experimental script. On occasion, you may notice small deviations fro
12. [Combining Filters](#combining-filters)
13. [DSPM for AI](#dspm-for-ai)
14. [Excel Export](#excel-export)
-15. [Incremental Data Collection](#incremental-data-collection-appendfile)
+15. [Incremental Data Collection](#incremental-data-collection)
16. [Checkpoint & Resume](#checkpoint--resume)
17. [Output Files & Schema](#output-files--schema)
18. [Activity Types Reference](#activity-types-reference)
@@ -143,6 +143,7 @@ The **Portable Audit eXporter (PAX)** is an enterprise-grade PowerShell script t
- **Learned Block Sizes:** Per-activity and global adaptive sizing based on observed densities
- **Fast Data Writer:** Direct `StreamWriter` usage for CSV; ImportExcel module for Excel exports
- **Schema Sampling:** Configurable initial sampling to optimize column discovery vs. memory usage
+- **Memory Management:** Automatic memory monitoring (`-MaxMemoryMB`) that streams records directly to JSONL files when system memory reaches the threshold (75% of RAM by default)
@@ -450,7 +451,7 @@ powershell -ExecutionPolicy Bypass -File .\PAX_Purview_Audit_Log_Processor.ps1 -
**Notes:**
-- See [Incremental Data Collection](#incremental-data-collection-appendfile) section for complete documentation
+- See [Incremental Data Collection](#incremental-data-collection) section for complete documentation
- Validates header compatibility before appending
- Works with both live query and offline replay modes
- NOT compatible with `-IncludeUserInfo` or `-OnlyUserInfo`
@@ -1165,6 +1166,33 @@ ExchangeAdmin, ExchangeItem, ExchangeMailbox, SharePointFileOperation, SharePoin
---
+#### `-MaxMemoryMB` (int)
+
+**Purpose:** Memory threshold that controls when PAX switches to JSONL-only streaming mode (records bypass in-memory collection and are written directly to incremental JSONL files). Active by default — PAX automatically monitors memory usage and streams to disk when the threshold is reached.
+**Range:** `-1` to `65536`
+**Default:** `-1` (auto = 75% of system RAM)
+**Adjust When:**
+
+- Running on memory-constrained machines where 75% of RAM is still too generous
+- Running alongside other processes that need available RAM — set an explicit lower cap
+- Scheduled/unattended exports where you want a predictable, fixed memory ceiling
+
+**Notes:**
+
+- Always active by default at 75% of system RAM — no action needed for most users
+- Set to `0` to disable the memory threshold entirely (all records collected in memory)
+- Not compatible with `-ExplodeArrays` or `-ExplodeDeep` (explosion modes always use in-memory processing; the threshold is ignored with a logged warning)
+- Stored in checkpoint and can be overridden with `-Resume` (e.g., resuming on different hardware)
+
+**Examples:**
+
+```
+-MaxMemoryMB 4096 # Override auto-detection — cap at 4 GB
+-MaxMemoryMB 0 # Disable — keep all records in memory
+```
+
+---
+
### Observability & Completeness Parameters
#### `-EmitMetricsJson` (switch)
@@ -1271,6 +1299,7 @@ The `-Resume` switch restores ALL settings from the checkpoint file to ensure da
| `-ClientId` | Override client ID (for AppRegistration) |
| `-ClientSecret` | Provide client secret (for AppRegistration) |
| `-ExplosionThreads` | Override thread count for parallel explosion (e.g., resuming on different hardware) |
+| `-MaxMemoryMB` | Override memory threshold (e.g., resuming on different hardware) |
**NOT Allowed with `-Resume`:**
@@ -1821,7 +1850,7 @@ elseif ($LASTEXITCODE -eq 20) { Write-Host 'Circuit breaker tripped – investig
-### Performance Tuning
+### Performance Tuning Examples
💻 Show Performance Tuning Examples
@@ -1838,6 +1867,9 @@ elseif ($LASTEXITCODE -eq 20) { Write-Host 'Circuit breaker tripped – investig
# Parallel explosion for large datasets (PS7+ only)
./PAX_Purview_Audit_Log_Processor.ps1 -ExplodeDeep -ExplosionThreads 8 -StartDate 2025-10-01 -EndDate 2025-10-31
+
+# Cap memory at 4 GB for large standard exports
+./PAX_Purview_Audit_Log_Processor.ps1 -MaxMemoryMB 4096 -StartDate 2025-10-01 -EndDate 2025-10-31
```
@@ -3821,6 +3853,7 @@ This reactive approach is more reliable than time-based prompts because token li
- `-Auth` - Override authentication method
- `-TenantId`, `-ClientId`, `-ClientSecret` - Auth credentials for AppRegistration
- `-ExplosionThreads` - Override thread count for parallel explosion (e.g., resuming on different hardware)
+- `-MaxMemoryMB` - Override memory threshold (e.g., resuming on different hardware)
**NOT Allowed with `-Resume`:**
- Any other parameter (dates, activities, explosion settings, etc.)
@@ -5060,6 +5093,18 @@ pwsh -ExecutionPolicy Bypass -File ./PAX_Purview_Audit_Log_Processor.ps1 `
-EndDate 2025-10-02
```
+**For Large Standard (Non-Exploded) Exports:**
+
+PAX automatically monitors memory and streams to JSONL when 75% of system RAM is reached. Use `-MaxMemoryMB` only to override the default threshold or disable it.
+
+```powershell
+# Override auto-detection — explicit 4 GB cap on memory-constrained machines
+./PAX_Purview_Audit_Log_Processor.ps1 -MaxMemoryMB 4096 -StartDate 2025-10-01 -EndDate 2025-10-31
+
+# Disable memory threshold — keep all records in memory (not recommended for large exports)
+./PAX_Purview_Audit_Log_Processor.ps1 -MaxMemoryMB 0 -StartDate 2025-10-01 -EndDate 2025-10-31
+```
+
### Parallel Execution Tuning
diff --git a/release_notes/.gitkeep b/release_notes/.gitkeep
index bf88538..8e7057d 100644
--- a/release_notes/.gitkeep
+++ b/release_notes/.gitkeep
@@ -1 +1 @@
-# Last updated: 2026-01-30 (PAX v1.0.17, Graph v1.0.1, Purview v1.10.5, CopilotInteractions v1.2.0)
\ No newline at end of file
+# Last updated: 2026-02-10 (PAX v1.0.18, Graph v1.0.1, Purview v1.10.6, CopilotInteractions v1.2.0)
\ No newline at end of file
diff --git a/release_notes/Purview_Audit_Log_Processor/PAX_Purview_Audit_Log_Processor_Release_Note_v1.10.0.md b/release_notes/Purview_Audit_Log_Processor/PAX_Purview_Audit_Log_Processor_Release_Note_v1.10.0.md
index 6c989b7..6ad8955 100644
--- a/release_notes/Purview_Audit_Log_Processor/PAX_Purview_Audit_Log_Processor_Release_Note_v1.10.0.md
+++ b/release_notes/Purview_Audit_Log_Processor/PAX_Purview_Audit_Log_Processor_Release_Note_v1.10.0.md
@@ -3,7 +3,7 @@
## Release Information
- **Version:** 1.10.x
-- **Release Date:** 2026-01-30
+- **Release Date:** 2026-02-10
- **Released By:** Microsoft Copilot Growth ROI Advisory Team (copilot-roi-advisory-team-gh@microsoft.com)
---
@@ -12,7 +12,7 @@
Download the script below. For questions or issues, refer to the documentation.
-- **PAX Purview Audit Log Processor Script v1.10.5:** [PAX_Purview_Audit_Log_Processor_v1.10.5.ps1](https://github.com/microsoft/PAX/releases/download/purview-v1.10.5/PAX_Purview_Audit_Log_Processor_v1.10.5.ps1)
+- **PAX Purview Audit Log Processor Script v1.10.6:** [PAX_Purview_Audit_Log_Processor_v1.10.6.ps1](https://github.com/microsoft/PAX/releases/download/purview-v1.10.6/PAX_Purview_Audit_Log_Processor_v1.10.6.ps1)
- **Documentation v1.10.x (Markdown):** [PAX_Purview_Audit_Log_Processor_Documentation_v1.10.x.md](https://github.com/microsoft/PAX/blob/release/release_documentation/Purview_Audit_Log_Processor/PAX_Purview_Audit_Log_Processor_Documentation_v1.10.0.md)
---
@@ -25,7 +25,7 @@ The **Microsoft 365 Usage Bundle** (`-IncludeM365Usage`) is a single-switch acti
**Checkpoint & Resume** (`-Resume`) enables recovery from interrupted exports—a critical capability for multi-hour queries spanning large date ranges. PAX automatically saves progress after each partition completes, allowing seamless resumption after token expiry, network interruptions, or system restarts. Combined with intelligent token refresh (silent refresh attempts before prompting, proactive refresh for AppRegistration), this ensures reliable completion of even the longest exports.
-Additional enhancements include **parallel explosion processing** (`-ExplosionThreads`) for faster post-retrieval performance on PS7+, **automatic 1M record limit detection** for Graph API queries (with BlockHours auto-subdivision), new CopilotInteraction control switches, an execution telemetry export option, improved automation support with the `-Force` parameter, and UX safeguards when many output files or tabs are expected.
+Additional enhancements include **memory management** (`-MaxMemoryMB`) to prevent out-of-memory crashes on large exports by streaming records through JSONL files instead of accumulating them in memory, **parallel explosion processing** (`-ExplosionThreads`) for faster post-retrieval performance on PS7+, **automatic 1M record limit detection** for Graph API queries (with BlockHours auto-subdivision), new CopilotInteraction control switches, an execution telemetry export option, improved automation support with the `-Force` parameter, and UX safeguards when many output files or tabs are expected.
---
@@ -420,6 +420,42 @@ If minimum window reached:
---
+### Memory Management: `-MaxMemoryMB`
+
+| Area | Details |
+| --- | --- |
+| **Purpose** | Automatically prevents out-of-memory conditions during large audit log exports (100K+ records) by streaming records directly to JSONL files on disk instead of accumulating them in memory. Active by default — no switch required. |
+| **Default** | `-1` (auto-detect: 75% of system RAM). Use `0` to disable and restore original unlimited behavior. |
+| **How It Works** | When the memory threshold is reached, new records bypass the in-memory collection and are appended directly to incremental JSONL files on disk. At export time, records are streamed from JSONL files to CSV in batches with HashSet-based deduplication. |
+| **Limitation** | Not compatible with explosion modes (`-ExplodeDeep`/`-ExplodeArrays`), which require all records in memory. When explosion is specified, `-MaxMemoryMB` is ignored with a warning. |
+| **Checkpoint** | Value is saved in checkpoint JSON and restored on `-Resume`. Can be overridden on the resume command line. |
+
+#### Example
+
+```powershell
+# Default (auto-detect 75% of system RAM)
+./PAX_Purview_Audit_Log_Processor.ps1 `
+ -StartDate 2026-01-01 `
+ -EndDate 2026-02-01 `
+ -OutputPath "C:\Exports\"
+
+# Explicit 4GB limit
+./PAX_Purview_Audit_Log_Processor.ps1 `
+ -StartDate 2026-01-01 `
+ -EndDate 2026-02-01 `
+ -MaxMemoryMB 4096 `
+ -OutputPath "C:\Exports\"
+
+# Disable memory management (unlimited, original behavior)
+./PAX_Purview_Audit_Log_Processor.ps1 `
+ -StartDate 2026-01-01 `
+ -EndDate 2026-02-01 `
+ -MaxMemoryMB 0 `
+ -OutputPath "C:\Exports\"
+```
+
+---
+
## Bug Fixes
- **(v1.10.0) Activity Type Breakdown metrics:** Fixed an issue where "Retrieved" counts showed 0 in the Activity Type Breakdown and Pipeline Summary sections. Per-activity retrieved counts now display correctly in all code paths.
@@ -450,6 +486,16 @@ If minimum window reached:
- **(v1.10.5) AppRegistration token refresh failure:** Fixed "Parameter set cannot be resolved using the specified named parameters" error during automatic token refresh in long-running AppRegistration operations. The `Invoke-TokenRefresh` function had the same parameter set conflict fixed in v1.10.2 for initial authentication—passing `-ClientId` alongside `-ClientSecretCredential` when the Graph SDK expects ClientId embedded only in the PSCredential.
+- **(v1.10.6) AppRegistration token reliability for long-running exports:** Fixed multiple issues causing 401 authentication cascades during exports exceeding 60 minutes with `-Auth AppRegistration`. ThreadJob parallel partitions now build fresh headers from the shared auth state for every API call (12 locations fixed), token refresh logic now correctly uses AppRegistration credentials instead of defaulting to interactive WebLogin, and proactive token refresh now runs periodically every 30 minutes throughout the export.
+
+- **(v1.10.6) Partition error recovery and final reconciliation:** Fixed an issue where partitions encountering non-authentication errors were not being queued for retry, potentially resulting in missing data in the final export. Added a final reconciliation safety net before export that detects any incomplete partitions and retries them sequentially (up to 5 attempts). Error messages now accurately indicate that failed partitions will be retried automatically.
+
+- **(v1.10.6) Query slot cleanup and fetch retry:** Failed partitions now clean up their server-side query slots immediately, preventing orphaned queries from filling all 10 concurrent slots and blocking subsequent queries. Also added retry logic for record fetch failures (3 attempts, 30-second delays) to preserve costly server-side query preparation work before deleting the query.
+
+- **(v1.10.6) Zero-record run cleanup:** Fixed `_PARTIAL` suffix remaining on output CSV and log filenames when all partitions completed successfully but returned 0 records. Checkpoint files are now properly cleaned up on zero-record runs.
+
+- **(v1.10.6) Log message completeness:** Fixed missing and duplicate "Query succeeded" messages in the log file. All three ThreadJob output processing code paths now reliably emit exactly one success message per partition.
+
---
## Known Considerations
diff --git a/script_archive/.gitkeep b/script_archive/.gitkeep
index bf88538..8e7057d 100644
--- a/script_archive/.gitkeep
+++ b/script_archive/.gitkeep
@@ -1 +1 @@
-# Last updated: 2026-01-30 (PAX v1.0.17, Graph v1.0.1, Purview v1.10.5, CopilotInteractions v1.2.0)
\ No newline at end of file
+# Last updated: 2026-02-10 (PAX v1.0.18, Graph v1.0.1, Purview v1.10.6, CopilotInteractions v1.2.0)
\ No newline at end of file
diff --git a/script_archive/Purview_Audit_Log_Processor/PAX_Purview_Audit_Log_Processor_v1.10.6.ps1 b/script_archive/Purview_Audit_Log_Processor/PAX_Purview_Audit_Log_Processor_v1.10.6.ps1
new file mode 100644
index 0000000..80049e8
--- /dev/null
+++ b/script_archive/Purview_Audit_Log_Processor/PAX_Purview_Audit_Log_Processor_v1.10.6.ps1
@@ -0,0 +1,16570 @@
+# Portable Audit eXporter (PAX) - Purview Audit Log Processor
+# Version: v1.10.6
+# Default Activity Type: CopilotInteraction (captures ALL M365 Copilot usage including all M365 apps and Teams meetings)
+# DSPM for AI: Microsoft Purview Data Security Posture Management integration
+# MIXED FREE/PAYG Activity Types: AIInteraction (currently Microsoft platforms only), ConnectedAIAppInteraction (Microsoft + third-party)
+# PAYG Activity Types: AIAppInteraction (third-party AI via network DLP)
+# ---PAYG only applies to third-party AI apps/agents audit records and never applies to any audit records generated by Microsoft AI apps/agents
+# NOTE: Graph API mode automatically detects v1.0 (GA) or beta endpoints at runtime (no config needed)
+# NOTE: Uses operationFilters with operation names for ALL activity types (e.g., "CopilotInteraction", "AIInteraction")
+# See: https://learn.microsoft.com/en-us/office/office-365-management-api/office-365-management-activity-api-schema
+<#
+.SYNOPSIS
+ Export Microsoft Purview audit logs for Microsoft 365 Copilot and DSPM for AI activity types with optional Purview-aligned row explosion and deep flattening.
+
+.DESCRIPTION
+ Modes:
+ Standard - One row per audit record (raw CopilotEventData JSON preserved)
+ -ExplodeArrays - Produces canonical Purview exploded schema (35 fixed columns)
+ -ExplodeDeep - Same 35-column Purview schema + appended deep-flattened CopilotEventData.* columns
+
+ Graph API Version Configuration:
+ - PAX automatically detects the correct Graph API security/auditLog endpoint version
+ - Configurable version variables near top of script (lines ~1519-1527):
+ $script:GraphAuditApiVersion_Current = 'v1.0' # Try this version first
+ $script:GraphAuditApiVersion_Previous = 'beta' # Fallback if current unavailable
+ - Detection occurs once per session and is reused automatically
+ - Single-line terminal output shows which version is active
+ - Manual override: Edit the version variables if Microsoft releases new API versions
+ - Default: Tries v1.0 first (expected GA Q1 2026), falls back to beta if unavailable
+ - No command-line switches needed - fully automatic with manual override capability
+
+ Parallel Explosion Processing (PS7+ only):
+ - After data retrieval, explosion of records into rows can be parallelized
+ - Automatic on PS7+ with >500 records (uses job queue with ~1000 records per chunk)
+ - Control via -ExplosionThreads: 0=auto (2-16 threads), 1=serial, 2-32=explicit
+ - Output is identical to serial mode (same columns, data, row count; only order may differ)
+ - Falls back to serial processing on PowerShell 5.1
+
+ Reliability & Resilience:
+ - Automatic retry logic: Up to 3 attempts per partition with smart cooldown
+ - End-of-run summary: Shows Complete/Incomplete/Failed partitions with QueryIds
+ - Partial success: Continues processing with successful partitions if some fail
+ - Query naming: PAX_Query__PartX/Total visible in Purview UI
+ - Unified concurrency: MaxConcurrency parameter controls both EOM and Graph API modes (default: 10)
+ - Checkpoint & Resume: All auth modes automatically save progress to checkpoint files,
+ enabling resumption after Ctrl+C, network failures, or any interruption via -Resume
+
+ Offline Replay (-RAWInputCSV):
+ * Ingest a previously exported raw Purview audit CSV (must contain AuditData JSON column)
+ * Skips authentication & live Search-UnifiedAuditLog queries entirely
+ * Forces at least Purview array explosion even if -ExplodeArrays not supplied
+ * Optional -ExplodeDeep further deep‑flattens CopilotEventData.*
+ * Allows only filtering parameters (StartDate / EndDate / ActivityTypes / AgentId / AgentsOnly / PromptFilter / ExcludeAgents / UserIds) plus OutputFile, AppendFile & explosion switches
+ * Disallowed with RAWInputCSV (error if present): BlockHours, ResultSize, PacingMs, Auth, ParallelMode, MaxParallelGroups, MaxConcurrency, EnableParallel, GroupNames
+ * StartDate / EndDate act as inclusive(lower)/exclusive(upper) UTC filters on CreationDate in the replay dataset
+ * ActivityTypes filters by Operation (case‑insensitive membership)
+ * AgentId filters for specific AgentId value(s); AgentsOnly includes any record with an AgentId present
+ * PromptFilter filters messages by isPrompt property (Prompt/Response/Both/Null)
+ * ExcludeAgents removes records with AgentId present (inverse of AgentsOnly)
+ * UserIds filters by UserId extracted from AuditData JSON (client-side filtering)
+ * GroupNames is NOT supported in replay mode (requires authentication for group expansion)
+ * Non‑exploded 1:1 mode is intentionally disabled for deterministic schema in offline transforms
+
+ Filtering:
+ -AgentId : Filter to records matching specific AgentId value(s)
+ -AgentsOnly : Filter to records with any AgentId present (mutually exclusive with -ExcludeAgents)
+ -ExcludeAgents : Filter to records WITHOUT AgentId (mutually exclusive with -AgentId/-AgentsOnly)
+ -PromptFilter
+ Prompt : Only export messages where Message_isPrompt = True
+ Response : Only export messages where Message_isPrompt = False
+ Both : Export messages with either True or False isPrompt values
+ Null : Only export messages with null/undefined isPrompt values (rare)
+ Note: PromptFilter uses two-stage filtering for optimal performance:
+ Stage 1 (Pre-filter): Filters records before explosion based on message content
+ Stage 2 (Message-level): Filters individual messages during explosion
+
+ -UserIds : Filter to specific user identifier(s)
+ LIVE MODE: SERVER-SIDE filtering at Purview (efficient, no unnecessary data transfer)
+ REPLAY MODE: CLIENT-SIDE filtering by extracting UserId from AuditData JSON (slower but functional)
+
+ Accepted formats:
+ • User Principal Name (UPN): "john.doe@contoso.com"
+ • SMTP Address: "john.doe@contoso.com"
+ • User GUID: "a1b2c3d4-e5f6-7890-abcd-ef1234567890"
+ Examples:
+ -UserIds "john.doe@contoso.com"
+ -UserIds "john.doe@contoso.com","jane.smith@contoso.com","bob.jones@contoso.com"
+
+ -GroupNames : Filter to members of distribution/security group(s)
+ LIVE MODE ONLY: Groups automatically expanded to individual users after authentication using Get-DistributionGroupMember
+ REPLAY MODE: NOT SUPPORTED (requires authentication) - use -UserIds with explicit email addresses instead
+
+ Accepted formats (LIVE MODE only):
+ • Group Display Name: "Executive Leadership Team"
+ • Group Email (Alias): "exec-team@contoso.com"
+ • Group GUID: "a1b2c3d4-e5f6-7890-abcd-ef1234567890"
+ • Distinguished Name: "CN=ExecTeam,OU=Groups,DC=contoso,DC=com"
+ Examples:
+ -GroupNames "Executive Leadership"
+ -GroupNames "exec-team@contoso.com"
+ -GroupNames "Engineering Managers","Product Leads","Sales Directors"
+ Note: Groups are expanded once after authentication
+ Blocked in replay mode (-RAWInputCSV) - script will exit with error
+
+ Combining UserIds + GroupNames (LIVE MODE ONLY):
+ • When both are specified, the script combines and deduplicates the user lists
+ • Example: -UserIds "ceo@contoso.com" -GroupNames "Board of Directors"
+ Pulls records for the CEO plus all expanded board members (duplicates removed)
+ • Not available in replay mode - use -UserIds only
+
+ COMBINING FILTERS - Powerful Use Cases:
+
+ All filters can be combined for highly targeted data extraction. Filter application order is now CONSISTENT across both modes:
+
+ FILTER APPLICATION ORDER (BOTH MODES):
+ 1. User/Group filtering (server-side in live mode via -UserIds, client-side in replay mode)
+ 2. Agent filtering (AgentsOnly, AgentId, or ExcludeAgents)
+ 3. PromptFilter (during explosion: Prompt, Response, Both, or Null)
+
+ NOTE: Applying User/Group filtering first improves performance by reducing the dataset before subsequent filters.
+
+ TWO-FILTER COMBINATIONS:
+
+ User + Agent:
+ Use Case: Analyze specific user(s) interactions with Copilot agents
+ Example: "Show me all agent usage by our power users"
+ Command: -UserIds "poweruser@contoso.com" -AgentsOnly
+
+ User + PromptFilter:
+ Use Case: Focus on conversation patterns (prompts/responses) for specific users
+ Example: "Show me only the questions asked by the executive team"
+ Command: -GroupNames "Executive Team" -PromptFilter Prompt
+ Result: Removes resource-only explosion rows, keeps only message data
+
+ Agent + PromptFilter:
+ Use Case: Analyze agent conversation quality, prompt engineering effectiveness
+ Example: "Show me all prompts sent to our custom declarative agent"
+ Command: -AgentId "CopilotStudio.Declarative.abc123" -PromptFilter Prompt
+
+ THREE-FILTER COMBINATION:
+
+ User + Agent + PromptFilter:
+ Use Case: Deep-dive conversation analysis for specific users with agents
+ Example: "Show me all questions the sales team asked our Sales Copilot agent"
+ Command: -GroupNames "Sales Team" -AgentId "SalesCopilot.Agent" -PromptFilter Prompt
+ Benefits:
+ • Server-side filtering reduces data transfer (live mode)
+ • Agent filter removes non-agent interactions
+ • PromptFilter removes responses and resource-only rows
+ • Result: Clean dataset of just sales team questions to the agent
+
+ REPLAY MODE COMBINATIONS:
+ All filter combinations work in replay mode except GroupNames
+ Use -UserIds with explicit email addresses instead of -GroupNames
+ Example: -RAWInputCSV "data.csv" -UserIds "user@contoso.com" -AgentsOnly -PromptFilter Both
+
+ PowerShell 5.1 & 7+ supported. Parallel query retrieval and explosion processing require 7+.
+
+.EXECUTIONPOLICY
+ No internal execution policy bypass. Use external host invocation if needed:
+ powershell.exe -ExecutionPolicy Bypass -File .\PAX_Purview_Audit_Log_Processor.ps1 -StartDate 2025-10-01 -EndDate 2025-10-02
+ pwsh.exe -ExecutionPolicy Bypass -File .\PAX_Purview_Audit_Log_Processor.ps1 -StartDate 2025-10-01 -EndDate 2025-10-02
+
+.POWERSHELLVERSIONS
+ PS 5.1 & 7+. Query parallelization and explosion parallelization require PS 7+.
+
+.EXAMPLE
+ # Basic export with auto-generated timestamped filename
+ pwsh -File .\PAX_Purview_Audit_Log_Processor.ps1 -StartDate 2025-10-01 -EndDate 2025-10-02 -OutputPath C:\Temp\
+.EXAMPLE
+ # Array explosion mode with auto-generated filename
+ pwsh -File .\PAX_Purview_Audit_Log_Processor.ps1 -ExplodeArrays -StartDate 2025-10-01 -EndDate 2025-10-02 -OutputPath C:\Temp\
+.EXAMPLE
+ # Deep column explosion with auto-generated filename
+ pwsh -File .\PAX_Purview_Audit_Log_Processor.ps1 -ExplodeDeep -StartDate 2025-10-01 -EndDate 2025-10-02 -OutputPath C:\Temp\
+.EXAMPLE
+ # PowerShell 5.1 compatible (no parallelization)
+ powershell -File .\PAX_Purview_Audit_Log_Processor.ps1 -StartDate 2025-10-01 -EndDate 2025-10-02 -OutputPath C:\Temp\
+.EXAMPLE
+ # Offline replay (simple forced explosion) of a previously exported raw CSV
+ pwsh -File .\PAX_Purview_Audit_Log_Processor.ps1 -RAWInputCSV .\output\Copilot_RAW_20251001.csv -OutputPath C:\Temp\
+.EXAMPLE
+ # Offline replay with date & activity filtering + deep flatten
+ pwsh -File .\PAX_Purview_Audit_Log_Processor.ps1 -RAWInputCSV .\output\Copilot_RAW_20251001.csv -ExplodeDeep -StartDate 2025-10-01 -EndDate 2025-10-02 -ActivityTypes CopilotInteraction -OutputPath C:\Temp\
+.EXAMPLE
+ # Deep flatten (wide) with higher schema sample & moderate chunk size (balance column coverage vs memory)
+ pwsh -File .\PAX_Purview_Audit_Log_Processor.ps1 -ExplodeDeep -StartDate 2025-10-01 -EndDate 2025-10-02 -StreamingSchemaSample 4000 -StreamingChunkSize 3000 -OutputPath C:\Temp\
+.EXAMPLE
+ # Extremely wide deep flatten: maximize schema sample, reduce chunk size for lower peak memory
+ pwsh -File .\PAX_Purview_Audit_Log_Processor.ps1 -ExplodeDeep -StartDate 2025-10-01 -EndDate 2025-10-02 -StreamingSchemaSample 6000 -StreamingChunkSize 1500 -OutputPath C:\Temp\
+.EXAMPLE
+ # Fast header freeze (narrow schema expectation) – smaller sample, larger chunk for throughput
+ # NOTE: In parallel mode (PS7+), full schema discovery scans ALL rows regardless of StreamingSchemaSample.
+ # These tuning examples primarily affect serial mode (PS5.1 or -ExplosionThreads 1).
+ pwsh -File .\PAX_Purview_Audit_Log_Processor.ps1 -ExplodeDeep -StartDate 2025-10-01 -EndDate 2025-10-02 -StreamingSchemaSample 800 -StreamingChunkSize 6000 -OutputPath C:\Temp\
+.EXAMPLE
+ # Filter to only records with agents present
+ pwsh -File .\PAX_Purview_Audit_Log_Processor.ps1 -ExplodeArrays -StartDate 2025-10-01 -EndDate 2025-10-02 -AgentsOnly -OutputPath C:\Temp\
+.EXAMPLE
+ # Filter to only records WITHOUT agents
+ pwsh -File .\PAX_Purview_Audit_Log_Processor.ps1 -ExplodeArrays -StartDate 2025-10-01 -EndDate 2025-10-02 -ExcludeAgents -OutputPath C:\Temp\
+.EXAMPLE
+ # Filter to only prompt messages (Message_isPrompt = True)
+ pwsh -File .\PAX_Purview_Audit_Log_Processor.ps1 -ExplodeArrays -StartDate 2025-10-01 -EndDate 2025-10-02 -PromptFilter Prompt -OutputPath C:\Temp\
+.EXAMPLE
+ # Filter to only response messages (Message_isPrompt = False)
+ pwsh -File .\PAX_Purview_Audit_Log_Processor.ps1 -ExplodeArrays -StartDate 2025-10-01 -EndDate 2025-10-02 -PromptFilter Response -OutputPath C:\Temp\
+.EXAMPLE
+ # Combine filters: agents + prompts only
+ pwsh -File .\PAX_Purview_Audit_Log_Processor.ps1 -ExplodeArrays -StartDate 2025-10-01 -EndDate 2025-10-02 -AgentsOnly -PromptFilter Prompt -OutputPath C:\Temp\
+.EXAMPLE
+ # Filter to specific users
+ pwsh -File .\PAX_Purview_Audit_Log_Processor.ps1 -StartDate 2025-10-01 -EndDate 2025-10-02 -UserIds "john.doe@contoso.com","jane.smith@contoso.com" -OutputPath C:\Temp\
+.EXAMPLE
+ # Emit metrics JSON alongside CSV (auto-generated timestamped filename)
+ pwsh -File .\PAX_Purview_Audit_Log_Processor.ps1 -StartDate 2025-10-01 -EndDate 2025-10-01 -EmitMetricsJson -OutputPath C:\Temp\
+.EXAMPLE
+ # Emit metrics JSON to custom path
+ pwsh -File .\PAX_Purview_Audit_Log_Processor.ps1 -StartDate 2025-10-01 -EndDate 2025-10-01 -EmitMetricsJson -MetricsPath C:\Temp\purview_metrics_20251001.json -OutputPath C:\Temp\
+.EXAMPLE
+ # AutoCompleteness remediation workflow: first run incomplete (exit code 10), second run resolves saturated windows
+ pwsh -File .\PAX_Purview_Audit_Log_Processor.ps1 -StartDate 2025-10-05 -EndDate 2025-10-05 -EmitMetricsJson -OutputPath C:\Temp\
+ # (Exit code 10 indicates saturated windows remain)
+ pwsh -File .\PAX_Purview_Audit_Log_Processor.ps1 -StartDate 2025-10-05 -EndDate 2025-10-05 -AutoCompleteness -EmitMetricsJson -OutputPath C:\Temp\
+.EXAMPLE
+ # Filter to security group members (automatically expanded)
+ pwsh -File .\PAX_Purview_Audit_Log_Processor.ps1 -StartDate 2025-10-01 -EndDate 2025-10-02 -GroupNames "Executive Leadership" -OutputPath C:\Temp\
+.EXAMPLE
+ # Filter to multiple groups
+ pwsh -File .\PAX_Purview_Audit_Log_Processor.ps1 -StartDate 2025-10-01 -EndDate 2025-10-02 -GroupNames "Executive Team","Engineering Managers" -OutputPath C:\Temp\
+.EXAMPLE
+ # Graph API: SharePoint/OneDrive document activity with record & service filters
+ pwsh -File .\PAX_Purview_Audit_Log_Processor.ps1 -StartDate 2025-12-01 -EndDate 2025-12-02 -ActivityTypes FileAccessed,FilePreviewed -RecordTypes sharePointFileOperation -ServiceTypes SharePoint,OneDrive -OutputPath C:\Temp\
+.EXAMPLE
+ # Microsoft 365 usage bundle (Exchange, SharePoint, OneDrive, Teams, Forms, Stream, Planner, PowerApps)
+ pwsh -File .\PAX_Purview_Audit_Log_Processor.ps1 -StartDate 2025-11-01 -EndDate 2025-11-02 -IncludeM365Usage -CombineOutput -OutputPath C:\Temp\
+.EXAMPLE
+ # Export with execution telemetry for performance analysis
+ pwsh -File .\PAX_Purview_Audit_Log_Processor.ps1 -StartDate 2025-11-01 -EndDate 2025-11-02 -IncludeTelemetry -OutputPath C:\Temp\
+.EXAMPLE
+ # Combine individual users and groups
+ pwsh -File .\PAX_Purview_Audit_Log_Processor.ps1 -StartDate 2025-10-01 -EndDate 2025-10-02 -UserIds "ceo@contoso.com" -GroupNames "Board of Directors" -OutputPath C:\Temp\
+.EXAMPLE
+ # Replay mode with user filtering (client-side filtering from JSON)
+ pwsh -File .\PAX_Purview_Audit_Log_Processor.ps1 -RAWInputCSV .\output\Copilot_RAW_20251001.csv -UserIds "john.doe@contoso.com","jane.smith@contoso.com" -OutputPath C:\Temp\
+.EXAMPLE
+ # COMBINING FILTERS: User + PromptFilter (conversation focus, removes resource-only rows)
+ pwsh -File .\PAX_Purview_Audit_Log_Processor.ps1 -StartDate 2025-10-01 -EndDate 2025-10-02 -UserIds "poweruser@contoso.com" -PromptFilter Both -OutputPath C:\Temp\
+.EXAMPLE
+ # COMBINING FILTERS: Group + Agent (team adoption of specific agent)
+ pwsh -File .\PAX_Purview_Audit_Log_Processor.ps1 -StartDate 2025-10-01 -EndDate 2025-10-02 -GroupNames "Sales Team" -AgentsOnly -OutputPath C:\Temp\
+.EXAMPLE
+ # COMBINING FILTERS: User + Agent + PromptFilter (prompts sent to agents by specific users)
+ pwsh -File .\PAX_Purview_Audit_Log_Processor.ps1 -StartDate 2025-10-01 -EndDate 2025-10-02 -UserIds "analyst@contoso.com" -AgentId "DataAnalysis.Agent" -PromptFilter Prompt -OutputPath C:\Temp\
+.EXAMPLE
+ # COMBINING FILTERS: Replay mode with User + Agent + PromptFilter
+ pwsh -File .\PAX_Purview_Audit_Log_Processor.ps1 -RAWInputCSV .\data.csv -UserIds "exec@contoso.com" -AgentsOnly -PromptFilter Both -OutputPath C:\Temp\
+
+.EXAMPLE
+ # APPENDING: Append to existing CSV file (relative filename in OutputPath directory)
+ pwsh -File .\PAX_Purview_Audit_Log_Processor.ps1 -StartDate 2025-10-15 -EndDate 2025-10-16 -AppendFile "MyReport.csv" -OutputPath C:\Temp\
+.EXAMPLE
+ # APPENDING: Append to existing CSV file (full path)
+ pwsh -File .\PAX_Purview_Audit_Log_Processor.ps1 -StartDate 2025-10-15 -EndDate 2025-10-16 -AppendFile "C:\Data\Audit\CopilotActivity.csv"
+.EXAMPLE
+ # APPENDING: Append to existing Excel workbook (requires -ExportWorkbook, -CombineOutput recommended)
+ pwsh -File .\PAX_Purview_Audit_Log_Processor.ps1 -StartDate 2025-10-15 -EndDate 2025-10-16 -AppendFile "WeeklyReport.xlsx" -ExportWorkbook -CombineOutput -OutputPath C:\Temp\
+.EXAMPLE
+ # APPENDING: Append with single activity type to existing CSV
+ pwsh -File .\PAX_Purview_Audit_Log_Processor.ps1 -StartDate 2025-10-15 -EndDate 2025-10-16 -AppendFile "CopilotOnly.csv" -ActivityTypes CopilotInteraction -OutputPath C:\Temp\
+
+.EXAMPLE
+ # Export to Excel workbook (multi-tab by activity type - default behavior)
+ pwsh -File .\PAX_Purview_Audit_Log_Processor.ps1 -StartDate 2025-10-01 -EndDate 2025-10-02 -ExportWorkbook
+
+.EXAMPLE
+ # Export to Excel workbook with combined output (single-tab with all activity types)
+ pwsh -File .\PAX_Purview_Audit_Log_Processor.ps1 -StartDate 2025-10-01 -EndDate 2025-10-02 -ExportWorkbook -CombineOutput
+
+.EXAMPLE
+ # CSV export with separate files per activity type (default behavior)
+ pwsh -File .\PAX_Purview_Audit_Log_Processor.ps1 -StartDate 2025-10-01 -EndDate 2025-10-02 -ActivityTypes CopilotInteraction,ConnectedAIAppInteraction
+
+.EXAMPLE
+ # CSV export with combined output (single file with all activity types)
+ pwsh -File .\PAX_Purview_Audit_Log_Processor.ps1 -StartDate 2025-10-01 -EndDate 2025-10-02 -ActivityTypes CopilotInteraction,ConnectedAIAppInteraction -CombineOutput
+
+.EXAMPLE
+ # Append data to existing Excel workbook (same activity types)
+ pwsh -File .\PAX_Purview_Audit_Log_Processor.ps1 -StartDate 2025-10-03 -EndDate 2025-10-04 -ExportWorkbook -AppendFile "WeeklyReport.xlsx"
+
+.EXAMPLE
+ # Excel export with DSPM features (multi-tab with AIAppInteraction)
+ pwsh -File .\PAX_Purview_Audit_Log_Processor.ps1 -StartDate 2025-10-01 -EndDate 2025-10-02 -ExportWorkbook -IncludeDSPMForAI -ActivityTypes CopilotInteraction,AIAppInteraction
+
+.EXAMPLE
+ # Excel export with array explosion and deep flattening
+ pwsh -File .\PAX_Purview_Audit_Log_Processor.ps1 -StartDate 2025-10-01 -EndDate 2025-10-02 -ExportWorkbook -ExplodeDeep
+
+.EXAMPLE
+ # Resume interrupted operation (auto-discover checkpoint in OutputPath)
+ pwsh -File .\PAX_Purview_Audit_Log_Processor.ps1 -Resume -OutputPath C:\Temp\
+
+.EXAMPLE
+ # Resume from specific checkpoint file
+ pwsh -File .\PAX_Purview_Audit_Log_Processor.ps1 -Resume "C:\Temp\.pax_checkpoint_20251215_143022.json"
+
+.EXAMPLE
+ # Resume with Force (use most recent checkpoint without prompting)
+ pwsh -File .\PAX_Purview_Audit_Log_Processor.ps1 -Resume -OutputPath C:\Temp\ -Force
+
+.EXAMPLE
+ # Parallel explosion with explicit 8 threads (PS7+ only)
+ pwsh -File .\PAX_Purview_Audit_Log_Processor.ps1 -ExplodeDeep -StartDate 2025-10-01 -EndDate 2025-10-02 -ExplosionThreads 8 -OutputPath C:\Temp\
+
+.EXAMPLE
+ # Force serial explosion for debugging/comparison (disables parallel)
+ pwsh -File .\PAX_Purview_Audit_Log_Processor.ps1 -ExplodeDeep -StartDate 2025-10-01 -EndDate 2025-10-02 -ExplosionThreads 1 -OutputPath C:\Temp\
+
+.NOTES
+ Reliability Features:
+ - Automatic retry: Failed partitions retried up to 3 times with smart delays
+ - Status tracking: Each partition tracked with QueryId and QueryName throughout execution
+ - Partial success: Script continues with successful data even if some partitions fail
+ - End summary: Detailed report showing Complete/Incomplete/Failed partitions
+ - Query names: Visible in Purview as PAX_Query_<start>-<end>_Part<X>/<Total>
+ Example: PAX_Query_20241101_0000-20241101_0100_Part27/134
+
+ Concurrency Control:
+ - MaxConcurrency (default: 10): Single parameter for both modes
+ • EOM mode: Limits concurrent serial queries
+ • Graph API mode: Limits concurrent partition execution
+ - Replaced previous MaxActivePartitions parameter (use MaxConcurrency instead)
+ - Example: -MaxConcurrency 8 (reduces from default 10 for rate-limit sensitive environments)
+
+ Graph API Version Detection:
+ PAX automatically detects whether to use v1.0 (GA) or beta endpoints for the Microsoft Graph
+ security audit API. Detection occurs on first API call and is remembered for the session. This
+ ensures seamless transition when Microsoft promotes the API from beta to v1.0 (expected Q1 2026).
+
+ Version Configuration (near top of script, manually editable):
+ $script:GraphAuditApiVersion_Current = 'v1.0' # Try this version first
+ $script:GraphAuditApiVersion_Previous = 'beta' # Fallback if current unavailable
+
+ Version Selection:
+ • If current version available: Uses configured current (default: v1.0)
+ • If current version unavailable: Falls back to configured previous (default: beta)
+ • Detection is remembered per session (no repeated checks)
+
+ Benefits:
+ • Zero-downtime transition when API reaches GA
+ • Backward compatibility with beta-only tenants
+ • Manual override capability by editing version variables at top of script
+ • Single-line log output shows which version is being used
+
+ Future Updates:
+ If Microsoft releases a new version (e.g., v2.0), simply edit the variables:
+ $script:GraphAuditApiVersion_Current = 'v2.0'
+ $script:GraphAuditApiVersion_Previous = 'v1.0'
+
+ Example End-of-Run Summary:
+ ═══════════════════════════════════════════════════════════════
+ QUERY SUBMISSION SUMMARY
+ ═══════════════════════════════════════════════════════════════
+ Total Partitions: 134
+ Sent and Complete: 131
+ [!] Sent but Incomplete: 2
+ ✗ Never Sent: 1
+ ═══════════════════════════════════════════════════════════════
+
+ Checkpoint & Resume:
+ All authentication modes automatically create checkpoint files to preserve progress
+ during long-running operations. This enables resumption after Ctrl+C, network failures,
+ token expiry, or any interruption without losing completed work.
+
+ Checkpoint Behavior:
+ • Created automatically for ALL auth modes (WebLogin, DeviceCode, AppRegistration)
+ • Saved to OutputPath as .pax_checkpoint_<yyyyMMdd_HHmmss>.json
+ • Updated after each partition completes
+ • Deleted automatically on successful completion
+ • Stores ALL processing parameters for complete state restoration
+
+ Token Refresh:
+ • AppRegistration mode:
+ - Proactive refresh at ~45-50 minutes (before token expiry)
+ - Reactive refresh on 401 Unauthorized (backup)
+ - Fully automatic, silent re-authentication using stored credentials
+ • Interactive modes (WebLogin/DeviceCode):
+ - Reactive only: Triggered when 401 Unauthorized detected
+ - Attempts silent refresh first (using SDK cached refresh token)
+ - Prompts user only if silent refresh fails
+ • 403 Forbidden errors: Indicates permissions issue, NOT token expiry
+ - Token refresh will not help; check AuditLog.Read.All consent and roles
+
+ Resume Mode (Standalone):
+ The -Resume switch restores ALL settings from the checkpoint file.
+ No other processing parameters can be specified with -Resume.
+
+ Allowed with -Resume:
+ • -Force Use most recent checkpoint without prompting
+ • -Auth Override authentication method
+ • -TenantId Override tenant ID (for AppRegistration)
+ • -ClientId Override client ID (for AppRegistration)
+ • -ClientSecret Provide client secret (for AppRegistration)
+
+ Usage Examples:
+ • -Resume Auto-discover checkpoint in current directory
+ • -Resume "path\to\file" Use specific checkpoint file
+ • -Resume -Force Use most recent checkpoint without prompting
+ • -Resume -Auth DeviceCode Resume with different auth method
+
+ API Record Limits & Auto-Subdivision:
+ Graph API 1,000,000 Record Limit:
+ The Microsoft Graph security/auditLog API has a hard limit of 1,000,000 records per query.
+ PAX automatically detects when this limit is reached and handles it gracefully.
+
+ Detection:
+ • Identified when a partition returns exactly 1,000,000 records with no nextLink
+ • Warning displayed: "[SUBDIVISION] Partition X/Y - Fetched 1,000,000 records (Graph API limit reached)"
+
+ Auto-Subdivision:
+ • Uses same BlockHours subdivision algorithm as EOM 10K limit handling
+ • Partition time window is halved and re-queried recursively
+ • Minimum window: 0.016667 hours (1 minute) - cannot subdivide below this
+ • If minimum reached, warning displayed and available records returned
+
+ Recommendations for High-Volume Tenants:
+ • Use smaller -BlockHours (e.g., 0.25 or 0.1) for very active tenants
+ • Consider shorter date ranges for initial exports
+ • Monitor "[SUBDIVISION]" messages to tune BlockHours
+
+ EOM 10,000 Record Limit:
+ The Exchange Online Management (EOM) Search-UnifiedAuditLog cmdlet returns maximum 10,000
+ records per query. PAX automatically subdivides time windows when this limit is reached.
+
+ Performance Optimization:
+ Parallel Explosion (PS7+ only):
+ After records are retrieved from Purview, the explosion phase (converting records
+ to rows with -ExplodeArrays or -ExplodeDeep) can be parallelized for significant
+ speedup on large datasets.
+
+ Behavior:
+ • Automatic on PS7+ when >500 records retrieved
+ • Uses job queue pattern: many small chunks (~1000 records) with N concurrent workers
+ • Better load balancing than fixed chunks when record complexity varies
+ • Full schema discovery: scans ALL rows for 100% column coverage (not sampling)
+
+ Control via -ExplosionThreads:
+ • 0 (default): Auto-detect based on CPU cores (2 to 8 threads)
+ • 1: Force serial processing (for debugging or comparison)
+ • 2-8: Explicit thread count (capped at 8 for stability)
+
+ Output Equivalence:
+ • Identical columns, data values, and row counts vs serial mode
+ • Row order may differ (chunks complete in parallel, not sequentially)
+ • Works with -ExplodeArrays, -ExplodeDeep, live mode, and replay mode
+
+.PARAMETER StartDate
+ Start date for audit log query in live mode (format: yyyy-MM-dd or MM/dd/yyyy).
+ In replay mode (-RAWInputCSV), acts as inclusive lower bound filter on CreationDate.
+
+.PARAMETER EndDate
+ End date for audit log query in live mode (format: yyyy-MM-dd or MM/dd/yyyy).
+ In replay mode (-RAWInputCSV), acts as exclusive upper bound filter on CreationDate.
+
+.PARAMETER OutputPath
+ Directory path where all output files will be created with auto-generated timestamped filenames.
+ Default: C:\Temp\
+
+ The script automatically generates descriptive filenames based on:
+ • Activity types being exported
+ • Export mode (CSV vs Excel, combined vs separate)
+ • Current timestamp (yyyyMMdd_HHmmss format)
+
+ Examples of auto-generated filenames:
+ • Purview_Audit_CopilotInteraction_20251110_143022.csv
+ • Purview_Audit_CombinedUsageActivity_20251110_143022.csv
+ • Purview_Audit_MultiTab_20251110_143022.xlsx
+
+ Note: OutputPath accepts ONLY directory paths, not filenames.
+ Use -AppendFile parameter to specify a custom filename for appending to existing files.
+
+.PARAMETER FlatDepth
+ Maximum JSON flatten depth for exploding CopilotEventData and AuditData (default 120).
+
+.PARAMETER Auth
+ Authentication method. Options:
+ • WebLogin – Interactive browser authentication
+ • DeviceCode – Device code flow for headless scenarios
+ • Credential – Legacy username/password prompt or GRAPH_* env vars
+ • Silent – Managed identity or pre-cached token
+ • AppRegistration – Service principal using client secret or certificate
+
+.PARAMETER TenantId
+ Azure AD tenant ID (GUID). Required for -Auth AppRegistration unless GRAPH_TENANT_ID
+ environment variable is set.
+
+.PARAMETER ClientId
+ Azure AD app registration client ID (GUID). Required for -Auth AppRegistration unless
+ GRAPH_CLIENT_ID environment variable is set.
+
+.PARAMETER ClientSecret
+ Client secret value for app registration authentication. You can pass it directly,
+ convert from a secure string, or set it through GRAPH_CLIENT_SECRET.
+
+.PARAMETER ClientCertificateThumbprint
+ Thumbprint of a certificate located in the CurrentUser or LocalMachine "My" store.
+ Used when -Auth AppRegistration should authenticate with a certificate instead of a
+ client secret. Optional environment variable: GRAPH_CLIENT_CERT_THUMBPRINT.
+
+.PARAMETER ClientCertificateStoreLocation
+ Certificate store to search when using ClientCertificateThumbprint. Valid values:
+ CurrentUser (default) or LocalMachine.
+
+.PARAMETER ClientCertificatePath
+ Path to a PFX file containing the certificate for app registration auth. Optional
+ environment variable: GRAPH_CLIENT_CERT_PATH.
+
+.PARAMETER ClientCertificatePassword
+ Password for the PFX file specified by ClientCertificatePath. Accepts secure string or
+ plain text (converted internally). Optional environment variable:
+ GRAPH_CLIENT_CERT_PASSWORD.
+
+.PARAMETER BlockHours
+ Time window size in hours for each audit log query block.
+ Range: 0.016667 to 24 hours. Default: 0.5 (30 minutes)
+
+.PARAMETER PartitionHours
+ Time partition size in hours for Graph API parallel processing.
+ Range: 1-72. Default: 0 (auto-calculated based on date range and MaxPartitions)
+
+.PARAMETER MaxPartitions
+ Maximum number of time partitions for parallel query execution.
+ Range: 1-1000. Default: 160
+
+.PARAMETER ResultSize
+ Maximum records returned per Search-UnifiedAuditLog query.
+ Range: 1 to 10000. Default: 10000
+
+.PARAMETER PacingMs
+ Delay in milliseconds between audit log queries for throttling control.
+ Range: 0 to 10000. Default: 0
+
+.PARAMETER ActivityTypes
+ Array of Purview audit activity types to query.
+ Default: @('CopilotInteraction')
+ Examples: CopilotInteraction, ConnectedAIAppInteraction, AIInteraction, AIAppInteraction
+ Note: CopilotInteraction captures ALL Microsoft 365 Copilot usage including Teams meetings (AppHost="Teams")
+
+.PARAMETER RecordTypes
+ Filter audit records by record type (Graph API mode only).
+ Accepts one or more record type names that map to Microsoft 365 workload categories.
+ Examples: SharePointFileOperation, ExchangeItem, MicrosoftTeams, AzureActiveDirectory
+
+ BEHAVIOR:
+ • Standard mode: Filters server-side to specified record types
+ • With -IncludeM365Usage: Merged with curated M365 usage bundle record types
+
+ LIMITATIONS:
+ • Graph API mode only (not supported with -UseEOM)
+ • Replay mode (-RAWInputCSV): Not supported (server-side filter only)
+
+ Use with -ActivityTypes and -ServiceTypes for precise workload targeting.
+
+.PARAMETER ServiceTypes
+ Filter audit records by service/workload (Graph API mode only).
+ Accepts one or more service names representing Microsoft 365 workloads.
+ Examples: Exchange, SharePoint, OneDrive, MicrosoftTeams, AzureActiveDirectory
+
+ BEHAVIOR:
+ • Standard mode: Filters server-side to specified services
+ • Note: Multiple services may cause separate queries per service
+
+ CRITICAL LIMITATIONS:
+ • Graph API mode only (not supported with -UseEOM)
+ • Replay mode (-RAWInputCSV): Not supported (server-side filter only)
+ • IGNORED when -IncludeM365Usage is active: The M365 usage bundle intentionally
+ sets ServiceTypes to null for optimal single-pass query performance.
+ Your -ServiceTypes values will be silently overridden.
+
+ RECOMMENDATION: For M365 usage scenarios, use -IncludeM365Usage without -ServiceTypes.
+ For targeted workload queries without M365 bundle, use -ServiceTypes with -ActivityTypes.
+
+.PARAMETER ExplodeArrays
+ Enable Purview array explosion to canonical 35-column schema.
+ Explodes Messages[], Resources[], and Contexts[] arrays into separate rows.
+
+.PARAMETER ExplodeDeep
+ Enable deep flattening with Purview array explosion.
+ Produces 35 fixed columns + appended deep-flattened CopilotEventData.* columns.
+
+.PARAMETER RAWInputCSV
+ Path to previously exported raw Purview audit CSV for offline replay.
+ Must contain AuditData JSON column. Bypasses live Search-UnifiedAuditLog queries.
+ Forces array explosion even if -ExplodeArrays not specified.
+
+.PARAMETER MaxConcurrency
+ Maximum concurrent queries/partitions (1-10).
+ - EOM mode: limits concurrent serial queries
+ - Graph API mode: limits concurrent partition execution
+ Default: 10 (Microsoft Purview enforces a 10 concurrent search job limit per user account)
+
+.PARAMETER EnableParallel
+ Force enable parallel processing (overrides ParallelMode setting).
+
+.PARAMETER MaxParallelGroups
+ Maximum activity groups processed concurrently in parallel mode.
+ Range: 0 to 50. Default: 4
+
+.PARAMETER ParallelMode
+ Parallel processing mode. Options: Off, On, Auto (default)
+ Auto: Enables parallel for PowerShell 7+ environments automatically.
+
+.PARAMETER ExplosionThreads
+ Number of threads for parallel explosion processing (post-retrieval phase).
+ 0 (default): Auto-detect based on CPU cores (2 to 8 threads)
+ 1: Force serial processing (disable parallel explosion)
+ 2-8: Explicit thread count (capped at 8 for stability)
+ Requires PowerShell 7+. Falls back to serial on PS5.
+
+.PARAMETER DisableAdaptive
+ Disable adaptive safeguards (memory/latency/concurrency smoothing).
+
+.PARAMETER ProgressSmoothingAlpha
+ Weight for smoothing dynamic progress total recalculation.
+ Range: 0.0 to 1.0. Default: 0.3 (0 = off)
+
+.PARAMETER HighLatencyMs
+ Partition average latency threshold (ms) triggering concurrency reduction.
+ Range: 1000 to 600000. Default: 90000
+
+.PARAMETER MemoryPressureMB
+ Working set (MB) threshold to trigger concurrency reduction.
+ Range: 256 to 32768. Default: 1500
+
+.PARAMETER MaxMemoryMB
+ Maximum process memory (MB) before flushing in-memory records to disk.
+ When exceeded, clears $allLogs after confirmed JSONL save to free memory.
+ Not compatible with -ExplodeDeep or -ExplodeArrays (ignored when explosion enabled).
+ Range: -1 to 65536. Default: -1 (auto = 75% of system RAM). Use 0 to disable.
+
+.PARAMETER LowLatencyMs
+ Sustained low latency threshold to consider concurrency step-up.
+ Range: 100 to 600000. Default: 20000
+
+.PARAMETER LowLatencyConsecutive
+ Required consecutive low-latency groups before step-up.
+ Range: 1 to 10. Default: 2
+
+.PARAMETER ThroughputDropPct
+ Percentage drop vs baseline required (with high latency) to justify reduction.
+ Range: 1 to 100. Default: 15
+
+.PARAMETER ThroughputSmoothingAlpha
+ EMA smoothing for throughput baseline.
+ Range: 0.0 to 1.0. Default: 0.3
+
+.PARAMETER AdaptiveConcurrencyCeiling
+ Upper bound for adaptive concurrency step-ups.
+ Range: 1 to 50. Default: 6
+
+.PARAMETER ExportProgressInterval
+ Frequency of progress updates during export phase.
+ Range: 1 to 10000. Default: 10
+
+.PARAMETER StreamingSchemaSample
+ Number of rows sampled before freezing CSV schema in streaming mode (SERIAL MODE ONLY).
+ Range: 100 to 50000. Default: 5000
+ Higher values capture more columns but delay schema freeze.
+ NOTE: In parallel mode (PS7+), full schema discovery is used instead, scanning ALL rows
+ for 100% column coverage. This parameter only affects serial mode processing.
+
+.PARAMETER StreamingChunkSize
+ Number of rows per write batch in streaming CSV export.
+ Range: 100 to 50000. Default: 5000
+ Lower values reduce memory pressure, higher values improve throughput.
+
+.PARAMETER AgentId
+ Filter to records matching specific AgentId value(s).
+ Example: -AgentId "CopilotStudio.Declarative.abc123"
+
+.PARAMETER AgentsOnly
+ Filter to records with any AgentId present (mutually exclusive with -ExcludeAgents).
+
+.PARAMETER PromptFilter
+ Filter messages by isPrompt property.
+ Options: Prompt (True), Response (False), Both (True/False), Null (undefined)
+
+.PARAMETER CircuitBreakerThreshold
+ Consecutive block failures before opening circuit breaker.
+ Range: 1 to 50. Default: 5
+
+.PARAMETER CircuitBreakerCooldownSeconds
+ Cooldown duration (seconds) after circuit breaker trips.
+ Range: 5 to 3600. Default: 120
+
+.PARAMETER BackoffBaseSeconds
+ Base seconds for exponential backoff between block retries.
+ Range: 0.1 to 120. Default: 1.0
+
+.PARAMETER BackoffMaxSeconds
+ Maximum cap for exponential backoff delay (seconds).
+ Range: 1 to 600. Default: 45
+
+.PARAMETER ExcludeAgents
+ Filter to records WITHOUT AgentId (mutually exclusive with -AgentId/-AgentsOnly).
+
+.PARAMETER UserIds
+ Filter to specific user identifier(s).
+ LIVE MODE: Server-side filtering at Purview (efficient).
+ REPLAY MODE: Client-side filtering from AuditData JSON (slower).
+ Accepts UPN, SMTP address, or user GUID.
+
+.PARAMETER GroupNames
+ Filter to members of distribution/security group(s).
+ LIVE MODE ONLY: Groups automatically expanded after authentication.
+ REPLAY MODE: NOT SUPPORTED (requires authentication).
+
+.PARAMETER Help
+ Display script help information.
+
+.PARAMETER EmitMetricsJson
+ Emit structured metrics JSON alongside output file.
+ Default filename: <output file base name>.metrics.json
+
+.PARAMETER MetricsPath
+ Override metrics output path. Requires -EmitMetricsJson.
+
+.PARAMETER AutoCompleteness
+ Aggressively subdivide windows returning server 10K limit until complete or minimum window reached.
+
+.PARAMETER IncludeTelemetry
+ Export execution telemetry CSV alongside audit data (Graph API mode only).
+ Creates a separate CSV file with one row per partition containing timing and performance metrics.
+ Useful for analyzing query execution patterns, identifying bottlenecks, and capacity planning.
+ File naming: <output file base name>_telemetry_<yyyyMMdd_HHmmss>.csv
+ Not available in EOM mode or OnlyUserInfo mode.
+
+.PARAMETER IncludeCopilotInteraction
+ Adds CopilotInteraction to the activity list even when you provide a custom -ActivityTypes array.
+ Useful when combining Copilot telemetry with targeted classic workloads without redefining defaults.
+
+.PARAMETER IncludeM365Usage
+ Adds a curated Microsoft 365 usage bundle spanning Exchange, SharePoint, OneDrive, Teams,
+ Forms, Stream, Planner, PowerApps, and Office desktop apps.
+
+ ACTIVITY TYPES INCLUDED:
+ Authentication: UserLoggedIn
+ Exchange: MailboxLogin, MailItemsAccessed, Send, SendOnBehalf, SoftDelete, HardDelete,
+ MoveToDeletedItems, CopyToFolder
+ SharePoint/OneDrive (Files): FileAccessed, FileDownloaded, FileUploaded, FileModified,
+ FileDeleted, FileMoved, FileCheckedIn, FileCheckedOut, FileRecycled, FileRestored,
+ FileVersionsAllDeleted
+ SharePoint/OneDrive (Sharing): SharingSet, SharingInvitationCreated, SharingInvitationAccepted,
+ SharedLinkCreated, SharingRevoked, AddedToSecureLink, RemovedFromSecureLink, SecureLinkUsed
+ Groups: AddMemberToUnifiedGroup, RemoveMemberFromUnifiedGroup
+ Teams (Team/Channel): TeamCreated, TeamDeleted, TeamArchived, TeamSettingChanged,
+ TeamMemberAdded, TeamMemberRemoved, MemberAdded, MemberRemoved, MemberRoleChanged,
+ ChannelAdded, ChannelDeleted, ChannelSettingChanged, ChannelOwnerResponded,
+ ChannelMessageSent, ChannelMessageDeleted, BotAddedToTeam, BotRemovedFromTeam,
+ TabAdded, TabRemoved, TabUpdated, ConnectorAdded, ConnectorRemoved, ConnectorUpdated
+ Teams (Chat/Messaging): TeamsSessionStarted, ChatCreated, ChatRetrieved, ChatUpdated,
+ MessageSent, MessageRead, MessageDeleted, MessageUpdated, MessagesListed,
+ MessageCreation, MessageCreatedHasLink, MessageEditedHasLink,
+ MessageHostedContentRead, MessageHostedContentsListed, SensitiveContentShared
+ Teams (Meetings): MeetingCreated, MeetingUpdated, MeetingDeleted, MeetingStarted, MeetingEnded,
+ MeetingParticipantJoined, MeetingParticipantLeft, MeetingParticipantRoleChanged,
+ MeetingRecordingStarted, MeetingRecordingEnded, MeetingDetail, MeetingParticipantDetail,
+ LiveNotesUpdate, AINotesUpdate, RecordingExported, TranscriptsExported
+ Teams (Apps/Approvals): AppInstalled, AppUpgraded, AppUninstalled, CreatedApproval,
+ ApprovedRequest, RejectedApprovalRequest, CanceledApprovalRequest
+ Office Apps: Create, Edit, Open, Save, Print
+ Forms: CreateForm, EditForm, DeleteForm, ViewForm, CreateResponse, SubmitResponse,
+ ViewResponse, DeleteResponse
+ Stream: StreamModified, StreamViewed, StreamDeleted, StreamDownloaded
+ Planner: PlanCreated, PlanDeleted, PlanModified, TaskCreated, TaskDeleted, TaskModified,
+ TaskAssigned, TaskCompleted
+ PowerApps: LaunchedApp, CreatedApp, EditedApp, DeletedApp, PublishedApp
+ Copilot: CopilotInteraction
+
+ RECORD TYPES INCLUDED:
+ ExchangeAdmin, ExchangeItem, ExchangeMailbox, SharePointFileOperation,
+ SharePointSharingOperation, SharePoint, OneDrive, MicrosoftTeams, OfficeNative,
+ MicrosoftForms, MicrosoftStream, PlannerPlan, PlannerTask, PowerAppsApp
+
+ IMPORTANT: When this switch is active, -ServiceTypes parameter is ignored and set to null.
+ This ensures optimal single-pass query performance across all Microsoft 365 workloads.
+
+.PARAMETER IncludeDSPMForAI
+ Include DSPM for AI activity types: ConnectedAIAppInteraction, AIInteraction, AIAppInteraction.
+ Note: Some activity types may trigger PAYG billing. See billing information prompt for details.
+
+.PARAMETER ExcludeCopilotInteraction
+ Exclude Microsoft 365 Copilot activity type (CopilotInteraction).
+ Overrides custom list and default behavior. Use with -IncludeDSPMForAI to query only DSPM activity types.
+
+.PARAMETER ExportWorkbook
+ Export data to Excel workbook (.xlsx) instead of CSV files.
+ Without -CombineOutput: Creates multi-tab workbook with one tab per activity type.
+ With -CombineOutput: Creates single-tab workbook with all data combined.
+ Tab formatting: AutoSize columns, freeze top row, bold headers, preserve leading zeros.
+ Requires ImportExcel module (auto-installs if missing).
+ File naming includes "DSPM" suffix only if DSPM features enabled.
+
+.PARAMETER AppendFile
+ Append activity data to an existing output file instead of creating a new timestamped file.
+ Accepts either a filename (combined with -OutputPath) or a full path to the existing file.
+
+ **Filename Resolution:**
+ • Relative filename: -AppendFile "MyReport.csv" → Uses -OutputPath directory
+ • Full path: -AppendFile "C:\Data\Report.xlsx" → Uses exact path specified
+
+ **Requirements:**
+ • File must already exist (create it first without -AppendFile)
+ • File extension must match export mode (.csv without -ExportWorkbook, .xlsx with -ExportWorkbook)
+ • Cannot be used with -IncludeUserInfo or -OnlyUserInfo (EntraUsers data is never appended)
+ • Requires single-file output mode (see Single-File Output Requirements below)
+
+ **Single-File Output Requirements:**
+ Must use ONE of these modes to ensure single output file:
+ 1. Excel mode: -ExportWorkbook (all activity types in one .xlsx with multiple tabs)
+ 2. Combined CSV: -CombineOutput (all activity types merged into one .csv file)
+ 3. Single activity type: -ActivityTypes CopilotInteraction (only one activity type selected)
+
+ **CSV Mode Behavior:**
+ • Appends rows to existing CSV file
+ • Validates headers match exactly (case-sensitive column names and order)
+ • If headers mismatch: Script exits with error showing column differences
+ • Compatible with: Standard (1:1), -ExplodeArrays, -ExplodeDeep modes
+
+ **Excel Mode Behavior:**
+ • Requires -ExportWorkbook parameter
+ • Validates column headers match existing tabs
+ • If headers match: Appends new rows to existing tabs
+ • If headers mismatch: Creates timestamped duplicate tabs (preserves both datasets)
+ • Compatible with both multi-tab and -CombineOutput modes
+
+ **EntraUsers Export Restrictions:**
+ • Cannot use -AppendFile with -IncludeUserInfo or -OnlyUserInfo
+ • EntraUsers data represents a point-in-time snapshot, not time-based activity data
+ • Each EntraUsers export should create a fresh dataset with current user information
+
+ **Always Timestamped (Never Overwritten):**
+ Even when using -AppendFile, these files are always timestamped:
+  • Log files: *_<timestamp>.log
+  • Telemetry files: *_telemetry_<timestamp>.csv
+  • Metrics files: *_metrics_<timestamp>.json (unless -MetricsPath specified)
+
+ **Error Scenarios:**
+ • File not found: Script exits (create initial file first without -AppendFile)
+ • CSV header mismatch: Script exits with detailed column differences
+ • Excel without -ExportWorkbook: Script exits with error
+ • Multiple output files would be created: Script exits with single-file output requirement
+ • EntraUsers modes: Script exits (append not supported for snapshot data)
+
+.PARAMETER CombineOutput
+ Combines all activity types into a single output file or tab.
+
+ **Default Behavior (without -CombineOutput):**
+  • CSV: One separate CSV file per activity type (plus EntraUsers_MAClicensing_<timestamp>.csv if -IncludeUserInfo)
+ • Excel: Multi-tab workbook (one tab per activity type; EntraUsers tab appended last if -IncludeUserInfo)
+
+ **With -CombineOutput switch:**
+  • CSV: Single combined activity file named Purview_Audit_CombinedUsageActivity_<timestamp>.csv (plus separate EntraUsers_MAClicensing_<timestamp>.csv if -IncludeUserInfo)
+ • Excel: First tab named CombinedUsageActivity (no timestamp) with all activity rows; separate EntraUsers tab if -IncludeUserInfo
+
+ Entra user/org data is never merged into the combined activity dataset—always exported separately.
+
+ Recommended: Use combined mode for ingestion pipelines; separated mode for granular analysis.
+
+.PARAMETER Force
+ Force execution without interactive prompts. Automatically accepts defaults for:
+ 1. DSPM for AI Billing Information: Automatically continues when -IncludeDSPMForAI is enabled
+ 2. Conflict Resolution (ExcludeCopilotInteraction): Automatically honors -ExcludeCopilotInteraction when conflict with -ActivityTypes
+ Use this switch for unattended/automated executions (CI/CD pipelines, scheduled tasks).
+
+.PARAMETER SkipDiagnostics
+ Skip pre-query capability diagnostics (advanced).
+
+.PARAMETER UseEOM
+ Use Exchange Online Management mode with Search-UnifiedAuditLog cmdlet.
+ When specified, queries Purview audit logs via EOM PowerShell module (serial processing only).
+ Default mode (when omitted) uses Microsoft Graph Security API with parallel processing support.
+
+ PERMISSIONS REQUIRED (EOM mode):
+ • Exchange Online RBAC Roles:
+ - View-Only Audit Logs role
+ - Compliance Management role group
+ - Organization Management role group
+ - Or custom role with Search-UnifiedAuditLog cmdlet permission
+
+ PARALLEL PROCESSING:
+ EOM mode is SERIAL-ONLY. Parallel processing is automatically disabled.
+ If -EnableParallel or -ParallelMode is specified with -UseEOM, script will exit with error.
+
+
+.PARAMETER IncludeUserInfo
+ Include Entra (Microsoft Entra ID) user directory & Copilot license enrichment (Graph API mode only).
+ Exports an independent EntraUsers file/tab with:
+ • Core identity & profile details (name, UPN, job, department, location, organization info)
+ • Account & sync state (accountEnabled, onPremSync attributes, creation / change stamps)
+ • Manager expansion (identity + basic role/job fields via $expand=manager)
+ • Contact & routing (mail, proxyAddresses flattened, preferredLanguage)
+ • License enrichment (assignedLicenses list + hasLicense boolean for Copilot detection)
+
+ Separation Principle: EntraUsers data is never merged into activity rows; always a distinct artifact.
+
+ Requirements:
+ • Graph API mode (not supported with -UseEOM)
+ • Graph permissions: User.Read.All, Organization.Read.All
+ • One-time directory + license fetch at startup (typ. +10–20s)
+
+ License Detection Logic:
+ 1. Match known Copilot SKU IDs (curated list)
+ 2. Fallback name pattern search containing "Copilot" for future SKUs
+
+ Performance: Single batched fetch + hashtable lookups; no per-record calls.
+
+ Use Cases:
+ • License compliance & adoption
+ • Mapping usage to directory attributes
+ • Identifying unlicensed usage patterns
+
+ Not available in EOM mode.
+
+.PARAMETER OnlyUserInfo
+ Export ONLY Entra user directory and license information (skips all audit log retrieval).
+ This is a specialized mode for quickly exporting user licensing data without querying audit logs.
+
+ BEHAVIOR:
+ • Authenticates to Microsoft Graph
+ • Fetches Entra user directory and license data
+  • Exports standalone EntraUsers_MAClicensing_<timestamp>.csv file
+ • Skips all audit log queries (completes in 5-15 seconds vs. minutes/hours)
+ • Automatically enables -IncludeUserInfo
+
+ OUTPUT:
+ Single CSV file: EntraUsers_MAClicensing_YYYYMMDD_HHMMSS.csv
+ Contains 37 columns including:
+ - Identity fields (UPN, displayName, id, mail)
+ - Profile data (jobTitle, department, officeLocation)
+ - Manager hierarchy (manager info expanded)
+ - License assignments (assignedLicenses + hasCopilotLicense boolean)
+
+ COMPATIBLE PARAMETERS (can be used WITH -OnlyUserInfo):
+ • -OutputPath : Specify output directory
+ • -Auth : Choose authentication method (WebLogin, DeviceCode, etc.)
+ • -ExportWorkbook : Export to Excel instead of CSV
+ • -CombineOutput : (Has no effect, but allowed for script compatibility)
+ • -DisableAdaptive : (Has no effect, but allowed)
+ • -Debug / -Verbose : Enable diagnostic output
+
+ INCOMPATIBLE PARAMETERS (cannot be used with -OnlyUserInfo):
+ Audit Retrieval:
+ • StartDate, EndDate : No audit queries to filter
+ • ActivityTypes : Cleared by -OnlyUserInfo
+ • IncludeDSPMForAI : Activity type modifier
+ • ExcludeCopilotInteraction: Activity type modifier
+ • BlockHours, PartitionHours, MaxPartitions, ResultSize, PacingMs
+ • AutoCompleteness : Audit log completeness checks
+ • StreamingSchemaSample, StreamingChunkSize, ExportProgressInterval
+
+ Filtering:
+ • UserIds, GroupNames : User filtering requires audit logs
+ • AgentId, AgentsOnly, ExcludeAgents: Agent filtering requires audit logs
+ • PromptFilter : Message filtering requires audit logs
+
+ Processing:
+ • ExplodeArrays, ExplodeDeep: Explosion requires audit records
+ • RAWInputCSV : Replay mode exports audit data
+
+ Parallelization:
+ • ParallelMode, MaxParallelGroups, MaxConcurrency, EnableParallel
+ • MaxActivePartitions : Query execution settings
+
+ EOM Mode:
+ • UseEOM : Exchange Online Management mode
+
+ USE CASES:
+ 1. License compliance auditing (quick snapshot of all user licenses)
+ 2. Periodic license data exports for tracking/trending
+ 3. Standalone user directory exports for cross-referencing
+ 4. Rapid licensing status checks without audit log overhead
+
+ EXAMPLES:
+ # Basic user-only export
+ .\PAX_Purview_Audit_Log_Processor.ps1 -OnlyUserInfo
+
+ # Export to specific directory
+ .\PAX_Purview_Audit_Log_Processor.ps1 -OnlyUserInfo -OutputPath "D:\UserData\"
+
+  # Curated Microsoft 365 usage bundle (adds activity, record, and service filters automatically)
+  pwsh -File .\PAX_Purview_Audit_Log_Processor.ps1 -StartDate 2025-11-01 -EndDate 2025-11-02 -IncludeM365Usage -CombineOutput -OutputPath C:\Temp\
+
+ # Export as Excel workbook
+ .\PAX_Purview_Audit_Log_Processor.ps1 -OnlyUserInfo -ExportWorkbook
+
+ # Use device code auth (for automation/headless scenarios)
+ .\PAX_Purview_Audit_Log_Processor.ps1 -OnlyUserInfo -Auth DeviceCode
+
+ # App registration auth (client secret)
+  .\PAX_Purview_Audit_Log_Processor.ps1 -Auth AppRegistration -TenantId "<tenant-id>" -ClientId "<client-id>" -ClientSecret (ConvertTo-SecureString "<client-secret>" -AsPlainText -Force)
+
+ # App registration auth (certificate thumbprint)
+  .\PAX_Purview_Audit_Log_Processor.ps1 -Auth AppRegistration -TenantId "<tenant-id>" -ClientId "<client-id>" -ClientCertificateThumbprint "<thumbprint>"
+
+ PERFORMANCE:
+ Typical execution time: 5-15 seconds (vs. minutes/hours for audit log queries)
+ Network traffic: Minimal (only user directory + license API calls)
+
+ NOT AVAILABLE IN EOM MODE: Requires Microsoft Graph API (user directory/licenses not in EOM).
+
+.PARAMETER MaxNetworkOutageMinutes
+ Maximum continuous network outage the script will tolerate during audit log operations (query creation, polling, record retrieval).
+ Applies to transient network errors: 502 Bad Gateway, 503 Service Unavailable, 504 Gateway Timeout, connection failures.
+ Script automatically retries failed operations with randomized delays (30-60s) until connectivity is restored or timeout is exceeded.
+ Clean terminal output shows error summaries with countdown timers; full error details logged to file for troubleshooting.
+ Progress is preserved - no data loss during network interruptions.
+ Exceeding this window aborts with clear error message indicating tolerance exceeded.
+ Default: 30 minutes (adjustable 1-120)
+
+.PARAMETER Resume
+ Resume an interrupted operation from a checkpoint file.
+ Checkpoint files are automatically created during all auth modes to allow resumption
+ after Ctrl+C, network failures, token expiry, or any interruption.
+
+ IMPORTANT: Resume mode is STANDALONE.
+ All processing parameters are restored from the checkpoint file.
+ You cannot specify other parameters with -Resume (except auth overrides).
+
+ USAGE:
+ -Resume Auto-discover checkpoint in current directory/OutputPath
+ -Resume "path\to\file" Use specific checkpoint file
+
+ ALLOWED WITH -Resume:
+ -Force Use most recent checkpoint without prompting
+ -Auth Override authentication method
+ -TenantId, -ClientId Override auth credentials (for AppRegistration)
+ -ClientSecret Provide client secret (for AppRegistration)
+
+ NOT ALLOWED WITH -Resume:
+ Any other parameter (dates, activities, explosion settings, etc.)
+ These are all restored from the checkpoint to ensure data consistency.
+
+ CHECKPOINT LOCATION:
+  Files are created in OutputPath with pattern: .pax_checkpoint_<timestamp>.json
+
+#>
+
+param(
+ [Parameter(Mandatory = $false)]
+ [string]$StartDate, # Live mode: if omitted (with EndDate) auto-populated later; Replay: optional filter
+
+ [Parameter(Mandatory = $false)]
+ [string]$EndDate, # Live mode: if omitted (with StartDate) auto-populated; Replay: optional filter
+
+ [Parameter(Mandatory = $false)]
+ [string]$OutputPath = "C:\Temp\",
+
+
+ [Parameter(Mandatory = $false)]
+ [ValidateSet('WebLogin', 'DeviceCode', 'Credential', 'Silent', 'AppRegistration')]
+ [string]$Auth = 'WebLogin',
+
+ [Parameter(Mandatory = $false)]
+ [string]$TenantId,
+
+ [Parameter(Mandatory = $false)]
+ [string]$ClientId,
+
+ [Parameter(Mandatory = $false)]
+ [string]$ClientSecret,
+
+ [Parameter(Mandatory = $false)]
+ [string]$ClientCertificateThumbprint,
+
+ [Parameter(Mandatory = $false)]
+ [ValidateSet('CurrentUser','LocalMachine')]
+ [string]$ClientCertificateStoreLocation = 'CurrentUser',
+
+ [Parameter(Mandatory = $false)]
+ [string]$ClientCertificatePath,
+
+ [Parameter(Mandatory = $false)]
+ [System.Security.SecureString]$ClientCertificatePassword,
+
+ [Parameter(Mandatory = $false)]
+ [ValidateRange(0.016667, 24)]
+ [double]$BlockHours = 0.5,
+
+ [Parameter(Mandatory = $false)]
+ [ValidateRange(1, 72)]
+    [int]$PartitionHours = 0, # NOTE(review): default 0 lies outside ValidateRange(1,72); relies on defaults skipping validation, and users cannot explicitly pass 0 — confirm intended
+
+ [Parameter(Mandatory = $false)]
+ [ValidateRange(1, 1000)]
+ [int]$MaxPartitions = 160,
+
+ [Parameter(Mandatory = $false)]
+ [ValidateRange(1, 10000)]
+ [int]$ResultSize = 10000,
+
+ [Parameter(Mandatory = $false)]
+ [ValidateRange(0, 10000)]
+ [int]$PacingMs = 0,
+
+ [Parameter(Mandatory = $false)]
+ [string[]]$ActivityTypes = @('CopilotInteraction'),
+
+ [Parameter(Mandatory = $false)]
+ [string[]]$RecordTypes,
+
+ [Parameter(Mandatory = $false)]
+ [string[]]$ServiceTypes,
+
+ [Parameter(Mandatory = $false)]
+ [switch]$ExplodeArrays,
+
+ [Parameter(Mandatory = $false)]
+ [switch]$ExplodeDeep,
+ [Parameter(Mandatory = $false)]
+ [int]$FlatDepth = 120,
+ # Offline replay of a previously downloaded raw Purview audit CSV (bypasses live Search-UnifiedAuditLog)
+ [Parameter(Mandatory = $false)]
+ [string]$RAWInputCSV,
+ [Parameter(Mandatory = $false)]
+ # Controls concurrent execution: EOM mode limits serial queries, Graph API mode limits partition parallelism
+ [int]$MaxConcurrency = 10,
+ [Parameter(Mandatory = $false)]
+ [switch]$EnableParallel,
+ [Parameter(Mandatory = $false)]
+ [ValidateRange(0, 50)]
+ # Allows multiple activity groups to be processed concurrently (aligns with Microsoft's ~10 query safe limit)
+ [int]$MaxParallelGroups = 8,
+ [Parameter(Mandatory = $false)]
+ [ValidateSet('Off', 'On', 'Auto')]
+ # Default now 'Auto' so that PS 7+ environments engage parallel processing automatically unless explicitly turned Off.
+ [string]$ParallelMode = 'Auto',
+ [Parameter(Mandatory = $false)]
+ [ValidateRange(0, 32)]
+ # 0=auto-detect (2-16 threads based on CPU), 1=serial, 2-32=explicit thread count. Requires PS7+.
+ [int]$ExplosionThreads = 0,
+ [Parameter(Mandatory = $false)]
+ [switch]$DisableAdaptive, # Disable adaptive safeguards (memory/latency/concurrency smoothing)
+ [Parameter(Mandatory = $false)]
+ [ValidateRange(0.0,1.0)]
+ [double]$ProgressSmoothingAlpha = 0.3, # Weight for smoothing dynamic progress total recalculation (0 => off)
+ [Parameter(Mandatory = $false)]
+ [ValidateRange(1000,600000)]
+ [int]$HighLatencyMs = 90000, # Partition average latency threshold (ms) triggering mild concurrency reduction
+ [Parameter(Mandatory = $false)]
+ [ValidateRange(256,32768)]
+ [int]$MemoryPressureMB = 1500, # Working set (MB) threshold to trigger mild concurrency reduction
+ [Parameter(Mandatory = $false)]
+ [ValidateRange(-1,65536)]
+ [int]$MaxMemoryMB = -1, # Max process memory (MB) before flushing $allLogs to disk (-1 = auto 75%, 0 = disabled)
+ [Parameter(Mandatory = $false)]
+ [ValidateRange(100,600000)]
+ [int]$LowLatencyMs = 20000, # Sustained low latency threshold to consider concurrency step-up
+ [Parameter(Mandatory = $false)]
+ [ValidateRange(1,10)]
+ [int]$LowLatencyConsecutive = 2, # Required consecutive low-latency groups before step-up
+ [Parameter(Mandatory = $false)]
+ [ValidateRange(1,100)]
+ [int]$ThroughputDropPct = 15, # % drop vs baseline required (with high latency) to justify reduction
+ [Parameter(Mandatory = $false)]
+ [ValidateRange(0.0,1.0)]
+ [double]$ThroughputSmoothingAlpha = 0.3,# EMA smoothing for throughput baseline
+ [Parameter(Mandatory = $false)]
+ [ValidateRange(1,50)]
+ [int]$AdaptiveConcurrencyCeiling = 6, # Upper bound for adaptive step-ups
+ [Parameter(Mandatory = $false)]
+ [ValidateRange(1, 10000)]
+ [int]$ExportProgressInterval = 10,
+
+ # Streaming export is always-on
+ [Parameter(Mandatory = $false)]
+ [ValidateRange(100, 50000)]
+ [int]$StreamingSchemaSample = 5000,
+
+ [Parameter(Mandatory = $false)]
+ [ValidateRange(100, 50000)]
+ [int]$StreamingChunkSize = 5000,
+
+ [Parameter(Mandatory = $false)]
+ [string[]]$AgentId,
+
+ [Parameter(Mandatory = $false)]
+ [switch]$AgentsOnly,
+
+ [Parameter(Mandatory = $false)]
+ [ValidateSet('Prompt', 'Response', 'Both', 'Null')]
+ [string]$PromptFilter,
+
+ # --- Reliability Enhancements (Backoff & Circuit Breaker) ---
+ [Parameter(Mandatory = $false)]
+ [ValidateRange(1,50)]
+ [int]$CircuitBreakerThreshold = 5, # Consecutive block failures before opening circuit breaker
+ [Parameter(Mandatory = $false)]
+ [ValidateRange(5,3600)]
+ [int]$CircuitBreakerCooldownSeconds = 120, # Cooldown duration after breaker trips
+ [Parameter(Mandatory = $false)]
+ [ValidateRange(0.1,120)]
+ [double]$BackoffBaseSeconds = 1.0, # Base seconds for exponential backoff between block retries
+ [Parameter(Mandatory = $false)]
+ [ValidateRange(1,600)]
+ [int]$BackoffMaxSeconds = 45, # Max cap for exponential backoff delay
+
+ [Parameter(Mandatory = $false)]
+ [switch]$ExcludeAgents,
+
+ [Parameter(Mandatory = $false)]
+ [string[]]$UserIds,
+
+ [Parameter(Mandatory = $false)]
+ [string[]]$GroupNames,
+
+ [Parameter(Mandatory = $false)]
+ [switch]$Help,
+
+ # Emit structured metrics JSON alongside CSV (OutputFile name with .metrics.json)
+ [Parameter(Mandatory = $false)]
+ [switch]$EmitMetricsJson,
+
+ # Override metrics output path (optional). If provided and -EmitMetricsJson specified, writes here instead of OutputFile substitution.
+ [Parameter(Mandatory = $false)]
+ [string]$MetricsPath,
+
+ # Ensure completeness: aggressively subdivide any window still returning server 10K limit until below threshold or min window reached.
+ [Parameter(Mandatory = $false)]
+ [switch]$AutoCompleteness,
+
+    # Explicitly include the Microsoft 365 Copilot activity type (CopilotInteraction); DSPM activity types are controlled by -IncludeDSPMForAI below
+ [Parameter(Mandatory = $false)]
+ [switch]$IncludeCopilotInteraction,
+
+ [Parameter(Mandatory = $false)]
+ [switch]$IncludeM365Usage,
+
+ [Parameter(Mandatory = $false)]
+ [switch]$IncludeDSPMForAI,
+
+ # DSPM for AI: Exclude CopilotInteraction activity type (overrides custom list and default fallback)
+ [Parameter(Mandatory = $false)]
+ [switch]$ExcludeCopilotInteraction,
+
+ # Excel Export: Export data to Excel workbook (.xlsx) instead of CSV files
+ [Parameter(Mandatory = $false)]
+ [switch]$ExportWorkbook,
+
+ # Append data to existing file (CSV or Excel - requires -ExportWorkbook for Excel)
+ # Provide filename (e.g., "MyReport.xlsx") or full path (e.g., "C:\Data\Report.csv")
+ [Parameter(Mandatory = $false)]
+ [string]$AppendFile,
+
+ # Combine all activity types into single output file/tab (CSV or Excel)
+ # CSV default when omitted: separate files per activity type
+ # Excel default when omitted: separate tabs per activity type
+ # Use -CombineOutput switch to merge all activity types into one file/tab
+ [Parameter(Mandatory = $false)]
+ [switch]$CombineOutput,
+
+ # Force execution without interactive prompts (PAYG warning, conflict resolution)
+ [Parameter(Mandatory = $false)]
+ [switch]$Force,
+
+ # Skip pre-query capability diagnostics (advanced)
+ [Parameter(Mandatory = $false)]
+ [switch]$SkipDiagnostics,
+
+ # Use Exchange Online Management mode (Search-UnifiedAuditLog cmdlet, serial-only)
+ [Parameter(Mandatory = $false)]
+ [switch]$UseEOM,
+
+ # Include Entra user directory and license information in export (adds separate EntraUsers file/tab; column count may evolve)
+ [Parameter(Mandatory = $false)]
+ [switch]$IncludeUserInfo,
+
+ # Export only Entra user directory and license information (skips all audit log retrieval)
+ [Parameter(Mandatory = $false)]
+ [switch]$OnlyUserInfo,
+
+ # Maximum minutes to tolerate continuous network outage during Graph async polling & record retrieval (adaptive backoff). Default 30.
+ [Parameter(Mandatory = $false)]
+ [int]$MaxNetworkOutageMinutes = 30,
+
+ # Export Graph API telemetry CSV for partition timing analysis and troubleshooting
+ [Parameter(Mandatory = $false)]
+ [switch]$IncludeTelemetry,
+
+ # Resume from checkpoint file - HANDLED VIA $args (not param block) to support:
+ # -Resume (auto-discover checkpoint in OutputPath)
+ # -Resume "path/to/file" (explicit checkpoint path)
+ # This parameter captures any remaining arguments for manual -Resume parsing
+ [Parameter(Mandatory = $false, ValueFromRemainingArguments = $true)]
+ [string[]]$RemainingArgs
+)
+
+# DEBUG MARKER (removed to reduce noise)
+
+# ============================================================
+# MANUAL -Resume PARAMETER PARSING
+# Enables: -Resume (auto-discover) and -Resume "path" (explicit)
+# ============================================================
+$Resume = $null          # checkpoint path ('' => auto-discover) once -Resume is seen
+$ResumeSpecified = $false
+if ($RemainingArgs -and $RemainingArgs.Count -gt 0) {
+    for ($i = 0; $i -lt $RemainingArgs.Count; $i++) {
+        if ($RemainingArgs[$i] -eq '-Resume') {
+            # Check if next argument exists and is not another parameter
+            $ResumeSpecified = $true
+            # NOTE(review): a following token starting with '-' is treated as another parameter,
+            # so a checkpoint path beginning with '-' falls back to auto-discover mode
+            if (($i + 1) -lt $RemainingArgs.Count -and $RemainingArgs[$i + 1] -notmatch '^-') {
+                $Resume = $RemainingArgs[$i + 1]
+                $i++ # Skip the path argument
+            } else {
+                $Resume = '' # Auto-discover mode
+            }
+        }
+    }
+}
+
+# ============================================================
+# PROMOTE AUTH PARAMETERS TO SCRIPT SCOPE
+# Enables access from within functions (e.g., Connect-PurviewAudit)
+# ============================================================
+$script:TenantId = $TenantId
+$script:ClientId = $ClientId
+$script:ClientSecret = $ClientSecret # NOTE(review): secret held as plain [string]; consider SecureString to limit exposure in memory/transcripts
+$script:ClientCertificateThumbprint = $ClientCertificateThumbprint
+$script:ClientCertificateStoreLocation = $ClientCertificateStoreLocation
+$script:ClientCertificatePath = $ClientCertificatePath
+$script:ClientCertificatePassword = $ClientCertificatePassword
+
+ function Resolve-CommaSeparatedValues { # NOTE(review): name implies splitting comma-separated tokens, but past the early-return the body ignores $Values and rebuilds the activity-type list from script-scope switches — looks like a merge artifact; confirm intent
+    param([string[]]$Values)
+
+    if (-not $Values -or $Values.Count -eq 0) {
+        return $Values # the only use of $Values in this function
+    }
+
+    $finalActivityTypes = @()
+
+    # Step 1: Add explicit -ActivityTypes parameter values (if provided and not default)
+    # NOTE(review): inside a function, $PSBoundParameters reflects this function's own bound
+    # parameters (only $Values), so ContainsKey('ActivityTypes') is always $false here — verify
+    if ($PSBoundParameters.ContainsKey('ActivityTypes') -and $ActivityTypes) {
+        foreach ($actType in $ActivityTypes) {
+            if ($actType -and $actType -ne '') {
+                $finalActivityTypes += $actType
+            }
+        }
+        if ($finalActivityTypes.Count -gt 0) {
+            Write-LogHost "Custom ActivityTypes provided: $($finalActivityTypes -join ', ')" -ForegroundColor Gray
+        }
+    }
+
+    # Step 2: Add DSPM for AI activity types if switch enabled
+    if ($IncludeDSPMForAI) {
+        $finalActivityTypes += 'ConnectedAIAppInteraction'
+        $finalActivityTypes += 'AIInteraction'
+        $finalActivityTypes += 'AIAppInteraction'
+        Write-LogHost "DSPM for AI: Adding ConnectedAIAppInteraction, AIInteraction, AIAppInteraction (See billing information for details)" -ForegroundColor Cyan
+    }
+
+    # Step 3: Add CopilotInteraction when explicitly requested
+    if ($IncludeCopilotInteraction -and -not ($finalActivityTypes -contains $copilotBaseActivityType)) {
+        $finalActivityTypes += $copilotBaseActivityType
+        Write-LogHost "IncludeCopilotInteraction: Adding $copilotBaseActivityType (explicit request)" -ForegroundColor Cyan
+    }
+
+    # Step 4: Add Microsoft 365 usage bundle when requested
+    Write-LogHost ("IncludeM365Usage switch present: {0}" -f $IncludeM365Usage.IsPresent) -ForegroundColor DarkGray
+    if ($IncludeM365Usage) {
+        $finalActivityTypes += $m365UsageActivityBundle
+        Write-LogHost ("M365 Usage bundle: Adding {0} activity types across Exchange/SharePoint/OneDrive/Teams" -f $m365UsageActivityBundle.Count) -ForegroundColor Cyan
+
+        # NOTE(review): this assignment creates a function-local $RecordTypes; the parent/script
+        # scope variable is NOT updated by this (use $script:RecordTypes if that was intended)
+        $RecordTypes = @(
+            if ($RecordTypes) { $RecordTypes }
+            $m365UsageRecordBundle
+        ) | Where-Object { $_ } | Select-Object -Unique
+        if ($RecordTypes.Count -eq 0) { $RecordTypes = $null }
+
+        # CRITICAL: Do NOT set ServiceTypes for M365 usage mode - Graph API should get ALL workloads in single pass
+        # Multiple serviceFilter values cause unnecessary workload splits (Exchange, SharePoint, OneDrive, Teams)
+        # Instead, send NO serviceFilter and let Graph API return all workloads in one query per partition
+        $ServiceTypes = $null # NOTE(review): function-local only — script-scope $ServiceTypes is unaffected; confirm this nulling actually reaches the query builder
+
+        if ($RecordTypes) {
+            Write-LogHost "M365 Usage bundle: RecordTypes => $($RecordTypes -join ', ')" -ForegroundColor Gray
+        }
+        Write-LogHost "M365 Usage mode: ServiceTypes => NULL (single workload pass, all services combined)" -ForegroundColor Cyan
+    }
+
+    # Step 5: BASE ACTIVITY TYPE - Add CopilotInteraction as default base type
+    # This is the core Microsoft 365 Copilot activity type (FREE, included in M365 Copilot licensing)
+    # Captures ALL M365 Copilot usage including Teams meetings, Word, Excel, PowerPoint, Outlook, etc.
+    # Auto-add when:
+    #   1. User didn't explicitly provide -ActivityTypes parameter (default behavior), OR
+    #   2. User specified any DSPM switch (implies Copilot context needed)
+    # Exception: Always respect -ExcludeCopilotInteraction (handled in Step 6)
+    $userProvidedCustomTypes = $PSBoundParameters.ContainsKey('ActivityTypes') # NOTE(review): same scope caveat as Step 1 — always $false for this function's $PSBoundParameters
+    $userWantsDSPM = $IncludeDSPMForAI
+    if (-not $ExcludeCopilotInteraction) {
+        # Auto-add if no custom types provided OR if DSPM switches used (implies Copilot data needed)
+        if (-not $userProvidedCustomTypes -or $userWantsDSPM) {
+            # Add CopilotInteraction if not already present
+            if (-not ($finalActivityTypes -contains $copilotBaseActivityType)) {
+                $finalActivityTypes = @($copilotBaseActivityType) + $finalActivityTypes
+            }
+        }
+    }
+
+    # Step 6: EXCLUSION OVERRIDE - Remove CopilotInteraction if -ExcludeCopilotInteraction is true
+    if ($ExcludeCopilotInteraction) {
+        $finalActivityTypes = $finalActivityTypes | Where-Object { $_ -ne $copilotBaseActivityType }
+    }
+
+    # Step 7: Final deduplication and validation
+    $finalActivityTypes = @($finalActivityTypes | Select-Object -Unique)
+
+    return $finalActivityTypes
+}
+
+function Send-PromptNotification {
+    <#
+    .SYNOPSIS
+    Plays a system beep to alert user that a prompt requires attention.
+    .DESCRIPTION
+    Useful when user is working in other windows and needs to be notified
+    when a prompt appears that requires input.
+    #>
+
+    try {
+        # Play 3 short beeps to get attention
+        [Console]::Beep(800, 200)  # 800Hz for 200ms
+        Start-Sleep -Milliseconds 100
+        [Console]::Beep(1000, 200)  # 1000Hz for 200ms
+        Start-Sleep -Milliseconds 100
+        [Console]::Beep(1200, 300)  # 1200Hz for 300ms (slightly longer final beep)
+    }
+    catch {
+        # Best-effort notification: swallow failures where [Console]::Beep is unsupported (e.g., headless/server hosts)
+    }
+}
+
+# Validate -OnlyUserInfo parameter compatibility
+if ($OnlyUserInfo) {
+ $incompatibleParams = @()
+
+ # Date filtering parameters
+ if ($PSBoundParameters.ContainsKey('StartDate')) { $incompatibleParams += " - StartDate (not applicable for user-only export)" }
+ if ($PSBoundParameters.ContainsKey('EndDate')) { $incompatibleParams += " - EndDate (not applicable for user-only export)" }
+
+ # Activity configuration parameters
+ if ($PSBoundParameters.ContainsKey('ActivityTypes')) { $incompatibleParams += " - ActivityTypes (cleared by -OnlyUserInfo)" }
+ if ($IncludeM365Usage) { $incompatibleParams += " - IncludeM365Usage (activity type modifier)" }
+ if ($IncludeDSPMForAI) { $incompatibleParams += " - IncludeDSPMForAI (activity type modifier)" }
+ if ($ExcludeCopilotInteraction) { $incompatibleParams += " - ExcludeCopilotInteraction (activity type modifier)" }
+
+ # Audit retrieval settings
+ if ($PSBoundParameters.ContainsKey('BlockHours') -and $BlockHours -ne 0.5) { $incompatibleParams += " - BlockHours (audit query partitioning)" }
+ if ($PSBoundParameters.ContainsKey('PartitionHours') -and $PartitionHours -ne 0) { $incompatibleParams += " - PartitionHours (audit query partitioning)" }
+ if ($PSBoundParameters.ContainsKey('MaxPartitions') -and $MaxPartitions -ne 160) { $incompatibleParams += " - MaxPartitions (audit query limits)" }
+ if ($PSBoundParameters.ContainsKey('ResultSize') -and $ResultSize -ne 10000) { $incompatibleParams += " - ResultSize (audit query page size)" }
+ if ($PSBoundParameters.ContainsKey('PacingMs') -and $PacingMs -ne 0) { $incompatibleParams += " - PacingMs (audit query throttling)" }
+ if ($AutoCompleteness) { $incompatibleParams += " - AutoCompleteness (audit log completeness checks)" }
+ if ($PSBoundParameters.ContainsKey('StreamingSchemaSample') -and $StreamingSchemaSample -ne 5000) { $incompatibleParams += " - StreamingSchemaSample (audit record schema sampling)" }
+ if ($PSBoundParameters.ContainsKey('StreamingChunkSize') -and $StreamingChunkSize -ne 5000) { $incompatibleParams += " - StreamingChunkSize (audit streaming batch size)" }
+ if ($PSBoundParameters.ContainsKey('ExportProgressInterval') -and $ExportProgressInterval -ne 10) { $incompatibleParams += " - ExportProgressInterval (audit export progress)" }
+
+ # Filtering parameters
+ if ($PSBoundParameters.ContainsKey('AgentId')) { $incompatibleParams += " - AgentId (audit record filtering)" }
+ if ($AgentsOnly) { $incompatibleParams += " - AgentsOnly (audit record filtering)" }
+ if ($ExcludeAgents) { $incompatibleParams += " - ExcludeAgents (audit record filtering)" }
+ if ($PSBoundParameters.ContainsKey('PromptFilter')) { $incompatibleParams += " - PromptFilter (audit record content filtering)" }
+ if ($PSBoundParameters.ContainsKey('UserIds')) { $incompatibleParams += " - UserIds (audit record filtering; Entra fetch retrieves all users)" }
+ if ($PSBoundParameters.ContainsKey('GroupNames')) { $incompatibleParams += " - GroupNames (audit record filtering)" }
+ if ($PSBoundParameters.ContainsKey('RecordTypes')) { $incompatibleParams += " - RecordTypes (audit record filtering)" }
+ if ($PSBoundParameters.ContainsKey('ServiceTypes')) { $incompatibleParams += " - ServiceTypes (audit record filtering)" }
+
+ # Processing mode parameters
+ if ($ExplodeArrays) { $incompatibleParams += " - ExplodeArrays (audit record array expansion)" }
+ if ($ExplodeDeep) { $incompatibleParams += " - ExplodeDeep (audit record deep expansion)" }
+ if ($PSBoundParameters.ContainsKey('RAWInputCSV')) { $incompatibleParams += " - RAWInputCSV (offline audit replay mode)" }
+
+ # Parallel processing parameters
+ if ($EnableParallel) { $incompatibleParams += " - EnableParallel (parallel audit query execution)" }
+ if ($PSBoundParameters.ContainsKey('MaxConcurrency') -and $MaxConcurrency -ne 10) { $incompatibleParams += " - MaxConcurrency (concurrent query/partition limit)" }
+ if ($PSBoundParameters.ContainsKey('MaxParallelGroups') -and $MaxParallelGroups -ne 8) { $incompatibleParams += " - MaxParallelGroups (parallel activity group limit)" }
+ if ($PSBoundParameters.ContainsKey('ParallelMode') -and $ParallelMode -ne 'Auto') { $incompatibleParams += " - ParallelMode (parallel processing mode)" }
+ if ($DisableAdaptive) { $incompatibleParams += " - DisableAdaptive (adaptive concurrency controls)" }
+ if ($PSBoundParameters.ContainsKey('ProgressSmoothingAlpha') -and $ProgressSmoothingAlpha -ne 0.3) { $incompatibleParams += " - ProgressSmoothingAlpha (adaptive tuning)" }
+ if ($PSBoundParameters.ContainsKey('HighLatencyMs') -and $HighLatencyMs -ne 90000) { $incompatibleParams += " - HighLatencyMs (adaptive tuning)" }
+ if ($PSBoundParameters.ContainsKey('MemoryPressureMB') -and $MemoryPressureMB -ne 1500) { $incompatibleParams += " - MemoryPressureMB (adaptive tuning)" }
+ if ($PSBoundParameters.ContainsKey('LowLatencyMs') -and $LowLatencyMs -ne 20000) { $incompatibleParams += " - LowLatencyMs (adaptive tuning)" }
+ if ($PSBoundParameters.ContainsKey('LowLatencyConsecutive') -and $LowLatencyConsecutive -ne 2) { $incompatibleParams += " - LowLatencyConsecutive (adaptive tuning)" }
+ if ($PSBoundParameters.ContainsKey('ThroughputDropPct') -and $ThroughputDropPct -ne 15) { $incompatibleParams += " - ThroughputDropPct (adaptive tuning)" }
+ if ($PSBoundParameters.ContainsKey('ThroughputSmoothingAlpha') -and $ThroughputSmoothingAlpha -ne 0.3) { $incompatibleParams += " - ThroughputSmoothingAlpha (adaptive tuning)" }
+ if ($PSBoundParameters.ContainsKey('AdaptiveConcurrencyCeiling') -and $AdaptiveConcurrencyCeiling -ne 6) { $incompatibleParams += " - AdaptiveConcurrencyCeiling (adaptive tuning)" }
+
+ # Reliability parameters (audit-specific)
+ if ($PSBoundParameters.ContainsKey('CircuitBreakerThreshold') -and $CircuitBreakerThreshold -ne 5) { $incompatibleParams += " - CircuitBreakerThreshold (block failure circuit breaker)" }
+ if ($PSBoundParameters.ContainsKey('CircuitBreakerCooldownSeconds') -and $CircuitBreakerCooldownSeconds -ne 120) { $incompatibleParams += " - CircuitBreakerCooldownSeconds (circuit breaker cooldown)" }
+ if ($PSBoundParameters.ContainsKey('BackoffBaseSeconds') -and $BackoffBaseSeconds -ne 1.0) { $incompatibleParams += " - BackoffBaseSeconds (block retry backoff)" }
+ if ($PSBoundParameters.ContainsKey('BackoffMaxSeconds') -and $BackoffMaxSeconds -ne 45) { $incompatibleParams += " - BackoffMaxSeconds (block retry max backoff)" }
+
+ # Alternative modes
+ if ($UseEOM) { $incompatibleParams += " - UseEOM (Exchange Online Management mode incompatible with Graph Entra enrichment)" }
+
+ # Output combination parameters
+ if ($CombineOutput) { $incompatibleParams += " - CombineOutput (only relevant with multiple activity types)" }
+ if ($AppendFile) { $incompatibleParams += " - AppendFile (appending user-only data to existing audit output not supported)" }
+
+ if ($incompatibleParams.Count -gt 0) {
+ Write-Host ""
+ Write-Host "ERROR: The -OnlyUserInfo switch cannot be used with the following parameters:" -ForegroundColor Red
+ Write-Host ""
+ $incompatibleParams | ForEach-Object { Write-Host $_ -ForegroundColor Yellow }
+ Write-Host ""
+ Write-Host "The -OnlyUserInfo switch exports only Entra user directory and license information (no audit logs)." -ForegroundColor Cyan
+ Write-Host ""
+ Write-Host "Compatible parameters:" -ForegroundColor Green
+ Write-Host " - OutputPath (where to save the file)" -ForegroundColor White
+ Write-Host " - Auth (authentication method: WebLogin, DeviceCode, Credential, Silent)" -ForegroundColor White
+ Write-Host " - ExportWorkbook (export to Excel format)" -ForegroundColor White
+ Write-Host " - Force (bypass interactive prompts)" -ForegroundColor White
+ Write-Host " - MaxNetworkOutageMinutes (network resilience for Graph API calls)" -ForegroundColor White
+ Write-Host " - EmitMetricsJson (track Entra retrieval metrics)" -ForegroundColor White
+ Write-Host " - MetricsPath (custom metrics output location)" -ForegroundColor White
+ Write-Host " - SkipDiagnostics (skip pre-query capability checks)" -ForegroundColor White
+ Write-Host ""
+ Write-Host "Please remove the incompatible parameters and try again." -ForegroundColor Cyan
+ Write-Host ""
+ exit 1
+ }
+
+ # If validation passes, configure for user-only export
+ Write-Host ""
+ Write-Host "INFO: -OnlyUserInfo mode enabled. Skipping all audit log retrieval, exporting only Entra user data." -ForegroundColor Green
+ Write-Host ""
+ $IncludeUserInfo = $true
+ $ActivityTypes = @()
+}
+
+# Canonical maps for Graph filter normalization
+# Keys are the lower-cased form of user input; values carry the exact casing
+# the Graph security audit API expects. Tokens whose lowercase form is not in
+# a map pass through unchanged (see the normalization pass below).
+$recordTypeCanonicalMap = @{
+ 'azureactivedirectory' = 'AzureActiveDirectory'
+ 'azureactivedirectoryaccountlogon' = 'AzureActiveDirectoryAccountLogon'
+ 'azureactivedirectorystslogon' = 'AzureActiveDirectoryStsLogon'
+ 'exchangeadmin' = 'ExchangeAdmin'
+ 'exchangeitem' = 'ExchangeItem'
+ 'exchangemailbox' = 'ExchangeMailbox'
+ 'sharepointfileoperation' = 'SharePointFileOperation'
+ 'sharepointsharingoperation' = 'SharePointSharingOperation'
+ 'sharepoint' = 'SharePoint'
+ 'onedrive' = 'OneDrive'
+ 'microsoftteams' = 'MicrosoftTeams'
+}
+
+# Service (workload) names: same lowercase-key -> canonical-casing convention.
+$serviceCanonicalMap = @{
+ 'azureactivedirectory' = 'AzureActiveDirectory'
+ 'exchange' = 'Exchange'
+ 'sharepoint' = 'SharePoint'
+ 'onedrive' = 'OneDrive'
+ 'teams' = 'Teams'
+}
+
+# Normalize optional Graph filter passthrough parameters (dedupe & trim)
+#
+# The original code repeated the same split/trim/dedupe loop three times (for
+# -ActivityTypes, -RecordTypes and -ServiceTypes) and then re-trimmed tokens
+# that were already trimmed. The logic is factored into two local
+# scriptblocks; behavior is unchanged: comma-splitting, quote/tab stripping,
+# blank removal, Select-Object -Unique dedupe, $null when nothing survives,
+# and canonical-casing for record/service types via the maps defined above.
+
+# Shared tokenizer: accepts the raw parameter value (array elements and/or
+# comma-separated strings), splits on commas, strips surrounding spaces,
+# quotes and tabs, drops blanks, and dedupes. Always emits an array.
+$splitFilterTokens = {
+    param($values)
+    $tokens = New-Object System.Collections.Generic.List[string]
+    foreach ($value in $values) {
+        if ($null -eq $value) { continue }
+        foreach ($piece in ($value.ToString() -split ',')) {
+            $token = $piece.Trim(" '""`t")
+            if (-not [string]::IsNullOrWhiteSpace($token)) { $tokens.Add($token) }
+        }
+    }
+    @($tokens | Select-Object -Unique)
+}
+
+# Shared canonicalizer: maps each token to its canonical Graph casing when the
+# lowercase form is known; unknown tokens pass through unchanged.
+$applyCanonicalMap = {
+    param($tokens, $map)
+    @(
+        foreach ($token in $tokens) {
+            $key = $token.ToLowerInvariant()
+            if ($map.ContainsKey($key)) { $map[$key] } else { $token }
+        }
+    ) | Select-Object -Unique
+}
+
+# Split ActivityTypes if provided as comma-separated string
+if ($ActivityTypes) {
+    $ActivityTypes = @(& $splitFilterTokens $ActivityTypes)
+    if ($ActivityTypes.Count -eq 0) { $ActivityTypes = $null }
+}
+
+if ($RecordTypes) {
+    $RecordTypes = @(& $splitFilterTokens $RecordTypes)
+    if ($RecordTypes.Count -eq 0) {
+        $RecordTypes = $null
+    } else {
+        $RecordTypes = @(& $applyCanonicalMap $RecordTypes $recordTypeCanonicalMap)
+    }
+}
+
+if ($ServiceTypes) {
+    $ServiceTypes = @(& $splitFilterTokens $ServiceTypes)
+    if ($ServiceTypes.Count -eq 0) {
+        $ServiceTypes = $null
+    } else {
+        $ServiceTypes = @(& $applyCanonicalMap $ServiceTypes $serviceCanonicalMap)
+    }
+}
+
+# Mapping of audit record types to supported workloads for Graph security audit queries
+# Keys are camelCase record-type names; values list the workload(s) each record
+# type can surface in. Used to decide which service-filtered query pass should
+# handle a given record type.
+$recordTypeWorkloadMap = @{
+ 'azureActiveDirectory' = @('AzureActiveDirectory')
+ 'azureActiveDirectoryAccountLogon' = @('AzureActiveDirectory')
+ 'azureActiveDirectoryStsLogon' = @('AzureActiveDirectory')
+ 'exchangeAdmin' = @('Exchange')
+ 'exchangeItem' = @('Exchange')
+ 'exchangeMailbox' = @('Exchange')
+ 'sharePointFileOperation' = @('SharePoint','OneDrive')
+ 'sharePointSharingOperation' = @('SharePoint','OneDrive')
+ 'sharePoint' = @('SharePoint','OneDrive')
+ 'onedrive' = @('OneDrive')
+ 'microsoftTeams' = @('Teams')
+ # M365 usage record types: Process in first workload pass to avoid creating additional passes
+ # These record types capture cross-workload activities (Office apps, Forms, Stream, Planner, PowerApps)
+ # Mapping to Exchange ensures they run in the first service-filtered pass
+ 'officeNative' = @('Exchange')
+ 'microsoftForms' = @('Exchange')
+ 'microsoftStream' = @('Exchange')
+ 'plannerPlan' = @('Exchange')
+ 'plannerTask' = @('Exchange')
+ 'powerAppsApp' = @('Exchange')
+}
+
+# Per-workload operation lists used when building service-scoped audit queries.
+# NOTE(review): these are curated subsets of each workload's operations, not
+# exhaustive lists — verify against the audit-operations reference if extended.
+$serviceOperationMap = @{
+ 'AzureActiveDirectory' = @('UserLoggedIn','UserLoginFailed','AdminLoggedIn','ResetUserPassword','AddRegisteredUser','UpdateUser','ChangedUserSetting')
+ 'Exchange' = @('MailboxLogin','MailItemsAccessed','Send','SendOnBehalf','SoftDelete','HardDelete','MoveToDeletedItems','CopyToFolder','NewInboxRule','UpdateInboxRules','AddMailboxPermission','RemoveMailboxPermission')
+ 'SharePoint' = @('FileAccessed','FileDownloaded','FileUploaded','FileModified','FileDeleted','FileMoved','SharingSet','SharingInvitationCreated','SharingInvitationAccepted','SharedLinkCreated','SharingRevoked','AddMemberToUnifiedGroup','RemoveMemberFromUnifiedGroup')
+ 'OneDrive' = @('FileAccessed','FileDownloaded','FileUploaded','FileModified','FileDeleted','FileMoved','SharingSet','SharingInvitationCreated','SharingInvitationAccepted','SharedLinkCreated','SharingRevoked','AddMemberToUnifiedGroup','RemoveMemberFromUnifiedGroup')
+ 'Teams' = @('TeamMemberAdded','TeamMemberRemoved','ChannelAdded','ChannelDeleted','ChannelMessageSent','ChannelMessageDeleted','TeamDeleted','TeamArchived','AddMemberToUnifiedGroup','RemoveMemberFromUnifiedGroup')
+ 'MicrosoftForms' = @('CreateForm','EditForm','DeleteForm','ViewForm','CreateResponse','SubmitResponse','ViewResponse','DeleteResponse')
+ 'MicrosoftStream' = @('StreamModified','StreamViewed','StreamDeleted','StreamDownloaded')
+ 'MicrosoftPlanner' = @('PlanCreated','PlanDeleted','PlanModified','TaskCreated','TaskDeleted','TaskModified','TaskAssigned','TaskCompleted')
+ 'PowerApps' = @('LaunchedApp','CreatedApp','EditedApp','DeletedApp','PublishedApp')
+}
+
+# Base activity type for all M365 Copilot usage queries (FREE tier).
+$copilotBaseActivityType = 'CopilotInteraction'
+# Workloads and record types grouped into the "M365 usage" query bundle.
+$m365UsageServiceBundle = @('Exchange','SharePoint','OneDrive','Teams')
+$m365UsageRecordBundle = @('ExchangeAdmin','ExchangeItem','ExchangeMailbox','SharePointFileOperation','SharePointSharingOperation','SharePoint','OneDrive','MicrosoftTeams','OfficeNative','MicrosoftForms','MicrosoftStream','PlannerPlan','PlannerTask','PowerAppsApp')
+# Curated M365 usage operations spanning Exchange/SharePoint/OneDrive/Teams/Forms/Stream/Planner/PowerApps and Office desktop apps (Word/Excel/PowerPoint/OneNote)
+# The trailing Select-Object -Unique dedupes operations shared by several
+# sections (e.g. file and sharing events apply to both SharePoint and OneDrive)
+# while preserving first-seen order.
+$m365UsageActivityBundle = @(
+ # === Authentication ===
+ 'UserLoggedIn',
+
+ # === Exchange/Email ===
+ 'MailboxLogin','MailItemsAccessed','Send','SendOnBehalf','SoftDelete','HardDelete','MoveToDeletedItems','CopyToFolder',
+
+ # === SharePoint/OneDrive - Files ===
+ 'FileAccessed','FileDownloaded','FileUploaded','FileModified','FileDeleted','FileMoved',
+ 'FileCheckedIn','FileCheckedOut','FileRecycled','FileRestored','FileVersionsAllDeleted',
+
+ # === SharePoint/OneDrive - Sharing ===
+ 'SharingSet','SharingInvitationCreated','SharingInvitationAccepted','SharedLinkCreated','SharingRevoked',
+ 'AddedToSecureLink','RemovedFromSecureLink','SecureLinkUsed',
+
+ # === Groups/Unified Groups ===
+ 'AddMemberToUnifiedGroup','RemoveMemberFromUnifiedGroup',
+
+ # === Teams - Team/Channel management ===
+ 'TeamCreated','TeamDeleted','TeamArchived','TeamSettingChanged',
+ 'TeamMemberAdded','TeamMemberRemoved','MemberAdded','MemberRemoved','MemberRoleChanged',
+ 'ChannelAdded','ChannelDeleted','ChannelSettingChanged','ChannelOwnerResponded',
+ 'ChannelMessageSent','ChannelMessageDeleted',
+ 'BotAddedToTeam','BotRemovedFromTeam',
+ 'TabAdded','TabRemoved','TabUpdated',
+ 'ConnectorAdded','ConnectorRemoved','ConnectorUpdated',
+
+ # === Teams - Chat/Messaging (1:1 and group chats) ===
+ 'TeamsSessionStarted',
+ 'ChatCreated','ChatRetrieved','ChatUpdated',
+ 'MessageSent','MessageRead','MessageDeleted','MessageUpdated','MessagesListed',
+ 'MessageCreation','MessageCreatedHasLink','MessageEditedHasLink',
+ 'MessageHostedContentRead','MessageHostedContentsListed',
+ 'SensitiveContentShared',
+
+ # === Teams - Meeting lifecycle ===
+ 'MeetingCreated','MeetingUpdated','MeetingDeleted',
+ 'MeetingStarted','MeetingEnded',
+ 'MeetingParticipantJoined','MeetingParticipantLeft','MeetingParticipantRoleChanged',
+ 'MeetingRecordingStarted','MeetingRecordingEnded',
+ 'MeetingDetail','MeetingParticipantDetail',
+ 'LiveNotesUpdate','AINotesUpdate',
+ 'RecordingExported','TranscriptsExported',
+
+ # === Teams - Apps/Approvals ===
+ 'AppInstalled','AppUpgraded','AppUninstalled',
+ 'CreatedApproval','ApprovedRequest','RejectedApprovalRequest','CanceledApprovalRequest',
+
+ # === Office apps (Word, Excel, PowerPoint, etc.) ===
+ 'Create','Edit','Open','Save','Print',
+
+ # === Microsoft Forms ===
+ 'CreateForm','EditForm','DeleteForm','ViewForm','CreateResponse','SubmitResponse','ViewResponse','DeleteResponse',
+
+ # === Microsoft Stream ===
+ 'StreamModified','StreamViewed','StreamDeleted','StreamDownloaded',
+
+ # === Planner ===
+ 'PlanCreated','PlanDeleted','PlanModified','TaskCreated','TaskDeleted','TaskModified','TaskAssigned','TaskCompleted',
+
+ # === Power Apps ===
+ 'LaunchedApp','CreatedApp','EditedApp','DeletedApp','PublishedApp',
+
+ # === Copilot ===
+ 'CopilotInteraction'
+) | Select-Object -Unique
+
+# Script version constant (must appear after param/help to keep param() valid as first executable block)
+$ScriptVersion = '1.10.6'
+
+# --- Initialize/Clear persistent script variables to prevent cross-run contamination ---
+# Note: Script-scoped variables persist across multiple script invocations in the same PowerShell session
+$script:partitionStatus = $null
+$script:processedJobIds = $null
+$script:shownJobMessages = $null
+
+# --- Known Microsoft 365 Copilot SKU IDs ---
+# Source: PAX Graph Audit Log Processor + Microsoft official SKU documentation
+# Keys are SKU GUIDs, values are display names used when reporting licensing.
+# NOTE(review): entries marked "variant"/"Detected from tenant usage" were
+# observed empirically — verify against the licensing service plan reference.
+$script:CopilotSkuIds = @{
+ 'c815c93d-0759-4bb8-b857-bc921a71be83' = 'Microsoft 365 Copilot' # M365 Copilot
+ '06ebc4ee-1bb5-47dd-8120-11324bc54e06' = 'Microsoft 365 Copilot' # M365 Copilot (alternative)
+ 'a1c5e422-7c00-4433-a276-0f5b5f02e952' = 'Copilot Pro' # Copilot Pro
+ '4a51bca5-1eff-43f5-878c-177680f191af' = 'Microsoft Copilot for Microsoft 365' # Another variant
+ 'f841e8a7-8d86-4eae-af8c-d14b2a4c7228' = 'Microsoft 365 Copilot' # Additional variant
+ 'd814ea5e-2d90-455a-8b9e-2e5e4f3e8e8d' = 'Microsoft Copilot for M365' # Additional variant
+ '440eaaa8-b3e0-484b-a8be-62870b9ba70a' = 'Microsoft 365 Copilot' # Detected from tenant usage
+ # Additional SKUs from Microsoft official documentation (https://learn.microsoft.com/licensing-service-plan-reference)
+ 'ad9c22b3-52d7-4e7e-973c-88121ea96436' = 'Microsoft 365 Copilot (Education Faculty)' # EDU Faculty
+ '15f2e9fc-b782-4f73-bf51-81d8b7fff6f4' = 'Microsoft Copilot for Sales' # Sales Copilot
+ '639dec6b-bb19-468b-871c-c5c441c4b0cb' = 'Copilot for Microsoft 365' # Official product name variant
+}
+
+# --- DSPM for AI: Synchronized Timestamp & OutputPath Validation ---
+
+# Generate synchronized timestamp for all output files in this run
+# Global scope so background jobs and helper functions see the same value.
+$global:ScriptRunTimestamp = Get-Date -Format 'yyyyMMdd_HHmmss'
+
+# --- Logging Helper Functions (defined early for use throughout script) ---
+# Log file path will be set after OutputFile resolution; buffer early entries.
+$script:LogFile = $null
+$script:LogBuffer = New-Object System.Collections.Generic.List[string]
+
+function Write-Log {
+    # Writes a message to the console and mirrors it, timestamped and tagged
+    # with a severity level, to the log file — or to the in-memory buffer
+    # while the log path has not been resolved yet. Logging failures are
+    # deliberately swallowed so console output is never blocked.
+    param(
+        [Parameter(Mandatory = $true)][string]$Message,
+        [string]$Level = "INFO"
+    )
+    $stamp = Get-Date -Format "yyyy-MM-dd HH:mm:ss"
+    $entry = "[$stamp] [$Level] $Message"
+    # Call the real cmdlet explicitly; Write-Host is shadowed by a global proxy.
+    Microsoft.PowerShell.Utility\Write-Host $Message
+    try {
+        if ($script:LogFile) {
+            Add-Content -Path $script:LogFile -Value $entry -Encoding UTF8 -ErrorAction SilentlyContinue
+        }
+        else {
+            $script:LogBuffer.Add($entry) | Out-Null
+        }
+    } catch {}
+}
+
+function Write-LogHost {
+    # Colored console output plus INFO-level mirroring to the log file (or
+    # the early-startup buffer when the log path is not yet known).
+    param(
+        [Parameter(Mandatory = $true)][AllowEmptyString()][string]$Message,
+        [string]$ForegroundColor = "White"
+    )
+    # Bypass the global Write-Host proxy to avoid double-logging.
+    Microsoft.PowerShell.Utility\Write-Host $Message -ForegroundColor $ForegroundColor
+    try {
+        $entry = "[{0}] [INFO] {1}" -f (Get-Date -Format "yyyy-MM-dd HH:mm:ss"), $Message
+        if ($script:LogFile) {
+            Add-Content -Path $script:LogFile -Value $entry -Encoding UTF8 -ErrorAction SilentlyContinue
+        }
+        else {
+            $script:LogBuffer.Add($entry) | Out-Null
+        }
+    } catch {}
+}
+
+# Mirror Write-Host to log file with matching signature
+function global:Write-Host {
+ [CmdletBinding()]
+ param(
+ [Parameter(Position=0, ValueFromPipeline=$true, ValueFromRemainingArguments=$true)]
+ $Object,
+ [object] $Separator,
+ [ConsoleColor] $ForegroundColor,
+ [ConsoleColor] $BackgroundColor,
+ [switch] $NoNewLine
+ )
+ process {
+ Microsoft.PowerShell.Utility\Write-Host @PSBoundParameters
+ try {
+ # Compose message
+ $msgItems = @($Object)
+ $msg = ($msgItems | Out-String).TrimEnd()
+ if ($msg) {
+ $timestamp = Get-Date -Format "yyyy-MM-dd HH:mm:ss"
+ $entry = "[$timestamp] [INFO] $msg"
+ if ($script:LogFile) { Add-Content -Path $script:LogFile -Value $entry -Encoding UTF8 -ErrorAction SilentlyContinue }
+ else { $script:LogBuffer.Add($entry) | Out-Null }
+ }
+ } catch {}
+ }
+}
+
+function Get-MaskedUsername {
+    <#
+    .SYNOPSIS
+    Masks a username or email address for secure display in logs and screenshots.
+
+    .DESCRIPTION
+    Converts "admin@contoso.com" to "a******n@contoso.com" to prevent accidental
+    credential exposure in terminal output, screenshots, or log files.
+
+    Preserves first and last character of local part, masks middle with 6 asterisks.
+    Local parts of 1-2 characters keep only the first character; an empty local
+    part (e.g. "@contoso.com") is returned unchanged.
+    Returns original string if input is null, empty, or doesn't contain "@".
+
+    .PARAMETER Username
+    The username or email address to mask
+
+    .OUTPUTS
+    Masked string (e.g., "a******n@contoso.com")
+
+    .EXAMPLE
+    Get-MaskedUsername -Username "admin@contoso.com"
+    Returns: "a******n@contoso.com"
+    #>
+
+    param(
+        [Parameter(Mandatory = $false)]
+        [string]$Username
+    )
+
+    if ([string]::IsNullOrWhiteSpace($Username)) {
+        return $Username
+    }
+
+    # Only mask if it looks like an email address
+    if ($Username -notmatch '@') {
+        return $Username
+    }
+
+    # Exactly one '@' expected; multiple '@'s are not a maskable address.
+    $parts = $Username -split '@'
+    if ($parts.Count -ne 2) {
+        return $Username
+    }
+
+    $localPart = $parts[0]
+    $domain = $parts[1]
+
+    # FIX: guard the empty local part ("@contoso.com"); indexing an empty
+    # string below would have produced a bogus "******@domain" value.
+    if ($localPart.Length -eq 0) {
+        return $Username
+    }
+
+    # Handle very short usernames (1-2 chars): keep only the first character
+    if ($localPart.Length -le 2) {
+        return "$($localPart[0])******@$domain"
+    }
+
+    $first = $localPart[0]
+    $last = $localPart[$localPart.Length - 1]
+    $masked = "$first******$last@$domain"
+
+    return $masked
+}
+
+# --- Helper Function: Detect if PAYG billing is configured in tenant ---
+function Test-PAYGBillingEnabled {
+ <#
+ .SYNOPSIS
+ Attempts to detect if Microsoft Purview PAYG billing is configured in the tenant.
+
+ .DESCRIPTION
+ Checks for indicators that PAYG billing is enabled:
+ - Attempts to query audit records for AIAppInteraction type (PAYG-only)
+ - If records exist or query succeeds without billing errors, PAYG is likely enabled
+ - Returns $null if detection is inconclusive
+
+ .OUTPUTS
+ $true if PAYG billing appears to be enabled
+ $false if PAYG billing appears to be disabled or not configured
+ $null if detection is inconclusive (requires actual query attempt)
+ #>
+
+ # Note: The most reliable way to detect PAYG is to attempt a query for AIAppInteraction
+ # and check for specific error responses. However, this requires actual data/timeframe.
+ # For now, we return $null to indicate "unknown" and let the post-query detection handle it.
+
+ Write-LogHost "PAYG billing detection: Deferred to post-query validation" -ForegroundColor DarkGray
+ return $null
+}
+
+# --- Helper Function: Determine if DSPM features are being used beyond default M365 Copilot types ---
+function Test-DSPMFeaturesEnabled {
+ <#
+ .SYNOPSIS
+ Determines if DSPM for AI features are being used beyond the default M365 Copilot activity types.
+
+ .DESCRIPTION
+ Returns $true if -IncludeDSPMForAI switch is enabled.
+ This adds: ConnectedAIAppInteraction, AIInteraction, AIAppInteraction
+
+ Returns $false if only default M365 Copilot type (CopilotInteraction) is being queried.
+ #>
+ return $IncludeDSPMForAI
+}
+
+# --- Excel Export Validation ---
+
+# NOTE: -AppendFile is valid for both CSV and Excel output. When it is used
+# without -ExportWorkbook the script simply defaults to CSV append; the
+# extension/mode checks later in this file enforce the actual file-format
+# contract. Two empty `if` guards previously sat here as placeholders — they
+# were no-ops and have been removed; this comment records the intent they
+# documented.
+
+# Log export mode
+# Announces up front how output files will be organized for this run so the
+# operator can abort early if the mode is not what they intended.
+if ($ExportWorkbook) {
+ # Determine Excel output mode based on -CombineOutput parameter
+ if ($CombineOutput) {
+ Write-Host "Excel export mode: Combined activity tab + separate EntraUsers tab (if requested)" -ForegroundColor Cyan
+ } else {
+ # Default for Excel: separated tabs
+ Write-Host "Excel export mode: Multi-tab workbook (one tab per activity type)" -ForegroundColor Cyan
+ }
+
+ if ($AppendFile) {
+ Write-Host "Append mode: Enabled (will validate existing workbook structure)" -ForegroundColor Cyan
+ }
+ Write-Host ""
+} else {
+ # CSV export mode
+ if ($OnlyUserInfo) {
+ # OnlyUserInfo mode: No activity files, just Entra user data
+ Write-Host "CSV export mode: Entra user directory and licensing data only (no audit logs)" -ForegroundColor Cyan
+ } else {
+ # Determine CSV output mode based on -CombineOutput parameter
+ # Offline replay (-RAWInputCSV) implicitly forces combined output unless the
+ # user already supplied the switch; the parameter itself is replaced with a
+ # new SwitchParameter so later .IsPresent checks see it as set.
+ if ($RAWInputCSV -and -not $CombineOutput.IsPresent) { $CombineOutput = [System.Management.Automation.SwitchParameter]::new($true) }
+ if ($CombineOutput.IsPresent -or $RAWInputCSV) {
+ # User specified -CombineOutput switch: combine all activity types
+ $csvModeMsg = "Combined activity file"
+ if ($IncludeUserInfo) { $csvModeMsg += " + separate EntraUsers file" }
+ Write-Host "CSV export mode: $csvModeMsg" -ForegroundColor Cyan
+ } else {
+ # Default for live CSV: separate files per activity type
+ $csvModeMsg = "Separate activity files (one per activity type)"
+ if ($IncludeUserInfo) { $csvModeMsg += " + EntraUsers file" }
+ Write-Host "CSV export mode: $csvModeMsg" -ForegroundColor Cyan
+ }
+ }
+ Write-Host ""
+}
+
+# Validate OutputPath is folder only (no filenames)
+if ($OutputPath) {
+    # Reject anything that looks like a filename: a 2-4 character extension at
+    # the end, or a last path segment containing a dot when the path has no
+    # trailing separator. The script always generates its own timestamped names.
+    if ($OutputPath -match '\.[a-zA-Z0-9]{2,4}$' -or ($OutputPath -notmatch '[\\/]$' -and (Split-Path -Leaf $OutputPath) -match '\.')) {
+        Write-Host "ERROR: OutputPath must be a folder path only. Custom filenames are not supported." -ForegroundColor Red
+        Write-Host "The script will automatically generate timestamped filenames based on activity types." -ForegroundColor Yellow
+        Write-Host "" -ForegroundColor Yellow
+        Write-Host "Example valid paths:" -ForegroundColor Green
+        Write-Host " -OutputPath 'C:\Temp\'" -ForegroundColor Green
+        Write-Host " -OutputPath 'D:\AuditLogs\'" -ForegroundColor Green
+        Write-Host "" -ForegroundColor Yellow
+        Write-Host "Example INVALID path:" -ForegroundColor Red
+        Write-Host " -OutputPath 'C:\Temp\myfile.csv'" -ForegroundColor Red
+        exit 1
+    }
+
+    # Ensure OutputPath ends with a directory separator.
+    # FIX: previously only a trailing backslash was recognized, so a path like
+    # 'C:\Temp/' became 'C:\Temp/\'; accept either separator before appending.
+    if ($OutputPath -notmatch '[\\/]$') {
+        $OutputPath = $OutputPath + '\'
+    }
+
+    # Create directory if it doesn't exist
+    if (-not (Test-Path -Path $OutputPath -PathType Container)) {
+        try {
+            New-Item -Path $OutputPath -ItemType Directory -Force | Out-Null
+            Write-Host "INFO: Created output directory: $OutputPath" -ForegroundColor Green
+        }
+        catch {
+            Write-Host "ERROR: Failed to create output directory: $OutputPath" -ForegroundColor Red
+            Write-Host "Error: $_" -ForegroundColor Red
+            exit 1
+        }
+    }
+}
+
+# Validate AppendFile is not used with EntraUsers export modes
+if ($AppendFile -and ($IncludeUserInfo -or $OnlyUserInfo)) {
+    # EntraUsers exports are point-in-time snapshots, so append semantics never
+    # apply to them; emit the explanation and abort. Messages are kept in a
+    # text/color table and emitted in order.
+    $entraAppendMessages = @(
+        @('ERROR: -AppendFile cannot be used with EntraUsers export modes', 'Red'),
+        @('', 'Yellow'),
+        @('EntraUsers data is always overwritten (never appended) because it represents', 'Yellow'),
+        @("a point-in-time snapshot of your tenant's user information, not time-based", 'Yellow'),
+        @('activity data. Each export should create a fresh EntraUsers dataset.', 'Yellow'),
+        @('', 'Yellow'),
+        @('Solutions:', 'Green'),
+        @(' 1. Remove -IncludeUserInfo or -OnlyUserInfo to append activity data only', 'Green'),
+        @(' 2. Run without -AppendFile to create new timestamped files', 'Green')
+    )
+    foreach ($pair in $entraAppendMessages) {
+        Write-Host $pair[0] -ForegroundColor $pair[1]
+    }
+    exit 1
+}
+
+# Validate AppendFile has proper filename format
+# Enforces the -AppendFile contract: must be a filename (not a directory),
+# must carry an extension, and the extension must match the export mode
+# (.xlsx with -ExportWorkbook, .csv otherwise). Each violation exits 1.
+if ($AppendFile) {
+ # Check if it's a directory path (ends with slash/backslash or has no extension)
+ if ($AppendFile -match '[\\/]$') {
+ Write-Host "ERROR: -AppendFile must specify a filename, not a directory path" -ForegroundColor Red
+ Write-Host "" -ForegroundColor Yellow
+ Write-Host "Valid examples:" -ForegroundColor Green
+ Write-Host " -AppendFile 'MyReport.xlsx'" -ForegroundColor Green
+ Write-Host " -AppendFile 'C:\Data\Audit\Report.csv'" -ForegroundColor Green
+ Write-Host "" -ForegroundColor Yellow
+ Write-Host "Invalid examples:" -ForegroundColor Red
+ Write-Host " -AppendFile 'C:\Data\'" -ForegroundColor Red
+ Write-Host " -AppendFile 'C:\Data\Audit\'" -ForegroundColor Red
+ exit 1
+ }
+
+ # Extract file extension
+ # GetExtension returns '' (falsy) when no extension is present.
+ $appendExt = [System.IO.Path]::GetExtension($AppendFile).ToLower()
+
+ # Validate extension exists
+ if (-not $appendExt) {
+ Write-Host "ERROR: -AppendFile must include a file extension (.csv or .xlsx)" -ForegroundColor Red
+ Write-Host "" -ForegroundColor Yellow
+ Write-Host "Valid examples:" -ForegroundColor Green
+ Write-Host " -AppendFile 'MyReport.xlsx'" -ForegroundColor Green
+ Write-Host " -AppendFile 'AuditData.csv'" -ForegroundColor Green
+ exit 1
+ }
+
+ # Validate extension matches export mode
+ if ($ExportWorkbook -and $appendExt -ne '.xlsx') {
+ Write-Host "ERROR: -AppendFile must use .xlsx extension when -ExportWorkbook is specified" -ForegroundColor Red
+ Write-Host "You specified: $AppendFile" -ForegroundColor Yellow
+ Write-Host "" -ForegroundColor Yellow
+ Write-Host "Solutions:" -ForegroundColor Green
+ Write-Host " 1. Change filename to use .xlsx extension" -ForegroundColor Green
+ Write-Host " 2. Remove -ExportWorkbook to append CSV data instead" -ForegroundColor Green
+ exit 1
+ }
+ elseif (-not $ExportWorkbook -and $appendExt -ne '.csv') {
+ Write-Host "ERROR: -AppendFile must use .csv extension for CSV mode" -ForegroundColor Red
+ Write-Host "You specified: $AppendFile" -ForegroundColor Yellow
+ Write-Host "" -ForegroundColor Yellow
+ Write-Host "Solutions:" -ForegroundColor Green
+ Write-Host " 1. Change filename to use .csv extension" -ForegroundColor Green
+ Write-Host " 2. Add -ExportWorkbook to append Excel data instead" -ForegroundColor Green
+ exit 1
+ }
+}
+
+# --- DSPM for AI: Conflict Detection for -ExcludeCopilotInteraction ---
+
+# Resolution state recorded for later reporting in the run summary.
+$script:ConflictResolved = $false
+$script:ConflictChoice = $null
+
+# Detect conflicts where user wants to both include AND exclude CopilotInteraction:
+# 1. Explicit include via -ActivityTypes parameter
+# 2. Explicit include via -IncludeCopilotInteraction switch
+# 3. Implicit include via DSPM switches (which require CopilotInteraction for context)
+$explicitInclude = $ActivityTypes -and ($ActivityTypes -contains 'CopilotInteraction')
+$explicitIncludeViaSwitch = $IncludeCopilotInteraction
+$implicitIncludeViaDSPM = $IncludeDSPMForAI
+
+if ($ExcludeCopilotInteraction -and ($explicitInclude -or $explicitIncludeViaSwitch -or $implicitIncludeViaDSPM)) {
+ # Interactive resolution unless -Force was given; the prompt explains what
+ # CopilotInteraction covers before asking the operator to pick a side.
+ if (-not $Force) {
+ Write-Host ""
+ Write-Host "============================================================================================================" -ForegroundColor Yellow
+ Write-Host "CONFLICT DETECTED" -ForegroundColor Red
+ Write-Host "============================================================================================================" -ForegroundColor Yellow
+ Write-Host ""
+
+ # Explain which combination of parameters caused the conflict (first match wins).
+ if ($explicitInclude) {
+ Write-Host "You provided 'CopilotInteraction' in -ActivityTypes but also specified -ExcludeCopilotInteraction switch." -ForegroundColor Yellow
+ }
+ elseif ($explicitIncludeViaSwitch) {
+ Write-Host "You enabled -IncludeCopilotInteraction but also specified -ExcludeCopilotInteraction." -ForegroundColor Yellow
+ }
+ elseif ($implicitIncludeViaDSPM) {
+ Write-Host "You specified DSPM switches (which require M365 Copilot data for context) but also specified -ExcludeCopilotInteraction." -ForegroundColor Yellow
+ Write-Host "DSPM switches enabled: -IncludeDSPMForAI" -ForegroundColor Cyan
+ }
+
+ Write-Host ""
+ Write-Host "Microsoft 365 Copilot data (CopilotInteraction) includes:" -ForegroundColor Cyan
+ Write-Host " - M365 Copilot (Word, Excel, PowerPoint, Outlook, Teams meetings, etc.)" -ForegroundColor Cyan
+ Write-Host " - Microsoft 365 Copilot Chat (Office.com)" -ForegroundColor Cyan
+ Write-Host " - Security Copilot" -ForegroundColor Cyan
+ Write-Host " - Copilot Studio interactions" -ForegroundColor Cyan
+ Write-Host " - Billing: FREE (included with E5/Audit Standard)" -ForegroundColor Green
+ Write-Host ""
+ Write-Host "Do you want to INCLUDE or EXCLUDE Microsoft 365 Copilot activity type?" -ForegroundColor Yellow
+ Write-Host " [I] INCLUDE - Proceed with M365 Copilot data enabled (override -ExcludeCopilotInteraction switch)" -ForegroundColor Green
+ Write-Host " [E] EXCLUDE - Remove CopilotInteraction (honor -ExcludeCopilotInteraction switch)" -ForegroundColor Red
+ Write-Host ""
+
+ # Send-PromptNotification is defined elsewhere in this file; presumably it
+ # alerts the operator that interactive input is awaited — confirm at its definition.
+ Send-PromptNotification
+ $userChoice = Read-Host "Enter your choice (I/E)"
+
+ if ($userChoice -eq 'I' -or $userChoice -eq 'i') {
+ # INCLUDE wins: drop the exclude switch so later logic keeps CopilotInteraction.
+ $ExcludeCopilotInteraction = $false
+ $script:ConflictChoice = 'INCLUDE'
+ Write-Host ""
+ Write-Host "Choice: INCLUDE - Proceeding with CopilotInteraction enabled" -ForegroundColor Green
+ Write-Host ""
+ }
+ elseif ($userChoice -eq 'E' -or $userChoice -eq 'e') {
+ # EXCLUDE wins: leave the switch set; removal happens later in the pipeline.
+ $script:ConflictChoice = 'EXCLUDE'
+ Write-Host ""
+ Write-Host "Choice: EXCLUDE - CopilotInteraction will be removed from ActivityTypes" -ForegroundColor Red
+ Write-Host ""
+ }
+ else {
+ # Any other input is treated as fatal rather than re-prompting.
+ Write-Host ""
+ Write-Host "ERROR: Invalid choice. Please enter 'I' for INCLUDE or 'E' for EXCLUDE." -ForegroundColor Red
+ exit 1
+ }
+
+ $script:ConflictResolved = $true
+ }
+ else {
+ # Force mode - honor ExcludeCopilotInteraction without prompt
+ $script:ConflictChoice = 'EXCLUDE (Force mode)'
+ $script:ConflictResolved = $true
+ }
+}
+
+# ==============================================
+# ImportExcel Module Check (for Excel export)
+# ==============================================
+# Ensures the ImportExcel module is available and loaded when -ExportWorkbook
+# was requested. Attempts a CurrentUser-scope install from PSGallery when
+# missing; on any install/import failure the run degrades gracefully to CSV
+# by clearing the script-scope ExportWorkbook/AppendFile flags.
+
+if ($ExportWorkbook) {
+ Write-Host "Checking ImportExcel module for Excel export..." -ForegroundColor Cyan
+
+ $importExcelModule = Get-Module -ListAvailable -Name ImportExcel | Select-Object -First 1
+ if (-not $importExcelModule) {
+ Write-Host "ImportExcel module not found (required for -ExportWorkbook)." -ForegroundColor Yellow
+ Write-Host "Installing ImportExcel module..." -ForegroundColor Yellow
+ Write-Host ""
+
+ try {
+ # Side effect: installs from PSGallery into the CurrentUser scope.
+ Install-Module ImportExcel -Scope CurrentUser -Force -AllowClobber -Repository PSGallery -ErrorAction Stop
+ Write-Host "ImportExcel module installed successfully!" -ForegroundColor Green
+ Write-Host ""
+
+ # Re-check for the module
+ $importExcelModule = Get-Module -ListAvailable -Name ImportExcel | Select-Object -First 1
+ if (-not $importExcelModule) {
+ Write-Host "ERROR: Module installation completed but module not found. Try restarting PowerShell." -ForegroundColor Red
+ exit 1
+ }
+ }
+ catch {
+ Write-Host "ERROR: Failed to install ImportExcel module: $($_.Exception.Message)" -ForegroundColor Red
+ Write-Host ""
+ Write-Host "Please install manually using:" -ForegroundColor Yellow
+ Write-Host " Install-Module ImportExcel -Scope CurrentUser" -ForegroundColor Yellow
+ Write-Host ""
+ Write-Host "Falling back to CSV export..." -ForegroundColor Yellow
+ # At script top level these script-scope writes update the parameter
+ # variables, so the later $ExportWorkbook checks see the fallback.
+ $script:ExportWorkbook = $false
+ $script:AppendFile = $false
+ }
+ }
+ else {
+ Write-Host "ImportExcel module detected: $($importExcelModule.Name) v$($importExcelModule.Version)" -ForegroundColor Green
+ }
+
+ # Import ImportExcel module
+ # Re-test $ExportWorkbook: the install fallback above may have cleared it.
+ if ($ExportWorkbook) {
+ try {
+ Import-Module ImportExcel -ErrorAction Stop
+ Write-Host "ImportExcel module imported successfully" -ForegroundColor Green
+ Write-Host ""
+ }
+ catch {
+ Write-Host "ERROR: Failed to import ImportExcel module: $($_.Exception.Message)" -ForegroundColor Red
+ Write-Host "Falling back to CSV export..." -ForegroundColor Yellow
+ $script:ExportWorkbook = $false
+ $script:AppendFile = $false
+ }
+ }
+}
+
+# --- Early parameter validation & environment sanity checks ---
+
+# PowerShell 5.1 requires -UseEOM mode (Graph API mode requires PS 7+ for ThreadJob parallelism)
+# Skipped in replay (-RAWInputCSV) and -Resume modes, which do not issue live Graph queries.
+if ($PSVersionTable.PSVersion.Major -lt 7 -and -not $UseEOM -and -not $RAWInputCSV -and -not $Resume) {
+ Write-Host "" -ForegroundColor Red
+ Write-Host "═══════════════════════════════════════════════════════════════════════════════" -ForegroundColor Red
+ Write-Host " ERROR: PowerShell 5.1 Detected - Graph API Mode Not Supported" -ForegroundColor Red
+ Write-Host "═══════════════════════════════════════════════════════════════════════════════" -ForegroundColor Red
+ Write-Host ""
+ Write-Host " The default Microsoft Graph API mode requires PowerShell 7+ for parallel query execution." -ForegroundColor Yellow
+ Write-Host " PowerShell 5.1 is supported, but requires -UseEOM (Exchange Online Management) mode." -ForegroundColor Yellow
+ Write-Host ""
+ Write-Host " SOLUTION: Add the -UseEOM switch to your command:" -ForegroundColor Cyan
+ Write-Host ""
+ Write-Host " .\PAX_Purview_Audit_Log_Processor.ps1 -UseEOM [your other parameters]" -ForegroundColor White
+ Write-Host ""
+ Write-Host " OR upgrade to PowerShell 7+ for Graph API mode (recommended for performance):" -ForegroundColor Cyan
+ Write-Host " https://aka.ms/powershell" -ForegroundColor White
+ Write-Host ""
+ Write-Host " EOM MODE NOTES:" -ForegroundColor DarkCyan
+ Write-Host " - Uses Search-UnifiedAuditLog cmdlet (serial processing)" -ForegroundColor Gray
+ Write-Host " - Requires Exchange Online Management module" -ForegroundColor Gray
+ Write-Host " - Requires Exchange Admin role or audit log read permissions" -ForegroundColor Gray
+ Write-Host " - Some features (Entra user enrichment) not available in EOM mode" -ForegroundColor Gray
+ Write-Host ""
+ Write-Host "═══════════════════════════════════════════════════════════════════════════════" -ForegroundColor Red
+ exit 1
+}
+
+# -ExcludeAgents selects records WITHOUT agents; -AgentId/-AgentsOnly select records WITH agents.
+# The combination can never match anything, so reject it up front.
+if ($ExcludeAgents -and ($AgentId -or $AgentsOnly)) {
+ Write-Host "ERROR: -ExcludeAgents cannot be used with -AgentId or -AgentsOnly switches." -ForegroundColor Red
+ Write-Host "These switches are mutually exclusive:" -ForegroundColor Yellow
+ Write-Host " -AgentId/-AgentsOnly: Filter to ONLY records with agents" -ForegroundColor Yellow
+ Write-Host " -ExcludeAgents: Filter to ONLY records without agents" -ForegroundColor Yellow
+ Write-Host "Please use only one filtering approach and re-run." -ForegroundColor Yellow
+ exit 1
+}
+
+# Validate MaxConcurrency range (Microsoft Purview enforces 10 concurrent search job limit per user account) — the check itself runs below, after the AppendFile validation helper functions.
+# ============================================================================
+# APPENDFILE COLUMN VALIDATION FUNCTIONS
+# ============================================================================
+# Early validation to prevent wasting time on Graph API queries when explosion
+# parameters don't match between existing file and new data parameters.
+# Note: For explosion modes, actual column schemas are dynamic and vary by data,
+# so we validate explosion parameter compatibility rather than exact columns.
+# ============================================================================
+
+function Get-LikelyExplosionParams {
+ # Infers which explosion mode produced an existing output file, based solely on
+ # its column names. Returns a hashtable:
+ #   @{ Mode = 'ExplodeDeep'|'ExplodeArrays'|'Standard'|'Unknown'; DisplayName = <user-facing label> }
+ # Checks are ordered most-specific first, so a file containing both
+ # CopilotEventData.* columns and an AuditData column classifies as ExplodeDeep.
+ param([string[]]$Columns)
+
+ # Check for deep explosion indicators (CopilotEventData.* columns)
+ $hasDeepColumns = $Columns | Where-Object { $_ -match '^CopilotEventData\.' }
+ if ($hasDeepColumns) {
+ return @{ Mode = "ExplodeDeep"; DisplayName = "-ExplodeDeep" }
+ }
+
+ # Check for array explosion indicators (exploded field names like Message_, Context_, AgentId, etc.)
+ # NOTE(review): these are prefix matches — any column merely starting with e.g. 'AgentId'
+ # also triggers this branch; confirm against the exporter's actual column naming.
+ $hasArrayColumns = $Columns | Where-Object { $_ -match '^(Message_|Context_|Interaction_|AgentId|AgentName|AgentVersion|AccessedResource_|AISystemPlugin_)' }
+ if ($hasArrayColumns) {
+ return @{ Mode = "ExplodeArrays"; DisplayName = "-ExplodeArrays" }
+ }
+
+ # Check for standard mode indicators (AuditData JSON column present)
+ $hasAuditData = $Columns -contains 'AuditData'
+ if ($hasAuditData) {
+ return @{ Mode = "Standard"; DisplayName = "Standard (no explosion)" }
+ }
+
+ # Unable to determine
+ return @{ Mode = "Unknown"; DisplayName = "Unknown mode" }
+}
+
+function Test-AppendFileCompatibility {
+ # Validates that an existing output file (CSV or Excel) was produced with the
+ # same explosion mode as the current run before appending to it.
+ # Parameters:
+ #   FilePath      - path to the existing output file
+ #   IsExcel       - $true to read the header from an Excel worksheet, $false for CSV
+ #   ExplodeArrays - current run's -ExplodeArrays setting
+ #   ExplodeDeep   - current run's -ExplodeDeep setting (takes precedence over ExplodeArrays)
+ #   TargetSheet   - optional worksheet name; defaults to the workbook's first sheet
+ # Returns a hashtable: Compatible (bool), ExistingMode/CurrentMode (mode hashtables),
+ # ExistingColumns, ExistingCount, ErrorMessage. Never throws — all failures are
+ # reported via Compatible=$false + ErrorMessage.
+ param(
+ [string]$FilePath,
+ [bool]$IsExcel,
+ [bool]$ExplodeArrays,
+ [bool]$ExplodeDeep,
+ [string]$TargetSheet = $null
+ )
+
+ $result = @{
+ Compatible = $true
+ ExistingMode = $null
+ CurrentMode = $null
+ ExistingColumns = @()
+ ExistingCount = 0
+ ErrorMessage = $null
+ }
+
+ try {
+ # Determine current explosion mode (ExplodeDeep wins if both switches are set)
+ if ($ExplodeDeep) {
+ $result.CurrentMode = @{ Mode = "ExplodeDeep"; DisplayName = "-ExplodeDeep" }
+ }
+ elseif ($ExplodeArrays) {
+ $result.CurrentMode = @{ Mode = "ExplodeArrays"; DisplayName = "-ExplodeArrays" }
+ }
+ else {
+ $result.CurrentMode = @{ Mode = "Standard"; DisplayName = "Standard (no explosion)" }
+ }
+
+ # Read existing file columns
+ if ($IsExcel) {
+ # Validate Excel file and read columns
+ if (-not (Get-Module -Name ImportExcel -ListAvailable)) {
+ $result.ErrorMessage = "ImportExcel module not available for validation"
+ $result.Compatible = $false
+ return $result
+ }
+
+ Import-Module ImportExcel -ErrorAction Stop
+
+ # Get sheet info
+ $sheets = Get-ExcelSheetInfo -Path $FilePath -ErrorAction Stop
+
+ if ($TargetSheet) {
+ # Validate specific sheet
+ $sheet = $sheets | Where-Object { $_.Name -eq $TargetSheet }
+ if (-not $sheet) {
+ $result.ErrorMessage = "Target sheet '$TargetSheet' not found in workbook"
+ $result.Compatible = $false
+ return $result
+ }
+ }
+ else {
+ # Use first sheet
+ $sheet = $sheets | Select-Object -First 1
+ }
+
+ # Read header row from Excel (row 1 only, no header interpretation)
+ $headerData = Import-Excel -Path $FilePath -WorksheetName $sheet.Name -StartRow 1 -EndRow 1 -NoHeader -ErrorAction Stop
+ $existingCols = $headerData[0].PSObject.Properties.Value | Where-Object { $_ }
+ }
+ else {
+ # CSV: Read first line (header)
+ # NOTE(review): a plain -split ',' mis-parses quoted header names that contain
+ # commas; acceptable only if the exporter never emits such columns — confirm.
+ $firstLine = Get-Content -Path $FilePath -First 1 -Encoding UTF8 -ErrorAction Stop
+ $existingCols = ($firstLine -split ',') | ForEach-Object { $_.Trim('"') }
+ }
+
+ $result.ExistingColumns = $existingCols
+ $result.ExistingCount = $existingCols.Count
+
+ # Detect explosion mode of existing file
+ $result.ExistingMode = Get-LikelyExplosionParams -Columns $existingCols
+
+ # Check if explosion modes match
+ if ($result.ExistingMode.Mode -ne $result.CurrentMode.Mode) {
+ $result.Compatible = $false
+ $result.ErrorMessage = "Explosion parameter mismatch: existing file is '$($result.ExistingMode.DisplayName)' but current command uses '$($result.CurrentMode.DisplayName)'"
+ }
+ else {
+ # Modes match - compatible
+ # Note: We don't validate exact columns because explosion schemas are dynamic
+ # and vary based on actual data content. As long as explosion params match,
+ # the append will work correctly.
+ $result.Compatible = $true
+ }
+ }
+ catch {
+ $result.ErrorMessage = $_.Exception.Message
+ $result.Compatible = $false
+ }
+
+ return $result
+}
+
+# ============================================================================
+# END APPENDFILE VALIDATION FUNCTIONS
+# ============================================================================
+
+# MaxConcurrency hard cap: Purview rejects more than 10 concurrent search jobs per account.
+if ($MaxConcurrency -lt 1 -or $MaxConcurrency -gt 10) {
+ Write-Host "ERROR: -MaxConcurrency must be between 1 and 10." -ForegroundColor Red
+ Write-Host "Microsoft Purview enforces a maximum of 10 concurrent search jobs per user account." -ForegroundColor Yellow
+ Write-Host "Current value: $MaxConcurrency" -ForegroundColor Yellow
+ Write-Host "Please specify a value between 1 and 10 and re-run." -ForegroundColor Yellow
+ exit 1
+}
+
+# Establish date defaults / validation depending on mode.
+# Replay mode: both bounds optional; '*' marks an open-ended side.
+# Live mode: neither bound given -> default to yesterday (UTC, one full day).
+if ($RAWInputCSV) {
+ $parsedStart = $null; $parsedEnd = $null
+ if ($PSBoundParameters.ContainsKey('StartDate')) {
+ try { $parsedStart = [datetime]::ParseExact($StartDate, 'yyyy-MM-dd', $null) } catch { Write-Host "ERROR: StartDate must be yyyy-MM-dd if provided." -ForegroundColor Red; exit 1 }
+ }
+ if ($PSBoundParameters.ContainsKey('EndDate')) {
+ try { $parsedEnd = [datetime]::ParseExact($EndDate, 'yyyy-MM-dd', $null) } catch { Write-Host "ERROR: EndDate must be yyyy-MM-dd if provided." -ForegroundColor Red; exit 1 }
+ }
+ if ($parsedStart -and $parsedEnd -and $parsedEnd -lt $parsedStart) { Write-Host "ERROR: EndDate ($EndDate) is earlier than StartDate ($StartDate)." -ForegroundColor Red; exit 1 }
+ if (-not $PSBoundParameters.ContainsKey('StartDate')) { $StartDate = '*' }
+ if (-not $PSBoundParameters.ContainsKey('EndDate')) { $EndDate = '*' }
+}
+else {
+ if (-not $PSBoundParameters.ContainsKey('StartDate') -and -not $PSBoundParameters.ContainsKey('EndDate')) {
+ $yesterdayUtc = (Get-Date).ToUniversalTime().Date.AddDays(-1)
+ $StartDate = $yesterdayUtc.ToString('yyyy-MM-dd')
+ $EndDate = $yesterdayUtc.AddDays(1).ToString('yyyy-MM-dd')
+ }
+ elseif (-not $PSBoundParameters.ContainsKey('StartDate')) {
+ # Only EndDate given: parse it purely for format validation; start is open-ended.
+ $StartDate = '*'
+ try {
+ $parsedEnd = [datetime]::ParseExact($EndDate, 'yyyy-MM-dd', $null)
+ } catch { Write-Host "ERROR: EndDate must be yyyy-MM-dd format." -ForegroundColor Red; exit 1 }
+ }
+ elseif (-not $PSBoundParameters.ContainsKey('EndDate')) {
+ # Only StartDate given: parse it purely for format validation; end is open-ended.
+ $EndDate = '*'
+ try {
+ $parsedStart = [datetime]::ParseExact($StartDate, 'yyyy-MM-dd', $null)
+ } catch { Write-Host "ERROR: StartDate must be yyyy-MM-dd format." -ForegroundColor Red; exit 1 }
+ }
+ else {
+ try {
+ $parsedStart = [datetime]::ParseExact($StartDate, 'yyyy-MM-dd', $null)
+ $parsedEnd = [datetime]::ParseExact($EndDate, 'yyyy-MM-dd', $null)
+ }
+ catch { Write-Host "ERROR: StartDate/EndDate must be in yyyy-MM-dd format." -ForegroundColor Red; exit 1 }
+ if ($parsedEnd -lt $parsedStart) { Write-Host "ERROR: EndDate ($EndDate) is earlier than StartDate ($StartDate)." -ForegroundColor Red; exit 1 }
+ }
+}
+
+if ($BlockHours -le 0) { Write-Host "ERROR: BlockHours must be positive." -ForegroundColor Red; exit 1 }
+
+# On PowerShell Core, make Write-Information output visible by default.
+try { if ($PSVersionTable.PSEdition -eq 'Core' -and ($global:InformationPreference -in @('SilentlyContinue', 'Ignore'))) { $global:InformationPreference = 'Continue' } } catch {}
+
+# Replay mode is offline: reject any parameter that only makes sense for live queries.
+if ($RAWInputCSV) {
+ $rawConflictParams = @('BlockHours', 'ResultSize', 'PacingMs', 'Auth', 'ParallelMode', 'MaxParallelGroups', 'MaxConcurrency', 'EnableParallel', 'GroupNames')
+ $specifiedConflicts = @()
+ foreach ($cp in $rawConflictParams) { if ($PSBoundParameters.ContainsKey($cp)) { $specifiedConflicts += $cp } }
+ if ($specifiedConflicts.Count -gt 0) {
+ Write-Host "ERROR: -RAWInputCSV cannot be combined with live query parameter(s): $($specifiedConflicts -join ', ')" -ForegroundColor Red
+ Write-Host "Remove those conflicting parameters and re-run. Allowed with RAWInputCSV: StartDate, EndDate, ActivityTypes, AgentId, AgentsOnly, UserIds, OutputFile, AppendFile, explosion switches." -ForegroundColor Yellow
+ Write-Host "Note: -GroupNames requires authentication and cannot be used in replay mode. Use -UserIds with explicit email addresses instead." -ForegroundColor Yellow
+ exit 1
+ }
+}
+
+# Validate -UseEOM compatibility with parallel processing
+# EOM mode is serial-only: explicit parallel switches are a hard error, while an
+# implicit/auto ParallelMode is silently forced to 'Off' with a notice.
+if ($UseEOM) {
+ $parallelConflicts = @()
+
+ # Check for explicit parallel mode settings (only when the user actually passed them)
+ if ($PSBoundParameters.ContainsKey('EnableParallel') -and $EnableParallel) {
+ $parallelConflicts += '-EnableParallel'
+ }
+
+ if ($PSBoundParameters.ContainsKey('ParallelMode') -and $ParallelMode -ne 'Off') {
+ $parallelConflicts += "-ParallelMode $ParallelMode"
+ }
+
+ if ($parallelConflicts.Count -gt 0) {
+ Write-Host ""
+ Write-Host "════════════════════════════════════════════════════════════════════" -ForegroundColor Red
+ Write-Host " ERROR: -UseEOM Incompatible with Parallel Processing" -ForegroundColor Red
+ Write-Host "════════════════════════════════════════════════════════════════════" -ForegroundColor Red
+ Write-Host ""
+ Write-Host "Exchange Online Management mode (-UseEOM) only supports SERIAL processing." -ForegroundColor Yellow
+ Write-Host "The Search-UnifiedAuditLog cmdlet cannot be used in parallel ThreadJobs due to" -ForegroundColor Gray
+ Write-Host "implicit remoting architecture limitations in the EOM PowerShell module." -ForegroundColor Gray
+ Write-Host ""
+ Write-Host "CONFLICTING PARAMETERS DETECTED:" -ForegroundColor Yellow
+ foreach ($conflict in $parallelConflicts) {
+ Write-Host " • $conflict" -ForegroundColor Red
+ }
+ Write-Host ""
+ Write-Host "RESOLUTION OPTIONS:" -ForegroundColor Cyan
+ Write-Host " 1. Remove -UseEOM switch to enable Graph API mode (supports parallel processing)" -ForegroundColor White
+ Write-Host " 2. Remove parallel parameters and use serial-only processing with -UseEOM" -ForegroundColor White
+ Write-Host " 3. Set -ParallelMode Off explicitly: -UseEOM -ParallelMode Off" -ForegroundColor White
+ Write-Host ""
+ Write-Host "NOTE: Graph API mode (default, no -UseEOM) supports parallel processing in PowerShell 7+." -ForegroundColor Gray
+ Write-Host ""
+ Write-Host "════════════════════════════════════════════════════════════════════" -ForegroundColor Red
+
+ # Log to file if log initialized
+ if ($script:logFile -and (Test-Path $script:logFile)) {
+ $timestamp = Get-Date -Format 'yyyy-MM-dd HH:mm:ss'
+ Add-Content -Path $script:logFile -Value "[$timestamp] ERROR: -UseEOM incompatible with parallel processing parameters: $($parallelConflicts -join ', ')"
+ Add-Content -Path $script:logFile -Value "[$timestamp] Script terminated. Resolution: Remove -UseEOM or disable parallel mode."
+ }
+
+ exit 1
+ }
+
+ # Force ParallelMode Off in EOM mode even if Auto is set
+ if ($ParallelMode -ne 'Off') {
+ Write-Host ""
+ Write-Host "NOTE: -UseEOM mode requires serial processing. Forcing -ParallelMode Off." -ForegroundColor Yellow
+ Write-Host ""
+ $ParallelMode = 'Off'
+ }
+}
+
+# --- Query-loop runtime state (script scope so helper functions and jobs can update it) ---
+$script:learnedActivityBlockSize = @{}
+$script:globalLearnedBlockSize = $BlockHours
+$script:subdivisionSequence = @(0.5, 0.25, 0.133333, 0.066667, 0.033333, 0.016667, 0.010417, 0.005556, 0.002778, 0.001389) # 12h, 6h, 3.2h, 1.6h, 48m, 24m, 15m, 8m, 4m, 2m
+$script:Hit10KLimit = $false
+$script:Hit1MLimit = $false # Graph API 1,000,000 record limit per query
+$script:LimitTimeWindow = ""
+$script:SubdividedPartitions = @{} # Track partitions that needed subdivision (key=original range, value=count)
+$script:Connected = $false
+
+# ============================================================================
+# GRAPH API SECURITY AUDIT ENDPOINT VERSION CONFIGURATION
+# ============================================================================
+# Manually configure these variables if Microsoft updates the API version
+# PAX will try CURRENT version first, then fallback to PREVIOUS version
+# (detection happens lazily in Get-GraphAuditApiUri and is cached per session)
+# ============================================================================
+$script:GraphAuditApiVersion_Current = 'v1.0' # Try this version first (expected GA in Q1 2026)
+$script:GraphAuditApiVersion_Previous = 'beta' # Fallback to this version if current unavailable
+$script:GraphAuditApiVersion = $null # Runtime-detected version (do not edit)
+# ============================================================================
+
+# Suppress PowerShell's web request progress bar (prevents "Reading web response stream" noise)
+$ProgressPreference = 'SilentlyContinue'
+
+# Telemetry tracking for Graph API parallel queries (per-slice lifecycle data)
+$script:telemetryData = @()
+
+# Run-wide metrics accumulator, emitted in the final summary. Grouped by phase:
+# timings, fetch counters, explosion stats, filter stats, adaptive-concurrency events.
+$script:metrics = @{
+ StartTime = (Get-Date).ToUniversalTime()
+ QueryMs = 0
+ ExplosionMs = 0
+ ExportMs = 0
+ PagesFetched = 0
+ TotalRecordsFetched = 0
+ TotalStructuredRows = 0
+ ExplosionEvents = 0
+ ExplosionRowsFromEvents = 0
+ ExplosionMaxPerRecord = 0
+ ExplosionTruncated = $false
+ ShrinkEvents = 0
+ Activities = @{}
+ EffectiveChunkSize = 0
+ ParallelBatchSizeFinal = 0
+ ParallelThrottleFinal = 0
+ AgentFilterApplied = $false
+ AgentFilterPreCount = 0
+ AgentFilterPostCount = 0
+ AgentFilterRemovedCount = 0
+ AgentFilterElapsedSec = 0
+ ExcludeAgentsApplied = $false
+ ExcludeAgentsPreCount = 0
+ ExcludeAgentsPostCount = 0
+ ExcludeAgentsRemoved = 0
+ ExcludeAgentsElapsedSec = 0
+ PromptFilterApplied = $false
+ PromptFilterType = ''
+ PromptFilterPreCount = 0
+ PromptFilterPostCount = 0
+ PromptFilterRemovedCount = 0
+ PromptFilterElapsedSec = 0
+ PromptFilterMsgBefore = 0
+ PromptFilterMsgAfter = 0
+ PromptFilterMsgRemoved = 0
+ PromptFilterRecordsMixed = 0
+ PromptFilterRecordsPromptOnly = 0
+ PromptFilterRecordsResponseOnly = 0
+ PromptFilterRecordsNoMessages = 0
+ FilteringSkippedRecords = 0
+ FilteringMissingAuditData = 0
+ FilteringParseFailures = 0
+ FilteringPromptFiltered = 0
+ FilteringAgentFiltered = 0
+ FilteringExcludeAgents = 0
+ FilteringUserIds = 0
+ FilteringGroupNames = 0
+ FilteringOther = 0
+ AdaptiveEvents = @()
+ AdaptiveMemoryReductions = 0
+ AdaptiveLatencyReductions = 0
+ AdaptiveLatencyIncreases = 0
+ ThroughputBaselineRps = 0
+ CircuitBreakerTrips = 0
+ BackoffTotalDelaySeconds = 0
+ PartitionCapsApplied = 0
+ PartitionCapHighestRequested = 0
+}
+
+$script:summaryWritten = $false
+
+# Streaming dataset profiler (live & replay)
+# Mutated by Profile-AuditData / Get-JsonDepth; reset via Reset-Profiler.
+$script:profiler = @{
+ Rows = 0
+ Operations = @{}
+ RecordTypes = @{}
+ HasCopilot = 0
+ MaxDepth = 0
+ DepthCounts = @{}
+ MaxArrayLen = 0
+}
+
+# Cache of per-record "shape" metadata keyed by "RecordType|Operation|HasCopilot"
+$script:shapeCache = @{}
+
+function Get-RecordShapeKey {
+ # Builds the shape-cache key "RecordType|Operation|HasCopilot" for an audit record.
+ # Each property access is wrapped in try/catch so malformed records degrade to
+ # empty/false components instead of throwing.
+ param([object]$AuditData)
+ try {
+ $rt = $AuditData.RecordType
+ } catch { $rt = '' }
+ try {
+ $op = $AuditData.Operation
+ } catch { $op = '' }
+ try {
+ $hasCopilot = $AuditData.PSObject.Properties['CopilotEventData'] -ne $null
+ } catch { $hasCopilot = $false }
+ return "$rt|$op|$hasCopilot"
+}
+
+function Get-RecordShape {
+ # Returns cached shape metadata (RecordType, Operation, HasCopilot, Depth, Mode)
+ # for an audit record, computing and caching it on first sight of a given
+ # RecordType/Operation/HasCopilot combination. Returns $null for null input.
+ # Note: Depth is computed once per shape key and assumed representative of all
+ # records sharing that key.
+ param([object]$AuditData)
+ if ($null -eq $AuditData) { return $null }
+ $key = Get-RecordShapeKey $AuditData
+ if ($script:shapeCache.ContainsKey($key)) { return $script:shapeCache[$key] }
+ $shape = @{}
+ try {
+ $shape.RecordType = $AuditData.RecordType
+ $shape.Operation = $AuditData.Operation
+ } catch {}
+ try { $shape.HasCopilot = $AuditData.PSObject.Properties['CopilotEventData'] -ne $null } catch { $shape.HasCopilot = $false }
+ try { $shape.Depth = Get-JsonDepth $AuditData 0 } catch { $shape.Depth = 0 }
+ # Mode selects the downstream processing path for this record family
+ $shape.Mode = if ($shape.HasCopilot) { 'Copilot' } else { 'AuditData' }
+ $script:shapeCache[$key] = $shape
+ return $shape
+}
+
+function Reset-Profiler {
+ # Reinitializes $script:profiler to empty counters.
+ # Keep this literal in sync with the initial $script:profiler assignment above.
+ $script:profiler = @{
+ Rows = 0
+ Operations = @{}
+ RecordTypes = @{}
+ HasCopilot = 0
+ MaxDepth = 0
+ DepthCounts = @{}
+ MaxArrayLen = 0
+ }
+}
+
+function Get-JsonDepth([object]$node, [int]$d = 0) {
+ # Recursively computes the nesting depth of a parsed JSON object graph.
+ # Scalars (and $null) return the current depth $d; dictionaries and
+ # non-string enumerables recurse one level deeper per child.
+ # Side effect: while walking arrays it also updates
+ # $script:profiler.MaxArrayLen with the longest array seen.
+ if ($null -eq $node -or (Test-ScalarValue $node)) { return $d }
+ if ($node -is [System.Collections.IDictionary]) {
+ $maxd = $d
+ foreach ($v in $node.Values) { $maxd = [math]::Max($maxd, (Get-JsonDepth $v ($d + 1))) }
+ return $maxd
+ }
+ # Strings are IEnumerable (of chars) — exclude them so they count as scalars
+ if ($node -is [System.Collections.IEnumerable] -and -not ($node -is [string])) {
+ $maxd = $d
+ $i = 0
+ foreach ($el in $node) { $maxd = [math]::Max($maxd, (Get-JsonDepth $el ($d + 1))); $i++ }
+ if ($i -gt $script:profiler.MaxArrayLen) { $script:profiler.MaxArrayLen = $i }
+ return $maxd
+ }
+ return $d
+}
+
+function Profile-AuditData {
+ # Streams one audit record into $script:profiler: increments row count,
+ # per-Operation and per-RecordType tallies, CopilotEventData presence, and
+ # depth distribution. Best-effort: every sub-step (and the whole body) is
+ # wrapped in try/catch so profiling can never fail the pipeline.
+ param([object]$AuditData)
+ if ($null -eq $AuditData) { return }
+ try {
+ $script:profiler.Rows++
+ # Operation
+ try {
+ $op = $AuditData.Operation
+ if (-not [string]::IsNullOrWhiteSpace($op)) {
+ if (-not $script:profiler.Operations.ContainsKey($op)) { $script:profiler.Operations[$op] = 0 }
+ $script:profiler.Operations[$op] += 1
+ }
+ } catch {}
+ # RecordType (stringified so numeric/enum record types share one key space)
+ try {
+ $rt = $AuditData.RecordType
+ if (-not [string]::IsNullOrWhiteSpace([string]$rt)) {
+ if (-not $script:profiler.RecordTypes.ContainsKey([string]$rt)) { $script:profiler.RecordTypes[[string]$rt] = 0 }
+ $script:profiler.RecordTypes[[string]$rt] += 1
+ }
+ } catch {}
+ # CopilotEventData presence
+ try { if ($AuditData.PSObject.Properties['CopilotEventData']) { $script:profiler.HasCopilot++ } } catch {}
+ # Depth & arrays (Get-JsonDepth also updates profiler.MaxArrayLen)
+ $depth = Get-JsonDepth $AuditData 0
+ if ($depth -gt $script:profiler.MaxDepth) { $script:profiler.MaxDepth = $depth }
+ if (-not $script:profiler.DepthCounts.ContainsKey($depth)) { $script:profiler.DepthCounts[$depth] = 0 }
+ $script:profiler.DepthCounts[$depth] += 1
+ } catch {}
+}
+
+function Write-ProfilerSummary {
+ # Logs a summary of $script:profiler: headline counters plus the top-N
+ # operations and depth buckets (both sorted by frequency, descending).
+ # Best-effort — any logging failure is swallowed.
+ param([int]$TopOps = 20, [int]$TopDepths = 10)
+ try {
+ Write-LogHost "Profiler: Rows=$($script:profiler.Rows), MaxDepth=$($script:profiler.MaxDepth), MaxArrayLen=$($script:profiler.MaxArrayLen), HasCopilot=$($script:profiler.HasCopilot)" -ForegroundColor Gray
+ if ($script:profiler.Operations.Count -gt 0) {
+ Write-LogHost "Profiler: Operations (top $TopOps):" -ForegroundColor Gray
+ $script:profiler.Operations.GetEnumerator() | Sort-Object Value -Descending | Select-Object -First $TopOps | ForEach-Object { Write-LogHost " $($_.Key): $($_.Value)" -ForegroundColor Gray }
+ }
+ if ($script:profiler.DepthCounts.Count -gt 0) {
+ Write-LogHost "Profiler: Depth distribution (top $TopDepths):" -ForegroundColor Gray
+ $script:profiler.DepthCounts.GetEnumerator() | Sort-Object Value -Descending | Select-Object -First $TopDepths | ForEach-Object { Write-LogHost " Depth $($_.Key): $($_.Value)" -ForegroundColor Gray }
+ }
+ } catch {}
+}
+# Adaptive-concurrency and circuit-breaker runtime state (mutated by the query loop)
+$script:adaptiveThroughputBaseline = $null
+$script:adaptiveLowLatencyStreak = 0
+$script:consecutiveBlockFailures = 0
+$script:circuitBreakerOpen = $false
+$script:circuitBreakerOpenUntil = $null
+
+# ==============================================
+# GRAPH API VERSION DETECTION HELPER
+# ==============================================
+# Automatically detects and uses configured current version or falls back to previous version
+# Version configuration is at top of script for easy manual updates
+
+function Get-GraphAuditApiUri {
+ <#
+ .SYNOPSIS
+ Builds Graph API audit endpoint URI with automatic version detection.
+
+ .DESCRIPTION
+ Attempts to use the configured current version first. If not available,
+ falls back to the previous version. Version detection is cached per session.
+
+ Configure versions at top of script:
+ $script:GraphAuditApiVersion_Current = 'v1.0' (try first)
+ $script:GraphAuditApiVersion_Previous = 'beta' (fallback)
+
+ .PARAMETER Path
+ The audit API path (e.g., "queries", "queries/{id}", "queries/{id}/records")
+
+ .OUTPUTS
+ String - Full Graph API URI with appropriate version
+
+ .EXAMPLE
+ $uri = Get-GraphAuditApiUri -Path "queries"
+ # Returns: https://graph.microsoft.com/v1.0/security/auditLog/queries
+ # or: https://graph.microsoft.com/beta/security/auditLog/queries (if v1.0 unavailable)
+ #>
+ param(
+ [Parameter(Mandatory = $true)]
+ [string]$Path
+ )
+
+ # Auto-detect version on first use (cached for session)
+ if ($null -eq $script:GraphAuditApiVersion) {
+ $currentVer = $script:GraphAuditApiVersion_Current
+ $previousVer = $script:GraphAuditApiVersion_Previous
+
+ # NOTE(review): ANY probe failure (including transient throttling or auth
+ # hiccups) locks in the fallback version for the rest of the session — confirm
+ # this is the intended behavior rather than distinguishing 404 from 429/5xx.
+ try {
+ # Test if current version endpoint is available
+ $testUri = "https://graph.microsoft.com/$currentVer/security/auditLog/queries"
+ Invoke-MgGraphRequest -Method GET -Uri $testUri -ErrorAction Stop | Out-Null
+ $script:GraphAuditApiVersion = $currentVer
+ Write-LogHost "Graph API: security/auditLog endpoint using version $currentVer" -ForegroundColor Green
+ } catch {
+ # Current version not available, fallback to previous
+ $script:GraphAuditApiVersion = $previousVer
+ Write-LogHost "Graph API: security/auditLog endpoint using version $previousVer (fallback from $currentVer)" -ForegroundColor Yellow
+ }
+ }
+
+ return "https://graph.microsoft.com/$($script:GraphAuditApiVersion)/security/auditLog/$Path"
+}
+
+# ==============================================
+# CTRL+C GRACEFUL EXIT HANDLER
+# ==============================================
+# Track Ctrl+C state for graceful exit messaging in finally block
+
+$script:CtrlCPressed = $false
+$script:ScriptCompleted = $false
+$script:EarlyExit = $false
+
+# Register exit handler that ALWAYS runs when PowerShell exits
+# This works even when Ctrl+C is pressed before the try block (e.g., during module loading)
+# Uses environment variable for cross-runspace communication since Register-EngineEvent runs in isolated scope
+# PAX_GRACEFUL_EXIT_DONE is set by Invoke-GracefulExit so this handler stays silent
+# when cleanup has already been performed.
+$env:PAX_GRACEFUL_EXIT_DONE = $null
+$env:PAX_REPLAY_MODE = $null # Will be set to "1" when RAWInputCSV is used
+Register-EngineEvent -SourceIdentifier PowerShell.Exiting -Action {
+ if (-not $env:PAX_GRACEFUL_EXIT_DONE) {
+ # Skip interrupt messaging in replay mode - no Graph connection to disconnect
+ if (-not $env:PAX_REPLAY_MODE) {
+ Write-Host ""
+ Write-Host "============================================================================================================" -ForegroundColor Yellow
+ Write-Host " Script Interrupted - Performing Graceful Cleanup" -ForegroundColor Yellow
+ Write-Host "============================================================================================================" -ForegroundColor Yellow
+ Write-Host ""
+ Write-Host " Cleanup complete. Exiting..." -ForegroundColor Green
+ Write-Host ""
+ }
+ }
+} | Out-Null
+
+# Define cleanup function (used by catch block for PipelineStoppedException)
+# Define cleanup function (used by catch block for PipelineStoppedException)
+function Invoke-GracefulExit {
+ # Performs one-shot interrupt cleanup: disconnects Graph and Exchange Online,
+ # logs the interruption, shows checkpoint-resume guidance, then exits 0.
+ # Re-entrancy is guarded via $script:CtrlCPressed; the PowerShell.Exiting
+ # handler is silenced via the PAX_GRACEFUL_EXIT_DONE environment variable.
+ param([string]$Reason = "Script interrupted")
+
+ if ($script:CtrlCPressed) { return } # Prevent multiple invocations
+ $script:CtrlCPressed = $true
+
+ # Signal to engine event handler that graceful exit is handling this
+ $env:PAX_GRACEFUL_EXIT_DONE = "1"
+
+ # Skip interrupt messaging and Graph disconnect in replay mode - no connections to clean up
+ if ($env:PAX_REPLAY_MODE) {
+ exit 0
+ }
+
+ Write-Host ""
+ Write-Host "============================================================================================================" -ForegroundColor Yellow
+ Write-Host " Script Interrupted - Performing Graceful Cleanup" -ForegroundColor Yellow
+ Write-Host "============================================================================================================" -ForegroundColor Yellow
+ Write-Host ""
+
+ # Disconnect from Microsoft Graph - ALWAYS attempt disconnect
+ Write-Host " Disconnecting from Microsoft Graph..." -ForegroundColor Cyan
+ try {
+ Disconnect-MgGraph -ErrorAction Stop | Out-Null
+ Write-Host " Microsoft Graph disconnected" -ForegroundColor Green
+ }
+ catch {
+ # "No application to sign out from" simply means we never connected
+ if ($_.Exception.Message -match 'No application to sign out from') {
+ Write-Host " (Not connected to Microsoft Graph)" -ForegroundColor DarkGray
+ } else {
+ Write-Host " Microsoft Graph session cleared" -ForegroundColor Green
+ }
+ }
+
+ # Disconnect from Exchange Online (if connected via EOM mode)
+ try {
+ $eomSession = Get-PSSession | Where-Object { $_.ConfigurationName -eq 'Microsoft.Exchange' -and $_.State -eq 'Opened' }
+ if ($eomSession) {
+ Write-Host " Disconnecting from Exchange Online Management..." -ForegroundColor Cyan
+ Disconnect-ExchangeOnline -Confirm:$false -ErrorAction SilentlyContinue | Out-Null
+ Write-Host " Exchange Online disconnected" -ForegroundColor Green
+ }
+ }
+ catch {
+ Write-Host " (Exchange Online cleanup completed)" -ForegroundColor Gray
+ }
+
+ # Log the graceful exit (only if the log file already exists)
+ if ($LogFile -and (Test-Path $LogFile)) {
+ Write-Output "" | Out-File -FilePath $LogFile -Append -Encoding utf8
+ Write-Output "[$(Get-Date -Format 'yyyy-MM-dd HH:mm:ss')] Script interrupted by user (Ctrl+C)" | Out-File -FilePath $LogFile -Append -Encoding utf8
+ Write-Output "[$(Get-Date -Format 'yyyy-MM-dd HH:mm:ss')] Graceful cleanup completed" | Out-File -FilePath $LogFile -Append -Encoding utf8
+ }
+
+ # Show checkpoint resume message if checkpoint is enabled
+ if ($script:CheckpointEnabled -and $script:CheckpointPath -and (Test-Path $script:CheckpointPath)) {
+ Show-CheckpointExitMessage
+ }
+
+ Write-Host ""
+ Write-Host " Cleanup complete. Exiting..." -ForegroundColor Green
+ Write-Host ""
+
+ # Exit cleanly (env var PAX_GRACEFUL_EXIT_DONE already set at function start)
+ exit 0
+}
+
+# Trap for catching terminating errors (including Ctrl+C)
+# PipelineStoppedException is routed to the graceful-exit path; everything else
+# is re-thrown so normal error handling (and the outer try/catch) still applies.
+trap {
+ if ($_.Exception -is [System.Management.Automation.PipelineStoppedException]) {
+ Invoke-GracefulExit
+ break
+ }
+ # Re-throw other exceptions
+ throw $_
+}
+
+# ==============================================
+# MODULE PREREQUISITES
+# ==============================================
+# Load required modules based on mode selection (-UseEOM vs Graph API default)
+
+if ($RAWInputCSV) {
+ # Set replay mode flag for graceful exit handling (skip Graph disconnect messaging)
+ $env:PAX_REPLAY_MODE = "1"
+ Write-LogHost "`nReplay mode: Skipping module loading`n" -ForegroundColor Cyan
+}
+elseif (-not $UseEOM) {
+ # DEFAULT MODE: Microsoft Graph Security API
+ # Requires Microsoft.Graph.Authentication and Microsoft.Graph.Security modules
+ Write-LogHost "`nLoading Microsoft Graph modules..." -ForegroundColor Cyan
+
+ try {
+ # ============================================
+ # AUTO-UPDATE CHECK: Ensure latest SDK version
+ # ============================================
+ # The Graph Security auditLog API has known issues with older SDK versions.
+ # Always check for and install the latest version to ensure compatibility.
+
+ Write-LogHost " Checking for Microsoft Graph SDK updates..." -ForegroundColor Gray
+
+ # Get currently installed version
+ $installedAuth = Get-Module -ListAvailable -Name Microsoft.Graph.Authentication | Sort-Object Version -Descending | Select-Object -First 1
+ $installedAuthVersion = if ($installedAuth) { $installedAuth.Version } else { [Version]"0.0.0" }
+
+ # Check PSGallery for latest version (with 15-second timeout to avoid hangs)
+ $latestAuthVersion = $null
+ try {
+ $updateCheckJob = Start-Job -ScriptBlock { Find-Module -Name Microsoft.Graph.Authentication -Repository PSGallery -ErrorAction Stop }
+ $jobCompleted = Wait-Job -Job $updateCheckJob -Timeout 15
+ if ($jobCompleted) {
+ $galleryAuth = Receive-Job -Job $updateCheckJob -ErrorAction Stop
+ $latestAuthVersion = [Version]$galleryAuth.Version
+ }
+ else {
+ Write-LogHost " PSGallery check timed out (15s) - skipping update check" -ForegroundColor Yellow
+ }
+ Remove-Job -Job $updateCheckJob -Force -ErrorAction SilentlyContinue
+ }
+ catch {
+ Write-LogHost " Warning: Could not check PSGallery for updates: $($_.Exception.Message)" -ForegroundColor Yellow
+ Write-LogHost " Continuing with installed version..." -ForegroundColor Yellow
+ }
+
+ # Update if newer version available
+ $updatePerformed = $false
+ if ($latestAuthVersion -and ($latestAuthVersion -gt $installedAuthVersion)) {
+ Write-LogHost " Update available: v$installedAuthVersion → v$latestAuthVersion" -ForegroundColor Yellow
+ Write-LogHost " Installing Microsoft.Graph.Authentication v$latestAuthVersion..." -ForegroundColor Yellow
+
+ try {
+ # Install latest Authentication module
+ Install-Module -Name Microsoft.Graph.Authentication -Scope CurrentUser -Force -AllowClobber -ErrorAction Stop
+ Write-LogHost " Microsoft.Graph.Authentication updated to v$latestAuthVersion" -ForegroundColor Green
+
+ # Install matching Security module
+ Write-LogHost " Installing Microsoft.Graph.Security v$latestAuthVersion..." -ForegroundColor Yellow
+ Install-Module -Name Microsoft.Graph.Security -RequiredVersion $latestAuthVersion -Scope CurrentUser -Force -AllowClobber -ErrorAction Stop
+ Write-LogHost " Microsoft.Graph.Security updated to v$latestAuthVersion" -ForegroundColor Green
+
+ $updatePerformed = $true
+
+ # Refresh module info after update
+ $installedAuth = Get-Module -ListAvailable -Name Microsoft.Graph.Authentication | Sort-Object Version -Descending | Select-Object -First 1
+ $installedAuthVersion = $installedAuth.Version
+ }
+ catch {
+ Write-LogHost " Warning: Update failed: $($_.Exception.Message)" -ForegroundColor Yellow
+ Write-LogHost " Continuing with existing version v$installedAuthVersion..." -ForegroundColor Yellow
+ }
+ }
+ elseif ($latestAuthVersion) {
+ Write-LogHost " Microsoft Graph SDK is up to date (v$installedAuthVersion)" -ForegroundColor Green
+ }
+ else {
+ Write-LogHost " Using installed version v$installedAuthVersion" -ForegroundColor Gray
+ }
+
+ # ============================================
+ # LOAD MODULES
+ # ============================================
+
+ $authModule = Get-Module -Name Microsoft.Graph.Authentication | Select-Object -First 1
+ if (-not $authModule) {
+ $authModule = Get-Module -ListAvailable -Name Microsoft.Graph.Authentication | Sort-Object Version -Descending | Select-Object -First 1
+ }
+ if (-not $authModule) {
+ Write-LogHost " Installing Microsoft.Graph.Authentication module (CurrentUser scope)..." -ForegroundColor Yellow
+ Install-Module -Name Microsoft.Graph.Authentication -Scope CurrentUser -Force -AllowClobber -ErrorAction Stop
+ $authModule = Get-Module -ListAvailable -Name Microsoft.Graph.Authentication | Sort-Object Version -Descending | Select-Object -First 1
+ }
+ $authVersion = $authModule.Version
+ Write-LogHost " Importing Microsoft.Graph.Authentication v$authVersion..." -ForegroundColor Gray
+ Import-Module Microsoft.Graph.Authentication -RequiredVersion $authVersion -Force -ErrorAction Stop
+ Write-LogHost " Microsoft.Graph.Authentication v$authVersion loaded" -ForegroundColor Green
+
+ # Load Microsoft.Graph.Security matching auth version (exact if possible, otherwise same major/minor)
+ $securityModule = Get-Module -Name Microsoft.Graph.Security | Where-Object { $_.Version -eq $authVersion } | Select-Object -First 1
+ if (-not $securityModule) {
+ $securityModule = Get-Module -ListAvailable -Name Microsoft.Graph.Security |
+ Where-Object { $_.Version.Major -eq $authVersion.Major -and $_.Version.Minor -eq $authVersion.Minor } |
+ Sort-Object Version -Descending |
+ Select-Object -First 1
+ }
+ if (-not $securityModule) {
+ Write-LogHost " Installing Microsoft.Graph.Security v$authVersion (CurrentUser scope)..." -ForegroundColor Yellow
+ Install-Module -Name Microsoft.Graph.Security -RequiredVersion $authVersion -Scope CurrentUser -Force -AllowClobber -ErrorAction Stop
+ $securityModule = Get-Module -ListAvailable -Name Microsoft.Graph.Security | Where-Object { $_.Version -eq $authVersion } | Select-Object -First 1
+ }
+ $secVersion = $securityModule.Version
+ Write-LogHost " Importing Microsoft.Graph.Security v$secVersion..." -ForegroundColor Gray
+ Import-Module Microsoft.Graph.Security -RequiredVersion $secVersion -Force -ErrorAction Stop
+ Write-LogHost " Microsoft.Graph.Security v$secVersion loaded" -ForegroundColor Green
+ }
+ catch {
+ Write-LogHost " ERROR: Failed to load Microsoft Graph module: $($_.Exception.Message)" -ForegroundColor Red
+ Write-LogHost "`nTroubleshooting:" -ForegroundColor Yellow
+ Write-LogHost " 1. Ensure PowerShell Gallery access is available" -ForegroundColor White
+ Write-LogHost " 2. Try manual installation: Install-Module -Name Microsoft.Graph -Force" -ForegroundColor White
+ Write-LogHost " 3. Use -UseEOM switch to fall back to Exchange Online Management mode" -ForegroundColor White
+ throw
+ }
+
+ Write-LogHost "Microsoft Graph modules loaded successfully`n" -ForegroundColor Green
+}
+else {
+ # EOM MODE: Exchange Online Management
+ # Graph modules not required in EOM mode
+ Write-LogHost "`nEOM Mode: Skipping Microsoft Graph module loading`n" -ForegroundColor Cyan
+}
+
+# ==============================================
+# DUAL-MODE AUTHENTICATION FUNCTION
+# ==============================================
+# Unified authentication supporting both EOM and Graph API modes
+
+function Connect-PurviewAudit {
+ <#
+ .SYNOPSIS
+ Unified authentication for Purview audit log access via EOM or Graph API.
+
+ .DESCRIPTION
+ Authenticates to Microsoft 365 using either Exchange Online Management (EOM)
+ or Microsoft Graph Security API based on the -UseEOM switch.
+
+ EOM Mode (-UseEOM):
+ - Uses Connect-ExchangeOnline cmdlet
+ - Requires Exchange Online RBAC roles
+ - Serial processing only
+
+ Graph API Mode (Default):
+ - Uses Connect-MgGraph with AuditLog.Read.All scope
+ - Requires Azure AD roles + Graph API permissions
+ - Supports parallel processing
+
+ .PARAMETER AuthMethod
+ Authentication method: WebLogin, DeviceCode, Credential, Silent, AppRegistration
+
+ .PARAMETER UseEOMMode
+ If true, use EOM mode. If false, use Graph API mode.
+
+ .NOTES
+ Side effects: sets $script:Connected on success. In Graph mode it also
+ populates $script:AuthConfig (for Invoke-TokenRefresh re-auth) and
+ $script:SharedAuthState (token + expiry consumed by thread jobs).
+ Throws on any unrecoverable authentication failure.
+ #>
+
+ param(
+ [Parameter(Mandatory = $true)]
+ [ValidateSet('WebLogin', 'DeviceCode', 'Credential', 'Silent', 'AppRegistration')]
+ [string]$AuthMethod,
+
+ [Parameter(Mandatory = $false)]
+ [bool]$UseEOMMode = $false
+ )
+
+ if ($UseEOMMode) {
+ # ========================================
+ # EOM MODE: Exchange Online Management
+ # ========================================
+
+ # Idempotent: a prior successful connect short-circuits.
+ if ($script:Connected) {
+ Write-LogHost "Already connected to Exchange Online." -ForegroundColor Gray
+ return
+ }
+
+ Write-LogHost "Connecting to Microsoft 365 Security & Compliance Center (EOM)..." -ForegroundColor Cyan
+
+ # Ensure ExchangeOnlineManagement module is available
+ try {
+ $existingEOM = Get-Module -ListAvailable -Name ExchangeOnlineManagement | Sort-Object Version -Descending | Select-Object -First 1
+ if (-not $existingEOM) {
+ Write-LogHost "Installing ExchangeOnlineManagement module (CurrentUser scope)..." -ForegroundColor Yellow
+ Install-Module -Name ExchangeOnlineManagement -Scope CurrentUser -Force -AllowClobber -ErrorAction Stop
+ }
+ Import-Module ExchangeOnlineManagement -Force -ErrorAction Stop
+
+ $eomVersion = (Get-Module ExchangeOnlineManagement).Version
+ Write-LogHost " ExchangeOnlineManagement v$eomVersion loaded" -ForegroundColor Green
+ }
+ catch {
+ Write-LogHost "ERROR: Module load/install failure: $($_.Exception.Message)" -ForegroundColor Red
+ throw
+ }
+
+ # Authenticate based on method
+ try {
+ switch ($AuthMethod.ToLower()) {
+ 'appregistration' {
+ # App-only auth has no EOM implementation here; fail fast with guidance.
+ Write-LogHost "AppRegistration authentication is not supported with -UseEOM. Remove -UseEOM to use Graph mode." -ForegroundColor Yellow
+ throw "AppRegistration authentication is only available in Graph API mode"
+ }
+ 'weblogin' {
+ # Probe for -UseWebLogin: parameter availability varies by EOM version.
+ $exoCmd = Get-Command Connect-ExchangeOnline -ErrorAction Stop
+ $hasUseWeb = $exoCmd.Parameters.ContainsKey('UseWebLogin')
+
+ if ($hasUseWeb) {
+ Write-LogHost "Using Connect-ExchangeOnline -UseWebLogin..." -ForegroundColor Gray
+ Connect-ExchangeOnline -ShowBanner:$false -UseWebLogin -ErrorAction Stop | Out-Null
+ }
+ else {
+ Write-LogHost "UseWebLogin parameter not available; using standard interactive auth..." -ForegroundColor Yellow
+ Connect-ExchangeOnline -ShowBanner:$false -ErrorAction Stop | Out-Null
+ }
+ }
+
+ 'devicecode' {
+ Write-LogHost "Using device code flow..." -ForegroundColor Gray
+ Connect-ExchangeOnline -ShowBanner:$false -Device -ErrorAction Stop | Out-Null
+ }
+
+ 'credential' {
+ Write-LogHost "Using credential-based authentication..." -ForegroundColor Gray
+ $cred = Get-Credential -Message 'Enter admin credentials for Exchange Online'
+ Connect-ExchangeOnline -ShowBanner:$false -Credential $cred -ErrorAction Stop | Out-Null
+ }
+
+ 'silent' {
+ Write-LogHost "Attempting silent authentication..." -ForegroundColor Gray
+ $silentOk = $true
+ try {
+ Connect-ExchangeOnline -ShowBanner:$false -ErrorAction Stop | Out-Null
+ }
+ catch {
+ $silentOk = $false
+ }
+
+ if (-not $silentOk) {
+ Write-LogHost "Silent auth failed, falling back to WebLogin..." -ForegroundColor Yellow
+ try {
+ # NOTE(review): unlike the 'weblogin' branch above, this fallback does not
+ # probe for the -UseWebLogin parameter first — confirm it exists on the
+ # EOM versions in use, otherwise this fallback itself will fail.
+ Connect-ExchangeOnline -ShowBanner:$false -UseWebLogin -ErrorAction Stop | Out-Null
+ }
+ catch {
+ Write-LogHost "ERROR: Silent + fallback auth failed: $($_.Exception.Message)" -ForegroundColor Red
+ throw
+ }
+ }
+ }
+ }
+
+ $script:Connected = $true
+ Write-LogHost "Successfully connected to Exchange Online" -ForegroundColor Green
+
+ # Verify connection
+ try {
+ $connInfo = Get-ConnectionInformation -ErrorAction SilentlyContinue | Where-Object { $_.TokenStatus -ne 'Expired' } | Select-Object -First 1
+ if ($connInfo) {
+ Write-LogHost " Tenant ID: $($connInfo.TenantId)" -ForegroundColor Gray
+ Write-LogHost " User: $($connInfo.UserPrincipalName)" -ForegroundColor Gray
+ }
+ }
+ catch {
+ # Connection info not critical, continue
+ }
+ }
+ catch {
+ Write-LogHost "ERROR: EOM authentication failed: $($_.Exception.Message)" -ForegroundColor Red
+ Write-LogHost "`nTroubleshooting:" -ForegroundColor Yellow
+ Write-LogHost " 1. Verify you have required Exchange Online roles" -ForegroundColor White
+ Write-LogHost " 2. Check Multi-Factor Authentication requirements" -ForegroundColor White
+ Write-LogHost " 3. Try a different auth method (-Auth parameter)" -ForegroundColor White
+ throw
+ }
+ }
+ else {
+ # ========================================
+ # GRAPH API MODE: Microsoft Graph Security
+ # ========================================
+
+ Write-LogHost "Connecting to Microsoft Graph Security API..." -ForegroundColor Cyan
+
+ # Define required scopes for Purview audit log access via beta endpoint
+ # ThreatIntelligence.Read.All is required for GET operations on beta endpoint
+ # Service-specific AuditLogsQuery-*.Read.All permissions are required for record retrieval
+ $RequiredScopes = @(
+ 'AuditLog.Read.All' # Primary scope for audit log queries
+ 'ThreatIntelligence.Read.All' # Required for GET operations (beta API)
+ 'AuditLogsQuery-Entra.Read.All' # Entra ID (Azure AD) audit logs
+ 'AuditLogsQuery-Exchange.Read.All' # Exchange Online audit logs
+ 'AuditLogsQuery-OneDrive.Read.All' # OneDrive audit logs
+ 'AuditLogsQuery-SharePoint.Read.All' # SharePoint Online audit logs
+ 'Organization.Read.All' # Required for tenant-level metadata (subscribedSkus, license fetch)
+ )
+
+ try {
+ switch ($AuthMethod.ToLower()) {
+ 'weblogin' {
+ Write-LogHost "Using interactive browser authentication..." -ForegroundColor Gray
+ Connect-MgGraph -Scopes $RequiredScopes -NoWelcome -ErrorAction Stop
+ }
+
+ 'devicecode' {
+ Write-LogHost "Using device code flow..." -ForegroundColor Gray
+ Write-LogHost "A browser window will open. Follow the instructions to authenticate." -ForegroundColor Yellow
+ Connect-MgGraph -Scopes $RequiredScopes -UseDeviceCode -NoWelcome -ErrorAction Stop
+ }
+
+ 'credential' {
+ Write-LogHost "Using client secret credential..." -ForegroundColor Gray
+
+ # Check for required environment variables
+ $tenantId = $env:GRAPH_TENANT_ID
+ $clientId = $env:GRAPH_CLIENT_ID
+ $clientSecret = $env:GRAPH_CLIENT_SECRET
+
+ if (-not $tenantId -or -not $clientId -or -not $clientSecret) {
+ Write-LogHost "ERROR: Credential authentication requires environment variables:" -ForegroundColor Red
+ Write-LogHost " GRAPH_TENANT_ID : Your Azure AD Tenant ID" -ForegroundColor Yellow
+ Write-LogHost " GRAPH_CLIENT_ID : Your App Registration Client ID" -ForegroundColor Yellow
+ Write-LogHost " GRAPH_CLIENT_SECRET : Your App Registration Client Secret" -ForegroundColor Yellow
+ Write-LogHost ""
+ Write-LogHost "Set these variables before running the script:" -ForegroundColor Yellow
+ Write-LogHost " `$env:GRAPH_TENANT_ID = 'your-tenant-id'" -ForegroundColor White
+ Write-LogHost " `$env:GRAPH_CLIENT_ID = 'your-client-id'" -ForegroundColor White
+ Write-LogHost " `$env:GRAPH_CLIENT_SECRET = 'your-client-secret'" -ForegroundColor White
+ throw "Missing required environment variables for credential authentication"
+ }
+
+ $secureSecret = ConvertTo-SecureString -String $clientSecret -AsPlainText -Force
+ $credential = New-Object System.Management.Automation.PSCredential($clientId, $secureSecret)
+
+ # Clear plain-text secret from memory
+ # NOTE(review): $secureSecret/$credential are NOT cleared after connect in
+ # this branch, unlike the 'appregistration' branch below — consider aligning.
+ Clear-Variable -Name clientSecret -Force -ErrorAction SilentlyContinue
+
+ Connect-MgGraph -TenantId $tenantId -ClientSecretCredential $credential -NoWelcome -ErrorAction Stop
+ }
+
+ 'silent' {
+ # Relies on a managed identity (e.g. Azure VM/Automation) being available.
+ Write-LogHost "Using managed identity or existing token..." -ForegroundColor Gray
+ Connect-MgGraph -Identity -NoWelcome -ErrorAction Stop
+ }
+ 'appregistration' {
+ Write-LogHost "Using app registration authentication..." -ForegroundColor Gray
+
+ # TenantId/ClientId resolution order: script parameter, then environment.
+ $appTenantId = $script:TenantId
+ if ([string]::IsNullOrWhiteSpace($appTenantId)) { $appTenantId = $env:GRAPH_TENANT_ID }
+ if ([string]::IsNullOrWhiteSpace($appTenantId)) {
+ Write-LogHost "ERROR: -TenantId or GRAPH_TENANT_ID is required for AppRegistration auth." -ForegroundColor Red
+ throw "Missing TenantId for AppRegistration authentication"
+ }
+
+ $appClientId = $script:ClientId
+ if ([string]::IsNullOrWhiteSpace($appClientId)) { $appClientId = $env:GRAPH_CLIENT_ID }
+ if ([string]::IsNullOrWhiteSpace($appClientId)) {
+ Write-LogHost "ERROR: -ClientId or GRAPH_CLIENT_ID is required for AppRegistration auth." -ForegroundColor Red
+ throw "Missing ClientId for AppRegistration authentication"
+ }
+
+ # Store auth config for potential re-authentication during long-running operations
+ $script:AuthConfig.Method = 'AppRegistration'
+ $script:AuthConfig.TenantId = $appTenantId
+ $script:AuthConfig.ClientId = $appClientId
+ $script:AuthConfig.CertStoreLocation = $script:ClientCertificateStoreLocation
+
+ # Credential material precedence: client secret > cert thumbprint > cert file.
+ $secretValue = $script:ClientSecret
+ if ([string]::IsNullOrWhiteSpace($secretValue)) { $secretValue = $env:GRAPH_CLIENT_SECRET }
+
+ $certThumbprint = $script:ClientCertificateThumbprint
+ if ([string]::IsNullOrWhiteSpace($certThumbprint)) { $certThumbprint = $env:GRAPH_CLIENT_CERT_THUMBPRINT }
+
+ $certPath = $script:ClientCertificatePath
+ if ([string]::IsNullOrWhiteSpace($certPath)) { $certPath = $env:GRAPH_CLIENT_CERT_PATH }
+
+ $certPasswordSecure = $script:ClientCertificatePassword
+ if (-not $certPasswordSecure -and $env:GRAPH_CLIENT_CERT_PASSWORD) {
+ $certPasswordSecure = ConvertTo-SecureString $env:GRAPH_CLIENT_CERT_PASSWORD -AsPlainText -Force
+ }
+
+ # NetworkCredential decodes the SecureString without unmanaged BSTR handling.
+ $certPasswordPlain = $null
+ if ($certPasswordSecure) {
+ $certPasswordPlain = [System.Net.NetworkCredential]::new('', $certPasswordSecure).Password
+ }
+
+ if (-not [string]::IsNullOrWhiteSpace($secretValue)) {
+ Write-LogHost " -> Authenticating with client secret" -ForegroundColor Gray
+ $secureSecret = ConvertTo-SecureString -String $secretValue -AsPlainText -Force
+ $credential = New-Object System.Management.Automation.PSCredential($appClientId, $secureSecret)
+ # Store secret securely for re-authentication (keep a copy before clearing)
+ $script:AuthConfig.ClientSecret = $secureSecret.Copy()
+ $script:AuthConfig.CanReauthenticate = $true
+ Clear-Variable -Name secretValue -Force -ErrorAction SilentlyContinue
+ Connect-MgGraph -TenantId $appTenantId -ClientSecretCredential $credential -NoWelcome -ErrorAction Stop
+ Clear-Variable -Name secureSecret -Force -ErrorAction SilentlyContinue
+ Clear-Variable -Name credential -Force -ErrorAction SilentlyContinue
+ }
+ elseif (-not [string]::IsNullOrWhiteSpace($certThumbprint)) {
+ Write-LogHost " -> Authenticating with certificate thumbprint $certThumbprint" -ForegroundColor Gray
+ $storeLocation = [System.Security.Cryptography.X509Certificates.StoreLocation]::$script:ClientCertificateStoreLocation
+ $store = New-Object System.Security.Cryptography.X509Certificates.X509Store("My", $storeLocation)
+ $store.Open([System.Security.Cryptography.X509Certificates.OpenFlags]::ReadOnly)
+ try {
+ $certificate = $store.Certificates | Where-Object { $_.Thumbprint -eq $certThumbprint }
+ if (-not $certificate) {
+ Write-LogHost "ERROR: Certificate with thumbprint '$certThumbprint' not found in $script:ClientCertificateStoreLocation store." -ForegroundColor Red
+ throw "Certificate not found"
+ }
+ # Store cert thumbprint for re-authentication
+ $script:AuthConfig.CertThumbprint = $certThumbprint
+ $script:AuthConfig.CanReauthenticate = $true
+ Connect-MgGraph -TenantId $appTenantId -ClientId $appClientId -CertificateThumbprint $certThumbprint -NoWelcome -ErrorAction Stop
+ }
+ finally {
+ $store.Close()
+ }
+ }
+ elseif (-not [string]::IsNullOrWhiteSpace($certPath)) {
+ Write-LogHost " -> Authenticating with certificate file $certPath" -ForegroundColor Gray
+ $flags = [System.Security.Cryptography.X509Certificates.X509KeyStorageFlags]::Exportable
+ $cert = $null
+ try {
+ if ($certPasswordPlain) {
+ $cert = New-Object System.Security.Cryptography.X509Certificates.X509Certificate2($certPath, $certPasswordPlain, $flags)
+ }
+ else {
+ $cert = New-Object System.Security.Cryptography.X509Certificates.X509Certificate2($certPath)
+ }
+ # Store cert path and password for re-authentication
+ $script:AuthConfig.CertPath = $certPath
+ if ($certPasswordSecure) { $script:AuthConfig.CertPassword = $certPasswordSecure.Copy() }
+ $script:AuthConfig.CanReauthenticate = $true
+ Connect-MgGraph -TenantId $appTenantId -ClientId $appClientId -Certificate $cert -NoWelcome -ErrorAction Stop
+ }
+ finally {
+ # Always release the cert handle and the plain-text password copy.
+ if ($cert) { $cert.Dispose() }
+ if ($certPasswordPlain) {
+ Clear-Variable -Name certPasswordPlain -Force -ErrorAction SilentlyContinue
+ }
+ }
+ }
+ else {
+ Write-LogHost "ERROR: Provide either -ClientSecret, -ClientCertificateThumbprint, or -ClientCertificatePath for AppRegistration auth." -ForegroundColor Red
+ throw "No credential material supplied for AppRegistration"
+ }
+
+ if ($certPasswordSecure) {
+ Clear-Variable -Name certPasswordSecure -Force -ErrorAction SilentlyContinue
+ }
+ }
+ }
+
+ Write-LogHost "Successfully connected to Microsoft Graph" -ForegroundColor Green
+
+ # Record token issue time for proactive refresh tracking
+ $script:AuthConfig.TokenIssueTime = Get-Date
+
+ # Initialize shared auth state for thread jobs (enables proactive token refresh)
+ $tokenInfo = Get-GraphAccessTokenWithExpiry
+ if ($tokenInfo) {
+ $script:SharedAuthState.Token = $tokenInfo.Token
+ $script:SharedAuthState.ExpiresOn = $tokenInfo.ExpiresOn
+ $script:SharedAuthState.LastRefresh = Get-Date
+ $script:SharedAuthState.AuthMethod = $AuthMethod.ToLower()
+ Write-LogHost " Token expires: $($tokenInfo.ExpiresOn.ToString('HH:mm:ss')) UTC (source: $($tokenInfo.Source))" -ForegroundColor Gray
+ }
+
+ # Get and display current context
+ $context = Get-MgContext
+ Write-LogHost " Tenant ID: $($context.TenantId)" -ForegroundColor Gray
+ $maskedAccount = Get-MaskedUsername -Username $context.Account
+ Write-LogHost " Account: $maskedAccount" -ForegroundColor Gray
+ Write-LogHost " Scopes: $($context.Scopes -join ', ')" -ForegroundColor Gray
+
+ # Trigger Graph API version detection early (before queries start)
+ $null = Get-GraphAuditApiUri -Path 'queries'
+
+ # Validate required scopes are present (warn-only: app-only tokens may
+ # express permissions as roles rather than delegated scopes)
+ $missingScopes = @()
+ foreach ($scope in $RequiredScopes) {
+ if ($context.Scopes -notcontains $scope) {
+ $missingScopes += $scope
+ }
+ }
+
+ if ($missingScopes.Count -gt 0) {
+ Write-LogHost ""
+ Write-LogHost "WARNING: Missing required scope(s):" -ForegroundColor Yellow
+ foreach ($scope in $missingScopes) {
+ Write-LogHost " • $scope" -ForegroundColor Yellow
+ }
+ Write-LogHost ""
+ Write-LogHost "Script may fail when accessing audit logs." -ForegroundColor Yellow
+ Write-LogHost "Consider re-authenticating with full permissions." -ForegroundColor Yellow
+ Write-LogHost ""
+ }
+
+ $script:Connected = $true
+ }
+ catch {
+ Write-LogHost "ERROR: Graph API authentication failed: $($_.Exception.Message)" -ForegroundColor Red
+ Write-LogHost ""
+ Write-LogHost "Troubleshooting:" -ForegroundColor Yellow
+ Write-LogHost " 1. Ensure you have AuditLog.Read.All permission" -ForegroundColor White
+ Write-LogHost " 2. Verify Azure AD role (Compliance/Security Administrator)" -ForegroundColor White
+ Write-LogHost " 3. Check network connectivity to Microsoft Graph API" -ForegroundColor White
+ Write-LogHost " 4. Try a different authentication method (-Auth parameter)" -ForegroundColor White
+ Write-LogHost " 5. Use -UseEOM switch to fall back to EOM mode" -ForegroundColor White
+ Write-LogHost ""
+ throw
+ }
+ }
+}
+
+# ==============================================
+# ACCESS TOKEN EXTRACTION HELPER
+# ==============================================
+function Get-GraphAccessToken {
+ <#
+ .SYNOPSIS
+ Retrieves the bearer token for the current Microsoft Graph session.
+
+ .DESCRIPTION
+ Graph SDK 2.x deliberately hides the access token from Get-MgContext, so the
+ primary strategy issues a cheap request against the $metadata endpoint and
+ reads the Authorization header that the SDK attached to it. When that fails,
+ the function falls back to Get-MgContext.AccessToken, which older SDK
+ versions still populate.
+
+ .OUTPUTS
+ [string] The access token, or $null if neither strategy succeeds.
+ #>
+ [CmdletBinding()]
+ param()
+
+ # Strategy 1 (SDK 2.x): lift the bearer token off the Authorization header
+ # of a lightweight authenticated request.
+ try {
+ $httpResponse = Invoke-MgGraphRequest -Method GET -Uri 'https://graph.microsoft.com/v1.0/$metadata' -OutputType HttpResponseMessage -ErrorAction Stop
+ $bearer = $httpResponse.RequestMessage.Headers.Authorization.Parameter
+ if ($bearer) {
+ return $bearer
+ }
+ }
+ catch {
+ # Fall through to the legacy context-based lookup.
+ }
+
+ # Strategy 2 (older SDKs): the context object exposes the token directly.
+ try {
+ $mgContext = Get-MgContext -ErrorAction SilentlyContinue
+ if ($mgContext -and $mgContext.AccessToken) {
+ return $mgContext.AccessToken
+ }
+ }
+ catch {
+ # Nothing else to try.
+ }
+
+ return $null
+}
+
+# ==============================================
+# ACCESS TOKEN WITH EXPIRY EXTRACTION (for shared auth state)
+# ==============================================
+function Get-GraphAccessTokenWithExpiry {
+ <#
+ .SYNOPSIS
+ Returns the current Graph access token together with its expiry time.
+
+ .DESCRIPTION
+ Obtains the token via Get-GraphAccessToken, then decodes the JWT payload
+ (the middle, base64url-encoded segment of header.payload.signature) to read
+ the 'exp' claim — the authoritative expiry as a Unix timestamp. This avoids
+ Azure.Identity reflection and triggers no extra authentication prompts.
+
+ When the payload cannot be decoded, the expiry is estimated as 50 minutes
+ from now.
+
+ .OUTPUTS
+ [hashtable] Token (string), ExpiresOn (DateTime) and Source ('JWT' or
+ 'estimated'); $null when no token is available.
+ #>
+ [CmdletBinding()]
+ param()
+
+ $result = @{
+ Token = $null
+ ExpiresOn = $null
+ Source = 'unknown'
+ }
+
+ # A token is mandatory; the expiry is best-effort on top of it.
+ $result.Token = Get-GraphAccessToken
+ if (-not $result.Token) {
+ return $null
+ }
+
+ try {
+ $segments = $result.Token.Split('.')
+ if ($segments.Count -ge 2) {
+ $encodedPayload = $segments[1]
+
+ # base64url omits '=' padding; restore it so FromBase64String accepts it.
+ $remainder = $encodedPayload.Length % 4
+ if ($remainder -ne 0) {
+ $encodedPayload += ('=' * (4 - $remainder))
+ }
+
+ # Map the base64url alphabet ('-', '_') back to standard base64 ('+', '/').
+ $encodedPayload = $encodedPayload.Replace('-', '+').Replace('_', '/')
+
+ # Decode the payload bytes as UTF-8 JSON and parse the claims.
+ $payload = [System.Text.Encoding]::UTF8.GetString([Convert]::FromBase64String($encodedPayload)) | ConvertFrom-Json
+
+ if ($payload.exp) {
+ # 'exp' is seconds since the Unix epoch (UTC).
+ $unixEpoch = [DateTime]::new(1970, 1, 1, 0, 0, 0, [DateTimeKind]::Utc)
+ $result.ExpiresOn = $unixEpoch.AddSeconds($payload.exp)
+ $result.Source = 'JWT'
+
+ # Surface remaining lifetime for -Verbose diagnostics.
+ $timeRemaining = $result.ExpiresOn - (Get-Date).ToUniversalTime()
+ if ($timeRemaining.TotalMinutes -gt 0) {
+ Write-Verbose "Token expires in $([int]$timeRemaining.TotalMinutes) minutes (from JWT 'exp' claim)"
+ }
+
+ return $result
+ }
+ }
+ }
+ catch {
+ Write-Verbose "JWT decode failed: $($_.Exception.Message)"
+ }
+
+ # Could not read 'exp' — assume a 50-minute lifetime (observed tokens last
+ # ~45-60 min), so the 5-minute refresh buffer fires around the 45-min mark.
+ $result.ExpiresOn = (Get-Date).ToUniversalTime().AddMinutes(50)
+ $result.Source = 'estimated'
+
+ return $result
+}
+
+# ==============================================
+# TOKEN REFRESH FUNCTION FOR LONG-RUNNING OPERATIONS
+# ==============================================
+function Invoke-TokenRefresh {
+ <#
+ .SYNOPSIS
+ Forces re-authentication for AppRegistration auth mode to get fresh access token.
+
+ .DESCRIPTION
+ When using App Registration authentication (client secret or certificate),
+ this function reconnects to Microsoft Graph to obtain a fresh access token.
+ This is critical for long-running operations that exceed the default OAuth
+ token lifetime (~60-90 minutes).
+
+ For interactive auth modes, the result has Success = $false, as
+ re-authentication would require user interaction.
+
+ .PARAMETER Force
+ Force re-authentication even if token doesn't appear expired.
+ NOTE: the switch is retained for caller compatibility; once the stored-config
+ checks pass, re-authentication is always attempted regardless of this flag.
+
+ .OUTPUTS
+ [PSCustomObject] with Success ($true/$false), NewToken, Message and
+ AuthMethod properties
+ #>
+ [CmdletBinding()]
+ param(
+ [switch]$Force
+ )
+
+ $result = [PSCustomObject]@{
+ Success = $false
+ NewToken = $null
+ Message = ""
+ AuthMethod = $script:AuthConfig.Method
+ }
+
+ # Check if we can re-authenticate (interactive modes cannot, silently)
+ if (-not $script:AuthConfig.CanReauthenticate) {
+ $result.Message = "Auth method '$($script:AuthConfig.Method)' does not support automatic re-authentication"
+ return $result
+ }
+
+ # Validate we have stored config
+ if ($script:AuthConfig.Method -ne 'AppRegistration') {
+ $result.Message = "Only AppRegistration auth mode supports automatic token refresh"
+ return $result
+ }
+
+ if ([string]::IsNullOrWhiteSpace($script:AuthConfig.TenantId) -or
+ [string]::IsNullOrWhiteSpace($script:AuthConfig.ClientId)) {
+ $result.Message = "Missing TenantId or ClientId in stored auth config"
+ return $result
+ }
+
+ Write-LogHost " [TOKEN-REFRESH] Attempting re-authentication using AppRegistration..." -ForegroundColor Cyan
+
+ try {
+ # Disconnect first to ensure clean state
+ try {
+ Disconnect-MgGraph -ErrorAction SilentlyContinue | Out-Null
+ } catch { }
+
+ # Re-authenticate based on stored credential type
+ # (precedence: client secret > cert thumbprint > cert file)
+ $reconnected = $false
+
+ # Try client secret first
+ if ($script:AuthConfig.ClientSecret) {
+ Write-LogHost " [TOKEN-REFRESH] Reconnecting with client secret..." -ForegroundColor Gray
+ $credential = New-Object System.Management.Automation.PSCredential(
+ $script:AuthConfig.ClientId,
+ $script:AuthConfig.ClientSecret
+ )
+ Connect-MgGraph -TenantId $script:AuthConfig.TenantId `
+ -ClientSecretCredential $credential `
+ -NoWelcome -ErrorAction Stop
+ $reconnected = $true
+ }
+ # Try certificate thumbprint
+ elseif ($script:AuthConfig.CertThumbprint) {
+ Write-LogHost " [TOKEN-REFRESH] Reconnecting with certificate thumbprint..." -ForegroundColor Gray
+ Connect-MgGraph -TenantId $script:AuthConfig.TenantId `
+ -ClientId $script:AuthConfig.ClientId `
+ -CertificateThumbprint $script:AuthConfig.CertThumbprint `
+ -NoWelcome -ErrorAction Stop
+ $reconnected = $true
+ }
+ # Try certificate file
+ elseif ($script:AuthConfig.CertPath) {
+ Write-LogHost " [TOKEN-REFRESH] Reconnecting with certificate file..." -ForegroundColor Gray
+ $flags = [System.Security.Cryptography.X509Certificates.X509KeyStorageFlags]::Exportable
+ $cert = $null
+ try {
+ if ($script:AuthConfig.CertPassword) {
+ # FIX: decode the SecureString via NetworkCredential (same pattern as
+ # the initial connect) instead of SecureStringToBSTR/PtrToStringAuto,
+ # which allocated an unmanaged BSTR copy of the password that was never
+ # released with ZeroFreeBSTR (plain-text secret left in memory).
+ $plainPassword = [System.Net.NetworkCredential]::new('', $script:AuthConfig.CertPassword).Password
+ $cert = New-Object System.Security.Cryptography.X509Certificates.X509Certificate2(
+ $script:AuthConfig.CertPath, $plainPassword, $flags
+ )
+ Clear-Variable -Name plainPassword -Force -ErrorAction SilentlyContinue
+ }
+ else {
+ $cert = New-Object System.Security.Cryptography.X509Certificates.X509Certificate2(
+ $script:AuthConfig.CertPath
+ )
+ }
+ Connect-MgGraph -TenantId $script:AuthConfig.TenantId `
+ -ClientId $script:AuthConfig.ClientId `
+ -Certificate $cert `
+ -NoWelcome -ErrorAction Stop
+ $reconnected = $true
+ }
+ finally {
+ # Always release the certificate's private-key handle
+ if ($cert) { $cert.Dispose() }
+ }
+ }
+
+ if ($reconnected) {
+ # Use reliable token extraction helper (HTTP method primary)
+ $result.NewToken = Get-GraphAccessToken
+ if ($result.NewToken) {
+ $result.Success = $true
+ $result.Message = "Successfully refreshed token"
+
+ # Update token timing and reset auth failure flags
+ $script:AuthConfig.TokenIssueTime = Get-Date
+ $script:TokenAcquiredTime = Get-Date
+ $script:AuthFailureDetected = $false
+ $script:Auth401MessageShown = $false # Reset for next auth failure cycle
+
+ Write-LogHost " [TOKEN-REFRESH] Successfully obtained fresh access token" -ForegroundColor Green
+ Write-LogHost " [TOKEN-REFRESH] Token acquired at $(Get-Date -Format 'HH:mm:ss') - proactive refresh at 30-minute age" -ForegroundColor DarkGray
+ }
+ else {
+ $result.Message = "Reconnected but could not extract access token"
+ Write-LogHost " [TOKEN-REFRESH] ✗ $($result.Message)" -ForegroundColor Red
+ }
+ }
+ else {
+ $result.Message = "No valid credential found in stored auth config"
+ }
+ }
+ catch {
+ $result.Message = "Re-authentication failed: $($_.Exception.Message)"
+ Write-LogHost " [TOKEN-REFRESH] ✗ $($result.Message)" -ForegroundColor Red
+ }
+
+ return $result
+}
+
+# ==============================================
+# PROACTIVE TOKEN REFRESH FOR LONG-RUNNING OPERATIONS
+# ==============================================
+function Refresh-GraphTokenIfNeeded {
+    <#
+    .SYNOPSIS
+        Proactively refreshes the Graph access token if it's nearing expiry.
+
+    .DESCRIPTION
+        Checks SharedAuthState.ExpiresOn and refreshes the token when fewer than
+        BufferMinutes (default 5) remain before expiry. For AppRegistration mode
+        with re-auth capability, also refreshes proactively once the token is
+        older than 30 minutes, regardless of remaining lifetime.
+        Uses Azure.Identity for interactive auth modes, or
+        Invoke-TokenRefresh for AppRegistration mode.
+
+        This function is called from the main thread's job monitoring loop to ensure
+        thread jobs always have a valid token in SharedAuthState.
+
+        IMPORTANT: Includes cooldown logic to prevent spam - only attempts refresh
+        once per 5 minutes. If silent refresh fails, falls through to an
+        interactive re-auth prompt (or a fatal 'Quit' for headless runs).
+
+    .PARAMETER BufferMinutes
+        Refresh if token expires within this many minutes. Default: 5.
+
+    .OUTPUTS
+        $true - Token was refreshed successfully (silent or interactive)
+        $false - No refresh needed (token still valid) or within cooldown period
+        'Quit' - User chose to quit at the re-auth prompt
+
+        CRITICAL: Callers MUST check for 'Quit' return and handle gracefully!
+    #>
+    [CmdletBinding()]
+    param(
+        [int]$BufferMinutes = 5
+    )
+
+    # No tracked expiry means shared auth state isn't initialized - nothing to refresh.
+    if (-not $script:SharedAuthState.ExpiresOn) {
+        return $false
+    }
+
+    $now = (Get-Date).ToUniversalTime()
+    $expiresOn = $script:SharedAuthState.ExpiresOn
+    $minutesRemaining = ($expiresOn - $now).TotalMinutes
+
+    # PROACTIVE REFRESH FOR APPREG: Refresh at 30-minute token age (not just near expiry)
+    # AppRegistration can refresh silently, so we do this proactively to avoid 401s
+    $needsProactiveRefresh = $false
+    if ($script:AuthConfig.Method -eq 'AppRegistration' -and $script:AuthConfig.CanReauthenticate) {
+        if ($script:AuthConfig.TokenIssueTime) {
+            # NOTE: age is computed in local time; TokenIssueTime is also set via
+            # Get-Date below, so the arithmetic is consistent.
+            $tokenAge = (Get-Date) - $script:AuthConfig.TokenIssueTime
+            if ($tokenAge.TotalMinutes -gt 30) {
+                $needsProactiveRefresh = $true
+                Write-LogHost "  [TOKEN] Token age: $([Math]::Round($tokenAge.TotalMinutes, 1)) minutes - proactive refresh triggered" -ForegroundColor Yellow
+            }
+        }
+    }
+
+    if ($minutesRemaining -gt $BufferMinutes -and -not $needsProactiveRefresh) {
+        return $false  # Token still valid, no refresh needed
+    }
+
+    # COOLDOWN CHECK: Don't spam refresh attempts - only try once per 5 minutes
+    if ($script:LastProactiveRefreshAttempt) {
+        $timeSinceLastAttempt = ((Get-Date) - $script:LastProactiveRefreshAttempt).TotalMinutes
+        if ($timeSinceLastAttempt -lt 5) {
+            # Already tried recently, don't spam
+            return $false
+        }
+    }
+    $script:LastProactiveRefreshAttempt = Get-Date
+
+    # Log appropriate message based on trigger reason
+    # (the age-triggered path already logged above)
+    if (-not $needsProactiveRefresh) {
+        Write-LogHost "  [TOKEN] Token expires in $([Math]::Round($minutesRemaining, 1)) minutes - attempting proactive refresh..." -ForegroundColor Yellow
+    }
+
+    # Try to refresh using Azure.Identity (uses cached MSAL tokens, may prompt if needed)
+    $tokenInfo = Get-GraphAccessTokenWithExpiry
+    # An unchanged token string means the credential just returned its cached
+    # token, so it is not treated as a successful refresh.
+    if ($tokenInfo -and $tokenInfo.Token -ne $script:SharedAuthState.Token) {
+        # Got a new token
+        $script:SharedAuthState.Token = $tokenInfo.Token
+        $script:SharedAuthState.ExpiresOn = $tokenInfo.ExpiresOn
+        $script:SharedAuthState.LastRefresh = Get-Date
+        $script:SharedAuthState.RefreshCount++
+
+        Write-LogHost "  [TOKEN] Token refreshed silently (expires: $($tokenInfo.ExpiresOn.ToString('HH:mm:ss')) UTC, refresh #$($script:SharedAuthState.RefreshCount))" -ForegroundColor Green
+        Write-LogHost "  [TOKEN] Note: In-flight queries may still require re-auth before this expiration" -ForegroundColor DarkGray
+        return $true
+    }
+
+    # Azure.Identity didn't give us a new token, try AppRegistration refresh if available
+    if ($script:AuthConfig.CanReauthenticate) {
+        $refreshResult = Invoke-TokenRefresh -Force
+        if ($refreshResult.Success) {
+            $script:SharedAuthState.Token = $refreshResult.NewToken
+            # AddMinutes(50): assumes a ~60-minute app token lifetime with a
+            # 10-minute safety margin - TODO confirm against token response.
+            $script:SharedAuthState.ExpiresOn = (Get-Date).ToUniversalTime().AddMinutes(50)
+            $script:SharedAuthState.LastRefresh = Get-Date
+            $script:SharedAuthState.RefreshCount++
+            $script:AuthConfig.TokenIssueTime = Get-Date  # Reset age timer for proactive refresh
+
+            Write-LogHost "  [TOKEN] Token refreshed via AppRegistration (refresh #$($script:SharedAuthState.RefreshCount))" -ForegroundColor Green
+            return $true
+        }
+    }
+
+    # SILENT REFRESH FAILED
+    # For AppRegistration + Force: FATAL exit (true headless operation)
+    # For AppRegistration without Force: Fall back to interactive prompt
+    # For interactive modes: Prompt user for re-authentication
+    Write-LogHost "  [TOKEN] [!] Silent token refresh failed - interactive re-authentication required" -ForegroundColor Red
+
+    # AppRegistration mode with -Force: Silent refresh failure is fatal (no interactive fallback for headless runs)
+    # NOTE(review): $Force here is presumably the script-level -Force switch,
+    # visible via scope inheritance - confirm it is not shadowed by a local.
+    if ($script:AuthConfig.Method -eq 'AppRegistration' -and $Force) {
+        Write-LogHost "  [TOKEN] FATAL: AppRegistration token refresh failed. Cannot continue headless (-Force mode)." -ForegroundColor Red
+        Write-LogHost "  [TOKEN] Check: client secret expiration, certificate validity, or API permissions." -ForegroundColor Yellow
+        return 'Quit'
+    }
+
+    # Interactive modes OR AppRegistration without -Force: prompt user for re-authentication
+    $refreshResult = Invoke-TokenRefreshPrompt
+    if ($refreshResult -eq 'Quit') {
+        # User chose to quit - return special value for callers to handle
+        return 'Quit'
+    }
+
+    # User pressed R and successfully re-authenticated
+    # Invoke-TokenRefreshPrompt already updated SharedAuthState and reset AuthFailureDetected
+    return $true
+}
+
+# ==============================================
+# CHECKPOINT/RESUME FUNCTIONS FOR LONG-RUNNING OPERATIONS
+# ==============================================
+
+function Initialize-CheckpointForNewRun {
+    <#
+    .SYNOPSIS
+        Creates new checkpoint structure for a fresh run (not resume mode).
+    .DESCRIPTION
+        Initializes checkpoint data structure with all processing parameters,
+        creates _PARTIAL output filename, and saves initial checkpoint file to disk.
+        On resume, all parameters are restored from checkpoint to ensure consistency.
+
+        NOTE: the `if ($AllParameters.X) { $AllParameters.X } else { default }`
+        fallbacks rely on PowerShell truthiness: an explicitly supplied 0/$false/
+        empty value collapses to the hard-coded default shown here. For the
+        parameters below this is benign (the fallbacks match the disabled/default
+        values), but keep it in mind when adding new entries.
+    .PARAMETER OutputPath
+        Directory that receives the checkpoint file and _PARTIAL output.
+    .PARAMETER BaseOutputFileName
+        Final output filename; the _PARTIAL variant is derived from it.
+    .PARAMETER RunTimestamp
+        Unique run identifier embedded in the checkpoint filename.
+    .PARAMETER StartDate
+        Query range start (stored in the checkpoint as UTC ISO-8601).
+    .PARAMETER EndDate
+        Query range end (stored in the checkpoint as UTC ISO-8601).
+    .PARAMETER AllParameters
+        Snapshot of the script's effective parameters to persist for resume.
+    .OUTPUTS
+        The full path of the _PARTIAL output file.
+    #>
+    param(
+        [Parameter(Mandatory)]
+        [string]$OutputPath,
+
+        [Parameter(Mandatory)]
+        [string]$BaseOutputFileName,
+
+        [Parameter(Mandatory)]
+        [string]$RunTimestamp,
+
+        [Parameter(Mandatory)]
+        [datetime]$StartDate,
+
+        [Parameter(Mandatory)]
+        [datetime]$EndDate,
+
+        [Parameter()]
+        [hashtable]$AllParameters
+    )
+
+    # Create _PARTIAL filename
+    $fileNameWithoutExt = [System.IO.Path]::GetFileNameWithoutExtension($BaseOutputFileName)
+    $fileExt = [System.IO.Path]::GetExtension($BaseOutputFileName)
+    $partialFileName = "${fileNameWithoutExt}_PARTIAL${fileExt}"
+    $script:PartialOutputPath = Join-Path $OutputPath $partialFileName
+
+    # Create checkpoint file path (hidden file with dot prefix)
+    $script:CheckpointPath = Join-Path $OutputPath ".pax_checkpoint_${RunTimestamp}.json"
+
+    # Initialize checkpoint data structure with comprehensive parameter snapshot
+    $script:CheckpointData = @{
+        version = 2  # Bumped version for expanded parameter storage
+        runTimestamp = $RunTimestamp
+        created = (Get-Date).ToUniversalTime().ToString('o')
+        lastUpdated = (Get-Date).ToUniversalTime().ToString('o')
+        parameters = @{
+            # Date range (round-trippable ISO-8601, UTC)
+            startDate = $StartDate.ToUniversalTime().ToString('o')
+            endDate = $EndDate.ToUniversalTime().ToString('o')
+
+            # Activity/Record filtering (normalized to arrays for JSON stability)
+            activityTypes = if ($AllParameters.ActivityTypes) { @($AllParameters.ActivityTypes) } else { @() }
+            recordTypes = if ($AllParameters.RecordTypes) { @($AllParameters.RecordTypes) } else { @() }
+            serviceTypes = if ($AllParameters.ServiceTypes) { @($AllParameters.ServiceTypes) } else { @() }
+            userIds = if ($AllParameters.UserIds) { @($AllParameters.UserIds) } else { @() }
+            groupNames = if ($AllParameters.GroupNames) { @($AllParameters.GroupNames) } else { @() }
+
+            # Agent filtering
+            agentId = if ($AllParameters.AgentId) { @($AllParameters.AgentId) } else { @() }
+            agentsOnly = [bool]$AllParameters.AgentsOnly
+            excludeAgents = [bool]$AllParameters.ExcludeAgents
+
+            # Prompt filtering
+            promptFilter = if ($AllParameters.PromptFilter) { $AllParameters.PromptFilter } else { $null }
+
+            # Schema/Explosion settings
+            explodeArrays = [bool]$AllParameters.ExplodeArrays
+            explodeDeep = [bool]$AllParameters.ExplodeDeep
+            explosionThreads = if ($AllParameters.ExplosionThreads) { $AllParameters.ExplosionThreads } else { 0 }
+            flatDepth = if ($AllParameters.FlatDepth) { $AllParameters.FlatDepth } else { 120 }
+            streamingSchemaSample = if ($AllParameters.StreamingSchemaSample) { $AllParameters.StreamingSchemaSample } else { 5000 }
+            streamingChunkSize = if ($AllParameters.StreamingChunkSize) { $AllParameters.StreamingChunkSize } else { 5000 }
+
+            # M365/User info bundles ([bool] coerces absent hashtable keys to $false)
+            includeM365Usage = [bool]$AllParameters.IncludeM365Usage
+            includeUserInfo = [bool]$AllParameters.IncludeUserInfo
+            includeDSPMForAI = [bool]$AllParameters.IncludeDSPMForAI
+            includeCopilotInteraction = [bool]$AllParameters.IncludeCopilotInteraction
+            excludeCopilotInteraction = [bool]$AllParameters.ExcludeCopilotInteraction
+
+            # Partitioning
+            blockHours = if ($AllParameters.BlockHours) { $AllParameters.BlockHours } else { 0.5 }
+            partitionHours = if ($AllParameters.PartitionHours) { $AllParameters.PartitionHours } else { 0 }
+            maxPartitions = if ($AllParameters.MaxPartitions) { $AllParameters.MaxPartitions } else { 160 }
+
+            # Output settings
+            outputPath = $OutputPath
+            exportWorkbook = [bool]$AllParameters.ExportWorkbook
+            combineOutput = [bool]$AllParameters.CombineOutput
+
+            # Auth (method only - no secrets)
+            auth = if ($AllParameters.Auth) { $AllParameters.Auth } else { 'WebLogin' }
+            tenantId = if ($AllParameters.TenantId) { $AllParameters.TenantId } else { $null }
+            clientId = if ($AllParameters.ClientId) { $AllParameters.ClientId } else { $null }
+            # Note: ClientSecret is NOT stored for security
+
+            # Other settings
+            resultSize = if ($AllParameters.ResultSize) { $AllParameters.ResultSize } else { 10000 }
+            maxConcurrency = if ($AllParameters.MaxConcurrency) { $AllParameters.MaxConcurrency } else { 10 }
+            # NOTE(review): MaxMemoryMB's script default is -1 (auto), but an
+            # unsupplied/0 value is stored as 0 (disabled) here - both mean
+            # "no explicit cap" on resume; confirm resume-side interpretation.
+            maxMemoryMB = if ($AllParameters.MaxMemoryMB) { $AllParameters.MaxMemoryMB } else { 0 }
+            useEOM = [bool]$AllParameters.UseEOM
+            autoCompleteness = [bool]$AllParameters.AutoCompleteness
+            includeTelemetry = [bool]$AllParameters.IncludeTelemetry
+        }
+        outputFiles = @{
+            partialCsv = $partialFileName
+            finalCsv = $BaseOutputFileName
+        }
+        partitions = @{
+            total = 0
+            blockHours = if ($AllParameters.BlockHours) { $AllParameters.BlockHours } else { 0.5 }
+            completed = @()
+            queryCreated = @()
+        }
+        statistics = @{
+            totalRecordsSaved = 0
+            partitionsComplete = 0
+            partitionsQueryCreated = 0
+            partitionsRemaining = 0
+        }
+        explosion = @{
+            status = 'NotStarted'  # NotStarted, InProgress, Completed
+            recordsProcessed = 0
+            rowsGenerated = 0
+            lastUpdateTime = $null
+        }
+    }
+
+    # Save initial checkpoint
+    Save-CheckpointToDisk
+
+    return $script:PartialOutputPath
+}
+
+function Save-CheckpointToDisk {
+    <#
+    .SYNOPSIS
+        Persists the in-memory checkpoint to disk using an atomic write.
+    .DESCRIPTION
+        Serializes $script:CheckpointData to JSON, writes a sibling .tmp file,
+        then renames it over the destination so a crash mid-write can never
+        leave a truncated checkpoint behind. Derived statistics are refreshed
+        from the partition lists just before serialization.
+    #>
+
+    if (-not $script:CheckpointPath -or -not $script:CheckpointData) {
+        return
+    }
+
+    try {
+        # Stamp the save time
+        $script:CheckpointData.lastUpdated = (Get-Date).ToUniversalTime().ToString('o')
+
+        # Recompute derived statistics from the authoritative partition lists
+        $doneCount    = $script:CheckpointData.partitions.completed.Count
+        $createdCount = $script:CheckpointData.partitions.queryCreated.Count
+        $script:CheckpointData.statistics.partitionsComplete     = $doneCount
+        $script:CheckpointData.statistics.partitionsQueryCreated = $createdCount
+        $script:CheckpointData.statistics.partitionsRemaining    = $script:CheckpointData.partitions.total - $doneCount - $createdCount
+
+        # Atomic write pattern: serialize to a staging file first
+        $stagingPath = "$($script:CheckpointPath).tmp"
+        $script:CheckpointData | ConvertTo-Json -Depth 10 | Set-Content -Path $stagingPath -Encoding UTF8 -Force
+
+        # Move-Item -Force over an existing file is unreliable on Windows,
+        # so clear the destination explicitly before renaming.
+        if (Test-Path $script:CheckpointPath) {
+            Remove-Item -Path $script:CheckpointPath -Force -ErrorAction SilentlyContinue
+        }
+
+        # Rename into place (atomic on most filesystems)
+        Move-Item -Path $stagingPath -Destination $script:CheckpointPath -Force
+    }
+    catch {
+        Write-LogHost "  Warning: Failed to save checkpoint: $($_.Exception.Message)" -ForegroundColor Yellow
+    }
+}
+
+function Save-Checkpoint {
+    <#
+    .SYNOPSIS
+        Updates checkpoint with partition state change and saves to disk.
+    .DESCRIPTION
+        Records a partition's state transition (QueryCreated -> Completed) in
+        $script:CheckpointData and persists the checkpoint via
+        Save-CheckpointToDisk. Both state additions are idempotent: re-saving
+        an already-recorded partition does not duplicate entries or inflate
+        the totalRecordsSaved counter.
+    .PARAMETER PartitionIndex
+        The partition index (1-based; 0/unset is treated as missing).
+    .PARAMETER State
+        'QueryCreated' or 'Completed'
+    .PARAMETER QueryId
+        The server-assigned query ID.
+    .PARAMETER PartitionStart
+        Partition start time (optional - looked up from partitionStatus if not provided).
+    .PARAMETER PartitionEnd
+        Partition end time (optional - looked up from partitionStatus if not provided).
+    .PARAMETER RecordCount
+        Number of records (only for Completed state).
+    .PARAMETER Force
+        Just save current checkpoint state to disk without updating partition info.
+    #>
+    param(
+        [Parameter()]
+        [int]$PartitionIndex,
+
+        [Parameter()]
+        [ValidateSet('QueryCreated', 'Completed')]
+        [string]$State,
+
+        [Parameter()]
+        [string]$QueryId,
+
+        [Parameter()]
+        [datetime]$PartitionStart,
+
+        [Parameter()]
+        [datetime]$PartitionEnd,
+
+        [Parameter()]
+        [int]$RecordCount = 0,
+
+        [Parameter()]
+        [switch]$Force
+    )
+
+    # No checkpoint in this run (e.g. checkpointing disabled) - nothing to do.
+    if (-not $script:CheckpointData) {
+        return
+    }
+
+    # If -Force is specified, just save current state to disk without updating partition info
+    if ($Force) {
+        Save-CheckpointToDisk
+        return
+    }
+
+    # For normal calls, require the mandatory parameters.
+    # (-not $PartitionIndex also rejects 0, which is fine: indices are 1-based.)
+    if (-not $PartitionIndex -or -not $State -or -not $QueryId) {
+        Write-Verbose "Save-Checkpoint: Missing required parameters (PartitionIndex, State, QueryId) - skipping"
+        return
+    }
+
+    # Look up partition times from partitionStatus if not provided
+    if (-not $PartitionStart -or -not $PartitionEnd) {
+        $partitionInfo = $script:partitionStatus[$PartitionIndex]
+        if ($partitionInfo -and $partitionInfo.Partition) {
+            # The Partition object has PStart and PEnd properties
+            if (-not $PartitionStart -and $partitionInfo.Partition.PStart) { $PartitionStart = $partitionInfo.Partition.PStart }
+            if (-not $PartitionEnd -and $partitionInfo.Partition.PEnd) { $PartitionEnd = $partitionInfo.Partition.PEnd }
+        }
+
+        # If still missing, we can't proceed with checkpoint save
+        if (-not $PartitionStart -or -not $PartitionEnd) {
+            Write-Verbose "Save-Checkpoint: Could not determine partition times for index $PartitionIndex - skipping checkpoint update"
+            return
+        }
+    }
+
+    # Entry shared by both states; times normalized to UTC ISO-8601.
+    $partitionEntry = @{
+        index = $PartitionIndex
+        start = $PartitionStart.ToUniversalTime().ToString('o')
+        end = $PartitionEnd.ToUniversalTime().ToString('o')
+        queryId = $QueryId
+    }
+
+    if ($State -eq 'QueryCreated') {
+        $partitionEntry.createdAt = (Get-Date).ToUniversalTime().ToString('o')
+
+        # Add to queryCreated list (if not already there)
+        $existing = $script:CheckpointData.partitions.queryCreated | Where-Object { $_.index -eq $PartitionIndex }
+        if (-not $existing) {
+            $script:CheckpointData.partitions.queryCreated += $partitionEntry
+        }
+    }
+    elseif ($State -eq 'Completed') {
+        $partitionEntry.records = $RecordCount
+
+        # Remove from queryCreated if present (a partition moves QueryCreated -> Completed)
+        $script:CheckpointData.partitions.queryCreated = @(
+            $script:CheckpointData.partitions.queryCreated | Where-Object { $_.index -ne $PartitionIndex }
+        )
+
+        # Add to completed list (if not already there). The record counter is
+        # only incremented on first completion, keeping re-saves idempotent.
+        $existing = $script:CheckpointData.partitions.completed | Where-Object { $_.index -eq $PartitionIndex }
+        if (-not $existing) {
+            $script:CheckpointData.partitions.completed += $partitionEntry
+            $script:CheckpointData.statistics.totalRecordsSaved += $RecordCount
+        }
+    }
+
+    # Save to disk
+    Save-CheckpointToDisk
+}
+
+function Read-Checkpoint {
+    <#
+    .SYNOPSIS
+        Loads and validates a checkpoint file.
+    .DESCRIPTION
+        Parses the checkpoint JSON, validates the version and required fields,
+        then verifies that incremental JSONL data still exists for partitions
+        the checkpoint records as completed (warning the user about potential
+        data loss if it does not). On success, loads the checkpoint into
+        script scope and enables resume mode.
+    .PARAMETER CheckpointPath
+        Path to the checkpoint JSON file.
+    .OUTPUTS
+        $true if valid and loaded, $false if invalid (or the user declined to
+        resume with missing incremental data).
+    #>
+    param(
+        [Parameter(Mandatory)]
+        [string]$CheckpointPath
+    )
+
+    if (-not (Test-Path $CheckpointPath)) {
+        Write-LogHost "ERROR: Checkpoint file not found: $CheckpointPath" -ForegroundColor Red
+        return $false
+    }
+
+    try {
+        $data = Get-Content -Path $CheckpointPath -Raw | ConvertFrom-Json -AsHashtable
+
+        # Validate version (supports version 1 and 2)
+        if (-not $data.version -or $data.version -gt 2) {
+            Write-LogHost "ERROR: Unsupported checkpoint version: $($data.version). This script supports versions 1-2." -ForegroundColor Red
+            return $false
+        }
+
+        # Validate required fields
+        if (-not $data.runTimestamp -or -not $data.outputFiles -or -not $data.partitions) {
+            Write-LogHost "ERROR: Checkpoint file is missing required fields" -ForegroundColor Red
+            return $false
+        }
+
+        # Get output directory from checkpoint path
+        $outputDir = Split-Path $CheckpointPath -Parent
+        $partialCsvPath = Join-Path $outputDir $data.outputFiles.partialCsv
+
+        # Note: We don't require _PARTIAL.csv to exist - the actual data is in .pax_incremental/*.jsonl files
+        # The _PARTIAL.csv is only created when merging at completion, or may not exist yet
+
+        # BUGFIX: completed-partition entries are written to partitions.completed
+        # (each with a 'records' count) by Initialize-CheckpointForNewRun /
+        # Save-Checkpoint. The previous code read a non-existent 'partitionStates'
+        # property, so the missing-incremental-data validation below never ran.
+        $completedPartitions = @($data.partitions.completed)
+        $expectedRecords = ($completedPartitions | ForEach-Object { $_.records } | Measure-Object -Sum).Sum
+
+        # Legacy (v1) checkpoints stored per-partition state under 'partitionStates'
+        # with a 'recordCount' field - fall back to that shape if present.
+        if ($completedPartitions.Count -eq 0 -and $data.partitionStates) {
+            $completedPartitions = @($data.partitionStates.GetEnumerator() | Where-Object { $_.Value.state -eq 'Completed' })
+            $expectedRecords = ($completedPartitions | ForEach-Object { $_.Value.recordCount } | Measure-Object -Sum).Sum
+        }
+
+        # Check for incremental save data if there are completed partitions
+        if ($completedPartitions.Count -gt 0) {
+            $incrementalDir = Join-Path $outputDir ".pax_incremental"
+            $hasIncrementalData = $false
+            $incrementalRecordCount = 0
+
+            if (Test-Path $incrementalDir) {
+                $jsonlFiles = Get-ChildItem -Path $incrementalDir -Filter "*.jsonl" -ErrorAction SilentlyContinue
+                if ($jsonlFiles -and $jsonlFiles.Count -gt 0) {
+                    $hasIncrementalData = $true
+                    # Count records in files (JSONL: one record per line)
+                    foreach ($file in $jsonlFiles) {
+                        $incrementalRecordCount += (Get-Content $file.FullName | Measure-Object -Line).Lines
+                    }
+                }
+            }
+
+            if (-not $hasIncrementalData) {
+                Write-LogHost ""
+                Write-LogHost "════════════════════════════════════════════════════════════════════════════════" -ForegroundColor Red
+                Write-LogHost "  WARNING: INCREMENTAL DATA MISSING" -ForegroundColor Red
+                Write-LogHost "════════════════════════════════════════════════════════════════════════════════" -ForegroundColor Red
+                Write-LogHost ""
+                Write-LogHost "  Checkpoint shows $($completedPartitions.Count) completed partition(s) with ~$expectedRecords records," -ForegroundColor Yellow
+                Write-LogHost "  but the .pax_incremental folder is missing or empty." -ForegroundColor Yellow
+                Write-LogHost ""
+                Write-LogHost "  Expected location: $incrementalDir" -ForegroundColor White
+                Write-LogHost ""
+                Write-LogHost "  If you continue, data from completed partitions will be LOST." -ForegroundColor Red
+                Write-LogHost "  The remaining partitions will be re-queried, but previous data cannot be recovered." -ForegroundColor Red
+                Write-LogHost ""
+                Write-LogHost "  OPTIONS:" -ForegroundColor Cyan
+                Write-LogHost "    1. Restore the .pax_incremental folder from backup (if available)" -ForegroundColor White
+                Write-LogHost "    2. Start a fresh run without -Resume (will re-query all partitions)" -ForegroundColor White
+                Write-LogHost "    3. Continue anyway and accept data loss from completed partitions" -ForegroundColor White
+                Write-LogHost ""
+                Write-LogHost "════════════════════════════════════════════════════════════════════════════════" -ForegroundColor Red
+                Write-LogHost ""
+
+                $response = Read-Host "Continue with potential data loss? (yes/no)"
+                if ($response -notmatch '^y(es)?$') {
+                    Write-LogHost "Resume cancelled. Consider starting a fresh run." -ForegroundColor Yellow
+                    return $false
+                }
+                Write-LogHost "Continuing with resume despite missing incremental data..." -ForegroundColor Yellow
+            } elseif ($incrementalRecordCount -lt ($expectedRecords * 0.9)) {
+                # Warn if incremental count is significantly less than expected (allow 10% variance for counting differences)
+                Write-LogHost ""
+                Write-LogHost "  [WARN] Incremental data may be incomplete:" -ForegroundColor Yellow
+                Write-LogHost "    Checkpoint expects ~$expectedRecords records from completed partitions" -ForegroundColor Yellow
+                Write-LogHost "    Found $incrementalRecordCount records in .pax_incremental" -ForegroundColor Yellow
+                Write-LogHost ""
+            }
+        }
+
+        # Load into script scope
+        $script:CheckpointPath = $CheckpointPath
+        $script:CheckpointData = $data
+        $script:PartialOutputPath = $partialCsvPath
+        $script:IsResumeMode = $true
+
+        return $true
+    }
+    catch {
+        Write-LogHost "ERROR: Failed to parse checkpoint file: $($_.Exception.Message)" -ForegroundColor Red
+        return $false
+    }
+}
+
+function Find-Checkpoints {
+    <#
+    .SYNOPSIS
+        Locates PAX checkpoint files in an output directory.
+    .PARAMETER OutputPath
+        Directory to scan for .pax_checkpoint_*.json files.
+    .OUTPUTS
+        Checkpoint summary objects, sorted newest-first by LastUpdated.
+        Unreadable or invalid checkpoint files are silently skipped.
+    #>
+    param(
+        [Parameter(Mandatory)]
+        [string]$OutputPath
+    )
+
+    if (-not (Test-Path $OutputPath)) {
+        return @()
+    }
+
+    # -Force is required because checkpoint files are dot-prefixed (hidden)
+    $candidateFiles = Get-ChildItem -Path $OutputPath -Filter ".pax_checkpoint_*.json" -Force -ErrorAction SilentlyContinue
+
+    if (-not $candidateFiles -or $candidateFiles.Count -eq 0) {
+        return @()
+    }
+
+    $summaries = foreach ($candidate in $candidateFiles) {
+        try {
+            $cpData = Get-Content -Path $candidate.FullName -Raw | ConvertFrom-Json -AsHashtable
+
+            # Format the stored ISO dates for display; anything unparseable
+            # (or absent) renders as 'Unknown'.
+            $startText = 'Unknown'
+            if ($cpData.parameters.startDate) {
+                $parsedStart = script:Parse-DateSafe $cpData.parameters.startDate
+                if ($parsedStart) { $startText = $parsedStart.ToString('yyyy-MM-dd') }
+            }
+            $endText = 'Unknown'
+            if ($cpData.parameters.endDate) {
+                $parsedEnd = script:Parse-DateSafe $cpData.parameters.endDate
+                if ($parsedEnd) { $endText = $parsedEnd.ToString('yyyy-MM-dd') }
+            }
+
+            [PSCustomObject]@{
+                Path               = $candidate.FullName
+                FileName           = $candidate.Name
+                RunTimestamp       = $cpData.runTimestamp
+                LastUpdated        = script:Parse-DateSafe $cpData.lastUpdated
+                StartDate          = $startText
+                EndDate            = $endText
+                PartitionsComplete = $cpData.statistics.partitionsComplete
+                PartitionsTotal    = $cpData.partitions.total
+                RecordsSaved       = $cpData.statistics.totalRecordsSaved
+            }
+        }
+        catch {
+            # Skip invalid checkpoint files
+            continue
+        }
+    }
+
+    # Sort by LastUpdated descending (newest first)
+    return @($summaries) | Sort-Object -Property LastUpdated -Descending
+}
+
+function Select-Checkpoint {
+    <#
+    .SYNOPSIS
+        Prompts user to select from multiple checkpoint files.
+    .PARAMETER Checkpoints
+        Array of checkpoint info objects from Find-Checkpoints.
+    .OUTPUTS
+        The selected checkpoint info object (as produced by Find-Checkpoints;
+        use its .Path property for the file path), or $null if the user quits.
+    #>
+    param(
+        [Parameter(Mandatory)]
+        [array]$Checkpoints
+    )
+
+    Write-Host ""
+    Write-Host "════════════════════════════════════════════════════════════════════════════════" -ForegroundColor Cyan
+    Write-Host "  Multiple checkpoint files found. Select one to resume:" -ForegroundColor Cyan
+    Write-Host "════════════════════════════════════════════════════════════════════════════════" -ForegroundColor Cyan
+    Write-Host ""
+
+    # One menu entry per checkpoint: summary line plus the filename beneath it
+    for ($i = 0; $i -lt $Checkpoints.Count; $i++) {
+        $cp = $Checkpoints[$i]
+        $num = $i + 1
+        Write-Host "  [$num] $($cp.LastUpdated.ToString('yyyy-MM-dd HH:mm')) | $($cp.StartDate) to $($cp.EndDate) | $($cp.PartitionsComplete)/$($cp.PartitionsTotal) partitions | $($cp.RecordsSaved.ToString('N0')) records" -ForegroundColor White
+        Write-Host "      $($cp.FileName)" -ForegroundColor DarkGray
+        Write-Host ""
+    }
+
+    Write-Host "  [Q] Quit (do not resume)" -ForegroundColor Yellow
+    Write-Host ""
+
+    # Alert the user (e.g. console bell/toast) that input is awaited
+    Send-PromptNotification
+    # Loop until a valid numeric selection or 'Q' is entered
+    while ($true) {
+        $choice = Read-Host " Enter selection (1-$($Checkpoints.Count)) or 'Q' to quit"
+
+        if ($choice -eq 'Q' -or $choice -eq 'q') {
+            return $null
+        }
+
+        $selection = 0
+        if ([int]::TryParse($choice, [ref]$selection)) {
+            if ($selection -ge 1 -and $selection -le $Checkpoints.Count) {
+                return $Checkpoints[$selection - 1]
+            }
+        }
+
+        Write-Host "  Invalid selection. Please enter a number 1-$($Checkpoints.Count) or 'Q' to quit." -ForegroundColor Red
+    }
+}
+
+function Remove-Checkpoint {
+    <#
+    .SYNOPSIS
+        Deletes checkpoint file after successful completion.
+    .DESCRIPTION
+        Best-effort removal of the on-disk checkpoint, followed by clearing the
+        script-scope checkpoint state so later Save-Checkpoint calls become
+        no-ops. A failed delete is reported as a warning, never thrown.
+    #>
+
+    $checkpointFile = $script:CheckpointPath
+    if ($checkpointFile -and (Test-Path $checkpointFile)) {
+        try {
+            Remove-Item -Path $checkpointFile -Force
+        }
+        catch {
+            Write-LogHost "  Warning: Could not delete checkpoint file: $($_.Exception.Message)" -ForegroundColor Yellow
+        }
+    }
+
+    # Clear in-memory state regardless of whether the file delete succeeded
+    $script:CheckpointPath = $null
+    $script:CheckpointData = $null
+}
+
+function Get-PartitionsToProcess {
+    <#
+    .SYNOPSIS
+        Splits the partition list into resume categories using checkpoint state.
+    .PARAMETER AllPartitions
+        Array of all partition objects for the date range.
+    .OUTPUTS
+        Hashtable with three arrays:
+          ToSkip           - already completed, skip entirely
+          ToFetchOnly      - server query exists, just fetch records
+          ToCreateAndFetch - start fresh, create query then fetch
+    #>
+    param(
+        [Parameter(Mandatory)]
+        [array]$AllPartitions
+    )
+
+    $buckets = @{
+        ToSkip           = @()  # Already completed - skip entirely
+        ToFetchOnly      = @()  # Query exists on server - just fetch records
+        ToCreateAndFetch = @()  # Start fresh - create query then fetch
+    }
+
+    # Without a checkpoint there is nothing to resume: everything is fresh.
+    if (-not $script:CheckpointData) {
+        $buckets.ToCreateAndFetch = $AllPartitions
+        return $buckets
+    }
+
+    # Index the checkpoint lists by stringified partition index so numeric
+    # type differences (int vs string after JSON round-trip) cannot break
+    # the lookup.
+    $doneByIndex = @{}
+    foreach ($entry in $script:CheckpointData.partitions.completed) {
+        $doneByIndex["$($entry.index)"] = $entry
+    }
+    $pendingByIndex = @{}
+    foreach ($entry in $script:CheckpointData.partitions.queryCreated) {
+        $pendingByIndex["$($entry.index)"] = $entry
+    }
+
+    # Place each partition into exactly one bucket
+    foreach ($p in $AllPartitions) {
+        $key = "$($p.Index)"
+
+        if ($doneByIndex.ContainsKey($key)) {
+            $buckets.ToSkip += $p
+        }
+        elseif ($pendingByIndex.ContainsKey($key)) {
+            # Carry the server-assigned query id so the fetch-only path can reuse it
+            $p | Add-Member -NotePropertyName 'StoredQueryId' -NotePropertyValue $pendingByIndex[$key].queryId -Force
+            $buckets.ToFetchOnly += $p
+        }
+        else {
+            $buckets.ToCreateAndFetch += $p
+        }
+    }
+
+    return $buckets
+}
+
+function Test-ShouldPromptTokenRefresh {
+    <#
+    .SYNOPSIS
+        Indicates whether auth-failure handling should run.
+    .DESCRIPTION
+        Purely reactive: reports $true only after a 401 Unauthorized has been
+        observed (flagged in $script:AuthFailureDetected), i.e. the token has
+        actually expired. There is no time-based/proactive trigger here.
+
+        The flag fires for ALL auth methods; the caller's auth handling block
+        decides what happens next - automatic silent refresh for
+        AppRegistration, or a user prompt for interactive modes.
+    .OUTPUTS
+        $true if an auth failure was detected and refresh handling is needed,
+        $false otherwise.
+    #>
+
+    # Set elsewhere in the script when a 401 response is observed
+    $authFailureSeen = $script:AuthFailureDetected
+    return $authFailureSeen
+}
+
+function Invoke-TokenRefreshPrompt {
+    <#
+    .SYNOPSIS
+        Handles token refresh for interactive auth modes (WebLogin/DeviceCode).
+    .DESCRIPTION
+        Triggered reactively when a 401 Unauthorized error is detected.
+        Goes straight to a user prompt (no automatic refresh attempt - see the
+        NOTE below) offering re-authentication or a checkpointed quit.
+    .OUTPUTS
+        'Refreshed' - Token refreshed successfully via interactive re-auth
+        'Quit'      - User chose to exit (progress saved via checkpoint)
+        Callers typically treat any non-'Quit' return as success.
+    #>
+
+    # Session age for display; $script:TokenAcquiredTime is presumably set at
+    # initial connect and on each successful re-auth - confirm against caller.
+    $tokenAge = if ($script:TokenAcquiredTime) { (Get-Date) - $script:TokenAcquiredTime } else { $null }
+
+    # ═══════════════════════════════════════════════════════════════════════════
+    # NOTE: We intentionally do NOT attempt automatic token refresh here.
+    # On Windows, Connect-MgGraph with InteractiveBrowserCredential ALWAYS opens
+    # a browser popup. If the user is away, this popup sits waiting, and when
+    # they return and press 'R' to re-auth, a SECOND popup appears.
+    # By going straight to the user prompt, we ensure only ONE popup when ready.
+    # ═══════════════════════════════════════════════════════════════════════════
+
+    # ═══════════════════════════════════════════════════════════════════════════
+    # Prompt user for interactive re-authentication (single popup, user-initiated)
+    # ═══════════════════════════════════════════════════════════════════════════
+
+    # Get progress info for display (all zero when checkpointing is inactive)
+    $completedCount = if ($script:CheckpointData) { $script:CheckpointData.statistics.partitionsComplete } else { 0 }
+    $totalCount = if ($script:CheckpointData) { $script:CheckpointData.partitions.total } else { 0 }
+    $recordsSaved = if ($script:CheckpointData) { $script:CheckpointData.statistics.totalRecordsSaved } else { 0 }
+
+    Write-Host ""
+    Write-Host "════════════════════════════════════════════════════════════════════════════════" -ForegroundColor Red
+    Write-Host "  [!] AUTHENTICATION EXPIRED - Re-authentication Required" -ForegroundColor Red
+    Write-Host "════════════════════════════════════════════════════════════════════════════════" -ForegroundColor Red
+    Write-Host ""
+    Write-Host "  Authentication failure detected (401 Unauthorized)." -ForegroundColor White
+    Write-Host "  Your session token has expired. Please re-authenticate to continue." -ForegroundColor White
+    Write-Host ""
+    if ($tokenAge) {
+        Write-Host "  Session duration: $([Math]::Round($tokenAge.TotalMinutes, 0)) minutes" -ForegroundColor Gray
+    }
+    Write-Host "  Progress: $completedCount/$totalCount partitions complete | $($recordsSaved.ToString('N0')) records saved to disk" -ForegroundColor White
+    Write-Host "  Auth method: $($Auth) (interactive)" -ForegroundColor Gray
+    Write-Host ""
+    Write-Host "  [R] Re-authenticate now (recommended)" -ForegroundColor Green
+    Write-Host "  [Q] Quit and save progress (resume later with -Resume)" -ForegroundColor Cyan
+    Write-Host ""
+    Write-Host "════════════════════════════════════════════════════════════════════════════════" -ForegroundColor Red
+
+    Send-PromptNotification
+    # Loop until the user re-authenticates successfully or quits
+    while ($true) {
+        $choice = Read-Host " Enter choice (R/Q)"
+
+        switch ($choice.ToUpper()) {
+            'R' {
+                Write-Host ""
+                Write-Host "  Re-authenticating..." -ForegroundColor Cyan
+
+                try {
+                    # Disconnect and reconnect. Connect-PurviewAudit is assumed
+                    # to throw on failure, which lands in the catch below and
+                    # re-prompts rather than aborting.
+                    Disconnect-MgGraph -ErrorAction SilentlyContinue | Out-Null
+                    Connect-PurviewAudit -AuthMethod $Auth -UseEOMMode $false
+
+                    # Update token timing and reset auth failure flags
+                    $script:TokenAcquiredTime = Get-Date
+                    $script:AuthFailureDetected = $false
+                    $script:Auth401MessageShown = $false  # Reset for next auth failure cycle
+
+                    Write-Host "  Re-authentication successful. Resuming execution..." -ForegroundColor Green
+                    Write-Host "  Failed partitions will be retried with fresh token." -ForegroundColor Green
+                    Write-Host ""
+
+                    return 'Refreshed'
+                }
+                catch {
+                    Write-Host "  ✗ Re-authentication failed: $($_.Exception.Message)" -ForegroundColor Red
+                    Write-Host "  Please try again or quit." -ForegroundColor Yellow
+                }
+            }
+            'Q' {
+                Write-Host ""
+                Show-CheckpointExitMessage
+                return 'Quit'
+            }
+            default {
+                Write-Host "  Invalid choice. Please enter R or Q." -ForegroundColor Red
+            }
+        }
+    }
+}
+
+function Merge-IncrementalSaves {
+    <#
+    .SYNOPSIS
+        Merges incremental JSON save files into the main allLogs collection.
+    .DESCRIPTION
+        Called at the end of execution to consolidate partition data that was
+        saved incrementally during execution. In resume mode, only merges data
+        from partitions that were skipped (already completed at start of run),
+        since partitions completed during this run already have data in memory.
+
+        Files filtered out by OnlyPartitionIndices are deliberately NOT deleted,
+        even when CleanupAfterMerge is set - they belong to partitions whose
+        data is already in memory for this run.
+    .PARAMETER AllLogs
+        Reference to the synchronized ArrayList to add records to.
+    .PARAMETER OutputDirectory
+        The output directory containing the .pax_incremental folder.
+    .PARAMETER CleanupAfterMerge
+        If true, deletes the incremental files after successful merge.
+    .PARAMETER OnlyPartitionIndices
+        If specified, only merge files for these partition indices.
+        Used in resume mode to avoid double-counting data from partitions
+        that completed during this run.
+    .OUTPUTS
+        The total number of records merged into AllLogs.
+    #>
+    param(
+        [Parameter(Mandatory = $true)]
+        [System.Collections.ArrayList]$AllLogs,
+
+        [Parameter(Mandatory = $true)]
+        [string]$OutputDirectory,
+
+        [Parameter(Mandatory = $false)]
+        [bool]$CleanupAfterMerge = $true,
+
+        [Parameter(Mandatory = $false)]
+        [int[]]$OnlyPartitionIndices = $null
+    )
+
+    $incrementalDir = Join-Path $OutputDirectory ".pax_incremental"
+
+    # No incremental directory or no files -> nothing to merge
+    if (-not (Test-Path $incrementalDir)) {
+        return 0
+    }
+
+    $incrementalFiles = Get-ChildItem -Path $incrementalDir -Filter "*.jsonl" -ErrorAction SilentlyContinue
+
+    if (-not $incrementalFiles -or $incrementalFiles.Count -eq 0) {
+        return 0
+    }
+
+    $mergedCount = 0
+    $filesProcessed = 0
+    # NOTE(review): $filesSkipped is tracked but not currently reported anywhere.
+    $filesSkipped = 0
+
+    $filterMsg = if ($OnlyPartitionIndices) { " (filtering for partitions: $($OnlyPartitionIndices -join ', '))" } else { "" }
+    Write-LogHost "  [MERGE] Found $($incrementalFiles.Count) incremental save files$filterMsg..." -ForegroundColor Cyan
+
+    foreach ($file in $incrementalFiles) {
+        try {
+            # If filtering by partition indices, check if this file matches
+            # Filename format: Part{N}_timestamp_qid-{QueryId}_Nrecords.jsonl (recovery files use qid-recovery)
+            if ($OnlyPartitionIndices) {
+                $partMatch = [regex]::Match($file.Name, '^Part(\d+)_')
+                if ($partMatch.Success) {
+                    $filePartitionIndex = [int]$partMatch.Groups[1].Value
+                    if ($filePartitionIndex -notin $OnlyPartitionIndices) {
+                        $filesSkipped++
+                        # Don't delete - this file is for a partition completed in this run
+                        continue
+                    }
+                }
+            }
+
+            # Read JSON Lines (NDJSON) format - one record per line
+            $lines = Get-Content -Path $file.FullName -Encoding utf8
+            $fileRecordCount = 0
+
+            foreach ($line in $lines) {
+                if ([string]::IsNullOrWhiteSpace($line)) { continue }
+
+                try {
+                    $record = $line | ConvertFrom-Json
+                    # [void] suppresses ArrayList.Add's index return value
+                    [void]$AllLogs.Add($record)
+                    $fileRecordCount++
+                } catch {
+                    # Skip malformed lines but continue processing
+                    Write-Verbose "Skipped malformed line in $($file.Name)"
+                }
+            }
+
+            $mergedCount += $fileRecordCount
+            if ($fileRecordCount -gt 0) { $filesProcessed++ }
+
+            # Delete merged (or empty) files when cleanup is requested
+            if ($CleanupAfterMerge) {
+                Remove-Item -Path $file.FullName -Force -ErrorAction SilentlyContinue
+            }
+        }
+        catch {
+            Write-LogHost "  [WARN] Failed to merge $($file.Name): $($_.Exception.Message)" -ForegroundColor Yellow
+        }
+    }
+
+    # Clean up directory if empty (skipped files, if any, keep it alive)
+    if ($CleanupAfterMerge) {
+        $remainingFiles = Get-ChildItem -Path $incrementalDir -ErrorAction SilentlyContinue
+        if (-not $remainingFiles -or $remainingFiles.Count -eq 0) {
+            Remove-Item -Path $incrementalDir -Force -ErrorAction SilentlyContinue
+        }
+    }
+
+    if ($mergedCount -gt 0) {
+        Write-LogHost "  [MERGE] Merged $($mergedCount.ToString('N0')) records from $filesProcessed incremental files" -ForegroundColor Green
+    }
+
+    return $mergedCount
+}
+
function Merge-IncrementalSaves-Streaming {
    <#
    .SYNOPSIS
    Memory-efficient streaming merge of incremental JSONL files directly to CSV.
    .DESCRIPTION
    Instead of loading all records into memory, this function streams records
    from incremental JSONL files directly to the final CSV output using batched
    writes and explicit garbage collection between files. This prevents memory
    exhaustion when merging millions of records.

    Records are deduplicated by RecordId (Identity/Id/RecordId, first found).
    The dedup set can be pre-seeded via -ExcludeRecordIds so records already
    written to the CSV from in-memory data are not written twice.
    .PARAMETER OutputFile
    The final CSV file path to write merged data to.
    .PARAMETER OutputDirectory
    The output directory containing the .pax_incremental folder.
    .PARAMETER OnlyPartitionIndices
    If specified, only merge files for these partition indices.
    .PARAMETER Columns
    The column schema to use for CSV output. If not specified, uses default 7-column schema.
    .PARAMETER ExcludeRecordIds
    RecordIds already written to the CSV (e.g. from in-memory records); seeded
    into the dedup set so matching records in the JSONL files are skipped.
    .PARAMETER ActivityCounts
    Optional [ref] to a hashtable; per-Operation record counts are accumulated
    into it for the Activity Type Breakdown report.
    .OUTPUTS
    The total number of records merged.
    #>
    param(
        [Parameter(Mandatory = $true)]
        [string]$OutputFile,

        [Parameter(Mandatory = $true)]
        [string]$OutputDirectory,

        [Parameter(Mandatory = $false)]
        [int[]]$OnlyPartitionIndices = $null,

        [Parameter(Mandatory = $false)]
        [string[]]$Columns = $null,

        [Parameter(Mandatory = $false)]
        [System.Collections.Generic.HashSet[string]]$ExcludeRecordIds = $null,

        [Parameter(Mandatory = $false)]
        [ref]$ActivityCounts = $null
    )

    $incrementalDir = Join-Path $OutputDirectory ".pax_incremental"

    if (-not (Test-Path $incrementalDir)) {
        Write-LogHost "  [MERGE-STREAM] No incremental directory found" -ForegroundColor Yellow
        return 0
    }

    $allFiles = Get-ChildItem -Path $incrementalDir -Filter "*.jsonl" -ErrorAction SilentlyContinue
    if (-not $allFiles -or $allFiles.Count -eq 0) {
        Write-LogHost "  [MERGE-STREAM] No incremental files found" -ForegroundColor Yellow
        return 0
    }

    # Sort files by partition number for consistent output ordering
    # (files without a Part{N}_ prefix sort last)
    $files = $allFiles | Sort-Object {
        if ($_.Name -match 'Part(\d+)_') { [int]$Matches[1] } else { 999999 }
    }

    # Filter by partition indices if specified
    if ($OnlyPartitionIndices) {
        $files = $files | Where-Object {
            $partMatch = [regex]::Match($_.Name, '^Part(\d+)_')
            if ($partMatch.Success) {
                [int]$partMatch.Groups[1].Value -in $OnlyPartitionIndices
            } else {
                $false
            }
        }
    }

    if (-not $files -or @($files).Count -eq 0) {
        Write-LogHost "  [MERGE-STREAM] No matching incremental files for specified partitions" -ForegroundColor Yellow
        return 0
    }

    $fileCount = @($files).Count
    Write-LogHost "  [MERGE-STREAM] Streaming $fileCount incremental files to CSV..." -ForegroundColor Cyan

    # Use default 7-column schema if not specified (non-explosion mode)
    if (-not $Columns) {
        $Columns = @('RecordId', 'CreationDate', 'RecordType', 'Operation', 'AuditData', 'AssociatedAdminUnits', 'AssociatedAdminUnitsNames')
    }

    $totalMerged = 0
    $filesProcessed = 0
    $headerWritten = $false
    $batchSize = 5000
    $startTime = Get-Date
    $lastProgressTime = Get-Date

    # Track seen RecordIds for deduplication (seed with any in-memory RecordIds already written to CSV)
    $seenIds = if ($ExcludeRecordIds) { New-Object System.Collections.Generic.HashSet[string] ($ExcludeRecordIds) } else { New-Object System.Collections.Generic.HashSet[string] }

    # Per-call duplicate counter; the script-scoped counter accumulates across
    # calls for global stats, so the per-call count must be tracked separately
    # to avoid re-reporting earlier calls' duplicates in this call's summary.
    $duplicatesSkipped = 0
    if ($null -eq $script:StreamingMergeDuplicatesSkipped) { $script:StreamingMergeDuplicatesSkipped = 0 }

    foreach ($file in $files) {
        $filesProcessed++
        $partNum = if ($file.Name -match 'Part(\d+)_') { $Matches[1] } else { "?" }

        try {
            # Open CSV writer on first file with data
            if (-not $headerWritten) {
                Open-CsvWriter -Path $OutputFile -Columns $Columns
                $headerWritten = $true
            }

            # Stream records from this file in batches.
            # NOTE: ForEach-Object's script block runs in the caller's scope,
            # so the counters below ($fileRecords, $totalMerged, ...) persist.
            $batch = New-Object System.Collections.Generic.List[object]
            $fileRecords = 0

            Get-Content -Path $file.FullName -Encoding utf8 | ForEach-Object {
                if (-not [string]::IsNullOrWhiteSpace($_)) {
                    try {
                        $record = $_ | ConvertFrom-Json

                        # Deduplicate by RecordId (first of Identity/Id/RecordId found)
                        $recordId = $null
                        if ($record.Identity) { $recordId = $record.Identity }
                        elseif ($record.Id) { $recordId = $record.Id }
                        elseif ($record.RecordId) { $recordId = $record.RecordId }

                        if ($recordId -and $seenIds.Contains($recordId)) {
                            $duplicatesSkipped++
                            $script:StreamingMergeDuplicatesSkipped++
                            return # 'return' inside ForEach-Object skips just this record
                        }
                        if ($recordId) { [void]$seenIds.Add($recordId) }

                        # Parse AuditData for Operation if needed
                        $auditData = $record.AuditData
                        $parsedAudit = if ($record.PSObject.Properties['_ParsedAuditData']) {
                            $record._ParsedAuditData
                        } else {
                            try { $auditData | ConvertFrom-Json -ErrorAction SilentlyContinue } catch { $null }
                        }
                        $opValue = if ($parsedAudit -and $parsedAudit.Operation) { $parsedAudit.Operation } else { $record.Operations }

                        # Track per-activity counts for Activity Type Breakdown
                        if ($ActivityCounts -and $opValue) {
                            if (-not $ActivityCounts.Value.ContainsKey($opValue)) { $ActivityCounts.Value[$opValue] = 0 }
                            $ActivityCounts.Value[$opValue]++
                        }

                        # Create normalized record matching expected schema
                        $normalizedRecord = [pscustomobject]@{
                            RecordId                  = if ($record.RecordId) { $record.RecordId } elseif ($record.Identity) { $record.Identity } elseif ($record.Id) { $record.Id } elseif ($parsedAudit -and $parsedAudit.Id) { $parsedAudit.Id } else { $null }
                            CreationDate              = if ($record.CreationDate) {
                                $dt = script:Parse-DateSafe $record.CreationDate; if ($dt) { $dt.ToUniversalTime().ToString('yyyy-MM-ddTHH:mm:ss.fffZ') } else { $record.CreationDate }
                            } else { '' }
                            RecordType                = $record.RecordType
                            Operation                 = $opValue
                            AuditData                 = $auditData
                            AssociatedAdminUnits      = $record.AssociatedAdminUnits
                            AssociatedAdminUnitsNames = $record.AssociatedAdminUnitsNames
                        }

                        $batch.Add($normalizedRecord)
                        $fileRecords++

                        # Write batch when full
                        if ($batch.Count -ge $batchSize) {
                            Write-CsvRows -Rows $batch -Columns $Columns
                            $totalMerged += $batch.Count
                            $batch.Clear()
                        }
                    } catch {
                        # Skip malformed lines
                        Write-Verbose "Skipped malformed line in $($file.Name): $($_.Exception.Message)"
                    }
                }
            }

            # Flush remaining batch for this file
            if ($batch.Count -gt 0) {
                Write-CsvRows -Rows $batch -Columns $Columns
                $totalMerged += $batch.Count
                $batch.Clear()
            }

            # Progress reporting every 30 seconds
            $now = Get-Date
            if (($now - $lastProgressTime).TotalSeconds -ge 30) {
                $elapsed = ($now - $startTime).TotalSeconds
                $rate = if ($elapsed -gt 0) { [int]($totalMerged / $elapsed) } else { 0 }
                Write-LogHost "  [MERGE-STREAM] Progress: $filesProcessed/$fileCount files | $($totalMerged.ToString('N0')) records | ~$rate rec/sec" -ForegroundColor DarkCyan
                $lastProgressTime = $now
            }

            # Explicit garbage collection between files to release memory
            $batch = $null
            [GC]::Collect()
            [GC]::WaitForPendingFinalizers()

        } catch {
            Write-LogHost "  [WARN] Failed to stream merge $($file.Name): $($_.Exception.Message)" -ForegroundColor Yellow
        }
    }

    # Close CSV writer
    if ($headerWritten) {
        Close-CsvWriter
    }

    # Final stats
    $totalElapsed = (Get-Date) - $startTime
    $finalRate = if ($totalElapsed.TotalSeconds -gt 0) { [int]($totalMerged / $totalElapsed.TotalSeconds) } else { 0 }

    Write-LogHost "  [MERGE-STREAM] Streaming merge complete: $($totalMerged.ToString('N0')) records from $filesProcessed files" -ForegroundColor Green
    Write-LogHost "  [MERGE-STREAM] Time: $([Math]::Round($totalElapsed.TotalSeconds, 1))s | Rate: $finalRate rec/sec" -ForegroundColor DarkGray
    # Report only THIS call's duplicates; the previous implementation summed the
    # cross-call script accumulator here, double-reporting on repeated calls.
    if ($duplicatesSkipped -gt 0) {
        Write-LogHost "  [MERGE-STREAM] Duplicates skipped: $duplicatesSkipped" -ForegroundColor DarkGray
    }

    # Clear the HashSet to free memory
    $seenIds.Clear()
    $seenIds = $null
    [GC]::Collect()

    return $totalMerged
}
+
function Show-CheckpointExitMessage {
    <#
    .SYNOPSIS
    Displays checkpoint save confirmation and resume instructions.
    .DESCRIPTION
    Reads $script:CheckpointData / $script:CheckpointPath (set elsewhere in this
    file) and prints a banner with partition progress, record counts, and the
    exact -Resume command lines. Console output only; no state is modified.
    Silently does nothing when no checkpoint is available.
    #>

    # Guard: nothing to show unless a checkpoint was actually written this run
    if (-not $script:CheckpointData -or -not $script:CheckpointPath) {
        return
    }

    # Pull counters out of the checkpoint, defaulting each to 0 when absent
    $completedCount = if ($script:CheckpointData.statistics.partitionsComplete) { $script:CheckpointData.statistics.partitionsComplete } else { 0 }
    $queryCreatedCount = if ($script:CheckpointData.statistics.partitionsQueryCreated) { $script:CheckpointData.statistics.partitionsQueryCreated } else { 0 }
    $totalCount = if ($script:CheckpointData.partitions.total) { $script:CheckpointData.partitions.total } else { 0 }
    $remaining = $totalCount - $completedCount - $queryCreatedCount
    $recordsSaved = if ($script:CheckpointData.statistics.totalRecordsSaved) { $script:CheckpointData.statistics.totalRecordsSaved } else { 0 }

    Write-Host ""
    Write-Host "════════════════════════════════════════════════════════════════════════════════" -ForegroundColor Green
    Write-Host "  PROGRESS SAVED" -ForegroundColor Green
    Write-Host "════════════════════════════════════════════════════════════════════════════════" -ForegroundColor Green
    Write-Host ""
    Write-Host "  Checkpoint:    $(Split-Path $script:CheckpointPath -Leaf)" -ForegroundColor White
    Write-Host "  Partial data:  $(if ($script:PartialOutputPath) { Split-Path $script:PartialOutputPath -Leaf } else { '(incremental saves in .pax_incremental/)' })" -ForegroundColor White
    Write-Host "  Records saved: $($recordsSaved.ToString('N0'))" -ForegroundColor White
    # The partitions line is assembled from up to three -NoNewline segments so
    # the optional "queries pending" / "not started" parts share one line.
    Write-Host "  Partitions:    $completedCount/$totalCount complete" -NoNewline -ForegroundColor White
    if ($queryCreatedCount -gt 0) {
        Write-Host ", $queryCreatedCount queries pending" -NoNewline -ForegroundColor White
    }
    if ($remaining -gt 0) {
        Write-Host ", $remaining not started" -ForegroundColor White
    }
    else {
        # Terminate the -NoNewline segment(s) above with a bare newline
        Write-Host "" -ForegroundColor White
    }
    Write-Host ""
    Write-Host "  To resume later:" -ForegroundColor Cyan
    Write-Host "    -Resume -OutputPath `"$(Split-Path $script:CheckpointPath -Parent)`"" -ForegroundColor White
    Write-Host ""
    Write-Host "  Or with explicit checkpoint file:" -ForegroundColor Cyan
    Write-Host "    -Resume `"$($script:CheckpointPath)`"" -ForegroundColor White
    Write-Host ""
    Write-Host "════════════════════════════════════════════════════════════════════════════════" -ForegroundColor Green
}
+
function Complete-CheckpointRun {
    <#
    .SYNOPSIS
    Finalizes successful run: renames _PARTIAL file, deletes checkpoint.
    .DESCRIPTION
    Promotes the _PARTIAL output file to its final name (appending a timestamp
    if the final name is already taken), promotes the _PARTIAL log file the same
    way, then deletes the checkpoint so the run is no longer resumable.
    Failures are reported as a warning and never abort the caller.
    .PARAMETER FinalOutputPath
    The final output path (without _PARTIAL).
    #>
    param(
        [Parameter(Mandatory)]
        [string]$FinalOutputPath
    )

    # Nothing to promote when no partial output file exists for this run
    $partialPath = $script:PartialOutputPath
    if (-not $partialPath) { return }
    if (-not (Test-Path $partialPath)) { return }

    try {
        # Never clobber an existing final file: disambiguate with a timestamp
        if (Test-Path $FinalOutputPath) {
            $parentDir = Split-Path $FinalOutputPath -Parent
            $stem      = [System.IO.Path]::GetFileNameWithoutExtension($FinalOutputPath)
            $suffix    = [System.IO.Path]::GetExtension($FinalOutputPath)
            $stamp     = Get-Date -Format 'yyyyMMdd_HHmmss'
            $FinalOutputPath = Join-Path $parentDir "${stem}_${stamp}${suffix}"
        }

        Move-Item -Path $partialPath -Destination $FinalOutputPath -Force

        # Promote the _PARTIAL log file as well (drop the suffix)
        $currentLog = $script:LogFile
        if ($currentLog -and (Test-Path $currentLog) -and $currentLog -match '_PARTIAL\.log$') {
            $promotedLog = $currentLog -replace '_PARTIAL\.log$', '.log'
            if (Test-Path $promotedLog) {
                # Target log name already taken - disambiguate with a timestamp
                $logParent   = Split-Path $promotedLog -Parent
                $logStem     = [System.IO.Path]::GetFileNameWithoutExtension($promotedLog)
                $stamp       = Get-Date -Format 'yyyyMMdd_HHmmss'
                $promotedLog = Join-Path $logParent "${logStem}_${stamp}.log"
            }
            Move-Item -Path $currentLog -Destination $promotedLog -Force
            $script:LogFile = $promotedLog
        }

        # Run finished cleanly - the checkpoint is no longer needed
        Remove-Checkpoint

        $script:PartialOutputPath = $null
    }
    catch {
        Write-LogHost "  Warning: Could not finalize output file: $($_.Exception.Message)" -ForegroundColor Yellow
    }
}
+
function Get-UserLicenseData {
    <#
    .SYNOPSIS
    Fetches user license information from Microsoft Graph API.

    .DESCRIPTION
    Queries Graph API for subscribedSkus and builds lookup hashtables for:
    - User license assignments (userId -> list of SKU names)
    - Copilot license detection (userId -> has Copilot license)

    Uses two-tier Copilot license detection:
    1. Checks known Copilot SKU IDs (hardcoded list from $script:CopilotSkuIds)
    2. Pattern matches SKU names containing "Copilot" (catches new/promotional variants)

    Entries are keyed by BOTH user object id and UPN so callers can look up
    either way. On any Graph failure, returns empty hashtables and warns.

    .OUTPUTS
    Hashtable with two keys:
    - UserLicenses:   @{userId = @('SKU1', 'SKU2')}
    - UserHasCopilot: @{userId = $true/$false}
    #>

    Write-LogHost ""
    Write-LogHost "Fetching license data from Microsoft Graph API..." -ForegroundColor Cyan

    try {
        # Step 1: pull the tenant's SKU catalog so GUIDs can be translated to names
        $skuUri = "https://graph.microsoft.com/v1.0/subscribedSkus"
        $skuResponse = Invoke-MgGraphRequest -Method GET -Uri $skuUri -ErrorAction Stop
        $skus = $skuResponse.value

        Write-LogHost "  Found $($skus.Count) SKU(s) in tenant" -ForegroundColor Gray

        # skuId (GUID) -> skuPartNumber (friendly name)
        $skuLookup = @{}
        foreach ($subscribedSku in $skus) {
            $skuLookup[$subscribedSku.skuId] = $subscribedSku.skuPartNumber
        }
        $copilotSkuIds = $script:CopilotSkuIds.Keys

        # Result accumulators
        $userLicenses   = @{}   # userId/UPN -> @('SKU1', 'SKU2')
        $userHasCopilot = @{}   # userId/UPN -> $true/$false

        # Step 2: page through all users and record their license assignments
        Write-LogHost "  Fetching user license assignments..." -ForegroundColor Gray
        $nextUserUri = "https://graph.microsoft.com/v1.0/users?`$select=id,userPrincipalName,assignedLicenses&`$top=999"
        $userCount = 0
        $copilotUserCount = 0

        while ($nextUserUri) {
            $page = Invoke-MgGraphRequest -Method GET -Uri $nextUserUri -ErrorAction Stop

            foreach ($entry in $page.value) {
                $userId = $entry.id
                $upn = $entry.userPrincipalName

                # Skip principals missing either key - both are needed for lookup
                if (-not $userId -or -not $upn) { continue }

                $userCount++
                $skuNames = @()
                $hasCopilot = $false

                foreach ($assigned in $entry.assignedLicenses) {
                    $assignedSkuId = $assigned.skuId

                    # Friendly name, falling back to the raw GUID when unknown
                    $friendly = $skuLookup[$assignedSkuId]
                    if (-not $friendly) { $friendly = $assignedSkuId }

                    $skuNames += $friendly

                    # Two-tier Copilot detection: known SKU id OR name pattern
                    if (($copilotSkuIds -contains $assignedSkuId) -or ($friendly -like "*Copilot*")) {
                        $hasCopilot = $true
                    }
                }

                # Store license data keyed by both ID and UPN for flexible lookup
                if ($skuNames.Count -gt 0) {
                    $userLicenses[$userId] = $skuNames
                    $userLicenses[$upn] = $skuNames
                }

                $userHasCopilot[$userId] = $hasCopilot
                $userHasCopilot[$upn] = $hasCopilot

                if ($hasCopilot) { $copilotUserCount++ }
            }

            $nextUserUri = $page.'@odata.nextLink'
        }

        Write-LogHost "  Processed $userCount user(s) with license assignments" -ForegroundColor Gray
        Write-LogHost "  Detected $copilotUserCount user(s) with Copilot licenses" -ForegroundColor Green
        Write-LogHost ""

        return @{
            UserLicenses   = $userLicenses
            UserHasCopilot = $userHasCopilot
        }
    }
    catch {
        Write-LogHost "WARNING: Failed to fetch license data: $($_.Exception.Message)" -ForegroundColor Yellow
        Write-LogHost "  License columns will be empty in export" -ForegroundColor Yellow
        Write-LogHost ""

        # Return empty hashtables on failure
        return @{
            UserLicenses   = @{}
            UserHasCopilot = @{}
        }
    }
}
+
function ConvertTo-FlatEntraUsers {
    <#
    .SYNOPSIS
    Flattens Entra user objects into CSV-friendly format.

    .DESCRIPTION
    Converts Entra ID user objects with nested properties into flat tabular format.
    Filters out non-user accounts (rooms, resources) based on userType validation.
    Explodes arrays (proxyAddresses, manager) into individual columns.

    NOTE: This version excludes the 9 granular Entra license columns from the Graph script.
    License data is added separately via Get-UserLicenseData() in MAC format (assignedLicenses, hasLicense).

    .PARAMETER Users
    Array of user objects from Microsoft Graph API (with 35 properties + manager expansion).

    .OUTPUTS
    Array of PSCustomObjects with 37 flattened columns (35 user properties + 5 manager columns, no license columns yet).
    #>
    param(
        [Parameter(Mandatory = $true)]
        [array]$Users
    )

    # Generic List accumulator instead of '+= on array': array append reallocates
    # the whole array each iteration, turning large tenants into O(n^2) work.
    $flattenedUsers = [System.Collections.Generic.List[object]]::new()

    foreach ($user in $Users) {
        # Filter: Only include real user accounts (exclude rooms, resources, shared mailboxes)
        # Room and resource mailboxes have specific characteristics:
        # - userType is often null or not "Member"/"Guest"
        # - They typically lack givenName and surname
        # - They often have mail but no userPrincipalName with typical user format

        $userTypeValue = $user.userType

        # Skip if userType is null/empty (likely a room or resource)
        if ([string]::IsNullOrWhiteSpace($userTypeValue)) {
            continue
        }

        # Only include users with userType = "Member" or "Guest"
        # Rooms/resources typically have different userType values or null
        if ($userTypeValue -ne 'Member' -and $userTypeValue -ne 'Guest') {
            continue
        }

        # Additional heuristic: Real users typically have either givenName or surname
        # Room mailboxes typically have neither (only displayName)
        # This is not foolproof but combined with userType check, it's quite reliable
        $hasGivenName = -not [string]::IsNullOrWhiteSpace($user.givenName)
        $hasSurname = -not [string]::IsNullOrWhiteSpace($user.surname)

        # If user has Member/Guest type but no name components, might be a shared resource
        # Allow through if they have at least givenName OR surname OR if account is enabled
        # (most room mailboxes are enabled but lack name components)
        if (-not $hasGivenName -and -not $hasSurname -and $user.accountEnabled) {
            # Additional check: if they have licenses assigned, likely a real user
            if (-not $user.assignedLicenses -or $user.assignedLicenses.Count -eq 0) {
                # No licenses and no name components - likely a room/resource
                continue
            }
        }

        $flatUser = [ordered]@{}

        # Core Identity Properties (simple strings)
        $flatUser['userPrincipalName'] = $user.userPrincipalName
        $flatUser['DisplayName'] = $user.displayName
        $flatUser['id'] = $user.id
        $flatUser['Email'] = $user.mail
        $flatUser['givenName'] = $user.givenName
        $flatUser['surname'] = $user.surname

        # Job Properties
        $flatUser['JobTitle'] = $user.jobTitle
        $flatUser['department'] = $user.department
        $flatUser['employeeType'] = $user.employeeType
        $flatUser['employeeId'] = $user.employeeId
        $flatUser['employeeHireDate'] = $user.employeeHireDate

        # Location Properties
        $flatUser['officeLocation'] = $user.officeLocation
        $flatUser['city'] = $user.city
        $flatUser['state'] = $user.state
        $flatUser['Country'] = $user.country
        $flatUser['postalCode'] = $user.postalCode
        $flatUser['companyName'] = $user.companyName

        # Organizational Properties (nested object - guard against null)
        $flatUser['employeeOrgData_division'] = if ($user.employeeOrgData) { $user.employeeOrgData.division } else { $null }
        $flatUser['employeeOrgData_costCenter'] = if ($user.employeeOrgData) { $user.employeeOrgData.costCenter } else { $null }

        # Status Properties
        $flatUser['accountEnabled'] = $user.accountEnabled
        $flatUser['userType'] = $user.userType
        $flatUser['createdDateTime'] = $user.createdDateTime

        # Usage Properties
        $flatUser['usageLocation'] = $user.usageLocation
        $flatUser['preferredLanguage'] = $user.preferredLanguage

        # Sync Properties
        $flatUser['onPremisesSyncEnabled'] = $user.onPremisesSyncEnabled
        $flatUser['onPremisesImmutableId'] = $user.onPremisesImmutableId
        $flatUser['externalUserState'] = $user.externalUserState

        # Explode proxyAddresses array (Email aliases); the primary SMTP address
        # is conventionally the one prefixed 'SMTP:' (note -like/-replace are
        # case-insensitive, so the first list entry wins on ties)
        if ($user.proxyAddresses -and $user.proxyAddresses.Count -gt 0) {
            $primarySMTP = $user.proxyAddresses | Where-Object { $_ -like 'SMTP:*' } | Select-Object -First 1
            $flatUser['proxyAddresses_Primary'] = if ($primarySMTP) { $primarySMTP -replace '^SMTP:', '' } else { $null }
            $flatUser['proxyAddresses_Count'] = $user.proxyAddresses.Count
            $flatUser['proxyAddresses_All'] = ($user.proxyAddresses -join '; ')
        }
        else {
            $flatUser['proxyAddresses_Primary'] = $null
            $flatUser['proxyAddresses_Count'] = 0
            $flatUser['proxyAddresses_All'] = $null
        }

        # Handle manager object separately (flatten to individual columns)
        if ($user.manager) {
            $flatUser['manager_id'] = $user.manager.id
            $flatUser['manager_displayName'] = $user.manager.displayName
            $flatUser['manager_userPrincipalName'] = $user.manager.userPrincipalName
            $flatUser['manager_mail'] = $user.manager.mail
            $flatUser['manager_jobTitle'] = $user.manager.jobTitle
        }
        else {
            $flatUser['manager_id'] = $null
            $flatUser['manager_displayName'] = $null
            $flatUser['manager_userPrincipalName'] = $null
            $flatUser['manager_mail'] = $null
            $flatUser['manager_jobTitle'] = $null
        }

        # License columns will be added separately by Get-EntraUsersData()
        # using Get-UserLicenseData() to provide MAC-format columns:
        #   - assignedLicenses (semicolon-separated SKU names)
        #   - hasLicense (Copilot detection boolean)

        # =====================================================================
        # Power BI AI-in-One Dashboard 2701 Template Compatibility Columns
        # These alias columns map existing Graph API data to 2701 template column names
        # =====================================================================
        $flatUser['ManagerID'] = $flatUser['manager_id']
        $flatUser['BusinessAreaLabel'] = $flatUser['employeeOrgData_division']
        $flatUser['CountryofEmployment'] = $flatUser['Country']
        $flatUser['CompanyCodeLabel'] = $flatUser['companyName']
        $flatUser['CostCentreLabel'] = $flatUser['employeeOrgData_costCenter']
        $flatUser['UserName'] = $flatUser['DisplayName']

        # Viva Insights-specific columns (not available from Microsoft Graph API)
        # These are placeholders for template compatibility - data must come from HR systems
        $flatUser['EffectiveDate'] = $null
        $flatUser['FunctionType'] = $null
        $flatUser['BusinessAreaCode'] = $null
        $flatUser['OrgLevel_3Label'] = $null

        # Convert ordered hashtable to PSCustomObject for proper CSV export
        $flattenedUsers.Add([PSCustomObject]$flatUser)
    }

    # Returning the List enumerates it to the pipeline, matching the original
    # array-return behavior (empty list -> no output, same as empty array).
    return $flattenedUsers
}
+
function Get-EntraUsersData {
    <#
    .SYNOPSIS
    Collects and flattens Entra ID (Azure AD) user directory data and enriches with MAC-format license info.

    .DESCRIPTION
    Pages through the Graph /v1.0/users endpoint (26 selected properties plus
    manager expansion, $top=999 per page, with a >2000-page safety abort),
    filters out non-user principals (rooms/resources) and flattens users via
    ConvertTo-FlatEntraUsers, then appends two MAC-aligned columns per user:
      - assignedLicenses (semicolon-separated SKU skuPartNumber names)
      - HasLicense (Copilot license boolean)
    NOTE(review): the property is added as 'HasLicense' below, while related
    comments elsewhere in this file say 'hasLicense' - confirm which casing
    downstream consumers expect.

    License enrichment uses existing Get-UserLicenseData() hashtables
    ($script:LicenseData.UserLicenses / .UserHasCopilot), looked up by UPN
    first, then by object id. Only called when -IncludeUserInfo is specified
    (Graph API mode).

    .PARAMETER Quiet
    Suppresses progress messages (errors/warnings are still written).

    .OUTPUTS
    Array[psobject] of flattened users with the two license columns appended.

    .NOTES
    If Graph API call fails, returns empty array with warning. License enrichment silently skips if lookup missing.
    #>
    param(
        [switch]$Quiet
    )

    $entraUsers = @()
    try {
        if (-not $Quiet) { Write-LogHost "Fetching Entra user directory (35 properties + manager)..." -ForegroundColor Cyan }

        # Properties mirrored from Graph script (excluding license arrays we purposefully omit)
        $entraUserSelect = @(
            'userPrincipalName','displayName','id','mail','givenName','surname','jobTitle','department','employeeType','employeeId','employeeHireDate',
            'officeLocation','city','state','country','postalCode','companyName','accountEnabled','userType','createdDateTime','usageLocation',
            'preferredLanguage','onPremisesSyncEnabled','onPremisesImmutableId','externalUserState','employeeOrgData','proxyAddresses'
        ) -join ','

        # Page through all users; @odata.nextLink drives the loop
        $baseUri = "https://graph.microsoft.com/v1.0/users?`$select=$entraUserSelect&`$expand=manager&`$top=999"
        $nextLink = $baseUri
        $rawUsers = @()
        $loops = 0
        while ($nextLink) {
            $loops++
            $resp = Invoke-GraphRequest -Uri $nextLink -Method GET -ErrorAction Stop
            if ($resp.value) { $rawUsers += $resp.value }
            $nextLink = $resp.'@odata.nextLink'
            # Hard cap to avoid an infinite paging loop on a bad nextLink
            if ($loops -gt 2000) { throw "Safety abort: excessive paging (>2000)" }
        }

        if (-not $Quiet) { Write-LogHost "  Retrieved $($rawUsers.Count) raw user objects" -ForegroundColor Gray }
        $flattened = ConvertTo-FlatEntraUsers -Users $rawUsers
        if (-not $Quiet) { Write-LogHost "  Flattened to $($flattened.Count) user rows (filtered)" -ForegroundColor Gray }

        # License enrichment (MAC-format columns)
        $licenseData = $script:LicenseData
        foreach ($u in $flattened) {
            $upn = $u.userPrincipalName
            $assignedNames = $null
            $hasCopilot = $false
            if ($licenseData) {
                # lookup by UPN then id for flexibility
                if ($licenseData.UserLicenses.ContainsKey($upn)) {
                    $assignedNames = ($licenseData.UserLicenses[$upn] -join ';')
                } elseif ($licenseData.UserLicenses.ContainsKey($u.id)) {
                    $assignedNames = ($licenseData.UserLicenses[$u.id] -join ';')
                }
                if ($licenseData.UserHasCopilot.ContainsKey($upn)) {
                    $hasCopilot = [bool]$licenseData.UserHasCopilot[$upn]
                } elseif ($licenseData.UserHasCopilot.ContainsKey($u.id)) {
                    $hasCopilot = [bool]$licenseData.UserHasCopilot[$u.id]
                }
            }
            Add-Member -InputObject $u -NotePropertyName 'assignedLicenses' -NotePropertyValue $assignedNames -Force
            Add-Member -InputObject $u -NotePropertyName 'HasLicense' -NotePropertyValue $hasCopilot -Force
        }
        $entraUsers = $flattened
        # Validate schema (non-fatal)
        try { Test-EntraUsersSchema -Users $entraUsers -Quiet:$Quiet } catch { }
    }
    catch {
        Write-LogHost "WARNING: Failed to collect Entra user directory: $($_.Exception.Message)" -ForegroundColor Yellow
    }
    return $entraUsers
}
+
+# ==============================================
+# GRAPH API QUERY FUNCTIONS
+# ==============================================
+# REST-based audit log query functions for Microsoft Graph Security API
+
function Test-Is429 {
    <#
    .SYNOPSIS
    Safely detects 429 (Too Many Requests) throttling errors.

    .DESCRIPTION
    Provides null-safe detection of 429 throttling responses from Graph API.
    Handles PowerShell 7+ variations where .Response property may be null.

    Three-layer fallback strategy:
    1. Check .Response.StatusCode against 429 / 'TooManyRequests' (enum compare)
    2. Check the StatusCode's underlying numeric value__ (PS7+ enum pattern)
    3. Parse error message for '429' string (final fallback)

    .PARAMETER Exception
    The caught exception object from try/catch block

    .OUTPUTS
    $true if 429 throttling detected, $false otherwise

    .EXAMPLE
    try {
        Invoke-RestMethod -Uri $uri -Headers $headers
    }
    catch {
        if (Test-Is429 -Exception $_) {
            Start-Sleep -Seconds 60
        }
    }
    #>

    param(
        [Parameter(Mandatory = $true)]
        [System.Management.Automation.ErrorRecord]$Exception
    )

    # Layer 1: Check .Response.StatusCode (traditional method)
    if ($Exception.Exception.Response -and $Exception.Exception.Response.StatusCode) {
        if ($Exception.Exception.Response.StatusCode -eq 429 -or $Exception.Exception.Response.StatusCode -eq 'TooManyRequests') {
            return $true
        }

        # Layer 2: Compare the enum's underlying numeric value (PS7+ returns an
        # HttpStatusCode enum whose string compare can miss; value__ is exact).
        # BUG FIX: the original dereferenced .Response.StatusCode here without
        # the null guard Layer 1 has, which throws under Set-StrictMode when
        # Response is null; the check is now inside the guarded branch.
        if ($Exception.Exception.Response.StatusCode.value__ -eq 429) {
            return $true
        }
    }

    # Layer 3: Parse error message as final fallback
    $errorMessage = $Exception.Exception.Message
    if ($errorMessage -match '429' -or $errorMessage -match 'Too Many Requests' -or $errorMessage -match 'TooManyRequests') {
        return $true
    }

    return $false
}
+
function Invoke-GraphAuditQuery {
    <#
    .SYNOPSIS
    Creates a new audit log query in Microsoft Graph Security API.

    .DESCRIPTION
    Submits an audit log query request to Microsoft Graph Security API.
    Returns a query ID that can be used to poll for status and retrieve results.

    The Graph API uses an asynchronous query model:
    1. Submit query (this function) - returns queryId
    2. Poll query status - wait for "succeeded" state
    3. Retrieve records - paginated results

    .PARAMETER DisplayName
    Friendly name for the query (for tracking purposes)

    .PARAMETER StartDate
    Start date/time for audit log query (ISO 8601 format)

    .PARAMETER EndDate
    End date/time for audit log query (ISO 8601 format)

    .PARAMETER Operations
    Array of operation types to query (e.g., 'CopilotInteraction')

    .PARAMETER RecordTypes
    Optional record type filters to include in the Graph query body (passthrough).
    Dropped when Operations overlap the M365 usage bundle (see sanitizer below).

    .PARAMETER ServiceTypes
    Optional service/workload filters to include in the Graph query body (passthrough).
    Only the first element is sent (Graph's serviceFilter is a single string).

    .OUTPUTS
    Query ID string if successful, $null if failed
    #>

    param(
        [Parameter(Mandatory = $true)]
        [string]$DisplayName,

        [Parameter(Mandatory = $true)]
        [Alias('FilterStartDateTime')]
        [datetime]$StartDate,

        [Parameter(Mandatory = $true)]
        [Alias('FilterEndDateTime')]
        [datetime]$EndDate,

        [Parameter(Mandatory = $false)]
        [Alias('OperationFilters')]
        [string[]]$Operations,

        [Parameter(Mandatory = $false)]
        [Alias('RecordTypeFilters')]
        [string[]]$RecordTypes,

        [Parameter(Mandatory = $false)]
        [Alias('ServiceFilter')]
        [string[]]$ServiceTypes
    )

    try {
        # Format dates to ISO 8601 format required by Graph API
        $startDateStr = $StartDate.ToUniversalTime().ToString('yyyy-MM-ddTHH:mm:ss.fffZ')
        $endDateStr = $EndDate.ToUniversalTime().ToString('yyyy-MM-ddTHH:mm:ss.fffZ')

        # Build request body
        $body = @{
            displayName         = $DisplayName
            filterStartDateTime = $startDateStr
            filterEndDateTime   = $endDateStr
        }

        # Fail-safe sanitizer: If operations include M365 usage ops, drop record/service filters
        # (usage operations span workloads; narrowing filters would silently lose records).
        try {
            $usageOps = $script:m365UsageActivityBundle
            if (-not $usageOps) { $usageOps = $m365UsageActivityBundle }
            $hasUsageOps = $false
            if ($Operations -and $usageOps) {
                $opsLower = @($Operations | ForEach-Object { $_.ToLowerInvariant() })
                $usageLower = @($usageOps | ForEach-Object { $_.ToLowerInvariant() })
                # Coerce to [bool]: the pipeline yields the first matching string (or $null)
                $hasUsageOps = [bool](($opsLower | Where-Object { $usageLower -contains $_ }) | Select-Object -First 1)
            }
            if ($hasUsageOps) {
                $RecordTypes = $null
                $ServiceTypes = $null
            }
        } catch { }

        # Add operation filters if specified
        if ($Operations -and $Operations.Count -gt 0) {
            $body.operationFilters = @($Operations)
        }

        # Add optional record/service filters (passthrough from caller)
        if ($RecordTypes -and $RecordTypes.Count -gt 0) {
            $body.recordTypeFilters = @($RecordTypes)
        }

        if ($ServiceTypes -and $ServiceTypes.Count -gt 0) {
            $body.serviceFilter = $ServiceTypes[0]
        }

        # Log query details for troubleshooting (persisted to run log)
        Write-LogHost "[INFO] Graph API Query Body:" -ForegroundColor Magenta
        if ($Operations -and $Operations.Count -gt 0) {
            Write-LogHost "  operationFilters: $($Operations -join ', ')" -ForegroundColor DarkGray
        }
        if ($RecordTypes -and $RecordTypes.Count -gt 0) {
            Write-LogHost "  recordTypeFilters: $($RecordTypes -join ', ')" -ForegroundColor DarkGray
        }
        if ($ServiceTypes -and $ServiceTypes.Count -gt 0) {
            Write-LogHost "  serviceFilter: $($ServiceTypes[0])" -ForegroundColor DarkGray
        }
        $bodyJson = $body | ConvertTo-Json -Depth 10
        Write-LogHost $bodyJson -ForegroundColor DarkGray

        # Submit query via Graph API (auto-detects v1.0 or beta)
        $uri = Get-GraphAuditApiUri -Path 'queries'
        $response = Invoke-MgGraphRequest -Method POST -Uri $uri -Body $body -ErrorAction Stop

        if ($response -and $response.id) {
            return $response.id
        }
        else {
            Write-LogHost "WARNING: Graph API query submitted but no ID returned" -ForegroundColor Yellow
            return $null
        }
    }
    catch {
        Write-LogHost "ERROR: Failed to submit Graph audit query: $($_.Exception.Message)" -ForegroundColor Red
        # Best-effort dump of the error response body for diagnostics.
        # NOTE(review): GetResponseStream() exists on HttpWebResponse (PS5.1);
        # PS7's HttpResponseMessage lacks it - the inner try/catch absorbs that.
        try {
            if ($_.Exception.Response) {
                $respStream = $_.Exception.Response.GetResponseStream()
                if ($respStream) {
                    $reader = New-Object System.IO.StreamReader($respStream)
                    # Use a distinct variable: the original reused $body here,
                    # clobbering the request-body hashtable built above.
                    $respBody = $reader.ReadToEnd()
                    $reader.Dispose()
                    if ($respBody) { Write-LogHost "GRAPH response body: $respBody" -ForegroundColor DarkGray }
                }
            }
        } catch {}
        return $null
    }
}
+
+function Get-GraphAuditQueryStatus {
+    <#
+    .SYNOPSIS
+        Checks the status of a Graph API audit log query.
+
+    .DESCRIPTION
+        Polls the Microsoft Graph Security API to check query execution status.
+
+        Possible status values:
+        - notStarted: Query submitted but not yet processing
+        - queued: Query waiting in backend queue for available execution slot
+        - running: Query is executing
+        - succeeded: Query completed successfully, records ready
+        - failed: Query failed
+        - cancelled: Query was cancelled
+
+    .PARAMETER QueryId
+        The query ID returned by Invoke-GraphAuditQuery
+
+    .OUTPUTS
+        Hashtable with status information: @{ Status='succeeded'; RecordCount=1234 }
+        Returns $null if query check fails
+    #>
+
+    param(
+        [Parameter(Mandatory = $true)]
+        [string]$QueryId
+    )
+
+    try {
+        $uri = Get-GraphAuditApiUri -Path "queries/$QueryId"
+        $response = Invoke-MgGraphRequest -Method GET -Uri $uri -ErrorAction Stop
+
+        $result = @{
+            QueryId     = $QueryId
+            Status      = $response.status
+            RecordCount = 0
+        }
+
+        # Some status responses include a record count.
+        # FIX: Invoke-MgGraphRequest returns a Hashtable by default, whose
+        # PSObject.Properties are the Hashtable type's own members (Keys, Values, ...),
+        # not the response keys - the original PSObject-only check never matched and
+        # RecordCount was always 0. Support both dictionary and PSObject shapes.
+        if ($response -is [System.Collections.IDictionary]) {
+            if ($response.Contains('recordCount')) {
+                $result.RecordCount = $response['recordCount']
+            }
+        }
+        elseif ($response.PSObject.Properties.Name -contains 'recordCount') {
+            $result.RecordCount = $response.recordCount
+        }
+
+        return $result
+    }
+    catch {
+        Write-LogHost "ERROR: Failed to get Graph query status: $($_.Exception.Message)" -ForegroundColor Red
+        return $null
+    }
+}
+
+function Get-GraphAuditRecords {
+    <#
+    .SYNOPSIS
+        Retrieves audit log records from a completed Graph API query.
+
+    .DESCRIPTION
+        Fetches audit log records from Microsoft Graph Security API for a completed query.
+        Handles pagination automatically using @odata.nextLink.
+
+        Only call this function after confirming query status is "succeeded".
+
+    .PARAMETER QueryId
+        The query ID returned by Invoke-GraphAuditQuery
+
+    .PARAMETER MaxRecords
+        Maximum number of records to retrieve (default: 0 = unlimited)
+
+    .OUTPUTS
+        Array of audit log record objects, or empty array if none found
+    #>
+
+    param(
+        [Parameter(Mandatory = $true)]
+        [string]$QueryId,
+
+        [Parameter(Mandatory = $false)]
+        [int]$MaxRecords = 0
+    )
+
+    try {
+        # PERF: use a generic List instead of '+=' on an array - appending to a
+        # PowerShell array re-allocates the whole array each time (O(n^2) across
+        # large paged result sets).
+        $records = [System.Collections.Generic.List[object]]::new()
+        $uri = Get-GraphAuditApiUri -Path "queries/$QueryId/records"
+
+        do {
+            $response = Invoke-MgGraphRequest -Method GET -Uri $uri -ErrorAction Stop
+
+            if ($response -and $response.value) {
+                foreach ($rec in @($response.value)) {
+                    $records.Add($rec)
+                    # Stop appending once the max records limit is reached
+                    if ($MaxRecords -gt 0 -and $records.Count -ge $MaxRecords) { break }
+                }
+                # Exit the pagination loop once capped
+                if ($MaxRecords -gt 0 -and $records.Count -ge $MaxRecords) { break }
+            }
+
+            # Check for pagination (nextLink is absent on the final page)
+            $uri = $response.'@odata.nextLink'
+
+        } while ($uri)
+
+        return $records.ToArray()
+    }
+    catch {
+        Write-LogHost "ERROR: Failed to retrieve Graph audit records: $($_.Exception.Message)" -ForegroundColor Red
+        return @()
+    }
+}
+
+# ==============================================
+# DATA NORMALIZATION FUNCTION
+# ==============================================
+# Converts Graph API audit records to EOM-compatible schema
+
+function ConvertFrom-GraphAuditRecord {
+    <#
+    .SYNOPSIS
+        Normalizes Graph API audit records to match EOM cmdlet output schema.
+
+    .DESCRIPTION
+        Transforms Microsoft Graph Security API audit log records into the same
+        structure as Search-UnifiedAuditLog cmdlet output. This ensures the
+        existing explosion logic works identically regardless of data source.
+
+        Graph API Schema → EOM Schema Mapping:
+        - auditLogRecordType → RecordType
+        - operation → Operations
+        - createdDateTime → CreationDate
+        - auditData → AuditData (JSON string)
+        - userPrincipalName → UserIds
+        - id → Identity (unique record identifier)
+
+    .PARAMETER GraphRecords
+        Array of audit log records from Graph API (Get-GraphAuditRecords output)
+
+    .OUTPUTS
+        Array of normalized records matching EOM schema structure
+    #>
+
+    param(
+        [Parameter(Mandatory = $true)]
+        [AllowEmptyCollection()]
+        [array]$GraphRecords
+    )
+
+    if (-not $GraphRecords -or $GraphRecords.Count -eq 0) {
+        return @()
+    }
+
+    # PERF: generic List instead of '+=' array re-allocation per record
+    $normalized = [System.Collections.Generic.List[object]]::new()
+
+    foreach ($record in $GraphRecords) {
+        try {
+            # Graph records may be Hashtables (Invoke-MgGraphRequest default) or
+            # PSObjects; build a property-name list that works for both.
+            # The original PSObject-only check silently failed on Hashtable input.
+            $propNames = if ($record -is [System.Collections.IDictionary]) {
+                @($record.Keys)
+            } else {
+                @($record.PSObject.Properties.Name)
+            }
+
+            # Create EOM-compatible object structure.
+            # FIX: 'Identity' must be declared here - assigning an undeclared
+            # property on a PSCustomObject throws, which the catch below swallowed,
+            # silently dropping every record that carried an 'id'.
+            $eomRecord = [PSCustomObject]@{
+                RecordType   = $null
+                CreationDate = $null
+                UserIds      = $null
+                Operations   = $null
+                Identity     = $null
+                AuditData    = '{}'
+            }
+
+            # Map: auditLogRecordType → RecordType
+            if ($propNames -contains 'auditLogRecordType') {
+                $eomRecord.RecordType = $record.auditLogRecordType
+            }
+
+            # Map: createdDateTime → CreationDate (fall back to raw value if parse fails)
+            if ($propNames -contains 'createdDateTime') {
+                try {
+                    $eomRecord.CreationDate = script:Parse-DateSafe $record.createdDateTime
+                }
+                catch {
+                    $eomRecord.CreationDate = $record.createdDateTime
+                }
+            }
+
+            # Map: userPrincipalName → UserIds
+            if ($propNames -contains 'userPrincipalName') {
+                $eomRecord.UserIds = $record.userPrincipalName
+            }
+
+            # Map: operation → Operations
+            if ($propNames -contains 'operation') {
+                $eomRecord.Operations = $record.operation
+            }
+
+            # Map: id → Identity (unique identifier)
+            if ($propNames -contains 'id') {
+                $eomRecord.Identity = $record.id
+            }
+
+            # Map: auditData → AuditData (must be JSON string for explosion logic)
+            # PERF: Also store _ParsedAuditData to avoid re-parsing during explosion
+            if ($propNames -contains 'auditData') {
+                $auditDataObj = $record.auditData
+
+                # Store the already-parsed object for explosion optimization
+                $eomRecord | Add-Member -NotePropertyName '_ParsedAuditData' -NotePropertyValue $auditDataObj -Force
+
+                if ($auditDataObj -is [string]) {
+                    # Already a JSON string: pass through unchanged.
+                    # String means it wasn't pre-parsed, so clear _ParsedAuditData.
+                    $eomRecord.AuditData = $auditDataObj
+                    $eomRecord._ParsedAuditData = $null
+                }
+                else {
+                    # Convert object to JSON string (explosion logic expects string)
+                    try {
+                        $eomRecord.AuditData = ($auditDataObj | ConvertTo-Json -Depth 100 -Compress)
+                    }
+                    catch {
+                        Write-LogHost "WARNING: Failed to serialize auditData for record $($eomRecord.Identity)" -ForegroundColor Yellow
+                        $eomRecord.AuditData = '{}'
+                        $eomRecord._ParsedAuditData = $null
+                    }
+                }
+            }
+            else {
+                # No auditData present - create minimal valid JSON
+                $eomRecord.AuditData = '{}'
+            }
+
+            $normalized.Add($eomRecord)
+        }
+        catch {
+            Write-LogHost "WARNING: Failed to normalize Graph record: $($_.Exception.Message)" -ForegroundColor Yellow
+            # Continue processing remaining records
+        }
+    }
+
+    return $normalized.ToArray()
+}
+
+#
+# Core live-mode functions providing connectivity and paged audit retrieval.
+# NOTE: This function is now wrapped by Connect-PurviewAudit for EOM mode compatibility
+
+# Establishes an Exchange Online / Security & Compliance session using the
+# authentication mode selected via the script-level $Auth parameter.
+# Idempotent: returns immediately when $script:Connected is already set.
+# Side effects: may install/import the ExchangeOnlineManagement module and
+# sets $script:Connected = $true on success. Throws on any failure.
+function Connect-ToComplianceCenter {
+    param()
+    # Already connected in this session - nothing to do
+    if ($script:Connected) { return }
+    Write-LogHost "Connecting to Microsoft 365 Security & Compliance Center..." -ForegroundColor Cyan
+    # Ensure ExchangeOnlineManagement module is available (install to CurrentUser
+    # scope if missing, then force-import the newest available version)
+    try {
+        $existingEOM = Get-Module -ListAvailable -Name ExchangeOnlineManagement | Sort-Object Version -Descending | Select-Object -First 1
+        if (-not $existingEOM) {
+            Write-LogHost "Installing ExchangeOnlineManagement module (CurrentUser scope)..." -ForegroundColor Yellow
+            try { Install-Module -Name ExchangeOnlineManagement -Scope CurrentUser -Force -AllowClobber -ErrorAction Stop } catch { Write-LogHost "Failed to install module: $($_.Exception.Message)" -ForegroundColor Red; throw }
+        }
+        Import-Module ExchangeOnlineManagement -Force -ErrorAction Stop
+    } catch {
+        Write-LogHost "Module load/install failure: $($_.Exception.Message)" -ForegroundColor Red
+        throw
+    }
+
+    # Authentication modes (subset retained for stability).
+    # NOTE(review): $Auth is assumed to be a script-level parameter - confirm its
+    # declared values match the cases handled here ('weblogin'/'devicecode'/'credential').
+    try {
+        switch ($Auth.ToLower()) {
+            'weblogin' {
+                try {
+                    # Newer EOM versions removed -UseWebLogin; probe for the
+                    # parameter before using it so both module generations work
+                    $exoCmd = Get-Command Connect-ExchangeOnline -ErrorAction Stop
+                    $hasUseWeb = $exoCmd.Parameters.ContainsKey('UseWebLogin')
+                    if ($hasUseWeb) {
+                        Write-LogHost 'Using Connect-ExchangeOnline -UseWebLogin (parameter present).' -ForegroundColor DarkGray
+                        Connect-ExchangeOnline -ShowBanner:$false -UseWebLogin -ErrorAction Stop | Out-Null
+                    }
+                    else {
+                        Write-LogHost 'UseWebLogin parameter not available in this host/module; invoking standard interactive Connect-ExchangeOnline.' -ForegroundColor Yellow
+                        Connect-ExchangeOnline -ShowBanner:$false -ErrorAction Stop | Out-Null
+                    }
+                }
+                catch { Write-LogHost "WebLogin flow failed: $($_.Exception.Message)" -ForegroundColor Red; throw }
+            }
+            'devicecode' {
+                # Device-code flow for hosts without an interactive browser
+                Connect-ExchangeOnline -ShowBanner:$false -Device | Out-Null
+            }
+            'credential' {
+                # Prompt for explicit admin credentials (basic/credential flow)
+                $cred = Get-Credential -Message 'Enter admin credentials for Exchange Online'
+                Connect-ExchangeOnline -ShowBanner:$false -Credential $cred | Out-Null
+            }
+            default {
+                # Silent first, fallback to WebLogin
+                $silentOk = $true
+                try { Connect-ExchangeOnline -ShowBanner:$false -ErrorAction Stop | Out-Null } catch { $silentOk = $false }
+                if (-not $silentOk) {
+                    try { Connect-ExchangeOnline -ShowBanner:$false -UseWebLogin -ErrorAction Stop | Out-Null } catch { Write-LogHost "Silent + fallback auth failed: $($_.Exception.Message)" -ForegroundColor Red; throw }
+                }
+            }
+        }
+        # Mark the session connected so subsequent calls short-circuit
+        $script:Connected = $true
+        Write-LogHost "Connected successfully." -ForegroundColor Green
+    }
+    catch {
+        Write-LogHost "Connection failure: $($_.Exception.Message)" -ForegroundColor Red
+        throw
+    }
+}
+
+# ==============================================
+# DUAL-MODE DIAGNOSTICS FUNCTION
+# ==============================================
+# Unified capability check for both EOM and Graph API modes
+
+function Test-PurviewAuditCapability {
+    <#
+    .SYNOPSIS
+        Tests audit log query capability for either EOM or Graph API mode.
+
+    .DESCRIPTION
+        Performs connectivity and permission checks based on active mode.
+
+        EOM Mode:
+        - Verifies Search-UnifiedAuditLog cmdlet availability
+        - Performs probe query to test permissions
+        - Checks for proper role assignments
+
+        Graph API Mode:
+        - Tests Graph API connectivity
+        - Verifies AuditLog.Read.All permissions
+        - Performs lightweight endpoint check
+
+    .PARAMETER UseEOMMode
+        If true, test EOM capabilities. If false, test Graph API.
+
+    .PARAMETER SkipChecks
+        If true, skip all diagnostic checks (for advanced scenarios)
+
+    .OUTPUTS
+        $true if capability check passes, $false otherwise
+    #>
+
+    param(
+        [Parameter(Mandatory = $false)]
+        [bool]$UseEOMMode = $false,
+
+        [Parameter(Mandatory = $false)]
+        [bool]$SkipChecks = $false
+    )
+
+    if ($SkipChecks) {
+        Write-LogHost "Diagnostics: Skipped (per user request)" -ForegroundColor Gray
+        return $true
+    }
+
+    if ($UseEOMMode) {
+        # ========================================
+        # EOM MODE DIAGNOSTICS
+        # ========================================
+
+        Write-LogHost "Running EOM capability diagnostics..." -ForegroundColor Cyan
+
+        # Check if cmdlet is available
+        $cmd = Get-Command Search-UnifiedAuditLog -ErrorAction SilentlyContinue
+        if (-not $cmd) {
+            Write-LogHost "  ✗ DIAGNOSTIC FAILED: 'Search-UnifiedAuditLog' cmdlet not found" -ForegroundColor Red
+            Write-LogHost ""
+            Write-LogHost "Troubleshooting:" -ForegroundColor Yellow
+            Write-LogHost "  1. Ensure ExchangeOnlineManagement module v3+ is installed" -ForegroundColor White
+            Write-LogHost "  2. Try: Install-Module ExchangeOnlineManagement -Scope CurrentUser" -ForegroundColor White
+            Write-LogHost "  3. Verify authentication completed successfully" -ForegroundColor White
+            Write-LogHost ""
+            Write-LogHost "Role Requirements:" -ForegroundColor Yellow
+            Write-LogHost "  • View-Only Audit Logs role (minimum)" -ForegroundColor White
+            Write-LogHost "  • Compliance Management role group" -ForegroundColor White
+            Write-LogHost "  • Organization Management role group" -ForegroundColor White
+            return $false
+        }
+
+        # Perform probe query to test permissions (narrow 1-minute window in the
+        # recent past keeps the probe cheap)
+        try {
+            $now = (Get-Date).ToUniversalTime()
+            $probeStart = $now.AddMinutes(-7)
+            $probeEnd = $now.AddMinutes(-6)
+
+            # Lightweight probe with unlikely operation
+            $null = Search-UnifiedAuditLog -StartDate $probeStart -EndDate $probeEnd -Operations 'UserLoggedIn' -ResultSize 1 -ErrorAction Stop
+
+            Write-LogHost "  EOM capability check passed" -ForegroundColor Green
+            return $true
+        }
+        catch {
+            $msg = $_.Exception.Message
+            Write-LogHost "  ✗ DIAGNOSTIC FAILED: Probe query failed" -ForegroundColor Red
+            Write-LogHost "  Error: $msg" -ForegroundColor Yellow
+            Write-LogHost ""
+
+            # Map common failure messages to actionable guidance
+            if ($msg -match 'is not within the current user|Access denied|not authorized|insufficient') {
+                Write-LogHost "Likely Cause: Missing required roles" -ForegroundColor Yellow
+                Write-LogHost "  Add account to 'Audit Logs' role group in Microsoft Purview" -ForegroundColor White
+            }
+            elseif ($msg -match 'The term .*Search-UnifiedAuditLog.* is not recognized') {
+                Write-LogHost "Likely Cause: Module not loaded properly" -ForegroundColor Yellow
+                Write-LogHost "  Try: Import-Module ExchangeOnlineManagement -Force" -ForegroundColor White
+            }
+            else {
+                Write-LogHost "General Guidance:" -ForegroundColor Yellow
+                Write-LogHost "  1. Ensure Unified Audit Log is enabled tenant-wide" -ForegroundColor White
+                Write-LogHost "  2. Verify role assignments are properly configured" -ForegroundColor White
+                Write-LogHost "  3. Check for conditional access policies blocking access" -ForegroundColor White
+            }
+
+            return $false
+        }
+    }
+    else {
+        # ========================================
+        # GRAPH API MODE DIAGNOSTICS
+        # ========================================
+
+        Write-LogHost "Running Graph API capability diagnostics..." -ForegroundColor Cyan
+
+        # Verify connected to Graph
+        try {
+            $context = Get-MgContext -ErrorAction Stop
+
+            if (-not $context) {
+                Write-LogHost "  ✗ DIAGNOSTIC FAILED: Not connected to Microsoft Graph" -ForegroundColor Red
+                Write-LogHost "  Run Connect-PurviewAudit first to establish connection" -ForegroundColor Yellow
+                return $false
+            }
+
+            # Check for required scopes (warn only - the endpoint test below is authoritative)
+            $requiredScope = 'AuditLog.Read.All'
+            if ($context.Scopes -notcontains $requiredScope) {
+                Write-LogHost "  [!] WARNING: Missing required scope: $requiredScope" -ForegroundColor Yellow
+                Write-LogHost "  Queries may fail without this permission" -ForegroundColor Yellow
+            }
+        }
+        catch {
+            Write-LogHost "  ✗ DIAGNOSTIC FAILED: Unable to get Graph context" -ForegroundColor Red
+            Write-LogHost "  Error: $($_.Exception.Message)" -ForegroundColor Yellow
+            return $false
+        }
+
+        # Test Graph API endpoint connectivity
+        try {
+            # Test actual query endpoint with minimal test query
+            $testQueryBody = @{
+                displayName         = "PAX-Diagnostic-Test-$(Get-Date -Format 'HHmmss')"
+                filterStartDateTime = (Get-Date).AddMinutes(-1).ToUniversalTime().ToString('yyyy-MM-ddTHH:mm:ss.fffZ')
+                filterEndDateTime   = (Get-Date).ToUniversalTime().ToString('yyyy-MM-ddTHH:mm:ss.fffZ')
+                operationFilters    = @('UserLoggedIn') # Common activity type for quick test
+            }
+
+            $createUri = Get-GraphAuditApiUri -Path 'queries'
+            # FIX: the 'if' statement was fused onto this line (syntax error); split apart
+            $createResponse = Invoke-MgGraphRequest -Method POST -Uri $createUri -Body $testQueryBody -ErrorAction Stop
+
+            if ($createResponse.id) {
+                Write-LogHost "  Graph API capability check passed" -ForegroundColor Green
+                Write-LogHost "  Successfully created test query (ID: $($createResponse.id))" -ForegroundColor Green
+                return $true
+            }
+            else {
+                Write-LogHost "  ✗ DIAGNOSTIC WARNING: Query created but no ID returned" -ForegroundColor Yellow
+                return $false
+            }
+        }
+        catch {
+            $msg = $_.Exception.Message
+
+            # Check if this is a throttling error (429 TooManyRequests)
+            $isThrottling = $msg -match 'TooManyRequests|429|Too many requests|throttl'
+
+            if ($isThrottling) {
+                # Set flag so we don't show scary warning message later
+                $script:ThrottlingDetected = $true
+
+                # Throttling detected - friendly terminal message, full details to log only
+                Write-Host ""
+                Write-Host "============================================================================================================" -ForegroundColor DarkYellow
+                Write-Host "  [!] Graph API Throttling Detected (429 - Too Many Requests)" -ForegroundColor DarkYellow
+                Write-Host "============================================================================================================" -ForegroundColor DarkYellow
+                Write-Host ""
+                Write-Host "  Microsoft Graph is currently rate-limiting requests to your tenant." -ForegroundColor White
+                Write-Host ""
+                Write-Host "  How PAX handles throttling:" -ForegroundColor Cyan
+                Write-Host "    • Automatic exponential backoff with retries" -ForegroundColor Gray
+                Write-Host "    • Circuit breaker protection (pauses after repeated failures)" -ForegroundColor Gray
+                Write-Host "    • Adaptive concurrency (reduces parallel requests)" -ForegroundColor Gray
+                Write-Host "    • Real-time notifications (you'll see throttle events as they occur)" -ForegroundColor Gray
+                Write-Host ""
+                Write-Host "  Recommendation: Graph API throttling typically clears within 5-10 minutes." -ForegroundColor Yellow
+                Write-Host ""
+
+                # Interactive prompt (unless -Force is used for headless runs)
+                if (-not $Force) {
+                    Write-Host "  Options:" -ForegroundColor Cyan
+                    Write-Host "    [C] CONTINUE - Proceed with automatic throttling handling (may be slow)" -ForegroundColor Green
+                    Write-Host "    [E] EXIT - Stop gracefully and retry later (recommended if heavily throttled)" -ForegroundColor Red
+                    Write-Host ""
+
+                    Send-PromptNotification
+                    # FIX: the 'if' statement was fused onto this line (syntax error); split apart
+                    $choice = Read-Host "  Enter your choice [C/E]"
+
+                    if ($choice -match '^E$|^Exit$') {
+                        Write-Host ""
+                        Write-Host "  Exiting gracefully..." -ForegroundColor Yellow
+                        Write-Host "  Disconnecting from Microsoft Graph..." -ForegroundColor Gray
+
+                        try {
+                            Disconnect-MgGraph -ErrorAction SilentlyContinue | Out-Null
+                            Write-Host "  Disconnected successfully" -ForegroundColor Green
+                        }
+                        catch {
+                            Write-Host "  (Graph connection cleanup completed)" -ForegroundColor Gray
+                        }
+
+                        Write-Host ""
+                        Write-Host "  Please wait 5-10 minutes before retrying." -ForegroundColor Cyan
+                        Write-Host ""
+
+                        # Log the graceful exit
+                        Write-Output "[DIAGNOSTIC] User chose to exit due to throttling. Will retry later." | Out-File -FilePath $LogFile -Append -Encoding utf8
+
+                        exit 0
+                    }
+                    else {
+                        Write-Host ""
+                        Write-Host "  Proceeding with automatic throttling handling..." -ForegroundColor Green
+                        Write-Host "  Expect slower execution times while Graph API recovers." -ForegroundColor Gray
+                        Write-Host ""
+                    }
+                }
+                else {
+                    # -Force flag present (headless/automation mode) - proceed automatically
+                    Write-Host "  -Force flag detected: Proceeding automatically with throttling handling..." -ForegroundColor Green
+                    Write-Host "  Expect slower execution times while Graph API recovers." -ForegroundColor Gray
+                    Write-Host ""
+                }
+
+                # Log full error details to log file only (not terminal)
+                Write-Output "[DIAGNOSTIC] Graph API throttling detected during capability check" | Out-File -FilePath $LogFile -Append -Encoding utf8
+                Write-Output "[DIAGNOSTIC] Full error details: $msg" | Out-File -FilePath $LogFile -Append -Encoding utf8
+                Write-Output "[DIAGNOSTIC] Continuing with automatic throttling handling enabled" | Out-File -FilePath $LogFile -Append -Encoding utf8
+            }
+            else {
+                # Non-throttling error - show full details
+                Write-LogHost "  ✗ DIAGNOSTIC FAILED: Graph API endpoint test failed" -ForegroundColor Red
+                Write-LogHost "  Error: $msg" -ForegroundColor Yellow
+                Write-LogHost ""
+
+                if ($msg -match 'Forbidden|403|Access.*denied|Insufficient privileges') {
+                    Write-LogHost "Likely Cause: Missing required permissions" -ForegroundColor Yellow
+                    Write-LogHost "  Required: AuditLog.Read.All Graph API scope" -ForegroundColor White
+                    Write-LogHost "  Required: Azure AD role (Compliance/Security Administrator)" -ForegroundColor White
+                }
+                elseif ($msg -match 'Unauthorized|401') {
+                    Write-LogHost "Likely Cause: Authentication issue" -ForegroundColor Yellow
+                    Write-LogHost "  Try disconnecting and reconnecting: Disconnect-MgGraph; Connect-PurviewAudit" -ForegroundColor White
+                }
+                else {
+                    Write-LogHost "General Guidance:" -ForegroundColor Yellow
+                    Write-LogHost "  1. Verify admin has consented to AuditLog.Read.All scope" -ForegroundColor White
+                    Write-LogHost "  2. Check Azure AD role assignments" -ForegroundColor White
+                    Write-LogHost "  3. Ensure network connectivity to graph.microsoft.com" -ForegroundColor White
+                }
+            }
+
+            return $false
+        }
+    }
+}
+
+# ==============================================
+# DUAL-MODE GROUP EXPANSION FUNCTION
+# ==============================================
+# Expand distribution/security groups to individual user principal names
+
+function Expand-GroupToUsers {
+    <#
+    .SYNOPSIS
+        Expands a distribution or security group to individual user principal names.
+
+    .DESCRIPTION
+        Retrieves members of a group using either EOM cmdlets or Graph API.
+
+        EOM Mode:
+        - Uses Get-DistributionGroupMember cmdlet
+        - Accepts group display name or email address
+        - Returns PrimarySmtpAddress of members
+
+        Graph API Mode:
+        - Uses Get-MgGroupMember cmdlet
+        - Requires group ObjectId (auto-resolved from display name)
+        - Returns userPrincipalName of user members
+
+    .PARAMETER GroupIdentity
+        The group identifier. Can be:
+        - Display name (e.g., "Executive Leadership")
+        - Email address (e.g., "exec-team@contoso.com")
+        - ObjectId/GUID (Graph mode only)
+
+    .PARAMETER UseEOMMode
+        If true, use EOM cmdlets. If false, use Graph API.
+
+    .OUTPUTS
+        Array of user principal names (email addresses)
+    #>
+
+    param(
+        [Parameter(Mandatory = $true)]
+        [string]$GroupIdentity,
+
+        [Parameter(Mandatory = $false)]
+        [bool]$UseEOMMode = $false
+    )
+
+    $members = @()
+
+    if ($UseEOMMode) {
+        # ========================================
+        # EOM MODE: Get-DistributionGroupMember
+        # ========================================
+
+        try {
+            Write-LogHost "  Processing group (EOM): '$GroupIdentity'" -ForegroundColor Gray
+
+            # Get-DistributionGroupMember works with display name or email
+            $groupMembers = Get-DistributionGroupMember -Identity $GroupIdentity -ErrorAction Stop
+
+            # FIX: force array so a single-member group doesn't yield a scalar
+            $members = @($groupMembers | Select-Object -ExpandProperty PrimarySmtpAddress)
+
+            Write-LogHost "    Expanded: $($members.Count) member(s)" -ForegroundColor DarkGray
+        }
+        catch {
+            Write-LogHost "  Warning: Failed to expand group '$GroupIdentity': $($_.Exception.Message)" -ForegroundColor Yellow
+            Write-LogHost "  Possible causes:" -ForegroundColor Yellow
+            Write-LogHost "    • Group does not exist or name is misspelled" -ForegroundColor Gray
+            Write-LogHost "    • Insufficient permissions (need Organization Management or similar)" -ForegroundColor Gray
+            Write-LogHost "    • Group is not a distribution/mail-enabled group" -ForegroundColor Gray
+        }
+    }
+    else {
+        # ========================================
+        # GRAPH API MODE: Get-MgGroupMember
+        # ========================================
+
+        try {
+            Write-LogHost "  Processing group (Graph API): '$GroupIdentity'" -ForegroundColor Gray
+
+            # Determine if we have an ObjectId (GUID) or display name
+            $groupId = $null
+            if ($GroupIdentity -match '^[0-9a-fA-F]{8}-([0-9a-fA-F]{4}-){3}[0-9a-fA-F]{12}$') {
+                # Looks like a GUID, use directly
+                $groupId = $GroupIdentity
+            }
+            else {
+                # Display name or email - need to resolve to ObjectId
+                Write-LogHost "    Resolving group ID from display name..." -ForegroundColor DarkGray
+
+                # FIX: escape embedded apostrophes for the OData filter literal -
+                # a group name like "O'Brien's Team" otherwise breaks the filter
+                $safeIdentity = $GroupIdentity -replace "'", "''"
+
+                # Try searching by display name first.
+                # FIX: take only the first match so $groupId is never an array.
+                $groupSearch = Get-MgGroup -Filter "displayName eq '$safeIdentity'" -ErrorAction SilentlyContinue | Select-Object -First 1
+
+                if (-not $groupSearch) {
+                    # Try by mail/mailNickname
+                    $groupSearch = Get-MgGroup -Filter "mail eq '$safeIdentity'" -ErrorAction SilentlyContinue | Select-Object -First 1
+                }
+
+                if ($groupSearch) {
+                    $groupId = $groupSearch.Id
+                    Write-LogHost "    Resolved to ObjectId: $groupId" -ForegroundColor DarkGray
+                }
+                else {
+                    throw "Unable to find group with identifier: $GroupIdentity"
+                }
+            }
+
+            # Get group members (users only)
+            $groupMembers = Get-MgGroupMember -GroupId $groupId -All -ErrorAction Stop
+
+            # Filter to users only and extract UPN
+            foreach ($member in $groupMembers) {
+                # Check if member is a user (not a nested group or service principal)
+                if ($member.AdditionalProperties.'@odata.type' -eq '#microsoft.graph.user') {
+                    # Get full user object to retrieve userPrincipalName
+                    $user = Get-MgUser -UserId $member.Id -ErrorAction SilentlyContinue
+                    if ($user -and $user.UserPrincipalName) {
+                        $members += $user.UserPrincipalName
+                    }
+                }
+            }
+
+            Write-LogHost "    Expanded: $($members.Count) user member(s)" -ForegroundColor DarkGray
+        }
+        catch {
+            Write-LogHost "  Warning: Failed to expand group '$GroupIdentity': $($_.Exception.Message)" -ForegroundColor Yellow
+            Write-LogHost "  Possible causes:" -ForegroundColor Yellow
+            Write-LogHost "    • Group does not exist or identifier is invalid" -ForegroundColor Gray
+            Write-LogHost "    • Insufficient permissions (need Group.Read.All or Directory.Read.All)" -ForegroundColor Gray
+            Write-LogHost "    • Network connectivity issues with Graph API" -ForegroundColor Gray
+        }
+    }
+
+    return $members
+}
+
+# ==============================================
+# DUAL-MODE QUERY EXECUTION WRAPPER
+# ==============================================
+# Unified query function that routes to either EOM or Graph API
+
+function Invoke-PurviewAuditQuery {
+ <#
+ .SYNOPSIS
+ Executes an audit log query using either EOM or Graph API.
+
+ .DESCRIPTION
+ Routes audit log queries to the appropriate backend:
+ - EOM Mode: Uses Search-UnifiedAuditLog cmdlet
+ - Graph API Mode: Uses async query pattern (create → poll → retrieve)
+
+ Returns audit records in a normalized format compatible with downstream processing.
+
+ .PARAMETER StartDate
+ Query start date (inclusive)
+
+ .PARAMETER EndDate
+ Query end date (exclusive)
+
+ .PARAMETER Operations
+ Activity type(s) to query
+
+ .PARAMETER UserIds
+ Optional array of user principal names to filter by
+
+ .PARAMETER ResultSize
+ Maximum number of records to retrieve (EOM mode)
+
+ .PARAMETER UseEOMMode
+ If true, use EOM cmdlets. If false, use Graph API.
+
+ .OUTPUTS
+ Array of audit log records in normalized schema
+ #>
+
+ param(
+ [Parameter(Mandatory = $true)]
+ [datetime]$StartDate,
+
+ [Parameter(Mandatory = $true)]
+ [datetime]$EndDate,
+
+ [Parameter(Mandatory = $true)]
+ [string]$Operations,
+
+ [Parameter(Mandatory = $false)]
+ [string[]]$UserIds,
+
+ [Parameter(Mandatory = $false)]
+ [int]$ResultSize = 5000,
+
+ [Parameter(Mandatory = $false)]
+ [bool]$UseEOMMode = $false
+ )
+
+ if ($UseEOMMode) {
+ # ========================================
+ # EOM MODE: Use existing Search-UnifiedAuditLog logic
+ # ========================================
+
+ # Call existing retry wrapper (preserves all the sophisticated logic)
+ $results = Invoke-SearchUnifiedAuditLogWithRetry `
+ -Start $StartDate `
+ -End $EndDate `
+ -Operation $Operations `
+ -ResultSize $ResultSize `
+ -UserIds $UserIds `
+ -AutoSubdivide $true
+
+ return $results
+ }
+ else {
+ # ========================================
+ # GRAPH API MODE: Async query pattern
+ # ========================================
+
+ try {
+ # Step 1: Create async query
+ Write-Host " [Graph API] Creating async query for $Operations..." -ForegroundColor DarkGray
+
+ # Use last included minute (EndDate - 1 minute) since end date is exclusive
+ $endDisplay = $EndDate.AddMinutes(-1)
+ $displayName = "PAX_Query_$($StartDate.ToString('yyyyMMdd_HHmm'))-$($endDisplay.ToString('yyyyMMdd_HHmm'))"
+
+ # M365 usage mode requires operationFilters only (no recordType/service filters)
+ $recordTypesArg = $null
+ $serviceFilterArg = $null
+ if (-not $script:IncludeM365Usage) {
+ # Only populate filters when NOT in M365 usage mode
+ $recordTypesArg = $RecordTypes
+ $serviceFilterArg = $script:CurrentServiceFilter
+ if (-not $serviceFilterArg -and $ServiceTypes -and $ServiceTypes.Count -gt 0) {
+ $serviceFilterArg = $ServiceTypes[0]
+ }
+ }
+
+ $queryId = Invoke-GraphAuditQuery `
+ -DisplayName $displayName `
+ -FilterStartDateTime $StartDate `
+ -FilterEndDateTime $EndDate `
+ -OperationFilters @($Operations) `
+ -RecordTypeFilters $recordTypesArg `
+ -ServiceFilter $serviceFilterArg
+
+ if (-not $queryId) {
+ Write-Host " [Graph API] Failed to create query" -ForegroundColor Red
+ return @()
+ }
+
+ Write-Host " [Graph API] Query created: $queryId" -ForegroundColor DarkGray
+
+ # Step 2: Poll for completion
+ # Replaced fixed-count polling with time-budget model supporting extended outages (up to 30 minutes)
+ $effectiveOutageMinutes = if ($MaxNetworkOutageMinutes -and $MaxNetworkOutageMinutes -gt 0) { $MaxNetworkOutageMinutes } else { 30 }
+ $maxPollDurationSeconds = $effectiveOutageMinutes * 60 # Absolute cap for network outage tolerance
+ $pollInterval = 5 # Base interval (seconds) when healthy
+ $maxHealthyInterval = 15 # Cap interval when status retrieval succeeds
+ $pollStart = Get-Date
+ $pollCount = 0
+ $queryComplete = $false
+ $networkErrorStreak = 0
+ $networkOutageStart = $null
+ $lastNetMessage = $null # Throttle repetitive network messages
+
+ Write-Host " [Graph API] Polling for query completion..." -ForegroundColor DarkGray
+
+ # Transient network resilience variables
+ $transientPatterns = @('timed out','unable to connect','connection','remote name could not be resolved','temporarily unavailable')
+
+ while (-not $queryComplete) {
+ $elapsedTotal = (Get-Date) - $pollStart
+ if ($elapsedTotal.TotalSeconds -ge $maxPollDurationSeconds) {
+ Write-Host " [NET] Polling aborted after $effectiveOutageMinutes minutes without completion (network outage window exceeded)" -ForegroundColor Red
+ break
+ }
+ Start-Sleep -Seconds $pollInterval
+ $pollCount++
+
+ $status = $null
+ try {
+ $status = Get-GraphAuditQueryStatus -QueryId $queryId -ErrorAction Stop
+ # Successful status retrieval resets outage tracking
+ $networkErrorStreak = 0
+ if ($networkOutageStart) {
+ $outageDuration = (Get-Date) - $networkOutageStart
+ # Only log recovery if outage lasted > 1 minute (ignore brief connection blips)
+ if ($outageDuration.TotalMinutes -ge 1) {
+ Write-Host " [NET] Network recovered after $([Math]::Round($outageDuration.TotalMinutes,1)) minutes" -ForegroundColor Green
+ }
+ $networkOutageStart = $null
+ $lastNetMessage = $null
+ }
+ # Query status retrieved successfully; interval adjusted
+ # Tighten interval gradually back to healthy baseline
+ $pollInterval = [Math]::Max(5, [Math]::Min($pollInterval - 2, $maxHealthyInterval))
+ }
+ catch {
+ $errMsg = $_.Exception.Message
+ if ($transientPatterns | Where-Object { $errMsg.ToLower().Contains($_) }) {
+ $networkErrorStreak++
+ if (-not $networkOutageStart) { $networkOutageStart = Get-Date }
+ $outageElapsed = (Get-Date) - $networkOutageStart
+ # Throttle messages: only show if outage > 1 min OR first error with no recent message
+ if ($outageElapsed.TotalMinutes -ge 1 -or ($networkErrorStreak -eq 1 -and (-not $lastNetMessage -or ((Get-Date) - $lastNetMessage).TotalSeconds -ge 60))) {
+ if (-not $lastNetMessage -or ((Get-Date) - $lastNetMessage).TotalSeconds -ge 60) {
+ Write-Host " [NET] Poll $pollCount`: transient network issue (streak $networkErrorStreak, outage $([Math]::Round($outageElapsed.TotalMinutes,1))m)" -ForegroundColor Yellow
+ $lastNetMessage = Get-Date
+ }
+ }
+ # Dynamic backoff growth with ceiling (to avoid hammering during outage)
+ $pollInterval = [Math]::Min(90, [Math]::Round($pollInterval * 1.6 + (Get-Random -Minimum 2 -Maximum 6)))
+ continue
+ } else {
+ Write-Host " [Graph API] Non-transient status error: $errMsg" -ForegroundColor Red
+ break
+ }
+ }
+
+ if (-not $status) { continue }
+
+ Write-Host " [Graph API] Poll $pollCount`: Status=$($status.Status), RecordCount=$($status.RecordCount)" -ForegroundColor DarkGray
+
+ switch ($status.Status) {
+ 'succeeded' {
+ $queryComplete = $true
+ Write-Host " [Graph API] Query completed: $($status.RecordCount) records available" -ForegroundColor Green
+ break
+ }
+ 'failed' {
+ Write-Host " [Graph API] Query failed" -ForegroundColor Red
+ return @()
+ }
+ 'cancelled' {
+ Write-Host " [Graph API] Query was cancelled" -ForegroundColor Yellow
+ return @()
+ }
+ default { continue }
+ }
+ }
+
+ if (-not $queryComplete) {
+ Write-Host " [Graph API] Query timed out after $($pollCount * $pollInterval) seconds" -ForegroundColor Yellow
+ return @()
+ }
+
+ # Step 3: Retrieve records
+ Write-Host " [Graph API] Retrieving records..." -ForegroundColor DarkGray
+
+ # Retrieve records with transient retry resilience
+ $graphRecords = $null
+ $recordStart = Get-Date
+ $recordAttempt = 0
+ $maxRecordDurationSeconds = $effectiveOutageMinutes * 60
+ $retrieveInterval = 4
+ while (-not $graphRecords) {
+ $recordAttempt++
+ if (((Get-Date) - $recordStart).TotalSeconds -ge $maxRecordDurationSeconds) {
+ Write-Host " [NET] Record retrieval aborted after $effectiveOutageMinutes minutes of network instability" -ForegroundColor Red
+ break
+ }
+ try {
+ # Graph API: MaxRecords=0 (unlimited) - 10K limit only applies to EOM mode
+ $graphRecords = Get-GraphAuditRecords -QueryId $queryId -MaxRecords 0 -ErrorAction Stop
+ }
+ catch {
+ $err = $_.Exception.Message
+ if ($transientPatterns | Where-Object { $err.ToLower().Contains($_) }) {
+ Write-Host " [NET] Transient record fetch issue (attempt $recordAttempt, elapsed $([Math]::Round(((Get-Date)-$recordStart).TotalMinutes,2))m): $err" -ForegroundColor Yellow
+ $retrieveInterval = [Math]::Min(90, [Math]::Round($retrieveInterval * 1.5 + (Get-Random -Minimum 1 -Maximum 5)))
+ Start-Sleep -Seconds $retrieveInterval
+ continue
+ } else {
+ Write-Host " [Graph API] Non-transient record fetch error: $err" -ForegroundColor Red
+ break
+ }
+ }
+ }
+ if (-not $graphRecords) { Write-Host " [NET] Retrieval failed after extended retry window" -ForegroundColor Red }
+
+ if (-not $graphRecords -or $graphRecords.Count -eq 0) {
+ Write-Host " [Graph API] No records returned" -ForegroundColor Gray
+ return @()
+ }
+
+ Write-Host " [Graph API] Retrieved $($graphRecords.Count) records, normalizing..." -ForegroundColor DarkGray
+
+ # Step 4: Normalize to EOM-compatible schema
+ $normalized = @()
+ foreach ($record in $graphRecords) {
+ $normalizedRecord = ConvertFrom-GraphAuditRecord -GraphRecord $record
+ if ($normalizedRecord) {
+ $normalized += $normalizedRecord
+ }
+ }
+
+ # Filter by UserIds if specified (Graph API doesn't support UPN filtering in query)
+ if ($UserIds -and $UserIds.Count -gt 0 -and $normalized.Count -gt 0) {
+ Write-Host " [Graph API] Applying client-side UserIds filter..." -ForegroundColor DarkGray
+ $beforeFilter = $normalized.Count
+ $normalized = $normalized | Where-Object { $UserIds -contains $_.UserIds }
+ Write-Host " [Graph API] Filtered: $beforeFilter → $($normalized.Count) records" -ForegroundColor DarkGray
+ }
+
+ Write-Host " [Graph API] Normalization complete: $($normalized.Count) records ready" -ForegroundColor Green
+
+ return $normalized
+ }
+ catch {
+ Write-Host " [Graph API] Query error: $($_.Exception.Message)" -ForegroundColor Red
+ Write-Host " [Graph API] Falling back to empty result set" -ForegroundColor Yellow
+ return @()
+ }
+ }
+}
+
+# ==============================================
+# DUAL-MODE DISCONNECTION FUNCTION
+# ==============================================
+# Unified disconnection for both EOM and Graph API modes
+
+function Disconnect-PurviewAudit {
+    <#
+    .SYNOPSIS
+    Disconnects the active audit session: Exchange Online (EOM mode) or Microsoft Graph.
+
+    .DESCRIPTION
+    Mode-aware, best-effort teardown. Failures are swallowed deliberately because the
+    session may already be gone; a DarkGray note is emitted instead of an error.
+
+    EOM Mode:
+    - Calls Disconnect-ExchangeOnline with -Confirm:$false (no prompt)
+
+    Graph API Mode:
+    - Checks Get-MgContext first, then calls Disconnect-MgGraph
+
+    .PARAMETER UseEOMMode
+    If true, disconnect from EOM. If false, disconnect from Graph.
+
+    .OUTPUTS
+    None
+    #>
+
+    param(
+        [Parameter(Mandatory = $false)]
+        [bool]$UseEOMMode = $false
+    )
+
+    if ($UseEOMMode) {
+        # ----- EOM mode: tear down the Exchange Online session -----
+        try {
+            Write-LogHost "Disconnecting from Exchange Online..." -ForegroundColor Gray
+            Disconnect-ExchangeOnline -Confirm:$false -ErrorAction Stop | Out-Null
+            Write-LogHost "   Disconnected from Exchange Online" -ForegroundColor Green
+        }
+        catch {
+            # Best-effort: session may not exist or is already closed
+            Write-LogHost "   (Exchange Online disconnection skipped or already disconnected)" -ForegroundColor DarkGray
+        }
+        return
+    }
+
+    # ----- Graph API mode: tear down the Microsoft Graph context -----
+    try {
+        # Probe for an active context before attempting disconnection
+        $context = Get-MgContext -ErrorAction SilentlyContinue
+
+        if (-not $context) {
+            Write-LogHost "   (Not connected to Microsoft Graph)" -ForegroundColor DarkGray
+            return
+        }
+
+        Write-LogHost "Disconnecting from Microsoft Graph..." -ForegroundColor Gray
+        Disconnect-MgGraph -ErrorAction Stop | Out-Null
+        Write-LogHost "   Disconnected from Microsoft Graph" -ForegroundColor Green
+    }
+    catch {
+        # Best-effort: context may have expired between the probe and the disconnect
+        Write-LogHost "   (Microsoft Graph disconnection skipped or already disconnected)" -ForegroundColor DarkGray
+    }
+}
+
+# Pre-query diagnostic: verify Search-UnifiedAuditLog availability & likely permission coverage.
+# NOTE: This function is now wrapped by Test-PurviewAuditCapability for EOM mode compatibility
+function Invoke-AuditCapabilityDiagnostics {
+    param()
+    # Honor the script-level -SkipDiagnostics switch: report success without probing.
+    if ($SkipDiagnostics) { return $true }
+    # Cmdlet presence check: missing cmdlet means the EOM module is not loaded at all.
+    $cmd = Get-Command Search-UnifiedAuditLog -ErrorAction SilentlyContinue
+    if (-not $cmd) {
+        Write-LogHost "DIAGNOSTIC: 'Search-UnifiedAuditLog' cmdlet not found in this session." -ForegroundColor Red
+        Write-LogHost "Guidance: Ensure ExchangeOnlineManagement module (v3+) is installed and imported. Try: Install-Module ExchangeOnlineManagement -Scope CurrentUser" -ForegroundColor Yellow
+        Write-LogHost "Role Requirements: Membership in 'Audit Logs' (preferred) or 'View-Only Audit Logs' / appropriate Compliance role group." -ForegroundColor Yellow
+        return $false
+    }
+    # Attempt a minimal, very narrow harmless probe query (1-minute window, ResultSize 1)
+    try {
+        $now = (Get-Date).ToUniversalTime()
+        $probeStart = $now.AddMinutes(-7)
+        $probeEnd = $now.AddMinutes(-6)
+        # Probe with 'UserLoggedIn'. NOTE(review): contrary to the original intent, this is
+        # one of the MOST common audit operations — the tiny 1-minute window and ResultSize 1
+        # are what keep the probe cheap, not the operation's rarity. Any result (or none) is fine;
+        # only a thrown error matters here.
+        $null = Search-UnifiedAuditLog -StartDate $probeStart -EndDate $probeEnd -Operations 'UserLoggedIn' -ResultSize 1 -ErrorAction Stop
+        Write-LogHost "Diagnostics: Audit search cmdlet available (probe succeeded/no error)." -ForegroundColor DarkGray
+        return $true
+    }
+    catch {
+        # Classify the failure to give targeted remediation guidance.
+        $msg = $_.Exception.Message
+        Write-LogHost "DIAGNOSTIC: Probe audit search failed: $msg" -ForegroundColor Yellow
+        if ($msg -match 'is not within the current user' -or $msg -match 'Access denied' -or $msg -match 'not authorized' -or $msg -match 'insufficient') {
+            Write-LogHost "Likely Missing Roles: Add the account to 'Audit Logs' (Microsoft Purview) or at minimum 'View-Only Audit Logs'." -ForegroundColor Red
+        }
+        elseif ($msg -match 'The term .*Search-UnifiedAuditLog.* is not recognized') {
+            Write-LogHost "Module Issue: Cmdlet not loaded. Import-Module ExchangeOnlineManagement or update module version." -ForegroundColor Red
+        }
+        else {
+            Write-LogHost "General Guidance: Ensure Unified Audit Log is enabled tenant-wide & correct role assignments are in place." -ForegroundColor Yellow
+        }
+        return $false
+    }
+}
+
+function Invoke-SearchUnifiedAuditLogWithRetry {
+    <#
+    .SYNOPSIS
+    Paginated Search-UnifiedAuditLog wrapper with per-page retry, early 10K detection,
+    and a hard cap at the requested -ResultSize.
+    .DESCRIPTION
+    * Honors $PacingMs but leaves adaptive pacing / circuit breaker to the caller.
+    * Maintains metrics.PagesFetched & the $script:Hit10KLimit flags used by higher layers.
+    * FIX: when session pagination is active, every call now sends SessionCommand
+      'ReturnLargeSet'. Per Microsoft's Search-UnifiedAuditLog documentation, the SAME
+      SessionCommand value must be used for every call that shares a SessionId; the
+      previous code switched to 'ReturnNextPreviewPage' from page 2 onward, mixing the
+      two paging modes and risking incomplete/duplicated pages on large result sets.
+    .PARAMETER Start
+    UTC window start passed to -StartDate.
+    .PARAMETER End
+    UTC window end passed to -EndDate.
+    .PARAMETER Operation
+    Single audit operation name passed to -Operations.
+    .PARAMETER ResultSize
+    Hard cap on total records returned; excess server records are trimmed from the tail.
+    .PARAMETER UserIds
+    Optional UPN filter forwarded to the cmdlet.
+    .PARAMETER MaxRetries
+    Per-page retry budget with exponential backoff (capped at 30s per wait).
+    .PARAMETER AutoSubdivide
+    Enables the >=10K estimate warning and the smart-subdivision suggestion.
+    .OUTPUTS
+    Object[] of audit records (possibly empty); never more than -ResultSize items.
+    #>
+    param(
+        [Parameter(Mandatory)][datetime]$Start,
+        [Parameter(Mandatory)][datetime]$End,
+        [Parameter(Mandatory)][string]$Operation,
+        [Parameter(Mandatory)][int]$ResultSize,
+        [string[]]$UserIds,
+        [int]$MaxRetries = 3,
+        [bool]$AutoSubdivide = $true
+    )
+
+    $script:Hit10KLimit = $false
+    $script:LimitTimeWindow = ""
+    $allResults = New-Object System.Collections.ArrayList
+    $totalFetched = 0
+    $pageNumber = 1
+    $maxPages = 50
+    $pageSize = [Math]::Min($ResultSize, 5000)
+    # Session pagination is only required when more than one 5000-record page is requested
+    $useSessionPagination = $ResultSize -gt 5000
+    $sessionId = if ($useSessionPagination) { [guid]::NewGuid().ToString() } else { $null }
+
+    Write-LogHost ("   Using {0} pagination (page size {1})" -f ($(if ($useSessionPagination){'session'} else {'standard'}), $pageSize)) -ForegroundColor DarkCyan
+
+    try {
+        while ($totalFetched -lt $ResultSize -and $pageNumber -le $maxPages) {
+            $remainingNeeded = $ResultSize - $totalFetched
+            $currentPageSize = [Math]::Min($pageSize, $remainingNeeded)
+            $attempt = 0; $pageResults = $null
+            # Inner retry loop: exponential backoff, new session id after repeated failures
+            while ($attempt -le $MaxRetries) {
+                try {
+                    $params = @{ StartDate = $Start; EndDate = $End; Operations = $Operation; ResultSize = $currentPageSize; ErrorAction = 'Stop' }
+                    if ($UserIds) { $params['UserIds'] = $UserIds }
+                    if ($useSessionPagination) {
+                        $params['SessionId'] = $sessionId
+                        # Same SessionCommand on EVERY page sharing this SessionId (per cmdlet docs);
+                        # the server advances the cursor on each repeated ReturnLargeSet call.
+                        $params['SessionCommand'] = 'ReturnLargeSet'
+                    }
+                    if ($PacingMs -gt 0) { Start-Sleep -Milliseconds $PacingMs }
+                    if ($attempt -gt 0) { Write-LogHost "   Retrying page $pageNumber (attempt $($attempt+1))" -ForegroundColor Yellow }
+                    $pageResults = Search-UnifiedAuditLog @params
+                    break
+                }
+                catch {
+                    $attempt++
+                    if ($attempt -le $MaxRetries) {
+                        $delay = [Math]::Min(30, [Math]::Pow(2, $attempt))
+                        Write-LogHost "   Page $pageNumber failed: $($_.Exception.Message). Backoff ${delay}s" -ForegroundColor DarkYellow
+                        Start-Sleep -Seconds $delay
+                        # NOTE(review): a fresh session id restarts server-side pagination, so pages
+                        # already collected may be returned again — downstream dedup is assumed; verify.
+                        if ($useSessionPagination -and $attempt -gt 1) { $sessionId = [guid]::NewGuid().ToString(); Write-LogHost "   New session id for retry: $sessionId" -ForegroundColor DarkGray }
+                    } else {
+                        Write-LogHost "   Page $pageNumber permanently failed after $attempt attempts" -ForegroundColor Red
+                        throw
+                    }
+                }
+            }
+
+            if ($pageResults -and $pageResults.Count -gt 0) {
+                # Early 10K detection (first page's ResultCount metadata is a server-side estimate)
+                if ($pageNumber -eq 1 -and $AutoSubdivide) {
+                    try {
+                        $est = $pageResults[0].ResultCount
+                        if ($null -ne $est -and $est -ge 10000) {
+                            Write-LogHost "   [!] Estimated >=10K records in window – consider subdivision" -ForegroundColor Yellow
+                        }
+                    } catch {}
+                }
+                # Safe add - handle both array and single object returns
+                if ($pageResults -is [Array]) {
+                    foreach ($item in $pageResults) { [void]$allResults.Add($item) }
+                } else {
+                    [void]$allResults.Add($pageResults)
+                }
+                $totalFetched += $pageResults.Count
+                # Hard stop enforcement: never return more than requested -ResultSize
+                if ($totalFetched -ge $ResultSize) {
+                    if ($totalFetched -gt $ResultSize) {
+                        $excess = $totalFetched - $ResultSize
+                        # Trim excess items from tail
+                        for ($trim = 0; $trim -lt $excess; $trim++) { [void]$allResults.RemoveAt($allResults.Count - 1) }
+                        $totalFetched = $ResultSize
+                    }
+                    Write-LogHost "   Requested result size $ResultSize reached (cumulative: $totalFetched) – stopping" -ForegroundColor DarkCyan
+                    try { $script:metrics.PagesFetched += 1 } catch {}
+                    break
+                }
+                try { $script:metrics.PagesFetched += 1 } catch {}
+                Write-LogHost "   Page $pageNumber returned $($pageResults.Count) (cumulative: $totalFetched)" -ForegroundColor DarkCyan
+                # A short page means the server has no more data for this window
+                if ($pageResults.Count -lt $currentPageSize) { break }
+                if ($totalFetched -ge 10000) {
+                    $script:Hit10KLimit = $true
+                    $script:LimitTimeWindow = "$(($Start).ToString('yyyy-MM-dd HH:mm')) to $(($End).ToString('yyyy-MM-dd HH:mm'))"
+                    # SMART SUBDIVISION for EOM: Analyze timestamp distribution to suggest how
+                    # many slices the window needs so each slice stays under the 10K server cap.
+                    if ($AutoSubdivide -and $allResults.Count -ge 10000) {
+                        try {
+                            $timestamps = @()
+                            foreach ($rec in $allResults) {
+                                if ($rec.CreationDate) {
+                                    $ts = script:Parse-DateSafe $rec.CreationDate; if ($ts) { $timestamps += $ts }
+                                }
+                            }
+                            if ($timestamps.Count -gt 100) {
+                                $sorted = $timestamps | Sort-Object
+                                $coveredHours = ($sorted[-1] - $sorted[0]).TotalHours
+                                $totalHours = ($End - $Start).TotalHours
+                                if ($coveredHours -gt 0 -and $coveredHours -lt $totalHours) {
+                                    # Target ~8000 records per slice to leave headroom below the 10K cap
+                                    $recordsPerHour = 10000 / $coveredHours
+                                    $targetHours = 8000 / $recordsPerHour
+                                    $subdivFactor = [Math]::Max(2, [Math]::Ceiling($totalHours / $targetHours))
+                                    Write-LogHost "   [SMART SUBDIVISION] EOM: $([Math]::Round($coveredHours,2))h of $([Math]::Round($totalHours,2))h → suggest dividing by $subdivFactor" -ForegroundColor Cyan
+                                }
+                            }
+                        } catch {}
+                    }
+                    Write-LogHost "   10K server limit reached in this window" -ForegroundColor Yellow
+                    break
+                }
+            } else {
+                Write-LogHost "   Page $pageNumber empty – stopping" -ForegroundColor DarkCyan
+                break
+            }
+            $pageNumber++
+        }
+
+        if ($pageNumber -gt $maxPages) {
+            Write-LogHost "   Reached max page limit ($maxPages)" -ForegroundColor Yellow
+        }
+        Write-LogHost "   Pagination complete: $($allResults.Count) records" -ForegroundColor Green
+        return $allResults.ToArray()
+    }
+    catch {
+        Write-LogHost "   Pagination failed: $($_.Exception.Message)" -ForegroundColor Red
+        throw
+    }
+}
+
+# Wrapper for main processing (kept minimal for clarity)
+function Invoke-PAXProcessingCore {
+    param()
+    try {
+        # Existing core logic already executed above in previous top-level scope.
+        # This wrapper intentionally left minimal to avoid structural parse issues.
+        # Intentionally a no-op: retained so existing call sites remain valid.
+    }
+    catch {
+        # Defensive: currently unreachable (empty try), kept for future body additions.
+        Write-LogHost "Core processing error: $($_.Exception.Message)" -ForegroundColor Red
+        throw
+    }
+}
+
+# Adaptive-concurrency / circuit-breaker shared state (read & written by the query layer)
+$script:adaptiveThroughputBaseline = $null   # Baseline records/sec once measured; $null until first sample
+$script:adaptiveLowLatencyStreak = 0         # Consecutive low-latency samples (step-up trigger counter)
+$script:consecutiveBlockFailures = 0         # Consecutive failed query blocks; feeds Test-CircuitBreakerTrip
+$script:circuitBreakerOpen = $false          # $true while the breaker is tripped (queries paused)
+$script:circuitBreakerOpenUntil = $null      # Timestamp when a tripped breaker may close again
+
+function Get-BackoffDelaySeconds {
+    <#
+    .SYNOPSIS
+    Computes a capped exponential-backoff delay: BaseSeconds * 2^(Attempt-1), at most MaxSeconds.
+    .OUTPUTS
+    [double] delay in seconds; 0 when Attempt < 1.
+    #>
+    param(
+        [Parameter(Mandatory)][int]$Attempt,
+        [Parameter(Mandatory)][double]$BaseSeconds,
+        [Parameter(Mandatory)][int]$MaxSeconds
+    )
+    # Attempt 0 (or negative) means "no backoff yet"
+    if ($Attempt -lt 1) { return 0 }
+    $uncapped = [math]::Pow(2, $Attempt - 1) * $BaseSeconds
+    return [math]::Min($MaxSeconds, $uncapped)
+}
+
+function Test-CircuitBreakerTrip {
+    <#
+    .SYNOPSIS
+    Returns $true once the consecutive-failure count has reached the configured threshold.
+    #>
+    param(
+        [Parameter(Mandatory)][int]$ConsecutiveFailures,
+        [Parameter(Mandatory)][int]$Threshold
+    )
+    # Equivalent to ($ConsecutiveFailures -ge $Threshold)
+    return -not ($ConsecutiveFailures -lt $Threshold)
+}
+
+# Serialization / flattening tuning constants
+$JsonDepth = 60                    # Max depth for ConvertTo-Json serialization of nested records
+$FlatDepthStandard = 6             # Default flattening depth for normal exports
+$FlatDepthDeep = 120               # Flattening depth used by -ExplodeDeep (overridable via -FlatDepth)
+$ExplosionPerRecordRowCap = 1000   # Safety cap: max rows a single record may explode into
+$script:TenantPrimaryDomain = $null
+if (-not $script:TenantId) { $script:TenantId = $null }
+$script:TenantIndicators = @()
+$ForcedRawInputCsvExplosion = $false   # Set below when -RAWInputCSV forces explosion mode
+
+# Auth config storage for token refresh (AppRegistration mode)
+$script:AuthConfig = @{
+    Method = $null
+    TenantId = $null
+    ClientId = $null
+    ClientSecret = $null # SecureString
+    CertThumbprint = $null
+    CertPath = $null
+    CertPassword = $null # SecureString
+    CertStoreLocation = 'CurrentUser'
+    TokenIssueTime = $null
+    CanReauthenticate = $false   # $true only when stored credentials allow silent re-auth
+}
+
+# Shared auth state for thread job token refresh (synchronized hashtable for cross-thread access)
+# Thread jobs read Token from this hashtable; main thread updates it proactively before expiry
+$script:SharedAuthState = [hashtable]::Synchronized(@{
+    Token = $null
+    ExpiresOn = $null
+    LastRefresh = $null
+    RefreshCount = 0
+    AuthMethod = $null
+})
+
+# Checkpoint/Resume state for long-running operations
+$script:CheckpointPath = $null # Path to checkpoint JSON file
+$script:CheckpointData = $null # Loaded/active checkpoint object
+$script:IsResumeMode = $false # Whether we're resuming from checkpoint
+$script:PartialOutputPath = $null # Path to _PARTIAL.csv file during execution
+$script:OriginallySkippedPartitionIndices = @() # Partition indices that were already completed before this run (for resume mode)
+
+# Token expiration detection (reactive - triggers on 401 Unauthorized)
+$script:TokenAcquiredTime = $null # When current token was obtained
+$script:AuthFailureDetected = $false # Set to $true when 401 error detected - triggers reauth prompt
+$script:Auth401MessageShown = $false # Suppresses duplicate 401 error messages (reset after successful reauth)
+$script:AuthPromptInProgress = $false # Debounce flag - prevents multiple auth prompts from triggering simultaneously
+
+# PowerShell version detection for parallel processing features
+$script:IsPS7 = ($PSVersionTable.PSVersion.Major -ge 7)
+
+# -RAWInputCSV implies re-exploding previously exported raw rows
+if ($RAWInputCSV) { $ForcedRawInputCsvExplosion = $true }
+
+# Pre-compiled case-insensitive boolean-normalization regexes (hot path: used per cell by BoolTFFast)
+$script:RegexTrueFalse = [regex]::new('^(?i:true|false)$', [System.Text.RegularExpressions.RegexOptions]::Compiled)
+$script:RegexYes1 = [regex]::new('^(?i:yes|1)$', [System.Text.RegularExpressions.RegexOptions]::Compiled)
+$script:RegexNo0 = [regex]::new('^(?i:no|0)$', [System.Text.RegularExpressions.RegexOptions]::Compiled)
+$script:LocaleDateParsingNotified = $false   # One-shot flag: Parse-DateSafe logs the locale notice once
+
+function script:Parse-DateSafe {
+    <#
+    .SYNOPSIS
+    Culture-invariant date parsing that handles Purview API date formats.
+    .DESCRIPTION
+    Purview API returns dates in US format (M/d/yyyy HH:mm:ss) regardless of client locale.
+    This function safely parses such dates on systems with non-US regional settings (e.g., UK).
+    Attempt order: ISO 8601 (normalized to UTC) → explicit US formats → invariant Parse fallback.
+    Returns $null when the value cannot be parsed at all.
+    #>
+    param([Parameter(Mandatory=$false)][AllowNull()][AllowEmptyString()]$DateValue)
+
+    # One-time notice when running under a non-US locale
+    if (-not $script:LocaleDateParsingNotified) {
+        $script:LocaleDateParsingNotified = $true
+        $currentCulture = [System.Threading.Thread]::CurrentThread.CurrentCulture.Name
+        if ($currentCulture -and $currentCulture -ne 'en-US') {
+            Write-LogHost " [DATE] Locale-aware date parsing active (Culture: $currentCulture)" -ForegroundColor DarkCyan
+        }
+    }
+
+    # Pass-through for values already typed; $null for empty input
+    if ($DateValue -is [datetime]) { return $DateValue }
+    if ([string]::IsNullOrWhiteSpace($DateValue)) { return $null }
+
+    $dateStr = [string]$DateValue
+    $invariant = [System.Globalization.CultureInfo]::InvariantCulture
+    $parsed = [datetime]::MinValue
+
+    # ISO 8601 candidates (most common from properly-formatted API responses); adjusted to UTC
+    $isoFormats = [string[]]@(
+        'yyyy-MM-ddTHH:mm:ss.fffffffK',
+        'yyyy-MM-ddTHH:mm:ss.fffK',
+        'yyyy-MM-ddTHH:mm:ssK',
+        'yyyy-MM-ddTHH:mm:ss.fffffff',
+        'yyyy-MM-ddTHH:mm:ss.fffZ',
+        'yyyy-MM-ddTHH:mm:ssZ',
+        'yyyy-MM-ddTHH:mm:ss.fff',
+        'yyyy-MM-ddTHH:mm:ss',
+        'yyyy-MM-dd HH:mm:ss.fff',
+        'yyyy-MM-dd HH:mm:ss',
+        'yyyy-MM-dd'
+    )
+    if ([datetime]::TryParseExact($dateStr, $isoFormats, $invariant, [System.Globalization.DateTimeStyles]::AdjustToUniversal, [ref]$parsed)) {
+        return $parsed
+    }
+
+    # US-style candidates (what Purview actually returns - causes UK locale issues);
+    # no UTC adjustment here, matching the historical behavior for these formats.
+    $usFormats = [string[]]@(
+        'M/d/yyyy HH:mm:ss',
+        'M/d/yyyy h:mm:ss tt',
+        'M/d/yyyy H:mm:ss',
+        'MM/dd/yyyy HH:mm:ss',
+        'M/d/yyyy',
+        'MM/dd/yyyy'
+    )
+    if ([datetime]::TryParseExact($dateStr, $usFormats, $invariant, [System.Globalization.DateTimeStyles]::None, [ref]$parsed)) {
+        return $parsed
+    }
+
+    # Last resort: invariant-culture general parse; $null signals "unparseable"
+    try { return [datetime]::Parse($dateStr, $invariant) }
+    catch { return $null }
+}
+
+function script:Format-DatePurviewFast($dt) {
+    # Formats any date-ish value as ISO-8601 UTC with millisecond precision.
+    # Returns '' for $null/empty input or anything Parse-DateSafe cannot interpret.
+    if (-not $dt) { return '' }
+    try {
+        $resolved = if ($dt -is [datetime]) { $dt } else { script:Parse-DateSafe $dt }
+        if ($null -eq $resolved) { return '' }
+        return $resolved.ToUniversalTime().ToString('yyyy-MM-ddTHH:mm:ss.fffZ')
+    }
+    catch { return '' }
+}
+
+function script:BoolTFFast($v) {
+    # Normalizes boolean-ish values to 'TRUE'/'FALSE' for CSV export.
+    # Non-boolean values pass through as their string form; $null becomes ''.
+    if ($null -eq $v) { return '' }
+    if ($v -is [bool]) { return $v.ToString().ToUpper() }
+    $text = [string]$v
+    # Pre-compiled script-scope regexes keep this hot path cheap (called per cell)
+    if ($script:RegexTrueFalse.IsMatch($text)) { return $text.ToUpper() }
+    if ($script:RegexYes1.IsMatch($text)) { return 'TRUE' }
+    if ($script:RegexNo0.IsMatch($text)) { return 'FALSE' }
+    return $text
+}
+
+# Apply FlatDepth override (if provided)
+# -FlatDepth replaces the deep-flattening depth outright, and also lowers the standard
+# depth if the override is smaller than the standard default. Wrapped in try/catch so a
+# failure here never blocks startup.
+try {
+    if ($PSBoundParameters.ContainsKey('FlatDepth')) {
+        $FlatDepthDeep = $FlatDepth
+        $FlatDepthStandard = [int][Math]::Min($FlatDepth, $FlatDepthStandard)
+    }
+} catch {}
+
+# Returns scalars untouched; serializes complex objects to compact JSON (depth $JsonDepth).
+# Falls back to plain string coercion if serialization fails; $null becomes ''.
+function script:ToJsonIfObjectFast($v) {
+    if ($null -eq $v) { return '' }
+    if (Test-ScalarValue $v) { return $v }
+    try { return ($v | ConvertTo-Json -Depth $JsonDepth -Compress) }
+    catch { return [string]$v }
+}
+
+function script:GetArrayFast($parent, [string]$name) {
+    # Reads a property via Get-SafeProperty and always returns an array:
+    # @() for $null, otherwise the value wrapped with @(). The array subexpression
+    # operator already flattens enumerables and wraps scalars, so a single return
+    # covers both cases (the original's IEnumerable branch was redundant).
+    $value = Get-SafeProperty $parent $name
+    if ($null -eq $value) { return @() }
+    return @($value)
+}
+
+# Explosion is "on" for progress-weighting purposes if any explosion switch is active
+$effectiveExplodeForProgress = ($ExplodeDeep -or $ExplodeArrays -or $ForcedRawInputCsvExplosion)
+
+# MEMORY MANAGEMENT: Resolve MaxMemoryMB (-1 = auto 75% of system RAM, 0 = disabled, >0 = explicit limit)
+$script:ResolvedMaxMemoryMB = $MaxMemoryMB
+if ($MaxMemoryMB -eq -1) {
+    # Auto-detect: use 75% of total physical memory
+    try {
+        $totalRAM = [math]::Round((Get-CimInstance -ClassName Win32_ComputerSystem -ErrorAction SilentlyContinue).TotalPhysicalMemory / 1MB, 0)
+        $script:ResolvedMaxMemoryMB = [math]::Round($totalRAM * 0.75, 0)
+        Write-LogHost "Memory management: Auto-detected ${totalRAM}MB total RAM -> limit set to $($script:ResolvedMaxMemoryMB)MB (75%)" -ForegroundColor Cyan
+    } catch {
+        # Fallback if CIM fails (e.g., Linux/macOS): Win32_ComputerSystem is Windows-only
+        $script:ResolvedMaxMemoryMB = 4096
+        Write-LogHost "Memory management: Could not detect system RAM, defaulting to 4096MB limit" -ForegroundColor Yellow
+    }
+} elseif ($MaxMemoryMB -eq 0) {
+    $script:ResolvedMaxMemoryMB = 0
+    Write-LogHost "Memory management: DISABLED (-MaxMemoryMB 0)" -ForegroundColor DarkGray
+}
+
+# Memory flush mode: enabled when ResolvedMaxMemoryMB > 0 AND explosion is disabled (explosion needs full $allLogs in memory)
+$script:memoryFlushEnabled = ($script:ResolvedMaxMemoryMB -gt 0) -and (-not $ExplodeDeep) -and (-not $ExplodeArrays) -and (-not $ForcedRawInputCsvExplosion)
+$script:memoryFlushed = $false # Track if we've flushed $allLogs during this run (affects export path)
+# -EnableParallel is a legacy switch: when present it forces ParallelMode 'On'
+$enableParallelSwitchUsed = $EnableParallel.IsPresent
+if ($enableParallelSwitchUsed) { $ParallelMode = 'On' }
+
+function Get-ParallelActivationDecision {
+    <#
+    .SYNOPSIS
+    Decides whether the query plan should run in parallel, given the mode and limits.
+    .DESCRIPTION
+    'On'   -> parallel whenever PS7+ and both limits are positive (forced).
+    'Auto' -> parallel when the auto-eligibility heuristic is satisfied.
+    other  -> sequential.
+    The auto heuristic accepts either multiple groups, or a single group whose planned
+    Concurrency yields more than one partition (so single-activity multi-partition runs
+    are no longer forced sequential).
+    .OUTPUTS
+    Hashtable: @{ Enabled = [bool]; Reason = [string]; AutoEligible = [bool] }
+    #>
+    param(
+        [array]$QueryPlan,
+        [string]$ParallelMode,
+        [int]$MaxParallelGroups,
+        [int]$MaxConcurrency
+    )
+    $isPs7 = ($PSVersionTable.PSVersion.Major -ge 7)
+    $groupCount = $QueryPlan.Count
+    $totalActivities = ($QueryPlan | ForEach-Object { $_.Activities.Count } | Measure-Object -Sum).Sum
+    $singleGroupMultiPartition = ($groupCount -eq 1) -and ($QueryPlan[0].Concurrency -gt 1)
+    $autoEligible = $isPs7 -and ($MaxParallelGroups -gt 0) -and ($MaxConcurrency -gt 1) -and ($groupCount -ge 1) -and (($groupCount -gt 1) -or $singleGroupMultiPartition)
+
+    if ($ParallelMode -eq 'On') {
+        $reason = if ($isPs7) { 'Forced On' } else { 'PS < 7 (cannot parallel)' }
+        return @{ Enabled = ($isPs7 -and $MaxParallelGroups -gt 0 -and $MaxConcurrency -gt 0); Reason = $reason; AutoEligible = $autoEligible }
+    }
+    if ($ParallelMode -eq 'Auto') {
+        $reason = if ($autoEligible) { 'Auto criteria met' } else { 'Auto criteria not met' }
+        return @{ Enabled = $autoEligible; Reason = $reason; AutoEligible = $autoEligible }
+    }
+    return @{ Enabled = $false; Reason = 'Mode Off'; AutoEligible = $autoEligible }
+}
+
+# Phase weights for the composite progress percentage. Explosion-heavy runs shift most
+# weight onto the Explosion phase; query-only runs weight Query 80% / Export 20%.
+$weights = if ($effectiveExplodeForProgress) { @{ Query = 0.30; Explosion = 0.60; Export = 0.10 } } else { @{ Query = 0.80; Explosion = 0.00; Export = 0.20 } }
+if ($RAWInputCSV) {
+    try {
+        # RAW CSV re-processing: no live query, parsing + explosion dominate
+        $weights = @{ Parsing = 0.10; Query = 0.0; Explosion = 0.80; Export = 0.10 }
+    }
+    catch {}
+}
+# Keep a pristine copy so Update-Progress can restore weights after temporary overrides
+$script:originalWeights = $weights.Clone()
+# Shared progress state: per-phase Current/Total counters consumed by Update-Progress
+$script:progressState = @{ Weights = $weights; Phase = 'Query'; Parsing = @{Current = 0; Total = 0 }; Query = @{Current = 0; Total = 0 }; Explode = @{Current = 0; Total = 0 }; Export = @{Current = 0; Total = 1 } }
+function Set-ProgressPhase {
+    # Switches the composite progress display to a new phase and immediately refreshes it.
+    param([ValidateSet('Parsing', 'Query', 'Explosion', 'Export', 'Complete')] [string]$Phase, [string]$Status = '')
+    $script:progressState.Phase = $Phase
+    Update-Progress -Status $Status
+}
+function Update-Progress {
+    <#
+    .SYNOPSIS
+    Builds the composite multi-phase progress status line from $script:progressState.
+    .DESCRIPTION
+    Combines Parsing/Query/Explosion/Export counters (weighted per $script:progressState.Weights)
+    into a single status string, with optional batch-range details for the Explosion phase.
+    The final display call is currently a placeholder (status text computed but not rendered).
+    #>
+    param(
+        [string]$Status = '',
+        [int]$BatchCurrent = 0,
+        [int]$BatchTotal = 0,
+        [int]$BatchRangeStart = 0,
+        [int]$BatchRangeEnd = 0,
+        [int]$BatchStartPercent = 0,
+        [int]$BatchEndPercent = 0,
+        [bool]$BatchTotalIsEstimate = $false
+    )
+    # Aliases into the shared progress state (w = weights, ps/qs/es/xs = per-phase counters)
+    $w = $script:progressState.Weights; $ps = $script:progressState.Parsing; $qs = $script:progressState.Query; $es = $script:progressState.Explode; $xs = $script:progressState.Export
+    # Per-phase completion fractions (0.0 when the phase has no total or zero weight)
+    $pPct = if ($ps.Total -gt 0 -and $w.ContainsKey('Parsing') -and $w.Parsing -gt 0) { [double]$ps.Current / [double]$ps.Total } else { 0.0 }
+    $qPct = if ($qs.Total -gt 0) { [double]$qs.Current / [double]$qs.Total } else { 0.0 }
+    $ePct = if ($es.Total -gt 0 -and $w.Explosion -gt 0) { [double]$es.Current / [double]$es.Total } else { 0.0 }
+    $xPct = if ($xs.Total -gt 0) { [double]$xs.Current / [double]$xs.Total } else { 0.0 }
+    # Zero-record weighting: emphasize Query progression when no records retrieved yet
+    if ($script:progressState.Phase -eq 'Query' -and ($script:metrics.TotalRecordsFetched -eq 0)) {
+        $w.Query = 1.0; $w.Explosion = 0.0; $w.Export = 0.0; if ($w.ContainsKey('Parsing')) { $w.Parsing = 0.0 }
+    }
+    # Restoration: Once at least one record has been fetched, revert weights if they were temporarily overridden.
+    elseif ($script:progressState.Phase -eq 'Query' -and ($script:metrics.TotalRecordsFetched -gt 0)) {
+        if ($script:originalWeights -and $w.Query -eq 1.0 -and $w.Explosion -eq 0.0 -and $w.Export -eq 0.0) {
+            foreach ($key in $script:originalWeights.Keys) { $w[$key] = $script:originalWeights[$key] }
+        }
+    }
+    # Calculate phase-specific progress details
+    $phase = $script:progressState.Phase
+    $pDetail = if ($w.ContainsKey('Parsing') -and $w.Parsing -gt 0 -and $ps.Total -gt 0) { "{0}/{1}({2}%)" -f $ps.Current, $ps.Total, ([int]([Math]::Round($pPct * 100))) } else { '' }
+    $qDetail = if ($w.Query -gt 0 -and $qs.Total -gt 0) { "{0}/{1}({2}%)" -f $qs.Current, $qs.Total, ([int]([Math]::Round($qPct * 100))) } else { '' }
+    # Explosion detail: explicit record range (with optional batch percent span) takes priority
+    if ($BatchRangeStart -ge 1 -and $BatchRangeEnd -ge 1 -and $es.Total -gt 0) {
+        if ($BatchStartPercent -ge 0 -and $BatchEndPercent -gt 0) {
+            $batchTotalDisplay = if ($BatchTotalIsEstimate) { "~$BatchTotal" } else { "$BatchTotal" }
+            $batchInfo = if ($BatchTotal -ge 1) { " Batch: {0}/{1}({2}%-{3}%)" -f $BatchCurrent, $batchTotalDisplay, $BatchStartPercent, $BatchEndPercent } else { '' }
+        }
+        else {
+            $batchPct = if ($BatchTotal -gt 0 -and $BatchCurrent -gt 0) { [int]([Math]::Round(([double]$BatchCurrent / [double]$BatchTotal) * 100)) } else { 0 }
+            $batchTotalDisplay = if ($BatchTotalIsEstimate) { "~$BatchTotal" } else { "$BatchTotal" }
+            $batchInfo = if ($BatchTotal -ge 1) { " Batch: {0}/{1}({2}%)" -f $BatchCurrent, $batchTotalDisplay, $batchPct } else { '' }
+        }
+        $explosionCounts = "Records {0}-{1}/{2}{3}" -f $BatchRangeStart, $BatchRangeEnd, $es.Total, $batchInfo
+    }
+    elseif ($BatchTotal -ge 1) {
+        # No explicit range: show current/total batch with percentage
+        $batchPct = if ($BatchTotal -gt 0 -and $BatchCurrent -gt 0) { [int]([Math]::Round(([double]$BatchCurrent / [double]$BatchTotal) * 100)) } else { 0 }
+        $batchTotalDisplay = if ($BatchTotalIsEstimate) { "~$BatchTotal" } else { "$BatchTotal" }
+        $batchInfo = " Batch: {0}/{1}({2}%)" -f $BatchCurrent, $batchTotalDisplay, $batchPct
+        $explosionCounts = if ($es.Total -gt 0) { "Records {0}/{1}{2}" -f $es.Current, $es.Total, $batchInfo } else { "0/0" }
+    }
+    else {
+        $explosionCounts = if ($es.Total -gt 0) { "{0}/{1}({2}%)" -f $es.Current, $es.Total, ([int]([Math]::Round($ePct * 100))) } else { '0/0' }
+    }
+    $eDetail = if ($w.Explosion -gt 0) {
+        if ($phase -eq 'Explosion') {
+            " | $explosionCounts"
+        }
+        else {
+            " | Explosion: $explosionCounts"
+        }
+    }
+    else { '' }
+    $batchDetail = ''
+    $xDetail = if ($xs.Total -gt 0) { " | Export: {0}/{1}({2}%)" -f $xs.Current, $xs.Total, ([int]([Math]::Round($xPct * 100))) } else { ' | Export: 0/0' }
+    # Parsing phase label reflects whether agent/prompt filters are also being applied
+    $parsingLabel = 'Pre-parsing JSON'
+    if (($AgentId -or $AgentsOnly -or $ExcludeAgents -or $PromptFilter) -and $phase -eq 'Parsing') {
+        $parsingLabel = 'Pre-parsing + Filtering'
+    }
+    $phasePrefix = switch ($phase) { 'Parsing' { $parsingLabel } 'Query' { 'Query' } 'Explosion' { 'Explosion' } 'Export' { 'Export' } 'Complete' { 'Complete' } default { $phase } }
+    if ($phase -eq 'Parsing' -and $pDetail) {
+        $composite = "${phasePrefix}: $pDetail$eDetail$batchDetail$xDetail"
+    }
+    elseif ($phase -eq 'Explosion' -and -not $qDetail) {
+        $composite = "Explosion: $explosionCounts$batchDetail$xDetail"
+    }
+    else {
+        $composite = if ($qDetail) { "${phasePrefix}: $qDetail$eDetail$batchDetail$xDetail" } else { "${phasePrefix}:$eDetail$batchDetail$xDetail" }
+    }
+    $statusText = if ($Status) { "$Status :: $composite" } else { $composite }
+    # Keep the status line within a safe console width
+    if ($statusText.Length -gt 180) { $statusText = $statusText.Substring(0, 177) + '...' }
+    # Placeholder for progress display compatibility
+}
+function Complete-Progress {
+    # Placeholder for progress display compatibility
+    # Intentionally a no-op: retained so call sites remain valid when the
+    # interactive progress bar is disabled.
+}
+
+# Lightweight explicit progress tick to ensure visual movement in long zero-record scenarios.
+function Write-ProgressTick {
+    # Placeholder for progress display compatibility
+    # Intentionally a no-op: callers invoke this during long idle polls; kept so
+    # those call sites need no changes if a visual tick is reintroduced.
+}
+
+# Adaptive block-size learning state (hours). Per-activity overrides plus a global floor.
+$script:learnedActivityBlockSize = @{}          # ActivityType -> learned block size in hours
+$script:globalLearnedBlockSize = $BlockHours    # Global learned size; starts at the configured -BlockHours
+
+function Get-QueryPlan {
+    <#
+    .SYNOPSIS
+    Builds the query execution plan (groups of activities) for the requested activity types.
+    .DESCRIPTION
+    Graph API mode: one combined group (the Graph query accepts multiple operationFilters).
+    EOM mode: one group per activity type (Search-UnifiedAuditLog performs better per-activity).
+    Exits the script (exit 1) if no activities survive normalization — upstream DSPM
+    validation should make that impossible.
+    FIX: removed the dead loop counter `$i`, which was initialized and incremented but
+    never read.
+    .PARAMETER RequestedActivities
+    Activity type names; duplicates and empty entries are dropped, order preserved.
+    .OUTPUTS
+    Array of hashtables: @{ Name; Group; Activities; Concurrency }
+    #>
+    param([string[]]$RequestedActivities)
+    # Normalize and deduplicate (DSPM logic already handled fallback, so no default here)
+    $normalized = @(); foreach ($a in $RequestedActivities) { if ($a -and -not ($normalized -contains $a)) { $normalized += $a } }
+    # If still empty after DSPM logic, something went wrong - but DSPM validation should prevent this
+    if ($normalized.Count -eq 0) {
+        Write-Host "ERROR: No activity types provided to Get-QueryPlan. This should not happen after DSPM validation." -ForegroundColor Red
+        exit 1
+    }
+    $plan = @()
+
+    # DUAL-MODE QUERY PLANNING:
+    # Graph API mode: Combine all activity types into single group (Graph API accepts multiple operationFilters)
+    # EOM mode: Separate groups per activity type (Search-UnifiedAuditLog performs better with single activity)
+    if (-not $UseEOM) {
+        # Graph API mode: Single group with all activities combined
+        $plan += @{
+            Name        = "Combined: $($normalized -join ', ')";
+            Group       = 'GraphCombined';
+            Activities  = $normalized;
+            Concurrency = $MaxConcurrency
+        }
+    }
+    else {
+        # EOM mode: One group per activity type (sequential processing)
+        foreach ($a in $normalized) {
+            $plan += @{
+                Name        = "Activity: $a";
+                Group       = 'EOM_Sequential';
+                Activities  = @($a);
+                Concurrency = $MaxConcurrency
+            }
+        }
+    }
+    return $plan
+}
+
+function Update-LearnedBlockSize {
+    <#
+    .SYNOPSIS
+    Adjusts the learned query block size (hours) for an activity based on the last window's outcome.
+    .DESCRIPTION
+    Success outcomes: shrink when the window hit or approached the -ResultSize cap,
+    grow when volume was low. Failure always halves the size (and lowers the global floor).
+    FIX: the "very low volume" branch (< 5% of ResultSize, grow 2.0x) was unreachable
+    because the "< 10%" branch was evaluated first and always matched; the stricter
+    check now runs first so very-low-volume windows grow faster as intended.
+    .PARAMETER ActivityType
+    Activity whose learned size is being updated.
+    .PARAMETER BlockHours
+    Block size (hours) that was just used.
+    .PARAMETER RecordCount
+    Records returned by the window.
+    .PARAMETER Success
+    Whether the window query succeeded.
+    #>
+    param([string]$ActivityType, [double]$BlockHours, [int]$RecordCount, [bool]$Success)
+    if ($Success) {
+        if ($RecordCount -eq $ResultSize) {
+            # Hit the cap exactly: halve (floor ~5 minutes) and lower the global floor too
+            $newSize = [Math]::Max(0.083333, $BlockHours * 0.5)
+            $script:learnedActivityBlockSize[$ActivityType] = $newSize
+            $script:globalLearnedBlockSize = [Math]::Min($script:globalLearnedBlockSize, $newSize)
+            Write-LogHost " → Learned: Reducing block size to $([math]::Round($newSize,2))h due to limit hit" -ForegroundColor Magenta
+        }
+        elseif ($RecordCount -gt ($ResultSize * 0.8)) {
+            # Near the cap: shrink moderately
+            $newSize = [Math]::Max(0.083333, $BlockHours * 0.7)
+            $script:learnedActivityBlockSize[$ActivityType] = $newSize
+            Write-LogHost " → Learned: Reducing block size to $([math]::Round($newSize,2))h (high volume: $RecordCount records)" -ForegroundColor Magenta
+        }
+        elseif ($RecordCount -lt ($ResultSize * 0.05)) {
+            # Very low volume: grow aggressively (checked BEFORE the 10% branch - see FIX note)
+            $newSize = [Math]::Min(24.0, $BlockHours * 2.0)
+            $script:learnedActivityBlockSize[$ActivityType] = $newSize
+            Write-LogHost " → Learned: Increasing block size to $([math]::Round($newSize,2))h (very low volume: $RecordCount records)" -ForegroundColor Magenta
+        }
+        elseif ($RecordCount -lt ($ResultSize * 0.1)) {
+            # Low volume: grow moderately (cap 24h)
+            $newSize = [Math]::Min(24.0, $BlockHours * 1.5)
+            $script:learnedActivityBlockSize[$ActivityType] = $newSize
+            Write-LogHost " → Learned: Increasing block size to $([math]::Round($newSize,2))h (low volume: $RecordCount records)" -ForegroundColor Magenta
+        }
+    } else {
+        # Failure: halve and lower the global floor so subsequent activities start smaller
+        $newSize = [Math]::Max(0.083333, $BlockHours * 0.5)
+        $script:learnedActivityBlockSize[$ActivityType] = $newSize
+        $script:globalLearnedBlockSize = [Math]::Min($script:globalLearnedBlockSize, $newSize)
+        Write-LogHost " → Learned: Reducing block size to $([math]::Round($newSize,2))h due to failure" -ForegroundColor Magenta
+    }
+}
+function Get-NextSmallerBlockSize {
+    # Halve the current block size, never going below 0.001389h (~2 minutes).
+    param([double]$CurrentSize)
+    $halved = $CurrentSize / 2
+    if ($halved -lt 0.001389) { return 0.001389 }
+    return $halved
+}
+
+function Get-OptimalBlockSize {
+    # Resolve the block size (hours) for an activity: per-activity learned value first,
+    # then the global learned value (if it ever diverged from -BlockHours), else -BlockHours.
+    param([string]$ActivityType)
+    if ($script:learnedActivityBlockSize.ContainsKey($ActivityType)) {
+        return $script:learnedActivityBlockSize[$ActivityType]
+    }
+    if ($script:globalLearnedBlockSize -ne $BlockHours) {
+        return $script:globalLearnedBlockSize
+    }
+    return $BlockHours
+}
+
+function Invoke-ActivityTimeWindowProcessing {
+    <#
+    .SYNOPSIS
+        Queries one activity type across a date range in adaptive time blocks.
+    .DESCRIPTION
+        Walks [StartDate, EndDate) in learned-size blocks, calling Invoke-PurviewAuditQuery
+        per block. Handles circuit-breaker open/cooldown state, exponential backoff with
+        jitter on failures, a single smaller-block retry, and dynamic progress estimation.
+        BUGFIX: the progress-total recalculation used [Math]::Max with THREE arguments,
+        which has no .NET overload; the resulting runtime error was silently swallowed by
+        the enclosing catch {}, so Query.Total never updated. Calls are now nested 2-arg Max.
+    .OUTPUTS
+        Array of raw audit records collected across all blocks.
+    #>
+    param(
+        [Parameter(Mandatory = $true)][string]$ActivityType,
+        [Parameter(Mandatory = $true)][datetime]$StartDate,
+        [Parameter(Mandatory = $true)][datetime]$EndDate,
+        [int]$PartitionIndex = 1,
+        [int]$TotalPartitions = 1,
+        [bool]$UseEOMMode = $false
+    )
+
+    Write-Host "Processing $ActivityType (partition $PartitionIndex/$TotalPartitions) from $($StartDate.ToString('yyyy-MM-dd HH:mm')) to $($EndDate.ToString('yyyy-MM-dd HH:mm'))..." -ForegroundColor White
+    $blockHours = Get-OptimalBlockSize -ActivityType $ActivityType
+    Write-Host "  Using initial block size: $blockHours hours" -ForegroundColor DarkCyan
+
+    $allResults = New-Object System.Collections.ArrayList
+    $current = $StartDate
+    $blockNumber = 1
+
+    while ($current -lt $EndDate) {
+        # Show progress BEFORE block processing to ensure visibility (before log output clears it)
+        Write-ProgressTick
+
+        if ($script:circuitBreakerOpen) {
+            if ($script:circuitBreakerOpenUntil -and (Get-Date) -lt $script:circuitBreakerOpenUntil) {
+                Write-LogHost "  Circuit breaker OPEN until $($script:circuitBreakerOpenUntil.ToString('HH:mm:ss')) – skipping remaining blocks for $ActivityType" -ForegroundColor Red
+                break
+            } else {
+                $script:circuitBreakerOpen = $false
+                $script:consecutiveBlockFailures = 0
+                Write-LogHost "  Circuit breaker cooldown elapsed – resuming block processing" -ForegroundColor DarkGreen
+            }
+        }
+        # Pick up any block size learned by previous iterations mid-run.
+        if ($script:learnedActivityBlockSize.ContainsKey($ActivityType)) {
+            $blockHours = $script:learnedActivityBlockSize[$ActivityType]
+        }
+
+        $blockEnd = $current.AddHours($blockHours)
+        if ($blockEnd -gt $EndDate) { $blockEnd = $EndDate }
+
+        $actualBlockHours = [math]::Round(($blockEnd - $current).TotalHours, 2)
+        Write-Host "  Block $blockNumber`: $($current.ToString('yyyy-MM-dd HH:mm')) to $($blockEnd.ToString('yyyy-MM-dd HH:mm')) ($($actualBlockHours)h)" -ForegroundColor Yellow
+
+        try {
+            $results = Invoke-PurviewAuditQuery -StartDate $current -EndDate $blockEnd -Operations $ActivityType -ResultSize $ResultSize -UserIds $script:targetUsers -UseEOMMode $UseEOMMode
+
+            if ($results -and $results.Count -gt 0) {
+                # Safe add - handle both array and single object
+                if ($results -is [Array]) {
+                    foreach ($item in $results) { [void]$allResults.Add($item) }
+                } else {
+                    [void]$allResults.Add($results)
+                }
+                Write-Host "    Added $($results.Count) records (total: $($allResults.Count))" -ForegroundColor Green
+                Update-LearnedBlockSize -ActivityType $ActivityType -BlockHours $actualBlockHours -RecordCount $results.Count -Success $true
+                $script:consecutiveBlockFailures = 0
+            }
+            else {
+                Write-Host "    No records found in this block" -ForegroundColor Gray
+                $script:consecutiveBlockFailures = 0
+            }
+        }
+        catch {
+            Write-Host "    Block failed: $($_.Exception.Message)" -ForegroundColor Red
+            Update-LearnedBlockSize -ActivityType $ActivityType -BlockHours $actualBlockHours -RecordCount 0 -Success $false
+            $script:consecutiveBlockFailures++
+            $attemptNum = $script:consecutiveBlockFailures
+            # Exponential backoff capped at $BackoffMaxSeconds, plus 150-750ms of jitter.
+            $expDelay = [math]::Min($BackoffMaxSeconds, $BackoffBaseSeconds * [math]::Pow(2, ($attemptNum - 1)))
+            $jitterMs = Get-Random -Minimum 150 -Maximum 750
+            $totalDelaySec = [math]::Round($expDelay,2) + [math]::Round($jitterMs/1000,2)
+            try { $script:metrics.BackoffTotalDelaySeconds += $totalDelaySec } catch {}
+            Write-LogHost "    Reliability: Backoff delay $([math]::Round($expDelay,2))s + jitter $([math]::Round($jitterMs/1000,2))s (attempt $attemptNum)" -ForegroundColor DarkYellow
+            Start-Sleep -Seconds ([int][math]::Ceiling($expDelay))
+            Start-Sleep -Milliseconds $jitterMs
+            if ($script:consecutiveBlockFailures -ge $CircuitBreakerThreshold) {
+                $script:circuitBreakerOpen = $true
+                $script:circuitBreakerOpenUntil = (Get-Date).AddSeconds($CircuitBreakerCooldownSeconds)
+                try { $script:metrics.CircuitBreakerTrips++ } catch {}
+                Write-LogHost "    CIRCUIT BREAKER TRIPPED after $script:consecutiveBlockFailures consecutive block failures – cooling down for $CircuitBreakerCooldownSeconds seconds (until $($script:circuitBreakerOpenUntil.ToString('HH:mm:ss')))" -ForegroundColor Magenta
+                break
+            }
+            if ($blockHours -gt 0.5) {
+                $smallerBlockHours = Get-NextSmallerBlockSize -CurrentSize $blockHours
+                Write-Host "    Retrying with smaller $smallerBlockHours hour block..." -ForegroundColor Yellow
+
+                try {
+                    $blockEnd = $current.AddHours($smallerBlockHours)
+                    if ($blockEnd -gt $EndDate) { $blockEnd = $EndDate }
+
+                    $results = Invoke-PurviewAuditQuery -StartDate $current -EndDate $blockEnd -Operations $ActivityType -ResultSize $ResultSize -UserIds $script:targetUsers -UseEOMMode $UseEOMMode
+
+                    if ($results -and $results.Count -gt 0) {
+                        # Safe add - handle both array and single object
+                        if ($results -is [Array]) {
+                            foreach ($item in $results) { [void]$allResults.Add($item) }
+                        } else {
+                            [void]$allResults.Add($results)
+                        }
+                        Write-Host "    Smaller block succeeded: $($results.Count) records" -ForegroundColor Green
+                        Update-LearnedBlockSize -ActivityType $ActivityType -BlockHours $smallerBlockHours -RecordCount $results.Count -Success $true
+                        $blockHours = $smallerBlockHours
+                        $script:consecutiveBlockFailures = 0
+                    }
+                }
+                catch {
+                    Write-Host "    Smaller block also failed: $($_.Exception.Message)" -ForegroundColor Red
+                    $script:consecutiveBlockFailures++
+                    $attemptNum = $script:consecutiveBlockFailures
+                    $expDelay = [math]::Min($BackoffMaxSeconds, $BackoffBaseSeconds * [math]::Pow(2, ($attemptNum - 1)))
+                    $jitterMs = Get-Random -Minimum 150 -Maximum 750
+                    $totalDelaySec = [math]::Round($expDelay,2) + [math]::Round($jitterMs/1000,2)
+                    try { $script:metrics.BackoffTotalDelaySeconds += $totalDelaySec } catch {}
+                    Write-LogHost "    Reliability: Backoff delay $([math]::Round($expDelay,2))s + jitter $([math]::Round($jitterMs/1000,2))s (attempt $attemptNum)" -ForegroundColor DarkYellow
+                    Start-Sleep -Seconds ([int][math]::Ceiling($expDelay))
+                    Start-Sleep -Milliseconds $jitterMs
+                    if ($script:consecutiveBlockFailures -ge $CircuitBreakerThreshold) {
+                        $script:circuitBreakerOpen = $true
+                        $script:circuitBreakerOpenUntil = (Get-Date).AddSeconds($CircuitBreakerCooldownSeconds)
+                        try { $script:metrics.CircuitBreakerTrips++ } catch {}
+                        Write-LogHost "    CIRCUIT BREAKER TRIPPED after $script:consecutiveBlockFailures consecutive block failures – cooling down for $CircuitBreakerCooldownSeconds seconds (until $($script:circuitBreakerOpenUntil.ToString('HH:mm:ss')))" -ForegroundColor Magenta
+                        break
+                    }
+                }
+            }
+        }
+
+        try {
+            if ($script:progressState.Query.Current -ge $script:progressState.Query.Total) {
+                $script:progressState.Query.Total += 1
+            }
+            $script:progressState.Query.Current += 1
+            $script:progressBlocksCompleted = ($script:progressBlocksCompleted + 1)
+            $script:progressBlockHoursSum = ($script:progressBlockHoursSum + $actualBlockHours)
+            if ($script:progressBlocksCompleted -gt 0) {
+                # --- Progress Estimation Logic (Improved for multi-partition accuracy) ---
+                # Previously, the dynamic recalculation only considered the current partition's remaining hours.
+                # In multi-partition scenarios this allowed Query.Total to shrink between partitions, causing
+                # premature 100% completion when later partitions had not yet started.
+                # New approach:
+                #   1. Estimate remaining blocks in the CURRENT partition (as before).
+                #   2. Add an estimate for yet-to-start partitions based on the average blocks/partition so far.
+                #   3. Enforce a monotonic (non-decreasing) Query.Total so percent cannot jump to 100% early.
+                $avgBlock = $script:progressBlockHoursSum / $script:progressBlocksCompleted
+                $elapsedHours = $script:progressBlockHoursSum
+                $currentPartitionRangeHours = ($EndDate - $StartDate).TotalHours
+                $remainingHoursCurrentPartition = [Math]::Max(0.0, $currentPartitionRangeHours - $elapsedHours)
+                $remainingBlocksEstCurrent = if ($avgBlock -gt 0) { [Math]::Ceiling($remainingHoursCurrentPartition / $avgBlock) } else { 0 }
+                $remainingPartitions = if ($TotalPartitions -gt $PartitionIndex) { $TotalPartitions - $PartitionIndex } else { 0 }
+                $avgBlocksPerCompletedPartition = if ($PartitionIndex -gt 0) { [double]$script:progressBlocksCompleted / [double]$PartitionIndex } else { [double]$script:progressBlocksCompleted }
+                $futurePartitionBlocksEst = if ($remainingPartitions -gt 0 -and $avgBlocksPerCompletedPartition -gt 0) { [int][Math]::Ceiling($avgBlocksPerCompletedPartition * $remainingPartitions) } else { 0 }
+                $newCalcGlobal = $script:progressBlocksCompleted + $remainingBlocksEstCurrent + $futurePartitionBlocksEst
+                # Apply optional smoothing but NEVER allow total to decrease (monotonic total).
+                # BUGFIX: [Math]::Max has no 3-argument overload; the previous 3-arg calls threw
+                # a method-resolution error that the enclosing catch {} silently swallowed.
+                if ($ProgressSmoothingAlpha -gt 0 -and $script:progressState.Query.Total -gt 0) {
+                    $smoothed = [int]([Math]::Round(($ProgressSmoothingAlpha * $newCalcGlobal) + ((1 - $ProgressSmoothingAlpha) * $script:progressState.Query.Total)))
+                    $newTotalCandidate = [Math]::Max($script:progressState.Query.Total, [Math]::Max($smoothed, $newCalcGlobal))
+                } else {
+                    $newTotalCandidate = [Math]::Max($script:progressState.Query.Total, $newCalcGlobal)
+                }
+                $script:progressState.Query.Total = [Math]::Max($script:progressState.Query.Total, [Math]::Max($newTotalCandidate, $script:progressBlocksCompleted))
+            }
+            Update-Progress
+            # Explicit tick for visibility even if Update-Progress weighting collapses.
+            Write-ProgressTick
+        }
+        catch {}
+
+        # NOTE(review): if the smaller-block retry also failed (without tripping the breaker),
+        # $current still advances past the failed window, silently skipping its records —
+        # confirm this data-loss trade-off is intentional.
+        $current = $blockEnd
+        $blockNumber++
+    }
+
+    Write-Host "  Completed $ActivityType (partition $PartitionIndex/$TotalPartitions)`: $($allResults.Count) total records" -ForegroundColor Green
+    return $allResults.ToArray()
+}
+
+# Note: Write-Log and Write-LogHost are defined earlier in the script (near line 670)
+
+function Open-CsvWriter {
+    # Opens the script-scoped CSV StreamWriter and writes the header row.
+    # UTF-8 without BOM; 1MB write buffer (StreamWriter default is 1KB) to reduce write syscalls.
+    param([string]$Path, [string[]]$Columns)
+    $noBomUtf8 = New-Object System.Text.UTF8Encoding($false)
+    $script:PAX_CsvWriter = [System.IO.StreamWriter]::new($Path, $false, $noBomUtf8, 1048576)
+    # RFC-4180-style escaping: double embedded quotes; quote cells containing
+    # quotes, commas, newlines, or leading/trailing spaces.
+    $headerCells = foreach ($col in $Columns) {
+        $text = [string]$col
+        $doubledQuotes = $text -replace '"', '""'
+        if (($text -match '[",\r\n]') -or $text.StartsWith(' ') -or $text.EndsWith(' ')) {
+            '"' + $doubledQuotes + '"'
+        } else {
+            $doubledQuotes
+        }
+    }
+    $script:PAX_CsvWriter.WriteLine(($headerCells -join ','))
+}
+function Close-CsvWriter {
+    # Flush and dispose the script-scoped CSV writer, then drop the variable.
+    # Best-effort: flush/dispose failures are swallowed so shutdown never throws.
+    if ($script:PAX_CsvWriter) {
+        try {
+            $script:PAX_CsvWriter.Flush()
+            $script:PAX_CsvWriter.Dispose()
+        } catch {}
+        Remove-Variable PAX_CsvWriter -Scope Script -ErrorAction SilentlyContinue
+    }
+}
+function Write-CsvRows {
+    <#
+    .SYNOPSIS
+        Appends data rows to the already-open script-scoped CSV writer.
+    .DESCRIPTION
+        Performance-tuned row serializer: a compiled quote-detection regex, a reused
+        per-row string array, an O(1) column-name index, and a 4MB StringBuilder that
+        is flushed to the StreamWriter whenever it fills. Accepts both hashtable rows
+        and PSObject rows; cells for columns a row does not populate are emitted empty.
+        Collections are flattened with ';'. Throws if Open-CsvWriter was not called first.
+    .PARAMETER Rows
+        The rows to write (hashtables or PSObjects); a null/empty collection is a no-op.
+    .PARAMETER Columns
+        Ordered column names matching the header written by Open-CsvWriter.
+    #>
+    param([System.Collections.IEnumerable]$Rows, [string[]]$Columns)
+    if (-not $Rows) { return }
+    if (-not $script:PAX_CsvWriter) { throw "CSV writer not initialized" }
+
+    # Pre-compile regex once (not per-cell) for significant performance gain
+    $needsQuotePattern = [regex]::new('[",\r\n]', [System.Text.RegularExpressions.RegexOptions]::Compiled)
+
+    # OPTIMIZATION: Pre-allocate larger buffer (4MB) to reduce write syscalls
+    $sb = New-Object System.Text.StringBuilder(4194304)
+    $colCount = $Columns.Count
+    $fieldValues = New-Object string[] $colCount  # Reuse array instead of creating List per row
+
+    # OPTIMIZATION: Build column index lookup table for O(1) access by name
+    # This eliminates per-cell string lookups which were the main bottleneck
+    $columnIndex = @{}
+    for ($i = 0; $i -lt $colCount; $i++) {
+        $columnIndex[$Columns[$i]] = $i
+    }
+
+    foreach ($row in $Rows) {
+        if ($null -eq $row) { continue }
+
+        # Reset field values (faster than creating new array)
+        for ($i = 0; $i -lt $colCount; $i++) { $fieldValues[$i] = "" }
+
+        # For hashtables, iterate keys and map to column index (much faster than iterating columns)
+        if ($row -is [hashtable]) {
+            foreach ($key in $row.Keys) {
+                if ($columnIndex.ContainsKey($key)) {
+                    $idx = $columnIndex[$key]
+                    $val = $row[$key]
+
+                    if ($null -eq $val) { continue }
+
+                    # Handle arrays/collections: join elements with ';' (null elements become empty)
+                    if ($val -is [System.Collections.IEnumerable] -and -not ($val -is [string])) {
+                        try { $val = ($val | ForEach-Object { if ($_ -ne $null) { [string]$_ } else { '' } }) -join ';' } catch { $val = [string]$val }
+                    }
+
+                    $s = [string]$val
+                    # Use pre-compiled regex and avoid method calls where possible
+                    if ($s.Length -gt 0 -and ($needsQuotePattern.IsMatch($s) -or $s[0] -eq ' ' -or $s[$s.Length - 1] -eq ' ')) {
+                        $s = '"' + ($s -replace '"', '""') + '"'
+                    }
+                    $fieldValues[$idx] = $s
+                }
+            }
+        } else {
+            # OPTIMIZED: For PSObjects, iterate only populated properties (not all columns)
+            # This reduces iterations from ~163 columns to ~50 actual properties per row
+            foreach ($prop in $row.PSObject.Properties) {
+                $key = $prop.Name
+                if (-not $columnIndex.ContainsKey($key)) { continue }
+
+                $idx = $columnIndex[$key]
+                $val = $prop.Value
+
+                if ($null -eq $val) { continue }
+
+                # Handle arrays/collections
+                if ($val -is [System.Collections.IEnumerable] -and -not ($val -is [string])) {
+                    try { $val = ($val | ForEach-Object { if ($_ -ne $null) { [string]$_ } else { '' } }) -join ';' } catch { $val = [string]$val }
+                }
+
+                $s = [string]$val
+                if ($s.Length -gt 0 -and ($needsQuotePattern.IsMatch($s) -or $s[0] -eq ' ' -or $s[$s.Length - 1] -eq ' ')) {
+                    $s = '"' + ($s -replace '"', '""') + '"'
+                }
+                $fieldValues[$idx] = $s
+            }
+        }
+
+        [void]$sb.AppendLine(($fieldValues -join ','))
+        # Flush at 4MB (increased from 1MB to reduce write syscalls)
+        if ($sb.Length -gt 4194304) {
+            $script:PAX_CsvWriter.Write($sb.ToString())
+            [void]$sb.Clear()
+        }
+    }
+    # Final partial-buffer flush so no rows remain in the StringBuilder.
+    if ($sb.Length -gt 0) { $script:PAX_CsvWriter.Write($sb.ToString()) }
+}
+
+function Test-AgentFilter {
+    <#
+    .SYNOPSIS
+        Decides whether an audit record passes the agent-related filters.
+    .DESCRIPTION
+        With neither filter active, every record passes. -AgentsOnlyFilter requires a
+        non-empty AgentId on the record; -AgentIdFilter additionally requires an exact
+        (case-insensitive -eq) match against one of the supplied IDs. A failure while
+        reading the AgentId property rejects the record.
+    #>
+    param(
+        [Parameter(Mandatory = $true)]
+        $ParsedAuditData,
+        [string[]]$AgentIdFilter,
+        [bool]$AgentsOnlyFilter
+    )
+    # Fast path: nothing to filter on.
+    if (-not $AgentIdFilter -and -not $AgentsOnlyFilter) { return $true }
+
+    $agentIdValue = $null
+    try {
+        if ($ParsedAuditData.AgentId) { $agentIdValue = [string]$ParsedAuditData.AgentId }
+    }
+    catch {
+        # Unreadable AgentId property: treat as non-matching.
+        return $false
+    }
+
+    $hasAgentId = -not [string]::IsNullOrWhiteSpace($agentIdValue)
+
+    if ($AgentsOnlyFilter) {
+        if (-not $hasAgentId) { return $false }
+        if (-not $AgentIdFilter) { return $true }
+    }
+
+    if ($AgentIdFilter) {
+        if (-not $hasAgentId) { return $false }
+        foreach ($candidateId in $AgentIdFilter) {
+            if ($agentIdValue -eq $candidateId) { return $true }
+        }
+        return $false
+    }
+    return $true
+}
+
+# Ensure output directory exists
+if (-not (Test-Path $OutputPath)) { New-Item -Path $OutputPath -ItemType Directory -Force | Out-Null }
+
+# Generate output filename with proper extension
+$isDSPMEnabled = Test-DSPMFeaturesEnabled
+$fileExtension = if ($ExportWorkbook) { "xlsx" } else { "csv" }
+$filePrefix = "Purview_Audit"
+
+# Determine initial combine mode (needed early for filename decision)
+# Replay mode (-RAWInputCSV) with CSV output always combines; otherwise combine
+# only when the user passed -CombineOutput. Workbook output never sets this flag.
+$isCsv = (-not $ExportWorkbook)
+$initialCsvCombine = if ($RAWInputCSV -and $isCsv) { $true } elseif ($isCsv) { $CombineOutput.IsPresent } else { $false }
+
+# Determine script mode for logging and validation
+# Precedence: Replay > Deep explosion > Array explosion (explicit or RAWInput-implied) > Standard.
+$scriptMode = if ($RAWInputCSV) {
+    "Replay (RAWInputCSV)"
+} elseif ($ExplodeDeep) {
+    "Deep Column Explosion (-ExplodeDeep)"
+} elseif ($ExplodeArrays -or $ForcedRawInputCsvExplosion) {
+    if ($ForcedRawInputCsvExplosion -and -not $ExplodeArrays.IsPresent -and -not $ExplodeDeep.IsPresent) { "Array Explosion (-ExplodeArrays, RAWInput implied)" } else { "Array Explosion (-ExplodeArrays)" }
+} else {
+    "Standard (1:1, no explosion)"
+}
+
+# Resolve OutputFile path
+# OnlyUserInfo mode: Output file is EntraUsers_MAClicensing (log file will match)
+if ($OnlyUserInfo) {
+    $fileExtension = if ($ExportWorkbook) { "xlsx" } else { "csv" }
+    $OutputFile = Join-Path $OutputPath "EntraUsers_MAClicensing_$global:ScriptRunTimestamp.$fileExtension"
+}
+elseif ($AppendFile) {
+    # User provided filename or full path for appending
+    if ([System.IO.Path]::IsPathRooted($AppendFile)) {
+        # Full path provided
+        $OutputFile = $AppendFile
+    } else {
+        # Relative filename - combine with OutputPath
+        $OutputFile = Join-Path $OutputPath $AppendFile
+    }
+
+    # Validate file exists
+    if (-not (Test-Path $OutputFile)) {
+        Write-Host "ERROR: Cannot append to file - file does not exist: $OutputFile" -ForegroundColor Red
+        Write-Host "" -ForegroundColor Yellow
+        Write-Host "The file must exist before you can append to it." -ForegroundColor Yellow
+        Write-Host "Either:" -ForegroundColor Green
+        Write-Host "  1. Create the file first by running without -AppendFile" -ForegroundColor Green
+        Write-Host "  2. Verify the path and filename are correct" -ForegroundColor Green
+        exit 1
+    }
+
+    # Note: Column validation happens after banner display
+}
+elseif ($ExportWorkbook) {
+    # Excel workbook mode - determine final filename upfront so log file matches
+    if ($CombineOutput) {
+        # Single-tab workbook
+        if ($IncludeUserInfo -and -not $UseEOM) {
+            $OutputFile = Join-Path $OutputPath "Purview_Audit_CombinedUsageActivity_EntraUsers_MAClicensing_$global:ScriptRunTimestamp.xlsx"
+        } else {
+            $OutputFile = Join-Path $OutputPath "Purview_Audit_CombinedUsageActivity_$global:ScriptRunTimestamp.xlsx"
+        }
+    } else {
+        # Multi-tab workbook.
+        # FIX: removed dead 'if ($isDSPMEnabled)' conditional whose two branches
+        # produced byte-identical paths - the filename does not vary with DSPM.
+        $OutputFile = Join-Path $OutputPath "Purview_Audit_MultiTab_$global:ScriptRunTimestamp.xlsx"
+    }
+} elseif ($isCsv -and $initialCsvCombine) {
+    # CSV combined activity naming; EntraUsers exported separately
+    # Initial default combined output filename (may be dynamically downgraded to single-activity later)
+    $OutputFile = Join-Path $OutputPath "Purview_Audit_CombinedUsageActivity_$global:ScriptRunTimestamp.csv"
+} else {
+    $OutputFile = Join-Path $OutputPath "${filePrefix}_$global:ScriptRunTimestamp.$fileExtension"
+}
+
+# Ensure output directory exists after possible OutputPath override
+if (-not (Test-Path $OutputPath)) { New-Item -Path $OutputPath -ItemType Directory -Force | Out-Null }
+
+# When ExportWorkbook mode, set up intermediate CSV path (OutputFile stays as final .xlsx for log naming)
+# In all other modes the CSV output path IS the output file.
+if ($ExportWorkbook -and -not $AppendFile) {
+    # CSV intermediate file uses same base name but .csv extension
+    $script:CsvOutputFile = Join-Path $OutputPath ([System.IO.Path]::GetFileNameWithoutExtension($OutputFile) + ".csv")
+} else {
+    $script:CsvOutputFile = $OutputFile
+}
+
+# ============================================================
+# CHECKPOINT SYSTEM: Initialize for new runs or set paths for resume
+# ============================================================
+# Checkpoint is enabled for ALL auth modes (AppRegistration, WebLogin, DeviceCode)
+# Enables resume after Ctrl+C, network interruptions, system restarts, or any failure
+$script:CheckpointEnabled = (-not $RAWInputCSV) -and (-not $OnlyUserInfo)
+
+# NOTE: Use $ResumeSpecified here (set early via RemainingArgs parsing) rather than $script:IsResumeMode
+# which is only set later during resume detection. This skips new checkpoint creation when -Resume is specified.
+if ($ResumeSpecified) {
+    # Resume mode: checkpoint paths will be set later during resume detection
+    # Skip all checkpoint initialization here - it will be handled in the resume detection block
+    $script:FinalOutputPath = $null    # Will be set during resume
+    $script:PartialOutputPath = $null  # Will be set during resume
+    # Also skip log file setup - will be set after checkpoint is loaded
+    $script:DeferLogFileSetup = $true
+}
+elseif ($script:CheckpointEnabled) {
+    # New run with checkpoint enabled: Add _PARTIAL suffix
+    # FinalOutputPath keeps the user-facing name; OutputFile is redirected to the
+    # _PARTIAL path until the run completes and the file is renamed.
+    $script:FinalOutputPath = $OutputFile
+    $dir = Split-Path $OutputFile -Parent
+    $baseName = [System.IO.Path]::GetFileNameWithoutExtension($OutputFile)
+    $ext = [System.IO.Path]::GetExtension($OutputFile)
+    $script:PartialOutputPath = Join-Path $dir "${baseName}_PARTIAL${ext}"
+    $OutputFile = $script:PartialOutputPath
+    # For ExportWorkbook mode, CsvOutputFile must use .csv extension (not .xlsx from PartialOutputPath)
+    if ($ExportWorkbook) {
+        $script:CsvOutputFile = Join-Path $dir "${baseName}_PARTIAL.csv"
+    } else {
+        $script:CsvOutputFile = $script:PartialOutputPath
+    }
+
+    # Initialize checkpoint file for this run with ALL parameters for complete state restoration
+    # (no secrets are persisted - only Auth mode, TenantId and ClientId identifiers).
+    $baseFileName = Split-Path $script:FinalOutputPath -Leaf
+    $allParams = @{
+        # Date range
+        StartDate = $StartDate
+        EndDate = $EndDate
+        # Activity/Record filtering
+        ActivityTypes = $ActivityTypes
+        RecordTypes = $RecordTypes
+        ServiceTypes = $ServiceTypes
+        UserIds = $UserIds
+        GroupNames = $GroupNames
+        # Agent filtering
+        AgentId = $AgentId
+        AgentsOnly = $AgentsOnly.IsPresent
+        ExcludeAgents = $ExcludeAgents.IsPresent
+        # Prompt filtering
+        PromptFilter = $PromptFilter
+        # Schema/Explosion
+        ExplodeArrays = $ExplodeArrays.IsPresent
+        ExplodeDeep = $ExplodeDeep.IsPresent
+        FlatDepth = $FlatDepth
+        StreamingSchemaSample = $StreamingSchemaSample
+        StreamingChunkSize = $StreamingChunkSize
+        # M365/User info
+        IncludeM365Usage = $IncludeM365Usage.IsPresent
+        IncludeUserInfo = $IncludeUserInfo.IsPresent
+        IncludeDSPMForAI = $IncludeDSPMForAI.IsPresent
+        IncludeCopilotInteraction = $IncludeCopilotInteraction.IsPresent
+        ExcludeCopilotInteraction = $ExcludeCopilotInteraction.IsPresent
+        # Partitioning
+        BlockHours = $BlockHours
+        PartitionHours = $PartitionHours
+        MaxPartitions = $MaxPartitions
+        # Output
+        ExportWorkbook = $ExportWorkbook.IsPresent
+        CombineOutput = $CombineOutput.IsPresent
+        # Auth (no secrets)
+        Auth = $Auth
+        TenantId = $TenantId
+        ClientId = $ClientId
+        # Other
+        ResultSize = $ResultSize
+        MaxConcurrency = $MaxConcurrency
+        UseEOM = $UseEOM.IsPresent
+        AutoCompleteness = $AutoCompleteness.IsPresent
+        IncludeTelemetry = $IncludeTelemetry.IsPresent
+    }
+    Initialize-CheckpointForNewRun -OutputPath $OutputPath -BaseOutputFileName $baseFileName -RunTimestamp $global:ScriptRunTimestamp -StartDate (script:Parse-DateSafe $StartDate) -EndDate (script:Parse-DateSafe $EndDate) -AllParameters $allParams
+}
+else {
+    # No checkpoint needed (AppRegistration mode or RAWInputCSV or OnlyUserInfo)
+    $script:FinalOutputPath = $OutputFile
+    $script:PartialOutputPath = $null
+}
+
+# Update LogFile to match OutputFile base name (extension swapped to .log)
+# Skip for resume mode - log file will be set after checkpoint is loaded
+if (-not $script:DeferLogFileSetup) {
+    $logBaseName = [System.IO.Path]::GetFileNameWithoutExtension($OutputFile)
+    $logDir = Split-Path $OutputFile -Parent
+    $script:LogFile = Join-Path $logDir ("{0}.log" -f $logBaseName)
+    $LogFile = $script:LogFile
+}
+
+# Flush buffered logs now that log file is finalized (skip for resume - will flush after checkpoint load)
+# BUGFIX: honor the "skip for resume" intent stated above - previously this flush ran even when
+# $script:DeferLogFileSetup was set, writing buffered entries to the pre-resume log path instead of
+# deferring until the checkpoint is loaded. (TODO confirm the resume path flushes $script:LogBuffer.)
+if ($script:LogBuffer -and $script:LogBuffer.Count -gt 0 -and -not $script:DeferLogFileSetup) {
+    foreach ($entry in $script:LogBuffer) {
+        try { Add-Content -Path $script:LogFile -Value $entry -Encoding UTF8 -ErrorAction SilentlyContinue } catch {}
+    }
+    $script:LogBuffer.Clear()
+}
+
+# Note: $scriptMode already defined earlier for validation - reformat for display consistency
+$scriptModeDisplay = if ($ExplodeDeep) { "Deep Column Explosion" } elseif ($ExplodeArrays -or $ForcedRawInputCsvExplosion) { if ($ForcedRawInputCsvExplosion -and -not $ExplodeArrays.IsPresent -and -not $ExplodeDeep.IsPresent) { "Array Explosion (RAWInput implied)" } else { "Array Explosion" } } else { "Standard (1:1)" }
+
+# Skip banner output to log file in resume mode (log file not set yet - will be set after checkpoint loads)
+# The here-string below is written verbatim as the log file header (Out-File truncates/creates the file).
+if (-not $script:DeferLogFileSetup) {
+@"
+=== Portable Audit eXporter (PAX) - Purview Audit Log Exporter ===
+Script Start Time (UTC): $((Get-Date).ToUniversalTime().ToString('yyyy-MM-dd HH:mm:ss')) UTC
+Script Version: v$ScriptVersion
+Mode: $scriptModeDisplay
+Date Range: $(if ($RAWInputCSV) { if ([string]::IsNullOrWhiteSpace($StartDate) -and [string]::IsNullOrWhiteSpace($EndDate)) { 'Full CSV (no date filter)' } else { "$StartDate (inclusive) to $EndDate (exclusive) (filters)" } } else { "$StartDate (inclusive) to $EndDate (exclusive)" })
+Output File: $OutputFile
+Log File: $LogFile
+========================================================
+
+"@ | Out-File -FilePath $LogFile -Encoding UTF8
+}
+
+Write-LogHost "=== Portable Audit eXporter (PAX) - Purview Audit Log Exporter ===" -ForegroundColor Cyan
+Write-LogHost ("Script Version: v$ScriptVersion") -ForegroundColor White
+
+
+# Fast-path: ensure M365 usage bundle is applied before output summary in raw/replay scenarios
+if ($IncludeM365Usage -and -not ($PSBoundParameters.ContainsKey('ActivityTypes'))) {
+ $ActivityTypes = @($m365UsageActivityBundle + $copilotBaseActivityType) | Select-Object -Unique
+ # Activity types will be displayed in "Activity Types for This Run" section
+}
+
+# Display active mode (Replay, EOM, or Graph API)
+Write-LogHost ""
+if ($RAWInputCSV) {
+ Write-LogHost "═══════════════════════════════════════════════════════" -ForegroundColor Cyan
+ Write-LogHost " REPLAY MODE: Offline CSV (no service connections)" -ForegroundColor Cyan
+ Write-LogHost "═══════════════════════════════════════════════════════" -ForegroundColor Cyan
+ Write-LogHost " Source: $RAWInputCSV" -ForegroundColor White
+ Write-LogHost " Explosion: $scriptModeDisplay" -ForegroundColor White
+ Write-LogHost "═══════════════════════════════════════════════════════" -ForegroundColor Cyan
+}
+elseif ($UseEOM) {
+ Write-LogHost "═══════════════════════════════════════════════════════" -ForegroundColor Cyan
+ Write-LogHost " QUERY MODE: Exchange Online Management" -ForegroundColor Cyan
+ Write-LogHost "═══════════════════════════════════════════════════════" -ForegroundColor Cyan
+ Write-LogHost " API Method: Search-UnifiedAuditLog cmdlet" -ForegroundColor White
+ Write-LogHost " Module: ExchangeOnlineManagement" -ForegroundColor White
+ Write-LogHost " Authentication: $Auth" -ForegroundColor White
+ Write-LogHost " Parallel Support: DISABLED (serial-only processing)" -ForegroundColor Yellow
+ Write-LogHost " Permissions: Exchange Online RBAC roles required" -ForegroundColor White
+ Write-LogHost " (View-Only Audit Logs, Compliance Management)" -ForegroundColor Gray
+ Write-LogHost "═══════════════════════════════════════════════════════" -ForegroundColor Cyan
+}
+else {
+ Write-LogHost "═══════════════════════════════════════════════════════" -ForegroundColor Green
+ Write-LogHost " QUERY MODE: Microsoft Graph Security API (Default)" -ForegroundColor Green
+ Write-LogHost "═══════════════════════════════════════════════════════" -ForegroundColor Green
+ Write-LogHost " API Method: REST-based audit log queries" -ForegroundColor White
+ Write-LogHost " Module: Microsoft.Graph.Security" -ForegroundColor White
+ Write-LogHost " Authentication: $Auth (OAuth 2.0)" -ForegroundColor White
+ $parallelStatus = if ($PSVersionTable.PSVersion.Major -ge 7) { "AVAILABLE (PowerShell 7+)" } else { "LIMITED (PowerShell 5.1 detected)" }
+ Write-LogHost " Parallel Support: $parallelStatus" -ForegroundColor Green
+ Write-LogHost " Permissions: AuditLog.Read.All Graph API scope" -ForegroundColor White
+ Write-LogHost " + Azure AD role (Compliance/Security Admin)" -ForegroundColor Gray
+ Write-LogHost "═══════════════════════════════════════════════════════" -ForegroundColor Green
+}
+Write-LogHost ""
+
+$startTimeStamp = try { $script:metrics.StartTime.ToUniversalTime().ToString('yyyy-MM-dd HH:mm:ss') } catch { (Get-Date).ToUniversalTime().ToString('yyyy-MM-dd HH:mm:ss') }
+Write-LogHost ("Script execution started at $startTimeStamp UTC") -ForegroundColor White
+
+# For OnlyUserInfo mode, show simplified header (no Mode/DateRange/Purview output info)
+if (-not $OnlyUserInfo) {
+ Write-LogHost "Mode: $scriptMode" -ForegroundColor White
+ ${rangeText} = if ($RAWInputCSV) { if ([string]::IsNullOrWhiteSpace($StartDate) -and [string]::IsNullOrWhiteSpace($EndDate)) { 'Full CSV (no date filter)' } else { "$StartDate (inclusive) to $EndDate (exclusive) (filters)" } } else { "$StartDate (inclusive) to $EndDate (exclusive)" }
+ Write-LogHost "Date Range: $rangeText" -ForegroundColor White
+} else {
+ Write-LogHost "Mode: OnlyUserInfo (Entra user and MAC licensing export only)" -ForegroundColor Cyan
+}
+
+# --- Early build of $finalActivityTypes for user warning checks (before auth) ---
+# This preview build is used ONLY for multi-output warning and PAYG billing warning
+# The full/authoritative build happens later in the pipeline after authentication
+$finalActivityTypes = @()
+if ($PSBoundParameters.ContainsKey('ActivityTypes') -and $ActivityTypes) {
+    $finalActivityTypes += $ActivityTypes
+}
+if ($IncludeDSPMForAI) {
+    # DSPM for AI adds the three AI interaction record families.
+    $finalActivityTypes += 'ConnectedAIAppInteraction'
+    $finalActivityTypes += 'AIInteraction'
+    $finalActivityTypes += 'AIAppInteraction'
+}
+if ($IncludeM365Usage) {
+    $finalActivityTypes += $m365UsageActivityBundle
+}
+# Add CopilotInteraction as default if no custom types and not excluded
+$userProvidedCustomTypes = $PSBoundParameters.ContainsKey('ActivityTypes')
+if (-not $ExcludeCopilotInteraction -and (-not $userProvidedCustomTypes -or $IncludeDSPMForAI)) {
+    if (-not ($finalActivityTypes -contains 'CopilotInteraction')) {
+        $finalActivityTypes += 'CopilotInteraction'
+    }
+}
+# Remove CopilotInteraction if explicitly excluded
+# (-ExcludeCopilotInteraction wins over any source that added it above)
+if ($ExcludeCopilotInteraction) {
+    $finalActivityTypes = $finalActivityTypes | Where-Object { $_ -ne 'CopilotInteraction' }
+}
+$finalActivityTypes = $finalActivityTypes | Select-Object -Unique
+
+# --- Multi-Output Warning: Prompt when many files/tabs expected without -CombineOutput ---
+# Triggered at >10 activity types; -Force skips the interactive prompt.
+$activityTypeCount = $finalActivityTypes.Count
+$isMultiOutputScenario = ($activityTypeCount -gt 10) -and (-not $CombineOutput)
+$outputType = if ($ExportWorkbook) { "tabs" } else { "CSV files" }
+
+if ($isMultiOutputScenario -and -not $Force) {
+    Write-LogHost ""
+    Write-LogHost "============================================================================================================" -ForegroundColor Yellow
+    # CONSISTENCY FIX: was Write-Host, so the warning headline never reached the log file
+    # even though every surrounding line is logged via Write-LogHost.
+    Write-LogHost "WARNING: Multiple Output $outputType Detected" -ForegroundColor Yellow
+    Write-LogHost "============================================================================================================" -ForegroundColor Yellow
+    Write-LogHost ""
+    Write-LogHost "You have $activityTypeCount activity types selected." -ForegroundColor Cyan
+    Write-LogHost "Without -CombineOutput, this will create $activityTypeCount separate $outputType." -ForegroundColor Cyan
+    Write-LogHost ""
+    Write-LogHost "Recommendation:" -ForegroundColor Green
+    Write-LogHost "  • Add -CombineOutput to merge all activity types into a single $(if ($ExportWorkbook) { 'tab' } else { 'CSV file' })" -ForegroundColor Green
+    Write-LogHost ""
+    Write-LogHost "Do you want to continue with $activityTypeCount separate $outputType?" -ForegroundColor White
+    Write-LogHost ""
+    Write-LogHost "  [Y] YES     - Continue with separate $outputType (I understand there will be many $outputType)" -ForegroundColor Green
+    Write-LogHost "  [C] COMBINE - Enable -CombineOutput and continue (single merged $(if ($ExportWorkbook) { 'tab' } else { 'CSV' }))" -ForegroundColor Cyan
+    Write-LogHost "  [E] EXIT    - Cancel script execution" -ForegroundColor Red
+    Write-LogHost ""
+
+    Send-PromptNotification
+    $multiOutput_choice = Read-Host "Enter your choice (Y/C/E)"
+
+    if ($multiOutput_choice -eq 'Y' -or $multiOutput_choice -eq 'y') {
+        Write-LogHost ""
+        Write-LogHost "Continuing with $activityTypeCount separate output files..." -ForegroundColor Green
+        Write-LogHost ""
+    }
+    elseif ($multiOutput_choice -eq 'C' -or $multiOutput_choice -eq 'c') {
+        Write-LogHost ""
+        Write-LogHost "ENABLED: -CombineOutput mode" -ForegroundColor Green
+        # CONSISTENCY FIX: message previously said "single CSV file" even in -ExportWorkbook mode;
+        # now mirrors the tab/CSV wording used in the prompt above.
+        Write-LogHost "  All $activityTypeCount activity types will be merged into a single $(if ($ExportWorkbook) { 'workbook tab' } else { 'CSV file' })." -ForegroundColor Cyan
+        Write-LogHost ""
+        $CombineOutput = $true
+    }
+    else {
+        # Any other input (including E/e) is treated as EXIT.
+        Write-LogHost ""
+        Write-LogHost "User choice: EXIT - Script execution cancelled" -ForegroundColor Red
+        Write-LogHost ""
+        exit 0
+    }
+}
+elseif ($isMultiOutputScenario -and $Force) {
+    Write-LogHost "Force mode: Skipping multi-output warning ($activityTypeCount activity types, separate $outputType)" -ForegroundColor DarkGray
+    Write-LogHost "  → Continuing with separate $outputType (use -CombineOutput to merge if desired)" -ForegroundColor DarkGray
+}
+# --- End Multi-Output Warning ---
+
+# --- DSPM for AI: Billing Information Warning ---
+# Triggered when any DSPM activity type is in the preview list; interactive unless -Force.
+if (($finalActivityTypes -contains 'AIAppInteraction') -or ($finalActivityTypes -contains 'ConnectedAIAppInteraction') -or ($finalActivityTypes -contains 'AIInteraction')) {
+    if (-not $Force) {
+        Write-LogHost ""
+        Write-LogHost "============================================================================================================" -ForegroundColor Yellow
+        Write-Host "INFORMATION: DSPM for AI Audit Logging - Billing Details" -ForegroundColor Cyan
+        Write-LogHost "============================================================================================================" -ForegroundColor Yellow
+        Write-LogHost ""
+        Write-LogHost "DSPM Activity Types:" -ForegroundColor Cyan
+        Write-LogHost "  • AIInteraction            - FREE (Microsoft platforms: Copilot Studio, Azure AI Studio)" -ForegroundColor Green
+        Write-LogHost "  • ConnectedAIAppInteraction - MIXED (FREE for Microsoft apps, PAYG for third-party)" -ForegroundColor Yellow
+        if ($finalActivityTypes -contains 'AIAppInteraction') {
+            Write-LogHost "  • AIAppInteraction         - PAYG BILLING REQUIRED (third-party AI like ChatGPT)" -ForegroundColor DarkYellow
+        }
+        Write-LogHost ""
+        # Check if AIAppInteraction is included - offer options
+        if ($finalActivityTypes -contains 'AIAppInteraction') {
+            # Y/N/E prompt: user may keep, drop the PAYG-only type, or abort
+            Write-LogHost "[!] IMPORTANT: AIAppInteraction REQUIRES Microsoft Purview PAYG billing" -ForegroundColor Yellow
+            Write-LogHost ""
+            Write-LogHost "PAYG Requirements:" -ForegroundColor Cyan
+            Write-LogHost "  • Azure subscription linked to M365 tenant" -ForegroundColor Cyan
+            Write-LogHost "  • Microsoft Purview PAYG billing enabled in Compliance portal" -ForegroundColor Cyan
+            Write-LogHost ""
+            Write-LogHost "Do you have PAYG billing configured in your tenant?" -ForegroundColor White
+            Write-LogHost ""
+            Write-LogHost "  [Y] YES  - I have PAYG billing, continue with all DSPM types" -ForegroundColor Green
+            Write-LogHost "  [N] NO   - I don't have PAYG billing, remove AIAppInteraction and continue" -ForegroundColor Yellow
+            Write-LogHost "             (Third-party AI records will NOT be included)" -ForegroundColor DarkGray
+            Write-LogHost "  [E] EXIT - Cancel script execution" -ForegroundColor Red
+            Write-LogHost ""
+
+            Send-PromptNotification
+            $payg_choice = Read-Host "Enter your choice (Y/N/E)"
+
+            if ($payg_choice -eq 'Y' -or $payg_choice -eq 'y') {
+                Write-LogHost ""
+                Write-LogHost "Continuing with all DSPM types (AIInteraction, ConnectedAIAppInteraction, AIAppInteraction)..." -ForegroundColor Green
+                Write-LogHost ""
+            }
+            elseif ($payg_choice -eq 'N' -or $payg_choice -eq 'n') {
+                Write-LogHost ""
+                Write-LogHost "REMOVED: AIAppInteraction (third-party AI records will NOT be captured)" -ForegroundColor Yellow
+                Write-LogHost "Continuing with: AIInteraction, ConnectedAIAppInteraction (Microsoft platforms only)" -ForegroundColor Green
+                Write-LogHost ""
+                Write-LogHost "Note: Without PAYG billing, only Microsoft-hosted AI activity will be captured." -ForegroundColor Yellow
+                Write-LogHost "      Third-party AI apps (ChatGPT, etc.) require PAYG billing." -ForegroundColor Yellow
+                Write-LogHost ""
+
+                # Set flag to remove AIAppInteraction during later rebuild
+                # (the authoritative activity-type list is rebuilt after authentication)
+                $script:RemoveAIAppInteraction = $true
+                $finalActivityTypes = $finalActivityTypes | Where-Object { $_ -ne 'AIAppInteraction' }
+            }
+            else {
+                Write-LogHost ""
+                Write-LogHost "User choice: EXIT - Script execution cancelled" -ForegroundColor Red
+                Write-LogHost ""
+                exit 0
+            }
+        }
+        else {
+            # No AIAppInteraction - simple Y/N prompt
+            Write-LogHost "Note: AIAppInteraction (PAYG-only third-party AI) is NOT included." -ForegroundColor DarkGray
+            Write-LogHost "      Only Microsoft-hosted AI activity will be captured (AIInteraction, ConnectedAIAppInteraction)." -ForegroundColor DarkGray
+            Write-LogHost ""
+            Send-PromptNotification
+            $payg_choice = Read-Host "Continue with DSPM for AI export? (Y/N)"
+
+            if ($payg_choice -eq 'Y' -or $payg_choice -eq 'y') {
+                Write-LogHost ""
+                Write-LogHost "Continuing with DSPM for AI export..." -ForegroundColor Green
+                Write-LogHost ""
+            }
+            else {
+                # Anything other than Y aborts the run
+                Write-LogHost ""
+                Write-LogHost "User choice: ABORT - DSPM for AI export declined" -ForegroundColor Red
+                Write-LogHost "Script execution cancelled by user." -ForegroundColor Yellow
+                Write-LogHost ""
+                exit 0
+            }
+        }
+    }
+    else {
+        # -Force: accept silently, but log that the prompt was skipped
+        Write-LogHost "Force mode enabled: Skipping DSPM for AI billing information prompt" -ForegroundColor DarkGray
+        Write-LogHost "User choice: CONTINUE (Force mode - automatic acceptance)" -ForegroundColor Gray
+    }
+}
+# --- End PAYG Billing Warning ---
+
+# Output file/directory display based on export mode
+# Note: If activity type switches are used, detailed filenames will be shown after activity types are finalized
+# For OnlyUserInfo mode, only show Entra file output (skip Purview data file messaging)
+if ($OnlyUserInfo) {
+    $outputDir = if ($OutputPath) { $OutputPath } else { "C:\Temp\" }
+    if ($ExportWorkbook) {
+        $entraOutputFile = Join-Path $outputDir "EntraUsers_MAClicensing_${global:ScriptRunTimestamp}.xlsx"
+        Write-LogHost "Output File: $entraOutputFile (Entra users workbook)" -ForegroundColor White
+    } else {
+        $entraOutputFile = Join-Path $outputDir "EntraUsers_MAClicensing_${global:ScriptRunTimestamp}.csv"
+        Write-LogHost "Output File: $entraOutputFile" -ForegroundColor White
+    }
+} elseif ($AppendFile) {
+    # AppendFile mode: Show exact filename being appended to
+    $fileType = if ($ExportWorkbook) {
+        if ($CombineOutput) { "single-tab workbook" } else { "multi-tab workbook" }
+    } else {
+        "CSV file"
+    }
+    Write-LogHost "Output File: $OutputFile ($fileType)" -ForegroundColor White
+    Write-LogHost "  Mode: Appending to existing file" -ForegroundColor Cyan
+} elseif ($IncludeDSPMForAI -or $ExcludeCopilotInteraction) {
+    # Activity type switches present - defer detailed filename listing until after activity types are finalized
+    $outputDir = if ($ExportWorkbook) {
+        if ($OutputPath) { $OutputPath } else { "C:\Temp\" }
+    } else {
+        Split-Path $OutputFile -Parent
+    }
+    Write-LogHost "Output Directory: $outputDir\" -ForegroundColor White
+    Write-LogHost "  (Detailed filenames will be shown after activity types are finalized)" -ForegroundColor Gray
+} elseif ($ExportWorkbook) {
+    # Excel mode: always one .xlsx file (combined tab or multiple tabs)
+    $outputDir = if ($OutputPath) { $OutputPath } else { "C:\Temp\" }
+    if ($CombineOutput) {
+        # New naming: Purview_Audit_CombinedUsageActivity[_EntraUsers]_timestamp.xlsx
+        $baseName = "Purview_Audit_CombinedUsageActivity"
+        if ($IncludeUserInfo -and -not $UseEOM) { $baseName += "_EntraUsers" }
+        # NOTE(review): displayed name ends in '_' before .xlsx — presumably the
+        # timestamp is inserted at export time; confirm against the export code.
+        Write-LogHost "Output File: ${outputDir}${baseName}_.xlsx (single-tab workbook)" -ForegroundColor White
+    } else {
+        Write-LogHost "Output File: ${outputDir}Purview_Audit_MultiTab_.xlsx (multi-tab workbook)" -ForegroundColor White
+    }
+} else {
+    # CSV mode: combined file or separate files per activity type
+    if ($CombineOutput) {
+        # Single combined CSV file
+        Write-LogHost "Output File: $OutputFile (combined - all activity types)" -ForegroundColor White
+        if ($IncludeUserInfo -and -not $UseEOM) {
+            $entraFile = (Join-Path (Split-Path $OutputFile -Parent) "EntraUsers_MAClicensing_${global:ScriptRunTimestamp}.csv")
+            Write-LogHost "  Entra Users File: $entraFile" -ForegroundColor Gray
+        }
+    } else {
+        # Separate CSV files per activity type
+        $outputDir = Split-Path $OutputFile -Parent
+        # Extract the embedded yyyyMMdd_HHmmss timestamp from the output filename
+        $timestamp = [System.IO.Path]::GetFileNameWithoutExtension($OutputFile) -replace '.*_(\d{8}_\d{6}).*', '$1'
+        Write-LogHost "Output Directory: $outputDir\" -ForegroundColor White
+        # NOTE(review): double underscore is presumably a placeholder where the
+        # activity-type name is inserted per file — confirm against export code.
+        Write-LogHost "Output Files: ${outputDir}\Purview_Audit__${timestamp}.csv" -ForegroundColor Gray
+        if ($IncludeUserInfo -and -not $UseEOM) {
+            $entraFile = "${outputDir}\EntraUsers_MAClicensing_${global:ScriptRunTimestamp}.csv"
+            Write-LogHost "  Entra Users: $entraFile" -ForegroundColor Gray
+        }
+    }
+}
+
+Write-LogHost "Log File: $LogFile" -ForegroundColor White
+# RAW replay mode needs no authentication, so Auth is only shown for live runs
+if (-not $RAWInputCSV) {
+    Write-LogHost "Authentication: $Auth" -ForegroundColor White
+}
+
+# Summarize any post-processing filters that are in effect for this run.
+if ($AgentId -or $AgentsOnly -or $ExcludeAgents -or $PromptFilter -or $UserIds -or $GroupNames) {
+    Write-LogHost "Filters:" -ForegroundColor Yellow
+    if ($AgentsOnly) { Write-LogHost "  AgentsOnly: Only records with AgentId present" -ForegroundColor Gray }
+    if ($AgentId) {
+        # 1 ID: show inline; 2-3 IDs: join inline; >3: header line plus itemized list below
+        $agentDisplay = if ($AgentId.Count -eq 1) {
+            "Specific AgentId: $($AgentId[0])"
+        }
+        elseif ($AgentId.Count -le 3) {
+            "Specific AgentIds ($($AgentId.Count)): " + ($AgentId -join '; ')
+        }
+        else {
+            "Specific AgentIds ($($AgentId.Count) total):"
+        }
+        Write-LogHost "  $agentDisplay" -ForegroundColor Gray
+        if ($AgentId.Count -gt 3) {
+            # Show the first three IDs (truncated to 80 chars) plus a "more" summary.
+            # (Count > 3 is already guaranteed here, so no Min() or re-check is needed.)
+            for ($i = 0; $i -lt 3; $i++) {
+                $displayId = if ($AgentId[$i].Length -gt 80) { $AgentId[$i].Substring(0, 77) + '...' } else { $AgentId[$i] }
+                Write-LogHost "      [$($i+1)] $displayId" -ForegroundColor DarkGray
+            }
+            Write-LogHost "      ... and $($AgentId.Count - 3) more" -ForegroundColor DarkGray
+        }
+    }
+    if ($ExcludeAgents) { Write-LogHost "  ExcludeAgents: Only records without AgentId" -ForegroundColor Gray }
+    if ($PromptFilter) {
+        # Map the PromptFilter enum value to a human-readable description
+        $promptLabel = switch ($PromptFilter) {
+            'Prompt'   { 'Only prompts (Message_isPrompt = True)' }
+            'Response' { 'Only responses (Message_isPrompt = False)' }
+            'Both'     { 'Both prompts and responses (Message_isPrompt = True or False)' }
+            'Null'     { 'Only records with no Message_isPrompt values (Null/Empty)' }
+        }
+        Write-LogHost "  PromptFilter: $promptLabel" -ForegroundColor Gray
+    }
+    if ($UserIds -or $GroupNames) {
+        # Only counts are shown here (not the values) to keep the banner compact
+        if ($UserIds) {
+            if ($UserIds.Count -eq 1) { Write-LogHost "  UserIds: 1 user" -ForegroundColor Gray } else { Write-LogHost "  UserIds: $($UserIds.Count) users" -ForegroundColor Gray }
+        }
+        if ($GroupNames) {
+            if ($GroupNames.Count -eq 1) { Write-LogHost "  GroupNames: 1 group" -ForegroundColor Gray } else { Write-LogHost "  GroupNames: $($GroupNames.Count) groups" -ForegroundColor Gray }
+        }
+    }
+}
+
+Write-LogHost "=============================================" -ForegroundColor Cyan
+Write-LogHost ""
+
+# Now perform AppendFile validation if needed (after banner display)
+if ($AppendFile) {
+
+    # Compares the existing file's column layout against the explosion mode of
+    # this run; appending with a different mode would mix incompatible schemas.
+    $validation = Test-AppendFileCompatibility `
+        -FilePath $OutputFile `
+        -IsExcel $ExportWorkbook `
+        -ExplodeArrays:$ExplodeArrays `
+        -ExplodeDeep:$ExplodeDeep
+
+    if (-not $validation.Compatible) {
+        # Hard failure: explain the mismatch and abort before any data is written
+        Write-Host ""
+        Write-Host "════════════════════════════════════════════════════════════════════" -ForegroundColor Red
+        Write-Host "  ERROR: Explosion Parameter Mismatch - Cannot Append" -ForegroundColor Red
+        Write-Host "════════════════════════════════════════════════════════════════════" -ForegroundColor Red
+        Write-Host ""
+        Write-Host "The existing file was created with different explosion parameters than" -ForegroundColor Yellow
+        Write-Host "the current command. Appending would create incompatible data structures." -ForegroundColor Yellow
+        Write-Host ""
+        Write-Host "Existing file: $OutputFile" -ForegroundColor White
+        Write-Host "  Columns: $($validation.ExistingCount)" -ForegroundColor Gray
+        Write-Host "  Mode: $($validation.ExistingMode.DisplayName)" -ForegroundColor Gray
+        Write-Host ""
+        Write-Host "Current command:" -ForegroundColor White
+        Write-Host "  Mode: $($validation.CurrentMode.DisplayName)" -ForegroundColor Gray
+        Write-Host ""
+        Write-Host "Root Cause:" -ForegroundColor Cyan
+        Write-Host "  Explosion parameters must match between original file and append operation." -ForegroundColor Yellow
+        Write-Host ""
+        Write-Host "Resolution Options:" -ForegroundColor Cyan
+        Write-Host "  1. Match the original file's parameters:" -ForegroundColor White
+        Write-Host "     Use: $($validation.ExistingMode.DisplayName)" -ForegroundColor Gray
+        Write-Host "  2. Create new output file instead:" -ForegroundColor White
+        Write-Host "     Remove -AppendFile parameter" -ForegroundColor Gray
+
+        if ($ExportWorkbook) {
+            Write-Host ""
+            Write-Host "Note for Excel mode:" -ForegroundColor DarkGray
+            Write-Host "  If parameters matched, mismatched columns would create timestamped" -ForegroundColor DarkGray
+            Write-Host "  duplicate tabs instead of appending (no data loss)." -ForegroundColor DarkGray
+        }
+        else {
+            Write-Host ""
+            Write-Host "CRITICAL for CSV mode:" -ForegroundColor Yellow
+            Write-Host "  CSV append with mismatched explosion parameters creates CORRUPTED files!" -ForegroundColor Yellow
+            Write-Host "  This validation prevents data corruption by failing early." -ForegroundColor Yellow
+        }
+
+        Write-Host ""
+        Write-Host "════════════════════════════════════════════════════════════════════" -ForegroundColor Red
+        exit 1
+    }
+
+    Write-LogHost "  Explosion parameters compatible ($($validation.CurrentMode.DisplayName)) - safe to append" -ForegroundColor Green
+
+    Write-LogHost "AppendFile mode: Appending to existing file: $OutputFile" -ForegroundColor Cyan
+}
+
+Write-LogHost "=============================================" -ForegroundColor Cyan
+Write-LogHost ""
+if ($ExplodeDeep -and $ExplodeArrays) { Write-LogHost "Note: -ExplodeDeep takes precedence over -ExplodeArrays (arrays will still explode, plus deep flatten)." -ForegroundColor DarkYellow }
+if ($ForcedRawInputCsvExplosion -and -not $ExplodeDeep -and -not $ExplodeArrays.IsPresent) { Write-LogHost "RAWInputCSV provided -> forcing Purview array explosion (non-exploded mode disabled)." -ForegroundColor Yellow }
+# $script:memoryFlushEnabled / $script:ResolvedMaxMemoryMB are computed earlier
+# from -MaxMemoryMB (resolution logic not visible in this section).
+if ($script:memoryFlushEnabled) {
+    $memSource = if ($MaxMemoryMB -eq -1) { "auto-detected" } else { "user-specified" }
+    Write-LogHost "Memory management: $($script:ResolvedMaxMemoryMB)MB limit ($memSource) - will flush to disk when exceeded" -ForegroundColor Cyan
+    Write-LogHost "  Note: Not compatible with explosion modes (-ExplodeDeep/-ExplodeArrays) - those modes require in-memory processing." -ForegroundColor DarkGray
+} elseif ($script:ResolvedMaxMemoryMB -gt 0 -and ($ExplodeDeep -or $ExplodeArrays -or $ForcedRawInputCsvExplosion)) {
+    Write-LogHost "Note: Memory limit ($($script:ResolvedMaxMemoryMB)MB) ignored because explosion mode is active" -ForegroundColor DarkYellow
+}
+
+# Build the parameter snapshot (an ordered dictionary logged later) — one shape
+# for RAW CSV replay mode, another for live (EOM / Graph API) query modes.
+if ($RAWInputCSV) {
+    # Build snapshot then optionally inject EntraUsersOutput immediately after OutputFile
+    $paramSnapshot = [ordered]@{
+        Mode                      = $scriptMode
+        RAWInputCSV               = $RAWInputCSV
+        'StartDate (inclusive)'   = $StartDate
+        'EndDate (exclusive)'     = $EndDate
+        ActivityTypes             = ($ActivityTypes -join ';')
+        ExcludeCopilotInteraction = $ExcludeCopilotInteraction.IsPresent
+        ExplodeArrays             = $ForcedRawInputCsvExplosion
+        ExplodeDeep               = $ExplodeDeep.IsPresent
+        UseEOM                    = $UseEOM.IsPresent
+        MaxMemoryMB               = $(if ($script:ResolvedMaxMemoryMB -eq 0) { 'Off' } else { "$($script:ResolvedMaxMemoryMB)MB" + $(if ($MaxMemoryMB -eq -1) { ' (auto)' } else { '' }) })
+        MaxPartitions             = $MaxPartitions
+        ResultSize                = $ResultSize
+        PacingMs                  = $PacingMs
+        ExportWorkbook            = $ExportWorkbook.IsPresent
+        CombineOutput             = $CombineOutput.IsPresent
+        AppendFile                = $(if ($AppendFile) { $AppendFile } else { '' })
+        Force                     = $Force.IsPresent
+        SkipDiagnostics           = $SkipDiagnostics.IsPresent
+        AutoCompleteness          = $AutoCompleteness.IsPresent
+        EmitMetricsJson           = $EmitMetricsJson.IsPresent
+        MetricsPath               = $(if ($MetricsPath) { $MetricsPath } else { '' })
+        StreamingSchemaSample     = $StreamingSchemaSample
+        StreamingChunkSize        = $StreamingChunkSize
+        OutputFile                = $OutputFile
+        LogFile                   = $LogFile
+        PSVersion                 = $PSVersionTable.PSVersion.ToString()
+        PSEdition                 = $PSVersionTable.PSEdition
+        HostName                  = $Host.Name
+        HostVersion               = $(try { $Host.Version.ToString() } catch { '' })
+    }
+    # Copilot counts as included if the switch is set OR the base type was listed explicitly
+    $copilotIncluded = $IncludeCopilotInteraction.IsPresent -or ($ActivityTypes -contains $copilotBaseActivityType)
+    $paramSnapshot['IncludeCopilotInteraction'] = $copilotIncluded
+    if ($IncludeUserInfo -and -not $UseEOM) {
+        $entraPath = if ($ExportWorkbook) { 'Workbook Tab: EntraUsers' } else { (Join-Path (Split-Path $OutputFile -Parent) "EntraUsers_MAClicensing_${global:ScriptRunTimestamp}.csv") }
+        # Rebuild ordered snapshot with EntraUsersOutput immediately after OutputFile
+        # (ordered dictionaries have no insert-at-position, so re-copy key by key)
+        $newSnap = [ordered]@{}
+        foreach ($k in $paramSnapshot.Keys) {
+            $newSnap[$k] = $paramSnapshot[$k]
+            if ($k -eq 'OutputFile') { $newSnap['EntraUsersOutput'] = $entraPath }
+        }
+        $paramSnapshot = $newSnap
+    }
+}
+else {
+    # Smart parameter snapshot: Show only applicable parameters for the chosen query mode
+    $paramSnapshot = [ordered]@{
+        'StartDate (inclusive)' = $StartDate
+        'EndDate (exclusive)'   = $EndDate
+        OutputFile              = $OutputFile
+        LogFile                 = $LogFile
+    }
+
+    # Authentication (both modes, but different usage)
+    if ($UseEOM) {
+        # EOM mode: Auth parameter controls connection
+        $paramSnapshot['Auth'] = $Auth
+    } else {
+        # Graph API mode: Auth parameter controls connection
+        $paramSnapshot['Auth'] = $Auth
+        # Capture AppRegistration context (without exposing secrets)
+        # Env-var fallbacks are recorded as placeholder markers, never raw values
+        $paramSnapshot['TenantId'] = $(if ($TenantId) { $TenantId } elseif ($env:GRAPH_TENANT_ID) { '[GRAPH_TENANT_ID]' } else { '' })
+        $paramSnapshot['ClientId'] = $(if ($ClientId) { $ClientId } elseif ($env:GRAPH_CLIENT_ID) { '[GRAPH_CLIENT_ID]' } else { '' })
+        if ($PSBoundParameters.ContainsKey('ClientSecret') -or $env:GRAPH_CLIENT_SECRET) {
+            $paramSnapshot['ClientSecret'] = '[securestring provided]'
+        }
+        if ($PSBoundParameters.ContainsKey('ClientCertificateThumbprint')) {
+            $paramSnapshot['ClientCertificateThumbprint'] = $ClientCertificateThumbprint
+        }
+        if ($PSBoundParameters.ContainsKey('ClientCertificateStoreLocation')) {
+            $paramSnapshot['ClientCertificateStoreLocation'] = $ClientCertificateStoreLocation
+        }
+        if ($PSBoundParameters.ContainsKey('ClientCertificatePath')) {
+            $paramSnapshot['ClientCertificatePath'] = $ClientCertificatePath
+        }
+        if ($PSBoundParameters.ContainsKey('ClientCertificatePassword')) {
+            $paramSnapshot['ClientCertificatePassword'] = '[securestring provided]'
+        }
+    }
+
+    # Query parameters specific to each mode
+    if ($UseEOM) {
+        # EOM-specific: Search-UnifiedAuditLog parameters
+        $paramSnapshot['BlockHours'] = $BlockHours
+        $paramSnapshot['MaxPartitions'] = $MaxPartitions
+        $paramSnapshot['ResultSize'] = $ResultSize
+        $paramSnapshot['PacingMs'] = $PacingMs
+    } else {
+        # Graph API-specific: Parallel processing parameters
+        $paramSnapshot['MaxConcurrency'] = $MaxConcurrency
+        $paramSnapshot['ParallelMode'] = $ParallelMode
+        $paramSnapshot['MaxParallelGroups'] = $MaxParallelGroups
+        $paramSnapshot['IncludeUserInfo'] = $IncludeUserInfo.IsPresent
+        $paramSnapshot['OnlyUserInfo'] = $OnlyUserInfo.IsPresent
+        $paramSnapshot['MaxNetworkOutageMinutes'] = $MaxNetworkOutageMinutes
+        $paramSnapshot['PartitionHours'] = if ($PartitionHours -gt 0) { $PartitionHours } else { 'auto' }
+        $paramSnapshot['MaxPartitions'] = $MaxPartitions
+        $paramSnapshot['ResultSize'] = $ResultSize
+        $paramSnapshot['PacingMs'] = $PacingMs
+        $paramSnapshot['MaxMemoryMB'] = $(if ($script:ResolvedMaxMemoryMB -eq 0) { 'Off' } else { "$($script:ResolvedMaxMemoryMB)MB" + $(if ($MaxMemoryMB -eq -1) { ' (auto)' } else { '' }) })
+    }
+
+    # Common toggles and output options
+    $paramSnapshot['UseEOM'] = $UseEOM.IsPresent
+    $paramSnapshot['ExportWorkbook'] = $ExportWorkbook.IsPresent
+    $paramSnapshot['CombineOutput'] = $CombineOutput.IsPresent
+    $paramSnapshot['AppendFile'] = $(if ($AppendFile) { $AppendFile } else { '' })
+    $paramSnapshot['Force'] = $Force.IsPresent
+    $paramSnapshot['SkipDiagnostics'] = $SkipDiagnostics.IsPresent
+    $paramSnapshot['AutoCompleteness'] = $AutoCompleteness.IsPresent
+    $paramSnapshot['EmitMetricsJson'] = $EmitMetricsJson.IsPresent
+    $paramSnapshot['MetricsPath'] = $(if ($MetricsPath) { $MetricsPath } else { '' })
+    $paramSnapshot['StreamingSchemaSample'] = $StreamingSchemaSample
+    $paramSnapshot['StreamingChunkSize'] = $StreamingChunkSize
+
+    # Common parameters (work in both modes)
+    $paramSnapshot['ActivityTypes'] = ($ActivityTypes -join ';')
+    $paramSnapshot['RecordTypes'] = $(if ($RecordTypes) { ($RecordTypes -join ';') } else { '' })
+    $paramSnapshot['ServiceTypes'] = $(if ($ServiceTypes) { ($ServiceTypes -join ';') } else { '' })
+    $copilotIncluded = $IncludeCopilotInteraction.IsPresent -or ($ActivityTypes -contains $copilotBaseActivityType)
+    $paramSnapshot['IncludeCopilotInteraction'] = $copilotIncluded
+    $paramSnapshot['IncludeM365Usage'] = $IncludeM365Usage.IsPresent
+    $paramSnapshot['IncludeDSPMForAI'] = $IncludeDSPMForAI.IsPresent
+    $paramSnapshot['ExcludeCopilotInteraction'] = $ExcludeCopilotInteraction.IsPresent
+
+    # Post-processing filters (work in both modes - applied during/after explosion)
+    $paramSnapshot['AgentsOnly'] = $AgentsOnly.IsPresent
+    $paramSnapshot['AgentId'] = $(if ($AgentId) { ($AgentId -join ';') } else { '' })
+    $paramSnapshot['ExcludeAgents'] = $ExcludeAgents.IsPresent
+    $paramSnapshot['UserId'] = $(if ($UserIds) { ($UserIds -join ';') } else { '' })
+
+    # GroupNames only works in live mode (requires auth for expansion)
+    if (-not $UseEOM -and $GroupNames) {
+        $paramSnapshot['GroupName'] = ($GroupNames -join ';')
+    }
+
+    $paramSnapshot['PromptFilter'] = $(if ($PromptFilter) { $PromptFilter } else { '' })
+
+    # Output format parameters (work in both modes)
+    $paramSnapshot['ExplodeArrays'] = ($ExplodeArrays.IsPresent -or $ForcedRawInputCsvExplosion -or $ExplodeDeep.IsPresent)
+    $paramSnapshot['ExplodeDeep'] = $ExplodeDeep.IsPresent
+    $paramSnapshot['ExplosionThreads'] = $(if ($ExplosionThreads -eq 0) { 'auto' } else { $ExplosionThreads })
+
+    # Environment info
+    $paramSnapshot['PSVersion'] = $PSVersionTable.PSVersion.ToString()
+    $paramSnapshot['PSEdition'] = $PSVersionTable.PSEdition
+    $paramSnapshot['HostName'] = $Host.Name
+    $paramSnapshot['HostVersion'] = $(try { $Host.Version.ToString() } catch { '' })
+
+    # Entra users output reference (Graph mode only, inserted directly under OutputFile)
+    if ($IncludeUserInfo -and -not $UseEOM) {
+        $entraPath = if ($ExportWorkbook) { 'Workbook Tab: EntraUsers' } else { (Join-Path (Split-Path $OutputFile -Parent) "EntraUsers_MAClicensing_${global:ScriptRunTimestamp}.csv") }
+        # Rebuild snapshot to inject after OutputFile
+        # (ordered dictionaries have no insert-at-position, so re-copy key by key)
+        $newSnap = [ordered]@{}
+        foreach ($k in $paramSnapshot.Keys) {
+            $newSnap[$k] = $paramSnapshot[$k]
+            if ($k -eq 'OutputFile') { $newSnap['EntraUsersOutput'] = $entraPath }
+        }
+        $paramSnapshot = $newSnap
+    }
+}
+
+# Predeclare script-scope collections to satisfy StrictMode before first access
+# (guarded so an already-assigned value is not clobbered)
+if (-not (Get-Variable -Name DeepExtraColumns -Scope Script -ErrorAction SilentlyContinue)) { $script:DeepExtraColumns = $null }
+
+<#
+=====================================================================
+ Operational Logic
+=====================================================================
+#>
+
+function Find-AllArrays {
+    <#
+    .SYNOPSIS
+        Recursively catalogs every array/list in an object graph by dotted path.
+    .DESCRIPTION
+        Walks object properties (and dictionary entries) up to 6 levels deep and
+        records each array under its dotted property path ('root' for the top
+        level). Array ELEMENTS are deliberately not recursed into: arrays are
+        terminal values (serialized to JSON later) so column names stay predictable.
+    .PARAMETER Data
+        Root object to inspect.
+    .PARAMETER Path
+        Dotted path accumulated during recursion (empty at the root).
+    .PARAMETER Depth
+        Current recursion depth (internal; capped at 6).
+    .PARAMETER Arrays
+        Accumulator hashtable, shared by reference across recursive calls.
+    .OUTPUTS
+        Hashtable mapping path -> [pscustomobject]@{ Path; Data; Count }.
+    #>
+    param(
+        $Data,
+        [string]$Path = '',
+        [int]$Depth = 0,
+        [hashtable]$Arrays
+    )
+    # Initialize the accumulator BEFORE any early return so callers always get
+    # the shared hashtable back. (The original had two null checks and the first
+    # returned a fresh @{}, discarding the accumulator reference.)
+    if (-not $Arrays) { $Arrays = @{} }
+    if ($null -eq $Data -or $Depth -gt 6) { return $Arrays }
+
+    $isArray = ($Data -is [System.Collections.IEnumerable] -and -not ($Data -is [string]) -and (($Data -is [System.Collections.IList]) -or $Data.GetType().IsArray))
+    if ($isArray) {
+        $key = if ($Path) { $Path } else { 'root' }
+        if (-not $Arrays.ContainsKey($key)) {
+            $Arrays[$key] = [pscustomobject]@{ Path = $Path; Data = $Data; Count = ($Data | Measure-Object).Count }
+        }
+    }
+
+    $props = $null
+    if ($Data -is [System.Management.Automation.PSObject]) { $props = $Data.PSObject.Properties }
+    elseif ($Data -is [System.Collections.IDictionary]) { $props = $Data.GetEnumerator() }
+
+    if ($props) {
+        foreach ($p in $props) {
+            # DictionaryEntry exposes Key/Value; PSPropertyInfo exposes Name/Value
+            $name = if ($p -is [System.Collections.DictionaryEntry]) { $p.Key } else { $p.Name }
+            $val = $p.Value
+            $childPath = if ($Path) { "$Path.$name" } else { $name }
+            Find-AllArrays -Data $val -Path $childPath -Depth ($Depth + 1) -Arrays $Arrays | Out-Null
+        }
+    }
+    # Note: Do NOT recurse into array elements - arrays are treated as terminal values
+    # that will be converted to JSON strings for predictable column names
+    return $Arrays
+}
+
+function Test-ScalarValue {
+    # Returns $true when $v is $null or one of the primitive/scalar types that can
+    # be written to a CSV cell as-is without further flattening; $false otherwise.
+    param($v)
+    if ($null -eq $v) { return $true }
+    foreach ($scalarType in @([string], [char], [bool], [int], [long], [double], [decimal], [float], [datetime], [guid])) {
+        if ($v -is $scalarType) { return $true }
+    }
+    return $false
+}
+
+function Import-CsvToDataTable {
+    <#
+    .SYNOPSIS
+    Imports a CSV file directly into a System.Data.DataTable using fast .NET StreamReader.
+
+    .DESCRIPTION
+    This is 10-50x faster than Import-Csv | ConvertTo-DataTable for large files because it:
+    1. Uses .NET StreamReader instead of PowerShell's Import-Csv
+    2. Avoids creating intermediate PSObjects
+    3. Parses CSV directly into DataTable rows
+    Handles quoted fields containing commas, escaped quotes ("") and embedded
+    newlines (a quoted field may span multiple physical lines).
+
+    .PARAMETER Path
+    The path to the CSV file to import.
+
+    .OUTPUTS
+    System.Data.DataTable
+    #>
+    param(
+        [Parameter(Mandatory = $true)]
+        [string]$Path
+    )
+
+    $dataTable = New-Object System.Data.DataTable
+    $reader = $null
+
+    try {
+        $reader = New-Object System.IO.StreamReader($Path, [System.Text.Encoding]::UTF8)
+        $lineNum = 0
+        $columns = @()
+
+        while ($null -ne ($line = $reader.ReadLine())) {
+            # A record spans multiple physical lines when a quoted field contains
+            # newlines. An odd total of double quotes means an unterminated quote
+            # (escaped "" contributes an even count), so keep appending lines.
+            while (((($line -split '"').Count - 1) % 2) -eq 1 -and -not $reader.EndOfStream) {
+                $line += "`n" + $reader.ReadLine()
+            }
+
+            # Parse CSV line (handles quoted fields with commas)
+            $fields = [System.Collections.Generic.List[string]]::new()
+            $field = [System.Text.StringBuilder]::new()
+            $inQuotes = $false
+
+            for ($i = 0; $i -lt $line.Length; $i++) {
+                $c = $line[$i]
+                if ($c -eq '"') {
+                    # "" inside a quoted field is an escaped quote character
+                    if ($inQuotes -and $i + 1 -lt $line.Length -and $line[$i + 1] -eq '"') {
+                        [void]$field.Append('"')
+                        $i++
+                    } else {
+                        $inQuotes = -not $inQuotes
+                    }
+                } elseif ($c -eq ',' -and -not $inQuotes) {
+                    [void]$fields.Add($field.ToString())
+                    [void]$field.Clear()
+                } else {
+                    [void]$field.Append($c)
+                }
+            }
+            [void]$fields.Add($field.ToString())
+
+            if ($lineNum -eq 0) {
+                # Header row - create columns
+                $columns = $fields.ToArray()
+                foreach ($col in $columns) {
+                    [void]$dataTable.Columns.Add($col, [string])
+                }
+            } else {
+                # Data row; extra fields beyond the header count are ignored,
+                # missing trailing fields stay DBNull
+                $row = $dataTable.NewRow()
+                for ($j = 0; $j -lt [Math]::Min($columns.Count, $fields.Count); $j++) {
+                    $val = $fields[$j]
+                    $row[$j] = if ([string]::IsNullOrEmpty($val)) { [DBNull]::Value } else { $val }
+                }
+                [void]$dataTable.Rows.Add($row)
+            }
+            $lineNum++
+        }
+    }
+    finally {
+        if ($reader) { $reader.Dispose() }
+    }
+
+    # Leading comma prevents PowerShell from enumerating the DataTable's rows
+    return ,$dataTable
+}
+
+function ConvertTo-DataTable {
+    <#
+    .SYNOPSIS
+    Converts an array of PSObjects to a System.Data.DataTable for high-performance Excel export.
+
+    .DESCRIPTION
+    Export-Excel with piped PSObjects processes cells one-by-one (~400 cells/sec), which is extremely
+    slow for large datasets. Send-SQLDataToExcel with DataTable uses bulk insert and is 100-1000x faster.
+    This function converts PSObject arrays to DataTable format for use with Send-SQLDataToExcel.
+    Columns are derived from the FIRST object; later objects missing a column
+    yield DBNull for it, and extra properties on later objects are ignored.
+
+    .PARAMETER InputObject
+    The array of PSObjects to convert to a DataTable.
+
+    .OUTPUTS
+    System.Data.DataTable
+    #>
+    param(
+        [Parameter(Mandatory = $true, ValueFromPipeline = $true)]
+        [object[]]$InputObject
+    )
+
+    begin {
+        $dataTable = New-Object System.Data.DataTable
+        $isFirstRow = $true
+        $columns = @()
+    }
+
+    process {
+        foreach ($obj in $InputObject) {
+            if ($isFirstRow) {
+                # First object defines the column schema (all columns typed [string])
+                $columns = @($obj.PSObject.Properties.Name)
+                foreach ($colName in $columns) {
+                    [void]$dataTable.Columns.Add($colName, [string])
+                }
+                $isFirstRow = $false
+            }
+
+            $row = $dataTable.NewRow()
+            foreach ($colName in $columns) {
+                # Look the property up via PSObject.Properties so heterogeneous
+                # objects that lack a column produce DBNull instead of failing
+                # under Set-StrictMode (as direct $obj.$colName access would).
+                $prop = $obj.PSObject.Properties[$colName]
+                $val = if ($prop) { $prop.Value } else { $null }
+                $row[$colName] = if ($null -eq $val) { [DBNull]::Value } else { [string]$val }
+            }
+            [void]$dataTable.Rows.Add($row)
+        }
+    }
+
+    end {
+        # Leading comma prevents PowerShell from enumerating the DataTable's rows
+        return ,$dataTable
+    }
+}
+
+function Export-DataTableToExcel {
+    <#
+    .SYNOPSIS
+    High-performance Excel export using DataTable bulk insert method.
+
+    .DESCRIPTION
+    Wrapper function that converts PSObjects to DataTable and exports using Send-SQLDataToExcel.
+    This is 100-1000x faster than piping to Export-Excel for large datasets.
+    NOTE(review): Send-SQLDataToExcel is provided by the ImportExcel module —
+    presumably imported earlier in the script; confirm the module check.
+
+    .PARAMETER Data
+    The array of PSObjects to export.
+
+    .PARAMETER Path
+    The path to the Excel file.
+
+    .PARAMETER WorksheetName
+    The name of the worksheet/tab.
+    #>
+    param(
+        [Parameter(Mandatory = $true)]
+        [object[]]$Data,
+
+        [Parameter(Mandatory = $true)]
+        [string]$Path,
+
+        [Parameter(Mandatory = $true)]
+        [string]$WorksheetName
+    )
+
+    $dataTable = $Data | ConvertTo-DataTable
+    # -NoNumberConversion '*' keeps every value as text (no Excel type coercion)
+    Send-SQLDataToExcel -DataTable $dataTable -Path $Path -WorkSheetName $WorksheetName -Force -FreezeTopRow -BoldTopRow -AutoSize -NoNumberConversion '*'
+}
+
+function ConvertTo-UniqueString {
+    # Joins the distinct, non-empty values of $items into a single $Sep-separated
+    # string; returns $null when $items is $null. First-seen order is preserved
+    # explicitly via a parallel list (the original joined the HashSet directly,
+    # relying on enumeration order that .NET does not guarantee).
+    param([object]$items, [char]$Sep = ';')
+    if ($null -eq $items) { return $null }
+    $seen = New-Object 'System.Collections.Generic.HashSet[string]'
+    $ordered = New-Object 'System.Collections.Generic.List[string]'
+    foreach ($v in $items) {
+        if ($null -ne $v -and $v -ne '') {
+            $s = [string]$v
+            # HashSet.Add returns $false for duplicates, so each value is kept once
+            if ($seen.Add($s)) { [void]$ordered.Add($s) }
+        }
+    }
+    ([string]::Join($Sep, $ordered))
+}
+
+function ConvertTo-FlatColumns {
+    # Flattens a nested object graph into a hashtable of dotted-path column
+    # names -> scalar/JSON values, up to $MaxDepth levels.
+    #   - scalars become direct values
+    #   - single-element arrays are unwrapped (no index in the column name)
+    #   - multi-element arrays are serialized to compressed JSON
+    #   - empty arrays become ''
+    param([object]$Node, [string]$Prefix = '', [int]$MaxDepth = 60)
+    $cols = @{}
+    # Inner helper closes over $cols; trailing '.' on paths is trimmed at write time
+    function Recurse([object]$n, [string]$p, [int]$d) {
+        if ($d -gt $MaxDepth) { return }
+        if ($null -eq $n) { if ($p) { $cols[$p.TrimEnd('.')] = $null }; return }
+        if (Test-ScalarValue $n) { if ($p) { $cols[$p.TrimEnd('.')] = $n }; return }
+        if ($n -is [System.Collections.IEnumerable] -and -not ($n -is [string]) -and -not ($n -is [System.Collections.IDictionary])) {
+            # Smart array handling: single-element arrays recurse without index, multi-element become JSON
+            $arr = @($n)
+            if ($arr.Count -eq 1) {
+                # Single element: recurse into it without adding index to path (clean column names)
+                Recurse -n $arr[0] -p $p -d ($d + 1)
+            } elseif ($arr.Count -gt 1) {
+                # Multiple elements: serialize to JSON (row explosion handles important arrays separately)
+                if ($p) {
+                    try { $cols[$p.TrimEnd('.')] = ($n | ConvertTo-Json -Depth 10 -Compress -ErrorAction SilentlyContinue) }
+                    catch { $cols[$p.TrimEnd('.')] = '' }
+                }
+            } else {
+                # Empty array
+                if ($p) { $cols[$p.TrimEnd('.')] = '' }
+            }
+            return
+        }
+        # Objects: recurse into each property, appending '<name>.' to the path
+        $props = $null; try { $props = $n.PSObject.Properties } catch {}
+        if ($props) {
+            foreach ($prop in $props) { $name = [string]$prop.Name; $child = $prop.Value; $cp = if ($p) { $p + $name + '.' } else { $name + '.' }; Recurse -n $child -p $cp -d ($d + 1) }
+        }
+    }
+    Recurse -n $Node -p $Prefix -d 0
+    return $cols
+}
+
+function To-RecordArray {
+ <#
+ .SYNOPSIS
+ Normalizes a record set into a plain array: $null -> empty, scalar/PSObject -> 1 element,
+ other enumerables -> one element per item.
+ .PARAMETER records
+ Value to normalize. Strings and PSObjects are treated as single records, not enumerated.
+ #>
+ param($records)
+ # Accumulate in a List: the original '$result += ,$r' re-allocated the array on every
+ # append (O(n^2) for large record sets). ToArray() preserves the same return shape.
+ $buffer = New-Object System.Collections.Generic.List[object]
+ if ($null -ne $records) {
+ $isEnumerable = ($records -is [System.Collections.IEnumerable])
+ $isScalarish = ($records -is [string] -or $records -is [System.Management.Automation.PSObject] -or $records -is [System.Management.Automation.PSCustomObject])
+ if ($isEnumerable -and -not $isScalarish) {
+ foreach ($r in $records) { $buffer.Add($r) }
+ }
+ else {
+ $buffer.Add($records)
+ }
+ }
+ return $buffer.ToArray()
+}
+
+# Best-effort dot-source of the shared M365 usage schema helper.
+# Tries the repo-relative library location first, then falls back to a copy
+# sitting next to this script. Failures are deliberately ignored: the helper
+# is optional and the script must still run standalone.
+try {
+ $scriptDir = Split-Path -Parent $MyInvocation.MyCommand.Path
+ $schemaHelper = Join-Path $scriptDir "..\..\scripts\lib\M365UsageSchema.ps1"
+ if (-not (Test-Path $schemaHelper)) { $schemaHelper = Join-Path $scriptDir "M365UsageSchema.ps1" }
+ if (Test-Path $schemaHelper) { . $schemaHelper }
+} catch {}
+
+function Invoke-ReplayInlineExport {
+ <#
+ .SYNOPSIS
+ Streams replayed audit records through the explosion pipeline and writes them to $OutputFile.
+ .DESCRIPTION
+ Builds the unified replay header from the input CSV, explodes each record via
+ Convert-ToPurviewExplodedRecords, writes rows to a temp CSV, publishes the temp file
+ over $OutputFile, then prints an explosion summary. Reads the script-scoped
+ $RAWInputCSV, $OutputFile, $ExplodeDeep and $PromptFilter values.
+ .PARAMETER Logs
+ Enumerable of raw records (each carrying an AuditData payload) to export.
+ #>
+ param(
+ [Parameter(Mandatory)] [System.Collections.IEnumerable]$Logs
+ )
+ Write-LogHost "Replay inline export starting..." -ForegroundColor Magenta
+ $exportTemp = Join-Path ([System.IO.Path]::GetTempPath()) ("pax_export_" + [guid]::NewGuid().ToString() + ".tmp")
+ # Unified header: auto-detect all activity types from input CSV (no switch required)
+ $columnOrder = Get-UnifiedReplayHeader -RawCsvPath $RAWInputCSV
+ Open-CsvWriter -Path $exportTemp -Columns $columnOrder
+ $total = 0
+ $idx = 0
+ $errCount = 0
+ foreach ($log in $Logs) {
+ $idx++
+ if ($idx % 5000 -eq 0) { Write-LogHost ("Replay inline progress: {0} records" -f $idx) -ForegroundColor DarkGray }
+ try {
+ $records = Convert-ToPurviewExplodedRecords -Record $log -Deep:$ExplodeDeep -PromptFilterValue $PromptFilter
+ $recordsArr = To-RecordArray $records
+ if ($recordsArr.Count -gt 0) {
+ $total += $recordsArr.Count
+ # Project each exploded record onto the unified column order before writing
+ $emitSet = $recordsArr | ForEach-Object { $_ | Select-Object -Property $columnOrder }
+ $rowsOut = @($emitSet)
+ if ($rowsOut.Count -gt 0) { Write-CsvRows -Rows $rowsOut -Columns $columnOrder }
+ }
+ } catch {
+ # Per-record failures are counted and reported in the summary below
+ $errCount++
+ }
+ }
+ try { Close-CsvWriter } catch {}
+ # Publish: move the temp file over the final output. A failure here used to be
+ # silently swallowed, leaving an orphaned temp file and no diagnostic; now the
+ # error is logged and the temp file is cleaned up.
+ try { Move-Item -Force -Path $exportTemp -Destination $OutputFile }
+ catch {
+ Write-LogHost ("Replay inline export: failed to publish output to {0}: {1}" -f $OutputFile, $_.Exception.Message) -ForegroundColor Red
+ try { Remove-Item -Force -Path $exportTemp -ErrorAction SilentlyContinue } catch {}
+ }
+ try { $script:metrics.TotalStructuredRows = $total } catch {}
+ Write-LogHost ("Replay inline export complete: {0} rows" -f $total) -ForegroundColor Green
+
+ # Explosion summary for replay mode
+ Write-LogHost ""
+ Write-LogHost "=== REPLAY EXPLOSION SUMMARY ===" -ForegroundColor Cyan
+ Write-LogHost (" Input records: {0:N0}" -f $idx) -ForegroundColor White
+ Write-LogHost (" Output rows: {0:N0}" -f $total) -ForegroundColor White
+ if ($total -gt $idx) {
+ $explosionRatio = [Math]::Round($total / $idx, 2)
+ Write-LogHost (" Expansion: {0}x ({1:N0} additional rows from array explosion)" -f $explosionRatio, ($total - $idx)) -ForegroundColor Green
+ } elseif ($total -eq $idx) {
+ Write-LogHost " Expansion: 1:1 (no arrays exploded)" -ForegroundColor Yellow
+ } else {
+ Write-LogHost (" Reduction: {0:N0} records filtered out" -f ($idx - $total)) -ForegroundColor DarkYellow
+ }
+ if ($errCount -gt 0) {
+ Write-LogHost (" Errors: {0:N0} record(s) failed to process" -f $errCount) -ForegroundColor Red
+ }
+ Write-LogHost (" Output file: {0}" -f $OutputFile) -ForegroundColor Gray
+ Write-LogHost ""
+}
+
+function Get-SafeProperty {
+ # Returns $obj.$name when the object has that property; otherwise $null.
+ # Never throws — any access error is swallowed and reported as $null.
+ param($obj, [string]$name)
+ try {
+ if ($null -ne $obj -and $obj.PSObject.Properties[$name]) {
+ return $obj.($name)
+ }
+ } catch {}
+ return $null
+}
+
+# --- Purview Exploded Schema ---
+# Column order for exploded Copilot/AI interaction rows. This order is the CSV header
+# contract for replay exports (see Get-UnifiedReplayHeader), so append new columns at
+# the end of the relevant group rather than reordering existing ones.
+$PurviewExplodedHeader = @(
+ 'RecordId', 'CreationDate', 'RecordType', 'Operation', 'UserId', 'OrganizationId', 'Workload', 'UserType', 'UserKey',
+ 'Version', 'Id', 'RecordTypeNum', 'ResultStatus_Audit', 'AppId', 'ClientAppId', 'CorrelationId',
+ 'ModelId', 'ModelProvider', 'ModelFamily', 'TokensTotal', 'TokensInput', 'TokensOutput',
+ 'DurationMs', 'OutcomeStatus', 'ConversationId', 'TurnNumber', 'RetryCount', 'ClientVersion', 'ClientPlatform',
+ 'AssociatedAdminUnits', 'AssociatedAdminUnitsNames',
+ 'AgentId', 'AgentName', 'AgentVersion', 'AgentCategory',
+ 'AppIdentity', 'AppIdentity_DisplayName', 'AppIdentity_PublisherId', 'ApplicationName',
+ 'CreationTime', 'ClientRegion', 'ClientIP', 'AppHost', 'ThreadId', 'Context_Id', 'Context_Type', 'Message_Id',
+ 'Message_isPrompt', 'AccessedResource_Action', 'AccessedResource_PolicyDetails', 'AccessedResource_SiteUrl',
+ 'AISystemPlugin_Id', 'AISystemPlugin_Name', 'ModelTransparencyDetails_ModelName', 'MessageIds',
+ 'CopilotLogVersion',
+ # DSPM for AI: Additional columns for enhanced DSPM activity types
+ 'AccessedResource_Name', 'AccessedResource_SensitivityLabel', 'AccessedResource_ResourceType',
+ 'SensitivityLabel', 'Context_Item'
+)
+
+# --- M365 Usage Base Header ---
+# Base column set for non-Copilot M365 usage rows: core audit fields plus pivoted
+# ExtendedProperties.*/DeviceProperties.* names, SharePoint/OneDrive fields, and
+# flattened AppAccessContext.* columns. Order defines the CSV header — append, don't reorder.
+$M365UsageBaseHeader = @(
+ 'RecordId','CreationDate','RecordType','Operation','UserId','AuditData','AssociatedAdminUnits','AssociatedAdminUnitsNames','CreationTime','Id','OrganizationId','ResultStatus','UserKey','UserType','Version','Workload','ClientIP','ObjectId','AzureActiveDirectoryEventType','ExtendedProperties','ExtendedProperties.ResultStatusDetail','ExtendedProperties.Name','ExtendedProperties.Value','ExtendedProperties.UserAgent','ExtendedProperties.RequestType','ModifiedProperties','Actor','Actor.ID','Actor.Type','ActorContextId','ActorIpAddress','InterSystemsId','IntraSystemId','SupportTicketId','Target','Target.ID','Target.Type','TargetContextId','ApplicationId','DeviceProperties','DeviceProperties.OS','DeviceProperties.Name','DeviceProperties.Value','DeviceProperties.BrowserType','DeviceProperties.SessionId','ErrorNumber','ExtendedProperties.KeepMeSignedIn','DeviceProperties.Id','DeviceProperties.DisplayName','DeviceProperties.TrustType','ExtendedProperties.UserAuthenticationMethod','DeviceProperties.IsCompliant','DeviceProperties.IsCompliantAndManaged',
+ # SharePoint / OneDrive
+ 'SiteUrl','SourceRelativeUrl','SourceFileName','SourceFileExtension','ListId','ListItemUniqueId','WebId','ApplicationDisplayName','EventSource','ItemType','SiteSensitivityLabelId','GeoLocation','IsManagedDevice','DeviceDisplayName','ListBaseType','ListServerTemplate','AuthenticationType','Site','DoNotDistributeEvent','HighPriorityMediaProcessing',
+ # App Access Context
+ 'AppAccessContext.ClientAppId','AppAccessContext.ClientAppName','AppAccessContext.CorrelationId','AppAccessContext.AADSessionId','AppAccessContext.UniqueTokenId','AppAccessContext.AuthTime','AppAccessContext.TokenIssuedAtTime','AppAccessContext.UserObjectId','AppAccessContext.DeviceId'
+)
+
+# --- Unified Replay Header (auto-detects all activity types) ---
+# Scans input CSV to detect columns from any record type, merges with PurviewExplodedHeader for Copilot
+# Skips CopilotEventData.* paths since explosion produces flat column names
+function Get-UnifiedReplayHeader {
+ <#
+ .SYNOPSIS
+ Builds the unified CSV header for replay exports.
+ .DESCRIPTION
+ Samples AuditData JSON from the input CSV to auto-detect column paths, then merges:
+ base columns + augmented SharePoint/AppAccessContext columns + detected paths +
+ the flat Copilot columns from $PurviewExplodedHeader. Duplicates are suppressed
+ while preserving first-seen order. The resulting header is also written to
+ UnifiedReplayHeader.txt next to the input CSV (best effort).
+ .PARAMETER RawCsvPath
+ Path to the raw input CSV containing an AuditData column. May be empty/missing.
+ .PARAMETER Sample
+ Number of CSV rows to sample for column detection. Default 500.
+ #>
+ param(
+ [Parameter(Mandatory)][string]$RawCsvPath,
+ [int]$Sample = 500
+ )
+ # Base columns common to all activity types
+ $base = @('RecordId','CreationDate','RecordType','Operation','UserId','AuditData','AssociatedAdminUnits','AssociatedAdminUnitsNames','CreationTime','Id','OrganizationId','ResultStatus','UserKey','UserType','Version','Workload','ClientIP','ObjectId','AzureActiveDirectoryEventType','ExtendedProperties','ExtendedProperties.ResultStatusDetail','ExtendedProperties.Name','ExtendedProperties.Value','ExtendedProperties.UserAgent','ExtendedProperties.RequestType','ModifiedProperties','Actor','Actor.ID','Actor.Type','ActorContextId','ActorIpAddress','InterSystemsId','IntraSystemId','SupportTicketId','Target','Target.ID','Target.Type','TargetContextId','ApplicationId','DeviceProperties','DeviceProperties.OS','DeviceProperties.Name','DeviceProperties.Value','DeviceProperties.BrowserType','DeviceProperties.SessionId','ErrorNumber','ExtendedProperties.KeepMeSignedIn','DeviceProperties.Id','DeviceProperties.DisplayName','DeviceProperties.TrustType','ExtendedProperties.UserAuthenticationMethod','DeviceProperties.IsCompliant','DeviceProperties.IsCompliantAndManaged')
+ $aug = @(
+ 'SiteUrl','SourceRelativeUrl','SourceFileName','SourceFileExtension','ListId','ListItemUniqueId','WebId','ApplicationDisplayName','EventSource','ItemType','SiteSensitivityLabelId','GeoLocation','IsManagedDevice','DeviceDisplayName','ListBaseType','ListServerTemplate','AuthenticationType','Site','DoNotDistributeEvent','HighPriorityMediaProcessing',
+ 'AppAccessContext.ClientAppId','AppAccessContext.ClientAppName','AppAccessContext.CorrelationId','AppAccessContext.AADSessionId','AppAccessContext.UniqueTokenId','AppAccessContext.AuthTime','AppAccessContext.TokenIssuedAtTime','AppAccessContext.UserObjectId','AppAccessContext.DeviceId','AppAccessContext.@odata.type','AppAccessContext.APIId','AppAccessContext.IssuedAtTime'
+ )
+ $detected = New-Object System.Collections.Generic.List[string]
+
+ # Recursively detect column paths from JSON, skipping CopilotEventData (handled by explosion with flat names)
+ function Add-Paths([object]$node, [string]$prefix, [System.Collections.Generic.List[string]]$collector) {
+ if ($null -eq $node) { return }
+ if (Test-ScalarValue $node) { if ($prefix) { $collector.Add($prefix) | Out-Null }; return }
+ if ($node -is [System.Collections.IEnumerable] -and $node -isnot [string]) {
+ foreach ($item in $node) { Add-Paths $item $prefix $collector }
+ return
+ }
+ if ($node.PSObject -and $node.PSObject.Properties) {
+ foreach ($prop in $node.PSObject.Properties) {
+ $pn = $prop.Name; $pv = $prop.Value
+ $path = if ($prefix) { "$prefix.$pn" } else { $pn }
+ # SKIP CopilotEventData - explosion handles these with flat column names
+ if ($pn -eq 'CopilotEventData') { continue }
+ # Special handling for Name/Value arrays (pivot into columns)
+ if ($pn -eq 'ExtendedProperties' -and $pv -is [System.Collections.IEnumerable]) {
+ foreach ($item in $pv) { try { if ($item.Name) { $collector.Add("ExtendedProperties.$($item.Name)") | Out-Null } } catch {} }
+ continue
+ }
+ if ($pn -eq 'DeviceProperties' -and $pv -is [System.Collections.IEnumerable]) {
+ foreach ($item in $pv) { try { if ($item.Name) { $collector.Add("DeviceProperties.$($item.Name)") | Out-Null } } catch {} }
+ continue
+ }
+ Add-Paths $pv $path $collector
+ }
+ }
+ }
+ if ($RawCsvPath -and (Test-Path $RawCsvPath)) {
+ try {
+ $rows = Import-Csv $RawCsvPath | Select-Object -First $Sample
+ foreach ($r in $rows) {
+ try {
+ $audit = $r.AuditData | ConvertFrom-Json -ErrorAction Stop
+ # NOTE: a former $hasCopilot flag was set here but never read; removed as dead code.
+ if ($audit) { Add-Paths $audit '' $detected }
+ } catch {}
+ }
+ } catch {}
+ }
+ # Build unified header: base + augmented + detected (non-Copilot) + PurviewExplodedHeader (flat Copilot columns)
+ $header = New-Object System.Collections.Generic.List[string]
+ foreach ($c in $base) { if (-not $header.Contains($c)) { $header.Add($c) } }
+ foreach ($c in $aug) { if (-not $header.Contains($c)) { $header.Add($c) } }
+ foreach ($c in $detected) { if (-not $header.Contains($c)) { $header.Add($c) } }
+ # Always include flat Copilot columns from PurviewExplodedHeader (supports all activity types)
+ foreach ($c in $PurviewExplodedHeader) { if (-not $header.Contains($c)) { $header.Add($c) } }
+ # Best-effort debug artifact: persist the computed header next to the input CSV
+ try {
+ if ($RawCsvPath) {
+ $hdrPath = Join-Path (Split-Path $RawCsvPath -Parent) 'UnifiedReplayHeader.txt'
+ $header | Set-Content -Path $hdrPath -Encoding utf8
+ }
+ } catch {}
+ return $header
+}
+
+# --- Legacy M365 Usage Wide Header (kept for backward compatibility) ---
+function Get-M365UsageWideHeader {
+ <#
+ .SYNOPSIS
+ Legacy wrapper retained for backward compatibility; forwards unchanged to Get-UnifiedReplayHeader.
+ .PARAMETER RawCsvPath
+ Path to the raw input CSV (passed through).
+ .PARAMETER Sample
+ Row sample size for column detection (passed through). Default 500.
+ #>
+ param(
+ [string]$RawCsvPath,
+ [int]$Sample = 500
+ )
+ Get-UnifiedReplayHeader -RawCsvPath $RawCsvPath -Sample $Sample
+}
+
+# --- Entra Users Schema (47 columns) ---
+# 30 core + 5 manager + 2 license columns + 10 Power BI template compatibility columns
+# Validated against exported users by Test-EntraUsersSchema; order and names are the
+# output contract, so append new columns to the appropriate group rather than reordering.
+$EntraUsersHeader = @(
+ 'userPrincipalName','DisplayName','id','Email','givenName','surname','JobTitle','department','employeeType','employeeId','employeeHireDate',
+ 'officeLocation','city','state','Country','postalCode','companyName','employeeOrgData_division','employeeOrgData_costCenter',
+ 'accountEnabled','userType','createdDateTime','usageLocation','preferredLanguage','onPremisesSyncEnabled','onPremisesImmutableId','externalUserState',
+ 'proxyAddresses_Primary','proxyAddresses_Count','proxyAddresses_All',
+ 'manager_id','manager_displayName','manager_userPrincipalName','manager_mail','manager_jobTitle',
+ 'assignedLicenses','HasLicense',
+ # Power BI template compatibility columns (alias mappings)
+ 'ManagerID','BusinessAreaLabel','CountryofEmployment','CompanyCodeLabel','CostCentreLabel','UserName',
+ # Power BI template compatibility columns (null placeholders for Viva Insights fields)
+ 'EffectiveDate','FunctionType','BusinessAreaCode','OrgLevel_3Label'
+)
+
+function Test-EntraUsersSchema {
+ <#
+ .SYNOPSIS
+ Validates the first user object's property names against $EntraUsersHeader.
+ .DESCRIPTION
+ Logs a warning listing missing/extra columns on mismatch; otherwise logs a quiet
+ confirmation unless -Quiet is set. Only the first element is inspected.
+ .PARAMETER Users
+ Array of user objects to validate; empty/null input is a no-op.
+ .PARAMETER Quiet
+ Suppresses the success message (mismatch warnings are always emitted).
+ #>
+ param(
+ [Parameter(Mandatory=$true)][array]$Users,
+ [switch]$Quiet
+ )
+ if (-not $Users -or $Users.Count -eq 0) { return }
+ $expectedCols = $EntraUsersHeader
+ $actualCols = $Users[0].PSObject.Properties.Name
+ $missingCols = @($expectedCols | Where-Object { $_ -notin $actualCols })
+ $extraCols = @($actualCols | Where-Object { $_ -notin $expectedCols })
+ if ($missingCols.Count -gt 0 -or $extraCols.Count -gt 0) {
+ Write-LogHost ("WARNING: EntraUsers schema mismatch. Missing: {0}; Extra: {1}" -f ($missingCols -join ', '), ($extraCols -join ', ')) -ForegroundColor Yellow
+ } elseif (-not $Quiet) {
+ Write-LogHost "Validated EntraUsers schema ($($expectedCols.Count) columns)." -ForegroundColor DarkGray
+ }
+}
+
+# ═══════════════════════════════════════════════════════════════════════════════
+# FAST ROW CREATION HELPER
+# Turns an accumulated hashtable into a PSCustomObject in one shot, avoiding the
+# per-property cost of repeated Add-Member calls during row explosion.
+# ═══════════════════════════════════════════════════════════════════════════════
+function New-FastRow {
+ <#
+ .SYNOPSIS
+ Converts a property hashtable into a PSCustomObject in a single cast.
+ .DESCRIPTION
+ Accumulate row columns in a hashtable, then call this once; the single
+ [PSCustomObject] cast is far cheaper than building the object property by
+ property with Add-Member.
+ .PARAMETER Properties
+ Hashtable of property names and values for the resulting object.
+ .EXAMPLE
+ $row = New-FastRow -Properties @{ Name = 'Test'; Value = 123 }
+ #>
+ [CmdletBinding()]
+ param([Parameter(Mandatory)][hashtable]$Properties)
+ [PSCustomObject]$Properties
+}
+
+# Ensure the script-scoped DeepExtraColumns list exists; keep any list populated by
+# an earlier pass, otherwise start a fresh empty string list.
+$existingDeep = Get-Variable -Name DeepExtraColumns -Scope Script -ErrorAction SilentlyContinue
+if ((-not $existingDeep) -or (-not $script:DeepExtraColumns)) {
+    $script:DeepExtraColumns = New-Object System.Collections.Generic.List[string]
+}
+
+function Convert-ToPurviewExplodedRecords {
+ [CmdletBinding()]
+ param(
+ [Parameter(Mandatory)] $Record,
+ [switch]$Deep,
+ [switch]$PartialExplode, # NEW: Prompt-specific explosion only (preserves AuditData)
+ [string]$PromptFilterValue,
+ [switch]$SkipMetrics # Used by parallel replay to defer metrics aggregation to parent thread
+ )
+ try {
+ $auditData = if ($Record.PSObject.Properties['_ParsedAuditData']) { $Record._ParsedAuditData } else { try { $Record.AuditData | ConvertFrom-Json -ErrorAction Stop } catch { $null } }
+ if (-not $auditData) {
+ if (-not $SkipMetrics) {
+ $script:metrics.FilteringSkippedRecords++
+ $script:metrics.FilteringMissingAuditData++
+ }
+ return @()
+ }
+ try { Profile-AuditData $auditData } catch {}
+ # Ensure helper is available when re-entering session (PS alias scoping)
+ if (-not (Get-Command Find-AllArrays -ErrorAction SilentlyContinue)) { Set-Alias -Name Find-AllArrays -Value Find-AllArrays -ErrorAction SilentlyContinue | Out-Null }
+
+ $ced = Get-SafeProperty $auditData 'CopilotEventData'
+ if (-not $ced) {
+ # Generic M365 (non-Copilot) multi-row explosion
+ # For non-Copilot M365 usage, get a single compact base row (raw AuditData retained)
+ $baseRows = Convert-ToStructuredRecord -Record $Record -EnableExplosion:$false
+ if (-not $baseRows -or $baseRows.Count -eq 0) {
+ if (-not $SkipMetrics) {
+ $script:metrics.FilteringSkippedRecords++
+ $script:metrics.FilteringMissingAuditData++
+ }
+ return @()
+ }
+ $baseRow = $baseRows[0]
+ # Drop raw JSON props from exploded output
+ foreach ($rawProp in @('AuditData','OriginalAuditData','CopilotEventData')) { if ($baseRow.PSObject.Properties[$rawProp]) { $baseRow.PSObject.Members.Remove($rawProp) } }
+ # Enrich base row with scalar fields from AuditData (no raw JSON)
+ $applicationId = $null
+ try { $applicationId = Select-FirstNonNull -Values @((Get-SafeProperty $auditData 'ApplicationId'), (Get-SafeProperty $auditData 'AppId'), (Get-SafeProperty $auditData 'ClientAppId')) } catch {}
+ $enrichMap = @{
+ 'Id' = { try { Get-SafeProperty $auditData 'Id' } catch { $null } }
+ 'CreationTime' = { try { $ct = Get-SafeProperty $auditData 'CreationTime'; if ($ct) { $parsed = script:Parse-DateSafe $ct; if ($parsed) { $parsed.ToUniversalTime().ToString('yyyy-MM-ddTHH:mm:ss.fffZ') } else { $null } } else { $null } } catch { $null } }
+ 'OrganizationId' = { try { Get-SafeProperty $auditData 'OrganizationId' } catch { $null } }
+ 'ResultStatus' = { try { Get-SafeProperty $auditData 'ResultStatus' } catch { $null } }
+ 'UserKey' = { try { Get-SafeProperty $auditData 'UserKey' } catch { $null } }
+ 'UserType' = { try { Get-SafeProperty $auditData 'UserType' } catch { $null } }
+ 'Version' = { try { Get-SafeProperty $auditData 'Version' } catch { $null } }
+ 'Workload' = { try { Get-SafeProperty $auditData 'Workload' } catch { $null } }
+ 'UserId' = { try { Get-SafeProperty $auditData 'UserId' } catch { $null } }
+ 'ClientIP' = { try { Get-SafeProperty $auditData 'ClientIP' } catch { $null } }
+ 'ObjectId' = { try { Get-SafeProperty $auditData 'ObjectId' } catch { $null } }
+ 'AzureActiveDirectoryEventType'= { try { Get-SafeProperty $auditData 'AzureActiveDirectoryEventType' } catch { $null } }
+ 'ActorContextId' = { try { Get-SafeProperty $auditData 'ActorContextId' } catch { $null } }
+ 'ActorIpAddress' = { try { Get-SafeProperty $auditData 'ActorIpAddress' } catch { $null } }
+ 'InterSystemsId' = { try { Get-SafeProperty $auditData 'InterSystemsId' } catch { $null } }
+ 'IntraSystemId' = { try { Get-SafeProperty $auditData 'IntraSystemId' } catch { $null } }
+ 'SupportTicketId' = { try { Get-SafeProperty $auditData 'SupportTicketId' } catch { $null } }
+ 'TargetContextId' = { try { Get-SafeProperty $auditData 'TargetContextId' } catch { $null } }
+ 'ApplicationId' = { $applicationId }
+ 'ErrorNumber' = { try { Get-SafeProperty $auditData 'ErrorNumber' } catch { $null } }
+ 'SiteUrl' = { try { Get-SafeProperty $auditData 'SiteUrl' } catch { $null } }
+ 'SourceRelativeUrl' = { try { Get-SafeProperty $auditData 'SourceRelativeUrl' } catch { $null } }
+ 'SourceFileName' = { try { Get-SafeProperty $auditData 'SourceFileName' } catch { $null } }
+ 'SourceFileExtension' = { try { Get-SafeProperty $auditData 'SourceFileExtension' } catch { $null } }
+ 'ListId' = { try { Get-SafeProperty $auditData 'ListId' } catch { $null } }
+ 'ListItemUniqueId' = { try { Get-SafeProperty $auditData 'ListItemUniqueId' } catch { $null } }
+ 'WebId' = { try { Get-SafeProperty $auditData 'WebId' } catch { $null } }
+ 'ApplicationDisplayName' = { try { Get-SafeProperty $auditData 'ApplicationDisplayName' } catch { $null } }
+ 'EventSource' = { try { Get-SafeProperty $auditData 'EventSource' } catch { $null } }
+ 'ItemType' = { try { Get-SafeProperty $auditData 'ItemType' } catch { $null } }
+ 'SiteSensitivityLabelId' = { try { Get-SafeProperty $auditData 'SiteSensitivityLabelId' } catch { $null } }
+ 'GeoLocation' = { try { Get-SafeProperty $auditData 'GeoLocation' } catch { $null } }
+ 'IsManagedDevice' = { try { Get-SafeProperty $auditData 'IsManagedDevice' } catch { $null } }
+ 'DeviceDisplayName' = { try { Get-SafeProperty $auditData 'DeviceDisplayName' } catch { $null } }
+ 'ListBaseType' = { try { Get-SafeProperty $auditData 'ListBaseType' } catch { $null } }
+ 'ListServerTemplate' = { try { Get-SafeProperty $auditData 'ListServerTemplate' } catch { $null } }
+ 'AppAccessContext' = { try { Get-SafeProperty $auditData 'AppAccessContext' } catch { $null } }
+ 'AuthenticationType' = { try { Get-SafeProperty $auditData 'AuthenticationType' } catch { $null } }
+ 'Site' = { try { Get-SafeProperty $auditData 'Site' } catch { $null } }
+ 'DoNotDistributeEvent' = { try { Get-SafeProperty $auditData 'DoNotDistributeEvent' } catch { $null } }
+ 'HighPriorityMediaProcessing' = { try { Get-SafeProperty $auditData 'HighPriorityMediaProcessing' } catch { $null } }
+ }
+ foreach ($kv in $enrichMap.GetEnumerator()) {
+ $k = $kv.Key; $getter = $kv.Value
+ try { $val = & $getter } catch { $val = $null }
+ if ($null -ne $val -and -not $baseRow.PSObject.Properties[$k]) { Add-Member -InputObject $baseRow -NotePropertyName $k -NotePropertyValue $val -Force }
+ }
+ # Add any other scalar root properties from AuditData (excluding known arrays/objects)
+ try {
+ foreach ($p in $auditData.PSObject.Properties) {
+ $pn = $p.Name; if ($pn -in @('ExtendedProperties','DeviceProperties','ModifiedProperties','Actor','Target','CopilotEventData')) { continue }
+ $pv = $p.Value
+ try { if (Test-ScalarValue $pv) { if (-not $baseRow.PSObject.Properties[$pn]) { Add-Member -InputObject $baseRow -NotePropertyName $pn -NotePropertyValue $pv -Force } } } catch {}
+ }
+ } catch {}
+
+ # Flatten AppAccessContext into columns
+ try {
+ $aac = Get-SafeProperty $auditData 'AppAccessContext'
+ if ($aac -and -not (Test-ScalarValue $aac)) {
+ $flatAac = ConvertTo-FlatColumns -Node $aac -Prefix 'AppAccessContext.' -MaxDepth $FlatDepthStandard
+ foreach ($k in $flatAac.Keys) {
+ if (-not $baseRow.PSObject.Properties[$k]) { Add-Member -InputObject $baseRow -NotePropertyName $k -NotePropertyValue $flatAac[$k] -Force }
+ }
+ if ($baseRow.PSObject.Properties['AppAccessContext']) { $baseRow.PSObject.Members.Remove('AppAccessContext') }
+ }
+ elseif ($aac -and (Test-ScalarValue $aac)) {
+ if (-not $baseRow.PSObject.Properties['AppAccessContext']) { Add-Member -InputObject $baseRow -NotePropertyName 'AppAccessContext' -NotePropertyValue $aac -Force }
+ }
+ } catch {}
+ # Detect arrays up to standard depth (6)
+ $arrays = Find-AllArrays -Data $auditData -Depth 0 -Arrays @{}
+ $arrayInfos = @()
+ if ($arrays) { $arrayInfos = $arrays.Values }
+
+ # Pivot Name/Value arrays (ExtendedProperties, DeviceProperties) into columns
+ $nvArrays = @('ExtendedProperties','DeviceProperties')
+ $nvPivot = @{}
+ foreach ($nvPath in $nvArrays) {
+ try {
+ $nv = (Get-SafeProperty $auditData $nvPath)
+ if ($nv -and ($nv -is [System.Collections.IEnumerable])) {
+ foreach ($item in $nv) {
+ try {
+ $n = $item.Name; $v = $item.Value
+ if (-not $n) { continue }
+ $key = "$nvPath.$n"
+ if (-not $nvPivot.ContainsKey($key)) { $nvPivot[$key] = New-Object System.Collections.Generic.List[object] }
+ $nvPivot[$key].Add($v) | Out-Null
+ } catch {}
+ }
+ }
+ } catch {}
+ }
+ if ($arrayInfos.Count -gt 0) {
+ $arrayInfos = $arrayInfos | Where-Object { $_.Path -notin $nvArrays }
+ }
+ $rowCount = if ($arrayInfos.Count -gt 0) { ($arrayInfos | ForEach-Object { $_.Count } | Measure-Object -Maximum).Maximum } else { 1 }
+ if ($rowCount -gt $ExplosionPerRecordRowCap) {
+ $rowCount = $ExplosionPerRecordRowCap
+ try { $script:metrics.ExplosionTruncated = $true } catch {}
+ }
+ # HashSet to avoid duplicate columns when flattening
+ $baseSet = New-Object System.Collections.Generic.HashSet[string]
+ foreach ($p in $baseRow.PSObject.Properties) { $null = $baseSet.Add($p.Name) }
+
+ $rows = New-Object System.Collections.Generic.List[object]
+ for ($i = 0; $i -lt $rowCount; $i++) {
+ # Build row as hashtable first (fast), then convert to PSCustomObject once
+ $rowHash = [ordered]@{}
+ foreach ($p in $baseRow.PSObject.Properties) { $rowHash[$p.Name] = $p.Value }
+ # Add pivoted Name/Value arrays into the row
+ if ($nvPivot.Keys.Count -gt 0) {
+ foreach ($pk in $nvPivot.Keys) {
+ if ($rowHash.Contains($pk)) { continue }
+ $val = $nvPivot[$pk]
+ if ($val -isnot [string] -and $val -is [System.Collections.IEnumerable]) {
+ $val = ($val | ForEach-Object { $_.ToString() }) -join ';'
+ }
+ $rowHash[$pk] = $val
+ }
+ }
+ foreach ($info in $arrayInfos) {
+ $path = $info.Path
+ $dataArr = $info.Data
+ if ($i -lt $dataArr.Count) {
+ $el = $dataArr[$i]
+ if ($null -eq $el) { continue }
+ if (Test-ScalarValue $el) {
+ if (-not $rowHash.Contains($path)) { $rowHash[$path] = $el }
+ }
+ else {
+ $flatEl = ConvertTo-FlatColumns -Node $el -Prefix "$path." -MaxDepth $FlatDepthStandard
+ foreach ($k in $flatEl.Keys) { if (-not $rowHash.Contains($k)) { $rowHash[$k] = $flatEl[$k] } }
+ }
+ }
+ }
+ # Convert hashtable to PSCustomObject in single operation (much faster than Add-Member loop)
+ $rows.Add([PSCustomObject]$rowHash) | Out-Null
+ }
+ if ($Deep) {
+ # Deep flatten entire AuditData for each row (no raw JSON)
+ for ($ri = 0; $ri -lt $rows.Count; $ri++) {
+ $r = $rows[$ri]
+ $flatAudit = ConvertTo-FlatColumns -Node $auditData -Prefix '' -MaxDepth $FlatDepthDeep
+ foreach ($k in $flatAudit.Keys) { if (-not $r.PSObject.Properties[$k]) { Add-Member -InputObject $r -NotePropertyName $k -NotePropertyValue $flatAudit[$k] -Force } }
+ }
+ }
+ if (-not $SkipMetrics -and $rows.Count -gt 1) { try { $script:metrics.ExplosionEvents += 1; $script:metrics.ExplosionRowsFromEvents += ($rows.Count - 1); if ($rows.Count -gt $script:metrics.ExplosionMaxPerRecord) { $script:metrics.ExplosionMaxPerRecord = $rows.Count } } catch {} }
+ return $rows
+ }
+ $messages = script:GetArrayFast $ced 'Messages'
+ if ($PromptFilterValue) {
+ $filteredMessages = New-Object System.Collections.Generic.List[object]
+ if ($PromptFilterValue -eq 'Null') {
+ foreach ($msg in $messages) { if ($null -eq $msg.isPrompt) { $filteredMessages.Add($msg) } }
+ }
+ elseif ($PromptFilterValue -eq 'Both') {
+ foreach ($msg in $messages) { if ($null -ne $msg.isPrompt) { $filteredMessages.Add($msg) } }
+ }
+ else {
+ $targetValue = ($PromptFilterValue -eq 'Prompt')
+ foreach ($msg in $messages) { try { if ($msg.isPrompt -eq $targetValue) { $filteredMessages.Add($msg) } } catch {} }
+ }
+ $messages = $filteredMessages
+ if ($messages.Count -eq 0) {
+ if (-not $SkipMetrics) {
+ $script:metrics.FilteringSkippedRecords++
+ $script:metrics.FilteringPromptFiltered++
+ }
+ return @()
+ }
+ }
+ $contexts = script:GetArrayFast $ced 'Contexts'
+ $resources = script:GetArrayFast $ced 'AccessedResources'
+ $pluginsRaw = script:GetArrayFast $ced 'AISystemPlugin'
+ $modelDetRaw = script:GetArrayFast $ced 'ModelTransparencyDetails'
+ $messageIds = script:GetArrayFast $ced 'MessageIds'
+
+ # DSPM for AI: Extract SensitivityLabels array
+ $sensitivityLabels = script:GetArrayFast $ced 'SensitivityLabels'
+
+ # DSPM for AI: Determine activity type for conditional 2-level explosion
+ $activityType = try { $auditData.Operation } catch { $null }
+
+ # DSPM for AI: Extract 2nd-level arrays (for full explosion mode)
+ $plugins = $null
+ $recordingSessions = $null
+ $contextItems = $null
+
+ if (-not $PartialExplode) {
+ # Full explosion mode: Extract 2nd-level arrays for row count calculation
+ if ($activityType -eq 'ConnectedAIAppInteraction' -and $appIdentityRaw) {
+ $plugins = script:GetArrayFast $appIdentityRaw 'Plugins'
+ }
+ if ($activityType -eq 'CopilotInteraction' -and $contexts.Count -gt 0) {
+ # Find max Items[] count across all Contexts
+ $maxItemsCount = 0
+ foreach ($ctx in $contexts) {
+ if ($ctx) {
+ $items = script:GetArrayFast $ctx 'Items'
+ if ($items -and $items.Count -gt $maxItemsCount) {
+ $maxItemsCount = $items.Count
+ }
+ }
+ }
+ if ($maxItemsCount -gt 0) {
+ $contextItems = $maxItemsCount # Store count for row calculation
+ }
+ }
+ }
+
+ if ($PromptFilterValue) { $rowCount = [Math]::Max(1, $messages.Count) } else {
+ # DSPM for AI: Include all arrays in row count calculation (including AISystemPlugin and ModelTransparencyDetails)
+ $arrayCounts = @(1, $messages.Count, $contexts.Count, $resources.Count, $sensitivityLabels.Count, $pluginsRaw.Count, $modelDetRaw.Count)
+
+ # Full explosion: include 2nd-level arrays in row count
+ if (-not $PartialExplode) {
+ if ($plugins) { $arrayCounts += $plugins.Count }
+ if ($recordingSessions) { $arrayCounts += $recordingSessions.Count }
+ if ($contextItems) { $arrayCounts += $contextItems }
+ }
+
+ $rowCount = ($arrayCounts | Measure-Object -Maximum).Maximum
+ }
+ # Removed $plugin0 and $model0 - now using indexed access in row loop for full explosion
+ $creationDate = script:Format-DatePurviewFast $Record.CreationDate
+ $creationTime = try { script:Format-DatePurviewFast $auditData.CreationTime } catch { '' }
+ $appIdentityRaw = (Select-FirstNonNull -Values @((Get-SafeProperty $auditData 'AppIdentity'), (Get-SafeProperty $ced 'AppIdentity')))
+ if ($appIdentityRaw -is [string]) { $appIdentity = $appIdentityRaw; $appDisp = ''; $appPub = '' }
+ elseif ($null -ne $appIdentityRaw) {
+ $appIdentity = ''; $appDisp = Get-SafeProperty $appIdentityRaw 'DisplayName'; $appPub = Get-SafeProperty $appIdentityRaw 'PublisherId'
+ }
+ else { $appIdentity = ''; $appDisp = ''; $appPub = '' }
+ $appHost = (Select-FirstNonNull -Values @((Get-SafeProperty $ced 'AppHost'), (Get-SafeProperty $auditData 'AppHost'), (Get-SafeProperty $auditData 'Workload')))
+ $clientRegion = (Get-SafeProperty $auditData 'ClientRegion')
+ $agentId = (Get-SafeProperty $auditData 'AgentId')
+ $agentName = (Get-SafeProperty $auditData 'AgentName')
+ $agentVersion = (Select-FirstNonNull -Values @((Get-SafeProperty $auditData 'AgentVersion'), (Get-SafeProperty $ced 'AgentVersion'), (Get-SafeProperty $ced 'Version')))
+
+ # Agent categorization based on AgentId pattern
+ $agentCategory = ""
+ if ($agentId) {
+ if ($agentId -like "CopilotStudio.Declarative.*") {
+ $agentCategory = "Declarative Agent"
+ } elseif ($agentId -like "CopilotStudio.CustomEngine.*") {
+ $agentCategory = "Custom Engine Agent"
+ } elseif ($agentId -like "P_*") {
+ $agentCategory = "Declarative Agent (Purview)"
+ } elseif ($agentId) {
+ $agentCategory = "Other Agent"
+ }
+ }
+
+ # With -IncludeUserInfo, license data now appears only in EntraUsers output
+ # (or in combined mode via left join)
+
+ $appName = (Select-FirstNonNull -Values @((Get-SafeProperty $auditData 'ApplicationName'), (Get-SafeProperty $ced 'HostAppName'), (Get-SafeProperty $ced 'ClientAppName')))
+ $threadId = (Get-SafeProperty $ced 'ThreadId')
+ $auditUserKey = try { $auditData.UserKey } catch { $null }
+ # $modelName moved to row loop for indexed access
+ $clientIP = (Get-SafeProperty $auditData 'ClientIP')
+ $organizationId = (Get-SafeProperty $auditData 'OrganizationId')
+ $version = (Get-SafeProperty $auditData 'Version')
+ $userType = (Get-SafeProperty $auditData 'UserType')
+ $copilotLogVersion = (Get-SafeProperty $auditData 'CopilotLogVersion')
+ $workload = (Get-SafeProperty $auditData 'Workload')
+
+ # Extract fields to match ExplodeArrays output for Power BI compatibility
+ $auditDataId = try { $auditData.Id } catch { $null }
+ $recordTypeNum = try { $auditData.RecordType } catch { $null }
+ $resultStatusAudit = try { $auditData.ResultStatus } catch { $null }
+ $appId = try { $auditData.AppId } catch { $null }
+ $clientAppId = try { $auditData.ClientAppId } catch { $null }
+ $correlationId = try { $auditData.CorrelationId } catch { $null }
+
+ # Model and token fields (same as ExplodeArrays)
+ $modelId = Select-FirstNonNull -Values @((Get-SafeProperty $ced 'ModelId'), (Get-SafeProperty $ced 'ModelID'), (Get-SafeProperty $auditData 'ModelId'))
+ $modelProvider = Select-FirstNonNull -Values @((Get-SafeProperty $ced 'ModelProvider'), (Get-SafeProperty $ced 'Provider'), (Get-SafeProperty $ced 'ModelVendor'))
+ $modelFamily = Select-FirstNonNull -Values @((Get-SafeProperty $ced 'ModelFamily'), (Get-SafeProperty $ced 'ModelType'))
+ $usageNode = Select-FirstNonNull -Values @((Get-SafeProperty $ced 'Usage'), (Get-SafeProperty $ced 'TokenUsage'), (Get-SafeProperty $ced 'Tokens'), (Get-SafeProperty $auditData 'Usage'))
+ $tokensTotal = $null; $tokensInput = $null; $tokensOutput = $null
+ if ($usageNode) {
+ function Local:Get-Num([object]$v) { if ($null -eq $v) { return $null }; try { if ($v -is [string] -and [string]::IsNullOrWhiteSpace($v)) { return $null }; return [double]$v } catch { return $null } }
+ $tokensTotal = Local:Get-Num (Select-FirstNonNull -Values @((Get-SafeProperty $usageNode 'Total'), (Get-SafeProperty $usageNode 'TotalTokens'), (Get-SafeProperty $usageNode 'TokensTotal')))
+ $tokensInput = Local:Get-Num (Select-FirstNonNull -Values @((Get-SafeProperty $usageNode 'Input'), (Get-SafeProperty $usageNode 'Prompt'), (Get-SafeProperty $usageNode 'InputTokens'), (Get-SafeProperty $usageNode 'TokensInput')))
+ $tokensOutput = Local:Get-Num (Select-FirstNonNull -Values @((Get-SafeProperty $usageNode 'Output'), (Get-SafeProperty $usageNode 'Completion'), (Get-SafeProperty $usageNode 'OutputTokens'), (Get-SafeProperty $usageNode 'TokensOutput')))
+ }
+ if (-not $tokensTotal -and ($tokensInput -or $tokensOutput)) { try { $tokensTotal = ($tokensInput + $tokensOutput) } catch {} }
+
+ # Duration, outcome, conversation fields (same as ExplodeArrays)
+ function Local:Get-NumSafe([object]$v) { if ($null -eq $v) { return $null }; try { if ($v -is [string] -and [string]::IsNullOrWhiteSpace($v)) { return $null }; return [double]$v } catch { return $null } }
+ $durationMs = Local:Get-NumSafe (Select-FirstNonNull -Values @((Get-SafeProperty $ced 'DurationMs'), (Get-SafeProperty $ced 'ElapsedMs'), (Get-SafeProperty $ced 'ProcessingTimeMs'), (Get-SafeProperty $ced 'LatencyMs')))
+ $outcomeStatus = Select-FirstNonNull -Values @((Get-SafeProperty $ced 'OutcomeStatus'), (Get-SafeProperty $ced 'Outcome'), (Get-SafeProperty $ced 'Result'), (Get-SafeProperty $ced 'Status'))
+ if ($outcomeStatus -is [bool]) { $outcomeStatus = if ($outcomeStatus) { 'Success' } else { 'Failure' } }
+ $conversationId = Select-FirstNonNull -Values @((Get-SafeProperty $ced 'ConversationId'), (Get-SafeProperty $ced 'ConversationID'), (Get-SafeProperty $ced 'SessionId'))
+ $turnNumber = Local:Get-NumSafe (Select-FirstNonNull -Values @((Get-SafeProperty $ced 'TurnNumber'), (Get-SafeProperty $ced 'TurnIndex'), (Get-SafeProperty $ced 'MessageIndex')))
+ $retryCount = Local:Get-NumSafe (Select-FirstNonNull -Values @((Get-SafeProperty $ced 'RetryCount'), (Get-SafeProperty $ced 'Retries')))
+ $clientVersion = Select-FirstNonNull -Values @((Get-SafeProperty $ced 'ClientVersion'), (Get-SafeProperty $ced 'Version'), (Get-SafeProperty $ced 'Build'))
+ $clientPlatform = Select-FirstNonNull -Values @((Get-SafeProperty $ced 'ClientPlatform'), (Get-SafeProperty $ced 'Platform'), (Get-SafeProperty $ced 'OS'))
+
+ $baseSet = New-Object System.Collections.Generic.HashSet[string]; foreach ($c in $PurviewExplodedHeader) { $null = $baseSet.Add($c) }
+ $rows = New-Object System.Collections.Generic.List[object]
+ for ($i = 0; $i -lt $rowCount; $i++) {
+ $rowObj = [PSCustomObject]@{
+ RecordId = $(if ($Record.RecordId) { $Record.RecordId } elseif ($Record.Identity) { $Record.Identity } elseif ($Record.Id) { $Record.Id } else { $auditData.Id })
+ CreationDate = $creationDate
+ RecordType = $Record.RecordType
+ Operation = $auditData.Operation
+ UserId = $auditData.UserId
+ OrganizationId = $organizationId
+ Workload = $workload
+ UserType = $userType
+ UserKey = $auditUserKey
+ Version = $version
+ Id = $auditDataId
+ RecordTypeNum = $recordTypeNum
+ ResultStatus_Audit = $resultStatusAudit
+ AppId = $appId
+ ClientAppId = $clientAppId
+ CorrelationId = $correlationId
+ ModelId = $modelId
+ ModelProvider = $modelProvider
+ ModelFamily = $modelFamily
+ TokensTotal = $tokensTotal
+ TokensInput = $tokensInput
+ TokensOutput = $tokensOutput
+ DurationMs = $durationMs
+ OutcomeStatus = $outcomeStatus
+ ConversationId = $conversationId
+ TurnNumber = $turnNumber
+ RetryCount = $retryCount
+ ClientVersion = $clientVersion
+ ClientPlatform = $clientPlatform
+ AssociatedAdminUnits = (Get-SafeProperty $auditData 'AssociatedAdminUnits')
+ AssociatedAdminUnitsNames = (Get-SafeProperty $auditData 'AssociatedAdminUnitsNames')
+ AgentId = $agentId
+ AgentName = $agentName
+ AgentVersion = $agentVersion
+ AgentCategory = $agentCategory
+ AppIdentity = $appIdentity
+ AppIdentity_DisplayName = $appDisp
+ AppIdentity_PublisherId = $appPub
+ ApplicationName = $appName
+ CreationTime = $creationTime
+ ClientRegion = $clientRegion
+ ClientIP = $clientIP
+ AppHost = $appHost
+ ThreadId = $threadId
+ Context_Id = $(if ($i -lt $contexts.Count -and $contexts[$i]) { try { Get-SafeProperty $contexts[$i] 'Id' } catch { '' } } else { '' })
+ Context_Type = $(if ($i -lt $contexts.Count -and $contexts[$i]) { try { Get-SafeProperty $contexts[$i] 'Type' } catch { '' } } else { '' })
+ Message_Id = $(if ($i -lt $messages.Count) { $msg = $messages[$i]; if ($msg -is [psobject]) { try { Get-SafeProperty $msg 'Id' } catch { '' } } else { $msg } } else { '' })
+ Message_isPrompt = $(if ($i -lt $messages.Count) { $msg = $messages[$i]; if ($msg -is [psobject]) { try { script:BoolTFFast (Get-SafeProperty $msg 'isPrompt') } catch { '' } } else { '' } } else { '' })
+ AccessedResource_Action = $(if ($i -lt $resources.Count -and $resources[$i]) { try { Get-SafeProperty $resources[$i] 'Action' } catch { '' } } else { '' })
+ AccessedResource_PolicyDetails = $(if ($i -lt $resources.Count -and $resources[$i]) { try { script:ToJsonIfObjectFast (Get-SafeProperty $resources[$i] 'PolicyDetails') } catch { '' } } else { '' })
+ AccessedResource_SiteUrl = $(if ($i -lt $resources.Count -and $resources[$i]) { try { Get-SafeProperty $resources[$i] 'SiteUrl' } catch { '' } } else { '' })
+ # DSPM for AI: Enhanced AccessedResource properties
+ AccessedResource_Name = $(if ($i -lt $resources.Count -and $resources[$i]) { try { Get-SafeProperty $resources[$i] 'Name' } catch { '' } } else { '' })
+ AccessedResource_SensitivityLabel = $(if ($i -lt $resources.Count -and $resources[$i]) { try { Get-SafeProperty $resources[$i] 'SensitivityLabel' } catch { '' } } else { '' })
+ AccessedResource_ResourceType = $(if ($i -lt $resources.Count -and $resources[$i]) { try { Get-SafeProperty $resources[$i] 'ResourceType' } catch { '' } } else { '' })
+ # Row explosion for AISystemPlugin array
+ AISystemPlugin_Id = $(if ($i -lt $pluginsRaw.Count -and $pluginsRaw[$i]) { try { Get-SafeProperty $pluginsRaw[$i] 'Id' } catch { '' } } else { '' })
+ AISystemPlugin_Name = $(if ($i -lt $pluginsRaw.Count -and $pluginsRaw[$i]) { try { Get-SafeProperty $pluginsRaw[$i] 'Name' } catch { '' } } else { '' })
+ # Row explosion for ModelTransparencyDetails array
+ ModelTransparencyDetails_ModelName = $(if ($i -lt $modelDetRaw.Count -and $modelDetRaw[$i]) { try { Get-SafeProperty $modelDetRaw[$i] 'ModelName' } catch { '' } } else { '' })
+ MessageIds = $(if ($messageIds.Count -gt 0) { $messageIds -join ';' } else { '' })
+ # DSPM for AI: SensitivityLabels array explosion
+ SensitivityLabel = $(if ($i -lt $sensitivityLabels.Count) { try { [string]$sensitivityLabels[$i] } catch { '' } } else { '' })
+
+ # DSPM for AI: 2-level explosion handling for Context Items[], Plugins[], RecordingSessions[]
+ # Full explosion (-ExplodeArrays/-ExplodeDeep): row-per-item explosion
+ Context_Item = $(
+ if ($activityType -eq 'CopilotInteraction') {
+ if ($PartialExplode) {
+ # Partial mode: Semi-colon-joined JSON for ALL items from matching context
+ if ($i -lt $contexts.Count -and $contexts[$i]) {
+ try {
+ $items = script:GetArrayFast $contexts[$i] 'Items'
+ if ($items -and $items.Count -gt 0) {
+ ($items | ForEach-Object { try { script:ToJsonIfObjectFast $_ } catch { '' } }) -join ';'
+ } else { '' }
+ } catch { '' }
+ } else { '' }
+ } else {
+ # Full mode: One item per row (loop through all contexts to find matching item index)
+ try {
+ $foundItem = $null
+ foreach ($ctx in $contexts) {
+ if ($ctx) {
+ $items = script:GetArrayFast $ctx 'Items'
+ if ($items -and $i -lt $items.Count) {
+ $foundItem = $items[$i]
+ break
+ }
+ }
+ }
+ if ($foundItem) { script:ToJsonIfObjectFast $foundItem } else { '' }
+ } catch { '' }
+ }
+ } else { '' }
+ )
+
+ CopilotLogVersion = $copilotLogVersion
+ }
+
+ # Partial explosion mode: Preserve AuditData column (full JSON) for downstream processing
+ if ($PartialExplode) {
+ try {
+ Add-Member -InputObject $rowObj -NotePropertyName 'AuditData' -NotePropertyValue $Record.AuditData -Force
+ } catch {}
+ }
+
+ # DSPM for AI: 2-level explosion for ConnectedAIAppInteraction (AppIdentity.Plugins[])
+ if ($activityType -eq 'ConnectedAIAppInteraction' -and $plugins) {
+ try {
+ if ($PartialExplode) {
+ # Partial mode: Semi-colon-joined JSON for all plugins
+ $pluginsList = ($plugins | ForEach-Object { try { script:ToJsonIfObjectFast $_ } catch { '' } }) -join ';'
+ if (-not $rowObj.PSObject.Properties['AppIdentity_Plugins']) {
+ Add-Member -InputObject $rowObj -NotePropertyName 'AppIdentity_Plugins' -NotePropertyValue $pluginsList -Force
+ if (-not $script:DeepExtraColumns.Contains('AppIdentity_Plugins')) { [void]$script:DeepExtraColumns.Add('AppIdentity_Plugins') }
+ }
+ } else {
+ # Full mode: One plugin per row
+ if ($i -lt $plugins.Count) {
+ $plugin = $plugins[$i]
+ $pluginJson = try { script:ToJsonIfObjectFast $plugin } catch { '' }
+ if (-not $rowObj.PSObject.Properties['AppIdentity_Plugin']) {
+ Add-Member -InputObject $rowObj -NotePropertyName 'AppIdentity_Plugin' -NotePropertyValue $pluginJson -Force
+ if (-not $script:DeepExtraColumns.Contains('AppIdentity_Plugin')) { [void]$script:DeepExtraColumns.Add('AppIdentity_Plugin') }
+ }
+ }
+ }
+ } catch {}
+ }
+
+ if ($Deep) {
+ if ($ced) {
+ $flat = ConvertTo-FlatColumns -Node $ced -Prefix '' -MaxDepth $FlatDepthDeep
+ foreach ($k in $flat.Keys) { if ($baseSet.Contains($k)) { continue }; if (-not $rowObj.PSObject.Properties[$k]) { if (-not $script:DeepExtraColumns.Contains($k)) { [void]$script:DeepExtraColumns.Add($k) }; try { Add-Member -InputObject $rowObj -NotePropertyName $k -NotePropertyValue $flat[$k] -Force } catch {} } }
+ }
+ if ($auditData) {
+ $auditDataClone = [PSCustomObject]@{}
+ foreach ($prop in $auditData.PSObject.Properties) { if ($prop.Name -ne 'CopilotEventData') { Add-Member -InputObject $auditDataClone -NotePropertyName $prop.Name -NotePropertyValue $prop.Value -Force } }
+ $flatAudit = ConvertTo-FlatColumns -Node $auditDataClone -Prefix '' -MaxDepth $FlatDepthDeep
+ foreach ($k in $flatAudit.Keys) { if ($baseSet.Contains($k)) { continue }; if (-not $rowObj.PSObject.Properties[$k]) { if (-not $script:DeepExtraColumns.Contains($k)) { [void]$script:DeepExtraColumns.Add($k) }; try { Add-Member -InputObject $rowObj -NotePropertyName $k -NotePropertyValue $flatAudit[$k] -Force } catch {} } }
+ }
+ }
+ $rows.Add($rowObj) | Out-Null
+ }
+ if (-not $SkipMetrics -and $rows.Count -gt 1) { try { $script:metrics.ExplosionEvents += 1; $script:metrics.ExplosionRowsFromEvents += ($rows.Count - 1); if ($rows.Count -gt $script:metrics.ExplosionMaxPerRecord) { $script:metrics.ExplosionMaxPerRecord = $rows.Count } } catch {} }
+ return $rows
+ }
+ catch {
+ if (-not $SkipMetrics) {
+ $script:metrics.FilteringSkippedRecords++
+ $script:metrics.FilteringParseFailures++
+ }
+ Write-LogHost "Failed Purview explosion: $($_.Exception.Message)" -ForegroundColor Red
+ return @()
+ }
+}
+
+# Walks the supplied values in order and returns the first one that is
+# neither $null nor an empty string (after coercion to [string]).
+# Returns $null when every candidate is null/empty.
+function Select-FirstNonNull {
+    param([object[]]$Values)
+    foreach ($candidate in $Values) {
+        if ($null -eq $candidate) { continue }
+        if ('' -eq [string]$candidate) { continue }
+        return $candidate
+    }
+    return $null
+}
+
+function Convert-ToStructuredRecord {
+ # Uses proven stable implementation for record conversion
+ #
+ # Purpose: convert one raw Purview audit record into one or more flat
+ # PSCustomObject rows for export.
+ #   - Non-explosion mode (default, and only when $ExplodeDeep is off):
+ #     returns a single compact record that keeps the raw AuditData JSON.
+ #   - Explosion mode: parses AuditData/CopilotEventData into a wide base
+ #     record; with -EnableExplosion, multiplies rows per array element
+ #     (Suggestions/Actions/References/Participants), capped by
+ #     $ExplosionPerRecordRowCap.
+ # Parameters:
+ #   Record          - raw audit record; expects AuditData (JSON string) plus
+ #                     RecordId/Identity/Id, CreationDate, RecordType, etc.
+ #   EnableExplosion - when $true, perform per-array row explosion.
+ # Returns: array of PSCustomObject rows; empty array @() on parse failure
+ # (failure increments $script:metrics counters and logs in red).
+ # NOTE(review): depends on script-scope state ($ExplodeDeep, $FlatDepthStandard,
+ # $JsonDepth, $ExplosionPerRecordRowCap, $script:metrics) and helpers defined
+ # elsewhere (Get-SafeProperty, ConvertTo-FlatColumns, Test-ScalarValue,
+ # script:Parse-DateSafe, Write-LogHost) — verify those are in scope at call time.
+ param(
+ [Parameter(Mandatory = $true)] $Record,
+ [bool]$EnableExplosion = $false
+ )
+ try {
+ # Local helpers (function scope only): safe numeric coercion and
+ # set-or-add of a NoteProperty on a PSCustomObject.
+ function Local:Get-Num([object]$v) { if ($null -eq $v) { return $null }; try { if ($v -is [string] -and [string]::IsNullOrWhiteSpace($v)) { return $null }; return [double]$v } catch { return $null } }
+ function Local:Add-OrUpdate([pscustomobject]$obj, [string]$name, $value) { try { if ($obj.PSObject.Properties[$name]) { $obj.PSObject.Properties[$name].Value = $value } else { Add-Member -InputObject $obj -NotePropertyName $name -NotePropertyValue $value -Force } } catch {} }
+ # Use pre-parsed AuditData if available
+ $auditData = if ($Record.PSObject.Properties['_ParsedAuditData']) { $Record._ParsedAuditData } else { try { $Record.AuditData | ConvertFrom-Json -ErrorAction Stop } catch { $null } }
+ if (-not $auditData) {
+ $script:metrics.FilteringSkippedRecords++
+ $script:metrics.FilteringMissingAuditData++
+ return @()
+ }
+
+ # NON-EXPLOSION MODE: Return 7-column compact record with raw AuditData only
+ if (-not $EnableExplosion -and -not $ExplodeDeep) {
+ $compactRecord = [pscustomobject]@{
+ RecordId = $(if ($Record.RecordId) { $Record.RecordId } elseif ($Record.Identity) { $Record.Identity } elseif ($Record.Id) { $Record.Id } else { $auditData.Id })
+ CreationDate = $Record.CreationDate.ToUniversalTime().ToString('yyyy-MM-ddTHH:mm:ss.fffZ')
+ RecordType = $Record.RecordType
+ Operation = $(try { $auditData.Operation } catch { $Record.Operations })
+ AuditData = $Record.AuditData
+ AssociatedAdminUnits = $null
+ AssociatedAdminUnitsNames = $null
+ }
+ return @($compactRecord)
+ }
+
+ # EXPLOSION MODE: Extract and flatten all fields from AuditData (no raw JSON columns)
+ # Field names vary across Copilot log versions, so each value is probed
+ # through several candidate property names via Select-FirstNonNull.
+ $ced = Get-SafeProperty $auditData 'CopilotEventData'
+ $modelId = Select-FirstNonNull -Values @((Get-SafeProperty $ced 'ModelId'), (Get-SafeProperty $ced 'ModelID'), (Get-SafeProperty $auditData 'ModelId'))
+ $modelProvider = Select-FirstNonNull -Values @((Get-SafeProperty $ced 'ModelProvider'), (Get-SafeProperty $ced 'Provider'), (Get-SafeProperty $ced 'ModelVendor'))
+ $modelFamily = Select-FirstNonNull -Values @((Get-SafeProperty $ced 'ModelFamily'), (Get-SafeProperty $ced 'ModelType'))
+ $usageNode = Select-FirstNonNull -Values @((Get-SafeProperty $ced 'Usage'), (Get-SafeProperty $ced 'TokenUsage'), (Get-SafeProperty $ced 'Tokens'), (Get-SafeProperty $auditData 'Usage'))
+ $tokensTotal = $null; $tokensInput = $null; $tokensOutput = $null
+ if ($usageNode) {
+ $tokensTotal = Local:Get-Num (Select-FirstNonNull -Values @((Get-SafeProperty $usageNode 'Total'), (Get-SafeProperty $usageNode 'TotalTokens'), (Get-SafeProperty $usageNode 'TokensTotal')))
+ $tokensInput = Local:Get-Num (Select-FirstNonNull -Values @((Get-SafeProperty $usageNode 'Input'), (Get-SafeProperty $usageNode 'Prompt'), (Get-SafeProperty $usageNode 'InputTokens'), (Get-SafeProperty $usageNode 'TokensInput')))
+ $tokensOutput = Local:Get-Num (Select-FirstNonNull -Values @((Get-SafeProperty $usageNode 'Output'), (Get-SafeProperty $usageNode 'Completion'), (Get-SafeProperty $usageNode 'OutputTokens'), (Get-SafeProperty $usageNode 'TokensOutput')))
+ }
+ # Derive total tokens when only input/output counts are present.
+ # NOTE(review): `-not $tokensTotal` also treats an explicit 0 as missing
+ # and recomputes — presumably intentional; confirm.
+ if (-not $tokensTotal -and ($tokensInput -or $tokensOutput)) { try { $tokensTotal = ($tokensInput + $tokensOutput) } catch {} }
+ $durationMs = Local:Get-Num (Select-FirstNonNull -Values @((Get-SafeProperty $ced 'DurationMs'), (Get-SafeProperty $ced 'ElapsedMs'), (Get-SafeProperty $ced 'ProcessingTimeMs'), (Get-SafeProperty $ced 'LatencyMs')))
+ $outcomeStatus = Select-FirstNonNull -Values @((Get-SafeProperty $ced 'OutcomeStatus'), (Get-SafeProperty $ced 'Outcome'), (Get-SafeProperty $ced 'Result'), (Get-SafeProperty $ced 'Status'))
+ # Normalize boolean outcome to the canonical Success/Failure strings.
+ if ($outcomeStatus -is [bool]) { $outcomeStatus = if ($outcomeStatus) { 'Success' } else { 'Failure' } }
+ $conversationId = Select-FirstNonNull -Values @((Get-SafeProperty $ced 'ConversationId'), (Get-SafeProperty $ced 'ConversationID'), (Get-SafeProperty $ced 'SessionId'))
+ $turnNumber = Local:Get-Num (Select-FirstNonNull -Values @((Get-SafeProperty $ced 'TurnNumber'), (Get-SafeProperty $ced 'TurnIndex'), (Get-SafeProperty $ced 'MessageIndex')))
+ $retryCount = Local:Get-Num (Select-FirstNonNull -Values @((Get-SafeProperty $ced 'RetryCount'), (Get-SafeProperty $ced 'Retries')))
+ $clientVersion = Select-FirstNonNull -Values @((Get-SafeProperty $ced 'ClientVersion'), (Get-SafeProperty $ced 'Version'), (Get-SafeProperty $ced 'Build'))
+ $clientPlatform = Select-FirstNonNull -Values @((Get-SafeProperty $ced 'ClientPlatform'), (Get-SafeProperty $ced 'Platform'), (Get-SafeProperty $ced 'OS'))
+ $agentId = Select-FirstNonNull -Values @((Get-SafeProperty $ced 'AgentId'), (Get-SafeProperty $ced 'AgentID'), (Get-SafeProperty $ced 'AssistantId'))
+ $agentName = Select-FirstNonNull -Values @((Get-SafeProperty $ced 'AgentName'), (Get-SafeProperty $ced 'AssistantName'))
+ $agentVersion = Select-FirstNonNull -Values @((Get-SafeProperty $ced 'AgentVersion'), (Get-SafeProperty $ced 'Version'))
+
+ # Agent categorization based on AgentId pattern
+ $agentCategory = ""
+ if ($agentId) {
+ if ($agentId -like "CopilotStudio.Declarative.*") {
+ $agentCategory = "Declarative Agent"
+ } elseif ($agentId -like "CopilotStudio.CustomEngine.*") {
+ $agentCategory = "Custom Engine Agent"
+ } elseif ($agentId -like "P_*") {
+ $agentCategory = "Declarative Agent (Purview)"
+ } elseif ($agentId) {
+ $agentCategory = "Other Agent"
+ }
+ }
+
+ # With -IncludeUserInfo, license data now appears only in EntraUsers output
+ # (or in combined mode via left join)
+
+ $appIdentity = Select-FirstNonNull -Values @((Get-SafeProperty $ced 'AppIdentity'), (Get-SafeProperty $ced 'ApplicationId'), (Get-SafeProperty $ced 'HostAppId'))
+ $applicationName = Select-FirstNonNull -Values @((Get-SafeProperty $ced 'ApplicationName'), (Get-SafeProperty $ced 'HostAppName'), (Get-SafeProperty $ced 'ClientAppName'))
+ $suggestions = (Get-SafeProperty $ced 'Suggestions'); if (-not $suggestions) { $suggestions = Get-SafeProperty $ced 'SuggestionList' }
+ $actions = Get-SafeProperty $ced 'Actions'
+ $references = Select-FirstNonNull -Values @((Get-SafeProperty $ced 'References'), (Get-SafeProperty $ced 'Sources'), (Get-SafeProperty $ced 'Citations'))
+ $participants = Get-SafeProperty $ced 'Participants'
+ # Aggregates a collection into prefixed summary columns: count, distinct
+ # types, average latency/edits, accepted count + acceptance rate, and
+ # success/failure tallies. Returns a hashtable of column-name -> value.
+ function Local:Measure-Collection($items, [string]$prefix) {
+ $result = @{}; if (-not $items) { return $result }; $arr = @($items); if ($arr.Count -eq 0) { return $result }
+ $result["${prefix}Count"] = $arr.Count; $types = New-Object System.Collections.Generic.HashSet[string]; $latencies = @(); $edits = @(); $accepted = 0; $success = 0; $failure = 0
+ foreach ($s in $arr) {
+ # Each metric probes several candidate property names; the first match wins.
+ foreach ($cand in @('Type', 'SuggestionType', 'Name', 'Kind', 'ActionType')) { try { if ($s.PSObject.Properties[$cand]) { [void]$types.Add([string]$s.$cand); break } } catch {} }
+ foreach ($lat in @('LatencyMs', 'DurationMs', 'ElapsedMs')) { try { if ($s.PSObject.Properties[$lat]) { $v = Local:Get-Num $s.$lat; if ($null -ne $v) { $latencies += $v; break } } } catch {} }
+ foreach ($ed in @('EditCount', 'Edits', 'EditsCount')) { try { if ($s.PSObject.Properties[$ed]) { $v = Local:Get-Num $s.$ed; if ($null -ne $v) { $edits += $v; break } } } catch {} }
+ foreach ($acc in @('Accepted', 'IsAccepted', 'Success', 'Succeeded')) { try { if ($s.PSObject.Properties[$acc]) { $val = $s.$acc; if ($val -is [bool]) { if ($val) { $accepted++ } } elseif ($val -match '^(?i:true|yes|1|success)') { $accepted++ } } } catch {} }
+ foreach ($succ in @('Success', 'Succeeded')) { try { if ($s.PSObject.Properties[$succ]) { $val = $s.$succ; if ($val -is [bool]) { if ($val) { $success++ } else { $failure++ } } elseif ($val -match '^(?i:true|yes|1|success)') { $success++ } else { $failure++ } } } catch {} }
+ }
+ if ($types.Count -gt 0) { $result["${prefix}Types"] = [string]::Join(';', [array]$types) }
+ if ($latencies.Count -gt 0) { $result["${prefix}AvgLatencyMs"] = [math]::Round(($latencies | Measure-Object -Average).Average, 2) }
+ if ($edits.Count -gt 0) { $result["${prefix}AvgEdits"] = [math]::Round(($edits | Measure-Object -Average).Average, 2); $result["${prefix}TotalEdits"] = ($edits | Measure-Object -Sum).Sum }
+ if ($accepted -gt 0) { $result["${prefix}Accepted"] = $accepted; $result["${prefix}AcceptanceRate"] = [math]::Round(($accepted / $arr.Count) * 100, 2) }
+ if ($success -gt 0 -or $failure -gt 0) { $result["${prefix}Success"] = $success; $result["${prefix}Failure"] = $failure }
+ return $result
+ }
+ $suggestAgg = Local:Measure-Collection $suggestions 'Suggestions'
+ $actionAgg = Local:Measure-Collection $actions 'Actions'
+ $refAgg = Local:Measure-Collection $references 'References'
+ $partAgg = Local:Measure-Collection $participants 'Participants'
+ # Base wide record: one row per audit record before any array explosion.
+ $baseRecord = [pscustomobject]@{
+ RecordId = $(if ($Record.RecordId) { $Record.RecordId } elseif ($Record.Identity) { $Record.Identity } elseif ($Record.Id) { $Record.Id } else { $auditData.Id })
+ RecordType = $Record.RecordType
+ CreationDate = $Record.CreationDate.ToUniversalTime().ToString('yyyy-MM-ddTHH:mm:ss.fffZ')
+ UserIds = $Record.UserIds
+ Operations = $Record.Operations
+ ResultStatus = $Record.ResultStatus
+ ResultCount = $Record.ResultCount
+ Identity = $Record.Identity
+ IsValid = $Record.IsValid
+ ObjectState = $Record.ObjectState
+ Id = $auditData.Id
+ CreationTime = & { $ct = script:Parse-DateSafe $auditData.CreationTime; if ($ct) { $ct.ToUniversalTime().ToString('yyyy-MM-ddTHH:mm:ss.fffZ') } else { $auditData.CreationTime } }
+ Operation = $auditData.Operation
+ OrganizationId = $auditData.OrganizationId
+ RecordTypeNum = $auditData.RecordType
+ ResultStatus_Audit = $auditData.ResultStatus
+ UserKey = $auditData.UserKey
+ UserType = $auditData.UserType
+ Version = $auditData.Version
+ Workload = $auditData.Workload
+ UserId = $auditData.UserId
+ AppId = $auditData.AppId
+ ClientAppId = $auditData.ClientAppId
+ CorrelationId = $auditData.CorrelationId
+ ModelId = $modelId
+ ModelProvider = $modelProvider
+ ModelFamily = $modelFamily
+ TokensTotal = $tokensTotal
+ TokensInput = $tokensInput
+ TokensOutput = $tokensOutput
+ DurationMs = $durationMs
+ OutcomeStatus = $outcomeStatus
+ ConversationId = $conversationId
+ TurnNumber = $turnNumber
+ RetryCount = $retryCount
+ ClientVersion = $clientVersion
+ ClientPlatform = $clientPlatform
+ AgentId = $agentId
+ AgentName = $agentName
+ AgentVersion = $agentVersion
+ AgentCategory = $agentCategory
+ AppIdentity = $appIdentity
+ ApplicationName = $applicationName
+ }
+ # Flatten AppAccessContext for Copilot/AI records as well
+ try {
+ $aac = Get-SafeProperty $auditData 'AppAccessContext'
+ if ($aac -and -not (Test-ScalarValue $aac)) {
+ # Complex object: flatten into AppAccessContext.* columns and drop
+ # the unflattened property if present.
+ $flatAac = ConvertTo-FlatColumns -Node $aac -Prefix 'AppAccessContext.' -MaxDepth $FlatDepthStandard
+ foreach ($k in $flatAac.Keys) { if (-not $baseRecord.PSObject.Properties[$k]) { Add-Member -InputObject $baseRecord -NotePropertyName $k -NotePropertyValue $flatAac[$k] -Force } }
+ if ($baseRecord.PSObject.Properties['AppAccessContext']) { $baseRecord.PSObject.Members.Remove('AppAccessContext') }
+ }
+ elseif ($aac -and (Test-ScalarValue $aac)) {
+ if (-not $baseRecord.PSObject.Properties['AppAccessContext']) { Add-Member -InputObject $baseRecord -NotePropertyName 'AppAccessContext' -NotePropertyValue $aac -Force }
+ }
+ } catch {}
+ # Merge aggregate summary columns onto the base record.
+ foreach ($k in $suggestAgg.Keys) { Add-OrUpdate $baseRecord $k $suggestAgg[$k] }
+ foreach ($k in $actionAgg.Keys) { Add-OrUpdate $baseRecord $k $actionAgg[$k] }
+ foreach ($k in $refAgg.Keys) { Add-OrUpdate $baseRecord $k $refAgg[$k] }
+ foreach ($k in $partAgg.Keys) { Add-OrUpdate $baseRecord $k $partAgg[$k] }
+
+ # If not doing array explosion, return base record now
+ if (-not $EnableExplosion) { return @($baseRecord) }
+ # Array explosion: cross-product of rows with each enabled array,
+ # copying parent columns and prefixing element properties
+ # (e.g. Suggestion_Type). Non-scalar element values are JSON-serialized.
+ $rows = @($baseRecord)
+ $arraysToExplode = @(
+ @{ Name = 'Suggestions'; Data = $suggestions; Prefix = 'Suggestion'; Enabled = $suggestions },
+ @{ Name = 'Actions'; Data = $actions; Prefix = 'Action'; Enabled = $actions },
+ @{ Name = 'References'; Data = $references; Prefix = 'Reference'; Enabled = $references },
+ @{ Name = 'Participants'; Data = $participants; Prefix = 'Participant'; Enabled = $participants }
+ )
+ $maxRows = $ExplosionPerRecordRowCap
+ foreach ($entry in $arraysToExplode) {
+ if (-not $entry.Enabled) { continue }
+ $dataArr = @($entry.Data); if ($dataArr.Count -eq 0) { continue }
+ $newRows = New-Object System.Collections.ArrayList
+ foreach ($r in $rows) {
+ $idx = 0
+ foreach ($el in $dataArr) {
+ $nr = [pscustomobject]@{}
+ foreach ($p in $r.PSObject.Properties) { Add-Member -InputObject $nr -NotePropertyName $p.Name -NotePropertyValue $p.Value -Force }
+ Add-OrUpdate $nr ("ArrayIndex_{0}" -f $entry.Name) $idx
+ if ($el) {
+ foreach ($prop in $el.PSObject.Properties) {
+ $pname = ("{0}_{1}" -f $entry.Prefix, $prop.Name)
+ if ($nr.PSObject.Properties[$pname]) { continue }
+ $val = $prop.Value
+ if (Test-ScalarValue $val) { Add-OrUpdate $nr $pname $val } else { try { Add-OrUpdate $nr $pname ($val | ConvertTo-Json -Depth $JsonDepth -Compress) } catch {} }
+ }
+ }
+ [void]$newRows.Add($nr); $idx++
+ # Stop expanding once the per-record row cap is exceeded.
+ if ($newRows.Count -gt $maxRows) { break }
+ }
+ if ($newRows.Count -gt $maxRows) { break }
+ }
+ $rows = @($newRows)
+ if ($rows.Count -gt $maxRows) { break }
+ }
+ # Over the cap: mark every surviving row as truncated, trim to $maxRows,
+ # and record the truncation in run metrics.
+ if ($rows.Count -gt $maxRows) { foreach ($r in $rows) { Add-OrUpdate $r 'ExplosionTruncated' $true }; $rows = $rows[0..($maxRows - 1)]; try { $script:metrics.ExplosionTruncated = $true } catch {} }
+ # Deep mode: append flattened CopilotEventData columns that do not
+ # already exist on each row.
+ if ($ExplodeDeep -and $ced) {
+ for ($i = 0; $i -lt $rows.Count; $i++) {
+ $r = $rows[$i]
+ $flat = ConvertTo-FlatColumns -Node $ced -Prefix '' -MaxDepth $FlatDepthStandard
+ foreach ($ck in $flat.Keys) { if (-not $r.PSObject.Properties[$ck]) { Add-OrUpdate $r $ck $flat[$ck] } }
+ }
+ }
+ return $rows
+ }
+ catch {
+ # Any unexpected failure skips the record (counted) rather than aborting the run.
+ $script:metrics.FilteringSkippedRecords++
+ $script:metrics.FilteringParseFailures++
+ Write-LogHost "Failed to process record: $($_.Exception.Message)" -ForegroundColor Red
+ return @()
+ }
+}
+
+try {
+ # Unregister the early exit handler since catch/finally will handle Ctrl+C from this point
+ # This prevents duplicate "Script Interrupted" messages
+ Unregister-Event -SourceIdentifier PowerShell.Exiting -ErrorAction SilentlyContinue
+
+ # ============================================================
+ # RESUME MODE VALIDATION - Ensure no conflicting parameters
+ # ============================================================
+ if ($PSBoundParameters.ContainsKey('Resume')) {
+ # Resume mode is standalone - only auth-related parameters allowed
+ $allowedWithResume = @(
+ 'Resume',
+ 'Force',
+ 'Auth',
+ 'TenantId',
+ 'ClientId',
+ 'ClientSecret',
+ 'ClientCertificateThumbprint',
+ 'ClientCertificateStoreLocation',
+ 'ClientCertificatePath',
+ 'ClientCertificatePassword',
+ # Standard PowerShell common parameters
+ 'Verbose',
+ 'Debug',
+ 'ErrorAction',
+ 'WarningAction',
+ 'InformationAction',
+ 'ErrorVariable',
+ 'WarningVariable',
+ 'InformationVariable',
+ 'OutVariable',
+ 'OutBuffer',
+ 'PipelineVariable'
+ )
+
+ $invalidParams = @($PSBoundParameters.Keys | Where-Object { $_ -notin $allowedWithResume })
+
+ if ($invalidParams.Count -gt 0) {
+ Write-Host ""
+ Write-Host "════════════════════════════════════════════════════════════════════════════════" -ForegroundColor Red
+ Write-Host " ERROR: Invalid parameters used with -Resume" -ForegroundColor Red
+ Write-Host "════════════════════════════════════════════════════════════════════════════════" -ForegroundColor Red
+ Write-Host ""
+ Write-Host " Resume mode restores ALL settings from the checkpoint file." -ForegroundColor Yellow
+ Write-Host " You cannot specify other parameters (they would be ignored or cause inconsistency)." -ForegroundColor Yellow
+ Write-Host ""
+ Write-Host " Invalid parameters:" -ForegroundColor White
+ foreach ($p in $invalidParams) {
+ Write-Host " - $p" -ForegroundColor Red
+ }
+ Write-Host ""
+ Write-Host " ALLOWED with -Resume:" -ForegroundColor Green
+ Write-Host " -Resume [path] Checkpoint file (or auto-discover)" -ForegroundColor Gray
+ Write-Host " -Force Use most recent checkpoint without prompting" -ForegroundColor Gray
+ Write-Host " -Auth Override authentication method" -ForegroundColor Gray
+ Write-Host " -TenantId Tenant ID (for AppRegistration)" -ForegroundColor Gray
+ Write-Host " -ClientId Client ID (for AppRegistration)" -ForegroundColor Gray
+ Write-Host " -ClientSecret Client secret (for AppRegistration)" -ForegroundColor Gray
+ Write-Host ""
+ Write-Host " Example usage:" -ForegroundColor Cyan
+ Write-Host ' .\Script.ps1 -Resume' -ForegroundColor White
+ Write-Host ' .\Script.ps1 -Resume -Auth DeviceCode' -ForegroundColor White
+ Write-Host ' .\Script.ps1 -Resume "C:\path\.pax_checkpoint_xxx.json" -Force' -ForegroundColor White
+ Write-Host ""
+ exit 1
+ }
+ }
+
+ # ============================================================
+ # RESUME MODE DETECTION - Check for checkpoint to resume
+ # ============================================================
+ if ($ResumeSpecified) {
+ Write-LogHost ""
+ Write-LogHost "========================================" -ForegroundColor Cyan
+ Write-LogHost " RESUME MODE DETECTED" -ForegroundColor Cyan
+ Write-LogHost "========================================" -ForegroundColor Cyan
+ Write-LogHost ""
+
+ if ($Resume -ne '') {
+ # Explicit checkpoint path provided
+ Write-LogHost "Loading checkpoint from explicit path: $Resume" -ForegroundColor Yellow
+ $checkpointLoadSuccess = Read-Checkpoint -CheckpointPath $Resume
+ if (-not $checkpointLoadSuccess) {
+ Write-LogHost "ERROR: Failed to load checkpoint file. Cannot resume." -ForegroundColor Red
+ exit 1
+ }
+ $script:CheckpointPath = $Resume
+ # Read-Checkpoint sets $script:CheckpointData on success
+ $checkpointData = $script:CheckpointData
+ }
+ else {
+ # Auto-discover checkpoints in OutputPath
+ $searchPath = if ($OutputPath) { $OutputPath } else { (Get-Location).Path }
+ Write-LogHost "Searching for checkpoints in: $searchPath" -ForegroundColor Yellow
+
+ $checkpoints = Find-Checkpoints -OutputPath $searchPath
+
+ if ($checkpoints.Count -eq 0) {
+ Write-LogHost ""
+ Write-LogHost "════════════════════════════════════════════════════════════════════════════════" -ForegroundColor Red
+ Write-LogHost " NO CHECKPOINT FILES FOUND" -ForegroundColor Red
+ Write-LogHost "════════════════════════════════════════════════════════════════════════════════" -ForegroundColor Red
+ Write-LogHost ""
+ # (review) Tail of -Resume checkpoint discovery (the opening 'if' is above this hunk):
+ # this first branch handles "no checkpoint found" — print locate/resume guidance, then abort.
+ Write-LogHost " Searched in: $searchPath" -ForegroundColor White
+ Write-LogHost ""
+ Write-LogHost " Checkpoint files are named: .pax_checkpoint_YYYYMMDD_HHMMSS.json" -ForegroundColor Gray
+ Write-LogHost " They are saved in the same folder as the _PARTIAL.csv output file." -ForegroundColor Gray
+ Write-LogHost ""
+ Write-LogHost " COMMON LOCATIONS TO CHECK:" -ForegroundColor Yellow
+ Write-LogHost " • The 'output' folder where you typically save exports" -ForegroundColor White
+ Write-LogHost " • The folder shown in the Ctrl+C message when the run was interrupted" -ForegroundColor White
+ Write-LogHost " • Look for _PARTIAL.csv files - the checkpoint is in the same folder" -ForegroundColor White
+ Write-LogHost ""
+ Write-LogHost " HOW TO RESUME:" -ForegroundColor Cyan
+ Write-LogHost ""
+ Write-LogHost " Option 1: Specify the folder containing the checkpoint:" -ForegroundColor White
+ Write-LogHost " -Resume -OutputPath `"C:\path\to\output\folder`"" -ForegroundColor Green
+ Write-LogHost ""
+ Write-LogHost " Option 2: Specify the checkpoint file directly:" -ForegroundColor White
+ Write-LogHost " -Resume `"C:\path\to\.pax_checkpoint_20260120_123456.json`"" -ForegroundColor Green
+ Write-LogHost ""
+ Write-LogHost " Option 3: Run from the folder containing the checkpoint:" -ForegroundColor White
+ Write-LogHost " cd `"C:\path\to\output\folder`"" -ForegroundColor Green
+ Write-LogHost " pwsh -File `"...\PAX_Purview_Audit_Log_Processor.ps1`" -Resume" -ForegroundColor Green
+ Write-LogHost ""
+ Write-LogHost "════════════════════════════════════════════════════════════════════════════════" -ForegroundColor Red
+ Write-LogHost ""
+ # Abort: nothing to resume from.
+ exit 1
+ }
+ elseif ($checkpoints.Count -eq 1) {
+ # Exactly one checkpoint — use it without prompting.
+ $selectedCheckpoint = $checkpoints[0]
+ Write-LogHost "Found checkpoint: $($selectedCheckpoint.FileName)" -ForegroundColor Green
+ }
+ else {
+ # Multiple checkpoints found
+ if ($Force) {
+ # Use most recent without prompting
+ $selectedCheckpoint = $checkpoints | Sort-Object { $_.LastUpdated } -Descending | Select-Object -First 1
+ Write-LogHost "Multiple checkpoints found. -Force specified, using most recent:" -ForegroundColor Yellow
+ Write-LogHost " $($selectedCheckpoint.FileName)" -ForegroundColor White
+ }
+ else {
+ # Prompt user to select
+ $selectedCheckpoint = Select-Checkpoint -Checkpoints $checkpoints
+ if (-not $selectedCheckpoint) {
+ Write-LogHost "No checkpoint selected. Exiting." -ForegroundColor Yellow
+ exit 0
+ }
+ }
+ }
+
+ # Load the selected checkpoint file; on success Read-Checkpoint populates $script:CheckpointData.
+ $script:CheckpointPath = $selectedCheckpoint.Path
+ $checkpointLoadSuccess = Read-Checkpoint -CheckpointPath $script:CheckpointPath
+ if (-not $checkpointLoadSuccess) {
+ Write-LogHost "ERROR: Failed to load checkpoint file. Cannot resume." -ForegroundColor Red
+ exit 1
+ }
+ # Read-Checkpoint sets $script:CheckpointData on success
+ $checkpointData = $script:CheckpointData
+ }
+
+ # Note: $script:CheckpointData and $script:IsResumeMode already set by Read-Checkpoint
+
+ # Display resume summary
+ # Partition counters default to 0 when the checkpoint lacks the corresponding property.
+ $completedCount = if ($checkpointData.partitions.completed) { $checkpointData.partitions.completed.Count } else { 0 }
+ $queryCreatedCount = if ($checkpointData.partitions.queryCreated) { $checkpointData.partitions.queryCreated.Count } else { 0 }
+ $totalPartitions = if ($checkpointData.partitions.total) { $checkpointData.partitions.total } else { 0 }
+
+ Write-LogHost ""
+ Write-LogHost "Checkpoint loaded successfully:" -ForegroundColor Green
+ Write-LogHost " Original Run: $($checkpointData.runTimestamp)" -ForegroundColor White
+ # script:Parse-DateSafe is a script-scoped helper defined elsewhere in this file (locale-safe parsing).
+ $cpStartDate = if ($checkpointData.parameters.startDate) { $d = script:Parse-DateSafe $checkpointData.parameters.startDate; if ($d) { $d.ToString('yyyy-MM-dd') } else { 'Unknown' } } else { 'Unknown' }
+ $cpEndDate = if ($checkpointData.parameters.endDate) { $d = script:Parse-DateSafe $checkpointData.parameters.endDate; if ($d) { $d.ToString('yyyy-MM-dd') } else { 'Unknown' } } else { 'Unknown' }
+ Write-LogHost " Date Range: $cpStartDate to $cpEndDate" -ForegroundColor White
+ Write-LogHost " Total Partitions: $totalPartitions" -ForegroundColor White
+ Write-LogHost " Completed: $completedCount" -ForegroundColor Green
+ Write-LogHost " Query Created: $queryCreatedCount (will attempt data fetch)" -ForegroundColor Yellow
+ Write-LogHost " Remaining: $($totalPartitions - $completedCount)" -ForegroundColor Cyan
+ Write-LogHost ""
+
+ # ============================================================
+ # RESTORE ALL PARAMETERS FROM CHECKPOINT
+ # ============================================================
+ # NOTE(review): most restores below are gated on truthiness — checkpoint values of
+ # 0 / $false / empty string are treated as "not set" and keep the current defaults.
+ Write-LogHost "Restoring parameters from checkpoint..." -ForegroundColor DarkGray
+ $cp = $checkpointData.parameters
+
+ # Restore original run timestamp so incremental files use consistent naming
+ # This ensures all partition files (original run + resumes) share the same timestamp
+ if ($checkpointData.runTimestamp) {
+ $global:ScriptRunTimestamp = $checkpointData.runTimestamp
+ Write-LogHost " Restored original run timestamp: $($global:ScriptRunTimestamp)" -ForegroundColor DarkGray
+ }
+
+ # Date range (required) - using locale-safe parsing
+ $parsedStart = script:Parse-DateSafe $cp.startDate
+ if (-not $parsedStart) { throw "Failed to parse checkpoint startDate: $($cp.startDate)" }
+ $StartDate = $parsedStart.ToString('yyyy-MM-dd')
+
+ $parsedEnd = script:Parse-DateSafe $cp.endDate
+ if (-not $parsedEnd) { throw "Failed to parse checkpoint endDate: $($cp.endDate)" }
+ $EndDate = $parsedEnd.ToString('yyyy-MM-dd')
+
+ # Activity/Record filtering
+ if ($cp.activityTypes -and $cp.activityTypes.Count -gt 0) { $ActivityTypes = $cp.activityTypes }
+ if ($cp.recordTypes -and $cp.recordTypes.Count -gt 0) { $RecordTypes = $cp.recordTypes }
+ if ($cp.serviceTypes -and $cp.serviceTypes.Count -gt 0) { $ServiceTypes = $cp.serviceTypes }
+ if ($cp.userIds -and $cp.userIds.Count -gt 0) { $UserIds = $cp.userIds }
+ if ($cp.groupNames -and $cp.groupNames.Count -gt 0) { $GroupNames = $cp.groupNames }
+
+ # Agent filtering
+ if ($cp.agentId -and $cp.agentId.Count -gt 0) { $AgentId = $cp.agentId }
+ if ($cp.agentsOnly) { $AgentsOnly = [switch]$true }
+ if ($cp.excludeAgents) { $ExcludeAgents = [switch]$true }
+
+ # Prompt filtering
+ if ($cp.promptFilter) { $PromptFilter = $cp.promptFilter }
+
+ # Schema/Explosion settings
+ if ($cp.explodeArrays) { $ExplodeArrays = [switch]$true }
+ if ($cp.explodeDeep) { $ExplodeDeep = [switch]$true }
+ if ($cp.flatDepth) { $FlatDepth = $cp.flatDepth }
+ if ($cp.streamingSchemaSample) { $StreamingSchemaSample = $cp.streamingSchemaSample }
+ if ($cp.streamingChunkSize) { $StreamingChunkSize = $cp.streamingChunkSize }
+ # Allow user to override explosion threads on resume (different machine/load)
+ if (-not $PSBoundParameters.ContainsKey('ExplosionThreads') -and $cp.explosionThreads) { $ExplosionThreads = $cp.explosionThreads }
+
+ # M365/User info bundles
+ if ($cp.includeM365Usage) { $IncludeM365Usage = [switch]$true }
+ if ($cp.includeUserInfo) { $IncludeUserInfo = [switch]$true }
+ if ($cp.includeDSPMForAI) { $IncludeDSPMForAI = [switch]$true }
+ if ($cp.includeCopilotInteraction) { $IncludeCopilotInteraction = [switch]$true }
+ if ($cp.excludeCopilotInteraction) { $ExcludeCopilotInteraction = [switch]$true }
+
+ # Partitioning
+ if ($cp.blockHours) { $BlockHours = $cp.blockHours }
+ if ($cp.partitionHours) { $PartitionHours = $cp.partitionHours }
+ if ($cp.maxPartitions) { $MaxPartitions = $cp.maxPartitions }
+
+ # Output settings
+ if ($cp.outputPath) { $OutputPath = $cp.outputPath }
+ if ($cp.exportWorkbook) { $ExportWorkbook = [switch]$true }
+ if ($cp.combineOutput) { $CombineOutput = [switch]$true }
+
+ # Auth - only restore if user didn't override
+ if (-not $PSBoundParameters.ContainsKey('Auth') -and $cp.auth) { $Auth = $cp.auth }
+ if (-not $PSBoundParameters.ContainsKey('TenantId') -and $cp.tenantId) { $TenantId = $cp.tenantId }
+ if (-not $PSBoundParameters.ContainsKey('ClientId') -and $cp.clientId) { $ClientId = $cp.clientId }
+
+ # Other settings
+ if ($cp.resultSize) { $ResultSize = $cp.resultSize }
+ if ($cp.maxConcurrency) { $MaxConcurrency = $cp.maxConcurrency }
+ # MaxMemoryMB: Allow user override on resume (different machine may have different RAM)
+ # Explicit $null check (not truthiness) so a stored 0 ("disabled") or -1 ("auto") round-trips correctly.
+ if (-not $PSBoundParameters.ContainsKey('MaxMemoryMB') -and $null -ne $cp.maxMemoryMB) {
+ $MaxMemoryMB = $cp.maxMemoryMB
+ Write-LogHost " Restored MaxMemoryMB from checkpoint: $MaxMemoryMB" -ForegroundColor DarkGray
+ }
+ if ($cp.useEOM) { $UseEOM = [switch]$true }
+ if ($cp.autoCompleteness) { $AutoCompleteness = [switch]$true }
+ if ($cp.includeTelemetry) { $IncludeTelemetry = [switch]$true }
+
+ # Set the partial output path from checkpoint
+ $checkpointDir = Split-Path $script:CheckpointPath -Parent
+ $script:PartialOutputPath = Join-Path $checkpointDir $checkpointData.outputFiles.partialCsv
+
+ # Set FinalOutputPath by stripping _PARTIAL from the partial path
+ $partialBaseName = [System.IO.Path]::GetFileNameWithoutExtension($script:PartialOutputPath)
+ $partialExt = [System.IO.Path]::GetExtension($script:PartialOutputPath)
+ $finalBaseName = $partialBaseName -replace '_PARTIAL$', ''
+ $script:FinalOutputPath = Join-Path $checkpointDir "${finalBaseName}${partialExt}"
+
+ # Set OutputFile and CsvOutputFile to the partial path for execution
+ $OutputFile = $script:PartialOutputPath
+ # For ExportWorkbook mode, CsvOutputFile must use .csv extension (not .xlsx from PartialOutputPath)
+ if ($ExportWorkbook) {
+ $csvBaseName = [System.IO.Path]::GetFileNameWithoutExtension($script:PartialOutputPath)
+ $script:CsvOutputFile = Join-Path $checkpointDir "${csvBaseName}.csv"
+ } else {
+ $script:CsvOutputFile = $script:PartialOutputPath
+ }
+
+ # Check for incremental data files
+ # Wrapping Get-ChildItem in @() guarantees .Count works even for 0/1 matches.
+ $incrementalDir = Join-Path $checkpointDir ".pax_incremental"
+ $hasIncrementalData = (Test-Path $incrementalDir) -and @(Get-ChildItem -Path $incrementalDir -Filter "*.jsonl" -ErrorAction SilentlyContinue).Count -gt 0
+
+ if (-not (Test-Path $script:PartialOutputPath) -and -not $hasIncrementalData) {
+ Write-LogHost "WARNING: No partial data found (neither _PARTIAL.csv nor .pax_incremental files)" -ForegroundColor Yellow
+ Write-LogHost " Will start fresh data collection." -ForegroundColor Yellow
+ }
+ elseif ($hasIncrementalData) {
+ $incrementalFiles = @(Get-ChildItem -Path $incrementalDir -Filter "*.jsonl" -ErrorAction SilentlyContinue)
+ Write-LogHost "Found $($incrementalFiles.Count) incremental data file(s) in .pax_incremental/" -ForegroundColor Green
+ }
+
+ # Set log file to match the _PARTIAL output file (deferred from earlier setup)
+ $logBaseName = [System.IO.Path]::GetFileNameWithoutExtension($script:PartialOutputPath)
+ $logDir = Split-Path $script:PartialOutputPath -Parent
+ $script:LogFile = Join-Path $logDir ("{0}.log" -f $logBaseName)
+ $LogFile = $script:LogFile
+
+ # Check if original log exists
+ $logFileExisted = Test-Path $script:LogFile
+
+ # Add clear resume session marker to the log (appends to existing log, or creates new)
+ # The here-string's leading subexpression emits a fresh header only when no prior log file exists.
+ $resumeMarker = @"
+$(if (-not $logFileExisted) { "=== Portable Audit eXporter (PAX) - Purview Audit Log Exporter ===`n(Original log file was not found - this is a resumed session)`n" })
+============================================================================================================
+ RESUME SESSION STARTED
+============================================================================================================
+ Resume Time (UTC): $((Get-Date).ToUniversalTime().ToString('yyyy-MM-dd HH:mm:ss')) UTC
+ Resume Time (Local): $(Get-Date -Format 'yyyy-MM-dd HH:mm:ss')
+ Original Run: $($checkpointData.runTimestamp)
+ Checkpoint File: $(Split-Path $script:CheckpointPath -Leaf)
+ Partitions Completed: $completedCount / $totalPartitions
+ Partitions Remaining: $($totalPartitions - $completedCount)
+============================================================================================================
+
+"@
+ Add-Content -Path $script:LogFile -Value $resumeMarker -Encoding UTF8 -ErrorAction SilentlyContinue
+
+ # Flush any buffered log entries now that log file is set
+ # (entries logged before $script:LogFile existed were queued in $script:LogBuffer)
+ if ($script:LogBuffer -and $script:LogBuffer.Count -gt 0) {
+ foreach ($entry in $script:LogBuffer) {
+ try { Add-Content -Path $script:LogFile -Value $entry -Encoding UTF8 -ErrorAction SilentlyContinue } catch {}
+ }
+ $script:LogBuffer.Clear()
+ }
+
+
+ # Display restored settings summary
+ Write-LogHost ""
+ Write-LogHost "Restored settings from checkpoint:" -ForegroundColor DarkGray
+ Write-LogHost " Date Range: $StartDate to $EndDate" -ForegroundColor DarkGray
+ # Custom Activities: Show user-specified activity types (not including auto-added CopilotInteraction)
+ $customActivities = @($ActivityTypes | Where-Object { $_ -ne 'CopilotInteraction' })
+ if ($customActivities.Count -gt 0) {
+ Write-LogHost " Custom Activities: $($customActivities -join ', ')" -ForegroundColor DarkGray
+ } else {
+ Write-LogHost " Custom Activities: None" -ForegroundColor DarkGray
+ }
+ # CopilotInteraction status
+ $copilotStatus = if ($ExcludeCopilotInteraction) { 'Excluded' } elseif ($ActivityTypes -contains 'CopilotInteraction') { 'Included' } else { 'Not included' }
+ Write-LogHost " CopilotInteraction: $copilotStatus" -ForegroundColor DarkGray
+ # M365 Usage status
+ $m365Status = if ($IncludeM365Usage) { 'Included' } else { 'Not included' }
+ Write-LogHost " M365 Usage: $m365Status" -ForegroundColor DarkGray
+ if ($ExplodeArrays -or $ExplodeDeep) { Write-LogHost " Explosion: $(if ($ExplodeDeep) { 'ExplodeDeep' } elseif ($ExplodeArrays) { 'ExplodeArrays' })" -ForegroundColor DarkGray }
+ if ($PSBoundParameters.ContainsKey('Auth')) {
+ Write-LogHost " Auth (override): $Auth" -ForegroundColor Yellow
+ } else {
+ Write-LogHost " Auth (restored): $Auth" -ForegroundColor DarkGray
+ }
+ Write-LogHost ""
+
+ Write-LogHost "Resume mode initialized. Will continue from last checkpoint." -ForegroundColor Cyan
+ Write-LogHost ""
+
+ # Re-run MaxMemoryMB resolution after checkpoint restore (may have restored -1 for auto-detect)
+ $script:ResolvedMaxMemoryMB = $MaxMemoryMB
+ if ($MaxMemoryMB -eq -1) {
+ try {
+ $totalRAM = [math]::Round((Get-CimInstance -ClassName Win32_ComputerSystem -ErrorAction SilentlyContinue).TotalPhysicalMemory / 1MB, 0)
+ $script:ResolvedMaxMemoryMB = [math]::Round($totalRAM * 0.75, 0)
+ Write-LogHost "Memory management (resume): Auto-detected ${totalRAM}MB total RAM -> limit $($script:ResolvedMaxMemoryMB)MB (75%)" -ForegroundColor Cyan
+ } catch {
+ $script:ResolvedMaxMemoryMB = 4096
+ Write-LogHost "Memory management (resume): Could not detect system RAM, defaulting to 4096MB limit" -ForegroundColor Yellow
+ }
+ } elseif ($MaxMemoryMB -eq 0) {
+ Write-LogHost "Memory management (resume): DISABLED (-MaxMemoryMB 0)" -ForegroundColor DarkGray
+ } else {
+ Write-LogHost "Memory management (resume): Using $($script:ResolvedMaxMemoryMB)MB limit" -ForegroundColor DarkGray
+ }
+ # Re-evaluate memoryFlushEnabled with restored/resolved value
+ $script:memoryFlushEnabled = ($script:ResolvedMaxMemoryMB -gt 0) -and (-not $ExplodeDeep) -and (-not $ExplodeArrays) -and (-not $ForcedRawInputCsvExplosion)
+ }
+
+ # Authentication and Entra data collection (live mode only)
+ # Skipped entirely in replay mode (-RAWInputCSV) — no Graph/EOM connections needed there.
+ if (-not $RAWInputCSV) {
+ # Install ExchangeOnlineManagement only when -UseEOM is requested and the module is absent.
+ $existingEOM = Get-Module -ListAvailable -Name ExchangeOnlineManagement | Sort-Object Version -Descending | Select-Object -First 1
+ if (-not $existingEOM -and $UseEOM) {
+ Write-LogHost "Installing ExchangeOnlineManagement module..." -ForegroundColor Yellow
+ try { Install-Module -Name ExchangeOnlineManagement -Scope CurrentUser -Force -AllowClobber; Write-LogHost "Module installed successfully." -ForegroundColor Green } catch { Write-LogHost "Failed to install ExchangeOnlineManagement module: $($_.Exception.Message)" -ForegroundColor Red; exit 1 }
+ }
+ if ($UseEOM) {
+ Import-Module ExchangeOnlineManagement -Force
+ }
+
+ # Use unified authentication function
+ Connect-PurviewAudit -AuthMethod $Auth -UseEOMMode $UseEOM
+
+ # Fetch user directory and license data if requested (Graph API mode only)
+ $script:LicenseData = $null
+ $script:EntraUsersData = $null
+ if ($IncludeUserInfo -and -not $UseEOM) {
+ Write-LogHost "Fetching Entra user directory and license data..." -ForegroundColor Cyan
+ $script:LicenseData = Get-UserLicenseData
+ $script:EntraUsersData = Get-EntraUsersData
+ }
+ elseif ($IncludeUserInfo -and $UseEOM) {
+ # -IncludeUserInfo needs Graph; warn and continue without EntraUsers output in EOM mode.
+ Write-LogHost "WARNING: -IncludeUserInfo requires Graph API mode (not supported with -UseEOM)" -ForegroundColor Yellow
+ Write-LogHost " EntraUsers output will not be generated" -ForegroundColor Yellow
+ Write-LogHost ""
+ }
+ }
+
+ # Skip all audit log queries when only exporting user data
+ if (-not $OnlyUserInfo) {
+ $allLogs = New-Object System.Collections.ArrayList
+ if ($RAWInputCSV) {
+ # Replay mode: re-ingest a previously exported raw Purview CSV instead of querying live services.
+ Write-LogHost "Replay mode enabled: ingesting raw Purview CSV '$RAWInputCSV' (no Graph/EOM connections)" -ForegroundColor Yellow
+ if (-not (Test-Path $RAWInputCSV)) { Write-LogHost "Replay file not found: $RAWInputCSV" -ForegroundColor Red; exit 1 }
+ $csvData = Import-Csv -Path $RAWInputCSV
+ Write-LogHost ("Replay rows ingested: {0}" -f $csvData.Count) -ForegroundColor DarkGray
+ # Smoke-test conversion on the first row; failures are logged but non-fatal.
+ try {
+ $sampleRow = $csvData | Select-Object -First 1
+ $identity = if ($sampleRow.Id) { $sampleRow.Id } elseif ($sampleRow.RecordId) { $sampleRow.RecordId } else { [guid]::NewGuid().ToString() }
+ $rec = [pscustomobject]@{
+ RecordType = $(try { [int]$sampleRow.RecordType } catch { 0 })
+ CreationDate = $(if ($sampleRow.CreationDate) { $d = script:Parse-DateSafe $sampleRow.CreationDate; if ($d) { $d } else { Get-Date } } else { Get-Date })
+ UserIds = @($sampleRow.UserId)
+ Operations = $sampleRow.Operation
+ ResultStatus = $(try { $sampleRow.ResultStatus } catch { '' })
+ ResultCount = 0
+ Identity = $identity
+ IsValid = $true
+ ObjectState = ''
+ AuditData = $sampleRow.AuditData
+ Operation = $sampleRow.Operation
+ UserId = $sampleRow.UserId
+ }
+ $sampleOut = Convert-ToPurviewExplodedRecords -Record $rec -SkipMetrics
+ # sample row count preview removed (verbosity reduction)
+ # sample columns preview removed (verbosity reduction)
+ } catch {
+ Write-LogHost ("Replay sample conversion failed: {0}" -f $_.Exception.Message) -ForegroundColor DarkYellow
+ }
+ # Date/activity filters only apply when the caller explicitly bound those parameters.
+ $applyDateFilter = ($PSBoundParameters.ContainsKey('StartDate') -or $PSBoundParameters.ContainsKey('EndDate'))
+ $applyActivityFilter = ($PSBoundParameters.ContainsKey('ActivityTypes') -and $ActivityTypes -and $ActivityTypes.Count -gt 0)
+ $startFilter = $null; $endFilter = $null
+ if ($applyDateFilter) {
+ if ($PSBoundParameters.ContainsKey('StartDate')) { try { $startFilter = [datetime]::ParseExact($StartDate, 'yyyy-MM-dd', $null) } catch {} }
+ if ($PSBoundParameters.ContainsKey('EndDate')) { try { $endFilter = [datetime]::ParseExact($EndDate, 'yyyy-MM-dd', $null) } catch {} }
+ }
+ # Case-insensitive set for O(1) activity-type membership checks.
+ $activitySet = $null
+ if ($applyActivityFilter) { $activitySet = New-Object System.Collections.Generic.HashSet[string] ([System.StringComparer]::OrdinalIgnoreCase); foreach ($a in $ActivityTypes) { if ($a) { [void]$activitySet.Add($a) } } }
+ $filteredRows = New-Object System.Collections.Generic.List[object]
+ foreach ($row in $csvData) {
+ $keep = $true
+ $creationRaw = $row.CreationDate
+ $creation = if ($creationRaw) { script:Parse-DateSafe $creationRaw } else { $null }
+ # Start bound is inclusive, end bound exclusive (-ge drops rows on/after EndDate).
+ if ($applyDateFilter -and $creation) {
+ if ($startFilter -and $creation -lt $startFilter) { $keep = $false }
+ if ($endFilter -and $creation -ge $endFilter) { $keep = $false }
+ }
+ if ($keep -and $applyActivityFilter) {
+ $op = $row.Operation
+ if (-not $op -or -not $activitySet.Contains([string]$op)) { $keep = $false }
+ }
+ if (-not $keep) { continue }
+ $auditData = $row.AuditData
+ $identity = if ($row.Id) { $row.Id } elseif ($row.RecordId) { $row.RecordId } else { [guid]::NewGuid().ToString() }
+ # Normalize the CSV row into the same record shape the live query pipeline produces.
+ $rec = [pscustomobject]@{
+ RecordType = $(try { [int]$row.RecordType } catch { 0 })
+ CreationDate = $(if ($creation) { $creation } else { Get-Date })
+ UserIds = @($row.UserId)
+ Operations = $row.Operation
+ ResultStatus = $(try { $row.ResultStatus } catch { '' })
+ ResultCount = 0
+ Identity = $identity
+ IsValid = $true
+ ObjectState = ''
+ AuditData = $auditData
+ Operation = $row.Operation
+ UserId = $row.UserId
+ }
+ # NOTE(review): $filteredRows is only ever appended to within this hunk — verify it is consumed later.
+ [void]$filteredRows.Add($row)
+ [void]$allLogs.Add($rec)
+ }
+ $ingested = $allLogs.Count
+ Write-LogHost ("Replay rows after filters: {0}" -f $ingested) -ForegroundColor DarkGray
+ # Inline export for replay: execute immediately, bypass downstream pipeline
+ try {
+ Invoke-ReplayInlineExport -Logs $allLogs
+ return
+ }
+ catch {
+ # NOTE(review): inline-export failures are silently swallowed here and execution falls
+ # through to the legacy replay pipeline setup below — confirm this fallback is intentional.
+ }
+ $queryPlan = @(); $sequentialGroups = 0; $parallelDecision = @{ Enabled = $false; Reason = 'Replay'; AutoEligible = $false }; $parallelOverallEnabled = $false
+ $script:metrics.TotalRecordsFetched = $ingested
+ $script:progressState.Query.Total = 1; $script:progressState.Query.Current = 1
+ }
+ else {
+ # Live audit log query mode
+
+ # Diagnostic query removed to reduce throttling overhead
+ # Authentication and connectivity will be validated on first actual query
+
+ # Resolve -UserIds plus expanded -GroupNames members into a deduplicated target-user list.
+ $script:targetUsers = @()
+ if ($UserIds -or $GroupNames) {
+ Write-LogHost ""; Write-LogHost "User/Group Filtering Enabled:" -ForegroundColor Cyan
+ if ($UserIds) { $script:targetUsers += $UserIds; Write-LogHost " Individual users: $($UserIds.Count)" -ForegroundColor DarkCyan }
+ if ($GroupNames) {
+ Write-LogHost " Expanding groups to individual users..." -ForegroundColor DarkCyan
+ foreach ($group in $GroupNames) {
+ $members = Expand-GroupToUsers -GroupIdentity $group -UseEOMMode $UseEOM
+ if ($members.Count -gt 0) {
+ $script:targetUsers += $members
+ }
+ }
+ }
+ $script:targetUsers = $script:targetUsers | Select-Object -Unique
+ Write-LogHost " Total target users after deduplication: $($script:targetUsers.Count)" -ForegroundColor Green; Write-LogHost ""
+ }
+ # Dates were normalized to yyyy-MM-dd earlier, so ParseExact with invariant format is safe here.
+ $startDateObj = [datetime]::ParseExact($StartDate, 'yyyy-MM-dd', $null)
+ $endDateObj = [datetime]::ParseExact($EndDate, 'yyyy-MM-dd', $null)
+
+ if ($OnlyUserInfo) {
+ Write-LogHost "Fetching Entra user directory and license data only (no audit logs)..." -ForegroundColor Cyan
+ Write-LogHost ""
+ } else {
+ Write-LogHost "Starting enterprise-grade audit log search..." -ForegroundColor Yellow
+ Write-LogHost "Date range: $($startDateObj.ToString('yyyy-MM-dd')) (inclusive) to $($endDateObj.ToString('yyyy-MM-dd')) (exclusive)" -ForegroundColor Gray
+ Write-LogHost "Processing mode: $(if ($ExplodeDeep){'Deep Column Explosion (with Row Explosion)'} elseif ($ExplodeArrays){'Array Explosion'} else {'Standard 1:1'})" -ForegroundColor Gray
+ }
+
+ # Adaptive block sizing only applies to EOM mode (Graph API uses partitioning instead)
+ if ($UseEOM -and -not $OnlyUserInfo) {
+ Write-LogHost ""; Write-LogHost "Initializing adaptive block sizing (EOM mode)..." -ForegroundColor Cyan
+ }
+
+ # --- DSPM for AI: Build final ActivityTypes array (additive logic with exclusion override) ---
+ if ($IncludeDSPMForAI) {
+ Write-LogHost ""; Write-LogHost "=== DSPM for AI Configuration ===" -ForegroundColor Cyan; Write-LogHost ""
+ } else {
+ Write-LogHost "" # preserve a blank spacer line for readability without header
+ }
+
+ # Accumulator for the final activity-type list, built additively in Steps 1-6 below.
+ $finalActivityTypes = @()
+
+ # Step 1: Add explicit -ActivityTypes parameter values (if provided and not default)
+ if ($PSBoundParameters.ContainsKey('ActivityTypes') -and $ActivityTypes) {
+ foreach ($actType in $ActivityTypes) {
+ if ($actType -and $actType -ne '') {
+ $finalActivityTypes += $actType
+ }
+ }
+ if ($finalActivityTypes.Count -gt 0) {
+ Write-LogHost "Custom ActivityTypes provided: $($finalActivityTypes -join ', ')" -ForegroundColor Gray
+ }
+ }
+
+ # Step 2: Add DSPM for AI activity types if switch enabled
+ if ($IncludeDSPMForAI) {
+ $finalActivityTypes += 'ConnectedAIAppInteraction'
+ $finalActivityTypes += 'AIInteraction'
+ # Only add AIAppInteraction if user didn't decline at PAYG prompt
+ if (-not $script:RemoveAIAppInteraction) {
+ $finalActivityTypes += 'AIAppInteraction'
+ }
+ if ($script:RemoveAIAppInteraction) {
+ Write-LogHost "DSPM for AI: Adding ConnectedAIAppInteraction, AIInteraction (AIAppInteraction removed per user choice)" -ForegroundColor Cyan
+ } else {
+ Write-LogHost "DSPM for AI: Adding ConnectedAIAppInteraction, AIInteraction, AIAppInteraction" -ForegroundColor Cyan
+ }
+ }
+
+ # Step 3: Add CopilotInteraction when explicitly requested
+ if ($IncludeCopilotInteraction -and -not ($finalActivityTypes -contains $copilotBaseActivityType)) {
+ $finalActivityTypes += $copilotBaseActivityType
+ Write-LogHost "IncludeCopilotInteraction: Adding $copilotBaseActivityType (explicit request)" -ForegroundColor Cyan
+ }
+
+ # Step 4: Add Microsoft 365 usage bundle when requested
+ if ($IncludeM365Usage) {
+ $finalActivityTypes += $m365UsageActivityBundle
+ Write-LogHost ("M365 Usage bundle: Adding {0} activity types across Exchange/SharePoint/OneDrive/Teams" -f $m365UsageActivityBundle.Count) -ForegroundColor Cyan
+
+ # Merge the M365 record-type bundle into any user-supplied RecordTypes, deduplicated.
+ $RecordTypes = @(
+ if ($RecordTypes) { $RecordTypes }
+ $m365UsageRecordBundle
+ ) | Where-Object { $_ } | Select-Object -Unique
+ if ($RecordTypes.Count -eq 0) { $RecordTypes = $null }
+
+ # CRITICAL: Set ServiceTypes to NULL for IncludeM365Usage mode
+ # This prevents splitting into 4 workload passes (Exchange, SharePoint, OneDrive, Teams)
+ # and instead creates a single workload pass that queries all M365 operations together
+ $ServiceTypes = $null
+ Write-LogHost "M365 Usage bundle: ServiceTypes => NULL (single workload pass)" -ForegroundColor Gray
+
+ if ($RecordTypes) {
+ Write-LogHost "M365 Usage bundle: RecordTypes => $($RecordTypes -join ', ')" -ForegroundColor Gray
+ }
+ }
+ # NOTE(review): this guard sits inside the non-replay (else) branch where $RAWInputCSV is false,
+ # so it appears unreachable; the replay branch above already calls Invoke-ReplayInlineExport. Confirm.
+ if ($RAWInputCSV) {
+ Invoke-ReplayInlineExport -Logs $allLogs
+ # Skip the rest of the pipeline; replay handled
+ return
+ }
+
+ # Step 5: BASE ACTIVITY TYPE - Add CopilotInteraction as default base type
+ # This is the core Microsoft 365 Copilot activity type (FREE, included in M365 Copilot licensing)
+ # Captures ALL M365 Copilot usage including Teams meetings, Word, Excel, PowerPoint, Outlook, etc.
+ # Auto-add when:
+ # 1. User didn't explicitly provide -ActivityTypes parameter (default behavior), OR
+ # 2. User specified any DSPM switch (implies Copilot context needed)
+ # Exception: Always respect -ExcludeCopilotInteraction (handled in Step 6)
+ $userProvidedCustomTypes = $PSBoundParameters.ContainsKey('ActivityTypes')
+ $userWantsDSPM = $IncludeDSPMForAI
+ if (-not $ExcludeCopilotInteraction) {
+ # Auto-add if no custom types provided OR if DSPM switches used (implies Copilot data needed)
+ if (-not $userProvidedCustomTypes -or $userWantsDSPM) {
+ # Add CopilotInteraction if not already present
+ # Prepended (not appended) so the base type sorts first in the final list.
+ if (-not ($finalActivityTypes -contains $copilotBaseActivityType)) {
+ $finalActivityTypes = @($copilotBaseActivityType) + $finalActivityTypes
+ }
+ }
+ }
+
+ # Step 6: EXCLUSION OVERRIDE - Remove CopilotInteraction if -ExcludeCopilotInteraction is true
+ if ($ExcludeCopilotInteraction) {
+ $beforeExclusion = $finalActivityTypes.Count
+ $finalActivityTypes = $finalActivityTypes | Where-Object { $_ -ne $copilotBaseActivityType }
+ $afterExclusion = $finalActivityTypes.Count
+ if ($beforeExclusion -ne $afterExclusion) {
+ $removedCount = $beforeExclusion - $afterExclusion
+ Write-LogHost "EXCLUSION: Removed $removedCount M365 Copilot activity type (ExcludeCopilotInteraction switch)" -ForegroundColor Red
+ }
+ else {
+ Write-LogHost "EXCLUSION: No M365 Copilot type in list (ExcludeCopilotInteraction switch active)" -ForegroundColor DarkGray
+ }
+ }
+
+ # Step 7: Deduplicate
+ $finalActivityTypes = $finalActivityTypes | Select-Object -Unique
+
+ # Step 8: Validate array not empty
+ # Possible when -ExcludeCopilotInteraction was used with no other types supplied.
+ if ($finalActivityTypes.Count -eq 0) {
+ Write-LogHost ""
+ Write-LogHost "ERROR: No activity types remain after processing." -ForegroundColor Red
+ Write-LogHost "You excluded CopilotInteraction but provided no other activity types." -ForegroundColor Yellow
+ Write-LogHost "Please specify activity types using -ActivityTypes or -IncludeDSPMForAI switch." -ForegroundColor Yellow
+ Write-LogHost ""
+ exit 1
+ }
+
+ # Step 9: Apply to ActivityTypes variable
+ $ActivityTypes = $finalActivityTypes
+
+ # Step 9b: Update Parameter Snapshot with final ActivityTypes (if it exists)
+ if ($paramSnapshot -and $paramSnapshot.Contains('ActivityTypes')) {
+ $paramSnapshot['ActivityTypes'] = ($ActivityTypes -join ';')
+ }
+
+ # Step 10b: Display detailed output filenames now that ActivityTypes is finalized (if activity type switches were used)
+ if ($IncludeDSPMForAI -or $ExcludeCopilotInteraction) {
+ Write-LogHost ""
+ Write-LogHost "=== Output Files ===" -ForegroundColor Cyan
+ if ($ExportWorkbook) {
+ # Excel mode
+ $outputDir = if ($OutputPath) { $OutputPath } else { "C:\Temp\" }
+ if ($CombineOutput) {
+ $baseName = "Purview_Audit_CombinedUsageActivity"
+ if ($IncludeUserInfo -and -not $UseEOM) { $baseName += "_EntraUsers" }
+ $excelDescriptor = if ($IncludeUserInfo -and -not $UseEOM) { 'multi-tab workbook (CombinedActivity + EntraUsers_MAClicensing)' } else { 'single-tab workbook' }
+ Write-LogHost "Output File: ${outputDir}${baseName}_.xlsx ($excelDescriptor)" -ForegroundColor White
+ if ($IncludeUserInfo -and -not $UseEOM) { Write-LogHost " Tabs: CombinedActivity, EntraUsers_MAClicensing" -ForegroundColor Gray }
+ } else {
+ Write-LogHost "Output File: ${outputDir}Purview_Audit_MultiTab_.xlsx (multi-tab workbook)" -ForegroundColor White
+ if ($IncludeUserInfo -and -not $UseEOM) { Write-LogHost " Entra Users Tab: EntraUsers_MAClicensing" -ForegroundColor Gray }
+ }
+ } else {
+ # CSV mode
+ if ($CombineOutput) {
+ # Single combined CSV file
+ $displayPath = if ($script:FinalOutputPath) { $script:FinalOutputPath } else { $OutputFile }
+ Write-LogHost "Output File: $displayPath (combined - all activity types)" -ForegroundColor White
+ if ($IncludeUserInfo -and -not $UseEOM) { $entraFileLater = (Join-Path (Split-Path $displayPath -Parent) "EntraUsers_MAClicensing_${global:ScriptRunTimestamp}.csv"); Write-LogHost " Entra Users File: $entraFileLater" -ForegroundColor Gray }
+ } else {
+ # Separate CSV files per activity type
+ # Regex pulls the yyyyMMdd_HHmmss timestamp token out of the base output filename.
+ $outputDir = Split-Path $OutputFile -Parent
+ $timestamp = [System.IO.Path]::GetFileNameWithoutExtension($OutputFile) -replace '.*_(\d{8}_\d{6}).*', '$1'
+ Write-LogHost "Output Directory: $outputDir\" -ForegroundColor White
+ Write-LogHost "Output Files: ${outputDir}\Purview_Audit__${timestamp}.csv" -ForegroundColor Gray
+ if ($IncludeUserInfo -and -not $UseEOM) { $entraFileSplit = "${outputDir}\EntraUsers_MAClicensing_${global:ScriptRunTimestamp}.csv"; Write-LogHost " Entra Users: $entraFileSplit" -ForegroundColor Gray }
+ }
+ }
+ }
+
+ # Display Parameter Snapshot (now shows final ActivityTypes after DSPM processing)
+ Write-LogHost ""
+ Write-LogHost "Parameter Snapshot:" -ForegroundColor Cyan
+ foreach ($k in $paramSnapshot.Keys) { Write-LogHost (" {0} = {1}" -f $k, $paramSnapshot[$k]) -ForegroundColor DarkGray }
+ Write-LogHost ""
+
+ # Step 10: Display DSPM-specific options (only if DSPM switches are used)
+ if ($IncludeDSPMForAI -or $ExcludeCopilotInteraction) {
+ Write-LogHost ""
+ Write-LogHost "=== DSPM for AI Options ===" -ForegroundColor Cyan
+ if ($IncludeDSPMForAI) {
+ Write-LogHost " DSPM for AI activity types enabled (See billing information for details)" -ForegroundColor Cyan
+ }
+ if ($ExcludeCopilotInteraction) {
+ Write-LogHost " [!] M365 Copilot activity type excluded (CopilotInteraction)" -ForegroundColor Red
+ }
+ # NOTE(review): the brace below closes the Step 10 'if'; its trailing comment introduces Step 11
+ # and the brace indentation is inconsistent with the surrounding block — consider reformatting.
+} # Step 11: Log conflict resolution if it occurred
+ if ($script:ConflictResolved) {
+ Write-LogHost ""
+ Write-LogHost "Conflict Resolution:" -ForegroundColor Yellow
+ if ($script:ConflictChoice -eq 'INCLUDE') {
+ Write-LogHost " User resolved conflict by choosing to INCLUDE M365 Copilot activity type." -ForegroundColor Green
+ Write-LogHost " -ExcludeCopilotInteraction switch overridden." -ForegroundColor Green
+ }
+ elseif ($script:ConflictChoice -eq 'EXCLUDE') {
+ Write-LogHost " User resolved conflict by choosing to EXCLUDE M365 Copilot activity type." -ForegroundColor Red
+ Write-LogHost " CopilotInteraction removed from ActivityTypes." -ForegroundColor Red
+ }
+ elseif ($script:ConflictChoice -eq 'EXCLUDE (Force mode)') {
+ Write-LogHost " Force mode enabled: M365 Copilot activity type excluded without prompt." -ForegroundColor Red
+ Write-LogHost " CopilotInteraction removed from ActivityTypes." -ForegroundColor Red
+ }
+ Write-LogHost "===================================" -ForegroundColor Cyan
+}
+
+ # --- Validate AppendFile requires single-file output ---
+ # -AppendFile only makes sense when the run produces exactly one output file:
+ # Excel workbook, combined CSV, or a single activity type. Otherwise abort with guidance.
+ if ($AppendFile) {
+ # Count activity types being processed
+ $activityTypeCount = $ActivityTypes.Count
+
+ # Validate single-file output requirement
+ $isExcelMode = $ExportWorkbook.IsPresent
+ $isCombineMode = $CombineOutput.IsPresent
+ $isSingleActivity = ($activityTypeCount -eq 1)
+
+ if (-not ($isExcelMode -or $isCombineMode -or $isSingleActivity)) {
+ Write-Host "ERROR: -AppendFile requires single-file output mode" -ForegroundColor Red
+ Write-Host "" -ForegroundColor Yellow
+ Write-Host "You have $activityTypeCount activity types selected, which would create multiple output files." -ForegroundColor Yellow
+ Write-Host "Activity types: $($ActivityTypes -join ', ')" -ForegroundColor Yellow
+ Write-Host "" -ForegroundColor Yellow
+ Write-Host "Solutions (choose one):" -ForegroundColor Green
+ Write-Host " 1. Add -ExportWorkbook to create multi-tab Excel file (recommended)" -ForegroundColor Green
+ Write-Host " 2. Add -CombineOutput to merge all types into single CSV file" -ForegroundColor Green
+ Write-Host " 3. Specify only ONE activity type (use -ActivityTypes parameter)" -ForegroundColor Green
+ Write-Host "" -ForegroundColor Yellow
+ Write-Host "Examples:" -ForegroundColor Cyan
+ Write-Host " # Excel multi-tab (all activity types in one .xlsx file):" -ForegroundColor DarkGray
+ Write-Host " -AppendFile 'Report.xlsx' -ExportWorkbook" -ForegroundColor White
+ Write-Host "" -ForegroundColor DarkGray
+ Write-Host " # Combined CSV (all activity types merged into one .csv file):" -ForegroundColor DarkGray
+ Write-Host " -AppendFile 'Report.csv' -CombineOutput" -ForegroundColor White
+ Write-Host "" -ForegroundColor DarkGray
+ Write-Host " # Single activity type:" -ForegroundColor DarkGray
+ Write-Host " -AppendFile 'Report.csv' -ActivityTypes CopilotInteraction" -ForegroundColor White
+ exit 1
+ }
+
+ Write-LogHost "AppendFile validation: Single-file output confirmed" -ForegroundColor Green
+ if ($isExcelMode) {
+ Write-LogHost " Mode: Excel multi-tab workbook" -ForegroundColor DarkGray
+ } elseif ($isCombineMode) {
+ Write-LogHost " Mode: Combined CSV (all activity types merged)" -ForegroundColor DarkGray
+ } elseif ($isSingleActivity) {
+ Write-LogHost " Mode: Single activity type ($($ActivityTypes[0]))" -ForegroundColor DarkGray
+ }
+ Write-LogHost ""
+ }
+ # --- End AppendFile Validation ---
+
+ # --- DSPM for AI: Excel Export Data Storage Initialization ---
+ # Prepare the script-scoped row store that later export steps fill in:
+ # either one combined sheet (keyed by a DSPM-aware tab name) or an empty
+ # map that accumulates one tab per activity type.
+ if ($ExportWorkbook) {
+ $dspmActive = Test-DSPMFeaturesEnabled
+
+ if (-not $CombineOutput) {
+ # Multi-tab mode: rows are bucketed per activity type as they arrive
+ $script:ExcelExportData = @{}
+ Write-LogHost "Excel export: Multi-tab mode (separate tab per activity type)" -ForegroundColor Cyan
+ } else {
+ # Single-tab mode: every row lands in one combined sheet whose name
+ # reflects whether DSPM features are active for this run
+ $combinedTabName = 'CombinedUsageActivity'
+ if ($dspmActive) { $combinedTabName = 'Combined_DSPM_Data' }
+ $script:ExcelExportData = @{ $combinedTabName = @() }
+ if ($IncludeUserInfo -and -not $UseEOM) {
+ Write-LogHost "Excel export: Combined mode (multi-tab: $combinedTabName + EntraUsers_MAClicensing)" -ForegroundColor Cyan
+ } else {
+ Write-LogHost "Excel export: Combined mode (single tab: $combinedTabName)" -ForegroundColor Cyan
+ }
+ }
+ }
+ # --- End Excel Export Initialization ---
+
+ # === Activity Types for This Run ===
+ # Console/log summary of what will be queried and where output will land.
+ # Skipped entirely when -OnlyUserInfo is set (no audit queries in that mode).
+ if (-not $OnlyUserInfo) {
+Write-LogHost ""
+Write-LogHost "=== Activity Types for This Run ===" -ForegroundColor Cyan
+# Sort activity types: non-DSPM first, then DSPM types at the bottom
+# (display ordering only — query planning below uses $ActivityTypes as-is)
+$dspmTypes = @('AIAppInteraction', 'AIInteraction', 'ConnectedAIAppInteraction')
+$nonDspmTypes = $ActivityTypes | Where-Object { $_ -notin $dspmTypes }
+$dspmInList = $ActivityTypes | Where-Object { $_ -in $dspmTypes }
+$sortedActivityTypes = @($nonDspmTypes) + @($dspmInList)
+foreach ($act in $sortedActivityTypes) {
+ Write-LogHost " • $act" -ForegroundColor White
+}
+Write-LogHost "" # Output mode display with format-specific defaults
+ # CSV: Default SEPARATE (granular analysis - separate files per activity type)
+ # Excel: Default SEPARATE (separate tabs per activity type)
+ # Both: -CombineOutput switch forces combined mode
+
+ # Determine effective combine mode based on format and user input
+ if ($CombineOutput) {
+ # User explicitly specified -CombineOutput switch: use combined mode for both CSV and Excel
+ $csvCombineMode = $true
+ $excelSeparateMode = $false
+ } else {
+ # User didn't specify: use format-specific defaults (both default to separate)
+ $csvCombineMode = $false # CSV defaults to separate files
+ $excelSeparateMode = $true # Excel defaults to separated tabs (inverse logic)
+ }
+
+ # Human-readable description of the chosen output layout
+ if ($ExportWorkbook) {
+ $outputMode = if ($excelSeparateMode) {
+ 'Separated tabs (one tab per activity type)'
+ } else {
+ 'Combined (single tab with all activity types)'
+ }
+ } else {
+ $outputMode = if ($csvCombineMode) {
+ 'Combined (single CSV file with all activity types)'
+ } else {
+ 'Separated files (one CSV per activity type)'
+ }
+ }
+
+ Write-LogHost "Output mode: $outputMode" -ForegroundColor Gray
+ # CSV-only: show the concrete destination path(s) up front
+ if (-not $ExportWorkbook) {
+ if ($csvCombineMode) {
+ # NOTE(review): $script:FinalOutputPath appears to be a pre-resolved path
+ # set earlier in the script; falls back to the raw -OutputFile parameter
+ $displayPath = if ($script:FinalOutputPath) { $script:FinalOutputPath } else { $OutputFile }
+ $outputDir = Split-Path $displayPath -Parent
+ Write-LogHost "Output file: $displayPath" -ForegroundColor Gray
+ if ($IncludeUserInfo) { Write-LogHost "EntraUsers file (separate): $($OutputDir)\EntraUsers_MAClicensing_${global:ScriptRunTimestamp}.csv" -ForegroundColor Gray }
+ } else {
+ $outputDir = Split-Path $OutputFile -Parent
+ # Prefix for per-activity CSV files
+ $filePrefix = "Purview_Audit"
+ Write-LogHost "Output directory: $outputDir" -ForegroundColor Gray
+ Write-LogHost "Activity file pattern: ${filePrefix}__${global:ScriptRunTimestamp}.csv" -ForegroundColor Gray
+ if ($IncludeUserInfo) { Write-LogHost "EntraUsers file: EntraUsers_MAClicensing_${global:ScriptRunTimestamp}.csv" -ForegroundColor Gray }
+ }
+ }
+ Write-LogHost "===================================" -ForegroundColor Cyan
+ } # End if (-not $OnlyUserInfo) - Activity types display and output configuration
+
+ # Initialize the per-workload pass list. The security audit API accepts one
+ # workload per query, so each requested ServiceType becomes its own pass;
+ # a single $null entry means "no workload filter".
+ if (-not $RAWInputCSV) {
+ # Fresh (non-RAW) run: accumulate fetched audit records in an ArrayList
+ # (O(1) appends, unlike array += which reallocates on every add).
+ $allLogs = New-Object System.Collections.ArrayList
+ if (-not $UseEOM -and $ServiceTypes -and $ServiceTypes.Count -gt 0) {
+ $serviceRuns = $ServiceTypes
+ } else {
+ $serviceRuns = @($null)
+ }
+ if (-not $UseEOM -and $serviceRuns.Count -gt 1) {
+ Write-LogHost ("NOTE: security audit API accepts a single workload per query. Splitting run into {0} workload pass(es): {1}" -f $serviceRuns.Count, ($serviceRuns -join ', ')) -ForegroundColor Yellow
+ }
+ $servicePassIndex = 0
+ }
+ else {
+ # RAW CSV input: no live queries, but the workload loop below still runs
+ # once. Ensure $serviceRuns exists and reset the pass counter here too —
+ # previously the counter was left undefined on this path (incrementing
+ # from $null), which fails under Set-StrictMode and inherits a stale
+ # value when the script is re-run in a dot-sourced session.
+ if (-not $serviceRuns) { $serviceRuns = @($null) }
+ $servicePassIndex = 0
+ }
+ foreach ($currentServiceFilter in $serviceRuns) {
+ $servicePassIndex++
+ if (-not $UseEOM -and $currentServiceFilter) {
+ $workloadLabel = $currentServiceFilter
+ # Enhanced workload label when first Exchange pass includes cross-workload M365 usage types
+ if ($IncludeM365Usage -and $currentServiceFilter -eq 'Exchange' -and $servicePassIndex -eq 1) {
+ $workloadLabel = "$currentServiceFilter + M365 Usage (Office apps, Forms, Stream, Planner, PowerApps)"
+ }
+ Write-LogHost ("--- Processing workload {0}/{1}: {2} ---" -f $servicePassIndex, $serviceRuns.Count, $workloadLabel) -ForegroundColor DarkCyan
+ }
+
+ # Narrow the requested activity types to those mapped to the current
+ # workload (if a mapping exists); unmapped operations run in their own pass.
+ $serviceActivities = $ActivityTypes
+ if ($currentServiceFilter -and $serviceOperationMap.ContainsKey($currentServiceFilter)) {
+ $mappedOps = @($serviceOperationMap[$currentServiceFilter] | Where-Object { $ActivityTypes -contains $_ })
+ if ($mappedOps.Count -gt 0 -and $mappedOps.Count -lt $ActivityTypes.Count) {
+ $otherOps = @($ActivityTypes | Where-Object { $mappedOps -notcontains $_ })
+ if ($otherOps.Count -gt 0) {
+ Write-LogHost (" Aligning operationFilters for {0}: queued for this workload only; other operations run in their own pass -> {1}" -f $currentServiceFilter, ($otherOps -join ', ')) -ForegroundColor DarkGray
+ }
+ }
+ if ($mappedOps.Count -gt 0) {
+ $serviceActivities = $mappedOps
+ }
+ }
+
+ # Partition the requested record types into those that belong to the current
+ # workload ($matched) and those deferred to a later pass ($unmatched).
+ # A record type with NO workload mapping is treated as matching every pass.
+ $serviceRecordTypes = $RecordTypes
+ if ($currentServiceFilter -and $RecordTypes) {
+ $matched = @()
+ $unmatched = @()
+ foreach ($rt in $RecordTypes) {
+ $rtServices = $recordTypeWorkloadMap[$rt]
+ if ($null -eq $rtServices -or $rtServices.Count -eq 0) {
+ # No mapping known — keep it in this pass rather than drop it
+ $matched += $rt
+ continue
+ }
+
+ if ($rtServices -contains $currentServiceFilter) {
+ $matched += $rt
+ } else {
+ $unmatched += $rt
+ }
+ }
+ if ($matched.Count -gt 0) {
+ # Enhanced output for M365 Usage bundle in first Exchange pass
+ # (the bundle's cross-workload record types piggyback on this pass)
+ if ($IncludeM365Usage -and $currentServiceFilter -eq 'Exchange' -and $servicePassIndex -eq 1) {
+ $m365RecordTypes = @('OfficeNative','MicrosoftForms','MicrosoftStream','PlannerPlan','PlannerTask','PowerAppsApp')
+ $m365Matched = @($matched | Where-Object { $m365RecordTypes -contains $_ })
+ $exchangeMatched = @($matched | Where-Object { $m365RecordTypes -notcontains $_ })
+
+ if ($exchangeMatched.Count -gt 0) {
+ Write-LogHost (" Applying recordTypeFilters for Exchange -> {0}" -f ($exchangeMatched -join ', ')) -ForegroundColor DarkGray
+ }
+ if ($m365Matched.Count -gt 0) {
+ Write-LogHost (" Including M365 Usage recordTypes (cross-workload) -> {0}" -f ($m365Matched -join ', ')) -ForegroundColor Cyan
+ }
+ } else {
+ Write-LogHost (" Applying recordTypeFilters for {0} -> {1}" -f $currentServiceFilter, ($matched -join ', ')) -ForegroundColor DarkGray
+ }
+ $serviceRecordTypes = $matched | Select-Object -Unique
+ } else {
+ # Nothing for this workload: query it unfiltered rather than empty-filtered
+ $serviceRecordTypes = $null
+ }
+
+ if ($unmatched.Count -gt 0) {
+ Write-LogHost (" Queueing recordTypeFilters for future workload pass(es) -> {0}" -f ($unmatched -join ', ')) -ForegroundColor DarkGray
+ }
+ }
+
+ $script:CurrentServiceFilter = $currentServiceFilter
+ $queryPlan = @(Get-QueryPlan -RequestedActivities $serviceActivities) # Force array wrapper
+ $script:progressBlocksCompleted = 0; $script:progressBlockHoursSum = 0.0
+ $script:progressState.Query.Current = 0
+
+ # Initial rough estimate for display - will be updated as each group is processed
+ $totalEstimatedBlocks = $queryPlan.Count
+ $script:progressState.Query.Total = [int]$totalEstimatedBlocks
+ Set-ProgressPhase -Phase 'Query' -Status "Planning queries: $($queryPlan.Count) groups"
+ Write-LogHost "Planned $($queryPlan.Count) query groups" -ForegroundColor DarkCyan
+ $sequentialGroups = 0
+ $ps7 = ($PSVersionTable.PSVersion.Major -ge 7)
+ if (-not $ps7 -and $ParallelMode -ne 'Off') { $ParallelMode = 'Off' }
+ $parallelDecision = Get-ParallelActivationDecision -QueryPlan $queryPlan -ParallelMode $ParallelMode -MaxParallelGroups $MaxParallelGroups -MaxConcurrency $MaxConcurrency
+ $parallelOverallEnabled = $parallelDecision.Enabled
+ Write-LogHost ("ParallelMode requested: {0} | Effective: {1} ({2})" -f $ParallelMode, ($(if ($parallelOverallEnabled) { 'Enabled' } else { 'Disabled' })), $parallelDecision.Reason) -ForegroundColor DarkCyan
+ if ($ParallelMode -eq 'Auto' -and -not $parallelOverallEnabled) { Write-LogHost "WARNING: ParallelMode Auto requested but heuristics not met -> running sequential. Reason: $($parallelDecision.Reason)." -ForegroundColor Yellow }
+ if ($enableParallelSwitchUsed) { Write-LogHost "-EnableParallel switch detected -> setting ParallelMode to On" -ForegroundColor DarkYellow }
+ $groupIndex = 0
+ foreach ($grp in $queryPlan) {
+ $groupIndex++
+
+ # Calculate partition hours using default or user override (Graph API mode only)
+ # $PartitionHours of 0 means "not specified by the user".
+ $effectivePartitionHours = $PartitionHours
+ $userSpecifiedPartitionHours = ($PartitionHours -gt 0)
+ if (-not $UseEOM -and $effectivePartitionHours -eq 0) {
+ # Default to 12-hour partitions for Graph API mode
+ $effectivePartitionHours = 12
+ if ($paramSnapshot -and $paramSnapshot.Contains('PartitionHours')) {
+ # Reflect the auto-chosen value in the run-parameter snapshot for reporting
+ $paramSnapshot['PartitionHours'] = "${effectivePartitionHours} (auto)"
+ }
+ }
+
+ # Calculate partitions from PartitionHours (Graph API) or use degree of parallelism (EOM)
+ if (-not $UseEOM -and $effectivePartitionHours -gt 0) {
+ $totalHours = ($endDateObj - $startDateObj).TotalHours
+ $calculatedPartitions = [Math]::Ceiling($totalHours / $effectivePartitionHours)
+
+ # Apply MaxPartitions cap with auto-adjustment
+ if ($calculatedPartitions -gt $MaxPartitions) {
+ if (-not $userSpecifiedPartitionHours) {
+ # User didn't specify PartitionHours, so auto-adjust to 12h to stay under cap
+ $effectivePartitionHours = 12
+ $calculatedPartitions = [Math]::Ceiling($totalHours / $effectivePartitionHours)
+ Write-LogHost " Auto-adjusted to ${effectivePartitionHours}h partitions to respect MaxPartitions cap ($MaxPartitions)" -ForegroundColor Cyan
+ }
+ # If still over cap after adjustment, hard cap it
+ if ($calculatedPartitions -gt $MaxPartitions) {
+ $calculatedPartitions = $MaxPartitions
+ # CRITICAL: Recalculate effectivePartitionHours to ensure equal time slices
+ # (may now be fractional — slice arithmetic below handles non-integer hours)
+ $effectivePartitionHours = $totalHours / $calculatedPartitions
+ Write-LogHost " Applying MaxPartitions cap: $calculatedPartitions partitions @ ${effectivePartitionHours}h each" -ForegroundColor Magenta
+ }
+ }
+
+ $degree = $calculatedPartitions
+ } else {
+ # EOM mode or PartitionHours not enabled: use concurrency-based degree
+ $degree = [Math]::Min($grp.Concurrency, $MaxConcurrency)
+ }
+
+ # Large query warning (Graph API mode only)
+ if (-not $UseEOM -and $effectivePartitionHours -gt 0) {
+ $daySpan = ($endDateObj - $startDateObj).TotalDays
+ if ($daySpan -gt 30 -or $degree -gt 50) {
+ Write-LogHost ""
+ Write-LogHost " [!] LARGE QUERY DETECTED" -ForegroundColor Yellow
+ Write-LogHost " Date Range: $([Math]::Round($daySpan, 1)) days | Partitions: $degree @ ${effectivePartitionHours}h" -ForegroundColor Yellow
+ Write-LogHost " Large queries may take several hours to complete." -ForegroundColor Yellow
+ Write-LogHost " Smaller date ranges or -PartitionHours will reduce processing time." -ForegroundColor Yellow
+
+ # Warn about interactive auth token expiration for long-running queries
+ $authMethod = $script:SharedAuthState.AuthMethod
+ if ($authMethod -in 'weblogin', 'devicecode') {
+ Write-LogHost ""
+ Write-LogHost " ℹ️ TOKEN REFRESH NOTE (Interactive Auth)" -ForegroundColor Cyan
+ Write-LogHost " Tokens expire after ~60-90 minutes. For queries exceeding 1 hour:" -ForegroundColor Cyan
+ Write-LogHost " • Token will be automatically refreshed from MSAL cache" -ForegroundColor Gray
+ Write-LogHost " • If cache is unavailable, browser prompt may appear" -ForegroundColor Gray
+ Write-LogHost " • Progress is saved automatically - use -Resume if interrupted" -ForegroundColor Gray
+ }
+ Write-LogHost ""
+ }
+ }
+
+ if (-not $UseEOM) {
+ # For single query group, show simplified message without listing all activity types
+ # (activity types are already shown in "Activity Types for This Run" section)
+ if ($queryPlan.Count -eq 1) {
+ Write-LogHost "Query Group: Combined ($($grp.Activities.Count) activity types, partitions=$degree @ ${effectivePartitionHours}h)" -ForegroundColor Yellow
+ } else {
+ Write-LogHost "Group: $($grp.Name) (partitions=$degree @ ${effectivePartitionHours}h)" -ForegroundColor Yellow
+ # Show activity types per group when there are multiple groups
+ Write-LogHost " Activity Types: $($grp.Activities -join ', ')" -ForegroundColor Gray
+ }
+ }
+ # $degree drives BOTH how many time partitions exist and how many run at
+ # once; the cap below limits only the concurrent count — all partitions
+ # are still created and queued.
+ $requestedDegree = $degree
+ $totalPartitions = $degree # Total number of time partitions to create
+ $maxConcurrentPartitions = $degree # Maximum concurrent execution
+ if ($degree -gt $MaxConcurrency) {
+ $maxConcurrentPartitions = $MaxConcurrency # Cap concurrent execution
+ # Best-effort metrics bump; swallowed on purpose if metrics are unavailable
+ try { $script:metrics.PartitionCapsApplied++; if ($script:metrics.PartitionCapHighestRequested -lt $requestedDegree) { $script:metrics.PartitionCapHighestRequested = $requestedDegree } } catch {}
+ Write-LogHost " Applying concurrency cap ($MaxConcurrency): requested $requestedDegree -> $maxConcurrentPartitions concurrent (all $totalPartitions queued)" -ForegroundColor Magenta
+ }
+ # Parallel execution requires: heuristics enabled, group within cap, PS7+, >1 partition
+ $withinCap = $groupIndex -le $MaxParallelGroups
+ $canParallel = $parallelOverallEnabled -and $withinCap -and ($PSVersionTable.PSVersion.Major -ge 7) -and ($degree -gt 1)
+
+ # Update progress total now that we know parallel mode and partition count
+ # For parallel: each partition = 1 progress unit, For sequential: use BlockHours
+ if ($canParallel) {
+ # Parallel mode: total progress = number of partitions
+ $script:progressState.Query.Total = $degree
+ } else {
+ # Sequential mode: calculate blocks based on BlockHours
+ $sequentialBlocks = 0
+ foreach ($act in $grp.Activities) {
+ try {
+ $initialBlock = Get-OptimalBlockSize -ActivityType $act
+ if (-not $initialBlock -or $initialBlock -le 0) { $initialBlock = $BlockHours }
+ $rangeHours = ($endDateObj - $startDateObj).TotalHours
+ $blocks = [int][Math]::Ceiling($rangeHours / $initialBlock)
+ if ($blocks -lt 1) { $blocks = 1 }
+ $sequentialBlocks += $blocks
+ } catch {
+ # If block-size estimation fails, count the activity as one block
+ $sequentialBlocks += 1
+ }
+ }
+ if ($sequentialBlocks -lt 1) { $sequentialBlocks = 1 }
+ $script:progressState.Query.Total = $sequentialBlocks
+ }
+
+ # Adaptive memory pressure logic - ONLY for EOM mode (Graph API has low memory footprint)
+ # When the process working set exceeds -MemoryPressureMB, step MaxConcurrency
+ # down by one (never below 1) and record the event in run metrics.
+ if (-not $DisableAdaptive -and $UseEOM) {
+ try {
+ $workingSetMB = [math]::Round(([System.Diagnostics.Process]::GetCurrentProcess().WorkingSet64 / 1MB),0)
+ if ($workingSetMB -gt $MemoryPressureMB -and $MaxConcurrency -gt 1) {
+ $old = $MaxConcurrency
+ $MaxConcurrency = [Math]::Max(1, $MaxConcurrency - 1)
+ # Reduce concurrent execution, not total partitions
+ if ($maxConcurrentPartitions -gt $MaxConcurrency) { $maxConcurrentPartitions = $MaxConcurrency }
+ $script:metrics.AdaptiveMemoryReductions++
+ $script:metrics.AdaptiveEvents += "Memory pressure detected (${workingSetMB}MB > ${MemoryPressureMB}MB) reduced MaxConcurrency $old -> $MaxConcurrency"
+ Write-LogHost "Adaptive: Memory pressure ($workingSetMB MB) reducing MaxConcurrency to $MaxConcurrency" -ForegroundColor DarkYellow
+ }
+ } catch {} # intentional best-effort: a failed working-set probe must not abort the run
+ }
+
+ # Concurrency guidance handled earlier; no additional adjustment section required
+
+ # Graph API mode: Can query multiple activities in single request
+ # EOM mode: Should only have one activity per group (enforced by Get-QueryPlan)
+ $activities = $grp.Activities # Array of activity types for this group
+ $activity = $grp.Activities[0] # Backward compatibility for single-activity logic
+
+ $partitions = @()
+
+ # CRITICAL: When IncludeM365Usage is active, NEVER send recordTypes or serviceFilter
+ # Graph API rejects mixed cross-workload recordTypes with workload-specific serviceFilter
+ # BUGFIX: hoisted ABOVE the partition-count branch — previously these two
+ # variables were only assigned inside the multi-partition branch, so the
+ # single-partition path below read an undefined value (or a stale one
+ # carried over from the previous query group's iteration).
+ $partitionRecordTypes = $serviceRecordTypes
+ $partitionServiceFilter = $currentServiceFilter
+ if ($IncludeM365Usage) {
+ $partitionRecordTypes = $null
+ $partitionServiceFilter = $null
+ }
+
+ if ($totalPartitions -gt 1) {
+ $totalHours = ($endDateObj - $startDateObj).TotalHours
+ # Use effectivePartitionHours for Graph API mode, or calculate from totalPartitions for EOM mode
+ if (-not $UseEOM -and $effectivePartitionHours -gt 0) {
+ $sliceHours = $effectivePartitionHours
+ } else {
+ $sliceHours = $totalHours / $totalPartitions
+ }
+
+ # Slice [start, end) into equal windows; the final slice absorbs any
+ # remainder by snapping to the exact end timestamp.
+ for ($pi = 0; $pi -lt $totalPartitions; $pi++) {
+ $pStart = $startDateObj.AddHours($sliceHours * $pi)
+ $pEnd = if ($pi -eq ($totalPartitions - 1)) { $endDateObj } else { $startDateObj.AddHours($sliceHours * ($pi + 1)) }
+ $partitions += [pscustomobject]@{
+ Activities = $activities # Pass all activities for Graph API
+ Activity = $activity # Backward compatibility
+ PStart = $pStart
+ PEnd = $pEnd
+ Index = ($pi + 1)
+ Total = $totalPartitions
+ RecordTypes = $partitionRecordTypes
+ ServiceFilter = $partitionServiceFilter
+ }
+ }
+ } else {
+ # Single partition: the full requested date range in one query
+ $partitions += [pscustomobject]@{
+ Activities = $activities # Pass all activities for Graph API
+ Activity = $activity # Backward compatibility
+ PStart = $startDateObj
+ PEnd = $endDateObj
+ Index = 1
+ Total = 1
+ RecordTypes = $partitionRecordTypes
+ ServiceFilter = $partitionServiceFilter
+ }
+ }
+
+ # ========================================
+ # RESUME MODE: FILTER PARTITIONS
+ # ========================================
+ # Skip already-completed partitions from checkpoint
+ # Categories from Get-PartitionsToProcess:
+ # ToSkip - fully completed in a prior run; results already saved
+ # ToFetchOnly - server query exists; only result download is pending
+ # ToCreateAndFetch - never started; needs both query creation and fetch
+ $originalPartitionCount = $partitions.Count
+ $skippedPartitions = @()
+ $fetchOnlyPartitions = @()
+
+ if ($script:CheckpointData -and $script:IsResumeMode) {
+ $partitionCategories = Get-PartitionsToProcess -AllPartitions $partitions
+
+ if ($partitionCategories.ToSkip.Count -gt 0) {
+ $skippedPartitions = $partitionCategories.ToSkip
+ # Store the originally-skipped partition indices for summary display
+ # (This is captured BEFORE processing, so it only includes checkpoint-completed partitions)
+ $script:OriginallySkippedPartitionIndices = @($skippedPartitions | ForEach-Object { $_.Index })
+ Write-LogHost " [RESUME] Skipping $($skippedPartitions.Count) already-completed partition(s): $($skippedPartitions.Index -join ', ')" -ForegroundColor Green
+ }
+
+ if ($partitionCategories.ToFetchOnly.Count -gt 0) {
+ $fetchOnlyPartitions = $partitionCategories.ToFetchOnly
+ Write-LogHost " [RESUME] $($fetchOnlyPartitions.Count) partition(s) have pending queries - will fetch data only" -ForegroundColor Yellow
+ }
+
+ if ($partitionCategories.ToCreateAndFetch.Count -gt 0) {
+ Write-LogHost " [RESUME] $($partitionCategories.ToCreateAndFetch.Count) partition(s) need full processing" -ForegroundColor Cyan
+ }
+
+ # Replace partitions with only those needing work (fetch-only + create-and-fetch)
+ $partitions = @()
+ $partitions += $partitionCategories.ToFetchOnly
+ $partitions += $partitionCategories.ToCreateAndFetch
+
+ if ($partitions.Count -eq 0) {
+ Write-LogHost " [RESUME] All partitions already completed! Skipping query group." -ForegroundColor Green
+ continue # Skip to next query group
+ }
+
+ Write-LogHost " [RESUME] Processing $($partitions.Count) of $originalPartitionCount partition(s)" -ForegroundColor Cyan
+ }
+
+ # Parallel processing using Start-ThreadJob
+ # CRITICAL: EOM mode is NOT compatible with parallel processing (implicit remoting session conflicts)
+ # Graph API mode uses REST calls which are thread-safe and session-independent
+ if ($canParallel) {
+ # ========================================
+ # DUAL-MODE PARALLEL VALIDATION
+ # ========================================
+
+ if ($UseEOM) {
+ # EOM mode + parallel = guaranteed failure due to implicit remoting
+ Write-LogHost " ERROR: Parallel processing is not compatible with -UseEOM mode" -ForegroundColor Red
+ Write-LogHost " Reason: Exchange Online implicit remoting cannot be safely shared across ThreadJobs" -ForegroundColor Yellow
+ Write-LogHost " This combination should have been blocked by validation - falling back to sequential" -ForegroundColor Yellow
+ $canParallel = $false
+ }
+ else {
+ # Graph API mode - parallel is safe
+ Write-LogHost " Processing partitions in parallel (Graph API ThreadJobs, Max=$maxConcurrentPartitions)..." -ForegroundColor Cyan
+ }
+ }
+
+ if ($canParallel) {
+ try {
+ # ========================================
+ # GRAPH API PARALLEL EXECUTION
+ # ========================================
+ # Uses ThreadJobs with REST API calls (no session dependency)
+
+ # Get Graph context and access token to pass to threads
+ $mgContext = Get-MgContext -ErrorAction Stop
+ if (-not $mgContext) {
+ Write-LogHost " ERROR: Not connected to Microsoft Graph" -ForegroundColor Red
+ $canParallel = $false
+ }
+
+ # Get access token for Graph API calls in threads
+ $accessToken = $null
+ try {
+ # For AppRegistration auth ONLY: proactively refresh if token is approaching expiration
+ # AppRegistration can refresh automatically without user interaction
+ # Token lifetime is typically 60-90 minutes; refresh proactively at 30 minutes for safety buffer
+ if ($script:AuthConfig.CanReauthenticate -and $script:AuthConfig.Method -eq 'AppRegistration') {
+ $tokenAge = $null
+ if ($script:AuthConfig.TokenIssueTime) {
+ $tokenAge = (Get-Date) - $script:AuthConfig.TokenIssueTime
+ }
+
+ # Refresh if token is older than 30 minutes (proactive, well before ~60 min expiry)
+ if ($tokenAge -and $tokenAge.TotalMinutes -gt 30) {
+ Write-LogHost " [TOKEN] Token age: $([Math]::Round($tokenAge.TotalMinutes, 1)) minutes - proactively refreshing..." -ForegroundColor Yellow
+ $refreshResult = Invoke-TokenRefresh -Force
+ if ($refreshResult.Success -and $refreshResult.NewToken) {
+ $accessToken = $refreshResult.NewToken
+ $script:AuthConfig.TokenIssueTime = Get-Date # Reset age timer
+ Write-LogHost " [TOKEN] Fresh token obtained for partition launch" -ForegroundColor Cyan
+ }
+ else {
+ Write-LogHost " [TOKEN] Proactive refresh failed, using current token: $($refreshResult.Message)" -ForegroundColor Yellow
+ }
+ }
+ elseif ($tokenAge) {
+ Write-LogHost " [TOKEN] Token age: $([Math]::Round($tokenAge.TotalMinutes, 1)) minutes - using current token" -ForegroundColor DarkGray
+ }
+ }
+
+ # If we don't have a token yet, use reliable extraction helper
+ if (-not $accessToken) {
+ $accessToken = Get-GraphAccessToken
+ }
+ }
+ catch {
+ Write-LogHost " WARNING: Could not retrieve access token, parallel execution may fail" -ForegroundColor Yellow
+ }
+
+ if ($canParallel) {
+ $jobs = @()
+ $jobMeta = @{}
+
+ # Progress tracking uses actual partition count
+ $script:progressState.Query.Total = $partitions.Count
+ $script:progressState.Query.Current = 0
+
+ # Conditional message based on whether all partitions launch initially or some are queued
+ if ($maxConcurrentPartitions -ge $partitions.Count) {
+ Write-LogHost " Launching all $($partitions.Count) queries in parallel (initially may take several minutes)..." -ForegroundColor DarkCyan
+ } else {
+ Write-LogHost " Launching initial $maxConcurrentPartitions/$($partitions.Count) queries, remaining queued as slots free (initially may take several minutes)..." -ForegroundColor DarkCyan
+ }
+
+ # Launch jobs asynchronously - we'll start monitoring after first batch is queued
+ $launchStartTime = Get-Date
+ $monitoringStarted = $false
+ $firstBatchSize = [Math]::Min($maxConcurrentPartitions, $partitions.Count)
+
+ # Track which messages we've already shown (global deduplication)
+ $script:shownJobMessages = @{}
+
+ # Track partition status for retry logic and final summary
+ $script:partitionStatus = @{}
+ foreach ($pt in $partitions) {
+ $script:partitionStatus[$pt.Index] = @{
+ Partition = $pt
+ AttemptNumber = 1
+ QueryId = $null
+ QueryName = $null
+ Status = 'NotStarted' # NotStarted, Sent, Complete, Failed, Subdivided, JobCreated
+ LastError = $null
+ RecordCount = 0
+ }
+ }
+
+ # Update checkpoint with partition count - only update total for fresh runs
+ if ($script:CheckpointData) {
+ if (-not $script:IsResumeMode) {
+ # Fresh run: set total from actual partition count
+ $script:CheckpointData.partitions.total = $partitions.Count
+ }
+ # For resume mode, keep original total from checkpoint
+ $script:CheckpointData.statistics.partitionsRemaining = $partitions.Count
+ Save-CheckpointToDisk
+ }
+
+ # Track which partitions have had jobs created for them (for dynamic subdivision support)
+ $script:partitionsWithJobs = New-Object System.Collections.Generic.HashSet[int]
+
+ # Define the ThreadJob scriptblock once for reuse in both initial and retry attempts
+ $queryJobScriptBlock = {
+ param($pStart, $pEnd, [array]$activity, $resultSize, $userIds, $idx, $tot, $sharedAuthState, $partition, $maxOutageMinutes, $apiVersion, $logPath, $existingQueryId)
+ # Suppress web request progress bar in job runspace
+ $ProgressPreference = 'SilentlyContinue'
+
+ # Compose a Graph security auditLog endpoint URL for the configured
+ # $apiVersion (closed over from the job's parameter list).
+ function Get-AuditUri { param($path) "https://graph.microsoft.com/$apiVersion/security/auditLog/$path" }
+
+ # Build request headers on demand, reading the bearer token from the shared
+ # auth state on every call so a token refreshed by the main thread is picked
+ # up by this job mid-flight.
+ function Get-CurrentHeaders {
+ param([string]$ClientRequestId)
+ $headerSet = @{}
+ $headerSet['Authorization'] = "Bearer $($sharedAuthState.Token)"
+ $headerSet['Content-Type'] = 'application/json'
+ $headerSet['client-request-id'] = $ClientRequestId
+ return $headerSet
+ }
+
+ # FIX C: Report whether the shared access token is still usable.
+ # Returns $true when no expiry metadata is available (assume valid; a dead
+ # token surfaces later as a 401) or when more than a 2-minute safety buffer
+ # remains before the recorded UTC expiry.
+ function Test-TokenValid {
+ $expiry = $sharedAuthState.ExpiresOn
+ if (-not $expiry) {
+ # No expiry info - assume valid (will fail with 401 if not)
+ return $true
+ }
+ $minutesLeft = ($expiry - (Get-Date).ToUniversalTime()).TotalMinutes
+ return ($minutesLeft -gt 2)
+ }
+
+ $allRecords = @()
+ $t0 = Get-Date
+ $queryId = $existingQueryId # Use existing QueryId if provided (for retry after 403 fetch failure)
+ $debugInfo = $null
+
+ # Initialize telemetry tracking
+ $telemetry = [PSCustomObject]@{
+ PartitionIndex = $idx
+ PartitionTotal = $tot
+ PartitionStart = $pStart.ToString('yyyy-MM-dd HH:mm:ss')
+ PartitionEnd = $pEnd.ToString('yyyy-MM-dd HH:mm:ss')
+ PartitionHours = [Math]::Round(($pEnd - $pStart).TotalHours, 2)
+ QueryCreatedAt = $null
+ InitialPollDelaySeconds = 0
+ FirstRunningAt = $null
+ SucceededAt = $null
+ FirstPageAt = $null
+ LastPageAt = $null
+ PageCount = 0
+ RowCount = 0
+ ThrottledCount = 0
+ RetryAfterTotalSeconds = 0
+ ElapsedMinutes = 0
+ Status = 'unknown'
+ SplitRequired = $false
+ PostFetch10KLimit = $false
+ PreemptiveSubdivision = $false
+ PreemptiveCount = 0
+ SubdivisionReason = $null
+ PreviewRecordCount = 0
+ ThrottleRetriesDuringCreation = 0
+ }
+
+ try {
+ # FIX C: Check token validity BEFORE starting any API work
+ # If token is expired or near-expiry, return early so main thread can retry with fresh token
+ if (-not (Test-TokenValid)) {
+ $telemetry.Status = 'token_expired'
+ Write-Output "[TOKEN-EXPIRED] Partition $idx/$tot - Token expired or near-expiry, returning for retry with fresh token"
+ return [pscustomobject]@{
+ Activity = $activity
+ Logs = @()
+ RetrievedCount = 0
+ ElapsedMs = 0
+ Partition = $idx
+ Total = $tot
+ QueryId = $existingQueryId
+ DebugInfo = $null
+ Telemetry = $telemetry
+ TokenExpired = $true
+ }
+ }
+
+ $activeRecordFilters = if ($partition.RecordTypes -and $partition.RecordTypes.Count -gt 0) { @($partition.RecordTypes) } else { $null }
+ $activeServiceFilter = $partition.ServiceFilter
+
+ # Use all activities from partition
+ $queryActivities = if ($partition.Activities) { $partition.Activities } else { @($partition.Activity) }
+ # Log helper to capture current query payload state in debug stream (processed by parent thread)
+ function Write-GraphQueryDebug {
+ param(
+ [string]$Header,
+ [array]$Operations,
+ [array]$RecordFilters,
+ [string]$ServiceFilter,
+ [string]$PayloadJson
+ )
+ # Emit each element straight to the output stream in the same order the
+ # original buffered version produced: header, optional filter lines, then
+ # the raw JSON payload (as a single object, even if multi-line).
+ Write-Output "$Header"
+ if ($RecordFilters -and $RecordFilters.Count -gt 0) {
+ Write-Output " recordTypeFilters: $($RecordFilters -join ', ')"
+ }
+ if ($ServiceFilter) {
+ Write-Output " serviceFilter: $ServiceFilter"
+ }
+ Write-Output $PayloadJson
+ }
+
+ # Generate unique client-request-id for traceability (critical for Microsoft support)
+ $clientRequestId = [guid]::NewGuid().ToString()
+
+ # Get headers with current token (will be refreshed via Get-CurrentHeaders for each API call)
+ $headers = Get-CurrentHeaders -ClientRequestId $clientRequestId
+
+# PREEMPTIVE SUBDIVISION CHECK: Disabled - Graph API queries endpoint doesn't support
+ # filtering by query payload properties, so we can't preemptively count records.
+ # Subdivision will happen post-fetch based on actual record counts.
+ $shouldSubdividePreemptively = $false
+ $preemptiveCountCheck = $null
+
+ # Construct displayName with partition info: PAX_Query_YYYYMMDD_HHMM-YYYYMMDD_HHMM_PartX/Y
+ # Use last included minute (pEnd - 1 minute) since end date is exclusive
+ $pEndDisplay = $pEnd.AddMinutes(-1)
+ $displayName = "PAX_Query_$($pStart.ToString('yyyyMMdd_HHmm'))-$($pEndDisplay.ToString('yyyyMMdd_HHmm'))_Part$idx/$tot"
+
+ # If preemptive subdivision is needed, skip query creation and return subdivision signal
+ if ($shouldSubdividePreemptively) {
+ $telemetry.Status = 'subdivided_preemptively'
+ $telemetry.SplitReason = "Count check returned $preemptiveCountCheck records (>= 9500 threshold)"
+ return [pscustomobject]@{
+ Activity = $activity
+ Logs = @()
+ RetrievedCount = 0
+ ElapsedMs = 0
+ Partition = $idx
+ Total = $tot
+ QueryId = $null
+ DebugInfo = $null
+ Telemetry = $telemetry
+ }
+ }
+
+ # SKIP CREATE if we already have a QueryId (retry after 403 fetch failure)
+ $skipCreate = $false
+ if ($queryId) {
+ $skipCreate = $true
+ Write-Output "[RETRY-FETCH] Partition $idx/$tot - Reusing existing QueryId: $queryId (skipping CREATE)"
+ $telemetry.QueryCreatedAt = 'reused_existing'
+ }
+
+ # Retry loop for query creation with 429 handling (unlimited retries for throttling)
+ # Also handles transient network errors (502, 503, connection failures) with time-based tolerance
+ if (-not $skipCreate) {
+ try { # CREATE phase try-catch wrapper - handles CREATE-specific failures separately from FETCH errors
+ $createRetries = 0
+ $createSuccess = $false
+ $networkErrorStart = $null
+ $maxNetworkOutageSeconds = $maxOutageMinutes * 60 # Convert minutes to seconds
+
+ # Build query body ONCE before retry loop
+ $queryBody = @{
+ displayName = $displayName
+ filterStartDateTime = $pStart.ToUniversalTime().ToString('yyyy-MM-ddTHH:mm:ss.fffZ')
+ filterEndDateTime = $pEnd.ToUniversalTime().ToString('yyyy-MM-ddTHH:mm:ss.fffZ')
+ operationFilters = @($queryActivities)
+ }
+
+ $queryBodyJson = $queryBody | ConvertTo-Json -Depth 5
+ Write-GraphQueryDebug -Header "Graph API Query Body for partition $idx/$tot (Operations: $($queryActivities -join ', ')):" -Operations $queryActivities -RecordFilters $activeRecordFilters -ServiceFilter $activeServiceFilter -PayloadJson $queryBodyJson
+
+ $attemptTimestamp = Get-Date -Format 'HH:mm:ss'
+ $attemptMessage = "[ATTEMPT] [$attemptTimestamp] Partition $idx/$tot - Starting query creation..."
+ Write-Host $attemptMessage -ForegroundColor DarkGray
+
+ while (-not $createSuccess) {
+ try {
+ $queryUri = "https://graph.microsoft.com/$apiVersion/security/auditLog/queries"
+ $createResponse = Invoke-RestMethod -Method POST -Uri $queryUri -Headers (Get-CurrentHeaders -ClientRequestId $clientRequestId) -Body $queryBodyJson -ErrorAction Stop
+ $queryId = $createResponse.id
+ $telemetry.QueryCreatedAt = (Get-Date).ToString('yyyy-MM-dd HH:mm:ss')
+
+ # LOG: Query successfully sent to server (output stream only, displayed by main thread)
+ $sentTimestamp = Get-Date -Format 'HH:mm:ss'
+ $sentMessage = "[SENT] [$sentTimestamp] Partition $idx/$tot - Query sent to Purview (QueryId: $queryId)"
+ Write-Output $sentMessage
+
+ $createSuccess = $true
+ $networkErrorStart = $null # Reset network error timer on success
+ }
+ catch {
+ # CRITICAL: Check for transient network errors FIRST (502, 503, 504) - must be prioritized
+ # before 429 check to ensure these errors are always caught and retried correctly
+ $isNetworkError = $false
+ $errorMessage = $_.Exception.Message
+ $errorSummary = "Unknown error"
+
+ if ($_.Exception.Response) {
+ $statusCode = $_.Exception.Response.StatusCode.value__
+ if ($statusCode -in @(502, 503, 504)) {
+ $isNetworkError = $true
+ $errorSummary = switch ($statusCode) {
+ 502 { "502 Bad Gateway" }
+ 503 { "503 Service Unavailable" }
+ 504 { "504 Gateway Timeout" }
+ }
+ }
+ }
+ if (-not $isNetworkError -and ($errorMessage -match '502|503|504|Bad Gateway|Service Unavailable|Gateway Timeout')) {
+ $isNetworkError = $true
+ $errorSummary = if ($errorMessage -match '502') { "502 Bad Gateway" }
+ elseif ($errorMessage -match '503') { "503 Service Unavailable" }
+ elseif ($errorMessage -match '504') { "504 Gateway Timeout" }
+ else { "Network infrastructure error" }
+ }
+ if (-not $isNetworkError -and ($errorMessage -match 'timed out|connection|unable to connect|could not be resolved')) {
+ $isNetworkError = $true
+ $errorSummary = "Network connectivity issue"
+ }
+
+ # Check for 429 throttling (AFTER network error check)
+ $is429Create = $false
+ if (-not $isNetworkError) {
+ if ($_.Exception.Response) {
+ $statusCode = $_.Exception.Response.StatusCode
+ if ($statusCode -eq 429 -or $statusCode -eq 'TooManyRequests' -or $statusCode.value__ -eq 429) {
+ $is429Create = $true
+ }
+ }
+ if (-not $is429Create -and ($errorMessage -match '429' -or $errorMessage -match 'Too Many Requests')) {
+ $is429Create = $true
+ }
+ }
+
+ # Check for 403 Forbidden (Microsoft service-side issue)
+ $is403Create = $false
+ if (-not $isNetworkError -and -not $is429Create) {
+ if ($_.Exception.Response) {
+ $statusCode = $_.Exception.Response.StatusCode
+ if ($statusCode -eq 403 -or $statusCode -eq 'Forbidden' -or $statusCode.value__ -eq 403) {
+ $is403Create = $true
+ }
+ }
+ if (-not $is403Create -and ($errorMessage -match '403' -or $errorMessage -match 'Forbidden')) {
+ $is403Create = $true
+ }
+ }
+
+ # Handle different error types
+ if ($is403Create) {
+ # Extract diagnostic info from 403 response (critical for Microsoft support)
+ $responseBody403 = $null
+ $requestId403 = $null
+ $wwwAuth403 = $null
+ $isPermanent403 = $false
+
+ # PowerShell 7 error handling: ErrorDetails.Message contains response body
+ try {
+ if ($_.ErrorDetails -and $_.ErrorDetails.Message) {
+ $responseBody403 = $_.ErrorDetails.Message
+ }
+ } catch {}
+
+ # Try to get headers from the response
+ try {
+ if ($_.Exception.Response) {
+ # Try different header access patterns for PS7 compatibility
+ try {
+ $requestId403 = $_.Exception.Response.Headers['request-id']
+ } catch {
+ try {
+ $requestId403 = $_.Exception.Response.Headers.GetValues('request-id') | Select-Object -First 1
+ } catch {}
+ }
+ try {
+ $wwwAuth403 = $_.Exception.Response.Headers['WWW-Authenticate']
+ } catch {
+ try {
+ $wwwAuth403 = $_.Exception.Response.Headers.GetValues('WWW-Authenticate') | Select-Object -First 1
+ } catch {}
+ }
+
+ # Fallback: try to read response stream if ErrorDetails was empty
+ if (-not $responseBody403) {
+ try {
+ $respStream = $_.Exception.Response.GetResponseStream()
+ if ($respStream -and $respStream.CanRead) {
+ $reader = New-Object System.IO.StreamReader($respStream)
+ $responseBody403 = $reader.ReadToEnd()
+ $reader.Dispose()
+ }
+ } catch {}
+ }
+ }
+ } catch {}
+
+ # Also capture the full exception message as fallback
+ $exceptionMessage403 = $_.Exception.Message
+
+ # Check if this is a PERMANENT 403 (don't retry these)
+ if ($responseBody403 -match 'InsufficientPrivileges|Authorization_RequestDenied|AccessDenied|InvalidAuthenticationToken') {
+ $isPermanent403 = $true
+ }
+ # Claims challenge indicates CAE - token needs refresh, not retry
+ if ($wwwAuth403 -match 'claims') {
+ $isPermanent403 = $true # Don't retry with same token, need fresh token
+ }
+
+ # Log diagnostic info for Microsoft support
+ try {
+ $diagLog = "[$(Get-Date -Format 'yyyy-MM-dd HH:mm:ss')] [403-DIAG] Partition $idx/$tot`n"
+ $diagLog += " client-request-id: $clientRequestId`n"
+ $diagLog += " request-id: $requestId403`n"
+ $diagLog += " exception: $exceptionMessage403`n"
+ if ($wwwAuth403) { $diagLog += " WWW-Authenticate: $wwwAuth403`n" }
+ if ($responseBody403) { $diagLog += " Response body: $responseBody403`n" }
+ $diagLog += " Permanent error: $isPermanent403"
+ $diagLog | Add-Content -Path $logPath -Encoding UTF8 -ErrorAction SilentlyContinue
+ } catch {}
+
+ if ($isPermanent403) {
+ # Permanent 403 - don't retry, fail immediately for partition retry with fresh token
+ Write-Output "[403-PERM] Partition $idx/$tot - PERMANENT 403 (permissions/CAE) - Failing partition | request-id: $requestId403 | client-request-id: $clientRequestId"
+ throw "403 Forbidden (permanent) - $responseBody403"
+ }
+
+ # Transient 403 - retry with exponential backoff
+ $createRetries++
+ $max403Retries = 3 # Limited retries since we can't refresh token inside ThreadJob
+
+ if ($createRetries -le $max403Retries) {
+ # Exponential backoff: 15s, 30s, 60s
+ $retryAfter = [Math]::Min(15 * [Math]::Pow(2, $createRetries - 1), 60)
+
+ # Thread-safe file logging
+ try {
+ $logMsg = "[$(Get-Date -Format 'yyyy-MM-dd HH:mm:ss')] [403-CREATE] Partition $idx/$tot - Transient 403 (Attempt $createRetries/$max403Retries) - Retrying in ${retryAfter}s"
+ $logMsg | Add-Content -Path $logPath -Encoding UTF8 -ErrorAction SilentlyContinue
+ } catch {}
+
+ Write-Output "[403-CREATE] Partition $idx/$tot - Transient 403 (Attempt $createRetries/$max403Retries) - Retrying in ${retryAfter}s | request-id: $requestId403 | client-request-id: $clientRequestId"
+ Start-Sleep -Seconds $retryAfter
+ } else {
+ # Max 403 retries exceeded - throw to trigger partition-level retry (which will refresh token)
+ try {
+ $logMsg = "[$(Get-Date -Format 'yyyy-MM-dd HH:mm:ss')] [403-CREATE] Partition $idx/$tot - Max transient 403 retries exceeded ($max403Retries), failing partition for retry with fresh token"
+ $logMsg | Add-Content -Path $logPath -Encoding UTF8 -ErrorAction SilentlyContinue
+ } catch {}
+
+ Write-Output "[403-MAX] Partition $idx/$tot - Max transient 403 retries exceeded - Failing partition | request-id: $requestId403 | client-request-id: $clientRequestId"
+ throw "403 Forbidden (transient) - max retries exceeded (partition will retry with fresh token)"
+ }
+ }
+                        elseif ($is429Create) {
+                            # 429 throttling during CREATE - wait and retry (unlimited retries by design).
+                            # BUGFIX: increment the shared retry counter here as well. Previously only the
+                            # 403 branch incremented it, so the exponential fallback below always computed
+                            # Pow(2, -1) (a flat 30s that never escalated), the notification's Attempt was
+                            # reported as 0, and ThrottleRetriesDuringCreation telemetry stayed at the 403
+                            # count. NOTE(review): the counter is shared with the transient-403 budget, so
+                            # interleaved 429s consume part of that budget - acceptable, since exhausting
+                            # it only triggers a partition-level retry with a fresh token.
+                            $createRetries++
+
+                            # Prefer the server-specified wait when a Retry-After header is present
+                            if ($_.Exception.Response.Headers -and $_.Exception.Response.Headers['Retry-After']) {
+                                $retryAfter = [int]$_.Exception.Response.Headers['Retry-After']
+                            }
+                            else {
+                                # Exponential backoff: 60s, 120s, 240s, then cap at 300s (5 min)
+                                $retryAfter = [Math]::Min(60 * [Math]::Pow(2, $createRetries - 1), 300)
+                            }
+
+                            # Send real-time throttle notification to parent process
+                            try {
+                                $throttleNotification = [PSCustomObject]@{
+                                    Partition = "$idx/$tot"
+                                    Attempt = $createRetries
+                                    RetryAfter = $retryAfter
+                                    Timestamp = (Get-Date)
+                                }
+                                $script:throttleNotifications.Enqueue($throttleNotification)
+                            }
+                            catch {
+                                # Fallback to output if synchronized collection fails
+                                Write-Output "[THROTTLE] Partition $idx/$tot - Query creation throttled (Attempt $createRetries) - Waiting $retryAfter seconds..."
+                            }
+
+                            Start-Sleep -Seconds $retryAfter
+                        }
+ elseif ($isNetworkError) {
+ # Network error - check if we're still within the outage tolerance window
+ if (-not $networkErrorStart) {
+ $networkErrorStart = Get-Date
+ # Log to file only (no terminal spam)
+ Write-Output "[NETWORK] Partition $idx/$tot - $errorSummary - Starting retry window (max ${maxOutageMinutes}m)"
+ Write-Output "[NETWORK] First network error for partition $idx/$tot : $errorMessage"
+ }
+
+ $elapsedOutageSeconds = ((Get-Date) - $networkErrorStart).TotalSeconds
+
+ if ($elapsedOutageSeconds -lt $maxNetworkOutageSeconds) {
+ $remainingMinutes = [Math]::Ceiling(($maxNetworkOutageSeconds - $elapsedOutageSeconds) / 60)
+ $retryDelay = 30 + (Get-Random -Minimum 10 -Maximum 30) # 30-60s random delay
+
+ # Suppress subsequent retry messages to terminal (first error already shown)
+ # Full error details to log file only for troubleshooting
+ $elapsedFormatted = [Math]::Round($elapsedOutageSeconds, 1)
+ Write-Output "[NETWORK] Retry attempt for partition $idx/$tot (${elapsedFormatted}s elapsed) : $errorMessage"
+
+ Start-Sleep -Seconds $retryDelay
+ }
+ else {
+ # Network outage exceeded tolerance
+ $outageMinutes = [Math]::Round($elapsedOutageSeconds / 60, 1)
+ Write-Output "[CREATE-FAILED] Partition $idx/$tot - Network outage exceeded $maxOutageMinutes minute tolerance (${outageMinutes}m elapsed) - will retry at end of run"
+ throw "Network outage exceeded $maxOutageMinutes minute tolerance during query creation"
+ }
+ }
+ else {
+ # Non-retriable error - log it and re-throw
+ $statusCode = $null
+ $bodyText = $null
+ try {
+ if ($_.Exception.Response) {
+ $statusCode = [int]$_.Exception.Response.StatusCode.value__
+ }
+ } catch {}
+ if (-not $statusCode -and $_.Exception.Response) {
+ try { $statusCode = [int]$_.Exception.Response.StatusCode } catch {}
+ }
+ try {
+ if ($_.Exception.Response) {
+ $respStream = $_.Exception.Response.GetResponseStream()
+ if ($respStream) {
+ $reader = New-Object System.IO.StreamReader($respStream)
+ $bodyText = $reader.ReadToEnd()
+ $reader.Dispose()
+ }
+ }
+ } catch {}
+
+ # No automatic filter fallback allowed – capture diagnostics only
+
+ $errorDetails = "StatusCode: $(if ([string]::IsNullOrEmpty($statusCode)) { $_.Exception.Response.StatusCode } else { $statusCode }), Message: $($_.Exception.Message)"
+ Write-Host "[CREATE-FAILED] Partition $idx/$tot - Query creation failed: $errorDetails" -ForegroundColor Red
+ Write-Output "[ERROR] Partition $idx/$tot - Query creation failed (will retry at end of run): $errorDetails"
+ if ($bodyText) {
+ Write-Output "[GRAPH-ERROR] Partition $idx/$tot - Response body: $bodyText"
+ }
+ throw # Re-throw non-network, non-throttle errors
+ }
+ }
+ }
+
+ # Track throttle retries for summary
+ $telemetry.ThrottleRetriesDuringCreation = $createRetries
+
+ if (-not $queryId) {
+ throw "Failed to create query"
+ }
+
+ # Log query details
+ $debugInfo = "Query ID: $queryId | DateRange: $($pStart.ToString('yyyy-MM-dd HH:mm')) to $($pEnd.ToString('yyyy-MM-dd HH:mm')) UTC | Activities: $($queryActivities -join ', ')"
+ } # End of CREATE phase try block
+ catch {
+ # CREATE phase failed - re-throw with CREATE-FAILED marker so outer catch knows this is not a FETCH error
+ $createError = $_.Exception.Message
+ $createStack = $_.ScriptStackTrace
+
+ # Log CREATE failure
+ try {
+ $logMsg = "[$(Get-Date -Format 'yyyy-MM-dd HH:mm:ss')] [CREATE-FAILED] Partition $idx/$tot - Query creation failed: $createError"
+ $logMsg | Add-Content -Path $logPath -Encoding UTF8 -ErrorAction SilentlyContinue
+ } catch {}
+
+ # Re-throw with marker for outer catch to identify as CREATE failure
+ throw "[CREATE-FAILED] $createError"
+ }
+ } # End of if (-not $skipCreate) block
+
+ # Step 2: Poll for completion with PRODUCTION-SCALE backoff + timeout detection
+ # Microsoft guidance: 30-60s intervals for enterprise-scale parallel execution
+ # Timeout: Dynamic limit based on partition count (DELETE hung queries to keep tenant clean)
+ # Scale maxPolls and timeout based on partition count (more partitions = longer backend processing)
+ # Network outage tolerance (adaptive) using passed-in MaxNetworkOutageMinutes parameter
+        # Effective network-outage tolerance in minutes; falls back to 30 when the
+        # passed-in parameter is unset or non-positive.
+        $effectiveOutage = if ($maxOutageMinutes -and $maxOutageMinutes -gt 0) { $maxOutageMinutes } else { 30 }
+        $netOutageStart = $null
+        $netErrorStreak = 0
+        # Lowercase substrings matched (via .Contains) against the lowercased exception
+        # message to classify an error as a transient network problem.
+        $netPatterns = @('timed out','connection','unable to connect','remote name could not be resolved','temporarily unavailable','network','502','503','504','bad gateway','gateway timeout','service unavailable')
+        $lastNetHeartbeat = Get-Date
+        $lastNetMessage = $null # Throttle repetitive network messages
+        $netMessageMinInterval = 60 # Minimum seconds between network status messages
+        $pollCount = 0
+        # Poll budget scales with partition count: base 80 polls plus 5 per partition above 5.
+        $basePolls = 80 # 80 polls base (80 minutes with 60s intervals)
+        $extraPollsPerPartition = 5 # Add 5 polls per partition above 5 (~5 min each)
+        $partitionScaling = [Math]::Max(0, $tot - 5)
+        $maxPolls = $basePolls + ($partitionScaling * $extraPollsPerPartition) # 80 for 5, 90 for 7, 105 for 10
+
+ # Bounded timeout: 180 minutes per slice (acts as guardrail, not scaling crutch)
+ $maxWaitMinutes = 180
+
+ $pollStartTime = Get-Date
+ $queryComplete = $false
+
+ # Initial wait before first poll with randomization to prevent synchronization
+ $initialWaitSeconds = Get-Random -Minimum 30 -Maximum 60
+ $telemetry.InitialPollDelaySeconds = $initialWaitSeconds
+ Start-Sleep -Seconds $initialWaitSeconds
+
+ while ($pollCount -lt $maxPolls -and -not $queryComplete) {
+ $pollCount++
+
+ # Check for hard timeout (hung query detection)
+ $elapsedMinutes = ((Get-Date) - $pollStartTime).TotalMinutes
+ if ($elapsedMinutes -gt $maxWaitMinutes) {
+ # DELETE hung query to keep tenant clean
+ try {
+ $deleteUri = "https://graph.microsoft.com/$apiVersion/security/auditLog/queries/$queryId"
+ Invoke-RestMethod -Method DELETE -Uri $deleteUri `
+ -Headers (Get-CurrentHeaders -ClientRequestId $clientRequestId) -ErrorAction SilentlyContinue | Out-Null
+ } catch {
+ # Silently continue - cleanup failure shouldn't block error reporting
+ }
+ throw "Query timeout after $([Math]::Round($elapsedMinutes, 1)) minutes - query deleted"
+ }
+
+ # Deadline splitter: Check if we're at 90 min and query still running/notStarted
+ if ($elapsedMinutes -ge 90) {
+ $statusCheckResponse = $null
+ try {
+ $statusUri = "https://graph.microsoft.com/$apiVersion/security/auditLog/queries/$queryId"
+ $statusCheckResponse = Invoke-RestMethod -Method GET -Uri $statusUri `
+ -Headers (Get-CurrentHeaders -ClientRequestId $clientRequestId) -ErrorAction Stop
+ } catch {
+ # If we can't check status, continue with normal flow
+ }
+
+ if ($statusCheckResponse -and $statusCheckResponse.status -in @('running','notStarted')) {
+ # DELETE query and return SplitRequired signal
+ try {
+ Invoke-RestMethod -Method DELETE -Uri (Get-AuditUri -path "queries/$queryId") `
+ -Headers (Get-CurrentHeaders -ClientRequestId $clientRequestId) -ErrorAction SilentlyContinue | Out-Null
+ } catch {
+ # Silently continue
+ }
+
+ # Return structured object indicating split required
+ return @{
+ SplitRequired = $true
+ ElapsedMinutes = $elapsedMinutes
+ Status = $statusCheckResponse.status
+ PartitionStart = $pStart
+ PartitionEnd = $pEnd
+ Records = @()
+ QueryId = $queryId
+ }
+ }
+ }
+
+ # Poll query status with 429 throttling detection
+ try {
+ $statusResponse = Invoke-RestMethod -Method GET -Uri (Get-AuditUri -path "queries/$queryId") `
+ -Headers (Get-CurrentHeaders -ClientRequestId $clientRequestId) -ErrorAction Stop
+ # Reset outage tracking on success
+ if ($netOutageStart) {
+ $duration = (Get-Date) - $netOutageStart
+ # Only log recovery if outage lasted > 1 minute (ignore brief connection blips)
+ if ($duration.TotalMinutes -ge 1) {
+ Write-Host "[NET] Connectivity restored after $([Math]::Round($duration.TotalMinutes,1)) minutes - Partition $idx/$tot" -ForegroundColor Green
+ }
+ $netOutageStart = $null; $netErrorStreak = 0
+ $lastNetHeartbeat = Get-Date # Reset heartbeat timer on recovery
+ # NOTE: Do NOT reset $lastNetMessage here - keep throttle window active
+ # to prevent message flooding during intermittent connectivity
+ }
+ }
+ catch {
+ # Check for 429 throttling using safe detection
+ $is429 = $false
+ if ($_.Exception.Response) {
+ $statusCode = $_.Exception.Response.StatusCode
+ if ($statusCode -eq 429 -or $statusCode -eq 'TooManyRequests' -or $statusCode.value__ -eq 429) {
+ $is429 = $true
+ }
+ }
+ if (-not $is429 -and ($_.Exception.Message -match '429' -or $_.Exception.Message -match 'Too Many Requests')) {
+ $is429 = $true
+ }
+
+ if ($is429) {
+ # Track throttling in telemetry
+ $telemetry.ThrottledCount++
+ # Respect Retry-After header if present, otherwise use 60s default
+ $retryAfter = 60
+ if ($_.Exception.Response.Headers -and $_.Exception.Response.Headers['Retry-After']) {
+ $retryAfter = [int]$_.Exception.Response.Headers['Retry-After']
+ }
+ $telemetry.RetryAfterTotalSeconds += $retryAfter
+
+ # Log throttling event to user
+ Write-Host "[!] API Rate Limit (429) - Partition $idx/$tot - Retry in $retryAfter seconds (Throttle #$($telemetry.ThrottledCount))" -ForegroundColor Yellow
+
+ Start-Sleep -Seconds $retryAfter
+ continue # Retry this poll
+ }
+ else {
+ # Check for 403 Forbidden (Microsoft service-side issue)
+ $is403Poll = $false
+ $pollErrMsg = $_.Exception.Message
+ if ($_.Exception.Response) {
+ $statusCode = $_.Exception.Response.StatusCode
+ if ($statusCode -eq 403 -or $statusCode -eq 'Forbidden' -or $statusCode.value__ -eq 403) {
+ $is403Poll = $true
+ }
+ }
+ if (-not $is403Poll -and ($pollErrMsg -match '403' -or $pollErrMsg -match 'Forbidden')) {
+ $is403Poll = $true
+ }
+
+ if ($is403Poll) {
+ # Extract diagnostic info from 403 response
+ $responseBody403Poll = $null
+ $requestId403Poll = $null
+ $wwwAuth403Poll = $null
+ $isPermanent403Poll = $false
+
+ try {
+ if ($_.Exception.Response) {
+ $requestId403Poll = $_.Exception.Response.Headers['request-id']
+ $wwwAuth403Poll = $_.Exception.Response.Headers['WWW-Authenticate']
+ $respStream = $_.Exception.Response.GetResponseStream()
+ if ($respStream) {
+ $reader = New-Object System.IO.StreamReader($respStream)
+ $responseBody403Poll = $reader.ReadToEnd()
+ $reader.Dispose()
+ }
+ }
+ } catch {}
+
+ # Check if permanent 403
+ if ($responseBody403Poll -match 'InsufficientPrivileges|Authorization_RequestDenied|AccessDenied|InvalidAuthenticationToken') {
+ $isPermanent403Poll = $true
+ }
+ if ($wwwAuth403Poll -match 'claims') {
+ $isPermanent403Poll = $true
+ }
+
+ # Log diagnostic info
+ try {
+ $diagLog = "[$(Get-Date -Format 'yyyy-MM-dd HH:mm:ss')] [403-POLL-DIAG] Partition $idx/$tot`n"
+ $diagLog += " client-request-id: $clientRequestId`n"
+ $diagLog += " request-id: $requestId403Poll`n"
+ if ($wwwAuth403Poll) { $diagLog += " WWW-Authenticate: $wwwAuth403Poll`n" }
+ if ($responseBody403Poll) { $diagLog += " Response body: $responseBody403Poll`n" }
+ $diagLog += " Permanent error: $isPermanent403Poll"
+ $diagLog | Add-Content -Path $logPath -Encoding UTF8 -ErrorAction SilentlyContinue
+ } catch {}
+
+ if ($isPermanent403Poll) {
+ Write-Output "[403-PERM] Partition $idx/$tot - PERMANENT 403 on POLL - Failing partition | request-id: $requestId403Poll | client-request-id: $clientRequestId"
+ }
+
+ # 403 during status polling - increment counter and retry with backoff
+ if (-not $script:poll403Count) { $script:poll403Count = 0 }
+ $script:poll403Count++
+ $max403Polls = 3
+
+ if ($script:poll403Count -le $max403Polls) {
+ $retryAfter = [Math]::Min(15 * [Math]::Pow(2, $script:poll403Count - 1), 60)
+
+ # Thread-safe file logging
+ try {
+ $logMsg = "[$(Get-Date -Format 'yyyy-MM-dd HH:mm:ss')] [403-POLL] Partition $idx/$tot - Transient 403 (Attempt $($script:poll403Count)/$max403Polls) - Retrying in ${retryAfter}s"
+ $logMsg | Add-Content -Path $logPath -Encoding UTF8 -ErrorAction SilentlyContinue
+ } catch {}
+
+ Write-Output "[403-POLL] Partition $idx/$tot - Transient 403 (Attempt $($script:poll403Count)/$max403Polls) - Retrying in ${retryAfter}s | request-id: $requestId403Poll | client-request-id: $clientRequestId"
+ Start-Sleep -Seconds $retryAfter
+ continue # Retry this poll
+ } else {
+ # Max 403 retries exceeded - fail partition for retry with fresh token
+ try {
+ $logMsg = "[$(Get-Date -Format 'yyyy-MM-dd HH:mm:ss')] [403-POLL] Partition $idx/$tot - Max transient 403 poll retries exceeded ($max403Polls), failing partition"
+ $logMsg | Add-Content -Path $logPath -Encoding UTF8 -ErrorAction SilentlyContinue
+ } catch {}
+
+Write-Output "[403-MAX] Partition $idx/$tot - Max transient 403 poll retries exceeded - Failing partition | request-id: $requestId403Poll | client-request-id: $clientRequestId"
+ throw "403 Forbidden during status poll - max retries exceeded"
+ }
+ }
+
+ # Detect transient network outage vs hard failure
+ $errMsg = $_.Exception.Message
+ if ($netPatterns | Where-Object { $errMsg.ToLower().Contains($_) }) {
+ if (-not $netOutageStart) { $netOutageStart = Get-Date }
+ $netErrorStreak++
+ $elapsedOutage = (Get-Date) - $netOutageStart
+
+ # Throttle messages: only show if outage > 1 min OR no recent message
+ $shouldShowMessage = $false
+ if ($elapsedOutage.TotalMinutes -ge 1) {
+ # Sustained outage - show message but throttle to once per minute
+ if (-not $lastNetMessage -or ((Get-Date) - $lastNetMessage).TotalSeconds -ge $netMessageMinInterval) {
+ $shouldShowMessage = $true
+ }
+ } elseif ($netErrorStreak -eq 1 -and (-not $lastNetMessage -or ((Get-Date) - $lastNetMessage).TotalSeconds -ge $netMessageMinInterval)) {
+ # First error in a new outage window - show initial message
+ $shouldShowMessage = $true
+ }
+
+ if ($shouldShowMessage) {
+ Write-Host "[NET] Transient network issue (streak $netErrorStreak, outage $([Math]::Round($elapsedOutage.TotalMinutes,1))m) - Partition $idx/$tot" -ForegroundColor Yellow
+ $lastNetMessage = Get-Date
+ }
+
+ # Adaptive backoff during outage
+ $waitSec = [Math]::Min(95, [Math]::Round(35 * [Math]::Pow(1.3, $netErrorStreak)))
+ Start-Sleep -Seconds $waitSec
+
+ # Heartbeat every ~5 minutes of sustained outage (only if outage > 2 min)
+ if ($elapsedOutage.TotalMinutes -ge 2 -and ((Get-Date) - $lastNetHeartbeat).TotalMinutes -ge 5) {
+ Write-Host "[NET] Still waiting on network recovery (outage $([Math]::Round($elapsedOutage.TotalMinutes,1))m, tolerance $effectiveOutage m)" -ForegroundColor DarkYellow
+ $lastNetHeartbeat = Get-Date
+ }
+
+ if ($elapsedOutage.TotalMinutes -ge $effectiveOutage) {
+ throw "Network outage exceeded tolerance ($effectiveOutage minutes)"
+ }
+ continue
+ } else {
+ throw # Non-transient, abort partition
+ }
+ }
+ }
+
+ # Process status response
+ switch ($statusResponse.status) {
+ 'succeeded' {
+ $telemetry.Status = 'succeeded'
+ $telemetry.SucceededAt = (Get-Date).ToString('yyyy-MM-dd HH:mm:ss')
+
+                    # RECORD COUNT PREVIEW: Get exact count before fetching data (enables preemptive subdivision)
+                    # NOTE(review): this URI applies $count/$filter to the /queries *collection*, so
+                    # '@odata.count' may report the number of matching query resources (0 or 1), not the
+                    # number of audit records the query returned - if a record count is intended, the
+                    # records endpoint (queries/{id}/records?$count=true) is likely required. Also the
+                    # filter property is 'queryId' while the resource identifier is exposed as 'id'
+                    # (see $createResponse.id above) - TODO confirm against Graph API behavior.
+                    try {
+                        $countUri = "https://graph.microsoft.com/$apiVersion/security/auditLog/queries?`$count=true&`$filter=queryId eq '$queryId'"
+                        $countResponse = Invoke-RestMethod -Method GET -Uri $countUri -Headers (Get-CurrentHeaders -ClientRequestId $clientRequestId) -TimeoutSec 10 -ErrorAction Stop
+                        # '@odata.count' is the OData total-count annotation returned when $count=true
+                        $previewCount = $countResponse.'@odata.count'
+                        Write-Output "[COUNT] Query $queryId succeeded - Actual record count: $previewCount"
+                        $telemetry.PreviewRecordCount = $previewCount
+ if ($previewCount -ge 9500) {
+ $partitionHours = ($pEnd - $pStart).TotalHours
+ $minSubdivisionDays = 0.001389 # 2 minutes
+ $minSubdivisionHours = $minSubdivisionDays * 24
+
+ if ($partitionHours -gt $minSubdivisionHours) {
+ # Partition can be subdivided - flag for subdivision and skip record retrieval
+ $telemetry.PreemptiveSubdivision = $true
+ $telemetry.SubdivisionReason = "preview_count_$previewCount"
+
+ Write-Host "[SUBDIVISION] Partition $idx/$tot - Preview count $previewCount >= 9500 - Subdividing partition ($([Math]::Round($partitionHours,2))h window)" -ForegroundColor Yellow
+
+ # Return subdivision signal to parent
+ return [PSCustomObject]@{
+ QueryId = $queryId
+ Status = 'needs_subdivision'
+ PreviewCount = $previewCount
+ PartitionStart = $pStart
+ PartitionEnd = $pEnd
+ PartitionIndex = $idx
+ PartitionTotal = $tot
+ RetrievedCount = 0
+ Telemetry = $telemetry
+ }
+ } else {
+ Write-Host "[COUNT] Partition $idx/$tot - Preview count $previewCount (at minimum subdivision window $([Math]::Round($partitionHours,2))h, proceeding with fetch)" -ForegroundColor Cyan
+ }
+ } else {
+ Write-Host "[COUNT] Partition $idx/$tot - Preview count $previewCount (below threshold, proceeding with fetch)" -ForegroundColor DarkCyan
+ }
+ } catch {
+ # Count preview failed - proceed with normal fetch (non-critical failure)
+ Write-Output "[COUNT-WARN] Partition $idx/$tot - Count preview failed, proceeding with fetch: $($_.Exception.Message)"
+ }
+
+ # Note: Don't write "Query succeeded" yet - wait until records are actually retrieved and validated
+ $queryComplete = $true
+ break
+ }
+ 'failed' {
+ $telemetry.Status = 'failed'
+ Write-Host "✗ Query failed - Partition $idx/$tot - Query ID: $queryId" -ForegroundColor Red
+ # Delete failed query from Purview to free server slot
+ if ($queryId) {
+ try {
+ Invoke-RestMethod -Method DELETE -Uri (Get-AuditUri -path "queries/$queryId") `
+ -Headers (Get-CurrentHeaders -ClientRequestId $clientRequestId) -ErrorAction SilentlyContinue | Out-Null
+ } catch {}
+ }
+ throw "Query failed"
+ }
+ 'cancelled' {
+ $telemetry.Status = 'cancelled'
+ Write-Host "✗ Query cancelled - Partition $idx/$tot - Query ID: $queryId" -ForegroundColor Red
+ # Delete cancelled query from Purview to free server slot
+ if ($queryId) {
+ try {
+ Invoke-RestMethod -Method DELETE -Uri (Get-AuditUri -path "queries/$queryId") `
+ -Headers (Get-CurrentHeaders -ClientRequestId $clientRequestId) -ErrorAction SilentlyContinue | Out-Null
+ } catch {}
+ }
+ throw "Query cancelled"
+ }
+ 'queued' {
+ # Query is waiting in backend queue for available execution slot
+ $telemetry.Status = 'queued'
+ Write-Host "⏳ Query queued (waiting for backend capacity) - Partition $idx/$tot - Retry in 60-90s" -ForegroundColor Cyan
+ # Use longer polling interval for queued state (60-90s) since backend capacity is limited
+ $waitSeconds = Get-Random -Minimum 60 -Maximum 90
+ Start-Sleep -Seconds $waitSeconds
+ }
+ 'notStarted' {
+ # Query accepted but not yet started processing
+ $telemetry.Status = 'notStarted'
+ # Randomized polling interval (35-60s)
+ $waitSeconds = Get-Random -Minimum 35 -Maximum 60
+ Start-Sleep -Seconds $waitSeconds
+ }
+ 'running' {
+ # Track first time we see running status
+ if (-not $telemetry.FirstRunningAt) {
+ $telemetry.FirstRunningAt = (Get-Date).ToString('yyyy-MM-dd HH:mm:ss')
+ Write-Output "[STATUS] Query running - Partition $idx/$tot - Started processing"
+ }
+ $telemetry.Status = 'running'
+ # Randomized polling interval (35-60s) to prevent synchronization
+ $waitSeconds = Get-Random -Minimum 35 -Maximum 60
+ Start-Sleep -Seconds $waitSeconds
+ }
+ default {
+ $telemetry.Status = $statusResponse.status
+ Write-Host "? Query status: $($statusResponse.status) - Partition $idx/$tot - Retry in 35-60s" -ForegroundColor Magenta
+ # Randomized polling interval (35-60s) to prevent synchronization
+ $waitSeconds = Get-Random -Minimum 35 -Maximum 60
+ Start-Sleep -Seconds $waitSeconds
+ }
+ }
+        }
+
+        # Poll budget exhausted without the query reaching a terminal state.
+        if (-not $queryComplete) {
+            # Clean up orphaned query from Purview before failing
+            if ($queryId) {
+                try {
+                    Invoke-RestMethod -Method DELETE -Uri (Get-AuditUri -path "queries/$queryId") `
+                        -Headers (Get-CurrentHeaders -ClientRequestId $clientRequestId) -ErrorAction SilentlyContinue | Out-Null
+                    Write-Output "[CLEANUP] Partition $idx/$tot - Deleted query $queryId from Purview (poll exhausted)"
+                } catch {
+                    # Silently continue - cleanup failure shouldn't block retry
+                }
+            }
+            throw "Query timed out"
+        }
+
+ # Step 3: Retrieve records with pagination
+ $recordsUri = Get-AuditUri -path "queries/$queryId/records"
+ $fetchNetworkErrorStart = $null
+ $unexpectedProcessingError = $false
+ $unexpectedProcessingMessage = $null
+ $fetchErrorRetryCount = 0
+ $maxFetchErrorRetries = 3 # Retry unexpected errors 3 times before giving up
+
+ # CRITICAL: When resultSize=0, fetch unlimited records (don't check count)
+ # When resultSize>0, stop when we reach the limit (EOM mode behavior)
+ while ($recordsUri -and ($resultSize -eq 0 -or $allRecords.Count -lt $resultSize)) {
+ # Retry loop for record fetching with 429 and network error handling
+ $fetchRetries = 0
+ $maxFetchRetries = 5
+ $fetchSuccess = $false
+
+ while ($fetchRetries -lt $maxFetchRetries -and -not $fetchSuccess) {
+ try {
+ $recordsResponse = Invoke-RestMethod -Method GET -Uri $recordsUri -Headers (Get-CurrentHeaders -ClientRequestId $clientRequestId) -ErrorAction Stop
+ $fetchSuccess = $true
+ # Reset network error tracking on success
+ $fetchNetworkErrorStart = $null
+ }
+ catch {
+ # Check for 429 throttling
+ $is429Fetch = $false
+ if ($_.Exception.Response) {
+ $statusCode = $_.Exception.Response.StatusCode
+ if ($statusCode -eq 429 -or $statusCode -eq 'TooManyRequests' -or $statusCode.value__ -eq 429) {
+ $is429Fetch = $true
+ }
+ }
+ if (-not $is429Fetch -and ($_.Exception.Message -match '429' -or $_.Exception.Message -match 'Too Many Requests')) {
+ $is429Fetch = $true
+ }
+
+ # Check for network errors (502, 503, 504, connection issues)
+ $isNetworkFetch = $false
+ $fetchErrorMessage = $_.Exception.Message
+ $fetchErrorSummary = "Unknown error"
+
+ if ($_.Exception.Response) {
+ $statusCode = $_.Exception.Response.StatusCode.value__
+ if ($statusCode -in @(502, 503, 504)) {
+ $isNetworkFetch = $true
+ $fetchErrorSummary = switch ($statusCode) {
+ 502 { "502 Bad Gateway" }
+ 503 { "503 Service Unavailable" }
+ 504 { "504 Gateway Timeout" }
+ }
+ }
+ }
+ if (-not $isNetworkFetch -and ($fetchErrorMessage -match '502|503|504|Bad Gateway|Service Unavailable|Gateway Timeout')) {
+ $isNetworkFetch = $true
+ $fetchErrorSummary = if ($fetchErrorMessage -match '502') { "502 Bad Gateway" }
+ elseif ($fetchErrorMessage -match '503') { "503 Service Unavailable" }
+ elseif ($fetchErrorMessage -match '504') { "504 Gateway Timeout" }
+ else { "Network infrastructure error" }
+ }
+ if (-not $isNetworkFetch -and ($fetchErrorMessage -match 'timed out|connection|unable to connect|could not be resolved')) {
+ $isNetworkFetch = $true
+ $fetchErrorSummary = "Network connectivity issue"
+ }
+
+ # Check for 403 Forbidden (Microsoft service-side issue)
+ $is403Fetch = $false
+ if (-not $is429Fetch -and -not $isNetworkFetch) {
+ if ($_.Exception.Response) {
+ $statusCode = $_.Exception.Response.StatusCode
+ if ($statusCode -eq 403 -or $statusCode -eq 'Forbidden' -or $statusCode.value__ -eq 403) {
+ $is403Fetch = $true
+ }
+ }
+ if (-not $is403Fetch -and ($fetchErrorMessage -match '403' -or $fetchErrorMessage -match 'Forbidden')) {
+ $is403Fetch = $true
+ }
+ }
+
+ if ($is403Fetch) {
+ # Extract diagnostic info from 403 response
+ $responseBody403Fetch = $null
+ $requestId403Fetch = $null
+ $wwwAuth403Fetch = $null
+ $isPermanent403Fetch = $false
+ $exceptionMessage403Fetch = $_.Exception.Message
+
+ # PowerShell 7 error handling: ErrorDetails.Message contains response body
+ try {
+ if ($_.ErrorDetails -and $_.ErrorDetails.Message) {
+ $responseBody403Fetch = $_.ErrorDetails.Message
+ }
+ } catch {}
+
+ # Try to get headers from the response
+ try {
+ if ($_.Exception.Response) {
+ # Try different header access patterns for PS7 compatibility
+ try {
+ $requestId403Fetch = $_.Exception.Response.Headers['request-id']
+ } catch {
+ try {
+ $requestId403Fetch = $_.Exception.Response.Headers.GetValues('request-id') | Select-Object -First 1
+ } catch {}
+ }
+ try {
+ $wwwAuth403Fetch = $_.Exception.Response.Headers['WWW-Authenticate']
+ } catch {
+ try {
+ $wwwAuth403Fetch = $_.Exception.Response.Headers.GetValues('WWW-Authenticate') | Select-Object -First 1
+ } catch {}
+ }
+
+ # Fallback: try to read response stream if ErrorDetails was empty
+ if (-not $responseBody403Fetch) {
+ try {
+ $respStream = $_.Exception.Response.GetResponseStream()
+ if ($respStream -and $respStream.CanRead) {
+ $reader = New-Object System.IO.StreamReader($respStream)
+ $responseBody403Fetch = $reader.ReadToEnd()
+ $reader.Dispose()
+ }
+ } catch {}
+ }
+ }
+ } catch {}
+
+ # Check if permanent 403
+ if ($responseBody403Fetch -match 'InsufficientPrivileges|Authorization_RequestDenied|AccessDenied|InvalidAuthenticationToken') {
+ $isPermanent403Fetch = $true
+ }
+ if ($wwwAuth403Fetch -match 'claims') {
+ $isPermanent403Fetch = $true
+ }
+
+ # Log diagnostic info
+ try {
+ $diagLog = "[$(Get-Date -Format 'yyyy-MM-dd HH:mm:ss')] [403-FETCH-DIAG] Partition $idx/$tot Page $($telemetry.PageCount + 1)`n"
+ $diagLog += " client-request-id: $clientRequestId`n"
+ $diagLog += " request-id: $requestId403Fetch`n"
+ $diagLog += " exception: $exceptionMessage403Fetch`n"
+ if ($wwwAuth403Fetch) { $diagLog += " WWW-Authenticate: $wwwAuth403Fetch`n" }
+ if ($responseBody403Fetch) { $diagLog += " Response body: $responseBody403Fetch`n" }
+ $diagLog += " Permanent error: $isPermanent403Fetch"
+ $diagLog | Add-Content -Path $logPath -Encoding UTF8 -ErrorAction SilentlyContinue
+ } catch {}
+
+ if ($isPermanent403Fetch) {
+ Write-Output "[403-PERM] Partition $idx/$tot - PERMANENT 403 on FETCH - Failing partition | request-id: $requestId403Fetch | client-request-id: $clientRequestId"
+ }
+
+ # 403 Forbidden during FETCH - limited retry with exponential backoff
+ $fetchRetries++
+ $max403FetchRetries = 3 # Limited retries since we can't refresh token inside ThreadJob
+
+ if ($fetchRetries -le $max403FetchRetries) {
+ # Exponential backoff: 15s, 30s, 60s
+ $retryAfter = [Math]::Min(15 * [Math]::Pow(2, $fetchRetries - 1), 60)
+
+ # Thread-safe file logging
+ try {
+ $logMsg = "[$(Get-Date -Format 'yyyy-MM-dd HH:mm:ss')] [403-FETCH] Partition $idx/$tot Page $($telemetry.PageCount + 1) - Transient 403 (Attempt $fetchRetries/$max403FetchRetries) - Retrying in ${retryAfter}s"
+ $logMsg | Add-Content -Path $logPath -Encoding UTF8 -ErrorAction SilentlyContinue
+ } catch {}
+
+ Write-Output "[403-FETCH] Partition $idx/$tot Page $($telemetry.PageCount + 1) - Transient 403 (Attempt $fetchRetries/$max403FetchRetries) - Retrying in ${retryAfter}s | request-id: $requestId403Fetch | client-request-id: $clientRequestId"
+ Start-Sleep -Seconds $retryAfter
+ } else {
+ # Max 403 retries exceeded - throw to trigger partition-level retry
+ try {
+ $logMsg = "[$(Get-Date -Format 'yyyy-MM-dd HH:mm:ss')] [403-FETCH] Partition $idx/$tot - Max transient 403 fetch retries exceeded ($max403FetchRetries), failing partition for retry with fresh token"
+ $logMsg | Add-Content -Path $logPath -Encoding UTF8 -ErrorAction SilentlyContinue
+ } catch {}
+
+ Write-Output "[403-MAX] Partition $idx/$tot - Max transient 403 fetch retries exceeded - Failing partition | request-id: $requestId403Fetch | client-request-id: $clientRequestId"
+ throw "403 Forbidden (transient) on fetch - max retries exceeded (partition will retry with fresh token)"
+ }
+ }
+ elseif ($is429Fetch) {
+ $fetchRetries++
+ $telemetry.ThrottledCount++
+ # Respect Retry-After header if present, otherwise use 60s default
+ $retryAfter = 60
+ if ($_.Exception.Response.Headers -and $_.Exception.Response.Headers['Retry-After']) {
+ $retryAfter = [int]$_.Exception.Response.Headers['Retry-After']
+ }
+ $telemetry.RetryAfterTotalSeconds += $retryAfter
+
+ # Log throttling event to user
+ Write-Host "[!] API Rate Limit (429) during record fetch - Partition $idx/$tot Page $($telemetry.PageCount + 1) - Retry in $retryAfter seconds (Attempt $fetchRetries/$maxFetchRetries)" -ForegroundColor Yellow
+
+ Start-Sleep -Seconds $retryAfter
+ }
+ elseif ($isNetworkFetch) {
+ # Network error - check if we're still within the outage tolerance window
+ if (-not $fetchNetworkErrorStart) {
+ $fetchNetworkErrorStart = Get-Date
+ # Log to file only (no terminal spam)
+ Write-Output "[NETWORK] Partition $idx/$tot Page $($telemetry.PageCount + 1) - $fetchErrorSummary - Starting retry window (max ${maxOutageMinutes}m)"
+ }
+
+ # Calculate elapsed outage time
+ $fetchElapsedOutageSeconds = ((Get-Date) - $fetchNetworkErrorStart).TotalSeconds
+
+ if ($fetchElapsedOutageSeconds -lt $maxNetworkOutageSeconds) {
+ $fetchRemainingMinutes = [Math]::Ceiling(($maxNetworkOutageSeconds - $fetchElapsedOutageSeconds) / 60)
+ $fetchRetryDelay = 30 + (Get-Random -Minimum 10 -Maximum 30) # 40-59s random delay (Get-Random -Maximum is exclusive)
+
+ # Suppress subsequent retry messages to terminal (first error already shown)
+ # Log to file for troubleshooting
+ $fetchElapsedFormatted = [Math]::Round($fetchElapsedOutageSeconds, 1)
+ Write-Output "[NETWORK] Retry attempt for partition $idx/$tot Page $($telemetry.PageCount + 1) (${fetchElapsedFormatted}s elapsed) : $fetchErrorSummary"
+
+ Start-Sleep -Seconds $fetchRetryDelay
+ }
+ else {
+ # Network outage exceeded tolerance
+ $fetchOutageMinutes = [Math]::Round($fetchElapsedOutageSeconds / 60, 1)
+ Write-Host "[ERROR] Partition $idx/$tot - Record fetch failed: Network outage exceeded $maxOutageMinutes minute tolerance (${fetchOutageMinutes}m elapsed)" -ForegroundColor Red
+ throw "Network outage exceeded $maxOutageMinutes minute tolerance during record fetch"
+ }
+ }
+ else {
+ throw # Re-throw non-throttling, non-network errors
+ }
+ }
+ }
+
+ if (-not $fetchSuccess) {
+ throw "Failed to fetch records after $maxFetchRetries throttle retries"
+ }
+
+ # Track page retrieval
+ $telemetry.PageCount++
+ if ($telemetry.PageCount -eq 1) {
+ $telemetry.FirstPageAt = (Get-Date).ToString('yyyy-MM-dd HH:mm:ss')
+ }
+ $telemetry.LastPageAt = (Get-Date).ToString('yyyy-MM-dd HH:mm:ss')
+
+ if ($recordsResponse.value) {
+ foreach ($record in $recordsResponse.value) {
+ # Normalize to EOM-compatible format inline
+ # PERF: Store _ParsedAuditData to avoid re-parsing JSON during explosion
+ # NOTE: Using InvariantCulture directly here since Parse-DateSafe isn't available in ThreadJob scope
+ $normalized = [PSCustomObject]@{
+ RecordType = $record.auditLogRecordType
+ CreationDate = if ($record.createdDateTime) { try { [datetime]::Parse($record.createdDateTime, [System.Globalization.CultureInfo]::InvariantCulture) } catch { $null } } else { $null }
+ UserIds = $record.userPrincipalName
+ Operations = $record.operation
+ AuditData = if ($record.auditData) { $record.auditData | ConvertTo-Json -Depth 100 -Compress } else { '{}' }
+ _ParsedAuditData = $record.auditData # Already-parsed object from Graph API
+ ResultIndex = $allRecords.Count + 1
+ ResultCount = 1
+ Identity = $record.id
+ IsValid = $true
+ ObjectState = 'Unchanged'
+ } # Apply UserIds filter if specified
+ $includeRecord = $true
+ if ($userIds -and $userIds.Count -gt 0) {
+ $includeRecord = $userIds -contains $normalized.UserIds
+ }
+
+ if ($includeRecord) {
+ $allRecords += $normalized
+ }
+
+ # Only break if resultSize > 0 and we've reached the limit
+ if ($resultSize -gt 0 -and $allRecords.Count -ge $resultSize) {
+ break
+ }
+ }
+ }
+
+ # Check for next page
+ $recordsUri = if ($recordsResponse.'@odata.nextLink') { $recordsResponse.'@odata.nextLink' } else { $null }
+ }
+ }
+ catch {
+ # Check if this is a CREATE-FAILED error bubbling up (not a fetch error)
+ $errorMsg = $_.Exception.Message
+ if ($errorMsg -match '^\[CREATE-FAILED\]') {
+ # Re-throw CREATE errors as-is - they should propagate out of the scriptblock
+ throw
+ }
+
+ # This catch is for unexpected errors during FETCH phase (not throttling or network errors, which are handled above)
+ # Examples: JSON parsing failures, unexpected response format, etc.
+ $unexpectedError = $errorMsg
+ $unexpectedStack = $_.ScriptStackTrace
+
+ # Increment retry counter and check if retries remain
+ $fetchErrorRetryCount++
+
+ if ($fetchErrorRetryCount -lt $maxFetchErrorRetries) {
+ # Retries remain - log and retry the same page
+ Write-Output "[FETCH-RETRY] Partition $idx/$tot - Unexpected error ($fetchErrorRetryCount/$maxFetchErrorRetries) - Retrying in 30s: $unexpectedError"
+ try {
+ $logMsg = "[$(Get-Date -Format 'yyyy-MM-dd HH:mm:ss')] [FETCH-RETRY] Partition $idx/$tot (Query $queryId) - Retry $fetchErrorRetryCount/$maxFetchErrorRetries for: $unexpectedError"
+ $logMsg | Add-Content -Path $using:LogFile -Encoding UTF8 -ErrorAction SilentlyContinue
+ } catch {}
+ Start-Sleep -Seconds 30
+ continue # Retry pagination loop with same $recordsUri
+ }
+
+ # All retries exhausted - fail the partition
+ $unexpectedProcessingError = $true
+ $unexpectedProcessingMessage = $unexpectedError
+
+ # Output error message (will be deduplicated by parent)
+ Write-Output "[ERROR] Partition $idx/$tot - Unexpected error during record processing after $fetchErrorRetryCount retries - will retry at end of run"
+
+ # Full error details to ERROR stream (will be captured by main thread)
+ Write-Error "[ERROR] Partition $idx/$tot (Query $queryId) - Unexpected record processing error: $unexpectedError`n Stack: $unexpectedStack" -ErrorAction Continue
+ # Thread-safe file logging
+ try {
+ $logMsg = "[$(Get-Date -Format 'yyyy-MM-dd HH:mm:ss')] [ERROR] Partition $idx/$tot (Query $queryId) - Unexpected record processing error after $fetchErrorRetryCount retries: $unexpectedError"
+ $logMsg | Add-Content -Path $using:LogFile -Encoding UTF8 -ErrorAction SilentlyContinue
+ " Stack trace: $unexpectedStack" | Add-Content -Path $using:LogFile -Encoding UTF8 -ErrorAction SilentlyContinue
+ } catch {
+ # Ignore logging errors in job
+ }
+
+ # Break pagination loop - retries exhausted
+ break
+ }
+ $t1 = Get-Date
+
+ # Finalize telemetry
+ $telemetry.RowCount = $allRecords.Count
+ $telemetry.ElapsedMinutes = [Math]::Round(($t1 - $t0).TotalMinutes, 2)
+
+ # POST-FETCH 10K LIMIT DETECTION: Only applies to EOM mode (resultSize > 0)
+ if ($resultSize -gt 0 -and $allRecords.Count -eq 10000) {
+ $partitionHours = ($pEnd - $pStart).TotalHours
+ $minSubdivisionDays = 0.001389 # 2 minutes
+ $minSubdivisionHours = $minSubdivisionDays * 24
+
+ if ($partitionHours -gt $minSubdivisionHours) {
+ # Partition can be subdivided - flag for subdivision
+ $telemetry.PostFetch10KLimit = $true
+ $telemetry.SubdivisionReason = "postfetch_10k_limit"
+ $script:Hit10KLimit = $true
+
+ Write-Host "[SUBDIVISION] Partition $idx/$tot - Fetched exactly 10,000 records (EOM limit reached) - Needs subdivision ($([Math]::Round($partitionHours,2))h window)" -ForegroundColor Yellow
+
+ # Return subdivision signal
+ return [PSCustomObject]@{
+ QueryId = $queryId
+ Status = 'needs_subdivision'
+ PreviewCount = 10000
+ PartitionStart = $pStart
+ PartitionEnd = $pEnd
+ PartitionIndex = $idx
+ PartitionTotal = $tot
+ RetrievedCount = 10000
+ Telemetry = $telemetry
+ }
+ } else {
+ Write-Host "[LIMIT] Partition $idx/$tot - Fetched 10,000 records at minimum subdivision window ($([Math]::Round($partitionHours,2))h, cannot subdivide further)" -ForegroundColor Yellow
+ $script:Hit10KLimit = $true
+ }
+ }
+ # POST-FETCH 1M LIMIT DETECTION: Graph API has 1,000,000 record limit per query
+ elseif ($resultSize -eq 0 -and $allRecords.Count -ge 1000000) {
+ $partitionHours = ($pEnd - $pStart).TotalHours
+ $minSubdivisionDays = 0.001389 # 2 minutes
+ $minSubdivisionHours = $minSubdivisionDays * 24
+
+ if ($partitionHours -gt $minSubdivisionHours) {
+ # Partition can be subdivided - flag for subdivision
+ $telemetry.PostFetch1MLimit = $true
+ $telemetry.SubdivisionReason = "postfetch_1m_limit"
+ $script:Hit1MLimit = $true
+
+ Write-Host "[SUBDIVISION] Partition $idx/$tot - Fetched 1,000,000 records (Graph API limit reached) - Needs subdivision ($([Math]::Round($partitionHours,2))h window)" -ForegroundColor Yellow
+
+ # Return subdivision signal
+ return [PSCustomObject]@{
+ QueryId = $queryId
+ Status = 'needs_subdivision'
+ PreviewCount = 1000000
+ PartitionStart = $pStart
+ PartitionEnd = $pEnd
+ PartitionIndex = $idx
+ PartitionTotal = $tot
+ RetrievedCount = 1000000
+ Telemetry = $telemetry
+ }
+ } else {
+ Write-Host "[LIMIT] Partition $idx/$tot - Fetched 1,000,000 records at minimum subdivision window ($([Math]::Round($partitionHours,2))h, cannot subdivide further)" -ForegroundColor Yellow
+ $script:Hit1MLimit = $true
+ }
+ }
+
+ if ($unexpectedProcessingError) {
+ $telemetry.Status = 'failed'
+ # Clean up Purview query to free server slot after all fetch retries exhausted
+ if ($queryId) {
+ try {
+ Invoke-RestMethod -Method DELETE -Uri (Get-AuditUri -path "queries/$queryId") `
+ -Headers (Get-CurrentHeaders -ClientRequestId $clientRequestId) -ErrorAction SilentlyContinue | Out-Null
+ Write-Output "[CLEANUP] Partition $idx/$tot - Deleted query $queryId after $fetchErrorRetryCount failed fetch retries"
+ } catch {
+ # Silently continue - cleanup failure shouldn't block retry
+ }
+ }
+ throw [System.Exception]::new("Unexpected record processing error: $unexpectedProcessingMessage")
+ }
+
+ # Emit success notification for outer monitor (display handled once outside the job)
+ Write-Output "[SUCCESS] Query succeeded - Partition $idx/$tot - Query ID: $queryId - Retrieved $($allRecords.Count) records"
+
+ # Clean up query after successful record retrieval (best-effort)
+ if ($queryId) {
+ try {
+ Invoke-RestMethod -Method DELETE -Uri (Get-AuditUri -path "queries/$queryId") `
+ -Headers (Get-CurrentHeaders -ClientRequestId $clientRequestId) -ErrorAction SilentlyContinue | Out-Null
+ } catch {
+ # Silently continue - cleanup failure shouldn't block results
+ }
+ }
+
+ # Return results with telemetry
+ [pscustomobject]@{
+ Activity = $activity
+ Logs = $allRecords
+ RetrievedCount = $allRecords.Count
+ ElapsedMs = [int]($t1 - $t0).TotalMilliseconds
+ Partition = $idx
+ Total = $tot
+ QueryId = $queryId
+ DebugInfo = $debugInfo
+ Telemetry = $telemetry
+ }
+ }
+ }
+ # Skip parallel execution machinery when sequential mode is active
+ # The sequential fallback (if -not $canParallel) handles processing below
+ if (-not $canParallel) {
+ # Skip to sequential fallback - set flag to bypass the while loop
+ $allPartitionsProcessed = $true
+ }
+
+ # Diagnostic: Show concurrency settings before creating jobs (only for parallel mode)
+ if ($canParallel) {
+ $diagMsg = "[CONCURRENCY] Partitions=$($partitions.Count) MaxConcurrency=$MaxConcurrency"
+ Write-LogHost $diagMsg -ForegroundColor Cyan
+ }
+
+ # Initialize job result tracking
+ $script:processedJobIds = New-Object System.Collections.Generic.HashSet[int]
+
+ # Outer loop: Continue creating jobs until all partitions (including subdivided ones) are processed
+ # This handles dynamic subdivision where new partitions are added during execution
+ $subdivisionPass = 0
+ if (-not $allPartitionsProcessed) { $allPartitionsProcessed = $false }
+
+ while (-not $allPartitionsProcessed) {
+ $subdivisionPass++
+
+ # Find partitions that need jobs created (haven't been processed yet)
+ # FIX E: Include 'Failed' status to enable retry of failed partitions
+ # For Failed partitions, we allow retry even if they're in partitionsWithJobs (their previous job failed)
+ $pendingPartitions = @($partitions | Where-Object {
+ $statusObj = $script:partitionStatus[$_.Index]
+ if (-not $statusObj) { return $false }
+ if ($statusObj.Status -eq 'NotStarted' -and -not $script:partitionsWithJobs.Contains($_.Index)) { return $true }
+ if ($statusObj.Status -eq 'Failed') { return $true } # Allow retry of failed partitions
+ return $false
+ })
+
+ if ($pendingPartitions.Count -eq 0) {
+ # No new partitions to process, we're done
+ $allPartitionsProcessed = $true
+ break
+ }
+
+ if ($subdivisionPass -gt 1) {
+ Write-LogHost "" -ForegroundColor Yellow
+ Write-LogHost "=== Subdivision Pass $subdivisionPass ===" -ForegroundColor Yellow
+ Write-LogHost "Processing $($pendingPartitions.Count) new sub-partitions from previous subdivisions..." -ForegroundColor Yellow
+ }
+
+ # Create initial jobs for all PENDING partitions with backpressure
+ foreach ($pt in $pendingPartitions) {
+ # ============================================================
+ # PROACTIVE TOKEN REFRESH: Check before each job creation
+ # Job launch phase can take 20-35+ minutes with 60 partitions
+ # Token may expire during this phase if not refreshed
+ # ============================================================
+ $refreshResult = Refresh-GraphTokenIfNeeded -BufferMinutes 5
+ # CRITICAL: Use -is [string] check to avoid PowerShell coercion bug where $true -eq 'Quit' returns True
+ if ($refreshResult -is [string] -and $refreshResult -eq 'Quit') {
+ # User chose to quit at auth prompt - save checkpoint and exit gracefully
+ if ($script:CheckpointEnabled) { Save-Checkpoint -Force }
+ Show-CheckpointExitMessage
+ exit 0
+ }
+
+ # ============================================================
+ # CHECKPOINT: Reactive token refresh for auth failures (401s)
+ # This ALWAYS runs when AuthFailureDetected is true (not gated by CheckpointEnabled)
+ # AppRegistration: automatic silent refresh (headless)
+ # Interactive modes: user can wait at R/Q prompt indefinitely
+ # ============================================================
+ if (Test-ShouldPromptTokenRefresh) {
+ # AppRegistration: Try automatic silent refresh first
+ if ($script:AuthConfig.Method -eq 'AppRegistration' -and $script:AuthConfig.CanReauthenticate) {
+ $refreshResult = Invoke-TokenRefresh -Force
+ if ($refreshResult.Success -and $refreshResult.NewToken) {
+ $script:AuthFailureDetected = $false
+ $script:Auth401MessageShown = $false
+ Write-LogHost " [AUTH] Token refreshed automatically (AppRegistration)" -ForegroundColor Green
+ } elseif ($Force) {
+ # -Force mode: FATAL exit (true headless operation)
+ Write-LogHost " [AUTH] FATAL: AppRegistration token refresh failed (-Force mode)" -ForegroundColor Red
+ if ($script:CheckpointEnabled) { Save-Checkpoint -Force }
+ Show-CheckpointExitMessage
+ exit 0
+ } else {
+ # No -Force: fall back to interactive prompt
+ Write-LogHost " [AUTH] Silent refresh failed - falling back to interactive prompt" -ForegroundColor Yellow
+ $refreshResult = Invoke-TokenRefreshPrompt
+ if ($refreshResult -eq 'Quit') {
+ if ($script:CheckpointEnabled) { Save-Checkpoint -Force }
+ Show-CheckpointExitMessage
+ exit 0
+ }
+ }
+ } else {
+ # Interactive modes: prompt user
+ $refreshResult = Invoke-TokenRefreshPrompt
+ if ($refreshResult -eq 'Quit') {
+ # User chose to quit - save checkpoint if enabled and exit
+ if ($script:CheckpointEnabled) { Save-Checkpoint -Force }
+ Show-CheckpointExitMessage
+ exit 0
+ }
+ }
+ # Proceed with fresh token
+ }
+
+ # Backpressure: Wait for a slot if we've reached MaxConcurrency
+ # Count only non-completed jobs (Running or NotStarted)
+ $activeJobs = @($jobs | Where-Object { $_.State -in 'Running','NotStarted' })
+ while ($activeJobs.Count -ge $MaxConcurrency) {
+ Write-Verbose "[BACKPRESSURE] Waiting for job slot (active: $($activeJobs.Count)/$MaxConcurrency)..." -Verbose:$VerbosePreference
+
+ # PROACTIVE TOKEN REFRESH: Check while waiting for job slots
+ $refreshResult = Refresh-GraphTokenIfNeeded -BufferMinutes 5
+ # CRITICAL: Use -is [string] check to avoid PowerShell coercion bug where $true -eq 'Quit' returns True
+ if ($refreshResult -is [string] -and $refreshResult -eq 'Quit') {
+ # User chose to quit at auth prompt - save checkpoint and exit gracefully
+ if ($script:CheckpointEnabled) { Save-Checkpoint -Force }
+ Show-CheckpointExitMessage
+ exit 0
+ }
+
+ # Track when backpressure started (for server-side slot exhaustion detection)
+ if (-not $script:backpressureStartTime) { $script:backpressureStartTime = Get-Date }
+
+ # STATUS UPDATE: Show periodic status during backpressure wait
+ if (-not $script:lastBackpressureStatus) { $script:lastBackpressureStatus = Get-Date }
+ $backpressureElapsed = ((Get-Date) - $script:lastBackpressureStatus).TotalSeconds
+ if ($backpressureElapsed -ge 60) {
+ $completedCount = @($jobs | Where-Object { $_.State -eq 'Completed' }).Count
+ $runningCount = @($jobs | Where-Object { $_.State -eq 'Running' }).Count
+ # Count partitions that have actually sent queries (have QueryId)
+ $sentToServerCount = @($script:partitionStatus.Values | Where-Object { $_.QueryId }).Count
+ $ts = Get-Date -Format 'HH:mm:ss'
+ Write-LogHost "[STATUS] [$ts] Partitions (Queries): $runningCount active | $sentToServerCount sent | $completedCount/$totalPartitions complete" -ForegroundColor Yellow
+ $script:lastBackpressureStatus = Get-Date
+ }
+
+ # SERVER-SIDE SLOT EXHAUSTION DETECTION: Check if we're stuck because Purview has existing queries
+ # Only show once, after 3 minutes of backpressure, if SENT count < MaxConcurrency
+ if (-not $script:serverSlotWarningShown) {
+ $backpressureDuration = ((Get-Date) - $script:backpressureStartTime).TotalMinutes
+ $sentToServerCount = @($script:partitionStatus.Values | Where-Object { $_.QueryId }).Count
+ if ($backpressureDuration -ge 3 -and $sentToServerCount -lt $MaxConcurrency) {
+ $missingSlots = $MaxConcurrency - $sentToServerCount
+ $ts = Get-Date -Format 'HH:mm:ss'
+ Write-LogHost "" -ForegroundColor Cyan
+ Write-LogHost "[$ts] [INFO] Server-side query limit may be reached. Only $sentToServerCount of $MaxConcurrency queries sent to Purview after 3+ minutes." -ForegroundColor Cyan
+ Write-LogHost " This may indicate $missingSlots existing query/queries (from previous runs or the Purview portal) are consuming server slots." -ForegroundColor Cyan
+ Write-LogHost " Check Purview portal -> Audit -> search jobs for stuck/running queries." -ForegroundColor Cyan
+ Write-LogHost " You can cancel previous jobs if they aren't needed for anything else or for anyone else." -ForegroundColor Cyan
+ Write-LogHost "" -ForegroundColor Cyan
+ $script:serverSlotWarningShown = $true
+ }
+ }
+
+ # REACTIVE AUTH CHECK: Handle 401 errors during backpressure wait
+ # AppRegistration: automatic silent refresh (headless)
+ # Interactive modes: prompt user for re-authentication
+ if ($script:AuthFailureDetected) {
+ Write-LogHost "" -ForegroundColor Red
+ Write-LogHost " [AUTH] 401 detected during job launch - initiating token refresh..." -ForegroundColor Red
+
+ # AppRegistration: Use automatic silent refresh (no user interaction)
+ if ($script:AuthConfig.Method -eq 'AppRegistration' -and $script:AuthConfig.CanReauthenticate) {
+ $refreshResult = Invoke-TokenRefresh -Force
+ if ($refreshResult.Success -and $refreshResult.NewToken) {
+ $script:AuthFailureDetected = $false
+ $script:Auth401MessageShown = $false
+ # Update shared auth state for thread jobs
+ $script:SharedAuthState.Token = $refreshResult.NewToken
+ $script:SharedAuthState.ExpiresOn = (Get-Date).ToUniversalTime().AddMinutes(50)
+ $script:SharedAuthState.LastRefresh = Get-Date
+ Write-LogHost " [AUTH] Token refreshed automatically (AppRegistration)" -ForegroundColor Green
+ } elseif ($Force) {
+ # -Force mode: FATAL exit (true headless operation)
+ Write-LogHost " [AUTH] FATAL: AppRegistration token refresh failed (-Force mode)" -ForegroundColor Red
+ if ($script:CheckpointEnabled) { Save-Checkpoint -Force }
+ Write-LogHost " Exiting due to authentication failure. Use -Resume to continue later." -ForegroundColor Yellow
+ return
+ } else {
+ # No -Force: fall back to interactive prompt
+ Write-LogHost " [AUTH] Silent refresh failed - falling back to interactive prompt" -ForegroundColor Yellow
+ $refreshResult = Invoke-TokenRefreshPrompt
+ if ($refreshResult -eq 'Quit') {
+ if ($script:CheckpointEnabled) { Save-Checkpoint -Force }
+ Write-LogHost " Exiting due to user request. Use -Resume to continue later." -ForegroundColor Yellow
+ return
+ }
+ # Update shared auth state for thread jobs
+ $tokenInfo = Get-GraphAccessTokenWithExpiry
+ if ($tokenInfo) {
+ $script:SharedAuthState.Token = $tokenInfo.Token
+ $script:SharedAuthState.ExpiresOn = $tokenInfo.ExpiresOn
+ $script:SharedAuthState.LastRefresh = Get-Date
+ }
+ }
+ } else {
+ # Interactive modes: prompt user
+ $refreshResult = Invoke-TokenRefreshPrompt
+ if ($refreshResult -eq 'Quit') {
+ if ($script:CheckpointEnabled) { Save-Checkpoint -Force }
+ Write-LogHost " Exiting due to user request. Use -Resume to continue later." -ForegroundColor Yellow
+ return
+ }
+ # Update shared auth state for thread jobs
+ $tokenInfo = Get-GraphAccessTokenWithExpiry
+ if ($tokenInfo) {
+ $script:SharedAuthState.Token = $tokenInfo.Token
+ $script:SharedAuthState.ExpiresOn = $tokenInfo.ExpiresOn
+ $script:SharedAuthState.LastRefresh = Get-Date
+ }
+ }
+ Write-LogHost " [AUTH] Token refreshed - resuming job launch" -ForegroundColor Green
+ }
+
+ Start-Sleep -Milliseconds 500
+
+ # Collect output from active jobs while waiting (with error detection)
+ # FIX: Process ALL jobs including completed ones to ensure JSONL gets saved
+ foreach ($activeJob in $activeJobs) {
+ if ($script:AuthFailureDetected) { break } # IMMEDIATE EXIT on auth failure
+ # REMOVED: if ($activeJob.State -eq 'Completed') { continue }
+ # Completed jobs MUST be processed to get their result objects for JSONL save
+ try {
+ # NO -Keep: process result objects fully here (JSONL save, checkpoint)
+ $waitOutput = Receive-Job -Job $activeJob -ErrorAction SilentlyContinue -ErrorVariable backpressureJobErrors
+
+ # Check for 401 errors in job output
+ if ($backpressureJobErrors) {
+ foreach ($err in $backpressureJobErrors) {
+ $errMsg = if ($err.Exception) { $err.Exception.Message } else { $err.ToString() }
+ if ($errMsg -match '401|Unauthorized|token.*expired') {
+ $script:AuthFailureDetected = $true
+ if (-not $script:Auth401MessageShown) {
+ $script:Auth401MessageShown = $true
+ Write-LogHost " [AUTH] 401 Unauthorized detected - stopping to re-authenticate" -ForegroundColor Red
+ }
+ break # IMMEDIATE EXIT
+ }
+ }
+ }
+ if ($script:AuthFailureDetected) { break } # Exit if flag was set
+ if ($waitOutput) {
+ foreach ($output in $waitOutput) {
+ if ($script:AuthFailureDetected) { break } # IMMEDIATE EXIT
+ if ($output -is [string]) {
+ $msgKey = "$($activeJob.Id):$output"
+ $color = $null
+ if ($output -match '^\[ATTEMPT\]') {
+ $msgKey = "$($activeJob.Id):ATTEMPT"
+ $color = 'DarkGray'
+ }
+ elseif ($output -match '^\[SENT\]') {
+ $color = 'DarkGray'
+ # CRITICAL: Extract and store QueryId from SENT message for retry reuse
+ if ($output -match 'QueryId:\s*([a-f0-9-]+)\)') {
+ $extractedQueryId = $matches[1]
+ $jobPartition = $jobMeta[$activeJob.Id]
+ if ($jobPartition -and $script:partitionStatus.ContainsKey($jobPartition.Index)) {
+ $script:partitionStatus[$jobPartition.Index].QueryId = $extractedQueryId
+
+ # CHECKPOINT: Save QueryCreated state so we can resume data fetch if interrupted
+ if ($script:CheckpointEnabled) {
+ Save-Checkpoint -PartitionIndex $jobPartition.Index -QueryId $extractedQueryId -State 'QueryCreated'
+ }
+ }
+ }
+ }
+ elseif ($output -match '^\[ERROR\]') {
+ $color = 'Red'
+ # Check for 401 in error messages from thread jobs
+ if ($output -match '401|Unauthorized') {
+ $script:AuthFailureDetected = $true
+ if (-not $script:Auth401MessageShown) {
+ $script:Auth401MessageShown = $true
+ Write-LogHost " [AUTH] 401 Unauthorized detected in job output" -ForegroundColor Red
+ }
+ # CRITICAL: Mark this partition as Failed for retry after re-auth
+ $jobPartition = $jobMeta[$activeJob.Id]
+ if ($jobPartition -and $script:partitionStatus.ContainsKey($jobPartition.Index)) {
+ $script:partitionStatus[$jobPartition.Index].Status = 'Failed'
+ $script:partitionStatus[$jobPartition.Index].LastError = '401 Unauthorized - token expired'
+ }
+ break # IMMEDIATE EXIT - don't process more output
+ } else {
+ # FIX: Non-401 [ERROR] messages (e.g., "Unexpected error during record processing")
+ # must also mark the partition as Failed for retry
+ $jobPartition = $jobMeta[$activeJob.Id]
+ if ($jobPartition -and $script:partitionStatus.ContainsKey($jobPartition.Index)) {
+ $script:partitionStatus[$jobPartition.Index].Status = 'Failed'
+ $script:partitionStatus[$jobPartition.Index].LastError = $output
+ Write-LogHost " [RETRY-QUEUE] Partition $($jobPartition.Index)/$($jobPartition.Total) queued for retry at end of run" -ForegroundColor Yellow
+ }
+ }
+ }
+ elseif ($output -match '^\[403-(CREATE|POLL|FETCH)\]') {
+ # Transient 403 retry messages - dedupe by partition+attempt
+ $msgKey = "$($activeJob.Id):403:$output"
+ $color = 'Magenta'
+ }
+ elseif ($output -match '^\[403-(PERM|MAX)\]') {
+ # Permanent/max retry 403 messages
+ $msgKey = "$($activeJob.Id):403:$output"
+ $color = 'Red'
+ }
+ elseif ($output -match '^\[STATUS\] Query running') {
+ $msgKey = "$($activeJob.Id):STATUS"
+ $color = 'Yellow'
+ }
+ elseif ($output -match '^\[SUCCESS\]') {
+ $msgKey = "$($activeJob.Id):SUCCESS"
+ $color = 'Green'
+ }
+ if ($color -and -not $script:shownJobMessages.ContainsKey($msgKey)) {
+ $message = if ($output -match '^\[(STATUS|SUCCESS)\]\s*') {
+ $output -replace '^\[STATUS\]\s*','' -replace '^\[SUCCESS\]\s*',''
+ } else {
+ $output
+ }
+ Write-LogHost $message -ForegroundColor $color
+ $script:shownJobMessages[$msgKey] = $true
+ }
+ }
+ elseif ($output -isnot [string] -and -not $script:processedJobIds.Contains($activeJob.Id)) {
+ # FULL RESULT PROCESSING: Handle result objects immediately to ensure JSONL save
+ $jobPartition = $jobMeta[$activeJob.Id]
+ if ($jobPartition -and $script:partitionStatus.ContainsKey($jobPartition.Index)) {
+ $currentStatus = $script:partitionStatus[$jobPartition.Index].Status
+ if ($currentStatus -ne 'Complete') {
+ $script:partitionStatus[$jobPartition.Index].Status = 'Complete'
+ $script:partitionStatus[$jobPartition.Index].QueryId = $output.QueryId
+ $script:partitionStatus[$jobPartition.Index].RecordCount = $output.RetrievedCount
+
+ # Fallback: emit SUCCESS message if the original [SUCCESS] string was already consumed
+ $successKey = "$($activeJob.Id):SUCCESS"
+ if (-not $script:shownJobMessages.ContainsKey($successKey)) {
+ $script:shownJobMessages[$successKey] = $true
+ Write-LogHost "Query succeeded - Partition $($jobPartition.Index)/$($jobPartition.Total) - Query ID: $($output.QueryId) - Retrieved $($output.RetrievedCount) records" -ForegroundColor Green
+ }
+
+ # NOTE(review): this result-object handling (AddRange -> metrics -> checkpoint -> incremental
+ # JSONL save -> mark processed) is duplicated near-verbatim in at least three later receive
+ # paths - candidate for extraction into a shared helper function in a future refactor.
+ # Add logs to collection (skip when memory flush enabled - data goes to JSONL only)
+ if ($output.Logs -and $output.Logs.Count -gt 0 -and -not $script:memoryFlushEnabled) {
+ $allLogs.AddRange($output.Logs)
+ }
+
+ # Update metrics (best-effort: empty catch is deliberate so a metrics failure never
+ # aborts result processing)
+ try {
+ $script:metrics.QueryMs += [int]$output.ElapsedMs
+ $script:metrics.TotalRecordsFetched += [int]$output.RetrievedCount
+ } catch {}
+
+ # Save checkpoint
+ if ($script:CheckpointEnabled) {
+ Save-Checkpoint -PartitionIndex $jobPartition.Index -QueryId $output.QueryId -State 'Completed' -RecordCount $output.RetrievedCount
+ }
+
+ # INCREMENTAL SAVE: Write JSONL immediately
+ if ($output.Logs -and $output.Logs.Count -gt 0) {
+ try {
+ $incrementalDir = Join-Path (Split-Path $script:PartialOutputPath -Parent) ".pax_incremental"
+ if (-not (Test-Path $incrementalDir)) {
+ New-Item -ItemType Directory -Path $incrementalDir -Force | Out-Null
+ }
+ $incrementalFile = Join-Path $incrementalDir "Part$($jobPartition.Index)_${global:ScriptRunTimestamp}_qid-$($output.QueryId)_$($output.RetrievedCount)records.jsonl"
+ $output.Logs | ForEach-Object { $_ | ConvertTo-Json -Depth 10 -Compress } | Out-File -FilePath $incrementalFile -Encoding utf8 -Force
+ Write-LogHost " [SAVE] Partition $($jobPartition.Index): $($output.RetrievedCount) records saved to disk" -ForegroundColor DarkGreen
+
+ # Release source reference to allow GC to reclaim memory
+ # (only after Out-File above succeeded, so data is on disk first)
+ $output.Logs = $null
+
+ # MEMORY MANAGEMENT: When enabled, we skip AddRange and use JSONL-only path
+ # Set memoryFlushed flag to signal streaming export
+ if ($script:memoryFlushEnabled -and -not $script:memoryFlushed) {
+ $script:memoryFlushed = $true
+ Write-LogHost " [MEMORY] Memory management active - data written to JSONL only (limit: $($script:ResolvedMaxMemoryMB)MB)" -ForegroundColor Yellow
+ }
+ } catch {
+ Write-LogHost " [WARN] Failed to save incremental data for Partition $($jobPartition.Index): $($_.Exception.Message)" -ForegroundColor Yellow
+ }
+ }
+
+ # Mark job as processed so later receive loops skip this job's output
+ [void]$script:processedJobIds.Add($activeJob.Id)
+ }
+ }
+ }
+ }
+ }
+ } catch {}
+ }
+
+ # Recount active jobs
+ $activeJobs = @($jobs | Where-Object { $_.State -in 'Running','NotStarted' })
+ }
+
+ # Create the job now that we have a slot
+ # Graph API: MaxRecords=0 (unlimited) - 10K limit only applies to EOM mode
+ $graphResultSize = if ($UseEOM) { $ResultSize } else { 0 }
+
+ # For fetch-only partitions (resume mode), pass stored QueryId to skip query creation
+ $existingQueryIdForJob = if ($pt.StoredQueryId) { $pt.StoredQueryId } else { $null }
+ if ($existingQueryIdForJob) {
+ Write-LogHost "[RESUME] Partition $($pt.Index)/$($pt.Total) - Using stored QueryId: $existingQueryIdForJob" -ForegroundColor Yellow
+ }
+
+ # NOTE(review): -ArgumentList order is positional and must stay in sync with the param()
+ # block of $queryJobScriptBlock (defined elsewhere in this file) - verify when editing either.
+ $job = Start-ThreadJob -ThrottleLimit $maxConcurrentPartitions -ScriptBlock $queryJobScriptBlock -ArgumentList $pt.PStart, $pt.PEnd, $activities, $graphResultSize, $UserIds, $pt.Index, $pt.Total, $script:SharedAuthState, $pt, $MaxNetworkOutageMinutes, $script:GraphAuditApiVersion, $script:LogFile, $existingQueryIdForJob
+ $jobs += $job
+ $jobMeta[$job.Id] = $pt
+
+ # Mark this partition as having a job created for it
+ [void]$script:partitionsWithJobs.Add($pt.Index)
+ if ($script:partitionStatus.ContainsKey($pt.Index)) {
+ $script:partitionStatus[$pt.Index].Status = 'JobCreated'
+ }
+
+ # Log each job creation to terminal
+ $createdTimestamp = Get-Date -Format 'HH:mm:ss'
+ Write-LogHost "[CREATED] [$createdTimestamp] Partition $($pt.Index)/$($pt.Total) - Job created" -ForegroundColor DarkGray
+
+ # Poll for output from this job for up to 10 seconds (Purview API can have unpredictable latency)
+ Start-Sleep -Milliseconds 50 # Small initial delay to let job start
+ $pollAttempts = 0
+ $maxPollAttempts = 100 # 100 x 100ms = 10 seconds
+ $gotSentMessage = $false
+
+ # Exit conditions: [SENT]/[ERROR] seen ($gotSentMessage), poll budget exhausted,
+ # or the job already completed (its result object is handled by the main monitoring loop).
+ while ($pollAttempts -lt $maxPollAttempts -and -not $gotSentMessage) {
+ Start-Sleep -Milliseconds 100
+ $pollAttempts++
+
+ if ($job.State -eq 'Completed') { break }
+
+ try {
+ # NO -Keep here: immediate polling is just waiting for [SENT], job hasn't completed yet
+ # Result objects only appear after job completion, which happens later in main monitoring loop
+ $immediateOutput = Receive-Job -Job $job -ErrorAction SilentlyContinue
+ if ($immediateOutput) {
+ foreach ($output in $immediateOutput) {
+ if ($output -is [string]) {
+ # Dedup key is per-job + message text; ATTEMPT collapses to one key per job
+ $msgKey = "$($job.Id):$output"
+ if ($output -match '^\[ATTEMPT\]') {
+ $msgKey = "$($job.Id):ATTEMPT"
+ }
+
+ if ($output -match '^\[ATTEMPT\]') {
+ # Always show ATTEMPT messages (retries)
+ if (-not $script:shownJobMessages.ContainsKey($msgKey)) {
+ Write-LogHost $output -ForegroundColor DarkGray
+ $script:shownJobMessages[$msgKey] = $true
+ }
+ }
+ elseif ($output -match '^\[SENT\]') {
+ # Only show SENT once per job (key includes job id, so dedup is per-job, not global)
+ # NOTE(review): $msgKey is recomputed here with the same value as at the top of the
+ # string branch - redundant but harmless.
+ $msgKey = "$($job.Id):$output"
+ if (-not $script:shownJobMessages.ContainsKey($msgKey)) {
+ Write-LogHost $output -ForegroundColor DarkGray
+ $script:shownJobMessages[$msgKey] = $true
+
+ # CRITICAL: Extract and store QueryId from SENT message for retry reuse
+ # Format: [SENT] [HH:mm:ss] Partition X/Y - Query sent to Purview (QueryId: xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx)
+ if ($output -match 'QueryId:\s*([a-f0-9-]+)\)') {
+ $extractedQueryId = $matches[1]
+ $jobPartition = $jobMeta[$job.Id]
+ if ($jobPartition -and $script:partitionStatus.ContainsKey($jobPartition.Index)) {
+ $script:partitionStatus[$jobPartition.Index].QueryId = $extractedQueryId
+ Write-Verbose "[QUERYID-CAPTURED] Partition $($jobPartition.Index) QueryId=$extractedQueryId" -Verbose:$VerbosePreference
+
+ # CHECKPOINT: Save QueryCreated state so we can resume data fetch if interrupted
+ if ($script:CheckpointEnabled) {
+ Save-Checkpoint -PartitionIndex $jobPartition.Index -QueryId $extractedQueryId -State 'QueryCreated'
+ }
+ }
+ }
+ } else {
+ Write-Verbose "DEDUP: Already shown - JobId=$($job.Id), Msg=$output" -Verbose:$VerbosePreference
+ }
+ $gotSentMessage = $true
+ }
+ elseif ($output -match '^\[ERROR\]') {
+ if (-not $script:shownJobMessages.ContainsKey($msgKey)) {
+ Write-LogHost $output -ForegroundColor Red
+ # Check for 401 in error messages
+ $script:shownJobMessages[$msgKey] = $true
+ if ($output -match '401|Unauthorized') {
+ $script:AuthFailureDetected = $true
+ if (-not $script:Auth401MessageShown) {
+ $script:Auth401MessageShown = $true
+ Write-LogHost " [AUTH] 401 Unauthorized detected in job output" -ForegroundColor Red
+ }
+ # CRITICAL: Mark this partition as Failed for retry after re-auth
+ $jobPartition = $jobMeta[$job.Id]
+ if ($jobPartition -and $script:partitionStatus.ContainsKey($jobPartition.Index)) {
+ $script:partitionStatus[$jobPartition.Index].Status = 'Failed'
+ $script:partitionStatus[$jobPartition.Index].LastError = '401 Unauthorized - token expired'
+ }
+ # NOTE(review): this break exits only the inner foreach over $immediateOutput;
+ # the while loop then ends because $gotSentMessage is left $false only if this
+ # break fires before the assignment below - verify intended scope.
+ break # IMMEDIATE EXIT
+ }
+ }
+ $gotSentMessage = $true # Stop polling on error too
+ }
+ elseif ($output -match '^\[403-(CREATE|POLL|FETCH)\]') {
+ # Transient 403 retry messages - dedupe by partition+attempt
+ $msgKey = "$($job.Id):403:$output"
+ if (-not $script:shownJobMessages.ContainsKey($msgKey)) {
+ Write-LogHost $output -ForegroundColor Magenta
+ $script:shownJobMessages[$msgKey] = $true
+ }
+ }
+ elseif ($output -match '^\[403-(PERM|MAX)\]') {
+ # Permanent/max retry 403 messages
+ $msgKey = "$($job.Id):403:$output"
+ if (-not $script:shownJobMessages.ContainsKey($msgKey)) {
+ Write-LogHost $output -ForegroundColor Red
+ $script:shownJobMessages[$msgKey] = $true
+ }
+ }
+ elseif ($output -match '^\[STATUS\] Query running') {
+ $msgKey = "$($job.Id):STATUS"
+ if (-not $script:shownJobMessages.ContainsKey($msgKey)) {
+ Write-LogHost ($output -replace '^\[STATUS\]\s*','') -ForegroundColor Yellow
+ $script:shownJobMessages[$msgKey] = $true
+ }
+ }
+ elseif ($output -match '^\[SUCCESS\]') {
+ $msgKey = "$($job.Id):SUCCESS"
+ if (-not $script:shownJobMessages.ContainsKey($msgKey)) {
+ Write-LogHost ($output -replace '^\[SUCCESS\]\s*','') -ForegroundColor Green
+ $script:shownJobMessages[$msgKey] = $true
+ }
+ }
+ }
+ elseif ($output -isnot [string] -and -not $script:processedJobIds.Contains($job.Id)) {
+ # FULL RESULT PROCESSING: Handle result objects immediately to ensure JSONL save
+ $jobPartition = $jobMeta[$job.Id]
+ if ($jobPartition -and $script:partitionStatus.ContainsKey($jobPartition.Index)) {
+ $currentStatus = $script:partitionStatus[$jobPartition.Index].Status
+ if ($currentStatus -ne 'Complete') {
+ $script:partitionStatus[$jobPartition.Index].Status = 'Complete'
+ $script:partitionStatus[$jobPartition.Index].QueryId = $output.QueryId
+ $script:partitionStatus[$jobPartition.Index].RecordCount = $output.RetrievedCount
+
+ # Add logs to collection (skip if memory flush enabled - using JSONL only)
+ if ($output.Logs -and $output.Logs.Count -gt 0 -and -not $script:memoryFlushEnabled) {
+ $allLogs.AddRange($output.Logs)
+ }
+
+ # Update metrics (best-effort; failures must not abort result processing)
+ try {
+ $script:metrics.QueryMs += [int]$output.ElapsedMs
+ $script:metrics.TotalRecordsFetched += [int]$output.RetrievedCount
+ } catch {}
+
+ # Save checkpoint
+ if ($script:CheckpointEnabled) {
+ Save-Checkpoint -PartitionIndex $jobPartition.Index -QueryId $output.QueryId -State 'Completed' -RecordCount $output.RetrievedCount
+ }
+
+ # INCREMENTAL SAVE: Write JSONL immediately
+ if ($output.Logs -and $output.Logs.Count -gt 0) {
+ try {
+ $incrementalDir = Join-Path (Split-Path $script:PartialOutputPath -Parent) ".pax_incremental"
+ if (-not (Test-Path $incrementalDir)) {
+ New-Item -ItemType Directory -Path $incrementalDir -Force | Out-Null
+ }
+ $incrementalFile = Join-Path $incrementalDir "Part$($jobPartition.Index)_${global:ScriptRunTimestamp}_qid-$($output.QueryId)_$($output.RetrievedCount)records.jsonl"
+ $output.Logs | ForEach-Object { $_ | ConvertTo-Json -Depth 10 -Compress } | Out-File -FilePath $incrementalFile -Encoding utf8 -Force
+ Write-LogHost " [SAVE] Partition $($jobPartition.Index): $($output.RetrievedCount) records saved to disk" -ForegroundColor DarkGreen
+
+ # Clear reference to allow GC (critical for memory management)
+ $output.Logs = $null
+
+ # MEMORY MANAGEMENT: Mark if we're using JSONL-only mode
+ if ($script:memoryFlushEnabled -and -not $script:memoryFlushed) {
+ $script:memoryFlushed = $true
+ Write-LogHost " [MEMORY] Memory management active - data written to JSONL only (streaming export at end)" -ForegroundColor Yellow
+ }
+ } catch {
+ Write-LogHost " [WARN] Failed to save incremental data for Partition $($jobPartition.Index): $($_.Exception.Message)" -ForegroundColor Yellow
+ }
+ }
+
+ # Mark job as processed
+ [void]$script:processedJobIds.Add($job.Id)
+ }
+ }
+ }
+ }
+ }
+ } catch {}
+ }
+
+ Write-Verbose "Created job for partition $($pt.Index)/$($pt.Total) - Job ID: $($job.Id)" -Verbose:$VerbosePreference
+
+ # Start monitoring loop in background once first batch is queued
+ # Note: Jobs may still be retrying 403s internally - this just means they've been started
+ if (-not $monitoringStarted -and $jobs.Count -ge $firstBatchSize) {
+ $monitoringStarted = $true
+ # Initialize monitoring state
+ $script:lastStatusUpdate = Get-Date
+ } # Show status updates while creating jobs (if monitoring started)
+ if ($monitoringStarted) {
+ # Collect output from all existing jobs
+ # FIX: Process ALL jobs including completed ones to ensure JSONL gets saved
+ foreach ($existingJob in $jobs) {
+ # REMOVED: if ($existingJob.State -eq 'Completed') { continue }
+ # Completed jobs MUST be processed to get their result objects for JSONL save
+ try {
+ # NO -Keep: process result objects fully here (JSONL save, checkpoint)
+ $jobOutput = Receive-Job -Job $existingJob -ErrorAction SilentlyContinue
+ if ($jobOutput) {
+ foreach ($output in $jobOutput) {
+ if ($output -is [string]) {
+ # Per-job dedup key; ATTEMPT messages collapse to a single key per job
+ $msgKey = "$($existingJob.Id):$output"
+ if ($output -match '^\[ATTEMPT\]') {
+ $msgKey = "$($existingJob.Id):ATTEMPT"
+ }
+
+ if ($output -match '^\[ATTEMPT\]') {
+ # Always show ATTEMPT messages (retries)
+ if (-not $script:shownJobMessages.ContainsKey($msgKey)) {
+ Write-LogHost $output -ForegroundColor DarkGray
+ $script:shownJobMessages[$msgKey] = $true
+ }
+ }
+ elseif ($output -match '^\[SENT\]') {
+ # Only show SENT once per job
+ if (-not $script:shownJobMessages.ContainsKey($msgKey)) {
+ Write-LogHost $output -ForegroundColor DarkGray
+ $script:shownJobMessages[$msgKey] = $true
+ }
+ # CRITICAL: Extract and store QueryId from SENT message for retry reuse
+ if ($output -match 'QueryId:\s*([a-f0-9-]+)\)') {
+ $extractedQueryId = $matches[1]
+ $jobPartition = $jobMeta[$existingJob.Id]
+ if ($jobPartition -and $script:partitionStatus.ContainsKey($jobPartition.Index)) {
+ $script:partitionStatus[$jobPartition.Index].QueryId = $extractedQueryId
+ }
+ }
+ }
+ elseif ($output -match '^\[ERROR\]') {
+ if (-not $script:shownJobMessages.ContainsKey($msgKey)) {
+ Write-LogHost $output -ForegroundColor Red
+ $script:shownJobMessages[$msgKey] = $true
+ # Check for 401 in error messages
+ if ($output -match '401|Unauthorized') {
+ $script:AuthFailureDetected = $true
+ if (-not $script:Auth401MessageShown) {
+ $script:Auth401MessageShown = $true
+ Write-LogHost " [AUTH] 401 Unauthorized detected in job output" -ForegroundColor Red
+ }
+ # CRITICAL: Mark this partition as Failed for retry after re-auth
+ $jobPartition = $jobMeta[$existingJob.Id]
+ if ($jobPartition -and $script:partitionStatus.ContainsKey($jobPartition.Index)) {
+ $script:partitionStatus[$jobPartition.Index].Status = 'Failed'
+ $script:partitionStatus[$jobPartition.Index].LastError = '401 Unauthorized - token expired'
+ }
+ # NOTE(review): exits only the inner foreach over $jobOutput, not the job loop
+ break # IMMEDIATE EXIT
+ }
+ }
+ }
+ elseif ($output -match '^\[STATUS\] Query running') {
+ $msgKey = "$($existingJob.Id):STATUS"
+ if (-not $script:shownJobMessages.ContainsKey($msgKey)) {
+ Write-LogHost ($output -replace '^\[STATUS\]\s*','') -ForegroundColor Yellow
+ $script:shownJobMessages[$msgKey] = $true
+ }
+ }
+ elseif ($output -match '^\[NETWORK\]') {
+ if (-not $script:shownJobMessages.ContainsKey($msgKey)) {
+ Write-LogHost $output -ForegroundColor DarkYellow
+ $script:shownJobMessages[$msgKey] = $true
+ }
+ }
+ elseif ($output -match '^\[SUCCESS\]') {
+ $msgKey = "$($existingJob.Id):SUCCESS"
+ if (-not $script:shownJobMessages.ContainsKey($msgKey)) {
+ Write-LogHost ($output -replace '^\[SUCCESS\]\s*','') -ForegroundColor Green
+ $script:shownJobMessages[$msgKey] = $true
+ }
+ }
+ }
+ elseif ($output -isnot [string] -and -not $script:processedJobIds.Contains($existingJob.Id)) {
+ # FULL RESULT PROCESSING: Handle result objects immediately to ensure JSONL save
+ $jobPartition = $jobMeta[$existingJob.Id]
+ if ($jobPartition -and $script:partitionStatus.ContainsKey($jobPartition.Index)) {
+ $currentStatus = $script:partitionStatus[$jobPartition.Index].Status
+ if ($currentStatus -ne 'Complete') {
+ $script:partitionStatus[$jobPartition.Index].Status = 'Complete'
+ $script:partitionStatus[$jobPartition.Index].QueryId = $output.QueryId
+ $script:partitionStatus[$jobPartition.Index].RecordCount = $output.RetrievedCount
+
+ # Fallback: emit SUCCESS message if the original [SUCCESS] string was already consumed
+ $successKey = "$($existingJob.Id):SUCCESS"
+ if (-not $script:shownJobMessages.ContainsKey($successKey)) {
+ $script:shownJobMessages[$successKey] = $true
+ Write-LogHost "Query succeeded - Partition $($jobPartition.Index)/$($jobPartition.Total) - Query ID: $($output.QueryId) - Retrieved $($output.RetrievedCount) records" -ForegroundColor Green
+ }
+
+ # Add logs to collection (skip if memory flush enabled - using JSONL only)
+ if ($output.Logs -and $output.Logs.Count -gt 0 -and -not $script:memoryFlushEnabled) {
+ $allLogs.AddRange($output.Logs)
+ }
+
+ # Update metrics (best-effort; failures must not abort result processing)
+ try {
+ $script:metrics.QueryMs += [int]$output.ElapsedMs
+ $script:metrics.TotalRecordsFetched += [int]$output.RetrievedCount
+ } catch {}
+
+ # Save checkpoint
+ if ($script:CheckpointEnabled) {
+ Save-Checkpoint -PartitionIndex $jobPartition.Index -QueryId $output.QueryId -State 'Completed' -RecordCount $output.RetrievedCount
+ }
+
+ # INCREMENTAL SAVE: Write JSONL immediately
+ if ($output.Logs -and $output.Logs.Count -gt 0) {
+ try {
+ $incrementalDir = Join-Path (Split-Path $script:PartialOutputPath -Parent) ".pax_incremental"
+ if (-not (Test-Path $incrementalDir)) {
+ New-Item -ItemType Directory -Path $incrementalDir -Force | Out-Null
+ }
+ $incrementalFile = Join-Path $incrementalDir "Part$($jobPartition.Index)_${global:ScriptRunTimestamp}_qid-$($output.QueryId)_$($output.RetrievedCount)records.jsonl"
+ $output.Logs | ForEach-Object { $_ | ConvertTo-Json -Depth 10 -Compress } | Out-File -FilePath $incrementalFile -Encoding utf8 -Force
+ Write-LogHost " [SAVE] Partition $($jobPartition.Index): $($output.RetrievedCount) records saved to disk" -ForegroundColor DarkGreen
+
+ # Clear reference to allow GC (critical for memory management)
+ $output.Logs = $null
+
+ # MEMORY MANAGEMENT: Mark if we're using JSONL-only mode
+ if ($script:memoryFlushEnabled -and -not $script:memoryFlushed) {
+ $script:memoryFlushed = $true
+ Write-LogHost " [MEMORY] Memory management active - data written to JSONL only (streaming export at end)" -ForegroundColor Yellow
+ }
+ } catch {
+ Write-LogHost " [WARN] Failed to save incremental data for Partition $($jobPartition.Index): $($_.Exception.Message)" -ForegroundColor Yellow
+ }
+ }
+
+ # Mark job as processed
+ [void]$script:processedJobIds.Add($existingJob.Id)
+ }
+ }
+ }
+ }
+ }
+ } catch {
+ # Silently continue if we can't receive from a job
+ }
+ }
+
+ $elapsedSinceLastUpdate = ((Get-Date) - $script:lastStatusUpdate).TotalSeconds
+ if ($elapsedSinceLastUpdate -ge 60) {
+ # Count partitions by status (EXCLUDE 'Subdivided' parent partitions from total)
+ $activeStatuses = $script:partitionStatus.Values | Where-Object { $_.Status -ne 'Subdivided' }
+ $completedPartitions = @($activeStatuses | Where-Object { $_.Status -eq 'Complete' }).Count
+ $jobCreatedPartitions = @($activeStatuses | Where-Object { $_.Status -eq 'JobCreated' }).Count
+ $notStartedPartitions = @($activeStatuses | Where-Object { $_.Status -eq 'NotStarted' }).Count
+
+ # Calculate remaining and total (only active partitions)
+ $remainingToComplete = $jobCreatedPartitions + $notStartedPartitions
+ # NOTE(review): $activeStatuses is not wrapped in @() here (unlike the later status
+ # block at the main monitoring loop); .Count on a single-object result relies on
+ # PowerShell 3+ member enumeration - consider @($activeStatuses).Count for consistency.
+ $totalPartitions = $activeStatuses.Count
+
+ $statusLine = "[$(Get-Date -Format 'HH:mm:ss')] Total Queries: $totalPartitions | Completed: $completedPartitions | Remaining: $remainingToComplete"
+ Write-LogHost $statusLine -ForegroundColor White
+
+ $script:lastStatusUpdate = Get-Date
+ }
+ } # Staggered launch with 10-24s jitter to prevent API burst (except for last job)
+ # (Get-Random -Maximum is exclusive, so -Minimum 10 -Maximum 25 yields 10..24 seconds)
+ if ($pt.Index -lt $pt.Total) {
+ $staggerDelay = Get-Random -Minimum 10 -Maximum 25
+ Start-Sleep -Seconds $staggerDelay
+ }
+ }
+
+ # All ThreadJobs now launched (note: this only means threads are running, not that queries were created on server)
+ $launchElapsed = [Math]::Round(((Get-Date) - $launchStartTime).TotalSeconds, 1)
+ Write-LogHost " All $($jobs.Count) ThreadJobs launched (${launchElapsed}s), monitoring query creation..." -ForegroundColor DarkCyan
+
+
+ # If monitoring wasn't started earlier (only happens if $partitions.Count < $firstBatchSize)
+ if (-not $monitoringStarted) {
+ $script:throttleNotifications = [System.Collections.Concurrent.ConcurrentQueue[PSObject]]::new()
+ } # Wait for all jobs to complete and display monitoring output
+ $initialBlockSize = if ($script:globalLearnedBlockSize -and $script:globalLearnedBlockSize -gt 0) { $script:globalLearnedBlockSize } else { $BlockHours }
+ # Guard against zero/negative learned block size; fall back to 30 minutes
+ if ($initialBlockSize -le 0) { $initialBlockSize = 0.5 }
+
+ # Simple polling to monitor job completion
+ # NOTE(review): this local $lastStatusUpdate intentionally(?) shadows the
+ # $script:lastStatusUpdate used during the launch phase - confirm the two are meant
+ # to be independent timers.
+ $lastStatusUpdate = Get-Date
+ $firstStatus = $true
+
+ while (($jobs | Where-Object { $_.State -in 'Running','NotStarted' }).Count -gt 0) {
+ # Continuously collect output from all jobs (running and completed)
+ # Continuously drain output from every job. -Keep is used so result objects remain
+ # available to the dedicated completed-job pass below; $script:processedJobIds prevents
+ # double-processing once a job's result has been handled.
+ foreach ($job in $jobs) {
+ # Skip jobs we've already processed (without -Keep, Receive-Job consumes the output buffer)
+ if ($script:processedJobIds.Contains($job.Id)) {
+ continue
+ }
+
+ try {
+ $jobOutput = Receive-Job -Job $job -Keep -ErrorAction SilentlyContinue -ErrorVariable jobErrors
+
+ # Capture and log job errors to file (only once per job)
+ if ($jobErrors -and $jobErrors.Count -gt 0) {
+ # Check if we've already logged errors for this job
+ $errorLogKey = "ERRORS:$($job.Id)"
+ if (-not $script:shownJobMessages.ContainsKey($errorLogKey)) {
+ $script:shownJobMessages[$errorLogKey] = $true
+ foreach ($err in $jobErrors) {
+ $errMsg = if ($err.Exception) { $err.Exception.Message } else { $err.ToString() }
+ Write-Log "Job $($job.Id) error: $errMsg" -Level "ERROR"
+
+ # DIFFERENTIATE 401 vs 403 ERRORS
+ # 401 = Token expired/invalid -> refresh will help
+ # 403 = Permission denied -> refresh will NOT help
+ if ($errMsg -match '403|Forbidden|Access.*denied|Insufficient.*privileges') {
+ # 403 Forbidden - permissions issue, NOT token expiration
+ # Don't set AuthFailureDetected - refresh won't help
+ Write-LogHost " [AUTH] 403 Forbidden detected - this is a PERMISSIONS issue, not token expiration" -ForegroundColor Red
+ Write-LogHost " [AUTH] Token refresh will NOT resolve this. Check:" -ForegroundColor Yellow
+ Write-LogHost " • AuditLog.Read.All scope is granted" -ForegroundColor Yellow
+ Write-LogHost " • Admin consent has been provided" -ForegroundColor Yellow
+ Write-LogHost " • Required Azure AD role is assigned" -ForegroundColor Yellow
+ }
+ elseif ($errMsg -match '401|Unauthorized|token.*expired|authentication.*failed') {
+ # 401 Unauthorized - token issue, refresh will help
+ $script:AuthFailureDetected = $true
+ if (-not $script:Auth401MessageShown) {
+ $script:Auth401MessageShown = $true
+ Write-LogHost " [AUTH] 401 Unauthorized detected - stopping to re-authenticate" -ForegroundColor Red
+ }
+ break # IMMEDIATE EXIT from job error processing
+ }
+ }
+
+ # Mark partition as Failed for retry when errors detected
+ $pt = $jobMeta[$job.Id]
+ if ($pt -and $script:partitionStatus.ContainsKey($pt.Index)) {
+ $currentStatus = $script:partitionStatus[$pt.Index].Status
+ if ($currentStatus -notin 'Complete', 'Failed', 'Subdivided') {
+ $script:partitionStatus[$pt.Index].Status = 'Failed'
+ $script:partitionStatus[$pt.Index].LastError = $jobErrors[0].Exception.Message
+ Write-LogHost " [RETRY-QUEUE] Partition $($pt.Index)/$($pt.Total) marked as Failed for retry (from monitoring loop)" -ForegroundColor Yellow
+ }
+ }
+ }
+ }
+
+ if ($jobOutput) {
+ foreach ($output in $jobOutput) {
+ if ($output -is [string]) {
+ # Create unique key for deduplication
+ $msgKey = "$($job.Id):$output"
+ if ($output -match '^\[ATTEMPT\]') {
+ $msgKey = "$($job.Id):ATTEMPT"
+ }
+ elseif ($output -match '^\[STATUS\] Query running') {
+ $msgKey = "$($job.Id):STATUS"
+ }
+ elseif ($output -match '^\[SUCCESS\]') {
+ $msgKey = "$($job.Id):SUCCESS"
+ }
+ elseif ($output -match '^\[403-CREATE\]|^\[403-FETCH\]') {
+ # Use the full output as key to deduplicate identical 403 messages
+ # This prevents "Attempt 2/3" from repeating but allows different attempts to show
+ $msgKey = "$($job.Id):$output"
+ }
+ elseif ($output -match '^\[NETWORK\]') {
+ # NETWORK messages include changing elapsed time - deduplicate by partition/page only
+ # Extract partition and page info for stable key
+ if ($output -match 'Partition (\d+/\d+).*Page (\d+)') {
+ $msgKey = "$($job.Id):NETWORK:$($matches[1]):Page$($matches[2])"
+ } elseif ($output -match 'Partition (\d+/\d+)') {
+ $msgKey = "$($job.Id):NETWORK:$($matches[1])"
+ } else {
+ $msgKey = "$($job.Id):NETWORK"
+ }
+ }
+ if (-not $script:shownJobMessages.ContainsKey($msgKey)) {
+ $script:shownJobMessages[$msgKey] = $true
+
+ # Display messages with appropriate colors
+ if ($output -match '^\[ATTEMPT\]') {
+ Write-LogHost $output -ForegroundColor Cyan
+ }
+ elseif ($output -match '^\[SENT\]') {
+ Write-LogHost $output -ForegroundColor DarkGray
+ # CRITICAL: Extract and store QueryId from SENT message for retry reuse
+ if ($output -match 'QueryId:\s*([a-f0-9-]+)\)') {
+ $extractedQueryId = $matches[1]
+ $pt = $jobMeta[$job.Id]
+ if ($pt -and $script:partitionStatus.ContainsKey($pt.Index)) {
+ $script:partitionStatus[$pt.Index].QueryId = $extractedQueryId
+ }
+ }
+ }
+ elseif ($output -match '^\[ERROR\]') {
+ Write-LogHost $output -ForegroundColor Red
+ # Check for 401 in error messages
+ if ($output -match '401|Unauthorized') {
+ $script:AuthFailureDetected = $true
+ if (-not $script:Auth401MessageShown) {
+ $script:Auth401MessageShown = $true
+ Write-LogHost " [AUTH] 401 Unauthorized detected in job output" -ForegroundColor Red
+ }
+ # CRITICAL: Mark this partition as Failed for retry after re-auth
+ $pt = $jobMeta[$job.Id]
+ if ($pt -and $script:partitionStatus.ContainsKey($pt.Index)) {
+ $script:partitionStatus[$pt.Index].Status = 'Failed'
+ $script:partitionStatus[$pt.Index].LastError = '401 Unauthorized - token expired'
+ }
+ # NOTE(review): exits only the foreach over $jobOutput, not the outer job loop
+ break # IMMEDIATE EXIT
+ }
+ }
+ elseif ($output -match '^\[403-CREATE\]|^\[403-FETCH\]') {
+ Write-LogHost $output -ForegroundColor Yellow
+ }
+ elseif ($output -match '^\[NETWORK\]') {
+ Write-LogHost $output -ForegroundColor Yellow
+ }
+ elseif ($output -match '^\[STATUS\] Query running') {
+ Write-LogHost ($output -replace '^\[STATUS\]\s*','') -ForegroundColor Yellow
+ }
+ elseif ($output -match '^\[SUCCESS\]') {
+ Write-LogHost ($output -replace '^\[SUCCESS\]\s*','') -ForegroundColor Green
+ }
+ }
+ }
+ elseif ($output -isnot [string]) {
+ # This is a result object - mark partition as Complete and collect logs immediately
+ # NOTE(review): this is the fourth near-identical copy of the result-processing
+ # sequence in this region - strong candidate for a shared helper function.
+ $pt = $jobMeta[$job.Id]
+ if ($pt) {
+
+ if ($script:partitionStatus.ContainsKey($pt.Index)) {
+ $currentStatus = $script:partitionStatus[$pt.Index].Status
+
+ if ($currentStatus -ne 'Complete') {
+ $script:partitionStatus[$pt.Index].Status = 'Complete'
+ $script:partitionStatus[$pt.Index].QueryId = $output.QueryId
+ $script:partitionStatus[$pt.Index].RecordCount = $output.RetrievedCount
+
+ # Fallback: emit SUCCESS message if the original [SUCCESS] string was already consumed
+ $successKey = "$($job.Id):SUCCESS"
+ if (-not $script:shownJobMessages.ContainsKey($successKey)) {
+ $script:shownJobMessages[$successKey] = $true
+ Write-LogHost "Query succeeded - Partition $($pt.Index)/$($pt.Total) - Query ID: $($output.QueryId) - Retrieved $($output.RetrievedCount) records" -ForegroundColor Green
+ }
+
+ # Add logs to collection (skip if memory flush enabled - using JSONL only)
+ # Receive-Job can only be called once, so we must collect now
+ if ($output.Logs -and $output.Logs.Count -gt 0 -and -not $script:memoryFlushEnabled) {
+ $allLogs.AddRange($output.Logs)
+ }
+
+ # Update aggregate metrics only (per-record activity breakdown happens in explosion phase)
+ # CRITICAL: Do NOT iterate through logs here - it blocks the monitoring loop
+ try {
+ $script:metrics.QueryMs += [int]$output.ElapsedMs
+ $script:metrics.TotalRecordsFetched += [int]$output.RetrievedCount
+ } catch {}
+
+ # Mark partition complete in checkpoint (so Ctrl+C shows accurate count)
+ if ($script:CheckpointEnabled) {
+ Save-Checkpoint -PartitionIndex $pt.Index -QueryId $output.QueryId -State 'Completed' -RecordCount $output.RetrievedCount
+ }
+
+ # INCREMENTAL SAVE: Write partition records to disk immediately (prevents data loss on auth failure)
+ if ($output.Logs -and $output.Logs.Count -gt 0) {
+ try {
+ $incrementalDir = Join-Path (Split-Path $script:PartialOutputPath -Parent) ".pax_incremental"
+ if (-not (Test-Path $incrementalDir)) {
+ New-Item -ItemType Directory -Path $incrementalDir -Force | Out-Null
+ }
+ $incrementalFile = Join-Path $incrementalDir "Part$($pt.Index)_${global:ScriptRunTimestamp}_qid-$($output.QueryId)_$($output.RetrievedCount)records.jsonl"
+ # Write as JSON Lines (NDJSON) - one record per line for recoverability
+ $output.Logs | ForEach-Object { $_ | ConvertTo-Json -Depth 10 -Compress } | Out-File -FilePath $incrementalFile -Encoding utf8 -Force
+ Write-LogHost " [SAVE] Partition $($pt.Index): $($output.RetrievedCount) records saved to disk" -ForegroundColor DarkGreen
+
+ # Clear reference to allow GC (critical for memory management)
+ $output.Logs = $null
+
+ # MEMORY MANAGEMENT: Mark if we're using JSONL-only mode
+ if ($script:memoryFlushEnabled -and -not $script:memoryFlushed) {
+ $script:memoryFlushed = $true
+ Write-LogHost " [MEMORY] Memory management active - data written to JSONL only (streaming export at end)" -ForegroundColor Yellow
+ }
+ } catch {
+ Write-LogHost " [WARN] Failed to save incremental data for Partition $($pt.Index): $($_.Exception.Message)" -ForegroundColor Yellow
+ }
+ }
+
+ # Mark job as processed since we've already collected its output
+ [void]$script:processedJobIds.Add($job.Id)
+ # NOTE(review): the three empty else branches below are dead code - candidates
+ # for removal (kept here to avoid altering the patch).
+ } else {
+ }
+ } else {
+ }
+ } else {
+ }
+ }
+ }
+ }
+ } catch {
+ # Silently continue if we can't receive from a job
+ }
+ }
+
+ # Check if 60 seconds have passed since last status update (or first iteration)
+ $elapsedSinceLastUpdate = ((Get-Date) - $lastStatusUpdate).TotalSeconds
+ if ($firstStatus -or $elapsedSinceLastUpdate -ge 60) {
+ # Count partitions by status (EXCLUDE 'Subdivided' parent partitions)
+ $activeStatuses = @($script:partitionStatus.Values) | Where-Object { $_.Status -ne 'Subdivided' }
+ $completedPartitions = @($activeStatuses | Where-Object { $_.Status -eq 'Complete' }).Count
+ $jobCreatedPartitions = @($activeStatuses | Where-Object { $_.Status -eq 'JobCreated' }).Count
+ $notStartedPartitions = @($activeStatuses | Where-Object { $_.Status -eq 'NotStarted' }).Count
+ $failedCount = @($activeStatuses | Where-Object { $_.Status -eq 'Failed' }).Count
+
+ # Calculate remaining and total (only active partitions)
+ $remainingToComplete = $jobCreatedPartitions + $notStartedPartitions
+ $totalPartitions = @($activeStatuses).Count
+
+ $statusLine = "[$(Get-Date -Format 'HH:mm:ss')] Total Queries: $totalPartitions | Completed: $completedPartitions | Remaining: $remainingToComplete"
+ # Failed count is appended only when non-zero to keep the normal status line short
+ if ($failedCount -gt 0) {
+ $statusLine += " | Failed: $failedCount"
+ }
+
+ Write-LogHost $statusLine -ForegroundColor White
+
+ $lastStatusUpdate = Get-Date
+ $firstStatus = $false
+ }
+
+ # PROACTIVE TOKEN REFRESH (ALL auth modes): Refresh before expiration
+ # Uses SharedAuthState.ExpiresOn to determine when refresh is needed
+ # This prevents 401 errors during long-running job monitoring
+ # Thread jobs read from SharedAuthState, so they get the fresh token automatically
+ $refreshResult = Refresh-GraphTokenIfNeeded -BufferMinutes 5
+ # CRITICAL: Use -is [string] check to avoid PowerShell coercion bug where $true -eq 'Quit' returns True
+ if ($refreshResult -is [string] -and $refreshResult -eq 'Quit') {
+ # User chose to quit at auth prompt - save checkpoint and exit gracefully
+ if ($script:CheckpointEnabled) { Save-Checkpoint -Force }
+ Show-CheckpointExitMessage
+ exit 0
+ }
+
+ # REACTIVE AUTH CHECK: If 401 detected, handle token refresh
+ # Use AuthPromptInProgress as debounce to prevent multiple simultaneous auth prompts
+ if ($script:AuthFailureDetected -and -not $script:AuthPromptInProgress) {
+ $script:AuthPromptInProgress = $true # Set debounce flag
+ Write-LogHost ""
+ Write-LogHost " [AUTH] Authentication failure detected - pausing job monitoring" -ForegroundColor Red
+
+ # AppRegistration mode: Automatically refresh token (silent, no prompt)
+ # Interactive modes: Prompt user for re-authentication
+ if ($script:AuthConfig.Method -eq 'AppRegistration' -and $script:AuthConfig.CanReauthenticate) {
+ Write-LogHost " [AUTH] Attempting automatic token refresh for AppRegistration..." -ForegroundColor Yellow
+ $refreshResult = Invoke-TokenRefresh -Force
+
+ if (-not $refreshResult.Success -or -not $refreshResult.NewToken) {
+ Write-LogHost " [AUTH] Automatic token refresh failed: $($refreshResult.Message)" -ForegroundColor Red
+ Write-LogHost " [AUTH] Cannot continue without valid authentication." -ForegroundColor Red
+ if ($script:CheckpointEnabled) {
+ Save-Checkpoint -Force
+ }
+ Write-LogHost " Exiting due to authentication failure. Use -Resume to continue later." -ForegroundColor Yellow
+ # NOTE(review): AuthPromptInProgress is NOT reset on this return path - harmless
+ # on exit, but verify if the caller ever continues after this return.
+ return
+ }
+
+ # CRITICAL: Update $accessToken with fresh token for retry phase
+ $accessToken = $refreshResult.NewToken
+ Write-LogHost " [AUTH] Token refreshed successfully" -ForegroundColor Green
+ $script:AuthFailureDetected = $false
+ $script:Auth401MessageShown = $false # Reset for next auth failure cycle
+ $script:AuthPromptInProgress = $false # Reset debounce flag
+ } else {
+ # Interactive mode - prompt user
+ $reauthResult = Invoke-TokenRefreshPrompt
+
+ if ($reauthResult -eq 'Quit') {
+ # User chose to quit - save checkpoint and exit gracefully
+ if ($script:CheckpointEnabled) {
+ Save-Checkpoint -Force
+ }
+ Write-LogHost " Exiting due to user request. Use -Resume to continue later." -ForegroundColor Yellow
+ return
+ }
+
+ # CRITICAL: Get fresh token after interactive reauth
+ $accessToken = Get-GraphAccessToken
+ if (-not $accessToken) {
+ Write-LogHost " [AUTH] FATAL: Could not obtain access token after re-authentication" -ForegroundColor Red
+ if ($script:CheckpointEnabled) {
+ Save-Checkpoint -Force
+ }
+ Write-LogHost " Exiting due to token extraction failure. Use -Resume to continue later." -ForegroundColor Yellow
+ return
+ }
+ Write-LogHost " [AUTH] Fresh token obtained for retry phase" -ForegroundColor Green
+ # CRITICAL: Reset auth failure flags after successful interactive re-auth
+ # Without this, old 401 errors in job buffers re-trigger the auth prompt
+ $script:AuthFailureDetected = $false
+ $script:Auth401MessageShown = $false
+ $script:AuthPromptInProgress = $false # Reset debounce flag
+ }
+
+ # Re-authenticated successfully - token updated for retry phase
+ Write-LogHost " [AUTH] Resuming job monitoring with fresh token" -ForegroundColor Green
+
+ # FIX D: Drain all job buffers to clear old 401 errors
+ # Without this, old errors in buffers re-trigger auth detection on next loop iteration
+ # NOTE(review): draining without -Keep also discards any not-yet-processed result
+ # objects from still-running jobs' buffers - confirm jobs re-emit results, or that
+ # incomplete partitions are retried via the Failed status path.
+ foreach ($drainJob in $jobs) {
+ if ($drainJob.State -ne 'Completed' -and $drainJob.State -ne 'Failed') {
+ try {
+ # Drain WITHOUT -Keep to clear the buffers
+ $null = Receive-Job -Job $drainJob -ErrorAction SilentlyContinue
+ } catch { }
+ }
+ }
+ Write-LogHost " [AUTH] Drained job buffers to clear old error messages" -ForegroundColor DarkGray
+ }
+
+ Start-Sleep -Milliseconds 500
+ }
+
+ # Process completed jobs and collect results
+ $subdivisionOccurred = $false
+ # CRITICAL: Check error stream - jobs with State='Completed' but errors should be treated as failures
+ $completedNow = $jobs | Where-Object { $_.State -eq 'Completed' -and -not $script:processedJobIds.Contains($_.Id) -and $_.Error.Count -eq 0 }
+
+ foreach ($job in $completedNow) {
+ $res = $null
+ try {
+ # Receive all output from job (includes Write-Output debug messages)
+ $allOutput = Receive-Job -Job $job -ErrorAction Stop
+
+ # Filter output: PSCustomObject is the result, strings are debug messages
+ $debugMessages = $allOutput | Where-Object { $_ -is [string] }
+ $res = $allOutput | Where-Object { $_ -isnot [string] } | Select-Object -First 1
+
+ # Display [SENT] messages to terminal, log all debug messages to file
+ foreach ($debugMsg in $debugMessages) {
+ # Show [SENT] messages in terminal with color (with deduplication)
+ if ($debugMsg -match '^\[SENT\]') {
+ $msgKey = "$($job.Id):$debugMsg"
+ if (-not $script:shownJobMessages.ContainsKey($msgKey)) {
+ Write-LogHost $debugMsg -ForegroundColor DarkGray
+ $script:shownJobMessages[$msgKey] = $true
+ }
+ # CRITICAL: Extract and store QueryId from SENT message for retry reuse
+ if ($debugMsg -match 'QueryId:\s*([a-f0-9-]+)\)') {
+ $extractedQueryId = $matches[1]
+ $pt = $jobMeta[$job.Id]
+ if ($pt -and $script:partitionStatus.ContainsKey($pt.Index)) {
+ $script:partitionStatus[$pt.Index].QueryId = $extractedQueryId
+ }
+ }
+ }
+
+ $debugTimestamp = Get-Date -Format 'yyyy-MM-dd HH:mm:ss'
+
+ if ($debugMsg -match '^\[GRAPH-(WARN|ERROR)\]' -or $debugMsg -like 'Graph API Query Body*' -or $debugMsg -like 'API Stored Query Details*' -or $debugMsg -match '^\[NETWORK\]' -or $debugMsg -match '^\[ERROR\]') {
+ Write-LogHost $debugMsg -ForegroundColor DarkGray
+ }
+ }
+ } catch {
+ Write-LogHost " ✗ Error receiving job: $($_.Exception.Message)" -ForegroundColor Red
+
+ # Mark as Failed for retry (job exceptions need explicit status update)
+ $pt = $jobMeta[$job.Id]
+ if ($pt -and $script:partitionStatus.ContainsKey($pt.Index)) {
+ $script:partitionStatus[$pt.Index].Status = 'Failed'
+ $script:partitionStatus[$pt.Index].LastError = $_.Exception.Message
+ Write-LogHost " [RETRY-QUEUE] Partition $($pt.Index)/$($pt.Total) marked as Failed for retry" -ForegroundColor Yellow
+ }
+ # Mark job as processed to avoid duplicate processing
+ [void]$script:processedJobIds.Add($job.Id)
+ continue # Skip to next job - this one failed
+ }
+
+ $pt = $jobMeta[$job.Id]
+
+ if ($null -ne $res) {
+ # CHECK FOR SUBDIVISION SIGNAL: Job returned needs_subdivision status
+ if ($res.Status -eq 'needs_subdivision') {
+ Write-LogHost "[SUBDIVISION] Partition $($pt.Index)/$($pt.Total) requires subdivision - Preview count: $($res.PreviewCount)" -ForegroundColor Yellow
+
+ # Calculate subdivision time windows
+ $partitionSpan = $res.PartitionEnd - $res.PartitionStart
+ $partitionHours = $partitionSpan.TotalHours
+
+ # SMART SUBDIVISION: Analyze timestamp distribution in returned records
+ # to calculate optimal subdivision size instead of just dividing by 2
+ $subdivisionFactor = 2 # Default to half
+
+ if ($res.Logs -and $res.Logs.Count -eq 10000) {
+ try {
+ # Get timestamps from returned records
+ $timestamps = @()
+ foreach ($log in $res.Logs) {
+ if ($log.CreationTime) {
+ $ts = script:Parse-DateSafe $log.CreationTime
+ if ($ts) { $timestamps += $ts }
+ }
+ }
+
+ if ($timestamps.Count -gt 100) {
+ # Sort timestamps and find the last one (most recent in the 10k batch)
+ $sortedTimestamps = $timestamps | Sort-Object
+ $lastTimestamp = $sortedTimestamps[-1]
+ $firstTimestamp = $sortedTimestamps[0]
+
+ # Calculate how much of the partition timespan was covered by the 10k records
+ $coveredSpan = ($lastTimestamp - $firstTimestamp).TotalHours
+ $totalSpan = ($res.PartitionEnd - $res.PartitionStart).TotalHours
+
+ if ($coveredSpan -gt 0 -and $coveredSpan -lt $totalSpan) {
+ # Calculate records per hour for the covered span
+ $recordsPerHour = 10000 / $coveredSpan
+
+ # Calculate target hours to get ~8000 records (buffer below 10k)
+ $targetHours = 8000 / $recordsPerHour
+
+ # Calculate subdivision factor (but cap it to avoid tiny partitions)
+ $minSubdivisionHours = 0.001389 * 24 # 2 minutes
+ if ($targetHours -ge $minSubdivisionHours) {
+ $subdivisionFactor = [Math]::Max(2, [Math]::Ceiling($totalSpan / $targetHours))
+ Write-LogHost " [SMART SUBDIVISION] Analyzed 10k records: covered $([Math]::Round($coveredSpan,2))h of $([Math]::Round($totalSpan,2))h span" -ForegroundColor Cyan
+ Write-LogHost " [SMART SUBDIVISION] Estimated $([Math]::Round($recordsPerHour,0)) records/hour → dividing by $subdivisionFactor instead of 2" -ForegroundColor Cyan
+ }
+ }
+ }
+ } catch {
+ Write-LogHost " [SMART SUBDIVISION] Timestamp analysis failed, using default subdivision: $_" -ForegroundColor Yellow
+ }
+ }
+
+ # Calculate subdivision size
+ $subPartitionHours = $partitionHours / $subdivisionFactor
+ $subPartitionSpan = [TimeSpan]::FromHours($subPartitionHours)
+
+ # Create sub-partitions (divide evenly based on calculated factor)
+ $newSubPartitions = @()
+ for ($i = 0; $i -lt $subdivisionFactor; $i++) {
+ $subStart = $res.PartitionStart + ([TimeSpan]::FromHours($i * $subPartitionHours))
+ $subEnd = if ($i -eq ($subdivisionFactor - 1)) {
+ $res.PartitionEnd # Last partition goes to the end
+ } else {
+ $res.PartitionStart + ([TimeSpan]::FromHours(($i + 1) * $subPartitionHours))
+ }
+
+ $newSubPartitions += [PSCustomObject]@{
+ PStart = $subStart
+ PEnd = $subEnd
+ Index = $null
+ Total = $null
+ ParentIndex = $pt.Index
+ SubdivisionLevel = if ($pt.SubdivisionLevel) { $pt.SubdivisionLevel + 1 } else { 1 }
+ }
+ }
+
+ Write-LogHost " Creating $($newSubPartitions.Count) sub-partitions:" -ForegroundColor DarkYellow
+ foreach ($subPt in $newSubPartitions) {
+ $subHours = ($subPt.PEnd - $subPt.PStart).TotalHours
+ Write-LogHost " $($subPt.PStart.ToString('yyyy-MM-dd HH:mm')) to $($subPt.PEnd.ToString('yyyy-MM-dd HH:mm')) ($([Math]::Round($subHours,2))h)" -ForegroundColor DarkYellow
+ }
+
+ # Add sub-partitions to partitions array
+ $partitions += $newSubPartitions
+
+ # Save old status data before re-indexing (indexed by old Index values)
+ $oldStatusData = @{}
+ foreach ($key in $script:partitionStatus.Keys) {
+ $oldStatusData[$key] = $script:partitionStatus[$key]
+ }
+
+ # Re-index all partitions
+ $newTotal = $partitions.Count
+ for ($i = 0; $i -lt $partitions.Count; $i++) {
+ $partitions[$i].Index = $i + 1
+ $partitions[$i].Total = $newTotal
+ }
+
+ # Rebuild partition status dictionary with new indexes
+ $script:partitionStatus = @{}
+ foreach ($partition in $partitions) {
+ # Find if this partition had existing status data (by object reference or parent index)
+ $existingStatus = $null
+ foreach ($oldStatus in $oldStatusData.Values) {
+ if ([object]::ReferenceEquals($oldStatus.Partition, $partition)) {
+ $existingStatus = $oldStatus
+ break
+ }
+ }
+
+ if ($existingStatus) {
+ # Preserve existing status data but use new Index
+ $script:partitionStatus[$partition.Index] = @{
+ Partition = $partition
+ AttemptNumber = $existingStatus.AttemptNumber
+ QueryId = $existingStatus.QueryId
+ QueryName = $existingStatus.QueryName
+ Status = $existingStatus.Status
+ LastError = $existingStatus.LastError
+ RecordCount = $existingStatus.RecordCount
+ ParentPartition = $existingStatus.ParentPartition
+ SubdivisionReason = $existingStatus.SubdivisionReason
+ }
+ } else {
+ # New sub-partition - initialize fresh
+ $script:partitionStatus[$partition.Index] = @{
+ Partition = $partition
+ AttemptNumber = 0
+ QueryId = $null
+ QueryName = $null
+ Status = 'NotStarted'
+ LastError = $null
+ RecordCount = 0
+ ParentPartition = $pt.Index
+ }
+ }
+ }
+
+ # Mark parent partition as Subdivided (now using new Index from re-indexed partition)
+ $parentPartition = $partitions | Where-Object { [object]::ReferenceEquals($_, $pt) } | Select-Object -First 1
+ if ($parentPartition -and $script:partitionStatus.ContainsKey($parentPartition.Index)) {
+ $script:partitionStatus[$parentPartition.Index].Status = 'Subdivided'
+ $script:partitionStatus[$parentPartition.Index].RecordCount = 0
+ $script:partitionStatus[$parentPartition.Index].SubdivisionReason = "preview_count_$($res.PreviewCount)"
+ }
+
+ # Mark as processed to avoid duplicate handling
+ [void]$script:processedJobIds.Add($job.Id)
+
+ # Set flag to break out of inner loop and re-queue new sub-partitions
+ $subdivisionOccurred = $true
+ Write-LogHost " [SUBDIV-DEBUG] After creating sub-partitions: Total partitions=$($partitions.Count)" -ForegroundColor Magenta
+ break # Break from the job processing loop to re-start with new sub-partitions
+ }
+ elseif ($res.TokenExpired -eq $true) {
+ # FIX C: Token expired in job - mark for retry with fresh token
+ Write-LogHost "[TOKEN-RETRY] Partition $($pt.Index)/$($pt.Total) returned due to expired token - will retry with fresh token" -ForegroundColor Yellow
+
+ # Reset partition status to allow retry
+ if ($script:partitionStatus.ContainsKey($pt.Index)) {
+ $script:partitionStatus[$pt.Index].Status = 'NotStarted'
+ $script:partitionStatus[$pt.Index].LastError = 'Token expired - retry pending'
+ # Preserve QueryId if present for retry reuse
+ if ($res.QueryId) {
+ $script:partitionStatus[$pt.Index].QueryId = $res.QueryId
+ }
+ }
+
+ # Mark job as processed
+ [void]$script:processedJobIds.Add($job.Id)
+
+ # Set auth failure flag to trigger token refresh before retry
+ $script:AuthFailureDetected = $true
+ }
+ else {
+ # NORMAL COMPLETION: Not a subdivision - process as completed query
+ # Track whether data was actually added to collection
+ $dataAddedToCollection = $false
+ $recordsBeforeAdd = $allLogs.Count
+
+ # Add logs to collection (skip when memory flush enabled - data goes to JSONL only)
+ if ($res.Logs -and $res.Logs.Count -gt 0 -and -not $script:memoryFlushEnabled) {
+ foreach ($log in $res.Logs) {
+ [void]$allLogs.Add($log)
+ }
+ $dataAddedToCollection = ($allLogs.Count -gt $recordsBeforeAdd)
+ }
+
+ # Update partition status tracking - mark Complete if query succeeded (even with 0 records)
+ if ($script:partitionStatus.ContainsKey($pt.Index)) {
+ $queryName = "PAX_Query_$($pt.PStart.ToString('yyyyMMdd_HHmm'))-$($pt.PEnd.ToString('yyyyMMdd_HHmm'))_Part$($pt.Index)/$($pt.Total)"
+ $script:partitionStatus[$pt.Index].QueryName = $queryName
+ $script:partitionStatus[$pt.Index].Status = 'Complete'
+ $script:partitionStatus[$pt.Index].QueryId = $res.QueryId
+ $script:partitionStatus[$pt.Index].RecordCount = $res.RetrievedCount
+
+ # CHECKPOINT: Save Completed state - this partition's data is now fully fetched
+ if ($script:CheckpointEnabled) {
+ Save-Checkpoint -PartitionIndex $pt.Index -QueryId $res.QueryId -State 'Completed'
+ }
+
+ # INCREMENTAL SAVE: Write partition records to disk immediately (prevents data loss on auth failure)
+ if ($res.Logs -and $res.Logs.Count -gt 0) {
+ try {
+ $incrementalDir = Join-Path (Split-Path $script:PartialOutputPath -Parent) ".pax_incremental"
+ if (-not (Test-Path $incrementalDir)) {
+ New-Item -ItemType Directory -Path $incrementalDir -Force | Out-Null
+ }
+ $incrementalFile = Join-Path $incrementalDir "Part$($pt.Index)_${global:ScriptRunTimestamp}_qid-$($res.QueryId)_$($res.RetrievedCount)records.jsonl"
+ # Write as JSON Lines (NDJSON) - one record per line for recoverability
+ $res.Logs | ForEach-Object { $_ | ConvertTo-Json -Depth 10 -Compress } | Out-File -FilePath $incrementalFile -Encoding utf8 -Force
+ Write-LogHost " [SAVE] Partition $($pt.Index): $($res.RetrievedCount) records saved to disk" -ForegroundColor DarkGreen
+ } catch {
+ Write-LogHost " [WARN] Failed to save incremental data for Partition $($pt.Index): $($_.Exception.Message)" -ForegroundColor Yellow
+ }
+ }
+ }
+
+ # Collect telemetry if present
+ if ($res.Telemetry) {
+ $script:telemetryData += $res.Telemetry
+ }
+
+ # Show throttle retry summary if any retries occurred
+ if ($res.Telemetry -and $res.Telemetry.ThrottleRetriesDuringCreation -and $res.Telemetry.ThrottleRetriesDuringCreation -gt 0) {
+ Write-LogHost " [!] Throttled during query creation: $($res.Telemetry.ThrottleRetriesDuringCreation) retry(s)" -ForegroundColor Yellow
+ }
+
+ try {
+ $script:metrics.QueryMs += [int]$res.ElapsedMs
+ $script:metrics.TotalRecordsFetched += [int]$res.RetrievedCount
+ # Count records by their actual Operation value, not by query group name
+ if ($res.Logs -and $res.Logs.Count -gt 0) {
+ foreach ($log in $res.Logs) {
+ # Handle both Operation (EOM format) and Operations (Graph API normalized format)
+ $actualOperation = if ($log.Operation) { $log.Operation } elseif ($log.Operations) { $log.Operations } else { $null }
+ if (-not [string]::IsNullOrWhiteSpace($actualOperation)) {
+ if (-not $script:metrics.Activities.ContainsKey($actualOperation)) {
+ $script:metrics.Activities[$actualOperation] = @{ Retrieved = 0; Structured = 0 }
+ }
+ $script:metrics.Activities[$actualOperation].Retrieved += 1
+ }
+ }
+ }
+ } catch {}
+ } # End of else block for normal completion
+
+
+ # Capture diagnostic output for any failed jobs so payload logging is persisted
+ $failedNow = $jobs | Where-Object { $_.State -eq 'Failed' -and (-not $script:processedJobIds.Contains($_.Id)) }
+ foreach ($job in $failedNow) {
+ $pt = $jobMeta[$job.Id]
+ try {
+ $failOutput = Receive-Job -Job $job -ErrorAction SilentlyContinue
+ }
+ catch {
+ $failOutput = $null
+ }
+
+ if ($failOutput) {
+ foreach ($msg in $failOutput) {
+ if ($msg -is [string]) {
+ $msgKey = "$($job.Id):$msg"
+ if (-not $script:shownJobMessages.ContainsKey($msgKey)) {
+ $script:shownJobMessages[$msgKey] = $true
+ }
+
+ $debugTimestamp = Get-Date -Format 'yyyy-MM-dd HH:mm:ss'
+
+ if ($msg -match '^[\[]?(GRAPH-(WARN|ERROR)|ERROR|NETWORK|ATTEMPT|SENT)') {
+ Write-LogHost $msg -ForegroundColor DarkGray
+ }
+ elseif ($msg -like 'Graph API Query Body*' -or $msg -like 'API Stored Query Details*') {
+ Write-LogHost $msg -ForegroundColor DarkGray
+ }
+ else {
+ Write-LogHost $msg -ForegroundColor DarkGray
+ }
+ }
+ }
+ }
+
+ if ($pt -and $script:partitionStatus.ContainsKey($pt.Index)) {
+ $script:partitionStatus[$pt.Index].Status = 'Failed'
+ $script:partitionStatus[$pt.Index].LastError = 'ThreadJob failed before completion'
+ }
+
+ [void]$script:processedJobIds.Add($job.Id)
+ }
+
+ # Handle jobs that completed with errors in error stream (State='Completed' but had Write-Error calls)
+ $completedWithErrors = $jobs | Where-Object { $_.State -eq 'Completed' -and (-not $script:processedJobIds.Contains($_.Id)) -and $_.Error.Count -gt 0 }
+ foreach ($job in $completedWithErrors) {
+ $pt = $jobMeta[$job.Id]
+ $errorMsg = ($job.Error | Select-Object -First 1).ToString()
+ Write-LogHost "[ERROR-STREAM] Partition $($pt.Index)/$($pt.Total) - Job completed but had error: $errorMsg" -ForegroundColor Red
+
+ # Capture diagnostic output from the job
+ try {
+ $errorOutput = Receive-Job -Job $job -ErrorAction SilentlyContinue
+ if ($errorOutput) {
+ foreach ($msg in $errorOutput) {
+ if ($msg -is [string]) {
+ $msgKey = "$($job.Id):$msg"
+ if (-not $script:shownJobMessages.ContainsKey($msgKey)) {
+ $script:shownJobMessages[$msgKey] = $true
+ if ($msg -match '^[\[]?(GRAPH-(WARN|ERROR)|ERROR|NETWORK|ATTEMPT|SENT)') {
+ Write-LogHost $msg -ForegroundColor DarkGray
+ }
+ }
+ }
+ }
+ }
+ } catch {}
+
+ # Mark partition as Failed for retry
+ if ($pt -and $script:partitionStatus.ContainsKey($pt.Index)) {
+ $script:partitionStatus[$pt.Index].Status = 'Failed'
+ $script:partitionStatus[$pt.Index].LastError = $errorMsg
+ Write-LogHost " [RETRY-QUEUE] Partition $($pt.Index)/$($pt.Total) marked as Failed for retry due to error stream" -ForegroundColor Yellow
+ }
+
+ [void]$script:processedJobIds.Add($job.Id)
+ }
+ # Note: Logs already added to $allLogs earlier (moved above to validate data retrieval)
+
+ # Update progress - in parallel mode, each partition = 1 unit of progress
+ $script:progressState.Query.Current = [Math]::Min($script:progressState.Query.Current + 1, $script:progressState.Query.Total)
+ Update-Progress -Status ("Partition {0}/{1} complete" -f $pt.Index, $pt.Total)
+ $qc = $script:progressState.Query.Current
+ $qt = $script:progressState.Query.Total
+ Write-ProgressTick
+ }
+
+ # Mark job as processed to avoid duplicate processing
+ [void]$script:processedJobIds.Add($job.Id)
+ }
+
+ # If subdivision occurred, break to queue new sub-partitions
+ if ($subdivisionOccurred) {
+ Write-LogHost " Subdivision detected - queuing new sub-partitions..." -ForegroundColor Yellow
+ # Don't wait for all jobs to complete, loop back now to queue subdivided partitions
+ Write-LogHost " [SUBDIV-DEBUG] After break, outer loop should check for pending NotStarted partitions" -ForegroundColor Magenta
+ break
+ }
+
+ # All jobs completed - begin retry and reconciliation phase
+ $finalStates = $jobs | Group-Object -Property State | ForEach-Object { "$($_.Name):$($_.Count)" }
+ Write-LogHost " Initial job execution complete. States: $($finalStates -join ', ')" -ForegroundColor DarkCyan
+
+ # Mark any completed jobs as Complete if status wasn't already updated
+ # (This handles jobs where result object wasn't captured but job completed successfully)
+ # CRITICAL: Check for job errors first - ThreadJobs that throw exceptions still end up with State='Completed'
+ $completedJobs = $jobs | Where-Object { $_.State -eq 'Completed' }
+ foreach ($completedJob in $completedJobs) {
+ $pt = $jobMeta[$completedJob.Id]
+ if ($script:partitionStatus.ContainsKey($pt.Index)) {
+ # Check if job produced errors (401, network failures, thrown exceptions)
+ $jobErrors = @()
+ try { $jobErrors = @($completedJob.ChildJobs | ForEach-Object { $_.Error } | Where-Object { $_ }) } catch {}
+
+ if ($jobErrors.Count -gt 0 -and $script:partitionStatus[$pt.Index].Status -notin @('Complete', 'Subdivided')) {
+ # Job completed WITH errors - mark as Failed for retry
+ $script:partitionStatus[$pt.Index].Status = 'Failed'
+ $script:partitionStatus[$pt.Index].LastError = $jobErrors[0].ToString()
+ Write-LogHost " [ERROR-CHECK] Partition $($pt.Index) job completed with errors - marked Failed for retry" -ForegroundColor Yellow
+ }
+ elseif ($script:partitionStatus[$pt.Index].Status -in @('NotStarted','JobCreated')) {
+ # No errors and not already processed - mark as Complete
+ $script:partitionStatus[$pt.Index].Status = 'Complete'
+ }
+ }
+ }
+
+ # Track failed jobs and update status
+ $failedJobs = $jobs | Where-Object { $_.State -eq 'Failed' }
+ foreach ($failedJob in $failedJobs) {
+ $pt = $jobMeta[$failedJob.Id]
+ if ($script:partitionStatus.ContainsKey($pt.Index)) {
+ $script:partitionStatus[$pt.Index].Status = 'Failed'
+ try {
+ $errorOutput = Receive-Job -Job $failedJob -ErrorAction SilentlyContinue 2>&1
+ if ($errorOutput) {
+ $lastErrorText = ($errorOutput | Out-String).Trim()
+ $script:partitionStatus[$pt.Index].LastError = $lastErrorText
+
+ # Log the actual error for diagnostics (useful level)
+ Write-LogHost " Partition $($pt.Index) error: $lastErrorText" -ForegroundColor DarkYellow
+
+ # 401/Unauthorized failures don't count against retry limit (auth issue, not partition issue)
+ # This ensures partitions can get full retry attempts after re-authentication
+ if ($lastErrorText -match '401|Unauthorized') {
+ Write-LogHost " [AUTH] 401 error for Partition $($pt.Index) - will not count against retry limit" -ForegroundColor Yellow
+ }
+ }
+ } catch {
+ $script:partitionStatus[$pt.Index].LastError = $_.Exception.Message
+ }
+ }
+ } # RETRY LOGIC: Up to 4 additional attempts (5 total)
+ $maxAttempts = 5
+ $retryPass = 1
+
+ while ($retryPass -lt $maxAttempts) {
+ # Find partitions that need retry (Status = Failed or NotStarted only)
+ $partitionsToRetry = @()
+ foreach ($idx in $script:partitionStatus.Keys) {
+ $status = $script:partitionStatus[$idx]
+ # Only retry actual failures, not queries that completed with 0 records
+ if ($status.Status -in @('Failed', 'NotStarted')) {
+ if ($status.AttemptNumber -lt $maxAttempts) {
+ $partitionsToRetry += $status.Partition
+ }
+ }
+ }
+
+ if ($partitionsToRetry.Count -eq 0) {
+ Write-LogHost " No partitions require retry" -ForegroundColor Green
+ break
+ }
+
+ $retryPass++
+ Write-LogHost " [RETRY] Pass $retryPass/$maxAttempts - $($partitionsToRetry.Count) partition(s) need retry" -ForegroundColor Yellow
+
+ # Cooldown before retry
+ $cooldownSeconds = Get-Random -Minimum 30 -Maximum 60
+ Write-LogHost " Waiting $cooldownSeconds seconds before retry..." -ForegroundColor Gray
+ Start-Sleep -Seconds $cooldownSeconds
+
+ # CRITICAL: Refresh access token before partition retry
+ # For AppRegistration mode, force re-authentication to get fresh token
+ # For other modes, use Get-GraphAccessToken helper (HTTP primary)
+ try {
+ $tokenObtained = $false
+
+ # Check if we can use forced re-authentication (AppRegistration only)
+ if ($script:AuthConfig.CanReauthenticate -and $script:AuthConfig.Method -eq 'AppRegistration') {
+ $refreshResult = Invoke-TokenRefresh -Force
+ if ($refreshResult.Success -and $refreshResult.NewToken) {
+ $accessToken = $refreshResult.NewToken
+ $tokenObtained = $true
+ Write-LogHost " [TOKEN] Access token refreshed via re-authentication" -ForegroundColor Cyan
+ } else {
+ Write-LogHost " [TOKEN] Re-authentication failed: $($refreshResult.Message)" -ForegroundColor Yellow
+ }
+ }
+
+ # If AppRegistration refresh failed or using interactive auth, use helper
+ if (-not $tokenObtained) {
+ $newToken = Get-GraphAccessToken
+ if ($newToken) {
+ if ($newToken -ne $accessToken) {
+ $accessToken = $newToken
+ $tokenObtained = $true
+ Write-LogHost " [TOKEN] Fresh token obtained for retry phase" -ForegroundColor Cyan
+ } else {
+ # Token is the same - that's OK if we just re-authenticated
+ $tokenObtained = $true
+ Write-LogHost " [TOKEN] Token validated (unchanged but valid)" -ForegroundColor Gray
+ }
+ }
+ }
+
+ # CRITICAL: Block retry if we can't get a valid token
+ if (-not $tokenObtained) {
+ Write-LogHost " [TOKEN] FATAL: Cannot obtain valid access token for retry" -ForegroundColor Red
+ Write-LogHost " [TOKEN] This typically means the session has expired and requires re-authentication" -ForegroundColor Yellow
+
+ # For interactive modes, prompt user to re-authenticate
+ if ($script:AuthConfig.Method -in @('WebLogin', 'ExistingSession')) {
+ Write-LogHost " [TOKEN] Prompting for re-authentication..." -ForegroundColor Yellow
+ $reauthResult = Invoke-TokenRefreshPrompt
+
+ if ($reauthResult -eq 'Quit') {
+ if ($script:CheckpointEnabled) { Save-Checkpoint -Force }
+ Write-LogHost " Exiting due to user request. Use -Resume to continue later." -ForegroundColor Yellow
+ return
+ }
+
+ # Try getting token again after reauth
+ $accessToken = Get-GraphAccessToken
+ if (-not $accessToken) {
+ Write-LogHost " [TOKEN] FATAL: Still cannot obtain token after re-authentication" -ForegroundColor Red
+ if ($script:CheckpointEnabled) { Save-Checkpoint -Force }
+ Write-LogHost " Exiting due to token extraction failure. Use -Resume to continue later." -ForegroundColor Yellow
+ return
+ }
+ Write-LogHost " [TOKEN] Fresh token obtained after re-authentication" -ForegroundColor Green
+ } else {
+ # AppRegistration mode with no token - fatal error
+ if ($script:CheckpointEnabled) { Save-Checkpoint -Force }
+ Write-LogHost " Exiting due to authentication failure. Use -Resume to continue later." -ForegroundColor Yellow
+ return
+ }
+ }
+ } catch {
+ Write-LogHost " [TOKEN] Error during token refresh: $($_.Exception.Message)" -ForegroundColor Red
+ # Don't proceed with potentially expired token
+ if ($script:CheckpointEnabled) { Save-Checkpoint -Force }
+ Write-LogHost " Exiting due to token error. Use -Resume to continue later." -ForegroundColor Yellow
+ return
+ }
+
+ # Create retry jobs
+ $retryJobs = @()
+ $retryJobMeta = @{}
+
+ foreach ($pt in $partitionsToRetry) {
+ $script:partitionStatus[$pt.Index].AttemptNumber++
+ $script:partitionStatus[$pt.Index].Status = 'NotStarted' # Reset for retry
+
+ Write-LogHost " [RETRY] Attempt $($script:partitionStatus[$pt.Index].AttemptNumber)/$maxAttempts for Partition $($pt.Index)/$($pt.Total)" -ForegroundColor Yellow
+
+ # Re-create the job using the same scriptblock
+ # CRITICAL: Use $graphResultSize (0 for Graph API = unlimited) instead of $ResultSize (10000)
+ # Pass existing QueryId if available (for retry after 403 fetch failure)
+ $existingQueryId = $script:partitionStatus[$pt.Index].QueryId
+ if ($existingQueryId) {
+ Write-LogHost " [REUSE] Reusing existing QueryId: $existingQueryId" -ForegroundColor Cyan
+ }
+ $job = Start-ThreadJob -ThrottleLimit $maxConcurrentPartitions -ScriptBlock $queryJobScriptBlock -ArgumentList $pt.PStart, $pt.PEnd, $activities, $graphResultSize, $UserIds, $pt.Index, $pt.Total, $script:SharedAuthState, $pt, $MaxNetworkOutageMinutes, $script:GraphAuditApiVersion, $script:LogFile, $existingQueryId
+ $retryJobs += $job
+ $retryJobMeta[$job.Id] = $pt
+ }
+
+ while (($retryJobs | Where-Object { $_.State -in 'Running','NotStarted' }).Count -gt 0) {
+ foreach ($job in $retryJobs) {
+ try {
+ $jobOutput = Receive-Job -Job $job -Keep -ErrorAction SilentlyContinue
+ if ($jobOutput) {
+ foreach ($output in $jobOutput) {
+ if ($output -is [string] -and $output -match '^\[SENT\]') {
+ $msgKey = "$($job.Id):$output"
+ if (-not $script:shownJobMessages.ContainsKey($msgKey)) {
+ Write-LogHost $output -ForegroundColor DarkGray
+ $script:shownJobMessages[$msgKey] = $true
+ }
+ # CRITICAL: Extract and store QueryId from SENT message for retry reuse
+ if ($output -match 'QueryId:\s*([a-f0-9-]+)\)') {
+ $extractedQueryId = $matches[1]
+ $pt = $retryJobMeta[$job.Id]
+ if ($pt -and $script:partitionStatus.ContainsKey($pt.Index)) {
+ $script:partitionStatus[$pt.Index].QueryId = $extractedQueryId
+ }
+ }
+ }
+ }
+ }
+ } catch {}
+ }
+ Start-Sleep -Milliseconds 500
+ }
+
+ # Check for retry jobs that completed but have errors in error stream
+ # These need to be marked Failed so they get re-queued (mirrors initial pass logic)
+ foreach ($job in $retryJobs) {
+ if ($job.State -eq 'Completed' -and $job.Error.Count -gt 0) {
+ $pt = $retryJobMeta[$job.Id]
+ $errorText = ($job.Error | Out-String).Trim()
+ Write-LogHost "[ERROR-STREAM] Retry Partition $($pt.Index)/$($pt.Total) - Job completed but had error: $errorText" -ForegroundColor Yellow
+ if ($script:partitionStatus.ContainsKey($pt.Index)) {
+ $script:partitionStatus[$pt.Index].Status = 'Failed'
+ $script:partitionStatus[$pt.Index].LastError = $errorText
+ Write-LogHost " [RETRY-QUEUE] Partition $($pt.Index)/$($pt.Total) marked as Failed for next retry pass due to error stream" -ForegroundColor Yellow
+ }
+ }
+ }
+
+ # Process retry job results
+ foreach ($job in $retryJobs) {
+ $pt = $retryJobMeta[$job.Id]
+
+ # Skip jobs already marked Failed by error-stream check above
+ if ($script:partitionStatus.ContainsKey($pt.Index) -and $script:partitionStatus[$pt.Index].Status -eq 'Failed') {
+ continue
+ }
+
+ if ($job.State -eq 'Completed') {
+ try {
+ $allOutput = Receive-Job -Job $job -ErrorAction Stop
+ $res = $allOutput | Where-Object { $_ -isnot [string] } | Select-Object -First 1
+
+ if ($null -ne $res -and $script:partitionStatus.ContainsKey($pt.Index)) {
+ $script:partitionStatus[$pt.Index].QueryId = $res.QueryId
+ $script:partitionStatus[$pt.Index].QueryName = "PAX_Query_$($pt.PStart.ToString('yyyyMMdd_HHmm'))-$($pt.PEnd.ToString('yyyyMMdd_HHmm'))_Part$($pt.Index)/$($pt.Total)"
+ $script:partitionStatus[$pt.Index].RecordCount = $res.RetrievedCount
+ $script:partitionStatus[$pt.Index].Status = 'Complete'
+
+ Write-LogHost " Retry successful for Partition $($pt.Index)/$($pt.Total): $($res.RetrievedCount) records" -ForegroundColor Green # Add to allLogs
+ if ($res.Logs -and $res.Logs.Count -gt 0) {
+ foreach ($log in $res.Logs) {
+ if (-not $script:memoryFlushEnabled) { [void]$allLogs.Add($log) }
+ }
+
+ # INCREMENTAL SAVE: Write retry partition records to disk immediately (prevents data loss on auth failure)
+ # BUG FIX: This was missing from retry path, causing data loss on subsequent failures
+ try {
+ $incrementalDir = Join-Path (Split-Path $script:PartialOutputPath -Parent) ".pax_incremental"
+ if (-not (Test-Path $incrementalDir)) {
+ New-Item -ItemType Directory -Path $incrementalDir -Force | Out-Null
+ }
+ $incrementalFile = Join-Path $incrementalDir "Part$($pt.Index)_${global:ScriptRunTimestamp}_qid-$($res.QueryId)_$($res.RetrievedCount)records.jsonl"
+ # Write as JSON Lines (NDJSON) - one record per line for recoverability
+ $res.Logs | ForEach-Object { $_ | ConvertTo-Json -Depth 10 -Compress } | Out-File -FilePath $incrementalFile -Encoding utf8 -Force
+ Write-LogHost " [SAVE] Retry Partition $($pt.Index): $($res.RetrievedCount) records saved to disk" -ForegroundColor DarkGreen
+ } catch {
+ Write-LogHost " [WARN] Failed to save incremental data for Retry Partition $($pt.Index): $($_.Exception.Message)" -ForegroundColor Yellow
+ }
+ }
+
+ # Update checkpoint for retry success
+ if ($script:CheckpointEnabled) {
+ Save-Checkpoint -PartitionIndex $pt.Index -QueryId $res.QueryId -State 'Completed' -RecordCount $res.RetrievedCount
+ }
+ }
+ } catch {
+ Write-LogHost " ✗ Retry failed for Partition $($pt.Index)/$($pt.Total): $($_.Exception.Message)" -ForegroundColor Red
+ if ($script:partitionStatus.ContainsKey($pt.Index)) {
+ $script:partitionStatus[$pt.Index].Status = 'Failed'
+ $script:partitionStatus[$pt.Index].LastError = $_.Exception.Message
+
+ # 401/Unauthorized failures don't count against retry limit (auth issue, not partition issue)
+ if ($_.Exception.Message -match '401|Unauthorized') {
+ $script:partitionStatus[$pt.Index].AttemptNumber--
+ Write-LogHost " [AUTH] 401 error detected - retry attempt not counted (auth issue, not partition issue)" -ForegroundColor Yellow
+ }
+ }
+ }
+ } elseif ($job.State -eq 'Failed') {
+ Write-LogHost " ✗ Retry job failed for Partition $($pt.Index)/$($pt.Total)" -ForegroundColor Red
+ if ($script:partitionStatus.ContainsKey($pt.Index)) {
+ $script:partitionStatus[$pt.Index].Status = 'Failed'
+ try {
+ $errorOutput = Receive-Job -Job $job -ErrorAction SilentlyContinue 2>&1
+ if ($errorOutput) {
+ $lastErrorText = ($errorOutput | Out-String).Trim()
+ $script:partitionStatus[$pt.Index].LastError = $lastErrorText
+
+ # Log the actual error for diagnostics (useful level)
+ Write-LogHost " Error details: $lastErrorText" -ForegroundColor DarkYellow
+
+ # 401/Unauthorized failures don't count against retry limit (auth issue, not partition issue)
+ if ($lastErrorText -match '401|Unauthorized') {
+ $script:partitionStatus[$pt.Index].AttemptNumber--
+ Write-LogHost " [AUTH] 401 error detected - retry attempt not counted (auth issue, not partition issue)" -ForegroundColor Yellow
+ }
+ }
+ } catch {}
+ }
+ }
+ }
+
+ # Cleanup retry jobs
+ Remove-Job -Job $retryJobs -Force -ErrorAction SilentlyContinue | Out-Null
+ }
+
+ # FINAL SUMMARY AND RECONCILIATION
+ Write-LogHost "" -ForegroundColor White
+ Write-LogHost "═══════════════════════════════════════════════════════════════" -ForegroundColor Cyan
+ Write-LogHost " QUERY SUBMISSION SUMMARY" -ForegroundColor Cyan
+ Write-LogHost "═══════════════════════════════════════════════════════════════" -ForegroundColor Cyan
+
+ # Classify every tracked partition into summary buckets:
+ # Complete = data confirmed retrieved; Sent-but-incomplete = query submitted (has a QueryId)
+ # but results never confirmed; Never sent = no QueryId was ever obtained.
+ $totalPartitions = $script:partitionStatus.Count
+ $completedPartitions = @($script:partitionStatus.Values | Where-Object { $_.Status -eq 'Complete' })
+ $sentButIncomplete = @($script:partitionStatus.Values | Where-Object { $_.Status -eq 'Sent' -and $_.QueryId })
+ $neverSent = @($script:partitionStatus.Values | Where-Object { $_.Status -in @('Failed', 'NotStarted') -and -not $_.QueryId })
+
+ Write-LogHost " Total Partitions: $totalPartitions" -ForegroundColor White
+ Write-LogHost " Sent and Complete: $($completedPartitions.Count)" -ForegroundColor Green
+
+ if ($sentButIncomplete.Count -gt 0) {
+ Write-LogHost " [!] Sent but Incomplete: $($sentButIncomplete.Count)" -ForegroundColor Yellow
+ foreach ($status in $sentButIncomplete) {
+ $pt = $status.Partition
+ Write-LogHost " - Partition $($pt.Index)/$($pt.Total): QueryName=$($status.QueryName), QueryId=$($status.QueryId)" -ForegroundColor Yellow
+ }
+ }
+
+ if ($neverSent.Count -gt 0) {
+ Write-LogHost " ✗ Never Sent: $($neverSent.Count)" -ForegroundColor Red
+ foreach ($status in $neverSent) {
+ $pt = $status.Partition
+ $errorMsg = if ($status.LastError) { " - Error: $($status.LastError)" } else { "" }
+ Write-LogHost " - Partition $($pt.Index)/$($pt.Total): QueryName=$($status.QueryName)$errorMsg" -ForegroundColor Red
+ }
+ }
+
+ # Check for any missing/skipped partitions (exclude intentionally skipped partitions from resume mode)
+ # NOTE(review): 1..$totalPartitions assumes partition indices are contiguous starting at 1;
+ # adaptive subdivision may register child indices outside that range — confirm before relying on this.
+ $expectedPartitions = 1..$totalPartitions
+ $attemptedPartitions = @($script:partitionStatus.Keys)
+
+ # In resume mode, use the originally-skipped partition indices (captured at start of run)
+ # This avoids confusion where partitions completed during THIS run appear as "previously completed"
+ $intentionallySkipped = @()
+ if ($script:IsResumeMode -and $script:OriginallySkippedPartitionIndices) {
+ $intentionallySkipped = $script:OriginallySkippedPartitionIndices
+ }
+
+ $missingPartitions = $expectedPartitions | Where-Object { $_ -notin $attemptedPartitions -and $_ -notin $intentionallySkipped }
+
+ if ($missingPartitions.Count -gt 0) {
+ Write-LogHost " [!] MISSING/SKIPPED PARTITIONS: $($missingPartitions.Count)" -ForegroundColor Red
+ Write-LogHost " Partitions: $($missingPartitions -join ', ')" -ForegroundColor Red
+ }
+
+ # Show info about intentionally skipped partitions (resume mode)
+ if ($intentionallySkipped.Count -gt 0) {
+ Write-LogHost " Previously completed partitions (from checkpoint): $($intentionallySkipped -join ', ')" -ForegroundColor Green
+ }
+
+ Write-LogHost "═══════════════════════════════════════════════════════════════" -ForegroundColor Cyan
+ Write-LogHost "" -ForegroundColor White
+
+ # Continue with remaining processing if we have any completed partitions
+ if ($completedPartitions.Count -eq 0) {
+ Write-LogHost " ✗ ERROR: No partitions completed successfully. Cannot continue." -ForegroundColor Red
+ throw "All partitions failed - no data retrieved"
+ }
+
+ Write-LogHost " Continuing with $($completedPartitions.Count) successful partition(s)..." -ForegroundColor Green
+
+ # Process any remaining completed jobs that weren't caught in the loop
+ # CRITICAL: Skip jobs with errors - they should have been handled by retry logic
+ $remainingCompleted = $jobs | Where-Object { $_.State -eq 'Completed' -and -not $script:processedJobIds.Contains($_.Id) -and $_.Error.Count -eq 0 }
+ foreach ($job in $remainingCompleted) {
+ $res = $null
+ try {
+ $res = Receive-Job -Job $job -ErrorAction Stop
+ } catch {
+ Write-LogHost " ERROR receiving job: $($_.Exception.Message)" -ForegroundColor Red
+ }
+
+ $pt = $jobMeta[$job.Id] # Skip if job metadata not found (shouldn't happen but defensive)
+ if ($null -eq $pt) {
+ Write-LogHost " WARNING: Job metadata not found for job ID $($job.Id) - skipping" -ForegroundColor Yellow
+ [void]$script:processedJobIds.Add($job.Id)
+ continue
+ }
+
+ if ($null -ne $res) {
+ Write-LogHost " Partition $($pt.Index)/$($pt.Total) complete: Retrieved $($res.RetrievedCount) records in $($res.ElapsedMs)ms" -ForegroundColor Cyan
+ Write-LogHost " Query: $($pt.PStart.ToString('yyyy-MM-dd HH:mm')) to $($pt.PEnd.ToString('yyyy-MM-dd HH:mm')) UTC" -ForegroundColor DarkGray
+ # Metrics updates are best-effort: a failure here must never abort data collection.
+ try {
+ $script:metrics.QueryMs += [int]$res.ElapsedMs
+ $script:metrics.TotalRecordsFetched += [int]$res.RetrievedCount
+ # Count records by their actual Operation value, not by query group name
+ if ($res.Logs -and $res.Logs.Count -gt 0) {
+ foreach ($log in $res.Logs) {
+ # Handle both Operation (EOM format) and Operations (Graph API normalized format)
+ $actualOperation = if ($log.Operation) { $log.Operation } elseif ($log.Operations) { $log.Operations } else { $null }
+ if (-not [string]::IsNullOrWhiteSpace($actualOperation)) {
+ if (-not $script:metrics.Activities.ContainsKey($actualOperation)) {
+ $script:metrics.Activities[$actualOperation] = @{ Retrieved = 0; Structured = 0 }
+ }
+ $script:metrics.Activities[$actualOperation].Retrieved += 1
+ }
+ }
+ }
+ } catch {}
+
+ if ($res.Logs -and $res.Logs.Count -gt 0) {
+ foreach ($log in $res.Logs) {
+ [void]$allLogs.Add($log)
+ }
+ }
+
+ # Update progress - in parallel mode, each partition = 1 unit of progress
+ $script:progressState.Query.Current = [Math]::Min($script:progressState.Query.Current + 1, $script:progressState.Query.Total)
+ Update-Progress -Status ("Partition {0}/{1} complete (retry)" -f $pt.Index, $pt.Total)
+ # NOTE(review): $qc/$qt look unused here, but PowerShell's dynamic scoping means
+ # Write-ProgressTick may read them from this caller scope — confirm before removing.
+ $qc = $script:progressState.Query.Current
+ $qt = $script:progressState.Query.Total
+ Write-ProgressTick
+ }
+
+ [void]$script:processedJobIds.Add($job.Id)
+ }
+
+ # Cleanup
+ Remove-Job -Job $jobs -Force -ErrorAction SilentlyContinue | Out-Null
+
+ # Normalize progress if needed
+ # The <=200 guard keeps normalization from masking real gaps on very large partition sets.
+ if ($script:progressState.Query.Current -lt $script:progressState.Query.Total -and $script:progressState.Query.Total -le 200) {
+ $script:progressState.Query.Current = $script:progressState.Query.Total
+ Update-Progress -Status 'Parallel partitions complete (normalized)'
+ }
+ }
+ }
+ catch {
+ Write-LogHost " Graph API parallel execution error: $($_.Exception.Message)" -ForegroundColor Red
+ Write-LogHost " Error details: $($_.ScriptStackTrace)" -ForegroundColor Red
+ throw
+ }
+ } # End of while (-not $allPartitionsProcessed) loop - subdivision pass complete
+
+ # Show accurate completion status (only for parallel mode - sequential has its own summary)
+ if ($canParallel -and $script:partitionStatus) {
+ $successCount = @($script:partitionStatus.Values | Where-Object { $_.Status -eq 'Complete' }).Count
+ # FIX: subdivided partitions register child entries in partitionStatus, so the number of
+ # 'Complete' statuses can exceed the original $partitions.Count; clamp at 0 so the summary
+ # never reports a negative failure count.
+ $failedCount = [Math]::Max(0, $partitions.Count - $successCount)
+ if ($failedCount -eq 0) {
+ Write-LogHost " All $successCount partitions completed" -ForegroundColor Green
+ } else {
+ Write-LogHost " [!] $successCount/$($partitions.Count) partitions completed ($failedCount failed)" -ForegroundColor Yellow
+ }
+ }
+ }
+ # Sequential fallback: Process partitions one-by-one when parallel execution is not available
+ # This handles: EOM mode (-UseEOM), PS 5.1, or when ThreadJob module is unavailable
+ if (-not $canParallel) {
+ $sequentialGroups++
+ Write-LogHost " Processing $($partitions.Count) partitions sequentially..." -ForegroundColor DarkCyan
+ foreach ($pt in $partitions) {
+ $tq0 = Get-Date
+ if (-not $UseEOM) { Write-LogHost " Querying partition $($pt.Index)/$($pt.Total) sequentially" -ForegroundColor DarkCyan }
+ $logs = Invoke-ActivityTimeWindowProcessing -ActivityType $pt.Activity -StartDate $pt.PStart -EndDate $pt.PEnd -PartitionIndex $pt.Index -TotalPartitions $pt.Total -UseEOMMode $UseEOM
+ $tq1 = Get-Date
+ # Metrics/progress updates are best-effort; a failure here only logs a warning.
+ try {
+ $ms = [int]($tq1 - $tq0).TotalMilliseconds
+ $script:metrics.QueryMs += $ms
+ if ($logs) {
+ # Count records by their actual Operation value, not by query group name
+ # Normalize to an array so a single returned record is not treated as a scalar.
+ $logArray = if ($logs -is [Array]) { $logs } else { @($logs) }
+ foreach ($log in $logArray) {
+ # Handle both Operation (EOM format) and Operations (Graph API normalized format)
+ $actualOperation = if ($log.Operation) { $log.Operation } elseif ($log.Operations) { $log.Operations } else { $null }
+ if (-not [string]::IsNullOrWhiteSpace($actualOperation)) {
+ if (-not $script:metrics.Activities.ContainsKey($actualOperation)) {
+ $script:metrics.Activities[$actualOperation] = @{ Retrieved = 0; Structured = 0 }
+ }
+ $script:metrics.Activities[$actualOperation].Retrieved += 1
+ }
+ }
+ $script:metrics.TotalRecordsFetched += $logArray.Count
+ # Add to collection
+ foreach ($item in $logArray) { [void]$allLogs.Add($item) }
+ }
+ # Explicit progress tick per sequential partition
+ $script:progressState.Query.Current = [Math]::Min($script:progressState.Query.Current + 1, $script:progressState.Query.Total)
+ Write-ProgressTick
+ } catch {
+ Write-LogHost " Warning: Error processing partition $($pt.Index): $($_.Exception.Message)" -ForegroundColor Yellow
+ }
+ }
+ # NOTE(review): $allLogs.Count is cumulative across ALL groups processed so far, not just this pass.
+ Write-LogHost " Sequential processing complete: $($allLogs.Count) records retrieved" -ForegroundColor Green
+ }
+ }
+ $script:CurrentServiceFilter = $null
+
+ # ============================================================================
+ # FINAL SAFETY NET: Ensure ALL partitions were completed before export phase
+ # This catches any partitions that slipped through all retry mechanisms
+ # ============================================================================
+ if ($script:partitionStatus -and $script:partitionStatus.Count -gt 0 -and -not $UseEOM) {
+ # Find partitions not in terminal success states
+ # 'Subdivided' is terminal too: its data is carried by its child partitions.
+ $incompletePartitions = @($script:partitionStatus.Values | Where-Object {
+ $_.Status -notin @('Complete', 'Subdivided')
+ })
+
+ if ($incompletePartitions.Count -gt 0) {
+ Write-LogHost "" -ForegroundColor Yellow
+ Write-LogHost "============================================================" -ForegroundColor Yellow
+ Write-LogHost "[FINAL-RECONCILE] $($incompletePartitions.Count) partition(s) incomplete - initiating final recovery" -ForegroundColor Yellow
+ Write-LogHost "============================================================" -ForegroundColor Yellow
+
+ # List each incomplete partition
+ # LastError is truncated to 80 chars to keep the console summary readable.
+ foreach ($incomplete in $incompletePartitions) {
+ $lastErr = if ($incomplete.LastError) { " (Last error: $($incomplete.LastError.Substring(0, [Math]::Min(80, $incomplete.LastError.Length)))...)" } else { "" }
+ Write-LogHost " Partition $($incomplete.Index): Status=$($incomplete.Status)$lastErr" -ForegroundColor DarkYellow
+ }
+
+ $maxFinalAttempts = 5
+ $finalAttempt = 0
+ $recoveredCount = 0
+
+ while ($finalAttempt -lt $maxFinalAttempts) {
+ # Re-check which partitions still need recovery
+ $stillIncomplete = @($script:partitionStatus.Values | Where-Object {
+ $_.Status -notin @('Complete', 'Subdivided')
+ })
+
+ if ($stillIncomplete.Count -eq 0) {
+ Write-LogHost "[FINAL-RECONCILE] All partitions recovered successfully!" -ForegroundColor Green
+ break
+ }
+
+ $finalAttempt++
+ Write-LogHost "[FINAL-RECONCILE] Attempt $finalAttempt/$maxFinalAttempts - $($stillIncomplete.Count) partition(s) remaining" -ForegroundColor Yellow
+
+ # Refresh token before retry (critical for long-running exports)
+ # 'Quit' is a sentinel string returned when the operator chose to abort at the auth prompt.
+ $refreshResult = Refresh-GraphTokenIfNeeded -BufferMinutes 5
+ if ($refreshResult -is [string] -and $refreshResult -eq 'Quit') {
+ if ($script:CheckpointEnabled) { Save-Checkpoint -Force }
+ Show-CheckpointExitMessage
+ exit 0
+ }
+
+ # Get fresh token for recovery jobs
+ $recoveryToken = Get-GraphAccessToken
+ if (-not $recoveryToken) {
+ Write-LogHost "[FINAL-RECONCILE] Cannot obtain access token - saving checkpoint and exiting" -ForegroundColor Red
+ if ($script:CheckpointEnabled) { Save-Checkpoint -Force }
+ Show-CheckpointExitMessage
+ exit 0
+ }
+
+ # Brief cooldown between attempts (randomized to avoid hammering the service in lockstep)
+ if ($finalAttempt -gt 1) {
+ $cooldown = Get-Random -Minimum 15 -Maximum 30
+ Write-LogHost " Waiting $cooldown seconds before retry..." -ForegroundColor Gray
+ Start-Sleep -Seconds $cooldown
+ }
+
+ # Process each incomplete partition sequentially (safer for final recovery)
+ foreach ($incomplete in $stillIncomplete) {
+ $pt = $incomplete.Partition
+ if (-not $pt) {
+ Write-LogHost " [WARN] Partition $($incomplete.Index) has no partition object - cannot recover" -ForegroundColor Red
+ continue
+ }
+
+ Write-LogHost " [RECOVER] Retrying Partition $($pt.Index)/$($pt.Total)..." -ForegroundColor Cyan
+
+ try {
+ # Reset status for retry
+ $script:partitionStatus[$pt.Index].Status = 'NotStarted'
+ $script:partitionStatus[$pt.Index].AttemptNumber++
+
+ # Use sequential processing for recovery (Invoke-ActivityTimeWindowProcessing)
+ $logs = Invoke-ActivityTimeWindowProcessing -ActivityType $pt.Activity -StartDate $pt.PStart -EndDate $pt.PEnd -PartitionIndex $pt.Index -TotalPartitions $pt.Total -UseEOMMode $false
+
+ if ($logs) {
+ $logArray = if ($logs -is [Array]) { $logs } else { @($logs) }
+
+ # Add to $allLogs
+ foreach ($item in $logArray) { [void]$allLogs.Add($item) }
+
+ # Save to JSONL for streaming export
+ $incrementalDir = Join-Path (Split-Path $script:PartialOutputPath -Parent) ".pax_incremental"
+ if (-not (Test-Path $incrementalDir)) { New-Item -ItemType Directory -Path $incrementalDir -Force | Out-Null }
+ $incrementalFile = Join-Path $incrementalDir "Part$($pt.Index)_${global:ScriptRunTimestamp}_qid-recovery_$($logArray.Count)records.jsonl"
+ $logArray | ForEach-Object { $_ | ConvertTo-Json -Depth 10 -Compress } | Out-File -FilePath $incrementalFile -Encoding utf8 -Force
+
+ # Mark complete
+ $script:partitionStatus[$pt.Index].Status = 'Complete'
+ $script:partitionStatus[$pt.Index].RecordCount = $logArray.Count
+ $recoveredCount++
+
+ Write-LogHost " [RECOVERED] Partition $($pt.Index): $($logArray.Count) records" -ForegroundColor Green
+
+ # Update metrics
+ $script:metrics.TotalRecordsFetched += $logArray.Count
+ } else {
+ # Zero records is valid - mark complete
+ $script:partitionStatus[$pt.Index].Status = 'Complete'
+ $script:partitionStatus[$pt.Index].RecordCount = 0
+ $recoveredCount++
+ Write-LogHost " [RECOVERED] Partition $($pt.Index): 0 records (empty time window)" -ForegroundColor Green
+ }
+
+ # Save checkpoint after each successful recovery
+ if ($script:CheckpointEnabled) {
+ # FIX: use the status entry's RecordCount (set in both branches above). The previous
+ # code read $logArray.Count, but $logArray is never assigned in the zero-record branch,
+ # so a stale value from an earlier loop iteration could be checkpointed for an empty partition.
+ Save-Checkpoint -PartitionIndex $pt.Index -State 'Completed' -RecordCount $script:partitionStatus[$pt.Index].RecordCount
+ }
+ } catch {
+ $script:partitionStatus[$pt.Index].Status = 'Failed'
+ $script:partitionStatus[$pt.Index].LastError = $_.Exception.Message
+ Write-LogHost " [FAILED] Partition $($pt.Index): $($_.Exception.Message)" -ForegroundColor Red
+ }
+ }
+ }
+
+ # Final status report
+ $finalIncomplete = @($script:partitionStatus.Values | Where-Object {
+ $_.Status -notin @('Complete', 'Subdivided')
+ })
+
+ if ($finalIncomplete.Count -gt 0) {
+ Write-LogHost "" -ForegroundColor Red
+ Write-LogHost "============================================================" -ForegroundColor Red
+ Write-LogHost "[FINAL-RECONCILE] WARNING: $($finalIncomplete.Count) partition(s) could not be recovered after $maxFinalAttempts attempts" -ForegroundColor Red
+ Write-LogHost "============================================================" -ForegroundColor Red
+ foreach ($failed in $finalIncomplete) {
+ Write-LogHost " Partition $($failed.Index): $($failed.LastError)" -ForegroundColor Red
+ }
+ # Unrecovered partitions stay in partitionStatus so a later -Resume run can retry them.
+ Write-LogHost " These partitions will be missing from the export. Use -Resume to retry later." -ForegroundColor Yellow
+ } else {
+ Write-LogHost "[FINAL-RECONCILE] Complete: Recovered $recoveredCount partition(s) via final safety net" -ForegroundColor Green
+ }
+ Write-LogHost "" -ForegroundColor White
+ }
+ }
+
+ # MEMORY FLUSH MODE: If we flushed $allLogs during fetch, enable streaming merge from JSONL files
+ # This path is only for non-explosion mode (explosion is excluded from memory flush feature)
+ if ($script:memoryFlushed) {
+ Write-LogHost " [MEMORY] Memory flush occurred during fetch - enabling streaming export from JSONL files" -ForegroundColor Yellow
+ $script:UseStreamingMergeForExport = $true
+ $script:StreamingMergeDirectory = Split-Path $script:PartialOutputPath -Parent
+
+ # Get all completed partition indices from this run for streaming merge
+ # Note: partitionStatus values are hashtables with a 'Partition' key containing the partition object;
+ # the Index property lives on the partition object, not on the status hashtable itself
+ $completedPartitions = @($script:partitionStatus.Values | Where-Object { $_.Status -eq 'Complete' } | ForEach-Object { $_.Partition.Index })
+ $script:StreamingMergePartitions = $completedPartitions
+
+ # Count records from JSONL files for metrics
+ # Counts are parsed from the '<n>records.jsonl' filename suffix rather than reading file
+ # contents — cheap, but only as accurate as the writer's naming convention.
+ $incrementalDir = Join-Path $script:StreamingMergeDirectory ".pax_incremental"
+ $estimatedFromJSONL = 0
+ if (Test-Path $incrementalDir) {
+ $jsonlFiles = Get-ChildItem -Path $incrementalDir -Filter "*${global:ScriptRunTimestamp}*.jsonl" -ErrorAction SilentlyContinue
+ foreach ($f in $jsonlFiles) {
+ if ($f.Name -match '_(\d+)records\.jsonl$') {
+ $estimatedFromJSONL += [int]$Matches[1]
+ }
+ }
+ }
+ $script:StreamingMergeRecordCount = $estimatedFromJSONL
+ Write-LogHost " [MEMORY] Found $($estimatedFromJSONL.ToString('N0')) records across $($completedPartitions.Count) partitions for streaming export" -ForegroundColor DarkCyan
+ }
+
+ # MERGE INCREMENTAL SAVES: Only needed for Resume mode to recover data from skipped partitions
+ # Fresh runs already have all records in $allLogs (added when each partition completed)
+ # In resume mode, only merge data for partitions that were SKIPPED (already completed before this run)
+ # Partitions completed during THIS run already have their data in $allLogs via AddRange
+ if ($script:IsResumeMode) {
+ $partitionsToMerge = $script:OriginallySkippedPartitionIndices
+ if ($partitionsToMerge -and $partitionsToMerge.Count -gt 0) {
+ Write-LogHost " [MERGE] Merging incremental data for previously-completed partitions: $($partitionsToMerge -join ', ')" -ForegroundColor Cyan
+
+ # Use streaming merge when we have many partitions OR large record counts to avoid memory exhaustion
+ # Threshold: If more than 20 partitions to merge, or estimated >500K records, use streaming
+ # Estimate comes from the '<n>records.jsonl' filename suffix — no file contents are read here.
+ $estimatedRecords = 0
+ $incrementalDir = Join-Path (Split-Path $script:PartialOutputPath -Parent) ".pax_incremental"
+ if (Test-Path $incrementalDir) {
+ $filesToMerge = Get-ChildItem -Path $incrementalDir -Filter "*.jsonl" -ErrorAction SilentlyContinue | Where-Object {
+ $partMatch = [regex]::Match($_.Name, '^Part(\d+)_')
+ $partMatch.Success -and ([int]$partMatch.Groups[1].Value -in $partitionsToMerge)
+ }
+ foreach ($f in $filesToMerge) {
+ if ($f.Name -match '_(\d+)records\.jsonl$') {
+ $estimatedRecords += [int]$Matches[1]
+ }
+ }
+ }
+
+ # $allLogs.Count -eq 0 means every partition came from the prior run — stream in that case too.
+ $useStreamingMerge = ($partitionsToMerge.Count -gt 20) -or ($estimatedRecords -gt 500000) -or ($allLogs.Count -eq 0)
+
+ # Disable streaming merge for explosion modes - explosion requires in-memory processing
+ # Streaming merge only works with the non-explosion fast path (direct 1:1 CSV export)
+ $isExplosionMode = ($ExplodeDeep -or $ExplodeArrays -or $ForcedRawInputCsvExplosion)
+ if ($useStreamingMerge -and $isExplosionMode) {
+ Write-LogHost " [MERGE] Explosion mode active - loading records into memory (streaming merge not supported with explosion)" -ForegroundColor DarkYellow
+ $useStreamingMerge = $false
+ }
+
+ if ($useStreamingMerge) {
+ $streamingReason = if ($allLogs.Count -eq 0) { "all partitions from prior run" } elseif ($partitionsToMerge.Count -gt 20) { "$($partitionsToMerge.Count) partitions" } else { "~$($estimatedRecords.ToString('N0')) records" }
+ Write-LogHost " [MERGE] Large merge detected ($streamingReason) - using streaming mode" -ForegroundColor Yellow
+ Write-LogHost " [MERGE] Streaming merge avoids memory exhaustion for large datasets" -ForegroundColor DarkGray
+
+ # Flag that we're using streaming - will need special handling for CSV export
+ $script:UseStreamingMergeForExport = $true
+ $script:StreamingMergePartitions = $partitionsToMerge
+ $script:StreamingMergeDirectory = Split-Path $script:PartialOutputPath -Parent
+
+ # Don't merge into $allLogs - we'll stream directly to CSV later
+ # Just count the records for metrics
+ $mergedFromIncremental = $estimatedRecords
+ Write-LogHost " [MERGE] Deferred streaming merge: $($mergedFromIncremental.ToString('N0')) records will be streamed during export" -ForegroundColor DarkGray
+ } elseif ($isExplosionMode -and $allLogs.Count -eq 0) {
+ # Special case: Explosion mode with all partitions from prior run
+ # Need to load JSONL directly into $allLogs (Merge-IncrementalSaves requires non-empty collection)
+ $script:UseStreamingMergeForExport = $false
+ Write-LogHost " [MERGE] Loading $($estimatedRecords.ToString('N0')) records from prior run into memory for explosion..." -ForegroundColor Cyan
+ $incrementalDir = Join-Path (Split-Path $script:PartialOutputPath -Parent) ".pax_incremental"
+ $mergedFromIncremental = 0
+ if (Test-Path $incrementalDir) {
+ $filesToLoad = Get-ChildItem -Path $incrementalDir -Filter "*.jsonl" -ErrorAction SilentlyContinue | Where-Object {
+ $partMatch = [regex]::Match($_.Name, '^Part(\d+)_')
+ $partMatch.Success -and ([int]$partMatch.Groups[1].Value -in $partitionsToMerge)
+ }
+ foreach ($file in $filesToLoad) {
+ try {
+ $lines = Get-Content -Path $file.FullName -Encoding utf8
+ foreach ($line in $lines) {
+ if (-not [string]::IsNullOrWhiteSpace($line)) {
+ # Malformed JSONL lines are skipped silently; the file-level catch reports I/O failures.
+ try {
+ $record = $line | ConvertFrom-Json
+ [void]$allLogs.Add($record)
+ $mergedFromIncremental++
+ } catch {}
+ }
+ }
+ } catch {
+ Write-LogHost " [WARN] Failed to load $($file.Name): $($_.Exception.Message)" -ForegroundColor Yellow
+ }
+ }
+ }
+ Write-LogHost " [MERGE] Loaded $($mergedFromIncremental.ToString('N0')) records into memory" -ForegroundColor Green
+ } else {
+ # Small merge - use original in-memory approach (faster for small datasets)
+ $script:UseStreamingMergeForExport = $false
+ $mergedFromIncremental = Merge-IncrementalSaves -AllLogs $allLogs -OutputDirectory (Split-Path $script:PartialOutputPath -Parent) -CleanupAfterMerge $false -OnlyPartitionIndices $partitionsToMerge
+ }
+
+ # Update TotalRecordsFetched to include merged records (these were "fetched" in a previous run)
+ if ($mergedFromIncremental -gt 0) {
+ $script:metrics.TotalRecordsFetched += $mergedFromIncremental
+ Write-LogHost " [MERGE] Updated record count: +$mergedFromIncremental from previous run" -ForegroundColor DarkGray
+ }
+ } else {
+ $mergedFromIncremental = 0
+ $script:UseStreamingMergeForExport = $false
+ }
+ # Note: Incremental files are retained until successful script completion for data safety
+ # Cleanup happens at end of script (before exit) to allow recovery if explosion/export fails
+ } else {
+ $mergedFromIncremental = 0
+ # Preserve UseStreamingMergeForExport if memory flush already set it (fresh run with JSONL-only data)
+ if (-not $script:memoryFlushed) {
+ $script:UseStreamingMergeForExport = $false
+ }
+ # Note: Incremental files are retained until successful script completion for data safety
+ }
+
+ Set-ProgressPhase -Phase 'Explosion' -Status 'Analyzing and exploding records'
+ Write-LogHost ""; Write-LogHost "=== Enterprise Processing Summary ===" -ForegroundColor Green
+ # Three display cases: resume-mode deferred streaming merge, fresh-run memory flush
+ # (records live in JSONL, not $allLogs), and fully in-memory.
+ if ($script:UseStreamingMergeForExport -and $mergedFromIncremental -gt 0) {
+ Write-LogHost "Records retrieved this run: $($allLogs.Count)" -ForegroundColor Cyan
+ Write-LogHost "Records from prior run (streaming): $($mergedFromIncremental.ToString('N0'))" -ForegroundColor Cyan
+ Write-LogHost "Total records for export: $(($allLogs.Count + $mergedFromIncremental).ToString('N0'))" -ForegroundColor Green
+ } elseif ($script:UseStreamingMergeForExport -and $script:StreamingMergeRecordCount -gt 0) {
+ # Fresh run with memory flush - records are in JSONL, not $allLogs
+ Write-LogHost "Total audit records retrieved: $($script:StreamingMergeRecordCount.ToString('N0')) (streaming from JSONL)" -ForegroundColor Cyan
+ } else {
+ Write-LogHost "Total audit records retrieved: $($allLogs.Count)" -ForegroundColor Cyan
+ }
+
+ # Deduplicate by RecordId to handle session pagination retry scenarios
+ # Bug context: When session pagination retries with a new SessionId after transient failures,
+ # Search-UnifiedAuditLog can return the same records again, causing duplicates in $allLogs.
+ # This deduplication ensures each unique audit record appears only once in the final output.
+ $preDedupeCount = $allLogs.Count
+ if ($preDedupeCount -gt 0) {
+ try {
+ Write-LogHost "Running deduplication check on $preDedupeCount records..." -ForegroundColor DarkGray
+
+ # Deduplicate records by unique ID
+ # Live queries use 'Identity' property, CSV exports use 'Id' property
+ $uniqueLogs = New-Object System.Collections.ArrayList
+ $seenIds = New-Object System.Collections.Generic.HashSet[string]
+ $duplicateSkipped = 0
+
+ foreach ($log in $allLogs) {
+ $recordId = $null
+ try {
+ # Check properties in order of likelihood (Identity for live queries, Id for CSV)
+ if ($log.Identity) { $recordId = $log.Identity }
+ elseif ($log.Id) { $recordId = $log.Id }
+ elseif ($log.RecordId) { $recordId = $log.RecordId }
+ } catch {}
+
+ if ($recordId -and -not $seenIds.Contains($recordId)) {
+ [void]$seenIds.Add($recordId)
+ [void]$uniqueLogs.Add($log)
+ }
+ elseif (-not $recordId) {
+ # Preserve records without an Id (shouldn't happen, but be safe)
+ [void]$uniqueLogs.Add($log)
+ }
+ else {
+ # Duplicate ID detected - skip this record
+ $duplicateSkipped++
+ }
+ }
+
+ # Report deduplication results
+ # FIX: report the counter maintained in the loop above instead of re-deriving the same
+ # value from collection sizes — $duplicateSkipped was previously dead code and the
+ # recomputed difference is by construction identical to it.
+ if ($duplicateSkipped -gt 0) {
+ Write-LogHost "Deduplication: Removed $duplicateSkipped duplicate record(s) (pagination retry artifacts)" -ForegroundColor Yellow
+ $allLogs = $uniqueLogs
+ }
+ else {
+ Write-LogHost "Deduplication: No duplicates found" -ForegroundColor DarkGray
+ }
+ }
+ catch {
+ Write-LogHost "Warning: Deduplication failed: $($_.Exception.Message) - proceeding with original records" -ForegroundColor DarkYellow
+ }
+ }
+
+ # Show accurate record count — in streaming mode allLogs may be empty because records went JSONL→CSV directly
+ if ($script:UseStreamingMergeForExport) {
+ # StreamingMergeRecordCount = memory flush fresh run; mergedFromIncremental = deferred resume merge
+ $streamCount = if ($script:StreamingMergeRecordCount -gt 0) { $script:StreamingMergeRecordCount } else { $mergedFromIncremental }
+ $effectiveTotal = $allLogs.Count + $streamCount
+ Write-LogHost "Unique audit records: $($effectiveTotal.ToString('N0')) (streaming records deduplicated during export)" -ForegroundColor Cyan
+ } else {
+ Write-LogHost "Unique audit records: $($allLogs.Count)" -ForegroundColor Cyan
+ }
+ if ($script:Hit10KLimit -or $script:Hit1MLimit) {
+ Write-LogHost "";
+ $limitType = if ($script:Hit1MLimit) { "1M (Graph API)" } else { "10K (EOM)" }
+ Write-LogHost " CRITICAL NOTICE: $limitType record limit was reached during processing!" -ForegroundColor Red
+
+ # NOTE(review): $structuredDataCount is not assigned until the explosion phase below, so it is
+ # empty here; the try/catch keeps these best-effort diagnostics from failing — confirm intent.
+ try { Write-LogHost ("Structured rows produced: {0}" -f $structuredDataCount) -ForegroundColor DarkGray } catch {}
+ try { Write-LogHost ("Metrics.TotalStructuredRows: {0}" -f $script:metrics.TotalStructuredRows) -ForegroundColor DarkGray } catch {}
+ # Show subdivision summary if adaptive subdivision occurred
+ if ($script:SubdividedPartitions.Count -gt 0) {
+ Write-LogHost "";
+ Write-LogHost " ADAPTIVE SUBDIVISION SUMMARY:" -ForegroundColor Yellow
+ Write-LogHost " Total partitions subdivided: $($script:SubdividedPartitions.Count)" -ForegroundColor Yellow
+ foreach ($key in $script:SubdividedPartitions.Keys) {
+ $count = $script:SubdividedPartitions[$key]
+ Write-LogHost " - Time range: $key (preview count: $count)" -ForegroundColor DarkYellow
+ }
+ Write-LogHost " Subdivision successfully prevented data loss by splitting high-volume partitions" -ForegroundColor Green
+ }
+ }
+ # Offline fallback: nothing was fetched live but a raw CSV was supplied — rehydrate minimal
+ # record objects from the CSV so downstream explosion/export can still run. Best-effort:
+ # unparseable rows are skipped silently by the inner catch.
+ if ($allLogs.Count -eq 0 -and $RAWInputCSV) {
+ try {
+ $rehydrated = Import-Csv -Path $RAWInputCSV
+ $allLogs = New-Object System.Collections.ArrayList
+ foreach ($row in $rehydrated) {
+ try {
+ # script:Parse-DateSafe is a scope-qualified call to a helper defined elsewhere in this script
+ $creation = if ($row.CreationDate) { script:Parse-DateSafe $row.CreationDate } else { $null }
+ $identity = if ($row.Id) { $row.Id } elseif ($row.RecordId) { $row.RecordId } else { [guid]::NewGuid().ToString() }
+ # Shape mirrors the live query result objects expected by the downstream pipeline.
+ $rec = [pscustomobject]@{
+ RecordType = $(try { [int]$row.RecordType } catch { 0 })
+ CreationDate = $(if ($creation) { $creation } else { Get-Date })
+ UserIds = @($row.UserId)
+ Operations = $row.Operation
+ ResultStatus = $(try { $row.ResultStatus } catch { '' })
+ ResultCount = 0
+ Identity = $identity
+ IsValid = $true
+ ObjectState = ''
+ AuditData = $row.AuditData
+ Operation = $row.Operation
+ UserId = $row.UserId
+ }
+ [void]$allLogs.Add($rec)
+ } catch {}
+ }
+ }
+ catch { }
+ }
+ # For OnlyUserInfo mode, skip the early return - we don't need audit logs, just Entra data
+ # For streaming merge mode, $allLogs is intentionally empty - data will be streamed from JSONL files
+ if ($allLogs.Count -eq 0 -and -not $OnlyUserInfo -and -not $script:UseStreamingMergeForExport) {
+ Write-LogHost ""; Write-LogHost "No audit logs found in the specified date range for the selected activity types." -ForegroundColor Yellow
+ Write-LogHost "Emitting header-only CSV (0 rows) for deterministic downstream processing..." -ForegroundColor Cyan
+ # Header set must match the explosion mode so downstream consumers always see a stable schema.
+ $headerColumns = if ($ExplodeDeep -or $ExplodeArrays -or $ForcedRawInputCsvExplosion) { if ($IncludeM365Usage -and $RAWInputCSV) { Get-M365UsageWideHeader -RawCsvPath $RAWInputCSV -BaseHeader $M365UsageBaseHeader } else { $PurviewExplodedHeader } } else { @('RecordType', 'CreationDate', 'UserIds', 'Operations', 'ResultStatus', 'ResultCount', 'Identity', 'IsValid', 'ObjectState', 'Id', 'CreationTime', 'Operation', 'OrganizationId', 'RecordTypeNum', 'ResultStatus_Audit', 'UserKey', 'UserType', 'Version', 'Workload', 'UserId', 'AppId', 'ClientAppId', 'CorrelationId', 'ModelId', 'ModelProvider', 'ModelFamily', 'TokensTotal', 'TokensInput', 'TokensOutput', 'DurationMs', 'OutcomeStatus', 'ConversationId', 'TurnNumber', 'RetryCount', 'ClientVersion', 'ClientPlatform', 'AgentId', 'AgentName', 'AgentVersion', 'AgentCategory', 'AppIdentity', 'ApplicationName', 'AuditData', 'CopilotEventData') }
+ # Manual RFC-4180 escaping (quote fields containing quotes/commas/newlines or edge spaces);
+ # UTF8Encoding($false) writes without a BOM.
+ try { $outputDirEmpty = Split-Path $OutputFile -Parent; if (-not (Test-Path $outputDirEmpty)) { New-Item -ItemType Directory -Path $outputDirEmpty -Force | Out-Null }; $enc = New-Object System.Text.UTF8Encoding($false); $sw = [System.IO.StreamWriter]::new($OutputFile, $false, $enc); $escapedCols = @(); foreach ($col in $headerColumns) { $c = [string]$col; $needsQuote = ($c -match '[",\r\n]') -or $c.StartsWith(' ') -or $c.EndsWith(' '); $escaped = $c -replace '"', '""'; if ($needsQuote) { $escaped = '"' + $escaped + '"' }; $escapedCols += , $escaped }; $sw.WriteLine(($escapedCols -join ',')); $sw.Flush(); $sw.Dispose() } catch { Write-LogHost "Failed to write header-only CSV: $($_.Exception.Message)" -ForegroundColor Red }
+ # Finalize checkpoint: rename _PARTIAL files and delete checkpoint (same pattern as normal completion)
+ if ($script:CheckpointEnabled -and $script:PartialOutputPath -and (Test-Path $script:PartialOutputPath)) {
+ Complete-CheckpointRun -FinalOutputPath $script:FinalOutputPath
+ $OutputFile = $script:FinalOutputPath
+ $LogFile = $script:LogFile
+ }
+ $script:metrics.TotalStructuredRows = 0; $script:metrics.EffectiveChunkSize = 0; Set-ProgressPhase -Phase 'Complete' -Status 'No data'; Complete-Progress; Write-LogHost "Header-only CSV created at: $OutputFile" -ForegroundColor Green; $script:ScriptCompleted = $true; return
+ }
+
+ # Determine explosion mode:
+ # - Full Explosion: ExplodeDeep, ExplodeArrays, or ForcedRawInputCsvExplosion
+ # - No Explosion: Standard 1:1 format
+ $fullExplode = ($ExplodeDeep -or $ExplodeArrays -or $ForcedRawInputCsvExplosion)
+ $partialExplode = $false
+ $effectiveExplode = $fullExplode
+
+ $processingMode = if ($ExplodeDeep) { "deep column flattening (with row explosion)" } elseif ($fullExplode) { "array explosion (full)" } else { "standard 1:1 format" }
+ Write-LogHost "Converting audit records to structured format using $processingMode..." -ForegroundColor Yellow
+ $structuredDataCount = 0
+
+ # Parallel explosion configuration (PS7+ only, controlled by -ExplosionThreads)
+ # ExplosionThreads: 0=auto (optimized based on CPU, memory), 1=force serial, 2-32=explicit
+ $actualExplosionThreads = if ($ExplosionThreads -eq 0) {
+ # Auto-optimization: explosion is memory-bound, not CPU-bound
+ # 8 threads provides good parallelism without excessive GC pressure
+ $baseThreads = [Math]::Max(2, [Math]::Min([Environment]::ProcessorCount, 8))
+
+ # Memory pressure adjustment: reduce threads if system is under memory pressure
+ $memoryAdjustedThreads = $baseThreads
+ try {
+ # NOTE(review): $workingSetMB is computed but never referenced in this section - confirm intended
+ $workingSetMB = [int]([System.Diagnostics.Process]::GetCurrentProcess().WorkingSet64 / 1MB)
+ $availableMemory = $null
+ try {
+ # Windows-only CIM probe; on non-Windows/failure $availableMemory stays $null and no reduction applies
+ $osInfo = Get-CimInstance -ClassName Win32_OperatingSystem -ErrorAction SilentlyContinue
+ if ($osInfo) { $availableMemory = [int]($osInfo.FreePhysicalMemory / 1KB) } # Convert KB to MB
+ } catch { }
+
+ if ($availableMemory -and $availableMemory -lt 2000) {
+ # Less than 2GB free: reduce to 2-4 threads
+ $memoryAdjustedThreads = [Math]::Max(2, [Math]::Min(4, $baseThreads))
+ Write-LogHost " [THREADS] Memory pressure detected ($availableMemory MB free) - reducing threads: $baseThreads -> $memoryAdjustedThreads" -ForegroundColor Yellow
+ }
+ elseif ($availableMemory -and $availableMemory -lt 4000) {
+ # Less than 4GB free: reduce by 25%
+ $memoryAdjustedThreads = [Math]::Max(2, [int]($baseThreads * 0.75))
+ if ($memoryAdjustedThreads -lt $baseThreads) {
+ Write-LogHost " [THREADS] Low memory ($availableMemory MB free) - reducing threads: $baseThreads -> $memoryAdjustedThreads" -ForegroundColor Yellow
+ }
+ }
+ } catch { }
+
+ # Last expression of this arm becomes the assigned value (always >= 2 on this path)
+ $memoryAdjustedThreads
+ } elseif ($ExplosionThreads -eq 1) {
+ 1 # Force serial
+ } else {
+ $ExplosionThreads # User-specified value (already validated 2-32)
+ }
+
+ # Compute effective record count accounting for streaming merge (where $allLogs is empty, records are in JSONL)
+ $effectiveRecordCount = if ($script:UseStreamingMergeForExport) {
+ # Prefer the exact streaming-merge count; fall back to the incremental-merge tally, else 0
+ $sc = if ($script:StreamingMergeRecordCount -gt 0) { $script:StreamingMergeRecordCount } elseif ($mergedFromIncremental -gt 0) { $mergedFromIncremental } else { 0 }
+ $allLogs.Count + $sc
+ } else {
+ $allLogs.Count
+ }
+
+ # Enable parallel only if PS7+, more than 500 records, AND threads > 1
+ $useParallelExplosion = $script:IsPS7 -and ($effectiveRecordCount -gt 500) -and ($actualExplosionThreads -gt 1)
+ # NOTE(review): $parallelBatchSize is not referenced by the chunking logic later in this
+ # section (which derives its own $targetChunkSize = 1000) - confirm it is still needed.
+ $parallelBatchSize = 1000 # Records per parallel batch
+ $parallelThrottleLimit = $actualExplosionThreads
+
+ if ($useParallelExplosion) {
+ Write-LogHost "Parallel processing: ENABLED (PS7+ detected, $parallelThrottleLimit threads)" -ForegroundColor Green
+ } else {
+ # Report which gate failed; the three reasons are mutually exclusive by construction
+ if (-not $script:IsPS7) {
+ Write-LogHost "Parallel processing: DISABLED (requires PowerShell 7+)" -ForegroundColor Gray
+ } elseif ($ExplosionThreads -eq 1) {
+ Write-LogHost "Parallel processing: DISABLED (forced serial via -ExplosionThreads 1)" -ForegroundColor Gray
+ } elseif ($effectiveRecordCount -le 500) {
+ Write-LogHost "Parallel processing: DISABLED (only $effectiveRecordCount records - threshold is 500)" -ForegroundColor Gray
+ }
+ }
+
+ # Schema discovery note: Parallel mode scans ALL rows for 100% column coverage; serial mode uses sampling
+ if ($useParallelExplosion) {
+ Write-LogHost "Streaming export mode enabled (parallel mode: full schema scan; base chunk size=$StreamingChunkSize)" -ForegroundColor Yellow
+ } else {
+ Write-LogHost "Streaming export mode enabled (schema sample=$StreamingSchemaSample; base chunk size=$StreamingChunkSize)" -ForegroundColor Yellow
+ }
+ $te0 = Get-Date
+ $schemaFrozen = $false; $schemaSampleRows = New-Object System.Collections.Generic.List[object]; $postFreezeNewColumns = 0; $lateIgnoredColumns = New-Object System.Collections.Generic.HashSet[string]; $columnOrder = $null; $buffer = New-Object System.Collections.Generic.List[object]; $exportTemp = Join-Path ([System.IO.Path]::GetTempPath()) ("pax_export_" + [guid]::NewGuid().ToString() + ".tmp"); $csvWriter = $false
+
+ # Ensure TotalRecordsFetched reflects allLogs count (may not have been set in all code paths)
+ if ($script:metrics.TotalRecordsFetched -eq 0 -and $allLogs.Count -gt 0) {
+ $script:metrics.TotalRecordsFetched = $allLogs.Count
+ }
+
+ # ═══════════════════════════════════════════════════════════════════════════════
+ # NON-EXPLOSION FAST PATH: Direct stream to CSV with fixed schema (skip parallel overhead)
+ # ═══════════════════════════════════════════════════════════════════════════════
+ if (-not $fullExplode) {
+
+ # STREAMING MERGE PATH - Handle large resume scenarios without loading into memory
+ if ($script:UseStreamingMergeForExport) {
+ Write-LogHost "Non-explosion fast path: STREAMING MERGE MODE (memory-efficient)" -ForegroundColor Cyan
+ Write-LogHost " Streaming directly from incremental files to avoid memory exhaustion..." -ForegroundColor DarkGray
+ $fastPathStart = Get-Date
+ $fastPathColumns = @('RecordId', 'CreationDate', 'RecordType', 'Operation', 'AuditData', 'AssociatedAdminUnits', 'AssociatedAdminUnitsNames')
+
+ # First, write any in-memory records from THIS run's partitions (if any)
+ $inMemoryCount = $allLogs.Count
+ $streamingActivityCounts = @{}
+ if ($inMemoryCount -gt 0) {
+ Write-LogHost " Writing $($inMemoryCount.ToString('N0')) in-memory records from current run..." -ForegroundColor DarkCyan
+ Open-CsvWriter -Path $exportTemp -Columns $fastPathColumns
+ $csvWriter = $true
+
+ $batch = New-Object System.Collections.Generic.List[object]
+ $inMemoryRecordIds = New-Object System.Collections.Generic.HashSet[string]
+ $batchSize = 5000
+ foreach ($log in $allLogs) {
+ $auditData = $log.AuditData
+ $parsedAudit = if ($log.PSObject.Properties['_ParsedAuditData']) { $log._ParsedAuditData } else { try { $auditData | ConvertFrom-Json -ErrorAction SilentlyContinue } catch { $null } }
+ $opValue = if ($parsedAudit -and $parsedAudit.Operation) { $parsedAudit.Operation } else { $log.Operations }
+
+ # Track per-activity counts for Activity Type Breakdown
+ if ($opValue) {
+ if (-not $streamingActivityCounts.ContainsKey($opValue)) { $streamingActivityCounts[$opValue] = 0 }
+ $streamingActivityCounts[$opValue]++
+ }
+
+ $fastRecord = [pscustomobject]@{
+ RecordId = if ($log.RecordId) { $log.RecordId } elseif ($log.Identity) { $log.Identity } elseif ($log.Id) { $log.Id } elseif ($parsedAudit -and $parsedAudit.Id) { $parsedAudit.Id } else { $null }
+ CreationDate = if ($log.CreationDate) { $log.CreationDate.ToUniversalTime().ToString('yyyy-MM-ddTHH:mm:ss.fffZ') } else { '' }
+ RecordType = $log.RecordType
+ Operation = $opValue
+ AuditData = $auditData
+ AssociatedAdminUnits = $null
+ AssociatedAdminUnitsNames = $null
+ }
+ $batch.Add($fastRecord)
+ if ($fastRecord.RecordId) { [void]$inMemoryRecordIds.Add($fastRecord.RecordId) }
+
+ if ($batch.Count -ge $batchSize) {
+ Write-CsvRows -Rows $batch -Columns $fastPathColumns
+ $batch.Clear()
+ }
+ }
+ if ($batch.Count -gt 0) {
+ Write-CsvRows -Rows $batch -Columns $fastPathColumns
+ $batch.Clear()
+ }
+ Close-CsvWriter
+ $csvWriter = $false
+
+ # Clear in-memory collection to free RAM before streaming merge
+ $allLogs.Clear()
+ [GC]::Collect()
+ Write-LogHost " In-memory records written, RAM freed" -ForegroundColor DarkGray
+ }
+
+ # Now stream merge the previously-completed partitions directly to CSV
+ Write-LogHost " Streaming merge of $($script:StreamingMergePartitions.Count) previously-completed partitions..." -ForegroundColor Cyan
+
+ # If we already wrote in-memory records, we need to append to the temp file
+ if ($inMemoryCount -gt 0) {
+ # Streaming merge needs to append to existing file - use a second temp file then combine
+ $streamingTemp = Join-Path ([System.IO.Path]::GetTempPath()) ("pax_streaming_" + [guid]::NewGuid().ToString() + ".tmp")
+ $streamedCount = Merge-IncrementalSaves-Streaming -OutputFile $streamingTemp -OutputDirectory $script:StreamingMergeDirectory -OnlyPartitionIndices $script:StreamingMergePartitions -Columns $fastPathColumns -ExcludeRecordIds $inMemoryRecordIds -ActivityCounts ([ref]$streamingActivityCounts)
+
+ # Append streaming temp to main temp (skip header line from streaming file)
+ if ((Test-Path $streamingTemp) -and $streamedCount -gt 0) {
+ Write-LogHost " Combining in-memory and streamed data..." -ForegroundColor DarkGray
+ Get-Content $streamingTemp | Select-Object -Skip 1 | Add-Content $exportTemp
+ Remove-Item $streamingTemp -Force -ErrorAction SilentlyContinue
+ }
+ $totalStreamedRecords = $inMemoryCount + $streamedCount
+ } else {
+ # No in-memory records - stream directly to final temp file
+ $totalStreamedRecords = Merge-IncrementalSaves-Streaming -OutputFile $exportTemp -OutputDirectory $script:StreamingMergeDirectory -OnlyPartitionIndices $script:StreamingMergePartitions -Columns $fastPathColumns -ActivityCounts ([ref]$streamingActivityCounts)
+ }
+
+ # Move temp file to final output
+ if (Test-Path $exportTemp) {
+ Move-Item -Path $exportTemp -Destination $OutputFile -Force
+ }
+
+ $fastPathElapsed = (Get-Date) - $fastPathStart
+ $fastPathRate = if ($fastPathElapsed.TotalSeconds -gt 0) { [int]($totalStreamedRecords / $fastPathElapsed.TotalSeconds) } else { 0 }
+ Write-LogHost "Streaming merge export complete: $($totalStreamedRecords.ToString('N0')) records in $([Math]::Round($fastPathElapsed.TotalSeconds, 1))s ($fastPathRate rec/sec)" -ForegroundColor Green
+
+ # Update metrics
+ $script:metrics.TotalStructuredRows = $totalStreamedRecords
+ $structuredDataCount = $totalStreamedRecords
+ $processedRecordCount = $totalStreamedRecords
+ $columnOrder = $fastPathColumns
+ $schemaFrozen = $true
+
+ # Populate per-activity metrics from actual streaming counts (inline handlers don't track these reliably)
+ $script:metrics.Activities = @{}
+ foreach ($opKey in $streamingActivityCounts.Keys) {
+ $script:metrics.Activities[$opKey] = @{ Retrieved = $streamingActivityCounts[$opKey]; Structured = $streamingActivityCounts[$opKey] }
+ }
+
+ # Store original count for ratio comparisons (allLogs was cleared for RAM)
+ $script:OriginalInputRecordCount = $totalStreamedRecords
+
+ # Skip the normal fast path processing below
+ $script:StreamingMergeCompleted = $true
+ }
+ # END STREAMING MERGE PATH
+
+ # Standard non-explosion fast path (original code, only runs if NOT using streaming merge)
+ if (-not $script:UseStreamingMergeForExport) {
+ Write-LogHost "Non-explosion fast path: Direct streaming with fixed 7-column schema..." -ForegroundColor Cyan
+ $fastPathStart = Get-Date
+
+ # Fixed schema for non-explosion mode (no discovery needed)
+ $fastPathColumns = @('RecordId', 'CreationDate', 'RecordType', 'Operation', 'AuditData', 'AssociatedAdminUnits', 'AssociatedAdminUnitsNames')
+
+ # Open CSV with known schema immediately
+ Open-CsvWriter -Path $exportTemp -Columns $fastPathColumns
+ $csvWriter = $true
+
+ # Stream directly - minimal transformation
+ $batchSize = 5000
+ $batch = New-Object System.Collections.Generic.List[object]
+ $processedCount = 0
+ $lastProgressTime = Get-Date
+
+ # Track per-activity export counts for metrics (fast path)
+ $fastPathActivityCounts = @{}
+
+ foreach ($log in $allLogs) {
+ $processedCount++
+
+ # Minimal record creation (no full Convert-ToStructuredRecord overhead)
+ $auditData = $log.AuditData
+ $parsedAudit = if ($log.PSObject.Properties['_ParsedAuditData']) { $log._ParsedAuditData } else { try { $auditData | ConvertFrom-Json -ErrorAction SilentlyContinue } catch { $null } }
+ $opValue = if ($parsedAudit -and $parsedAudit.Operation) { $parsedAudit.Operation } else { $log.Operations }
+
+ # Track per-activity counts
+ $opKey = if ($opValue) { [string]$opValue } else { 'Unknown' }
+ if (-not $fastPathActivityCounts.ContainsKey($opKey)) { $fastPathActivityCounts[$opKey] = 0 }
+ $fastPathActivityCounts[$opKey]++
+
+ $fastRecord = [pscustomobject]@{
+ RecordId = if ($log.RecordId) { $log.RecordId } elseif ($log.Identity) { $log.Identity } elseif ($log.Id) { $log.Id } elseif ($parsedAudit -and $parsedAudit.Id) { $parsedAudit.Id } else { $null }
+ CreationDate = if ($log.CreationDate) { $log.CreationDate.ToUniversalTime().ToString('yyyy-MM-ddTHH:mm:ss.fffZ') } else { '' }
+ RecordType = $log.RecordType
+ Operation = $opValue
+ AuditData = $auditData
+ AssociatedAdminUnits = $null
+ AssociatedAdminUnitsNames = $null
+ }
+
+ $batch.Add($fastRecord)
+
+ if ($batch.Count -ge $batchSize) {
+ Write-CsvRows -Rows $batch -Columns $fastPathColumns
+ $batch.Clear()
+
+ # Progress every 60 seconds
+ if (((Get-Date) - $lastProgressTime).TotalSeconds -ge 60) {
+ $elapsed = (Get-Date) - $fastPathStart
+ $rate = [int]($processedCount / $elapsed.TotalSeconds)
+ $pct = [int](($processedCount / $allLogs.Count) * 100)
+ Write-LogHost ("[{0}] Fast path: {1:N0}/{2:N0} ({3}%) | ~{4:N0} rec/sec" -f (Get-Date -Format "HH:mm:ss"), $processedCount, $allLogs.Count, $pct, $rate) -ForegroundColor DarkCyan
+ $lastProgressTime = Get-Date
+ }
+ }
+ }
+
+ # Flush remaining batch
+ if ($batch.Count -gt 0) {
+ Write-CsvRows -Rows $batch -Columns $fastPathColumns
+ $batch.Clear()
+ }
+
+ Close-CsvWriter
+ $csvWriter = $false
+
+ # Move temp file to final output
+ if (Test-Path $exportTemp) {
+ Move-Item -Path $exportTemp -Destination $OutputFile -Force
+ }
+
+ $fastPathElapsed = (Get-Date) - $fastPathStart
+ $fastPathRate = if ($fastPathElapsed.TotalSeconds -gt 0) { [int]($allLogs.Count / $fastPathElapsed.TotalSeconds) } else { 0 }
+ Write-LogHost "Non-explosion fast path complete: $($allLogs.Count) records in $([Math]::Round($fastPathElapsed.TotalSeconds, 1))s ($fastPathRate rec/sec)" -ForegroundColor Green
+
+ # Update metrics
+ $script:metrics.TotalStructuredRows = $allLogs.Count
+ $structuredDataCount = $allLogs.Count
+ $processedRecordCount = $allLogs.Count
+ $columnOrder = $fastPathColumns
+ $schemaFrozen = $true
+
+ # Merge fast path activity counts into script:metrics.Activities (for Activity Type Breakdown)
+ # In fast path (non-explosion), Retrieved and Structured are equal (1:1 mapping)
+ foreach ($opKey in $fastPathActivityCounts.Keys) {
+ if (-not $script:metrics.Activities.ContainsKey($opKey)) {
+ $script:metrics.Activities[$opKey] = @{ Retrieved = 0; Structured = 0 }
+ }
+ $script:metrics.Activities[$opKey].Retrieved += $fastPathActivityCounts[$opKey]
+ $script:metrics.Activities[$opKey].Structured += $fastPathActivityCounts[$opKey]
+ }
+
+ # Skip to post-processing (bypass parallel and serial paths)
+ $skipToPostProcessing = $true
+ } # End of standard non-explosion fast path (if not using streaming merge)
+
+ # If streaming merge was used, also skip to post-processing
+ if ($script:UseStreamingMergeForExport) {
+ $skipToPostProcessing = $true
+ }
+ } else {
+ $skipToPostProcessing = $false
+ }
+
+ # ═══════════════════════════════════════════════════════════════════════════════
+ # PARALLEL MODE: Skip serial loop entirely - do parallel schema discovery + processing
+ # ═══════════════════════════════════════════════════════════════════════════════
+ if (-not $skipToPostProcessing -and $useParallelExplosion) {
+ # Jump to parallel processing block (after the serial foreach loop)
+ $processedRecordCount = 0
+ $schemaFrozen = $false
+ # Fall through to parallel block below (skip serial foreach)
+ } elseif (-not $skipToPostProcessing) {
+ # SERIAL MODE: Original foreach loop for PS5 compatibility
+
+ # Explosion progress tracking (provides heartbeat for long-running explosion phase)
+ $explosionProgressInterval = 20000
+ $processedRecordCount = 0
+ $lastProgressUpdate = 0
+
+ foreach ($log in $allLogs) {
+ $processedRecordCount++
+
+ # NOTE: Retrieved counts are tracked during partition retrieval (lines 11052, 11431, 11784)
+ # Do NOT increment here - that would double-count records
+
+ # Periodic progress update (every 20K records)
+ if ($processedRecordCount -ge ($lastProgressUpdate + $explosionProgressInterval)) {
+ $elapsed = (Get-Date) - $te0
+ $rate = [int]($processedRecordCount / $elapsed.TotalSeconds)
+ $pct = [int](($processedRecordCount / $allLogs.Count) * 100)
+ $elapsedStr = if ($elapsed.TotalMinutes -ge 1) {
+ "{0}m {1}s" -f [int]$elapsed.TotalMinutes, $elapsed.Seconds
+ } else {
+ "{0}s" -f [int]$elapsed.TotalSeconds
+ }
+ Write-LogHost ("[{0}] Processing: {1:N0} / {2:N0} records ({3}%) | Elapsed: {4} | Rate: {5:N0} rec/sec" -f (Get-Date -Format "HH:mm:ss"), $processedRecordCount, $allLogs.Count, $pct, $elapsedStr, $rate) -ForegroundColor DarkCyan
+ $lastProgressUpdate = $processedRecordCount
+ }
+ $records = if ($effectiveExplode) { Convert-ToPurviewExplodedRecords -Record $log -Deep:$ExplodeDeep -PartialExplode:$partialExplode -PromptFilterValue $PromptFilter } else { Convert-ToStructuredRecord -Record $log -EnableExplosion:$false }
+ $recordsArr = To-RecordArray $records
+ if ($recordsArr.Count -gt 0) {
+ try {
+ $script:metrics.TotalStructuredRows += $recordsArr.Count
+ $structuredDataCount += $recordsArr.Count
+ $opName = $null; try { $opName = if ($log.Operation) { [string]$log.Operation } elseif ($log.Operations) { [string]$log.Operations } else { $null } } catch {}; if (-not $opName) { $opName = 'Unknown' }; if (-not $script:metrics.Activities.ContainsKey($opName)) { $script:metrics.Activities[$opName] = @{ Retrieved = 0; Structured = 0 } }; $script:metrics.Activities[$opName].Structured += $recordsArr.Count
+ } catch {}
+ foreach ($r in $recordsArr) {
+ if (-not $schemaFrozen) {
+ $schemaSampleRows.Add($r) | Out-Null
+ if ($schemaSampleRows.Count -ge $StreamingSchemaSample) {
+ if ($ExplodeArrays -or $ExplodeDeep -or $ForcedRawInputCsvExplosion) {
+ $columnOrder = New-Object System.Collections.Generic.List[string]
+ if ($IncludeM365Usage -and $RAWInputCSV) {
+ foreach ($c in (Get-M365UsageWideHeader -RawCsvPath $RAWInputCSV -BaseHeader $M365UsageBaseHeader)) { [void]$columnOrder.Add($c) }
+ } else {
+ foreach ($c in $PurviewExplodedHeader) { [void]$columnOrder.Add($c) }
+ }
+ if ($ExplodeDeep -and $script:DeepExtraColumns -and $script:DeepExtraColumns.Count -gt 0) {
+ foreach ($c in $script:DeepExtraColumns) { if (-not $columnOrder.Contains($c)) { [void]$columnOrder.Add($c) } }
+ }
+ # Augment header with discovered columns (live or replay)
+ foreach ($sr in $schemaSampleRows) { foreach ($pn in $sr.PSObject.Properties.Name) { if (-not $columnOrder.Contains($pn)) { [void]$columnOrder.Add($pn) } } }
+ }
+ else {
+ $columnOrder = New-Object System.Collections.Generic.List[string]
+ foreach ($sr in $schemaSampleRows) { foreach ($pn in $sr.PSObject.Properties.Name) { if (-not $columnOrder.Contains($pn)) { [void]$columnOrder.Add($pn) } } }
+ }
+ Write-LogHost "Schema frozen with $($columnOrder.Count) columns after $($schemaSampleRows.Count) sample rows (serial mode - sample-based)" -ForegroundColor DarkCyan
+ $effectiveChunkSize = $StreamingChunkSize
+ $colCount = $columnOrder.Count
+ if ($colCount -gt 1000) { $effectiveChunkSize = [int][Math]::Min($effectiveChunkSize, 1000) }
+ elseif ($colCount -gt 750) { $effectiveChunkSize = [int][Math]::Min($effectiveChunkSize, 1500) }
+ elseif ($colCount -gt 500) { $effectiveChunkSize = [int][Math]::Min($effectiveChunkSize, 2500) }
+ elseif ($colCount -gt 250) { $effectiveChunkSize = [int][Math]::Min($effectiveChunkSize, 4000) }
+ else { if ($colCount -le 60 -and $StreamingChunkSize -lt 15000) { $autoBoost = [int][Math]::Min(15000, [Math]::Max($StreamingChunkSize * 3, 8000)); $effectiveChunkSize = $autoBoost } }
+ if ($effectiveChunkSize -ne $StreamingChunkSize) { Write-LogHost "Adaptive chunk size applied: $effectiveChunkSize (was $StreamingChunkSize) due to column width $colCount" -ForegroundColor DarkYellow } else { Write-LogHost "Chunk size retained/boosted: $effectiveChunkSize (columns=$colCount)" -ForegroundColor DarkGray }
+ $script:metrics.EffectiveChunkSize = $effectiveChunkSize
+ if (-not $csvWriter) { Open-CsvWriter -Path $exportTemp -Columns $columnOrder; $csvWriter = $true }
+ $emitRows = @(); foreach ($sr in $schemaSampleRows) { $emitRows += ($sr | Select-Object -Property $columnOrder) }; if ($emitRows.Count -gt 0) { Write-CsvRows -Rows $emitRows -Columns $columnOrder }
+ $schemaSampleRows.Clear(); $schemaFrozen = $true
+ }
+ }
+ else {
+ $rowHadNew = $false
+ foreach ($pn in $r.PSObject.Properties.Name) { if (-not $columnOrder.Contains($pn)) { if (-not $rowHadNew) { $postFreezeNewColumns++; $rowHadNew = $true }; if (-not $lateIgnoredColumns.Contains($pn)) { [void]$lateIgnoredColumns.Add($pn) } } }
+ $buffer.Add($r) | Out-Null
+ if (-not $effectiveChunkSize) { $effectiveChunkSize = $StreamingChunkSize }
+ if ($buffer.Count -ge $effectiveChunkSize) { $emitSet = $buffer | ForEach-Object { $_ | Select-Object -Property $columnOrder }; if (-not $csvWriter) { Open-CsvWriter -Path $exportTemp -Columns $columnOrder; $csvWriter = $true }; if ($emitSet.Count -gt 0) { Write-CsvRows -Rows $emitSet -Columns $columnOrder }; $buffer.Clear() }
+ }
+ }
+ }
+ else {
+ if (-not $script:loggedZeroRecords) { $script:loggedZeroRecords = 0 }
+ if ($script:loggedZeroRecords -lt 5) {
+ $opName = $null; try { $opName = if ($log.Operation) { [string]$log.Operation } elseif ($log.Operations) { [string]$log.Operations } else { $null } } catch {}
+ Write-LogHost "Record produced 0 structured rows (Operation=$opName)" -ForegroundColor DarkYellow
+ $script:loggedZeroRecords++
+ }
+ }
+ }
+ } # End of SERIAL MODE else block
+
+ # ═══════════════════════════════════════════════════════════════════════════════
+ # PARALLEL EXPLOSION PROCESSING (PS7+ only)
+ # Uses Start-ThreadJob with job queue pattern for optimal load balancing
+ # Many small chunks + N concurrent workers = better CPU utilization
+ # ═══════════════════════════════════════════════════════════════════════════════
+ if (-not $skipToPostProcessing -and $useParallelExplosion) {
+ $processingMode = if ($ExplodeArrays -or $ExplodeDeep) { "array explosion" } else { "record conversion" }
+ Write-LogHost "Starting parallel $processingMode of $($allLogs.Count) records using $parallelThrottleLimit concurrent threads..." -ForegroundColor Cyan
+
+ # Update checkpoint to track explosion phase
+ if ($script:CheckpointEnabled -and $script:CheckpointData) {
+ $script:CheckpointData.explosion.status = 'InProgress'
+ $script:CheckpointData.explosion.recordsProcessed = 0
+ $script:CheckpointData.explosion.rowsGenerated = 0
+ $script:CheckpointData.explosion.lastUpdateTime = (Get-Date).ToUniversalTime().ToString('o')
+ Save-CheckpointToDisk
+ }
+
+ $parallelStartTime = Get-Date
+
+ # ─────────────────────────────────────────────────────────────────────────
+ # PHASE 1: Split records into many small chunks for job queue
+ # Small chunks = better load balancing when work varies per record
+ # ─────────────────────────────────────────────────────────────────────────
+ $totalRecords = $allLogs.Count
+ # Use ~1000 records per chunk (sweet spot for overhead vs load balancing)
+ # Minimum chunks = 2x thread count to ensure good distribution
+ $targetChunkSize = 1000
+ $minChunks = $parallelThrottleLimit * 2
+ $chunkSize = [Math]::Min($targetChunkSize, [Math]::Ceiling($totalRecords / $minChunks))
+ $chunkSize = [Math]::Max(100, $chunkSize) # At least 100 records per chunk
+
+ $chunks = [System.Collections.Generic.List[object[]]]::new()
+ for ($i = 0; $i -lt $totalRecords; $i += $chunkSize) {
+ $endIdx = [Math]::Min($i + $chunkSize - 1, $totalRecords - 1)
+ $chunk = $allLogs[$i..$endIdx]
+ $chunks.Add($chunk)
+ }
+
+ Write-LogHost "Split $totalRecords records into $($chunks.Count) chunks (~$chunkSize records each) for $parallelThrottleLimit workers" -ForegroundColor Cyan
+
+ # ─────────────────────────────────────────────────────────────────────────
+ # PHASE 2: Job queue - run up to N concurrent jobs, queue the rest
+ # As each job completes, start another from the queue
+ # ─────────────────────────────────────────────────────────────────────────
+ Write-LogHost "Phase 1: Processing $($chunks.Count) chunks with $parallelThrottleLimit concurrent workers..." -ForegroundColor Cyan
+ $explosionStart = Get-Date
+
+ # Capture parameters for thread jobs
+ $threadParams = @{
+ ExplodeDeep = $ExplodeDeep
+ PartialExplode = $partialExplode
+ PromptFilter = $PromptFilter
+ EffectiveExplode = $effectiveExplode
+ }
+
+ # Job queue management
+ $activeJobs = [System.Collections.Generic.List[object]]::new()
+ $completedResults = [System.Collections.Generic.List[object]]::new()
+ $chunkQueue = [System.Collections.Generic.Queue[object]]::new()
+ foreach ($chunk in $chunks) { $chunkQueue.Enqueue($chunk) }
+
+ $totalChunks = $chunks.Count
+ $chunksStarted = 0
+ $chunksCompleted = 0
+ $failedChunks = 0
+ $lastProgressTime = Get-Date
+ $progressInterval = [TimeSpan]::FromSeconds(60) # Update progress every 60 seconds
+
+ # ─────────────────────────────────────────────────────────────────────────
+ # Build InitializationScript with all required function definitions
+ # Start-ThreadJob creates ISOLATED runspaces - functions are NOT inherited
+ # ─────────────────────────────────────────────────────────────────────────
+ $initScriptText = @'
+# Thread-local helper functions (required for explosion)
+# Test-ScalarValue: $true-ish when $v is $null or a primitive/leaf type (string, char, bool,
+# numeric, datetime, guid) - i.e. a value that can be emitted into a CSV cell as-is.
+function Test-ScalarValue { param($v) ($null -eq $v -or $v -is [string] -or $v -is [char] -or $v -is [bool] -or $v -is [int] -or $v -is [long] -or $v -is [double] -or $v -is [decimal] -or $v -is [float] -or $v -is [datetime] -or $v -is [guid]) }
+
+# Get-SafeProperty: value of $obj.$name, or $null when the property is absent, $obj is $null,
+# or property access throws.
+function Get-SafeProperty { param($obj, [string]$name) try { if ($null -ne $obj -and $obj.PSObject.Properties[$name]) { return $obj.($name) } } catch {}; return $null }
+
+# Select-FirstNonNull: first element of $Values that is neither $null nor empty-string; else $null.
+function Select-FirstNonNull { param([object[]]$Values) foreach ($v in $Values) { if ($null -ne $v -and ('' -ne [string]$v)) { return $v } } return $null }
+
+# To-RecordArray: normalize a converter's output to a flat array of records.
+# Strings and PSObjects are IEnumerable but are treated as single records; other
+# enumerables are expanded one element per record.
+# NOTE(review): plain hashtables/dictionaries are enumerable and NOT in the scalar-ish list,
+# so they would be expanded into DictionaryEntry items - confirm converters never return them.
+function To-RecordArray {
+ param($records)
+ $result = @()
+ if ($null -eq $records) { return $result }
+ $isEnumerable = ($records -is [System.Collections.IEnumerable])
+ $isScalarish = ($records -is [string] -or $records -is [System.Management.Automation.PSObject] -or $records -is [System.Management.Automation.PSCustomObject])
+ if ($isEnumerable -and -not $isScalarish) { foreach ($r in $records) { $result += ,$r } }
+ else { $result += ,$records }
+ return $result
+}
+
+# Find-AllArrays: walk $Data (depth-limited to 6) collecting every array/IList found, keyed by
+# its dotted property path ('root' for a top-level array). $Arrays accumulates results ACROSS
+# recursive calls because the hashtable is passed by reference and mutated in place; recursive
+# callers discard the return value via Out-Null and only the top-level 'return $Arrays' matters.
+function Find-AllArrays {
+ param($Data, [string]$Path = '', [int]$Depth = 0, [hashtable]$Arrays)
+ if ($null -eq $Data) { return @{} }
+ if (-not $Arrays) { $Arrays = @{} }
+ if ($Depth -gt 6) { return $Arrays }
+ # Array-ness: enumerable, not a string, and either an IList or a true .NET array
+ $isArray = ($Data -is [System.Collections.IEnumerable] -and -not ($Data -is [string]) -and (($Data -is [System.Collections.IList]) -or $Data.GetType().IsArray))
+ if ($isArray) {
+ $key = if ($Path) { $Path } else { 'root' }
+ if (-not $Arrays.ContainsKey($key)) { $Arrays[$key] = [pscustomobject]@{ Path = $Path; Data = $Data; Count = ($Data | Measure-Object).Count } }
+ }
+ $props = $null
+ if ($Data -is [System.Management.Automation.PSObject]) { $props = $Data.PSObject.Properties }
+ elseif ($Data -is [System.Collections.IDictionary]) { $props = $Data.GetEnumerator() }
+ if ($props) {
+ foreach ($p in $props) {
+ $name = if ($p -is [System.Collections.DictionaryEntry]) { $p.Key } else { $p.Name }
+ # NOTE(review): both branches below yield $p.Value - the DictionaryEntry test here is redundant
+ $val = if ($p -is [System.Collections.DictionaryEntry]) { $p.Value } else { $p.Value }
+ $childPath = if ($Path) { "$Path.$name" } else { $name }
+ Find-AllArrays -Data $val -Path $childPath -Depth ($Depth + 1) -Arrays $Arrays | Out-Null
+ }
+ }
+ return $Arrays
+}
+
+# ConvertTo-FlatColumns: flatten a parsed-JSON object graph into a hashtable mapping
+# dotted-path column names -> leaf values. The nested Recurse function reads $cols from the
+# enclosing scope via PowerShell's dynamic scoping and mutates the hashtable in place.
+function ConvertTo-FlatColumns {
+ param([object]$Node, [string]$Prefix = '', [int]$MaxDepth = 60)
+ $cols = @{}
+ function Recurse([object]$n, [string]$p, [int]$d) {
+ if ($d -gt $MaxDepth) { return }
+ # $p carries a trailing '.' while descending; TrimEnd strips it when a column is emitted
+ if ($null -eq $n) { if ($p) { $cols[$p.TrimEnd('.')] = $null }; return }
+ if (Test-ScalarValue $n) { if ($p) { $cols[$p.TrimEnd('.')] = $n }; return }
+ if ($n -is [System.Collections.IEnumerable] -and -not ($n -is [string]) -and -not ($n -is [System.Collections.IDictionary])) {
+ # Smart array handling: single-element arrays recurse without index, multi-element become JSON
+ $arr = @($n)
+ if ($arr.Count -eq 1) {
+ Recurse -n $arr[0] -p $p -d ($d + 1)
+ } elseif ($arr.Count -gt 1) {
+ if ($p) { try { $cols[$p.TrimEnd('.')] = ($n | ConvertTo-Json -Depth 10 -Compress -ErrorAction SilentlyContinue) } catch { $cols[$p.TrimEnd('.')] = '' } }
+ } else {
+ # Empty array: emit empty-string cell so the column still exists
+ if ($p) { $cols[$p.TrimEnd('.')] = '' }
+ }
+ return
+ }
+ # Generic object: descend into each property, extending the dotted path
+ $props = $null; try { $props = $n.PSObject.Properties } catch {}
+ if ($props) { foreach ($prop in $props) { $name = [string]$prop.Name; $child = $prop.Value; $cp = if ($p) { $p + $name + '.' } else { $name + '.' }; Recurse -n $child -p $cp -d ($d + 1) } }
+ }
+ Recurse -n $Node -p $Prefix -d 0
+ return $cols
+}
+
+# Profile-AuditData: deliberately a no-op inside worker threads (main-scope profiler not needed here)
+function Profile-AuditData { param([object]$AuditData) } # No-op stub for threads
+'@
+
+ # Extract Convert-ToPurviewExplodedRecords and Convert-ToStructuredRecord function definitions
+ $funcConvertExploded = (Get-Command Convert-ToPurviewExplodedRecords -ErrorAction SilentlyContinue).Definition
+ $funcConvertStructured = (Get-Command Convert-ToStructuredRecord -ErrorAction SilentlyContinue).Definition
+
+ # Build complete initialization script with all functions
+ $fullInitScript = $initScriptText + "`n`n" + `
+ "function Convert-ToPurviewExplodedRecords {`n$funcConvertExploded`n}`n`n" + `
+ "function Convert-ToStructuredRecord {`n$funcConvertStructured`n}"
+
+ $initScriptBlock = [scriptblock]::Create($fullInitScript)
+
+ # Capture additional variables needed in threads
+ $threadVars = @{
+ PurviewExplodedHeader = $PurviewExplodedHeader
+ FlatDepthStandard = $FlatDepthStandard
+ FlatDepthDeep = $FlatDepthDeep
+ JsonDepth = $JsonDepth
+ ExplosionPerRecordRowCap = $ExplosionPerRecordRowCap
+ }
+
+ # Helper to start a new job
+ $startNextJob = {
+ if ($chunkQueue.Count -gt 0) {
+ $chunk = $chunkQueue.Dequeue()
+ $chunksStarted++
+ $job = Start-ThreadJob -Name "PAX_Chunk_$chunksStarted" -InitializationScript $initScriptBlock -ScriptBlock {
+ param($Records, $Params, $Vars)
+
+ # Initialize thread-local script-scoped variables and helpers
+ $script:metrics = @{ FilteringSkippedRecords = 0; FilteringMissingAuditData = 0; FilteringPromptFiltered = 0; FilteringParseFailures = 0; ExplosionEvents = 0; ExplosionRowsFromEvents = 0; ExplosionMaxPerRecord = 0; ExplosionTruncated = $false }
+ $script:DeepExtraColumns = New-Object System.Collections.Generic.List[string]
+ $script:RegexTrueFalse = [regex]::new('^(?i:true|false)$', [System.Text.RegularExpressions.RegexOptions]::Compiled)
+ $script:RegexYes1 = [regex]::new('^(?i:yes|1)$', [System.Text.RegularExpressions.RegexOptions]::Compiled)
+ $script:RegexNo0 = [regex]::new('^(?i:no|0)$', [System.Text.RegularExpressions.RegexOptions]::Compiled)
+ $PurviewExplodedHeader = $Vars.PurviewExplodedHeader
+ $FlatDepthStandard = $Vars.FlatDepthStandard
+ $FlatDepthDeep = $Vars.FlatDepthDeep
+ $JsonDepth = $Vars.JsonDepth
+ $ExplosionPerRecordRowCap = $Vars.ExplosionPerRecordRowCap
+
+ # Thread-local script-scoped helper functions
+ # Parse-DateSafe: Culture-invariant date parsing for Purview API dates.
+ # Returns a UTC [datetime] on success, $null on failure.
+ # In .NET custom format strings the trailing 'Z' below matches only as a LITERAL
+ # character and carries no time-zone information, so AssumeUniversal is required:
+ # with AdjustToUniversal alone, an offset-less string is interpreted as LOCAL time
+ # and shifted by the machine's UTC offset - corrupting already-UTC Purview timestamps.
+ function script:Parse-DateSafe {
+ param([string]$dateStr)
+ if ([string]::IsNullOrWhiteSpace($dateStr)) { return $null }
+ $dateStr = $dateStr.Trim()
+ # Try ISO 8601 formats first (most common from Purview)
+ $isoFormats = @(
+ 'yyyy-MM-ddTHH:mm:ss.fffffffZ', 'yyyy-MM-ddTHH:mm:ss.ffffffZ', 'yyyy-MM-ddTHH:mm:ss.fffffZ',
+ 'yyyy-MM-ddTHH:mm:ss.ffffZ', 'yyyy-MM-ddTHH:mm:ss.fffZ', 'yyyy-MM-ddTHH:mm:ss.ffZ',
+ 'yyyy-MM-ddTHH:mm:ss.fZ', 'yyyy-MM-ddTHH:mm:ssZ', 'yyyy-MM-ddTHH:mm:ss',
+ 'yyyy-MM-dd HH:mm:ss', 'yyyy-MM-dd'
+ )
+ # Treat offset-less input as UTC, then normalize result Kind to Utc
+ $utcStyles = [System.Globalization.DateTimeStyles]::AssumeUniversal -bor [System.Globalization.DateTimeStyles]::AdjustToUniversal
+ foreach ($fmt in $isoFormats) {
+ try { return [datetime]::ParseExact($dateStr, $fmt, [System.Globalization.CultureInfo]::InvariantCulture, $utcStyles) } catch {}
+ }
+ # US date formats (MM/dd/yyyy as Purview returns)
+ # NOTE(review): these return Kind=Unspecified (a later ToUniversalTime() treats them as
+ # local time) - confirm whether US-formatted Purview dates are also UTC.
+ $usFormats = @('M/d/yyyy h:mm:ss tt', 'M/d/yyyy HH:mm:ss', 'M/d/yyyy H:mm:ss', 'M/d/yyyy')
+ foreach ($fmt in $usFormats) {
+ try { return [datetime]::ParseExact($dateStr, $fmt, [System.Globalization.CultureInfo]::InvariantCulture) } catch {}
+ }
+ # Fallback to InvariantCulture Parse
+ try { return [datetime]::Parse($dateStr, [System.Globalization.CultureInfo]::InvariantCulture) } catch { return $null }
+ }
+ function script:Format-DatePurviewFast($dt) {
+ if (-not $dt) { return '' }
+ if ($dt -is [datetime]) { return $dt.ToUniversalTime().ToString('yyyy-MM-ddTHH:mm:ss.fffZ') }
+ $parsed = script:Parse-DateSafe $dt
+ if ($parsed) { return $parsed.ToUniversalTime().ToString('yyyy-MM-ddTHH:mm:ss.fffZ') }
+ return ''
+ }
+ # BoolTFFast: canonicalize boolean-like values to 'TRUE'/'FALSE'.
+ # Booleans and true/false strings are upper-cased; yes/1 -> TRUE, no/0 -> FALSE;
+ # anything else (and $null -> '') passes through unchanged.
+ function script:BoolTFFast($v) {
+ if ($null -eq $v) { return '' }
+ if ($v -is [bool]) { return $v.ToString().ToUpper() }
+ $text = [string]$v
+ switch ($text) {
+ { $script:RegexTrueFalse.IsMatch($_) } { return $_.ToUpper() }
+ { $script:RegexYes1.IsMatch($_) } { return 'TRUE' }
+ { $script:RegexNo0.IsMatch($_) } { return 'FALSE' }
+ default { return $text }
+ }
+ }
+ # ToJsonIfObjectFast: scalars pass through unchanged; complex objects are
+ # serialized to compact JSON ($JsonDepth is captured from the thread's $Vars
+ # payload above), falling back to plain string coercion if serialization fails.
+ function script:ToJsonIfObjectFast($v) {
+ if ($null -eq $v) { return '' }
+ if (Test-ScalarValue $v) { return $v }
+ try { return ($v | ConvertTo-Json -Depth $JsonDepth -Compress) } catch { return [string]$v }
+ }
+ # GetArrayFast: return the named property of $parent as an array
+ # (empty array when the property is missing/null).
+ function script:GetArrayFast($parent, [string]$name) {
+ $val = Get-SafeProperty $parent $name
+ if ($null -eq $val) { return @() }
+ # @() wraps scalars and preserves existing collections alike, so the former
+ # IEnumerable/string branch (both arms returned the same expression) was dead code.
+ return @($val)
+ }
+
+ $results = [System.Collections.Generic.List[object]]::new()
+ $recordCount = 0
+
+ foreach ($log in $Records) {
+ $recordCount++
+ try {
+ $records = if ($Params.EffectiveExplode) {
+ Convert-ToPurviewExplodedRecords -Record $log -Deep:$Params.ExplodeDeep -PartialExplode:$Params.PartialExplode -PromptFilterValue $Params.PromptFilter -SkipMetrics
+ } else {
+ Convert-ToStructuredRecord -Record $log -EnableExplosion:$false
+ }
+
+ $recordsArr = To-RecordArray $records
+ if ($recordsArr.Count -gt 0) {
+ foreach ($r in $recordsArr) {
+ $results.Add($r)
+ }
+ }
+ } catch {
+ # Skip failed records
+ }
+ }
+
+ @{
+ Records = $results.ToArray()
+ RecordCount = $recordCount
+ }
+ } -ArgumentList @($chunk, $threadParams, $threadVars)
+
+ $activeJobs.Add($job)
+ }
+ }
+
+ # Start initial batch of jobs (up to thread limit)
+ for ($j = 0; $j -lt $parallelThrottleLimit -and $chunkQueue.Count -gt 0; $j++) {
+ & $startNextJob
+ }
+
+ # Process job queue until all complete
+ while ($activeJobs.Count -gt 0 -or $chunkQueue.Count -gt 0) {
+ # Check for completed jobs
+ $justCompleted = @($activeJobs | Where-Object { $_.State -eq 'Completed' -or $_.State -eq 'Failed' })
+
+ foreach ($job in $justCompleted) {
+ $activeJobs.Remove($job) | Out-Null
+ $chunksCompleted++
+
+ if ($job.State -eq 'Completed') {
+ try {
+ $result = Receive-Job -Job $job -ErrorAction Stop
+ if ($result) {
+ $completedResults.Add($result)
+ }
+ } catch {
+ $failedChunks++
+ }
+ } else {
+ $failedChunks++
+ }
+
+ Remove-Job -Job $job -Force -ErrorAction SilentlyContinue
+
+ # Start next job from queue
+ & $startNextJob
+ }
+
+ # Progress update
+ if (((Get-Date) - $lastProgressTime) -gt $progressInterval) {
+ $elapsed = (Get-Date) - $explosionStart
+ $elapsedStr = if ($elapsed.TotalMinutes -ge 1) { "{0}m {1}s" -f [int]$elapsed.TotalMinutes, $elapsed.Seconds } else { "{0}s" -f [int]$elapsed.TotalSeconds }
+ $pct = [int](($chunksCompleted / $totalChunks) * 100)
+ $recordsProcessed = $chunksCompleted * $chunkSize # Approximate
+ $rate = if ($elapsed.TotalSeconds -gt 0) { [int]($recordsProcessed / $elapsed.TotalSeconds) } else { 0 }
+ Write-LogHost ("[{0}] Chunks: {1}/{2} ({3}%) | Active: {4} | Queue: {5} | ~{6:N0} rec/sec | {7}" -f (Get-Date -Format "HH:mm:ss"), $chunksCompleted, $totalChunks, $pct, $activeJobs.Count, $chunkQueue.Count, $rate, $elapsedStr) -ForegroundColor DarkCyan
+ $lastProgressTime = Get-Date
+ }
+
+ if ($activeJobs.Count -gt 0) {
+ Start-Sleep -Milliseconds 100
+ }
+ }
+
+ # Final 100% status line for Phase 1
+ $elapsed = (Get-Date) - $explosionStart
+ $elapsedStr = if ($elapsed.TotalMinutes -ge 1) { "{0}m {1}s" -f [int]$elapsed.TotalMinutes, $elapsed.Seconds } else { "{0}s" -f [int]$elapsed.TotalSeconds }
+ $finalRate = if ($elapsed.TotalSeconds -gt 0) { [int]($totalRecords / $elapsed.TotalSeconds) } else { 0 }
+ Write-LogHost ("[{0}] Chunks: {1}/{1} (100%) | Complete | ~{2:N0} rec/sec | {3}" -f (Get-Date -Format "HH:mm:ss"), $totalChunks, $finalRate, $elapsedStr) -ForegroundColor DarkCyan
+
+ $explosionElapsed = (Get-Date) - $explosionStart
+ $explosionRate = if ($explosionElapsed.TotalSeconds -gt 0) { [int]($totalRecords / $explosionElapsed.TotalSeconds) } else { 0 }
+
+ if ($failedChunks -gt 0) {
+ Write-LogHost "WARNING: $failedChunks chunk(s) failed - some records may be missing" -ForegroundColor Yellow
+ }
+
+ # ─────────────────────────────────────────────────────────────────────────
+ # PHASE 2: Collect all results into single list
+ # ─────────────────────────────────────────────────────────────────────────
+ Write-LogHost "Phase 2: Consolidating results from $($completedResults.Count) chunks..." -ForegroundColor Cyan
+ $allExplodedRecords = [System.Collections.Generic.List[object]]::new()
+ $totalSourceRecords = 0
+
+ foreach ($result in $completedResults) {
+ if ($result.Records) {
+ $totalSourceRecords += $result.RecordCount
+ foreach ($r in $result.Records) {
+ $allExplodedRecords.Add($r)
+ }
+ }
+ }
+ $completedResults.Clear()
+
+ $phaseDesc = if ($ExplodeArrays -or $ExplodeDeep) { "Parallel explosion" } else { "Parallel conversion" }
+ Write-LogHost "$phaseDesc complete: $totalRecords records -> $($allExplodedRecords.Count) rows in $([Math]::Round($explosionElapsed.TotalSeconds, 1))s ($explosionRate rec/sec)" -ForegroundColor Green
+
+ # Update checkpoint with explosion completion
+ if ($script:CheckpointEnabled -and $script:CheckpointData) {
+ $script:CheckpointData.explosion.status = 'Completed'
+ $script:CheckpointData.explosion.recordsProcessed = $totalRecords
+ $script:CheckpointData.explosion.rowsGenerated = $allExplodedRecords.Count
+ $script:CheckpointData.explosion.lastUpdateTime = (Get-Date).ToUniversalTime().ToString('o')
+ Save-CheckpointToDisk
+ }
+
+ # ─────────────────────────────────────────────────────────────────────────
+ # PHASES 3 & 4: Discover schema (Phase 3), then write CSV (Phase 4) - must be serial for proper ordering
+ # ─────────────────────────────────────────────────────────────────────────
+ Write-LogHost "Phase 3: Discovering schema from all $($allExplodedRecords.Count) rows (full scan)..." -ForegroundColor Cyan
+ $schemaStart = Get-Date
+
+ $structuredDataCount = $allExplodedRecords.Count
+ $script:metrics.TotalStructuredRows = $structuredDataCount
+ $processedRecordCount = $totalRecords
+
+ # Update per-activity Structured counts for Activity Type Breakdown display
+ # (Parallel explosion doesn't update these during processing - count now from consolidated results)
+ $activityStructuredCounts = @{}
+ foreach ($row in $allExplodedRecords) {
+ $opName = if ($row -is [hashtable]) { $row['Operation'] } else { $row.Operation }
+ if ($opName) {
+ if (-not $activityStructuredCounts.ContainsKey($opName)) { $activityStructuredCounts[$opName] = 0 }
+ $activityStructuredCounts[$opName]++
+ }
+ }
+ foreach ($opName in $activityStructuredCounts.Keys) {
+ if ($script:metrics.Activities.ContainsKey($opName)) {
+ $script:metrics.Activities[$opName].Structured = $activityStructuredCounts[$opName]
+ }
+ }
+
+ # Build schema by scanning ALL records (not just first N)
+ # This ensures 100% column discovery - only reads property names, not values (fast)
+ $columnOrder = New-Object System.Collections.Generic.List[string]
+ if ($ExplodeArrays -or $ExplodeDeep -or $ForcedRawInputCsvExplosion) {
+ if ($IncludeM365Usage -and $RAWInputCSV) {
+ foreach ($c in (Get-M365UsageWideHeader -RawCsvPath $RAWInputCSV -BaseHeader $M365UsageBaseHeader)) { [void]$columnOrder.Add($c) }
+ } else {
+ foreach ($c in $PurviewExplodedHeader) { [void]$columnOrder.Add($c) }
+ }
+ if ($ExplodeDeep -and $script:DeepExtraColumns -and $script:DeepExtraColumns.Count -gt 0) {
+ foreach ($c in $script:DeepExtraColumns) { if (-not $columnOrder.Contains($c)) { [void]$columnOrder.Add($c) } }
+ }
+ }
+ # Full scan: iterate ALL exploded records to discover every column
+ # This is O(n) but only touches property names, not values - typically <2s for 100K rows
+ foreach ($sr in $allExplodedRecords) {
+ foreach ($pn in $sr.PSObject.Properties.Name) {
+ if (-not $columnOrder.Contains($pn)) { [void]$columnOrder.Add($pn) }
+ }
+ }
+ $schemaElapsed = (Get-Date) - $schemaStart
+ Write-LogHost "Schema discovered: $($columnOrder.Count) columns from $($allExplodedRecords.Count) rows in $([Math]::Round($schemaElapsed.TotalSeconds, 1))s (full scan - 100% column coverage)" -ForegroundColor Cyan
+ $schemaSampleRows.Clear()
+ $schemaFrozen = $true
+
+ Write-LogHost "Phase 4: Writing CSV..." -ForegroundColor Cyan
+ $writeStart = Get-Date
+
+ # Calculate effective chunk size for CSV writing
+ # OPTIMIZATION: Increased base chunk sizes since Write-CsvRows now uses column index lookup (O(1) vs O(n))
+ $colCount = $columnOrder.Count
+ $effectiveChunkSize = $StreamingChunkSize
+ if ($colCount -gt 1000) { $effectiveChunkSize = [int][Math]::Min($effectiveChunkSize, 3000) }
+ elseif ($colCount -gt 500) { $effectiveChunkSize = [int][Math]::Min($effectiveChunkSize, 5000) }
+ elseif ($colCount -gt 250) { $effectiveChunkSize = [int][Math]::Min($effectiveChunkSize, 8000) }
+ elseif ($colCount -gt 100) { $effectiveChunkSize = [int][Math]::Max($StreamingChunkSize, 10000) } # New tier for 100-250 columns
+ else { $effectiveChunkSize = [int][Math]::Min(20000, [Math]::Max($StreamingChunkSize * 4, 15000)) } # Boost for ≤100 columns
+ $script:metrics.EffectiveChunkSize = $effectiveChunkSize
+
+ # Open CSV writer and write all records in chunks
+ Open-CsvWriter -Path $exportTemp -Columns $columnOrder
+ $csvWriter = $true
+
+ $writeChunkSize = $effectiveChunkSize
+ $totalWritten = 0
+ $lastWriteProgressTime = Get-Date
+ $writeProgressInterval = [TimeSpan]::FromSeconds(60)
+
+ for ($i = 0; $i -lt $allExplodedRecords.Count; $i += $writeChunkSize) {
+ $endIdx = [Math]::Min($i + $writeChunkSize - 1, $allExplodedRecords.Count - 1)
+ $chunk = $allExplodedRecords[$i..$endIdx]
+
+ # Rows from parallel explosion are already hashtables - pass directly to CSV writer
+ # Removed: Select-Object -Property $columnOrder (extremely slow with 100+ columns)
+ if ($chunk.Count -gt 0) {
+ Write-CsvRows -Rows $chunk -Columns $columnOrder
+ $totalWritten += $chunk.Count
+ }
+
+ # Progress every 60 seconds
+ if (((Get-Date) - $lastWriteProgressTime) -gt $writeProgressInterval) {
+ $writeElapsedSoFar = (Get-Date) - $writeStart
+ $elapsedStr = if ($writeElapsedSoFar.TotalMinutes -ge 1) { "{0}m {1}s" -f [int]$writeElapsedSoFar.TotalMinutes, $writeElapsedSoFar.Seconds } else { "{0}s" -f [int]$writeElapsedSoFar.TotalSeconds }
+ $pct = [int](($totalWritten / $allExplodedRecords.Count) * 100)
+ $rowRate = if ($writeElapsedSoFar.TotalSeconds -gt 0) { [int]($totalWritten / $writeElapsedSoFar.TotalSeconds) } else { 0 }
+ Write-LogHost ("[{0}] CSV Write: {1:N0}/{2:N0} rows ({3}%) | {4} cols | ~{5:N0} rows/sec | {6}" -f (Get-Date -Format "HH:mm:ss"), $totalWritten, $allExplodedRecords.Count, $pct, $columnOrder.Count, $rowRate, $elapsedStr) -ForegroundColor DarkCyan
+ $lastWriteProgressTime = Get-Date
+ }
+ }
+
+ # Final Phase 4 (CSV write) status line
+ $writeElapsed = (Get-Date) - $writeStart
+ $elapsedStr = if ($writeElapsed.TotalMinutes -ge 1) { "{0}m {1}s" -f [int]$writeElapsed.TotalMinutes, $writeElapsed.Seconds } else { "{0}s" -f [int]$writeElapsed.TotalSeconds }
+ $finalRowRate = if ($writeElapsed.TotalSeconds -gt 0) { [int]($totalWritten / $writeElapsed.TotalSeconds) } else { 0 }
+ Write-LogHost ("[{0}] CSV Write: {1:N0}/{1:N0} rows (100%) | {2} cols | ~{3:N0} rows/sec | {4}" -f (Get-Date -Format "HH:mm:ss"), $totalWritten, $columnOrder.Count, $finalRowRate, $elapsedStr) -ForegroundColor Cyan
+ Write-LogHost "CSV write complete: $totalWritten rows in $([Math]::Round($writeElapsed.TotalSeconds, 1))s" -ForegroundColor Green
+
+ # Track activity metrics (approximate - we don't have per-record operation info in parallel mode)
+ # This is a known limitation - parallel mode won't have detailed per-operation breakdown
+
+ # Clean up to free memory
+ $allExplodedRecords.Clear()
+ $allExplodedRecords = $null
+ [System.GC]::Collect()
+
+ $parallelElapsed = (Get-Date) - $parallelStartTime
+ $parallelRate = if ($parallelElapsed.TotalSeconds -gt 0) { [int]($totalRecords / $parallelElapsed.TotalSeconds) } else { 0 }
+ $completeDesc = if ($ExplodeArrays -or $ExplodeDeep) { "PARALLEL EXPLOSION" } else { "PARALLEL PROCESSING" }
+ Write-LogHost "$completeDesc COMPLETE: $totalRecords records -> $structuredDataCount rows in $([Math]::Round($parallelElapsed.TotalSeconds, 1))s total ($parallelRate rec/sec)" -ForegroundColor Green
+ }
+ # END PARALLEL BLOCK
+
+ # Flush any remaining unfrozen schema samples (small datasets)
+ if (-not $schemaFrozen -and $schemaSampleRows.Count -gt 0) {
+ if ($ExplodeArrays -or $ExplodeDeep -or $ForcedRawInputCsvExplosion) {
+ $columnOrder = New-Object System.Collections.Generic.List[string];
+ if ($IncludeM365Usage -and $RAWInputCSV) {
+ foreach ($c in (Get-M365UsageWideHeader -RawCsvPath $RAWInputCSV -BaseHeader $M365UsageBaseHeader)) { [void]$columnOrder.Add($c) }
+ } else {
+ foreach ($c in $PurviewExplodedHeader) { [void]$columnOrder.Add($c) }
+ }
+ if ($ExplodeDeep -and $script:DeepExtraColumns -and $script:DeepExtraColumns.Count -gt 0) { foreach ($c in $script:DeepExtraColumns) { if (-not $columnOrder.Contains($c)) { [void]$columnOrder.Add($c) } } }
+ } else {
+ $columnOrder = New-Object System.Collections.Generic.List[string]; foreach ($sr in $schemaSampleRows) { foreach ($pn in $sr.PSObject.Properties.Name) { if (-not $columnOrder.Contains($pn)) { [void]$columnOrder.Add($pn) } } }
+ }
+ foreach ($sr in $schemaSampleRows) { foreach ($pn in $sr.PSObject.Properties.Name) { if (-not $columnOrder.Contains($pn)) { [void]$columnOrder.Add($pn) } } }
+ Write-LogHost "Schema finalized with $($columnOrder.Count) columns from $($schemaSampleRows.Count) total rows (small dataset)" -ForegroundColor DarkCyan
+ if (-not $csvWriter) { Open-CsvWriter -Path $exportTemp -Columns $columnOrder; $csvWriter = $true }
+ $emitRows = @(); foreach ($sr in $schemaSampleRows) { $emitRows += ($sr | Select-Object -Property $columnOrder) }; if ($emitRows.Count -gt 0) { Write-CsvRows -Rows $emitRows -Columns $columnOrder }
+ $schemaSampleRows.Clear(); $schemaFrozen = $true
+ }
+ # Flush any remaining buffered rows after schema freeze
+ if ($schemaFrozen -and $buffer.Count -gt 0) {
+ $emitSet = $buffer | ForEach-Object { $_ | Select-Object -Property $columnOrder }
+ if (-not $csvWriter) { Open-CsvWriter -Path $exportTemp -Columns $columnOrder; $csvWriter = $true }
+ if ($emitSet.Count -gt 0) { Write-CsvRows -Rows $emitSet -Columns $columnOrder }
+ $buffer.Clear()
+ }
+ # Cleanup: ensure writer closed before export finalization.
+ if ($csvWriter) { try { Close-CsvWriter } catch {} }
+
+ # Replay fallback: if no structured rows but we have raw logs, emit compact rows (non-exploded)
+ if ($structuredDataCount -eq 0 -and $allLogs.Count -gt 0 -and $RAWInputCSV) {
+ Write-LogHost "Replay fallback: emitting compact non-exploded rows" -ForegroundColor Yellow
+ try {
+ $columnOrder = @('RecordId','CreationDate','RecordType','Operation','UserId','AuditData','AssociatedAdminUnits','AssociatedAdminUnitsNames')
+ if (-not $csvWriter) { Open-CsvWriter -Path $exportTemp -Columns $columnOrder; $csvWriter = $true }
+ $fallbackCount = 0
+ foreach ($log in $allLogs) {
+ try {
+ $rows = Convert-ToStructuredRecord -Record $log -EnableExplosion:$false
+ $rowsArr = To-RecordArray $rows
+ if ($rowsArr.Count -gt 0) {
+ $fallbackCount += $rowsArr.Count
+ Write-CsvRows -Rows ($rowsArr | ForEach-Object { $_ | Select-Object -Property $columnOrder }) -Columns $columnOrder
+ }
+ } catch {}
+ }
+ $structuredDataCount = $fallbackCount
+ try { $script:metrics.TotalStructuredRows = $fallbackCount } catch {}
+ Write-LogHost ("Replay fallback emitted: {0} rows" -f $fallbackCount) -ForegroundColor Yellow
+ } catch { Write-LogHost "Replay fallback failed: $($_.Exception.Message)" -ForegroundColor Red }
+ }
+ # Fallback: ensure temp file exists so Move-Item does not fail (very small datasets may not have flushed rows yet)
+ # Skip this fallback if fast-path already moved the temp file to final output
+ if (-not $skipToPostProcessing -and -not (Test-Path $exportTemp)) {
+ try {
+ $enc = New-Object System.Text.UTF8Encoding($false)
+ $sw = [System.IO.StreamWriter]::new($exportTemp, $false, $enc)
+ if ($columnOrder) {
+ $escapedCols = New-Object System.Collections.Generic.List[string]
+ foreach ($col in $columnOrder) {
+ $c = [string]$col; $needsQuote = ($c -match '[",\r\n]') -or $c.StartsWith(' ') -or $c.EndsWith(' ')
+ $escaped = $c -replace '"','""'
+ if ($needsQuote) { $escaped = '"' + $escaped + '"' }
+ $escapedCols.Add($escaped) | Out-Null
+ }
+ $sw.WriteLine(($escapedCols -join ','))
+ } else {
+ $sw.WriteLine('RecordId')
+ }
+ $sw.Flush(); $sw.Dispose()
+ } catch { Write-LogHost "WARNING: Fallback temp file creation failed: $($_.Exception.Message)" -ForegroundColor Yellow }
+ }
+
+ # Final explosion progress update (100% completion) - SERIAL MODE ONLY
+ # Parallel mode has its own completion summary, skip this to avoid duplicate/confusing output
+ # Also skip when fast path or streaming merge already handled export (skipToPostProcessing=true)
+ if (-not $useParallelExplosion -and -not $skipToPostProcessing) {
+ if ($allLogs.Count -ge $explosionProgressInterval) {
+ $elapsed = (Get-Date) - $te0
+ $rate = [int]($processedRecordCount / $elapsed.TotalSeconds)
+ $elapsedStr = if ($elapsed.TotalMinutes -ge 1) {
+ "{0}m {1}s" -f [int]$elapsed.TotalMinutes, $elapsed.Seconds
+ } else {
+ "{0}s" -f [int]$elapsed.TotalSeconds
+ }
+ Write-LogHost ("[{0}] Processing: {1:N0} / {2:N0} records (100%) | Elapsed: {3} | Rate: {4:N0} rec/sec" -f (Get-Date -Format "HH:mm:ss"), $processedRecordCount, $allLogs.Count, $elapsedStr, $rate) -ForegroundColor DarkCyan
+ }
+
+ $te1 = Get-Date; try { $script:metrics.ExplosionMs += [int]($te1 - $te0).TotalMilliseconds } catch {}
+ Write-LogHost "Standard processing (streamed) complete: $($allLogs.Count) input -> $structuredDataCount output" -ForegroundColor Cyan
+ }
+
+ # Explain record count changes (filtering vs explosion)
+ if ($structuredDataCount -lt $allLogs.Count) {
+ $recordsFiltered = $allLogs.Count - $structuredDataCount
+ Write-LogHost ""
+ Write-LogHost " ℹ Record count decreased: $recordsFiltered parent record(s) were filtered out" -ForegroundColor Yellow
+ Write-LogHost " Note: Filtering happens DURING explosion (before array expansion)" -ForegroundColor DarkYellow
+
+ # List active filters that could cause record exclusion
+ $activeFilters = @()
+ if ($PromptFilter) {
+ $activeFilters += "PromptFilter ($PromptFilter mode)"
+ Write-LogHost " • PromptFilter: Records with no matching messages were excluded" -ForegroundColor DarkYellow
+ }
+ if ($AgentId) {
+ $activeFilters += "AgentId filter"
+ Write-LogHost " • AgentId: Records not matching specified AgentId(s) were excluded" -ForegroundColor DarkYellow
+ }
+ if ($AgentsOnly) {
+ $activeFilters += "AgentsOnly filter"
+ Write-LogHost " • AgentsOnly: Records without any AgentId were excluded" -ForegroundColor DarkYellow
+ }
+ if ($ExcludeAgents) {
+ $activeFilters += "ExcludeAgents filter"
+ Write-LogHost " • ExcludeAgents: Records with AgentId present were excluded" -ForegroundColor DarkYellow
+ }
+ if ($UserIds) {
+ $activeFilters += "UserIds filter"
+ Write-LogHost " • UserIds: Records not matching specified user(s) were excluded" -ForegroundColor DarkYellow
+ }
+ if ($GroupNames) {
+ $activeFilters += "GroupNames filter"
+ Write-LogHost " • GroupNames: Records not matching group members were excluded" -ForegroundColor DarkYellow
+ }
+
+ if ($activeFilters.Count -eq 0) {
+ Write-LogHost " Reason: Unknown (no explicit filters active, possible internal filtering)" -ForegroundColor DarkYellow
+ }
+ }
+ # Use stored count for streaming merge (allLogs was cleared), otherwise use allLogs.Count
+ $inputRecordCount = if ($script:OriginalInputRecordCount) { $script:OriginalInputRecordCount } else { $allLogs.Count }
+
+ if ($structuredDataCount -eq $inputRecordCount -and ($ExplodeArrays -or $ExplodeDeep)) {
+ Write-LogHost ""
+ Write-LogHost " ℹ No explosion occurred (1:1 ratio)" -ForegroundColor Yellow
+ if ($PromptFilter) {
+ Write-LogHost " Reason: PromptFilter limits explosion to matching messages only" -ForegroundColor DarkYellow
+ Write-LogHost " Each record had exactly 1 matching message, producing 1 row per record" -ForegroundColor DarkYellow
+ Write-LogHost " Tip: Without PromptFilter, these records would explode to multiple rows" -ForegroundColor Cyan
+ } else {
+ Write-LogHost " Possible reasons: Records have no arrays to explode (Messages, Contexts, etc.)" -ForegroundColor DarkYellow
+ }
+ }
+ elseif ($inputRecordCount -gt 0 -and $structuredDataCount -gt $inputRecordCount) {
+ $explosionRatio = [Math]::Round($structuredDataCount / $inputRecordCount, 1)
+ Write-LogHost ""
+ Write-LogHost " Array explosion successful: ${explosionRatio}x expansion ($inputRecordCount records → $structuredDataCount rows)" -ForegroundColor Green
+ }
+
+ if ($postFreezeNewColumns -gt 0) { Write-LogHost "NOTICE: $postFreezeNewColumns row(s) contained new columns after schema freeze (ignored). This only affects serial mode - increase -StreamingSchemaSample or use parallel mode (PS7+) for full coverage." -ForegroundColor DarkYellow }
+ Set-ProgressPhase -Phase 'Export' -Status 'Finalizing streaming CSV'
+
+ # Handle AppendFile mode vs normal mode (skip if fast-path already handled export)
+ if (-not $skipToPostProcessing) {
+ if ($AppendFile) {
+ # AppendFile mode: Always create temporary CSV with new data first
+ $tempCsvPath = Join-Path $OutputPath "Temp_NewData_$global:ScriptRunTimestamp.csv"
+ $tx0 = Get-Date; Move-Item -Force -Path $exportTemp -Destination $tempCsvPath; $tx1 = Get-Date
+ try { $script:metrics.ExportMs += [int]($tx1 - $tx0).TotalMilliseconds } catch {}
+
+ if ($ExportWorkbook) {
+ # Excel AppendFile: Keep temp CSV for later reading
+ # Store temp CSV path for Excel conversion to read from
+ $script:AppendFileTempCsv = $tempCsvPath
+ # $OutputFile stays as the user's target Excel file (set earlier at line 4355)
+ Write-LogHost "Created temporary CSV for Excel append: $tempCsvPath" -ForegroundColor Gray
+ } else {
+ # CSV AppendFile: Append new data to existing CSV
+ Write-LogHost "Appending new data to existing CSV: $OutputFile" -ForegroundColor Cyan
+ try {
+ # Read new data (without header)
+ $newLines = Get-Content -Path $tempCsvPath -ErrorAction Stop | Select-Object -Skip 1
+
+ # Append to existing file
+ Add-Content -Path $OutputFile -Value $newLines -Encoding UTF8 -ErrorAction Stop
+
+ Write-LogHost " Appended $($newLines.Count) new record(s) to existing CSV" -ForegroundColor Green
+
+ # Clean up temporary file
+ Remove-Item -Path $tempCsvPath -Force -ErrorAction SilentlyContinue
+ }
+ catch {
+ Write-Host "ERROR: Failed to append CSV data: $($_.Exception.Message)" -ForegroundColor Red
+ Write-Host " Existing file: $OutputFile" -ForegroundColor Yellow
+ Write-Host " New data (temp): $tempCsvPath" -ForegroundColor Yellow
+ Write-Host "" -ForegroundColor Yellow
+ Write-Host "Possible causes:" -ForegroundColor Yellow
+ Write-Host " • File is open in Excel or another program" -ForegroundColor Gray
+ Write-Host " • Insufficient permissions" -ForegroundColor Gray
+ Write-Host " • Column mismatch between files" -ForegroundColor Gray
+ Write-Host "" -ForegroundColor Yellow
+ Write-Host "The new data is preserved in: $tempCsvPath" -ForegroundColor Cyan
+ exit 1
+ }
+ }
+ } else {
+ # Normal mode: Rename temp to final output (use CsvOutputFile for intermediate CSV when ExportWorkbook)
+ $tx0 = Get-Date; Move-Item -Force -Path $exportTemp -Destination $script:CsvOutputFile; $tx1 = Get-Date
+ try { $script:metrics.ExportMs += [int]($tx1 - $tx0).TotalMilliseconds } catch {}
+ }
+ } # End of skipToPostProcessing guard
+
+ $script:progressState.Export.Total = 1; $script:progressState.Export.Current = 1; Update-Progress -Status 'Export complete (stream)'; Set-ProgressPhase -Phase 'Complete' -Status 'Done'; Complete-Progress
+
+ # --- Dynamic Downgrade: If combined CSV contains data for only one activity type, rename to single-activity convention ---
+ # Note: Disabled when AppendFile is used to avoid renaming user-specified files
+ # Note: Skip for fast-path (non-explosion) runs - Import-Csv on large files is too slow
+ if (-not $skipToPostProcessing -and -not $ExportWorkbook -and $csvCombineMode -and (Test-Path $OutputFile) -and -not $AppendFile) {
+ try {
+ # Read all unique Operation values from the CSV
+ $allOperations = Import-Csv -Path $OutputFile -ErrorAction Stop |
+ Where-Object { -not [string]::IsNullOrWhiteSpace($_.Operation) } |
+ Select-Object -ExpandProperty Operation -Unique
+
+ $distinctOps = @($allOperations)
+ if ($distinctOps.Count -eq 1) {
+ $onlyType = [string]$distinctOps[0] # Explicit cast to string to avoid array slicing
+ if (-not [string]::IsNullOrWhiteSpace($onlyType)) {
+ $safeType = $onlyType -replace '[\/:*?"<>|]', '_'
+ $singleName = "Purview_Audit_${safeType}_${global:ScriptRunTimestamp}.csv"
+ $targetPath = Join-Path $OutputPath $singleName
+ if ($OutputFile -ne $targetPath) {
+ Write-LogHost "Detected single-activity result in combined mode: '$onlyType' → Renaming output file to $singleName" -ForegroundColor Yellow
+ Move-Item -Force -Path $OutputFile -Destination $targetPath
+ $OutputFile = $targetPath
+ # Update parameter snapshot if present
+ if ($paramSnapshot -and $paramSnapshot.Contains('OutputFile')) { $paramSnapshot['OutputFile'] = $OutputFile }
+ }
+ }
+ }
+ } catch {
+ Write-LogHost "WARNING: Single-activity downgrade check failed: $($_.Exception.Message)" -ForegroundColor DarkYellow
+ }
+ }
+ } # End else (live audit log query mode)
+ } # End if (-not $OnlyUserInfo) - Skip all audit log queries when only exporting user data
+
+ if ($OnlyUserInfo) {
+ # -OnlyUserInfo mode: Initialize empty logs collection
+ $allLogs = New-Object System.Collections.ArrayList
+
+ # Handle OnlyUserInfo with ExportWorkbook - create Excel with just EntraUsers tab
+ if ($ExportWorkbook -and $script:EntraUsersData) {
+ $entraExcelFile = Join-Path $OutputPath "EntraUsers_MAClicensing_${global:ScriptRunTimestamp}.xlsx"
+ try {
+ $entraTab = 'EntraUsers_MAClicensing'
+ Write-LogHost "Creating Excel workbook with $entraTab tab ($($script:EntraUsersData.Count) rows)..." -ForegroundColor Cyan
+ $dataTable = $script:EntraUsersData | ConvertTo-DataTable
+ Send-SQLDataToExcel -DataTable $dataTable -Path $entraExcelFile -WorkSheetName $entraTab -Force -FreezeTopRow -BoldTopRow -AutoSize -NoNumberConversion '*'
+ Write-LogHost "EntraUsers Excel workbook created: $entraExcelFile" -ForegroundColor Green
+ } catch {
+ Write-LogHost "WARNING: Failed to export EntraUsers Excel: $($_.Exception.Message)" -ForegroundColor Yellow
+ }
+ }
+
+ # Handle OnlyUserInfo CSV export (non-workbook mode)
+ if (-not $ExportWorkbook -and $script:EntraUsersData) {
+ $entraFile = Join-Path $OutputPath "EntraUsers_MAClicensing_${global:ScriptRunTimestamp}.csv"
+ try {
+ Write-LogHost "Exporting EntraUsers data to CSV ($($script:EntraUsersData.Count) rows)..." -ForegroundColor Cyan
+ $script:EntraUsersData | Export-Csv -Path $entraFile -NoTypeInformation -Encoding UTF8 -ErrorAction Stop
+ Write-LogHost "EntraUsers CSV created: $entraFile" -ForegroundColor Green
+ } catch {
+ Write-LogHost "WARNING: Failed to export EntraUsers CSV: $($_.Exception.Message)" -ForegroundColor Yellow
+ }
+ }
+ }
+ $csvSeparateMode = (-not $ExportWorkbook -and -not $csvCombineMode -and -not $AppendFile)
+
+ # --- EntraUsers CSV export (for IncludeUserInfo mode with combined/append CSV) ---
+ # Note: OnlyUserInfo mode is handled in the dedicated block above
+ # Export if: not workbook mode AND IncludeUserInfo enabled (not OnlyUserInfo) AND data exists
+ if (-not $ExportWorkbook -and -not $OnlyUserInfo -and $IncludeUserInfo -and $script:EntraUsersData) {
+ $entraFile = Join-Path $OutputPath "EntraUsers_MAClicensing_${global:ScriptRunTimestamp}.csv"
+ try {
+ if ($script:EntraUsersData.Count -gt 0) {
+ $script:EntraUsersData | Export-Csv -Path $entraFile -NoTypeInformation -Encoding UTF8 -ErrorAction Stop
+ } else {
+ # Header-only export
+ $header = ($script:EntraUsersData | Select-Object -First 1 | Get-Member -MemberType NoteProperty | Select-Object -ExpandProperty Name)
+ if ($header) { ($null | Select-Object $header) | Export-Csv -Path $entraFile -NoTypeInformation -Encoding UTF8 }
+ }
+ Write-LogHost "EntraUsers CSV created: $entraFile" -ForegroundColor Green
+ } catch { Write-LogHost "WARNING: Failed to export EntraUsers CSV: $($_.Exception.Message)" -ForegroundColor Yellow }
+ }
+
+ if ($csvSeparateMode -and -not $OnlyUserInfo -and (Test-Path $OutputFile)) {
+ Write-LogHost ""
+ Write-LogHost "=== Splitting CSV by Activity Type ===" -ForegroundColor Cyan
+ Write-LogHost "Reading combined CSV: $OutputFile" -ForegroundColor Gray
+
+ try {
+ # Read combined CSV
+ $allRecords = Import-Csv -Path $OutputFile -ErrorAction Stop
+ Write-LogHost "Loaded $($allRecords.Count) records from combined CSV" -ForegroundColor Gray
+
+ # Group by Operation field
+ $groupedRecords = $allRecords | Group-Object -Property Operation
+ Write-LogHost "Found $($groupedRecords.Count) activity types" -ForegroundColor Gray
+
+ # Write separate CSV files
+ $outputDir = Split-Path $OutputFile -Parent
+ $createdFiles = @()
+
+ foreach ($group in $groupedRecords) {
+ $activityType = $group.Name
+ if ([string]::IsNullOrWhiteSpace($activityType)) { $activityType = "Unknown" }
+
+ # Sanitize filename (remove invalid characters)
+ $safeActivityName = $activityType -replace '[\\/:*?"<>|]', '_'
+ # Base name already contains full prefix+timestamp, just prepend activity type
+ $fileName = "Purview_Audit_${safeActivityName}_${global:ScriptRunTimestamp}.csv"
+ $filePath = Join-Path $outputDir $fileName # Export to separate CSV
+ $group.Group | Export-Csv -Path $filePath -NoTypeInformation -Encoding UTF8 -ErrorAction Stop
+ $createdFiles += $filePath
+
+ Write-LogHost " • $activityType → $fileName ($($group.Count) records)" -ForegroundColor DarkCyan
+ }
+
+ # Delete combined CSV file
+ Remove-Item -Path $OutputFile -Force -ErrorAction SilentlyContinue
+ Write-LogHost "Removed combined CSV (replaced with $($createdFiles.Count) separate files)" -ForegroundColor Gray
+
+ # Export EntraUsers CSV in separated mode
+ if ($IncludeUserInfo -and $script:EntraUsersData) {
+ $entraFile = Join-Path $outputDir "EntraUsers_MAClicensing_${global:ScriptRunTimestamp}.csv"
+ try {
+ $script:EntraUsersData | Export-Csv -Path $entraFile -NoTypeInformation -Encoding UTF8 -ErrorAction Stop
+ Write-LogHost " • EntraUsers → $(Split-Path -Leaf $entraFile) ($($script:EntraUsersData.Count) rows)" -ForegroundColor DarkCyan
+ $createdFiles += $entraFile
+ } catch { Write-LogHost "WARNING: Failed to export EntraUsers CSV: $($_.Exception.Message)" -ForegroundColor Yellow }
+ }
+
+ # Update OutputFile to point to directory for summary message
+ $script:CsvSplitFiles = $createdFiles
+ Write-LogHost "CSV splitting complete: $($createdFiles.Count) files created" -ForegroundColor Green
+
+ } catch {
+ Write-LogHost "WARNING: CSV splitting failed: $($_.Exception.Message)" -ForegroundColor Yellow
+ Write-LogHost "Combined CSV retained at: $OutputFile" -ForegroundColor Yellow
+ }
+ }
+
+    # --- Excel Post-Processing Conversion (Option A) ---
+    # Converts the intermediate CSV into an .xlsx workbook when -ExportWorkbook was requested.
+    if ($ExportWorkbook -and $script:CsvOutputFile -and (Test-Path $script:CsvOutputFile)) {
+        Write-LogHost ""
+        Write-LogHost "=== Converting CSV to Excel ===" -ForegroundColor Cyan
+
+        # Excel filename already determined at script start in $OutputFile
+        $excelFilePath = $OutputFile
+
+        # Handle AppendFile mode vs normal mode
+        # Note: If AppendFile mode, temp CSV was already created during CSV export (stored in $script:AppendFileTempCsv)
+        if ($AppendFile) {
+            # AppendFile mode: Use user-specified Excel file and temp CSV
+            $excelFilePath = $OutputFile # User's target Excel file (set at line 4355)
+            $csvFilePath = $script:AppendFileTempCsv # Temp CSV with new data (set at line 7557)
+
+            Write-LogHost "AppendFile mode: Appending to existing workbook: $excelFilePath" -ForegroundColor Cyan
+            Write-LogHost "  Reading new data from: $csvFilePath" -ForegroundColor Gray
+
+            # Pre-flight: Test file accessibility before trying Excel operations
+            # Opening with FileShare.ReadWrite tolerates Excel's shared lock; only an
+            # exclusive lock (or permission/sync problems) makes this throw.
+            try {
+                $fileStream = [System.IO.File]::Open($excelFilePath, [System.IO.FileMode]::Open, [System.IO.FileAccess]::Read, [System.IO.FileShare]::ReadWrite)
+                $fileStream.Close()
+                $fileStream.Dispose()
+            }
+            catch {
+                Write-Host "ERROR: Cannot access file for reading: $($_.Exception.Message)" -ForegroundColor Red
+                Write-Host "  File: $excelFilePath" -ForegroundColor Yellow
+                Write-Host "" -ForegroundColor Yellow
+                Write-Host "Common causes:" -ForegroundColor Yellow
+                Write-Host "  • File is currently open in Excel with exclusive lock (close it and try again)" -ForegroundColor Gray
+                Write-Host "  • File is in a OneDrive/SharePoint folder with sync issues (check file status)" -ForegroundColor Gray
+                Write-Host "  • Insufficient permissions to read the file" -ForegroundColor Gray
+                Write-Host "" -ForegroundColor Yellow
+                Write-Host "Troubleshooting steps:" -ForegroundColor Cyan
+                Write-Host "  1. Close Excel if the file is open" -ForegroundColor Gray
+                Write-Host "  2. Copy file to a local folder (C:\temp) and retry" -ForegroundColor Gray
+                Write-Host "  3. Verify file permissions and OneDrive sync status" -ForegroundColor Gray
+                exit 1
+            }
+
+            # Read existing sheets inline for validation
+            try {
+                # Ensure ImportExcel module is loaded
+                if (-not (Get-Module -Name ImportExcel)) {
+                    Write-Host "WARNING: ImportExcel module not loaded, attempting to load..." -ForegroundColor Yellow
+                    Import-Module ImportExcel -ErrorAction Stop
+                }
+
+                $existingSheets = Get-ExcelSheetInfo -Path $excelFilePath | Select-Object -ExpandProperty Name
+                $script:ExistingExcelSheets = $existingSheets
+                Write-LogHost "  Existing sheets: $($existingSheets -join ', ')" -ForegroundColor DarkGray
+            }
+            catch {
+                # First, validate it's actually an Excel file (ZIP container with xl/workbook.xml)
+                # Diagnostic ladder distinguishes: not-a-ZIP, ZIP-but-not-a-workbook, and
+                # ImportExcel parse failure — each with tailored remediation advice.
+                Write-Host "ERROR: Cannot read Excel workbook structure: $($_.Exception.Message)" -ForegroundColor Red
+                Write-Host "  File: $excelFilePath" -ForegroundColor Yellow
+                Write-Host "" -ForegroundColor Yellow
+
+                # Try to determine the actual issue
+                $isValidZip = $false
+                $hasWorkbookXml = $false
+                try {
+                    Add-Type -AssemblyName System.IO.Compression.FileSystem -ErrorAction SilentlyContinue
+                    $zip = [System.IO.Compression.ZipFile]::OpenRead($excelFilePath)
+                    $isValidZip = $true
+                    # NOTE(review): holds the matching ZipArchiveEntry (truthy), not a boolean
+                    $hasWorkbookXml = $zip.Entries | Where-Object { $_.FullName -eq 'xl/workbook.xml' }
+                    $zip.Dispose()
+                }
+                catch {
+                    # Not a valid ZIP
+                }
+
+                if (-not $isValidZip) {
+                    Write-Host "Root cause: File is not a valid ZIP archive (Excel files are ZIP containers)" -ForegroundColor Yellow
+                    Write-Host "" -ForegroundColor Yellow
+                    Write-Host "Possible causes:" -ForegroundColor Yellow
+                    Write-Host "  • File was renamed from .csv to .xlsx (not converted)" -ForegroundColor Gray
+                    Write-Host "  • File download was interrupted or corrupted" -ForegroundColor Gray
+                    Write-Host "  • File created by incompatible tool" -ForegroundColor Gray
+                }
+                elseif (-not $hasWorkbookXml) {
+                    Write-Host "Root cause: ZIP file is valid but missing 'xl/workbook.xml' (not a proper Excel workbook)" -ForegroundColor Yellow
+                    Write-Host "" -ForegroundColor Yellow
+                    Write-Host "Possible causes:" -ForegroundColor Yellow
+                    Write-Host "  • File is corrupted or incomplete" -ForegroundColor Gray
+                    Write-Host "  • File created by tool that doesn't follow Excel format" -ForegroundColor Gray
+                }
+                else {
+                    Write-Host "Root cause: ImportExcel module cannot parse this workbook" -ForegroundColor Yellow
+                    Write-Host "" -ForegroundColor Yellow
+                    Write-Host "Possible causes:" -ForegroundColor Yellow
+                    Write-Host "  • ImportExcel module version incompatibility" -ForegroundColor Gray
+                    Write-Host "  • File created by different ImportExcel version" -ForegroundColor Gray
+                    Write-Host "  • Workbook has features ImportExcel can't parse" -ForegroundColor Gray
+                    Write-Host "" -ForegroundColor Yellow
+                    Write-Host "Current ImportExcel version:" -ForegroundColor Cyan
+                    try {
+                        $importExcelModule = Get-Module -Name ImportExcel -ListAvailable | Select-Object -First 1
+                        Write-Host "  $($importExcelModule.Version)" -ForegroundColor Gray
+                    }
+                    catch {
+                        Write-Host "  Unable to detect version" -ForegroundColor Gray
+                    }
+                }
+
+                Write-Host "" -ForegroundColor Yellow
+                Write-Host "Recommended solutions:" -ForegroundColor Cyan
+                Write-Host "  1. Open file in Excel and verify it opens correctly" -ForegroundColor Gray
+                Write-Host "  2. If it opens: File > Save As > Excel Workbook (.xlsx) to 'clean' it" -ForegroundColor Gray
+                Write-Host "  3. Update ImportExcel: Update-Module ImportExcel -Force" -ForegroundColor Gray
+                Write-Host "  4. Recreate initial export without -AppendFile using current script" -ForegroundColor Gray
+                Write-Host "" -ForegroundColor Yellow
+                Write-Host "DEBUG INFO:" -ForegroundColor Cyan
+                Write-Host "  File exists: $(Test-Path $excelFilePath)" -ForegroundColor Gray
+                Write-Host "  File size: $((Get-Item $excelFilePath -ErrorAction SilentlyContinue).Length) bytes" -ForegroundColor Gray
+                Write-Host "  File extension: $([System.IO.Path]::GetExtension($excelFilePath))" -ForegroundColor Gray
+                Write-Host "  First 4 bytes (hex): " -NoNewline -ForegroundColor Gray
+                try {
+                    # Read only the first 4 bytes via a stream; the previous
+                    # ReadAllBytes call loaded the entire (possibly large) workbook
+                    # into memory just to inspect the ZIP signature, and returned a
+                    # scalar (not an array) for 1-byte files, breaking indexing.
+                    $sigStream = [System.IO.File]::OpenRead($excelFilePath)
+                    try {
+                        $sigBuffer = New-Object byte[] 4
+                        $sigRead = $sigStream.Read($sigBuffer, 0, 4)
+                    } finally {
+                        $sigStream.Dispose()
+                    }
+                    $bytes = if ($sigRead -gt 0) { $sigBuffer[0..($sigRead - 1)] } else { @() }
+                    Write-Host "$(($bytes | ForEach-Object { $_.ToString('X2') }) -join ' ')" -ForegroundColor Gray
+                    # ZIP/OOXML containers start with the 'PK' magic (0x50 0x4B)
+                    if ($sigRead -ge 2 -and $bytes[0] -eq 0x50 -and $bytes[1] -eq 0x4B) {
+                        Write-Host "    (Valid ZIP signature: PK)" -ForegroundColor Green
+                    } else {
+                        Write-Host "    (NOT a ZIP file - should start with 'PK' = 50 4B)" -ForegroundColor Red
+                    }
+                }
+                catch {
+                    Write-Host "Unable to read bytes" -ForegroundColor Gray
+                }
+                exit 1
+            }
+        } else {
+            # Normal mode: CSV path uses the intermediate CsvOutputFile
+            # (produced earlier by the CSV export stage; deleted after conversion)
+            $csvFilePath = $script:CsvOutputFile
+        }
+
+        # Excel conversion - use fast path when possible
+        try {
+            if ($CombineOutput) {
+                # --- Combined Mode: Single-tab workbook ---
+                $tabName = "CombinedUsageActivity"
+
+                if ($AppendFile -and $script:ExistingExcelSheets -contains $tabName) {
+                    # Append mode: Need to validate headers (requires loading CSV)
+                    Write-LogHost "Reading CSV data for header validation: $csvFilePath" -ForegroundColor Gray
+                    $csvData = Import-Csv -Path $csvFilePath -ErrorAction Stop
+                    $totalRows = $csvData.Count
+                    Write-LogHost "Loaded $totalRows rows from CSV" -ForegroundColor Gray
+
+                    # Validate headers match
+                    # Existing header row is read with -NoHeader so cell VALUES are the header names
+                    Write-LogHost "Validating headers for tab: $tabName" -ForegroundColor Gray
+                    $existingWorkbook = Import-Excel -Path $excelFilePath -WorksheetName $tabName -StartRow 1 -EndRow 1 -NoHeader
+                    $existingHeaders = $existingWorkbook[0].PSObject.Properties.Value | Where-Object { $_ }
+                    $newHeaders = $csvData[0].PSObject.Properties.Name
+
+                    # Positional comparison: any count or order difference counts as a mismatch
+                    $headerMismatch = $false
+                    if ($existingHeaders.Count -ne $newHeaders.Count) {
+                        $headerMismatch = $true
+                    } else {
+                        for ($i = 0; $i -lt $existingHeaders.Count; $i++) {
+                            if ($existingHeaders[$i] -ne $newHeaders[$i]) {
+                                $headerMismatch = $true
+                                break
+                            }
+                        }
+                    }
+
+                    if ($headerMismatch) {
+                        # Create timestamped duplicate tab
+                        # NOTE(review): $excelTimestamp is assumed to be set earlier in the script — confirm
+                        $timestampedTabName = "${tabName}_$excelTimestamp"
+                        Write-LogHost "WARNING: Header mismatch detected for tab '$tabName'" -ForegroundColor Yellow
+                        Write-LogHost "Creating timestamped duplicate tab: $timestampedTabName" -ForegroundColor Yellow
+                        $dataTable = $csvData | ConvertTo-DataTable
+                        Send-SQLDataToExcel -DataTable $dataTable -Path $excelFilePath -WorkSheetName $timestampedTabName -Force -FreezeTopRow -BoldTopRow -AutoSize -NoNumberConversion '*'
+                    } else {
+                        # Append to existing tab
+                        Write-LogHost "Appending to existing tab: $tabName" -ForegroundColor Gray
+                        $dataTable = $csvData | ConvertTo-DataTable
+                        Send-SQLDataToExcel -DataTable $dataTable -Path $excelFilePath -WorkSheetName $tabName -Append -FreezeTopRow -BoldTopRow -AutoSize -NoNumberConversion '*'
+                    }
+                } else {
+                    # Create new tab or new workbook
+                    Write-LogHost "Creating tab: $tabName" -ForegroundColor Gray
+                    $dataTable = Import-CsvToDataTable -Path $csvFilePath
+                    $totalRows = $dataTable.Rows.Count
+                    Send-SQLDataToExcel -DataTable $dataTable -Path $excelFilePath -WorkSheetName $tabName -Force -FreezeTopRow -BoldTopRow -AutoSize -NoNumberConversion '*'
+                    # Append EntraUsers tab if requested
+                    if ($IncludeUserInfo -and $script:EntraUsersData) {
+                        $entraTab = 'EntraUsers_MAClicensing'
+                        Write-LogHost "Creating tab: $entraTab" -ForegroundColor Gray
+                        $entraDataTable = $script:EntraUsersData | ConvertTo-DataTable
+                        Send-SQLDataToExcel -DataTable $entraDataTable -Path $excelFilePath -WorkSheetName $entraTab -FreezeTopRow -BoldTopRow -AutoSize -NoNumberConversion '*'
+                    }
+                }
+
+                Write-LogHost "Excel workbook created: $excelFilePath" -ForegroundColor Green
+                # (split from a single jammed line: statement and '} else {' were on one line)
+                Write-LogHost "  Tab: $tabName | Rows: $totalRows" -ForegroundColor White
+            } else {
+                # --- Multi-tab Mode: One tab per activity type ---
+                # Multi-tab mode requires loading CSV for grouping
+                Write-LogHost "Reading CSV data for multi-tab grouping: $csvFilePath" -ForegroundColor Gray
+                $csvData = Import-Csv -Path $csvFilePath -ErrorAction Stop
+                Write-LogHost "Loaded $($csvData.Count) rows from CSV" -ForegroundColor Gray
+
+                # Group CSV data by Operation column (one worksheet per distinct Operation)
+                $groupedData = $csvData | Group-Object -Property Operation
+
+                # Calculate total tab count (activity types + EntraUsers if present)
+                $activityTabCount = $groupedData.Count
+                $totalTabCount = $activityTabCount
+                if ($IncludeUserInfo -and $script:EntraUsersData) {
+                    $totalTabCount += 1 # Add EntraUsers tab
+                }
+
+                # Singular/plural label purely for log cosmetics
+                $tabLabel = if ($totalTabCount -eq 1) { "tab" } else { "tabs" }
+                Write-LogHost "Creating multi-tab workbook with $totalTabCount $tabLabel" -ForegroundColor Gray
+
+                $tabsCreated = @()
+                # One worksheet per activity type; append/validate when the tab already exists.
+                foreach ($group in $groupedData) {
+                    $activityType = $group.Name
+                    $activityData = $group.Group
+                    $activityRows = $activityData.Count
+
+                    if ($AppendFile -and $script:ExistingExcelSheets -contains $activityType) {
+                        # Validate headers match (positional comparison against row 1 of the existing tab)
+                        Write-LogHost "Validating headers for tab: $activityType" -ForegroundColor Gray
+                        $existingWorkbook = Import-Excel -Path $excelFilePath -WorksheetName $activityType -StartRow 1 -EndRow 1 -NoHeader
+                        $existingHeaders = $existingWorkbook[0].PSObject.Properties.Value | Where-Object { $_ }
+                        $newHeaders = $activityData[0].PSObject.Properties.Name
+
+                        $headerMismatch = $false
+                        if ($existingHeaders.Count -ne $newHeaders.Count) {
+                            $headerMismatch = $true
+                        } else {
+                            for ($i = 0; $i -lt $existingHeaders.Count; $i++) {
+                                if ($existingHeaders[$i] -ne $newHeaders[$i]) {
+                                    $headerMismatch = $true
+                                    break
+                                }
+                            }
+                        }
+
+                        if ($headerMismatch) {
+                            # Create timestamped duplicate tab rather than corrupting the existing one
+                            $timestampedTabName = "${activityType}_$excelTimestamp"
+                            Write-LogHost "WARNING: Header mismatch detected for tab '$activityType'" -ForegroundColor Yellow
+                            Write-LogHost "Creating timestamped duplicate tab: $timestampedTabName" -ForegroundColor Yellow
+                            $activityDataTable = $activityData | ConvertTo-DataTable
+                            Send-SQLDataToExcel -DataTable $activityDataTable -Path $excelFilePath -WorkSheetName $timestampedTabName -Force -FreezeTopRow -BoldTopRow -AutoSize -NoNumberConversion '*'
+                            $tabsCreated += "$timestampedTabName ($activityRows rows)"
+                        } else {
+                            # Append to existing tab
+                            Write-LogHost "Appending to existing tab: $activityType" -ForegroundColor Gray
+                            $activityDataTable = $activityData | ConvertTo-DataTable
+                            Send-SQLDataToExcel -DataTable $activityDataTable -Path $excelFilePath -WorkSheetName $activityType -Append -FreezeTopRow -BoldTopRow -AutoSize -NoNumberConversion '*'
+                            $tabsCreated += "$activityType ($activityRows rows appended)"
+                        }
+                    } else {
+                        # Create new tab
+                        Write-LogHost "Creating tab: $activityType ($activityRows rows)" -ForegroundColor Gray
+                        $activityDataTable = $activityData | ConvertTo-DataTable
+                        Send-SQLDataToExcel -DataTable $activityDataTable -Path $excelFilePath -WorkSheetName $activityType -Force -FreezeTopRow -BoldTopRow -AutoSize -NoNumberConversion '*'
+                        $tabsCreated += "$activityType ($activityRows rows)"
+                    }
+                }
+
+                # After creating all activity tabs, append EntraUsers if requested
+                # BUGFIX: this block was previously INSIDE the foreach above, so with N
+                # activity groups it tried to create the EntraUsers tab N times and
+                # duplicated its $tabsCreated entry. Now runs exactly once, matching
+                # the combined-mode path.
+                if ($IncludeUserInfo -and $script:EntraUsersData) {
+                    $entraTab = 'EntraUsers_MAClicensing'
+                    Write-LogHost "Creating tab: $entraTab ($($script:EntraUsersData.Count) rows)" -ForegroundColor Gray
+                    $entraDataTable = $script:EntraUsersData | ConvertTo-DataTable
+                    Send-SQLDataToExcel -DataTable $entraDataTable -Path $excelFilePath -WorkSheetName $entraTab -FreezeTopRow -BoldTopRow -AutoSize -NoNumberConversion '*'
+                    $tabsCreated += "$entraTab ($($script:EntraUsersData.Count) rows)"
+                }
+
+
+            Write-LogHost "Excel workbook created: $excelFilePath" -ForegroundColor Green
+            Write-LogHost "  Tabs: $($tabsCreated -join ', ')" -ForegroundColor White
+            }
+            # Delete temporary CSV file (with retry for file lock issues)
+            if ($AppendFile -and $script:AppendFileTempCsv) {
+                # AppendFile mode: Remove the temp CSV we created for new data
+                Write-LogHost "Removing temporary CSV file: $script:AppendFileTempCsv" -ForegroundColor Gray
+                Remove-Item -Path $script:AppendFileTempCsv -Force -ErrorAction SilentlyContinue
+            } elseif (-not $AppendFile) {
+                # Normal mode: Remove the intermediate CSV that was converted to Excel
+                Write-LogHost "Removing temporary CSV file: $script:CsvOutputFile" -ForegroundColor Gray
+                # Retry with delay to handle transient file locks (antivirus, OneDrive sync, etc.)
+                $deleteSuccess = $false
+                for ($retryCount = 0; $retryCount -lt 3; $retryCount++) {
+                    try {
+                        # Force garbage collection to release any .NET file handles
+                        [System.GC]::Collect()
+                        [System.GC]::WaitForPendingFinalizers()
+                        Start-Sleep -Milliseconds 500
+                        Remove-Item -Path $script:CsvOutputFile -Force -ErrorAction Stop
+                        $deleteSuccess = $true
+                        break
+                    } catch {
+                        if ($retryCount -lt 2) {
+                            Write-LogHost "  File locked, retrying in 2 seconds... (attempt $($retryCount + 1)/3)" -ForegroundColor DarkYellow
+                            Start-Sleep -Seconds 2
+                        }
+                    }
+                }
+                if (-not $deleteSuccess) {
+                    # Non-fatal: keep the CSV rather than fail the whole run
+                    Write-LogHost "  Could not delete temp CSV (file may be locked by another process)" -ForegroundColor Yellow
+                    Write-LogHost "  CSV file preserved at: $script:CsvOutputFile" -ForegroundColor Yellow
+                }
+            }
+            # Note: $OutputFile already points to the final Excel file
+    } catch {
+        # Conversion failure is non-fatal: the intermediate CSV remains usable
+        Write-LogHost "ERROR: Failed to convert CSV to Excel: $($_.Exception.Message)" -ForegroundColor Red
+        Write-LogHost "CSV file preserved at: $script:CsvOutputFile" -ForegroundColor Yellow
+    }
+    }
+
+    # ============================================================
+    # CHECKPOINT COMPLETION: Rename _PARTIAL file and delete checkpoint
+    # ============================================================
+    if ($script:CheckpointEnabled -and $script:PartialOutputPath -and (Test-Path $script:PartialOutputPath)) {
+        Complete-CheckpointRun -FinalOutputPath $script:FinalOutputPath
+        # Update OutputFile to point to final path (without _PARTIAL) for correct display
+        $OutputFile = $script:FinalOutputPath
+        $LogFile = $script:LogFile # Also update LogFile variable (was updated by Complete-CheckpointRun)
+    }
+
+    Write-LogHost ""; Write-LogHost "=== Enterprise Export Complete ===" -ForegroundColor Green
+
+    if ($OnlyUserInfo) {
+        # User-only export mode summary
+        if ($script:EntraUsersData) {
+            Write-LogHost "Entra users exported: $($script:EntraUsersData.Count)" -ForegroundColor White
+        }
+    } else {
+        # Standard audit log export summary
+        Write-LogHost "Processing mode: $processingMode" -ForegroundColor White
+        Write-LogHost "Records exported: $($script:metrics.TotalStructuredRows)" -ForegroundColor White
+        # EOM path exports users elsewhere, so only report here for the Graph path
+        if ($IncludeUserInfo -and -not $UseEOM -and $script:EntraUsersData) {
+            Write-LogHost "Entra users exported: $($script:EntraUsersData.Count)" -ForegroundColor White
+        }
+    }
+
+
+    # Display organized tail summary metrics - only for audit log exports
+    if (-not $OnlyUserInfo -and $script:metrics -and $script:metrics.Activities -and $script:metrics.Activities.Count -gt 0) {
+        Write-LogHost ""
+        Write-LogHost "=== Activity Type Breakdown ===" -ForegroundColor Cyan
+
+        # In resume mode with skipped partitions, the "Retrieved" counts only reflect THIS run's fetches
+        # Records from previously-completed partitions (merged from incremental saves) are not counted in Retrieved
+        if ($script:IsResumeMode -and $script:OriginallySkippedPartitionIndices -and $script:OriginallySkippedPartitionIndices.Count -gt 0) {
+            Write-LogHost "  Note: Resume mode - 'Retrieved' counts reflect only this run's fetches" -ForegroundColor DarkGray
+            Write-LogHost "        (excludes $($script:OriginallySkippedPartitionIndices.Count) previously-completed partition(s))" -ForegroundColor DarkGray
+            Write-LogHost ""
+        }
+
+        $totalRetrieved = $script:metrics.TotalRecordsFetched
+        $totalExported = $script:metrics.TotalStructuredRows
+        $totalFiltered = $script:metrics.FilteringSkippedRecords
+
+        foreach ($actKey in ($script:metrics.Activities.Keys | Sort-Object)) {
+            $actStats = $script:metrics.Activities[$actKey]
+            $retrieved = $actStats.Retrieved
+            $structured = $actStats.Structured
+            # Expansion ratio > 1 indicates array explosion produced extra rows
+            $ratio = if ($retrieved -gt 0) { [Math]::Round($structured / $retrieved, 1) } else { 0 }
+
+            Write-LogHost "  $actKey" -ForegroundColor White
+            Write-LogHost "    Retrieved: $retrieved records" -ForegroundColor Gray
+
+            # Show filtering breakdown if records were filtered for this activity type
+            # NOTE(review): the counters below ($script:metrics.Filtering*) are script-wide,
+            # not per-activity, so with multiple activity types the same totals are
+            # repeated under each activity — confirm whether per-activity counters exist.
+            $actFiltered = $retrieved - $structured
+            if ($actFiltered -gt 0 -and $ratio -le 1) {
+                Write-LogHost "    Filtered: $actFiltered records" -ForegroundColor DarkYellow
+
+                # Only show filters that were actually applied (check parameter + impact)
+                # Technical failures (always show if present)
+                if ($script:metrics.FilteringMissingAuditData -gt 0) {
+                    Write-LogHost "      - Missing/invalid AuditData: $($script:metrics.FilteringMissingAuditData)" -ForegroundColor DarkGray
+                }
+                if ($script:metrics.FilteringParseFailures -gt 0) {
+                    Write-LogHost "      - Parse failures: $($script:metrics.FilteringParseFailures)" -ForegroundColor DarkGray
+                }
+
+                # User-specified filters (only show if parameter was used)
+                if ($PromptFilter) {
+                    $promptCount = if ($script:metrics.FilteringPromptFiltered -gt 0) { $script:metrics.FilteringPromptFiltered } else { 0 }
+                    Write-LogHost "      - PromptFilter ($PromptFilter): $promptCount" -ForegroundColor DarkGray
+                }
+                if ($AgentId) {
+                    $agentCount = if ($script:metrics.FilteringAgentFiltered -gt 0) { $script:metrics.FilteringAgentFiltered } else { 0 }
+                    Write-LogHost "      - AgentId filter: $agentCount" -ForegroundColor DarkGray
+                }
+                if ($AgentsOnly) {
+                    $agentCount = if ($script:metrics.FilteringAgentFiltered -gt 0) { $script:metrics.FilteringAgentFiltered } else { 0 }
+                    Write-LogHost "      - AgentsOnly filter: $agentCount" -ForegroundColor DarkGray
+                }
+                if ($ExcludeAgents) {
+                    $excludeCount = if ($script:metrics.FilteringExcludeAgents -gt 0) { $script:metrics.FilteringExcludeAgents } else { 0 }
+                    Write-LogHost "      - ExcludeAgents filter: $excludeCount" -ForegroundColor DarkGray
+                }
+                if ($UserIds) {
+                    $userCount = if ($script:metrics.FilteringUserIds -gt 0) { $script:metrics.FilteringUserIds } else { 0 }
+                    Write-LogHost "      - UserIds filter: $userCount" -ForegroundColor DarkGray
+                }
+                if ($GroupNames) {
+                    $groupCount = if ($script:metrics.FilteringGroupNames -gt 0) { $script:metrics.FilteringGroupNames } else { 0 }
+                    Write-LogHost "      - GroupNames filter: $groupCount" -ForegroundColor DarkGray
+                }
+
+                # Calculate explained filtering count
+                $explainedFiltering = 0
+                $explainedFiltering += $script:metrics.FilteringMissingAuditData
+                $explainedFiltering += $script:metrics.FilteringParseFailures
+                if ($PromptFilter) { $explainedFiltering += $script:metrics.FilteringPromptFiltered }
+                if ($AgentId) { $explainedFiltering += $script:metrics.FilteringAgentFiltered }
+                if ($AgentsOnly) { $explainedFiltering += $script:metrics.FilteringAgentFiltered }
+                if ($ExcludeAgents) { $explainedFiltering += $script:metrics.FilteringExcludeAgents }
+                if ($UserIds) { $explainedFiltering += $script:metrics.FilteringUserIds }
+                if ($GroupNames) { $explainedFiltering += $script:metrics.FilteringGroupNames }
+
+                # Show unspecified reason if filtered count doesn't match explained reasons
+                if ($explainedFiltering -eq 0 -or $actFiltered -gt $explainedFiltering) {
+                    $unexplained = $actFiltered - $explainedFiltering
+                    if ($unexplained -gt 0) {
+                        $yieldLabel = if ($ExplodeArrays -or $ExplodeDeep) { "Array explosion yield" } else { "Processing yield" }
+                        Write-LogHost "      - ${yieldLabel}: $unexplained" -ForegroundColor DarkGray
+                    }
+                }
+            }
+
+            # Show explosion details if exploding mode is enabled AND ratio > 1
+            if (($ExplodeArrays -or $ExplodeDeep) -and $ratio -gt 1 -and -not $ExcelOutput) {
+                Write-LogHost "    Exported: $structured rows (${ratio}x expansion)" -ForegroundColor Gray
+                if ($script:metrics.ExplosionEvents -gt 0) {
+                    # +1 because ExplosionRowsFromEvents counts only the ADDED rows per event
+                    $avgExpansion = if ($script:metrics.ExplosionEvents -gt 0) {
+                        [Math]::Round(($script:metrics.ExplosionRowsFromEvents / $script:metrics.ExplosionEvents) + 1, 1)
+                    } else { 1 }
+                    Write-LogHost "      - Avg expansion: ${avgExpansion}x per record" -ForegroundColor DarkGray
+                    if ($script:metrics.ExplosionMaxPerRecord -gt 0) {
+                        Write-LogHost "      - Max expansion: $($script:metrics.ExplosionMaxPerRecord)x (single record)" -ForegroundColor DarkGray
+                    }
+                }
+            } else {
+                # Always show exported count for consistency
+                Write-LogHost "    Exported: $structured rows" -ForegroundColor Gray
+            }
+        }
+
+        # Final pipeline summary
+        Write-LogHost ""
+        Write-LogHost "Pipeline Summary:" -ForegroundColor Cyan
+        Write-LogHost "  Retrieved: $totalRetrieved records" -ForegroundColor White
+        if ($totalFiltered -gt 0) {
+            Write-LogHost "  Filtered: $totalFiltered records" -ForegroundColor White
+        }
+        Write-LogHost "  Exported: $totalExported rows" -ForegroundColor White
+    }
+
+    # Export telemetry CSV for Graph API parallel execution analysis (one row per partition) - only when -IncludeTelemetry switch is used
+    # Always timestamped to prevent overwriting previous telemetry data
+    if ($IncludeTelemetry -and -not $OnlyUserInfo -and -not $UseEOM -and $script:telemetryData -and $script:telemetryData.Count -gt 0) {
+        try {
+            $baseName = [System.IO.Path]::GetFileNameWithoutExtension($OutputFile)
+            $outputDir = Split-Path $OutputFile -Parent
+            $telemetryPath = Join-Path $outputDir "${baseName}_telemetry_$global:ScriptRunTimestamp.csv"
+            $script:telemetryData | Export-Csv -Path $telemetryPath -NoTypeInformation -Encoding UTF8
+            Write-LogHost ""
+            Write-LogHost "Graph API Telemetry: $telemetryPath" -ForegroundColor Cyan
+        } catch {
+            # Telemetry is diagnostic-only; failure never blocks the export
+            Write-LogHost "Warning: Failed to export telemetry CSV: $($_.Exception.Message)" -ForegroundColor Yellow
+        }
+    }
+    # DSPM for AI: Log DSPM features enabled
+    if ($IncludeDSPMForAI) {
+        Write-LogHost ""
+        Write-LogHost "DSPM for AI Features:" -ForegroundColor Cyan
+        Write-LogHost "  Activity types: ConnectedAIAppInteraction, AIInteraction, AIAppInteraction" -ForegroundColor Cyan
+        if ($ExcludeCopilotInteraction) {
+            Write-LogHost "  ✗ CopilotInteraction: EXCLUDED" -ForegroundColor Red
+        }
+    }
+    Write-LogHost ""
+
+    # File output summary (skip for -OnlyUserInfo mode)
+    # Four mutually exclusive shapes: Excel workbook, split CSVs, single CSV, or missing file.
+    if (-not $OnlyUserInfo) {
+        if ($ExportWorkbook -and $OutputFile -match '\.xlsx$') {
+            Write-LogHost "Output workbook: $OutputFile" -ForegroundColor White
+            Write-LogHost "Workbook mode: $(if ($CombineOutput) { 'Single-tab (Combined)' } else { 'Multi-tab (By Activity Type)' })" -ForegroundColor White
+            if ($IncludeUserInfo -and -not $UseEOM) { Write-LogHost "Entra Users Tab: EntraUsers_MAClicensing" -ForegroundColor Gray }
+            if (Test-Path $OutputFile) {
+                Write-LogHost "File size: $([math]::Round((Get-Item $OutputFile).Length / 1KB,2)) KB" -ForegroundColor White
+            }
+            if ($AppendFile) {
+                Write-LogHost "Append mode: Enabled" -ForegroundColor Cyan
+            }
+        } elseif ($script:CsvSplitFiles -and $script:CsvSplitFiles.Count -gt 0) {
+            # CSV was split into multiple files
+            Write-LogHost "Output directory: $(Split-Path $OutputFile -Parent)" -ForegroundColor White
+            Write-LogHost "Files created: $($script:CsvSplitFiles.Count) separate CSV files" -ForegroundColor White
+            $totalSize = ($script:CsvSplitFiles | ForEach-Object { (Get-Item $_).Length } | Measure-Object -Sum).Sum
+            Write-LogHost "Total size: $([math]::Round($totalSize / 1KB,2)) KB" -ForegroundColor White
+            if ($IncludeUserInfo -and -not $UseEOM) {
+                $entraSplit = (Join-Path (Split-Path $OutputFile -Parent) "EntraUsers_MAClicensing_${global:ScriptRunTimestamp}.csv")
+                if (Test-Path $entraSplit) { $entraSize = [math]::Round((Get-Item $entraSplit).Length / 1KB,2); Write-LogHost "Entra Users File: $(Split-Path $entraSplit -Leaf) ($entraSize KB)" -ForegroundColor Gray } else { Write-LogHost "Entra Users File: $(Split-Path $entraSplit -Leaf) (pending generation)" -ForegroundColor Gray }
+            }
+
+            # Show filename pattern instead of listing each file
+            # NOTE(review): the double underscore in 'Purview_Audit__' appears to stand in
+            # for the per-activity-type segment of the real filenames — confirm intent.
+            $outputDir = Split-Path $OutputFile -Parent
+            $timestamp = [System.IO.Path]::GetFileNameWithoutExtension($script:CsvSplitFiles[0]) -replace '.*_(\d{8}_\d{6}).*', '$1'
+            Write-LogHost "Output pattern: ${outputDir}\Purview_Audit__${timestamp}.csv" -ForegroundColor Gray
+        } elseif (Test-Path $OutputFile) {
+            Write-LogHost "Output file: $OutputFile" -ForegroundColor White
+            Write-LogHost "File size: $([math]::Round((Get-Item $OutputFile).Length / 1KB,2)) KB" -ForegroundColor White
+            if ($IncludeUserInfo -and -not $UseEOM) {
+                $entraCombined = (Join-Path (Split-Path $OutputFile -Parent) "EntraUsers_MAClicensing_${global:ScriptRunTimestamp}.csv")
+                if (Test-Path $entraCombined) { $entraSize = [math]::Round((Get-Item $entraCombined).Length / 1KB,2); Write-LogHost "Entra Users File: $entraCombined ($entraSize KB)" -ForegroundColor Gray } else { Write-LogHost "Entra Users File: $entraCombined (pending)" -ForegroundColor Gray }
+            }
+        } else {
+            Write-LogHost "Output file: $OutputFile" -ForegroundColor White
+            if ($IncludeUserInfo -and -not $UseEOM) {
+                $entraCombined = (Join-Path (Split-Path $OutputFile -Parent) "EntraUsers_MAClicensing_${global:ScriptRunTimestamp}.csv")
+                Write-LogHost "Entra Users File: $entraCombined (pending)" -ForegroundColor Gray
+            }
+            Write-LogHost "File size: N/A (file may have been deleted/moved during processing)" -ForegroundColor DarkGray
+        }
+
+        # Emit header-only CSV(s) when zero rows were exported so downstream tooling
+        # always finds a file with the expected schema.
+        # Use the metric directly in case $totalExported wasn't set (resume mode with no new fetches).
+        if ((-not $ExportWorkbook) -and ([int]$script:metrics.TotalStructuredRows -eq 0)) {
+            # Local helper: write a single CSV containing only the header row.
+            # Consolidates the previously duplicated quoting/writing logic of the
+            # combined-mode and per-activity branches; try/finally guarantees the
+            # StreamWriter is disposed even if WriteLine throws.
+            function Write-PaxHeaderOnlyCsv {
+                param([string]$Path, [string[]]$Columns)
+                $dir = Split-Path $Path -Parent
+                if (-not (Test-Path $dir)) { New-Item -ItemType Directory -Path $dir -Force | Out-Null }
+                $enc = New-Object System.Text.UTF8Encoding($false) # UTF-8 without BOM
+                $sw = [System.IO.StreamWriter]::new($Path, $false, $enc)
+                try {
+                    $escapedCols = @()
+                    foreach ($col in $Columns) {
+                        $c = [string]$col
+                        # RFC 4180-style quoting: quote when the value contains a
+                        # quote, comma, CR/LF, or leading/trailing spaces.
+                        $needsQuote = ($c -match '[",\r\n]') -or $c.StartsWith(' ') -or $c.EndsWith(' ')
+                        $escaped = $c -replace '"', '""'
+                        if ($needsQuote) { $escaped = '"' + $escaped + '"' }
+                        $escapedCols += , $escaped
+                    }
+                    $sw.WriteLine(($escapedCols -join ','))
+                    $sw.Flush()
+                } finally {
+                    $sw.Dispose()
+                }
+            }
+
+            # Header schema depends on whether explosion mode widened the row layout
+            $headerColumns = if ($ExplodeDeep -or $ExplodeArrays -or $ForcedRawInputCsvExplosion) { $PurviewExplodedHeader } else { @('RecordType', 'CreationDate', 'UserIds', 'Operations', 'ResultStatus', 'ResultCount', 'Identity', 'IsValid', 'ObjectState', 'Id', 'CreationTime', 'Operation', 'OrganizationId', 'RecordTypeNum', 'ResultStatus_Audit', 'UserKey', 'UserType', 'Version', 'Workload', 'UserId', 'AppId', 'ClientAppId', 'CorrelationId', 'ModelId', 'ModelProvider', 'ModelFamily', 'TokensTotal', 'TokensInput', 'TokensOutput', 'DurationMs', 'OutcomeStatus', 'ConversationId', 'TurnNumber', 'RetryCount', 'ClientVersion', 'ClientPlatform', 'AgentId', 'AgentName', 'AgentVersion', 'AgentCategory', 'AppIdentity', 'ApplicationName', 'AuditData', 'CopilotEventData') }
+
+            if ($CombineOutput) {
+                # Combined mode: single header-only file at the final output path
+                try {
+                    Write-PaxHeaderOnlyCsv -Path $OutputFile -Columns $headerColumns
+                    Write-LogHost "Header-only CSV created at: $OutputFile" -ForegroundColor Green
+                } catch {
+                    Write-LogHost "Failed to write header-only CSV for ${OutputFile}: $($_.Exception.Message)" -ForegroundColor Red
+                }
+            } elseif ($ActivityTypes) {
+                # Per-activity split: one header-only file per requested activity type
+                try {
+                    $outputDir = Split-Path $OutputFile -Parent
+                    $timestamp = [System.IO.Path]::GetFileNameWithoutExtension($OutputFile) -replace '.*_(\d{8}_\d{6}).*', '$1'
+                    foreach ($actType in $ActivityTypes) {
+                        $file = Join-Path $outputDir ("Purview_Audit_{0}_{1}.csv" -f $actType, $timestamp)
+                        try {
+                            Write-PaxHeaderOnlyCsv -Path $file -Columns $headerColumns
+                            Write-LogHost "Header-only CSV created at: $file" -ForegroundColor Green
+                        } catch {
+                            Write-LogHost "Failed to write header-only CSV for ${file}: $($_.Exception.Message)" -ForegroundColor Red
+                        }
+                    }
+                } catch {
+                    Write-LogHost "Failed to emit per-activity header-only CSVs: $($_.Exception.Message)" -ForegroundColor Red
+                }
+            }
+        }
+    } else {
+        # -OnlyUserInfo mode: Show only EntraUsers file
+        $entraFile = (Join-Path (Split-Path $OutputFile -Parent) "EntraUsers_MAClicensing_${global:ScriptRunTimestamp}.csv")
+        if (Test-Path $entraFile) {
+            $entraSize = [math]::Round((Get-Item $entraFile).Length / 1KB, 2)
+            Write-LogHost "EntraUsers file: $entraFile" -ForegroundColor White
+            Write-LogHost "File size: $entraSize KB" -ForegroundColor White
+        } else {
+            # Export stage did not produce the file; surface as a warning, not an error
+            Write-LogHost "EntraUsers file: $entraFile (not found)" -ForegroundColor Yellow
+        }
+    }
+
+    Write-LogHost "Log file: $LogFile" -ForegroundColor White
+
+    # Mark script as completed normally (used by finally block to detect Ctrl+C)
+    $script:ScriptCompleted = $true
+
+    # Clean up incremental JSONL files from this run after successful completion
+    # CRITICAL: Must happen AFTER explosion completes, using timestamp to identify this run's files
+    # This avoids the issue where $script:PartialOutputPath is null after Complete-CheckpointRun
+    $incrementalDir = Join-Path (Split-Path $OutputFile -Parent) ".pax_incremental"
+    if (Test-Path $incrementalDir) {
+        # Timestamp in the filename scopes deletion to files produced by THIS run only
+        $thisRunPattern = "*_${global:ScriptRunTimestamp}_*records.jsonl"
+        $thisRunFiles = Get-ChildItem -Path $incrementalDir -Filter $thisRunPattern -ErrorAction SilentlyContinue
+        if ($thisRunFiles -and $thisRunFiles.Count -gt 0) {
+            try {
+                $thisRunFiles | Remove-Item -Force -ErrorAction Stop
+                Write-LogHost "Incremental JSONL files cleaned up ($($thisRunFiles.Count) files from this run)" -ForegroundColor DarkGray
+            } catch {
+                Write-LogHost "Note: Could not remove incremental JSONL files: $($_.Exception.Message)" -ForegroundColor DarkGray
+            }
+        }
+        # Also remove the directory if it's now empty
+        $remaining = Get-ChildItem -Path $incrementalDir -ErrorAction SilentlyContinue
+        if (-not $remaining -or $remaining.Count -eq 0) {
+            try {
+                Remove-Item -Path $incrementalDir -Force -ErrorAction SilentlyContinue
+            } catch {}
+        }
+    }
+}
+catch {
+ # Handle Ctrl+C (PipelineStoppedException)
+ if ($_.Exception -is [System.Management.Automation.PipelineStoppedException] -or
+ $_.Exception.InnerException -is [System.Management.Automation.PipelineStoppedException]) {
+ $script:CtrlCPressed = $true
+ }
+
+ $msg = $_.Exception.Message
+ if ($msg -eq '__PAX_EARLY_EXIT__' -or $script:EarlyExit) {
+ # Graceful early exit path (e.g., header-only CSV)
+ Write-LogHost "Early exit executed: $script:EarlyExit" -ForegroundColor DarkGray
+ } else {
+ Write-LogHost "Script failed: $msg" -ForegroundColor Red
+ Write-LogHost $_.ScriptStackTrace -ForegroundColor Red
+ }
+}
+finally {
+ # Check if script was interrupted (didn't complete normally and not an early exit)
+ if (-not $script:ScriptCompleted -and -not $script:EarlyExit -and -not $script:CtrlCPressed) {
+ # Script was interrupted - likely Ctrl+C that wasn't caught by PipelineStoppedException
+ $script:CtrlCPressed = $true
+ }
+
+ # Show graceful exit message if interrupted (and not already shown by engine event handler)
+ # Skip in replay mode - no Graph connection to disconnect
+ if ($script:CtrlCPressed -and -not $env:PAX_GRACEFUL_EXIT_DONE -and -not $env:PAX_REPLAY_MODE) {
+ $env:PAX_GRACEFUL_EXIT_DONE = "1" # Prevent engine event handler from also showing message
+ Write-Host ""
+ Write-Host "============================================================================================================" -ForegroundColor Yellow
+ Write-Host " Script Interrupted - Performing Graceful Cleanup" -ForegroundColor Yellow
+ Write-Host "============================================================================================================" -ForegroundColor Yellow
+ Write-Host ""
+
+ # Disconnect from Microsoft Graph - ALWAYS attempt disconnect
+ Write-Host " Disconnecting from Microsoft Graph..." -ForegroundColor Cyan
+ try {
+ Disconnect-MgGraph -ErrorAction Stop | Out-Null
+ Write-Host " Microsoft Graph disconnected" -ForegroundColor Green
+ }
+ catch {
+ if ($_.Exception.Message -match 'No application to sign out from') {
+ Write-Host " (Not connected to Microsoft Graph)" -ForegroundColor DarkGray
+ } else {
+ Write-Host " Microsoft Graph session cleared" -ForegroundColor Green
+ }
+ }
+
+ # Disconnect from Exchange Online (if connected via EOM mode)
+ try {
+ $eomSession = Get-PSSession | Where-Object { $_.ConfigurationName -eq 'Microsoft.Exchange' -and $_.State -eq 'Opened' }
+ if ($eomSession) {
+ Write-Host " Disconnecting from Exchange Online Management..." -ForegroundColor Cyan
+ Disconnect-ExchangeOnline -Confirm:$false -ErrorAction SilentlyContinue | Out-Null
+ Write-Host " Exchange Online disconnected" -ForegroundColor Green
+ }
+ }
+ catch {
+ Write-Host " (Exchange Online cleanup completed)" -ForegroundColor Gray
+ }
+
+ # Log the interruption
+ if ($LogFile -and (Test-Path $LogFile -ErrorAction SilentlyContinue)) {
+ try {
+ Write-Output "" | Out-File -FilePath $LogFile -Append -Encoding utf8
+ Write-Output "[$(Get-Date -Format 'yyyy-MM-dd HH:mm:ss')] Script interrupted by user (Ctrl+C)" | Out-File -FilePath $LogFile -Append -Encoding utf8
+ } catch {}
+ }
+
+ # Show checkpoint resume message if checkpoint is enabled
+ if ($script:CheckpointEnabled -and $script:CheckpointPath -and (Test-Path $script:CheckpointPath -ErrorAction SilentlyContinue)) {
+ Show-CheckpointExitMessage
+ }
+
+ Write-Host ""
+ Write-Host " Cleanup complete. Exiting..." -ForegroundColor Green
+ Write-Host ""
+ exit 0
+ }
+
+ $endUtc = (Get-Date).ToUniversalTime()
+ if (-not $script:summaryWritten) {
+ try { if ($script:metrics -and $script:metrics.StartTime) { $startTail = $script:metrics.StartTime.ToUniversalTime().ToString('yyyy-MM-dd HH:mm:ss'); Write-Log "Script execution started at $startTail UTC" } } catch {}
+ Write-Log "Script execution completed at $($endUtc.ToString('yyyy-MM-dd HH:mm:ss')) UTC"
+ Write-Log "Script version: v$ScriptVersion"
+ try { if ($script:metrics -and $script:metrics.StartTime) { $elapsed = $endUtc - $script:metrics.StartTime; $totalHours = [math]::Floor($elapsed.TotalHours); $remainder = $elapsed - [TimeSpan]::FromHours($totalHours); $elapsedFormatted = ("{0}:{1:00}:{2:00}.{3:000}" -f $totalHours, $remainder.Minutes, $remainder.Seconds, $remainder.Milliseconds); Write-Log ("Total elapsed time: {0} (hours:minutes:seconds.milliseconds)" -f $elapsedFormatted) } } catch {}
+ $script:summaryWritten = $true
+ }
+
+ # ALWAYS disconnect from Graph/EOM on script exit (completed, early exit, or error)
+ # This ensures credentials are cleared regardless of $script:Connected status
+ if (-not $UseEOM) {
+ # Graph API mode: Disconnect from Microsoft Graph
+ Write-LogHost "Disconnecting from Microsoft Graph..." -ForegroundColor Gray
+ try {
+ Disconnect-MgGraph -ErrorAction Stop | Out-Null
+ Write-LogHost " Microsoft Graph disconnected" -ForegroundColor Green
+ }
+ catch {
+ if ($_.Exception.Message -match 'No application to sign out from') {
+ Write-LogHost " (Not connected to Microsoft Graph)" -ForegroundColor DarkGray
+ } else {
+ Write-LogHost " Microsoft Graph session cleared" -ForegroundColor Green
+ }
+ }
+ }
+
+ # EOM mode: Disconnect from Exchange Online
+ if ($UseEOM) {
+ try {
+ $eomSession = Get-PSSession | Where-Object { $_.ConfigurationName -eq 'Microsoft.Exchange' -and $_.State -eq 'Opened' }
+ if ($eomSession) {
+ Disconnect-ExchangeOnline -Confirm:$false -ErrorAction SilentlyContinue | Out-Null
+ Write-LogHost " Exchange Online disconnected" -ForegroundColor Green
+ }
+ }
+ catch {}
+ }
+ if ($EmitMetricsJson) {
+ try {
+ # Always timestamp metrics to prevent overwriting
+ if ($MetricsPath) {
+ $metricsPath = if ($MetricsPath.ToLower().EndsWith('.json')) { $MetricsPath } else { "$MetricsPath.json" }
+ } else {
+ $baseName = [System.IO.Path]::GetFileNameWithoutExtension($OutputFile)
+ $outputDir = Split-Path $OutputFile -Parent
+ $metricsPath = Join-Path $outputDir "${baseName}_metrics_$global:ScriptRunTimestamp.json"
+ }
+ $emitObj = [ordered]@{ version = $ScriptVersion; timestampUtc = (Get-Date).ToUniversalTime().ToString('o'); parameters = $paramSnapshot; metrics = $script:metrics }
+ ($emitObj | ConvertTo-Json -Depth 6) | Out-File -FilePath $metricsPath -Encoding UTF8
+ Write-LogHost "Metrics JSON emitted: $metricsPath" -ForegroundColor DarkCyan
+ } catch {
+ Write-LogHost "Failed to emit metrics JSON: $($_.Exception.Message)" -ForegroundColor Yellow
+ }
+ }
+
+ # NOTE: JSONL cleanup for successful runs is now handled at true script completion
+ # (after explosion, before this finally block) using timestamp-based file matching.
+ # This finally block only handles abnormal termination scenarios.
+
+ $exitCode = 0; if ($script:circuitBreakerOpen) { $exitCode = 20 } elseif (($script:Hit10KLimit -or $script:Hit1MLimit) -and -not $AutoCompleteness) { $exitCode = 10 }
+ Write-LogHost "Exit code: $exitCode" -ForegroundColor DarkGray
+ exit $exitCode
+}
+
+
+
+
diff --git a/versions.json b/versions.json
index acd0c86..73e6014 100644
--- a/versions.json
+++ b/versions.json
@@ -6,19 +6,13 @@
"products": {
"pax": {
"name": "PAX Infrastructure",
- "version": "1.0.17",
+ "version": "1.0.18",
"status": "development",
"notes": "Core infrastructure, workflows, governance (iterating beyond last tagged release)"
},
- "paxapp": {
- "name": "PAX Desktop Application",
- "version": "0.1.0",
- "status": "development",
- "notes": "Tauri desktop (multiple untagged iterations)"
- },
"purview": {
"name": "Purview Audit Log Processor",
- "version": "1.10.5",
+ "version": "1.10.6",
"status": "development",
"notes": "Canonical root script; prior versions in script_archive/Purview_Audit_Log_Processor."
},
@@ -35,5 +29,5 @@
"notes": "Canonical root script; prior versions in script_archive/CopilotInteractions_Content_Audit_Log_Processor."
}
},
- "lastUpdated": "2026-01-30T00:00:00Z"
+ "lastUpdated": "2026-02-10T00:00:00Z"
}