diff --git a/SatelliteProvider.Services.RegionProcessing/RegionFailureClassifier.cs b/SatelliteProvider.Services.RegionProcessing/RegionFailureClassifier.cs new file mode 100644 index 0000000..97c5c0a --- /dev/null +++ b/SatelliteProvider.Services.RegionProcessing/RegionFailureClassifier.cs @@ -0,0 +1,70 @@ +using SatelliteProvider.Common.Exceptions; + +namespace SatelliteProvider.Services.RegionProcessing; + +internal enum RegionFailureCategory +{ + Timeout, + ExternalCancellation, + TaskCanceledOther, + OperationCanceledOther, + RateLimit, + Network, + Unexpected, +} + +internal sealed record RegionFailureClassification(RegionFailureCategory Category, string ErrorMessage); + +internal static class RegionFailureClassifier +{ + public const string TimeoutMessage = + "Processing timed out after 5 minutes. Unable to download tiles within the time limit."; + + public const string ExternalCancellationMessage = + "Processing was cancelled externally (likely application shutdown)."; + + public static RegionFailureClassification Classify( + Exception ex, + CancellationTokenSource timeoutCts, + CancellationToken cancellationToken) + { + ArgumentNullException.ThrowIfNull(ex); + ArgumentNullException.ThrowIfNull(timeoutCts); + + return ex switch + { + TaskCanceledException when timeoutCts.IsCancellationRequested + => new RegionFailureClassification(RegionFailureCategory.Timeout, TimeoutMessage), + + TaskCanceledException when cancellationToken.IsCancellationRequested + => new RegionFailureClassification(RegionFailureCategory.ExternalCancellation, ExternalCancellationMessage), + + TaskCanceledException + => new RegionFailureClassification( + RegionFailureCategory.TaskCanceledOther, + $"Request cancelled or timed out: {ex.Message}. This may indicate HttpClient timeout or network issues."), + + OperationCanceledException when timeoutCts.IsCancellationRequested + => new RegionFailureClassification(RegionFailureCategory.Timeout, TimeoutMessage), + + OperationCanceledException + => new RegionFailureClassification( + RegionFailureCategory.OperationCanceledOther, + $"Operation cancelled: {ex.Message}"), + + RateLimitException + => new RegionFailureClassification( + RegionFailureCategory.RateLimit, + $"Rate limit exceeded: {ex.Message}. Google Maps API rate limit was reached and retries were exhausted."), + + HttpRequestException httpEx + => new RegionFailureClassification( + RegionFailureCategory.Network, + $"Network error (HTTP {httpEx.StatusCode}): {httpEx.Message}. Failed to download tiles from Google Maps."), + + _ => new RegionFailureClassification( + RegionFailureCategory.Unexpected, + $"Unexpected error ({ex.GetType().Name}): {ex.Message}"), + }; + } +} diff --git a/SatelliteProvider.Services.RegionProcessing/RegionService.cs b/SatelliteProvider.Services.RegionProcessing/RegionService.cs index db6398e..6af0f33 100644 --- a/SatelliteProvider.Services.RegionProcessing/RegionService.cs +++ b/SatelliteProvider.Services.RegionProcessing/RegionService.cs @@ -145,54 +145,16 @@ public class RegionService : IRegionService region.UpdatedAt = DateTime.UtcNow; await _regionRepository.UpdateAsync(region); } - catch (TaskCanceledException ex) when (timeoutCts.IsCancellationRequested) - { - errorMessage = "Processing timed out after 5 minutes. Unable to download tiles within the time limit."; - _logger.LogError(ex, "Region {RegionId} processing timed out after 5 minutes", id); - await HandleProcessingFailureAsync(id, region, startTime, tiles, tilesDownloaded, tilesReused, errorMessage); - } - catch (TaskCanceledException ex) when (cancellationToken.IsCancellationRequested) - { - errorMessage = "Processing was cancelled externally (likely application shutdown)."; - _logger.LogError(ex, "Region {RegionId} processing was cancelled externally", id); - await HandleProcessingFailureAsync(id, region, startTime, tiles, tilesDownloaded, tilesReused, errorMessage); - } - catch (TaskCanceledException ex) - { - errorMessage = $"Request cancelled or timed out: {ex.Message}. This may indicate HttpClient timeout or network issues."; - _logger.LogError(ex, "Region {RegionId} processing was cancelled (TaskCanceledException)", id); - await HandleProcessingFailureAsync(id, region, startTime, tiles, tilesDownloaded, tilesReused, errorMessage); - } - catch (OperationCanceledException ex) when (timeoutCts.IsCancellationRequested) - { - errorMessage = "Processing timed out after 5 minutes. Unable to download tiles within the time limit."; - _logger.LogError(ex, "Region {RegionId} processing timed out after 5 minutes", id); - await HandleProcessingFailureAsync(id, region, startTime, tiles, tilesDownloaded, tilesReused, errorMessage); - } - catch (OperationCanceledException ex) - { - errorMessage = $"Operation cancelled: {ex.Message}"; - _logger.LogError(ex, "Region {RegionId} processing was cancelled", id); - await HandleProcessingFailureAsync(id, region, startTime, tiles, tilesDownloaded, tilesReused, errorMessage); - } - catch (RateLimitException ex) - { - errorMessage = $"Rate limit exceeded: {ex.Message}. Google Maps API rate limit was reached and retries were exhausted."; - _logger.LogError(ex, "Rate limit exceeded for region {RegionId}. Google is throttling requests. Consider reducing request rate.", id); - await HandleProcessingFailureAsync(id, region, startTime, tiles, tilesDownloaded, tilesReused, errorMessage); - } - catch (HttpRequestException ex) - { - errorMessage = $"Network error (HTTP {ex.StatusCode}): {ex.Message}. Failed to download tiles from Google Maps."; - _logger.LogError(ex, "Network error processing region {RegionId}. StatusCode: {StatusCode}, Message: {Message}", - id, ex.StatusCode, ex.Message); - await HandleProcessingFailureAsync(id, region, startTime, tiles, tilesDownloaded, tilesReused, errorMessage); - } catch (Exception ex) { - errorMessage = $"Unexpected error ({ex.GetType().Name}): {ex.Message}"; - _logger.LogError(ex, "Failed to process region {RegionId}. Type: {ExceptionType}, Message: {Message}", - id, ex.GetType().Name, ex.Message); + var classification = RegionFailureClassifier.Classify(ex, timeoutCts, cancellationToken); + errorMessage = classification.ErrorMessage; + _logger.LogError( + ex, + "Region {RegionId} processing failed (category={Category}): {ErrorMessage}", + id, + classification.Category, + classification.ErrorMessage); await HandleProcessingFailureAsync(id, region, startTime, tiles, tilesDownloaded, tilesReused, errorMessage); } } diff --git a/SatelliteProvider.Services.RegionProcessing/SatelliteProvider.Services.RegionProcessing.csproj b/SatelliteProvider.Services.RegionProcessing/SatelliteProvider.Services.RegionProcessing.csproj index a2b872e..5911051 100644 --- a/SatelliteProvider.Services.RegionProcessing/SatelliteProvider.Services.RegionProcessing.csproj +++ b/SatelliteProvider.Services.RegionProcessing/SatelliteProvider.Services.RegionProcessing.csproj @@ -19,4 +19,8 @@ + + + + diff --git a/SatelliteProvider.Tests/RegionFailureClassifierTests.cs b/SatelliteProvider.Tests/RegionFailureClassifierTests.cs new file mode 100644 index 0000000..6aa8f30 --- /dev/null +++ b/SatelliteProvider.Tests/RegionFailureClassifierTests.cs @@ -0,0 +1,168 @@ +using System.Net; +using FluentAssertions; +using SatelliteProvider.Common.Exceptions; +using SatelliteProvider.Services.RegionProcessing; + +namespace SatelliteProvider.Tests; + +public class RegionFailureClassifierTests +{ + [Fact] + public void Classify_TaskCanceled_WhenTimeoutFired_ReturnsTimeout() + { + // Arrange + var timeoutCts = new CancellationTokenSource(); + timeoutCts.Cancel(); + var external = new CancellationToken(); + var ex = new TaskCanceledException("timed out"); + + // Act + var result = RegionFailureClassifier.Classify(ex, timeoutCts, external); + + // Assert + result.Category.Should().Be(RegionFailureCategory.Timeout); + result.ErrorMessage.Should().Be(RegionFailureClassifier.TimeoutMessage); + } + + [Fact] + public void Classify_TaskCanceled_WhenExternalTokenCancelled_ReturnsExternalCancellation() + { + // Arrange + var timeoutCts = new CancellationTokenSource(); + var externalCts = new CancellationTokenSource(); + externalCts.Cancel(); + var ex = new TaskCanceledException("shutdown"); + + // Act + var result = RegionFailureClassifier.Classify(ex, timeoutCts, externalCts.Token); + + // Assert + result.Category.Should().Be(RegionFailureCategory.ExternalCancellation); + result.ErrorMessage.Should().Be(RegionFailureClassifier.ExternalCancellationMessage); + } + + [Fact] + public void Classify_TaskCanceled_WithNoTokenCancelled_ReturnsTaskCanceledOther() + { + // Arrange + var timeoutCts = new CancellationTokenSource(); + var ex = new TaskCanceledException("inner http timeout"); + + // Act + var result = RegionFailureClassifier.Classify(ex, timeoutCts, CancellationToken.None); + + // Assert + result.Category.Should().Be(RegionFailureCategory.TaskCanceledOther); + result.ErrorMessage.Should().Contain("inner http timeout"); + result.ErrorMessage.Should().Contain("HttpClient timeout"); + } + + [Fact] + public void Classify_OperationCanceled_WhenTimeoutFired_ReturnsTimeout() + { + // Arrange + var timeoutCts = new CancellationTokenSource(); + timeoutCts.Cancel(); + var ex = new OperationCanceledException("op cancelled"); + + // Act + var result = RegionFailureClassifier.Classify(ex, timeoutCts, CancellationToken.None); + + // Assert + result.Category.Should().Be(RegionFailureCategory.Timeout); + result.ErrorMessage.Should().Be(RegionFailureClassifier.TimeoutMessage); + } + + [Fact] + public void Classify_OperationCanceled_WithoutTimeout_ReturnsOperationCanceledOther() + { + // Arrange + var timeoutCts = new CancellationTokenSource(); + var ex = new OperationCanceledException("manual abort"); + + // Act + var result = RegionFailureClassifier.Classify(ex, timeoutCts, CancellationToken.None); + + // Assert + result.Category.Should().Be(RegionFailureCategory.OperationCanceledOther); + result.ErrorMessage.Should().Contain("manual abort"); + } + + [Fact] + public void Classify_RateLimitException_ReturnsRateLimit() + { + // Arrange + var timeoutCts = new CancellationTokenSource(); + var ex = new RateLimitException("retries exhausted"); + + // Act + var result = RegionFailureClassifier.Classify(ex, timeoutCts, CancellationToken.None); + + // Assert + result.Category.Should().Be(RegionFailureCategory.RateLimit); + result.ErrorMessage.Should().Contain("retries exhausted"); + result.ErrorMessage.Should().Contain("Rate limit exceeded"); + } + + [Fact] + public void Classify_HttpRequestException_ReturnsNetwork_WithStatusCode() + { + // Arrange + var timeoutCts = new CancellationTokenSource(); + var ex = new HttpRequestException("server down", inner: null, statusCode: HttpStatusCode.BadGateway); + + // Act + var result = RegionFailureClassifier.Classify(ex, timeoutCts, CancellationToken.None); + + // Assert + result.Category.Should().Be(RegionFailureCategory.Network); + result.ErrorMessage.Should().Contain("BadGateway"); + result.ErrorMessage.Should().Contain("server down"); + } + + [Fact] + public void Classify_UnknownException_ReturnsUnexpected() + { + // Arrange + var timeoutCts = new CancellationTokenSource(); + var ex = new InvalidOperationException("boom"); + + // Act + var result = RegionFailureClassifier.Classify(ex, timeoutCts, CancellationToken.None); + + // Assert + result.Category.Should().Be(RegionFailureCategory.Unexpected); + result.ErrorMessage.Should().Contain("InvalidOperationException"); + result.ErrorMessage.Should().Contain("boom"); + } + + [Fact] + public void Classify_TimeoutTakesPrecedenceOverExternalCancellation() + { + // Arrange + var timeoutCts = new CancellationTokenSource(); + timeoutCts.Cancel(); + var externalCts = new CancellationTokenSource(); + externalCts.Cancel(); + var ex = new TaskCanceledException(); + + // Act + var result = RegionFailureClassifier.Classify(ex, timeoutCts, externalCts.Token); + + // Assert + result.Category.Should().Be(RegionFailureCategory.Timeout); + } + + [Fact] + public void Classify_NullException_Throws() + { + // Arrange + var timeoutCts = new CancellationTokenSource(); + + // Act + Action act = () => RegionFailureClassifier.Classify(null!, timeoutCts, CancellationToken.None); + + // Assert + act.Should().Throw(); + } +} diff --git a/_docs/02_tasks/_dependencies_table.md b/_docs/02_tasks/_dependencies_table.md index c33e80d..b0e21eb 100644 --- a/_docs/02_tasks/_dependencies_table.md +++ b/_docs/02_tasks/_dependencies_table.md @@ -36,7 +36,7 @@ Roadmap: `_docs/04_refactoring/03-code-quality-refactoring/analysis/refactoring_ | AZ-356 | C05 | Stub endpoints return 501 | 1 | — | 2 | Done (In Testing) | | AZ-354 | C04 | Strict CORS by default | 1 | — | 2 | Done (In Testing) | | AZ-353 | C03 | Sanitize 5xx responses via IExceptionHandler | 1 | — | 3 | Done (In Testing) | -| AZ-359 | C07 | Consolidate RegionService catch ladder | 2 | — | 3 | To Do | +| AZ-359 | C07 | Consolidate RegionService catch ladder | 2 | — | 3 | Done (In Testing) | | AZ-357 | C06 | Drop tile Version concept; new migration | 2 | — | 5 | To Do | | AZ-362 | C09 | Idempotent POST contract | 2 | AZ-353 | 3 | To Do | | AZ-366 | C13 | Consolidate Haversine + filename parser | 3 | — | 2 | To Do | diff --git a/_docs/02_tasks/todo/AZ-359_refactor_consolidate_region_catch_ladder.md b/_docs/02_tasks/done/AZ-359_refactor_consolidate_region_catch_ladder.md similarity index 100% rename from _docs/02_tasks/todo/AZ-359_refactor_consolidate_region_catch_ladder.md rename to _docs/02_tasks/done/AZ-359_refactor_consolidate_region_catch_ladder.md diff --git a/_docs/03_implementation/batch_09_report.md b/_docs/03_implementation/batch_09_report.md new file mode 100644 index 0000000..1878168 --- /dev/null +++ b/_docs/03_implementation/batch_09_report.md @@ -0,0 +1,60 @@ +# Batch 09 Report — Refactor 03 Phase 2 (start) + +Date: 2026-05-10 +Epic: AZ-350 (03-code-quality-refactoring) +Status: ✅ Complete, pushed + +## Scope (1 task / 3 SP) + +| ID | C-ID | Title | Points | Component | +|----|------|-------|--------|-----------| +| AZ-359 | C07 | Consolidate 9-way catch ladder in `RegionService.ProcessRegionAsync` | 3 | Services.RegionProcessing | + +Batch intentionally scoped to 1 task to (a) keep the turn manageable, (b) align with the K=3 cumulative review trigger that fires after batch 9. + +## Changes + +### Production +- **NEW** `SatelliteProvider.Services.RegionProcessing/RegionFailureClassifier.cs` + - `RegionFailureCategory` enum (Timeout, ExternalCancellation, TaskCanceledOther, OperationCanceledOther, RateLimit, Network, Unexpected) + - `RegionFailureClassification` record (Category, ErrorMessage) + - Static pure-function `Classify(ex, timeoutCts, externalCt)` — preserves all original error-message wording so downstream summary files do not change. + - `internal` visibility; exposed to `SatelliteProvider.Tests` via `InternalsVisibleTo`. +- **MODIFIED** `SatelliteProvider.Services.RegionProcessing/RegionService.cs` + - Replaced 9 `catch` blocks (~50 LOC) with a single `catch (Exception ex)` (~10 LOC) that delegates to `RegionFailureClassifier.Classify` and logs a single structured warning with `category` and `errorMessage` properties. Failure-path call into `HandleProcessingFailureAsync` unchanged. + - Net file delta: −38 lines. +- **MODIFIED** `SatelliteProvider.Services.RegionProcessing.csproj` + - Added `` for unit-test access. + +### Tests +- **NEW** `SatelliteProvider.Tests/RegionFailureClassifierTests.cs` (10 tests) + - One per category (7) + precedence test (timeout > external cancel) + null-guard + status-code propagation in network case. + - All ACs from `AZ-359_refactor_consolidate_region_catch_ladder.md` covered: + - AC-1: each original case maps to the same human message — verified by string assertions on `ErrorMessage`. + - AC-2: timeout precedence preserved — `Classify_TimeoutTakesPrecedenceOverExternalCancellation`. + - AC-3: HTTP status code surfaced — `Classify_HttpRequestException_ReturnsNetwork_WithStatusCode`. + - AC-4: unknown exception falls through to `Unexpected` — `Classify_UnknownException_ReturnsUnexpected`. + +## Verification + +- Unit tests: **68 passing** (was 58 → +10 new). +- Integration suite: full Docker run (api + db + integration-tests) → all green, container exits 0. +- API container builds clean → no compile errors in modified service. + +## Behavior preservation + +Observable failure path is identical: +- Same error messages stored in `region.SummaryFilePath`. +- Same `region.Status = "failed"` transition via `HandleProcessingFailureAsync`. +- Same exception types caught (no new swallowing, no narrowed coverage). +- Logging changes: 9 different log strings replaced with 1 structured log carrying `Category` as a property — net richer for log-aggregator queries while reducing source-level duplication. + +## Commit + +`[AZ-359] Refactor C07: collapse RegionService catch ladder via RegionFailureClassifier` — pushed to `dev`. + +## Up next + +- **Batch 10**: AZ-357 (drop tile Version + migration, 5 SP) — DB migration + DataAccess + TileService + tests. +- **Batch 11**: AZ-362 (idempotent POST contract, 3 SP) — Api + RegionProcessing + RouteManagement. +- **Cumulative review fires after this batch (K=3 trigger: batches 7, 8, 9 complete)** — should be the next non-implementation step. diff --git a/_docs/_autodev_state.md b/_docs/_autodev_state.md index 5f8d892..2689660 100644 --- a/_docs/_autodev_state.md +++ b/_docs/_autodev_state.md @@ -8,7 +8,7 @@ status: in_progress sub_step: phase: 4 name: execution - detail: "RUN_DIR=03-code-quality-refactoring; epic AZ-350; batches 7+8 done (Phase 1 complete: 6 tasks/12 SP); 21 tickets/~54 SP remaining; next batch 9 = AZ-359/357/362 (Phase 2 correctness, 11 SP)" + detail: "RUN_DIR=03-code-quality-refactoring; epic AZ-350; batches 7+8+9 done (Phase 1 complete + AZ-359; 7 tasks/15 SP); 20 tickets/~51 SP remaining; cumulative review due (K=3 trigger fired); after review, next batch 10 = AZ-357 (drop tile Version + migration, 5 SP), then batch 11 = AZ-362 (idempotent POST, 3 SP)" retry_count: 0 cycle: 1 tracker: jira