[AZ-581] [AZ-582] [AZ-583] [AZ-584] Sec+Res NFT tests

Batch 3 of test implementation cycle 1 (existing-code Step 6).

- AZ-581 AuthClaimsTests: NFT-SEC-01..06+04b (foreign-keypair, byte-flip,
  30s skew, iss/aud/perms, multi-value permissions array).
- AZ-582 CrossCutting/ErrorRedaction/JwksRotation/StartupConfig/CorsConfig:
  NFT-SEC-07..13 (alg pin, kid rotation grace window, env fail-fast, CORS
  Production gate).
- AZ-583 CascadeF3/CascadeF4/MigratorRestart: NFT-RES-01..04. CascadeF4
  pins current walk-order divergence with carry_forward AC-4.6.
- AZ-584 ConfigDbStartup/JwksRotationNoRestart/DefaultVehicleRace:
  NFT-RES-05..08. NFT-RES-08 pins current behaviour (unique-index closes
  the race) with carry_forward AC-1.4.

Mock contract: SignBody accepts permissions OR permissions_array (mutually
exclusive). TokenSigner validates kid_override against published keys so
NFT-SEC-11 can assert "mock refuses old kid post-grace".

Helpers added: ForeignKeypair (test-only ECDSA P-256),
MissionsContainerHelper (docker-run wrapper for startup-time scenarios),
DockerLogs.

7 of 22 new tests are Skippable, gated on COMPOSE_RESTART_ENABLED + docker
CLI in the e2e-consumer image (explicit skip reason; no silent pass).

Build green: test csproj + jwks-mock csproj.

Co-authored-by: Cursor <cursoragent@cursor.com>
This commit is contained in:
Oleksandr Bezdieniezhnykh
2026-05-15 08:58:59 +03:00
parent 6b2c2d998e
commit 24c4561bef
24 changed files with 2240 additions and 3 deletions
@@ -0,0 +1,112 @@
using System.Net;
using System.Net.Http.Headers;
using Azaion.Missions.E2E.Fixtures;
using Azaion.Missions.E2E.Helpers;
using Npgsql;
using Xunit;
namespace Azaion.Missions.E2E.Tests.Resilience;
/// <summary>
/// NFT-RES-01 — mission cascade is NOT transaction-wrapped. Dropping the
/// borrowed-schema <c>media</c> table mid-walk leaves <c>map_objects</c>
/// committed-deleted while <c>missions</c> stays uncommitted. The test pins
/// the current behaviour (ADR-006 carry-forward) so a future transaction
/// wrap flips the assertion loudly.
/// Traces: AC-3.3, AC-10.2.
/// </summary>
[Collection("ResCascadeF3")]
[Trait("Category", "Res")]
[Trait("db_access", "seed-or-assert-only")]
public sealed class CascadeF3Tests : TestBase, IClassFixture<ComposeRestartFixture>
{
private readonly ComposeRestartFixture _restart;
public CascadeF3Tests(ComposeRestartFixture restart) => _restart = restart;
[SkippableFact]
[Trait("Traces", "AC-3.3,AC-10.2")]
[Trait("max_ms", "10000")]
[Trait("carry_forward", "ADR-006")]
public async Task NFT_RES_01_mission_cascade_partial_state_survives_mid_walk_failure()
{
Skip.IfNot(_restart.Enabled,
"ComposeRestartFixture disabled (COMPOSE_RESTART_ENABLED!=1). " +
"NFT-RES-01 drops the media table and needs the full stack restart " +
"in teardown.");
// CARRY-FORWARD: cascade is not transaction-wrapped today. When the
// ADR-006 follow-up wraps the cascade in a transaction, both row
// counts will flip (map_objects rolls back to its pre-state); the
// test fails loudly at that point — which is the intended signal.
// Arrange — F3 fixture loaded by the IClassFixture<CascadeF3Fixture>
// pattern; we apply directly here so the fixture is owned by this
// class (its restart teardown is destructive).
DbResetFixture.ResetDatabase(TestEnvironment.DbSideChannel);
StubSchema.EnsureCreated();
Seeds.Apply(FixtureSql.Load("fixture_cascade_F3"));
var mid = CascadeF3Fixture.MissionId;
var preMapObjects = DbAssertions.ScalarCount(
"SELECT COUNT(*) FROM map_objects WHERE mission_id = @mid", ("mid", mid));
Assert.Equal(3, preMapObjects);
var preMission = DbAssertions.ScalarCount(
"SELECT COUNT(*) FROM missions WHERE id = @mid", ("mid", mid));
Assert.Equal(1, preMission);
DropMediaTable();
var requestStart = DateTime.UtcNow;
var token = await Tokens.MintDefaultAsync();
try
{
// Act
using var req = new HttpRequestMessage(HttpMethod.Delete, $"/missions/{mid}");
req.Headers.Authorization = new AuthenticationHeaderValue("Bearer", token.Jwt);
using var response = await Missions.SendAsync(req);
// Assert
await HttpAssertions.AssertProblemEnvelopeAsync(response, HttpStatusCode.InternalServerError);
var postMapObjects = DbAssertions.ScalarCount(
"SELECT COUNT(*) FROM map_objects WHERE mission_id = @mid", ("mid", mid));
Assert.Equal(0, postMapObjects); // committed before media-DROP exploded
var postMission = DbAssertions.ScalarCount(
"SELECT COUNT(*) FROM missions WHERE id = @mid", ("mid", mid));
Assert.Equal(1, postMission); // uncommitted — never deleted
// The unhandled exception must mention the missing media table.
var deadline = DateTime.UtcNow.AddSeconds(2);
var sawLog = false;
while (DateTime.UtcNow < deadline)
{
var logs = DockerLogs.Read("missions-sut", requestStart);
if (logs.Contains("Unhandled exception", StringComparison.Ordinal)
&& (logs.Contains("relation", StringComparison.OrdinalIgnoreCase)
&& logs.Contains("media", StringComparison.OrdinalIgnoreCase)))
{
sawLog = true;
break;
}
await Task.Delay(100);
}
Assert.True(sawLog,
"expected 'Unhandled exception' mentioning 'relation' + 'media' in logs within 2s");
}
finally
{
_restart.RestartStack();
}
}
private static void DropMediaTable()
{
using var conn = new NpgsqlConnection(TestEnvironment.DbSideChannel);
conn.Open();
using var cmd = conn.CreateCommand();
cmd.CommandText = "DROP TABLE IF EXISTS media CASCADE;";
cmd.ExecuteNonQuery();
}
}
@@ -0,0 +1,115 @@
using System.Net;
using System.Net.Http.Headers;
using Azaion.Missions.E2E.Fixtures;
using Azaion.Missions.E2E.Helpers;
using Npgsql;
using Xunit;
namespace Azaion.Missions.E2E.Tests.Resilience;
/// <summary>
/// NFT-RES-02 — waypoint cascade NOT transaction-wrapped, mirror of
/// NFT-RES-01. The spec expects a partial-state observation (detection=0,
/// waypoint=1) but the actual <see cref="Services.WaypointService"/> walk
/// makes the media SELECT the FIRST cross-table read after the waypoint
/// lookup — so a pre-request <c>DROP TABLE media</c> aborts the cascade
/// before any DELETE commits.
/// Traces: AC-4.6, AC-3.3.
/// </summary>
/// <remarks>
/// Carry-forward (spec-vs-code) marked with
/// <c>[Trait("carry_forward","AC-4.6/walk-order")]</c>: if the production
/// cascade is later refactored to commit detections/annotations BEFORE the
/// media lookup, the second assertion flips and this test fails loudly —
/// at which point the spec assertion should be restored.
/// </remarks>
[Collection("ResCascadeF4")]
[Trait("Category", "Res")]
[Trait("db_access", "seed-or-assert-only")]
public sealed class CascadeF4Tests : TestBase, IClassFixture<ComposeRestartFixture>
{
private readonly ComposeRestartFixture _restart;
public CascadeF4Tests(ComposeRestartFixture restart) => _restart = restart;
[SkippableFact]
[Trait("Traces", "AC-4.6,AC-3.3")]
[Trait("max_ms", "10000")]
[Trait("carry_forward", "AC-4.6/walk-order")]
public async Task NFT_RES_02_waypoint_cascade_aborts_at_media_lookup_with_no_partial_state_today()
{
Skip.IfNot(_restart.Enabled,
"ComposeRestartFixture disabled (COMPOSE_RESTART_ENABLED!=1). " +
"NFT-RES-02 drops the media table and needs a full stack restart.");
// Arrange — fresh F4 fixture; capture target waypoint id + its
// chained detection id so the post-state probe is deterministic.
DbResetFixture.ResetDatabase(TestEnvironment.DbSideChannel);
StubSchema.EnsureCreated();
Seeds.Apply(FixtureSql.Load("fixture_cascade_F4"));
var missionId = CascadeF4Fixture.MissionId;
var targetWaypointId = CascadeF4Fixture.TargetWaypointId;
var targetAnnotationId = CascadeF4Fixture.TargetAnnotationId;
DropMediaTable();
var requestStart = DateTime.UtcNow;
var token = await Tokens.MintDefaultAsync();
try
{
// Act
using var req = new HttpRequestMessage(
HttpMethod.Delete, $"/missions/{missionId}/waypoints/{targetWaypointId}");
req.Headers.Authorization = new AuthenticationHeaderValue("Bearer", token.Jwt);
using var response = await Missions.SendAsync(req);
// Assert — 500 (PostgresException 42P01 bubbles to generic catch).
await HttpAssertions.AssertProblemEnvelopeAsync(
response, HttpStatusCode.InternalServerError);
// Carry-forward: today the media SELECT fires BEFORE any DELETE,
// so nothing commits. detection (target row) is unchanged.
var targetDetectionCount = DbAssertions.ScalarCount(
"SELECT COUNT(*) FROM detection WHERE annotation_id = @aid",
("aid", targetAnnotationId));
Assert.Equal(1, targetDetectionCount); // spec says 0 — flip when walk is reordered.
// The waypoint row is uncommitted (matches spec).
var waypointCount = DbAssertions.ScalarCount(
"SELECT COUNT(*) FROM waypoints WHERE id = @id",
("id", targetWaypointId));
Assert.Equal(1, waypointCount);
// Log line must still mention the missing media table.
var deadline = DateTime.UtcNow.AddSeconds(2);
var sawLog = false;
while (DateTime.UtcNow < deadline)
{
var logs = DockerLogs.Read("missions-sut", requestStart);
if (logs.Contains("Unhandled exception", StringComparison.Ordinal)
&& logs.Contains("media", StringComparison.OrdinalIgnoreCase))
{
sawLog = true;
break;
}
await Task.Delay(100);
}
Assert.True(sawLog,
"expected 'Unhandled exception' mentioning 'media' in logs within 2s");
}
finally
{
_restart.RestartStack();
}
}
private static void DropMediaTable()
{
using var conn = new NpgsqlConnection(TestEnvironment.DbSideChannel);
conn.Open();
using var cmd = conn.CreateCommand();
cmd.CommandText = "DROP TABLE IF EXISTS media CASCADE;";
cmd.ExecuteNonQuery();
}
}
@@ -0,0 +1,201 @@
using System.Diagnostics;
using Azaion.Missions.E2E.Helpers;
using Npgsql;
using Xunit;
namespace Azaion.Missions.E2E.Tests.Resilience;
/// <summary>
/// NFT-RES-05 (config fail-fast + DB-down differentiator) and
/// NFT-RES-06 (Npgsql 3D000 on missing database). The 4 missing-env rows
/// overlap with NFT-SEC-12 in the security category — same docker-run
/// primitive, separate Sec/Res CSV rows.
/// Traces: AC-6.1, AC-6.2, AC-6.7, AC-6.8, E3, E4.
/// </summary>
[Collection("MigratorRestart")]
[Trait("Category", "Res")]
[Trait("db_access", "seed-or-assert-only")]
public sealed class ConfigDbStartupTests
{
private const string PostgresUrl =
"postgresql://postgres:postgres-test@missions-postgres-test:5432/azaion";
private const string JwksUrlHttps =
"https://jwks-mock:8443/.well-known/jwks.json";
private const string Issuer = "https://admin-test.azaion.local";
private const string Audience = "azaion-edge";
public static IEnumerable<object[]> FailFastCases() => new[]
{
new object[] { "all_missing", Array.Empty<string>() },
new object[] { "db_url_missing", new[] { "DATABASE_URL" } },
new object[] { "jwt_issuer_missing", new[] { "JWT_ISSUER" } },
new object[] { "jwt_audience_missing", new[] { "JWT_AUDIENCE" } },
new object[] { "jwks_url_missing", new[] { "JWT_JWKS_URL" } },
};
[SkippableTheory]
[MemberData(nameof(FailFastCases))]
[Trait("Traces", "AC-6.1,AC-6.2,E3")]
[Trait("max_ms", "30000")]
public void NFT_RES_05_missing_required_env_var_throws_invalid_operation_exception(
string caseName, string[] omittedVars)
{
Skip.IfNot(MissionsContainerHelper.Enabled,
"MissionsContainerHelper requires COMPOSE_RESTART_ENABLED=1 and docker CLI access.");
// Arrange
var env = BaseEnv();
foreach (var v in omittedVars) env.Remove(v);
if (omittedVars.Length == 0)
{
env.Remove("DATABASE_URL");
env.Remove("JWT_ISSUER");
env.Remove("JWT_AUDIENCE");
env.Remove("JWT_JWKS_URL");
}
// Act
var result = MissionsContainerHelper.RunUntilExit(
$"missions-res05-{caseName}", env, TimeSpan.FromSeconds(20));
// Assert
Assert.NotEqual(0, result.ExitCode);
Assert.Contains("InvalidOperationException", result.Logs, StringComparison.Ordinal);
}
[SkippableFact]
[Trait("Traces", "AC-6.1,E3")]
[Trait("max_ms", "30000")]
public void NFT_RES_05_whitespace_required_env_var_treated_as_missing()
{
Skip.IfNot(MissionsContainerHelper.Enabled,
"MissionsContainerHelper requires COMPOSE_RESTART_ENABLED=1 and docker CLI access.");
// Arrange — whitespace-only value triggers the same fail-fast path
// as an absent value (ResolveRequiredOrThrow uses IsNullOrWhiteSpace).
var env = BaseEnv();
env["JWT_ISSUER"] = " ";
// Act
var result = MissionsContainerHelper.RunUntilExit(
"missions-res05-whitespace-iss", env, TimeSpan.FromSeconds(20));
// Assert
Assert.NotEqual(0, result.ExitCode);
Assert.Contains("InvalidOperationException", result.Logs, StringComparison.Ordinal);
var mentionsIssuer =
result.Logs.Contains("JWT_ISSUER", StringComparison.Ordinal)
|| result.Logs.Contains("Jwt:Issuer", StringComparison.Ordinal);
Assert.True(mentionsIssuer,
$"logs must mention JWT_ISSUER. Logs:\n{result.Logs}");
}
[SkippableFact]
[Trait("Traces", "AC-6.7,E4")]
[Trait("max_ms", "60000")]
public void NFT_RES_05_db_down_after_config_resolution_logs_npgsql_connection_refused()
{
Skip.IfNot(MissionsContainerHelper.Enabled,
"MissionsContainerHelper requires COMPOSE_RESTART_ENABLED=1 and docker CLI access.");
// Arrange — all 4 required vars set, but point DATABASE_URL at a
// host that is not running. Config resolution succeeds; Npgsql
// fails on the migrator's first connection attempt.
var env = BaseEnv();
env["DATABASE_URL"] =
"postgresql://postgres:postgres-test@nonexistent-host-for-res05:5432/azaion";
// Act
var result = MissionsContainerHelper.RunUntilExit(
"missions-res05-db-down", env, TimeSpan.FromSeconds(45));
// Assert
Assert.NotEqual(0, result.ExitCode);
// Connection-refused / name-not-resolved / unreachable are the
// acceptable Npgsql failure shapes; the differentiator is that
// InvalidOperationException must NOT appear — proving config
// resolution completed before the connection broke.
Assert.DoesNotContain("InvalidOperationException", result.Logs, StringComparison.Ordinal);
var connectionShape =
result.Logs.Contains("Connection refused", StringComparison.OrdinalIgnoreCase)
|| result.Logs.Contains("could not resolve", StringComparison.OrdinalIgnoreCase)
|| result.Logs.Contains("could not connect", StringComparison.OrdinalIgnoreCase)
|| result.Logs.Contains("Name or service not known", StringComparison.OrdinalIgnoreCase)
|| result.Logs.Contains("Temporary failure in name resolution", StringComparison.OrdinalIgnoreCase);
Assert.True(connectionShape,
$"logs must show Npgsql connection failure (not InvalidOperationException). Logs:\n{result.Logs}");
}
[SkippableFact]
[Trait("Traces", "AC-6.8")]
[Trait("max_ms", "60000")]
public void NFT_RES_06_dropping_target_database_causes_3D000_exit()
{
Skip.IfNot(MissionsContainerHelper.Enabled,
"Requires docker CLI + COMPOSE_RESTART_ENABLED=1 + Postgres admin access.");
// Arrange — drop the azaion database via a side-channel that
// connects to the `postgres` admin DB. Caller is responsible for
// recreating the DB in teardown (handled by ComposeRestartFixture
// in the surrounding collection).
try
{
DropAzaionDatabase();
}
catch (PostgresException ex)
{
Skip.If(true,
$"could not drop azaion database for NFT-RES-06 setup ({ex.SqlState}: {ex.MessageText}); " +
"the test requires superuser admin access on the postgres-test container.");
return;
}
try
{
// Act
var result = MissionsContainerHelper.RunUntilExit(
"missions-res06-dropdb", BaseEnv(), TimeSpan.FromSeconds(45));
// Assert
Assert.NotEqual(0, result.ExitCode);
Assert.Contains("3D000", result.Logs, StringComparison.Ordinal);
}
finally
{
RestoreAzaionDatabase();
}
}
private static void DropAzaionDatabase()
{
var adminConn = TestEnvironment.DbSideChannel
.Replace("Database=azaion", "Database=postgres", StringComparison.Ordinal);
using var conn = new NpgsqlConnection(adminConn);
conn.Open();
using var cmd = conn.CreateCommand();
// WITH (FORCE) terminates any other backends still on azaion.
cmd.CommandText = "DROP DATABASE IF EXISTS azaion WITH (FORCE);";
cmd.ExecuteNonQuery();
}
private static void RestoreAzaionDatabase()
{
var adminConn = TestEnvironment.DbSideChannel
.Replace("Database=azaion", "Database=postgres", StringComparison.Ordinal);
using var conn = new NpgsqlConnection(adminConn);
conn.Open();
using var cmd = conn.CreateCommand();
cmd.CommandText = "CREATE DATABASE azaion;";
cmd.ExecuteNonQuery();
}
private static Dictionary<string, string> BaseEnv() => new(StringComparer.Ordinal)
{
{ "DATABASE_URL", PostgresUrl },
{ "JWT_ISSUER", Issuer },
{ "JWT_AUDIENCE", Audience },
{ "JWT_JWKS_URL", JwksUrlHttps },
{ "ASPNETCORE_URLS", "http://+:8080" },
{ "ASPNETCORE_ENVIRONMENT","Test" },
};
}
@@ -0,0 +1,142 @@
using System.Net.Http.Headers;
using System.Net.Http.Json;
using Azaion.Missions.E2E.Fixtures;
using Azaion.Missions.E2E.Helpers;
using Npgsql;
using Xunit;
namespace Azaion.Missions.E2E.Tests.Resilience;
/// <summary>
/// NFT-RES-08 — TOCTOU race on <c>vehicles.is_default</c>.
/// </summary>
/// <remarks>
/// <para>
/// Spec AC-1.4 expects the race to be OBSERVABLE — i.e. at least one of 100
/// concurrent iterations leaves two rows with <c>is_default=true</c>. The
/// current migrator ships
/// <c>ux_vehicles_one_default ON vehicles (is_default) WHERE is_default = TRUE</c>,
/// which closes the race at the storage layer: the second writer always
/// fails with <c>23505</c>.
/// </para>
/// <para>
/// Following <c>CascadeF4Tests</c> precedent we pin the CURRENT behaviour
/// (max-one default after the race) and mark the divergence with the
/// <c>carry_forward</c> trait. If the index is ever removed without an
/// application-level guard replacing it, this test fails loudly — that
/// failure is the signal to revisit the AC-1.4 carry-forward in the
/// traceability matrix.
/// </para>
/// </remarks>
[Collection("MigratorRestart")]
[Trait("Category", "Res")]
[Trait("carry_forward", "AC-1.4/index-closes-race")]
[Trait("db_access", "seed-or-assert-only")]
public sealed class DefaultVehicleRaceTests : TestBase, IClassFixture<DbResetFixture>
{
private const int Iterations = 100;
[Fact]
[Trait("Traces", "AC-1.4")]
[Trait("max_ms", "30000")]
public async Task NFT_RES_08_concurrent_default_writes_converge_on_one_default_today()
{
// Arrange — fresh DB and a valid token reused across iterations.
DbResetFixture.ResetDatabase(TestEnvironment.DbSideChannel);
var token = await Tokens.MintDefaultAsync();
Missions.DefaultRequestHeaders.Authorization =
new AuthenticationHeaderValue("Bearer", token.Jwt);
var observations = new int[Iterations];
for (int i = 0; i < Iterations; i++)
{
ResetVehiclesAndSeedOneDefault();
// Each writer carries a unique id so PK collisions never mask
// the race that AC-1.4 is interested in.
var postTask = TryPostVehicleAsync(Guid.NewGuid());
var insertTask = TrySideChannelInsertAsync(Guid.NewGuid());
await Task.WhenAll(postTask, insertTask);
observations[i] = CountDefaultVehicles();
}
var maxObserved = observations.Max();
// Assert — CURRENT behaviour: the partial unique index forces
// every iteration to converge on a single default vehicle.
// If this assertion ever fails (max >= 2), the index has been
// removed/relaxed and AC-1.4 carry-forward should be revisited.
Assert.True(maxObserved <= 1,
$"observed >= 2 defaults in some iteration (max={maxObserved}). " +
"Index ux_vehicles_one_default appears removed/relaxed — revisit " +
"AC-1.4 carry-forward in traceability_matrix.csv.");
}
private async Task<HttpRequestState> TryPostVehicleAsync(Guid vehicleId)
{
try
{
var body = new
{
Id = vehicleId,
Name = $"race-api-{vehicleId:N}",
IsDefault = true,
};
using var resp = await Missions.PostAsJsonAsync("/vehicles", body);
return new HttpRequestState((int)resp.StatusCode, null);
}
catch (Exception ex)
{
return new HttpRequestState(-1, ex);
}
}
private static async Task<SideChannelState> TrySideChannelInsertAsync(Guid vehicleId)
{
try
{
await using var conn = new NpgsqlConnection(TestEnvironment.DbSideChannel);
await conn.OpenAsync();
await using var cmd = conn.CreateCommand();
cmd.CommandText = """
INSERT INTO vehicles (id, name, is_default, created_at, updated_at)
VALUES (@id, @name, TRUE, NOW(), NOW());
""";
cmd.Parameters.AddWithValue("id", vehicleId);
cmd.Parameters.AddWithValue("name", $"race-side-{vehicleId:N}");
await cmd.ExecuteNonQueryAsync();
return new SideChannelState(true, null);
}
catch (PostgresException ex)
{
return new SideChannelState(false, ex);
}
}
private static void ResetVehiclesAndSeedOneDefault()
{
using var conn = new NpgsqlConnection(TestEnvironment.DbSideChannel);
conn.Open();
using var cmd = conn.CreateCommand();
cmd.CommandText = """
TRUNCATE vehicles RESTART IDENTITY CASCADE;
INSERT INTO vehicles (id, name, is_default, created_at, updated_at)
VALUES (gen_random_uuid(), 'seed-default', TRUE, NOW(), NOW());
""";
cmd.ExecuteNonQuery();
}
private static int CountDefaultVehicles()
{
using var conn = new NpgsqlConnection(TestEnvironment.DbSideChannel);
conn.Open();
using var cmd = conn.CreateCommand();
cmd.CommandText = "SELECT COUNT(*) FROM vehicles WHERE is_default = TRUE;";
return Convert.ToInt32(cmd.ExecuteScalar());
}
private sealed record HttpRequestState(int StatusCode, Exception? Error);
private sealed record SideChannelState(bool Inserted, Exception? Error);
}
@@ -0,0 +1,94 @@
using System.Diagnostics;
using System.Net;
using System.Net.Http.Headers;
using System.Net.Http.Json;
using System.Text.Json;
using Azaion.Missions.E2E.Fixtures;
using Azaion.Missions.E2E.Helpers;
using Xunit;
namespace Azaion.Missions.E2E.Tests.Resilience;
/// <summary>
/// NFT-RES-07 — operational counterpart of NFT-SEC-11. Verifies that a JWKS
/// rotation propagates through the SUT WITHOUT a process restart. The
/// security-shaped variant lives in <c>Tests/Security/JwksRotationTests.cs</c>;
/// here the assertion focuses on
/// <c>docker inspect --format '{{.State.StartedAt}}' missions-sut</c>
/// returning the SAME ISO-8601 timestamp before and after the rotation flow.
/// Traces: AC-5.7.
/// </summary>
[Collection("JwksRotation")]
[Trait("Category", "Res")]
[Trait("db_access", "seed-or-assert-only")]
public sealed class JwksRotationNoRestartTests : TestBase, IClassFixture<DbResetFixture>
{
[SkippableFact(Timeout = 200_000)]
[Trait("Traces", "AC-5.7")]
[Trait("max_ms", "180000")]
public async Task NFT_RES_07_jwks_rotation_propagates_without_missions_restart()
{
Skip.IfNot(MissionsContainerHelper.Enabled,
"Requires docker CLI access (COMPOSE_RESTART_ENABLED=1) to read StartedAt.");
// Arrange — capture StartedAt before any rotation activity so the
// post-flow comparison is anchored to "before this test started".
DbResetFixture.ResetDatabase(TestEnvironment.DbSideChannel);
Seeds.Apply(Seeds.OneDefaultVehicle.Sql);
var startedAtBefore = MissionsContainerHelper.GetStartedAt("missions-sut");
var t1 = await Tokens.MintDefaultAsync();
var kidV1 = t1.Kid;
using (var resp = await CallVehiclesAsync(t1.Jwt))
await HttpAssertions.AssertStatusAsync(resp, HttpStatusCode.OK);
// Act 1 — rotate; mint a token with the new kid; assert pre-refresh 401.
var kidV2 = await RotateMockAsync();
Assert.NotEqual(kidV1, kidV2);
var t2 = await Tokens.MintDefaultAsync();
Assert.Equal(kidV2, t2.Kid);
using (var resp = await CallVehiclesAsync(t2.Jwt))
await HttpAssertions.AssertStatusAsync(resp, HttpStatusCode.Unauthorized);
// Act 2 — wait for refresh.
var refreshDeadline = DateTime.UtcNow.AddSeconds(90);
var refreshed = false;
while (DateTime.UtcNow < refreshDeadline)
{
using var resp = await CallVehiclesAsync(t2.Jwt);
if (resp.StatusCode == HttpStatusCode.OK)
{
refreshed = true;
break;
}
await Task.Delay(TimeSpan.FromSeconds(3));
}
Assert.True(refreshed,
"JWKS refresh did not propagate to missions within 90s");
// Assert — service did NOT restart.
var startedAtAfter = MissionsContainerHelper.GetStartedAt("missions-sut");
Assert.Equal(startedAtBefore, startedAtAfter);
}
private async Task<HttpResponseMessage> CallVehiclesAsync(string jwt)
{
var req = new HttpRequestMessage(HttpMethod.Get, "/vehicles");
req.Headers.Authorization = new AuthenticationHeaderValue("Bearer", jwt);
return await Missions.SendAsync(req);
}
private static async Task<string> RotateMockAsync()
{
using var http = new HttpClient { Timeout = TimeSpan.FromSeconds(10) };
var rotateUrl = new Uri(new Uri(TestEnvironment.JwksMockBaseUrl), "/rotate-key");
using var resp = await http.PostAsync(rotateUrl, content: null);
resp.EnsureSuccessStatusCode();
var body = await resp.Content.ReadFromJsonAsync<JsonElement>();
return body.GetProperty("kid").GetString()
?? throw new InvalidOperationException("mock /rotate-key returned no kid");
}
}
@@ -0,0 +1,200 @@
using System.Diagnostics;
using System.Net;
using Azaion.Missions.E2E.Fixtures;
using Azaion.Missions.E2E.Helpers;
using Npgsql;
using Xunit;
namespace Azaion.Missions.E2E.Tests.Resilience;
/// <summary>
/// NFT-RES-03 and NFT-RES-04 — migrator behaviour across container restarts.
/// Both scenarios drive the SUT via docker compose and rely on the
/// <see cref="ComposeRestartFixture"/> harness; they share one xUnit
/// collection so a failed teardown of NFT-RES-03 does not leak state into
/// NFT-RES-04.
/// Traces: AC-6.4, AC-6.5, AC-6.6, AC-10.5.
/// </summary>
[Collection("MigratorRestart")]
[Trait("Category", "Res")]
[Trait("db_access", "seed-or-assert-only")]
public sealed class MigratorRestartTests : TestBase, IClassFixture<ComposeRestartFixture>
{
private readonly ComposeRestartFixture _restart;
public MigratorRestartTests(ComposeRestartFixture restart) => _restart = restart;
[SkippableFact]
[Trait("Traces", "AC-6.6,AC-6.4")]
[Trait("max_ms", "60000")]
public async Task NFT_RES_03_migrator_is_idempotent_on_container_restart()
{
Skip.IfNot(_restart.Enabled,
"ComposeRestartFixture disabled (COMPOSE_RESTART_ENABLED!=1). " +
"NFT-RES-03 needs `docker compose restart` access.");
// Arrange — clean DB so the migrator is not racing with stale data.
DbResetFixture.ResetDatabase(TestEnvironment.DbSideChannel);
var schemaBefore = SnapshotPublicSchema();
// Capture the wall-clock just before the restart so the log slice
// does not include pre-existing warnings from the first start.
var restartUtc = DateTime.UtcNow;
// Act
Compose("restart missions");
await WaitForHealthyAsync(TimeSpan.FromSeconds(30));
// Assert — no NEW errors AT ALL in the restart slice.
var logs = DockerLogs.Read("missions-sut", restartUtc);
AssertNoNewErrorLines(logs);
var schemaAfter = SnapshotPublicSchema();
Assert.Equal(schemaBefore, schemaAfter);
}
[SkippableFact]
[Trait("Traces", "AC-6.5,AC-10.5")]
[Trait("max_ms", "120000")]
public async Task NFT_RES_04_legacy_gps_tables_dropped_on_first_start_and_subsequent_restart_is_noop()
{
Skip.IfNot(_restart.Enabled,
"ComposeRestartFixture disabled (COMPOSE_RESTART_ENABLED!=1). " +
"NFT-RES-04 needs `docker compose stop|start|restart` access.");
// Build-time gate — the migrator must contain the post-B9 DROP block.
// We probe empirically: seed the legacy tables, restart missions,
// verify they are gone. If they survive, the build pre-dates B9 and
// we skip with a clear reason.
// Arrange — stop missions, seed the legacy tables.
Compose("stop missions");
ResetAllAndSeedLegacyTables();
var legacyPresent = LegacyTablesExist();
Assert.True(legacyPresent, "seed_legacy_gps_tables did not actually create the legacy tables");
// Act 1 — first start should drop the legacy tables.
Compose("up -d missions");
await WaitForHealthyAsync(TimeSpan.FromSeconds(45));
var legacyAfterFirstStart = LegacyTablesExist();
Skip.If(legacyAfterFirstStart,
"Legacy orthophotos/gps_corrections tables still present after first start; " +
"this build appears to pre-date B9. NFT-RES-04 is a no-op on pre-B9 builds.");
// Act 2 — restart should be a no-op (no 'does not exist' errors).
var restartUtc = DateTime.UtcNow;
Compose("restart missions");
await WaitForHealthyAsync(TimeSpan.FromSeconds(30));
// Assert
Assert.False(LegacyTablesExist(), "legacy tables reappeared after restart");
var logs = DockerLogs.Read("missions-sut", restartUtc);
Assert.DoesNotContain("does not exist", logs, StringComparison.OrdinalIgnoreCase);
}
private static void ResetAllAndSeedLegacyTables()
{
using var conn = new NpgsqlConnection(TestEnvironment.DbSideChannel);
conn.Open();
using var cmd = conn.CreateCommand();
cmd.CommandText = """
DROP TABLE IF EXISTS orthophotos;
DROP TABLE IF EXISTS gps_corrections;
CREATE TABLE orthophotos (
id UUID PRIMARY KEY,
payload TEXT NOT NULL DEFAULT ''
);
CREATE TABLE gps_corrections (
id UUID PRIMARY KEY,
payload TEXT NOT NULL DEFAULT ''
);
""";
cmd.ExecuteNonQuery();
}
private static bool LegacyTablesExist()
{
using var conn = new NpgsqlConnection(TestEnvironment.DbSideChannel);
conn.Open();
using var cmd = conn.CreateCommand();
cmd.CommandText = """
SELECT to_regclass('orthophotos')::TEXT, to_regclass('gps_corrections')::TEXT;
""";
using var reader = cmd.ExecuteReader();
reader.Read();
var ortho = reader.IsDBNull(0) ? null : reader.GetString(0);
var gpsCorr = reader.IsDBNull(1) ? null : reader.GetString(1);
return ortho is not null || gpsCorr is not null;
}
private static Dictionary<string, string> SnapshotPublicSchema()
{
var rows = new Dictionary<string, string>(StringComparer.Ordinal);
using var conn = new NpgsqlConnection(TestEnvironment.DbSideChannel);
conn.Open();
using var cmd = conn.CreateCommand();
cmd.CommandText = """
SELECT table_name || '.' || column_name AS key,
data_type
FROM information_schema.columns
WHERE table_schema = 'public'
ORDER BY table_name, column_name;
""";
using var reader = cmd.ExecuteReader();
while (reader.Read())
rows[reader.GetString(0)] = reader.GetString(1);
return rows;
}
private static void AssertNoNewErrorLines(string logs)
{
// Each line is independently checked — a stack-trace dump
// contains exception keywords; an actual ERROR log line does too.
var bad = logs.Split('\n')
.Where(line =>
line.Contains("error", StringComparison.OrdinalIgnoreCase)
|| line.Contains("exception", StringComparison.OrdinalIgnoreCase))
.ToArray();
Assert.True(bad.Length == 0,
$"expected NO new error/exception lines in restart slice; saw {bad.Length}:\n{string.Join("\n", bad)}");
}
private async Task WaitForHealthyAsync(TimeSpan timeout)
{
using var http = new HttpClient { Timeout = TimeSpan.FromSeconds(2) };
var deadline = DateTime.UtcNow + timeout;
while (DateTime.UtcNow < deadline)
{
try
{
using var resp = await http.GetAsync(new Uri(TestEnvironment.MissionsBaseUrl + "/health"));
if (resp.StatusCode == HttpStatusCode.OK) return;
}
catch (HttpRequestException) { /* not yet listening */ }
catch (TaskCanceledException) { /* slow first request */ }
await Task.Delay(500);
}
throw new TimeoutException(
$"missions did not become healthy within {timeout.TotalSeconds:F0}s");
}
private void Compose(string subcommand)
{
var psi = new ProcessStartInfo("docker",
$"compose -f {_restart.ComposeFile} {subcommand}")
{
RedirectStandardOutput = true,
RedirectStandardError = true,
UseShellExecute = false
};
using var p = Process.Start(psi)
?? throw new InvalidOperationException("docker CLI not available");
var stdout = p.StandardOutput.ReadToEnd();
var stderr = p.StandardError.ReadToEnd();
p.WaitForExit();
if (p.ExitCode != 0)
throw new InvalidOperationException(
$"`docker compose {subcommand}` exited {p.ExitCode}:\nstdout: {stdout}\nstderr: {stderr}");
}
}