[AZ-585] [AZ-586] ResLim+Perf NFT tests; close test cycle 1

Batch 4 of test implementation cycle 1 (existing-code Step 6, final batch).

- AZ-585 SteadyStateLoadTests + ColdStartRssTests: NFT-RES-LIM-01..04.
  SteadyStateLoadFixture runs one 5-min sustained-load window and samples
  RSS (docker stats), Npgsql conns (pg_stat_activity), and FDs
  (/proc/1/fd) every 5s; three test methods assert independently. All
  SkippableFact-gated on docker primitives.
- AZ-586 PerformanceTests: NFT-PERF-01..04. Sequential single-client,
  5 warm-ups + N measured calls, P50+P95 via LatencyPercentiles, recorded
  to PERF_RESULTS_FILE. Tagged Category=Perf so default gate excludes them.

Infrastructure:
- entrypoint.sh now applies --filter "${TEST_FILTER:-Category!=Perf}"
  per AZ-586 (default CI gate excludes performance).
- MetricCsvRecorder: idempotent CSV appender keyed on env var, used by
  both Perf and ResLim categories.

Step 6 (Implement Tests) is complete. Final report at
_docs/03_implementation/implementation_report_tests.md handoffs the
full-suite gate to test-run/SKILL.md (Step 7).

Co-authored-by: Cursor <cursoragent@cursor.com>
This commit is contained in:
Oleksandr Bezdieniezhnykh
2026-05-15 09:11:53 +03:00
parent 26126e6216
commit 001e80fe96
14 changed files with 1181 additions and 52 deletions
@@ -0,0 +1,131 @@
using System.Diagnostics;
using System.Globalization;
using System.Net;
using Azaion.Missions.E2E.Helpers;
using Xunit;
namespace Azaion.Missions.E2E.Tests.ResourceLimits;
/// <summary>
/// NFT-RES-LIM-04 — cold-start RSS. Driven independently from the
/// steady-state window because it requires a fresh container start; lives
/// in the <c>MigratorRestart</c> collection so it serialises with the
/// other docker-compose-restarting tests rather than racing them.
/// </summary>
/// <remarks>
/// The 30-second wait between health-OK and the measurement is the spec's
/// way of letting the JIT and the JWKS prefetch settle without doing any
/// real work — measuring at health-OK alone would conflate the genuine cold
/// baseline with bootstrap noise.
/// </remarks>
[Collection("MigratorRestart")]
[Trait("Category", "ResLim")]
public sealed class ColdStartRssTests
{
private static readonly MetricCsvRecorder Csv = new("RESLIM_RESULTS_FILE");
private const long ProvisionalColdRssCapMiB = 200;
private const string ComposeFile = "/workspace/docker-compose.test.yml";
[SkippableFact]
[Trait("Traces", "H1|H3")]
[Trait("max_ms", "120000")]
public async Task NFT_RES_LIM_04_cold_start_rss_within_provisional_200_MiB()
{
Skip.IfNot(Environment.GetEnvironmentVariable("COMPOSE_RESTART_ENABLED") == "1",
"COMPOSE_RESTART_ENABLED!=1 — docker compose restart unavailable in this consumer image");
Skip.IfNot(MissionsContainerHelper.Enabled,
"MissionsContainerHelper disabled — docker CLI unavailable");
// Arrange — bring missions down hard and start it fresh. The
// surrounding "MigratorRestart" collection serialises us against
// any other test that touches the SUT.
DockerCompose("stop missions");
DockerCompose("rm -f missions");
DockerCompose("up -d missions");
await WaitForHealthOkAsync(TimeSpan.FromSeconds(60));
// Act — wait 30s after health-OK so JIT/JWKS settle, then measure.
await Task.Delay(TimeSpan.FromSeconds(30));
var rssBytes = ReadRssBytes("missions-sut");
var rssMiB = rssBytes / (double)(1024 * 1024);
var pass = rssMiB <= ProvisionalColdRssCapMiB;
Csv.Record(
category: "ResLim",
scenario: "NFT-RES-LIM-04",
result: pass ? "pass" : "fail",
traces: $"H1|H3; COLD_RSS_MiB={rssMiB.ToString("F1", CultureInfo.InvariantCulture)}");
// Assert — provisional gate.
Assert.True(pass,
$"cold-start RSS {rssMiB:F1} MiB exceeds provisional {ProvisionalColdRssCapMiB} MiB gate");
}
private static async Task WaitForHealthOkAsync(TimeSpan timeout)
{
using var http = new HttpClient { Timeout = TimeSpan.FromSeconds(2) };
var deadline = DateTime.UtcNow + timeout;
var healthUrl = new Uri(TestEnvironment.MissionsBaseUrl + "/health");
while (DateTime.UtcNow < deadline)
{
try
{
using var resp = await http.GetAsync(healthUrl);
if (resp.StatusCode == HttpStatusCode.OK) return;
}
catch (HttpRequestException) { /* not yet listening */ }
catch (TaskCanceledException) { /* slow first response */ }
await Task.Delay(500);
}
throw new TimeoutException(
$"missions did not become healthy within {timeout.TotalSeconds:F0}s of cold start");
}
private static long ReadRssBytes(string containerName)
{
var raw = Run("docker",
$"stats --no-stream --format '{{{{.MemUsage}}}}' {containerName}");
var lhs = raw.Split('/')[0].Trim().Trim('\'');
return ParseHumanBytes(lhs);
}
private static long ParseHumanBytes(string text)
{
var unitIx = text.IndexOfAny(new[] { 'K', 'M', 'G', 'T', 'B' });
if (unitIx < 0) return long.Parse(text, CultureInfo.InvariantCulture);
var num = double.Parse(text.Substring(0, unitIx), CultureInfo.InvariantCulture);
var unit = text.Substring(unitIx);
return unit switch
{
"B" => (long)num,
"KiB" or "KB" or "K" => (long)(num * 1024),
"MiB" or "MB" or "M" => (long)(num * 1024 * 1024),
"GiB" or "GB" or "G" => (long)(num * 1024 * 1024 * 1024),
"TiB" or "TB" or "T" => (long)(num * 1024L * 1024 * 1024 * 1024),
_ => throw new FormatException($"unknown human-bytes unit in '{text}'")
};
}
private static void DockerCompose(string subcommand) =>
Run("docker", $"compose -f {ComposeFile} {subcommand}");
private static string Run(string file, string args)
{
var psi = new ProcessStartInfo(file, args)
{
RedirectStandardOutput = true,
RedirectStandardError = true,
UseShellExecute = false
};
using var p = Process.Start(psi)
?? throw new InvalidOperationException($"failed to launch `{file} {args}`");
var stdout = p.StandardOutput.ReadToEnd();
var stderr = p.StandardError.ReadToEnd();
p.WaitForExit();
if (p.ExitCode != 0)
throw new InvalidOperationException(
$"`{file} {args}` exited {p.ExitCode}: {stderr}");
return stdout;
}
}
@@ -1,23 +0,0 @@
using Xunit;
namespace Azaion.Missions.E2E.Tests.ResourceLimits;
/// <summary>
/// Discovery-only smoke test for the ResourceLimits category. Real
/// ResourceLimits scenarios (NFT-RES-LIM-01..04) land in AZ-585.
/// </summary>
public sealed class Sanity
{
[Fact]
[Trait("Category", "ResLim")]
[Trait("Traces", "AC-3")]
public void Discovery_smoke_test_runs()
{
// Arrange
const int sentinel = 1;
// Act
var result = sentinel + 0;
// Assert
Assert.Equal(1, result);
}
}
@@ -0,0 +1,156 @@
using System.Globalization;
using Azaion.Missions.E2E.Fixtures;
using Azaion.Missions.E2E.Helpers;
using Xunit;
namespace Azaion.Missions.E2E.Tests.ResourceLimits;
/// <summary>
/// NFT-RES-LIM-01..03 — three observations on a SINGLE 5-minute sustained
/// load window. The window itself lives in
/// <see cref="SteadyStateLoadFixture"/> (class-scoped, runs once); each
/// test asserts one metric against its provisional gate.
/// </summary>
/// <remarks>
/// The fixture skips itself when docker primitives are unavailable; the
/// tests detect that via <see cref="SteadyStateLoadFixture.SkipReason"/>
/// and surface the same reason through <c>Skip.IfNot</c>. The fixture
/// also flips <see cref="SteadyStateLoadFixture.SutExitedDuringWindow"/>
/// if the SUT crashes mid-window — every test fails fast with a clear
/// message rather than reporting a misleading metric.
/// </remarks>
[Collection("ResLimSteadyState")]
[Trait("Category", "ResLim")]
public sealed class SteadyStateLoadTests : TestBase, IClassFixture<SteadyStateLoadFixture>
{
private static readonly MetricCsvRecorder Csv = new("RESLIM_RESULTS_FILE");
private const long ProvisionalRssCapMiB = 250;
private const int ProvisionalConnectionCap = 100;
private const int ProvisionalFdCap = 1024;
private readonly SteadyStateLoadFixture _load;
public SteadyStateLoadTests(SteadyStateLoadFixture load) => _load = load;
[SkippableFact]
[Trait("Traces", "H1|H6|O10")]
[Trait("max_ms", "360000")]
public void NFT_RES_LIM_01_steady_state_rss_within_provisional_gate_and_no_leak()
{
Skip.If(_load.SkipReason is not null, _load.SkipReason);
Skip.IfNot(_load.LoadGeneratorMetTargetRps,
"runner cannot sustain target load (NFR Reliability — not a SUT defect)");
Assert.False(_load.SutExitedDuringWindow, "SUT exited during measurement window");
// Arrange
var samplesMiB = _load.RssBytesSamples.Select(b => b / (double)(1024 * 1024)).ToList();
Assert.True(samplesMiB.Count >= 30,
$"expected ≥ 30 RSS samples over 5-min window, got {samplesMiB.Count}");
// Act
var p95 = LatencyPercentiles.P95(samplesMiB);
var finalMiB = samplesMiB[^1];
var leakRatio = Math.Abs(finalMiB - p95) / Math.Max(p95, 1.0);
var withinCap = p95 <= ProvisionalRssCapMiB;
var noLeak = leakRatio <= 0.20;
var pass = withinCap && noLeak;
Csv.Record(
category: "ResLim",
scenario: "NFT-RES-LIM-01",
result: pass ? "pass" : "fail",
traces: $"H1|H6|O10; "
+ $"P95_RSS_MiB={p95.ToString("F1", CultureInfo.InvariantCulture)}; "
+ $"FINAL_RSS_MiB={finalMiB.ToString("F1", CultureInfo.InvariantCulture)}; "
+ $"LEAK_RATIO={leakRatio.ToString("F2", CultureInfo.InvariantCulture)}");
// Assert — provisional gate; lock at measured + 50% after first green run.
Assert.True(withinCap,
$"P95 RSS {p95:F1} MiB exceeds provisional {ProvisionalRssCapMiB} MiB gate");
Assert.True(noLeak,
$"final RSS {finalMiB:F1} MiB diverges {leakRatio:P0} from P95 {p95:F1} MiB (gate 20%)");
}
[SkippableFact]
[Trait("Traces", "O10")]
[Trait("max_ms", "360000")]
public void NFT_RES_LIM_02_npgsql_connection_pool_within_100_no_unbounded_growth()
{
Skip.If(_load.SkipReason is not null, _load.SkipReason);
Skip.IfNot(_load.LoadGeneratorMetTargetRps,
"runner cannot sustain target load (NFR Reliability — not a SUT defect)");
Assert.False(_load.SutExitedDuringWindow, "SUT exited during measurement window");
var samples = _load.NpgsqlConnectionSamples;
Assert.True(samples.Count >= 30,
$"expected ≥ 30 connection samples over 5-min window, got {samples.Count}");
// Act
var max = samples.Max();
var firstMinuteSampleCount = 60 / SteadyStateLoadFixture.SampleIntervalSeconds;
var firstMinute = samples.Take(firstMinuteSampleCount).ToList();
var firstMinuteMean = firstMinute.Average();
var finalCount = samples[^1];
var withinCap = max <= ProvisionalConnectionCap;
var noUnboundedGrowth = finalCount <= 1.3 * Math.Max(firstMinuteMean, 1.0);
var pass = withinCap && noUnboundedGrowth;
Csv.Record(
category: "ResLim",
scenario: "NFT-RES-LIM-02",
result: pass ? "pass" : "fail",
traces: $"O10; MAX_NPGSQL_CONNS={max}; "
+ $"FINAL_CONNS={finalCount}; "
+ $"MINUTE1_MEAN={firstMinuteMean.ToString("F1", CultureInfo.InvariantCulture)}");
// Assert
Assert.True(withinCap,
$"max Npgsql connections {max} exceeds provisional cap {ProvisionalConnectionCap}");
Assert.True(noUnboundedGrowth,
$"final connection count {finalCount} > 1.3 × first-minute mean {firstMinuteMean:F1}");
}
[SkippableFact]
[Trait("Traces", "H6|O10")]
[Trait("max_ms", "360000")]
public void NFT_RES_LIM_03_file_descriptors_within_1024_no_leak()
{
Skip.If(_load.SkipReason is not null, _load.SkipReason);
Skip.IfNot(_load.LoadGeneratorMetTargetRps,
"runner cannot sustain target load (NFR Reliability — not a SUT defect)");
Assert.False(_load.SutExitedDuringWindow, "SUT exited during measurement window");
var samples = _load.FileDescriptorSamples;
Assert.True(samples.Count >= 30,
$"expected ≥ 30 FD samples over 5-min window, got {samples.Count}");
// Act
var max = samples.Max();
var minuteOneSampleCount = 60 / SteadyStateLoadFixture.SampleIntervalSeconds;
// The spec calls out "count at t=1min" — anchor on the sample whose
// timestamp is closest to (start + 60s).
var minuteOneIx = Math.Min(minuteOneSampleCount - 1, samples.Count - 1);
var minuteOneCount = samples[minuteOneIx];
var finalCount = samples[^1];
var withinCap = max <= ProvisionalFdCap;
var noLeak = finalCount <= 1.3 * Math.Max(minuteOneCount, 1);
var pass = withinCap && noLeak;
Csv.Record(
category: "ResLim",
scenario: "NFT-RES-LIM-03",
result: pass ? "pass" : "fail",
traces: $"H6|O10; MAX_FD={max}; "
+ $"FINAL_FD={finalCount}; MINUTE1_FD={minuteOneCount}");
// Assert
Assert.True(withinCap,
$"max FD count {max} exceeds provisional cap {ProvisionalFdCap}");
Assert.True(noLeak,
$"final FD count {finalCount} > 1.3 × minute-1 count {minuteOneCount}");
}
}