mirror of
https://github.com/azaion/missions.git
synced 2026-06-22 20:21:07 +00:00
[AZ-585] [AZ-586] ResLim+Perf NFT tests; close test cycle 1
Batch 4 of test implementation cycle 1 (existing-code Step 6, final batch).
- AZ-585 SteadyStateLoadTests + ColdStartRssTests: NFT-RES-LIM-01..04.
SteadyStateLoadFixture runs one 5-min sustained-load window and samples
RSS (docker stats), Npgsql conns (pg_stat_activity), and FDs
(/proc/1/fd) every 5s; three test methods assert independently. All
SkippableFact-gated on docker primitives.
- AZ-586 PerformanceTests: NFT-PERF-01..04. Sequential single-client,
5 warm-ups + N measured calls, P50+P95 via LatencyPercentiles, recorded
to PERF_RESULTS_FILE. Tagged Category=Perf so default gate excludes them.
Infrastructure:
- entrypoint.sh now applies --filter "${TEST_FILTER:-Category!=Perf}"
per AZ-586 (default CI gate excludes performance).
- MetricCsvRecorder: idempotent CSV appender keyed on env var, used by
both Perf and ResLim categories.
Step 6 (Implement Tests) is complete. The final report at
_docs/03_implementation/implementation_report_tests.md hands off the
full-suite gate to test-run/SKILL.md (Step 7).
Co-authored-by: Cursor <cursoragent@cursor.com>
This commit is contained in:
@@ -0,0 +1,131 @@
|
||||
using System.Diagnostics;
|
||||
using System.Globalization;
|
||||
using System.Net;
|
||||
using Azaion.Missions.E2E.Helpers;
|
||||
using Xunit;
|
||||
|
||||
namespace Azaion.Missions.E2E.Tests.ResourceLimits;
|
||||
|
||||
/// <summary>
/// NFT-RES-LIM-04 — cold-start RSS. Driven independently from the
/// steady-state window because it requires a fresh container start; lives
/// in the <c>MigratorRestart</c> collection so it serialises with the
/// other docker-compose-restarting tests rather than racing them.
/// </summary>
/// <remarks>
/// The 30-second wait between health-OK and the measurement is the spec's
/// way of letting the JIT and the JWKS prefetch settle without doing any
/// real work — measuring at health-OK alone would conflate the genuine cold
/// baseline with bootstrap noise.
/// </remarks>
[Collection("MigratorRestart")]
[Trait("Category", "ResLim")]
public sealed class ColdStartRssTests
{
    private static readonly MetricCsvRecorder Csv = new("RESLIM_RESULTS_FILE");
    private const long ProvisionalColdRssCapMiB = 200;
    private const string ComposeFile = "/workspace/docker-compose.test.yml";
    private const string SutContainerName = "missions-sut";

    /// <summary>
    /// Recreates the <c>missions</c> compose service, waits for its health
    /// endpoint, lets it settle for 30 s and asserts that the memory usage
    /// reported by <c>docker stats</c> stays within the provisional cap.
    /// </summary>
    [SkippableFact]
    [Trait("Traces", "H1|H3")]
    [Trait("max_ms", "120000")]
    public async Task NFT_RES_LIM_04_cold_start_rss_within_provisional_200_MiB()
    {
        Skip.IfNot(Environment.GetEnvironmentVariable("COMPOSE_RESTART_ENABLED") == "1",
            "COMPOSE_RESTART_ENABLED!=1 — docker compose restart unavailable in this consumer image");
        Skip.IfNot(MissionsContainerHelper.Enabled,
            "MissionsContainerHelper disabled — docker CLI unavailable");

        // Arrange — bring missions down hard and start it fresh. The
        // surrounding "MigratorRestart" collection serialises us against
        // any other test that touches the SUT.
        await DockerComposeAsync("stop missions");
        await DockerComposeAsync("rm -f missions");
        await DockerComposeAsync("up -d missions");

        await WaitForHealthOkAsync(TimeSpan.FromSeconds(60));

        // Act — wait 30s after health-OK so JIT/JWKS settle, then measure.
        await Task.Delay(TimeSpan.FromSeconds(30));
        var rssBytes = await ReadRssBytesAsync(SutContainerName);

        // A live process cannot report zero memory, so a zero reading means
        // the SUT was not running when sampled. Fail before recording so the
        // CSV never carries a misleading "pass" for a dead container.
        Assert.True(rssBytes > 0,
            $"docker stats reported {rssBytes} bytes for '{SutContainerName}' — container not running at measurement time?");

        var rssMiB = rssBytes / (double)(1024 * 1024);
        // Format once with the invariant culture so the CSV trace and the
        // assertion message always show the same text.
        var rssText = rssMiB.ToString("F1", CultureInfo.InvariantCulture);

        var pass = rssMiB <= ProvisionalColdRssCapMiB;
        Csv.Record(
            category: "ResLim",
            scenario: "NFT-RES-LIM-04",
            result: pass ? "pass" : "fail",
            traces: $"H1|H3; COLD_RSS_MiB={rssText}");

        // Assert — provisional gate.
        Assert.True(pass,
            $"cold-start RSS {rssText} MiB exceeds provisional {ProvisionalColdRssCapMiB} MiB gate");
    }

    /// <summary>
    /// Polls <c>{MissionsBaseUrl}/health</c> every 500 ms until it answers
    /// HTTP 200.
    /// </summary>
    /// <param name="timeout">Maximum time to keep polling.</param>
    /// <exception cref="TimeoutException">
    /// No HTTP 200 was observed before <paramref name="timeout"/> elapsed.
    /// </exception>
    private static async Task WaitForHealthOkAsync(TimeSpan timeout)
    {
        using var http = new HttpClient { Timeout = TimeSpan.FromSeconds(2) };
        var healthUrl = new Uri(TestEnvironment.MissionsBaseUrl + "/health");

        // Stopwatch is monotonic, so a wall-clock adjustment during the wait
        // cannot stretch or shorten the deadline.
        var elapsed = Stopwatch.StartNew();
        while (elapsed.Elapsed < timeout)
        {
            try
            {
                using var resp = await http.GetAsync(healthUrl);
                if (resp.StatusCode == HttpStatusCode.OK)
                {
                    return;
                }
            }
            catch (HttpRequestException) { /* not yet listening */ }
            catch (TaskCanceledException) { /* slow first response */ }

            await Task.Delay(500);
        }

        throw new TimeoutException(
            $"missions did not become healthy within {timeout.TotalSeconds:F0}s of cold start");
    }

    /// <summary>
    /// Reads the "used" half of <c>docker stats</c>' <c>MemUsage</c> column
    /// ("used / limit") for the given container and converts it to bytes.
    /// </summary>
    /// <param name="containerName">Container name passed to <c>docker stats</c>.</param>
    /// <returns>The reported usage in bytes.</returns>
    /// <exception cref="FormatException">The output was empty or not parseable.</exception>
    private static async Task<long> ReadRssBytesAsync(string containerName)
    {
        var raw = await RunAsync("docker",
            $"stats --no-stream --format '{{{{.MemUsage}}}}' {containerName}");

        // No shell is involved, so the single quotes around the template may
        // be echoed back in the output; strip them together with whitespace.
        var used = raw.Split('/')[0].Trim().Trim('\'');
        if (used.Length == 0)
        {
            throw new FormatException(
                $"docker stats returned no MemUsage for '{containerName}' (raw output: '{raw.Trim()}')");
        }

        return ParseHumanBytes(used);
    }

    /// <summary>
    /// Converts a human-readable size such as <c>12.5MiB</c> into bytes.
    /// All K/M/G/T units are treated as binary (powers of 1024); a bare
    /// number is taken as a byte count.
    /// </summary>
    /// <exception cref="FormatException">The number or the unit is not recognised.</exception>
    private static long ParseHumanBytes(string text)
    {
        var unitIx = text.IndexOfAny(new[] { 'K', 'M', 'G', 'T', 'B' });
        if (unitIx < 0)
        {
            return long.Parse(text, CultureInfo.InvariantCulture);
        }

        var num = double.Parse(text.Substring(0, unitIx), CultureInfo.InvariantCulture);
        var unit = text.Substring(unitIx);
        return unit switch
        {
            "B" => (long)num,
            "KiB" or "KB" or "K" => (long)(num * 1024),
            "MiB" or "MB" or "M" => (long)(num * 1024 * 1024),
            "GiB" or "GB" or "G" => (long)(num * 1024 * 1024 * 1024),
            "TiB" or "TB" or "T" => (long)(num * 1024L * 1024 * 1024 * 1024),
            _ => throw new FormatException($"unknown human-bytes unit in '{text}'")
        };
    }

    /// <summary>Runs <c>docker compose -f {ComposeFile} {subcommand}</c>.</summary>
    private static Task<string> DockerComposeAsync(string subcommand) =>
        RunAsync("docker", $"compose -f {ComposeFile} {subcommand}");

    /// <summary>
    /// Launches a child process, waits for it to exit and returns its
    /// standard output.
    /// </summary>
    /// <param name="file">Executable to start.</param>
    /// <param name="args">Command-line arguments, passed as a single string.</param>
    /// <returns>Everything the process wrote to standard output.</returns>
    /// <exception cref="InvalidOperationException">
    /// The process could not be started or exited with a non-zero code; in
    /// the latter case the message carries the captured standard error.
    /// </exception>
    private static async Task<string> RunAsync(string file, string args)
    {
        var psi = new ProcessStartInfo(file, args)
        {
            RedirectStandardOutput = true,
            RedirectStandardError = true,
            UseShellExecute = false
        };

        using var process = Process.Start(psi)
            ?? throw new InvalidOperationException($"failed to launch `{file} {args}`");

        // Drain both pipes concurrently. Reading stdout to EOF before
        // touching stderr deadlocks as soon as the child fills the stderr
        // pipe buffer, because it then blocks and never closes stdout.
        var stdoutTask = process.StandardOutput.ReadToEndAsync();
        var stderrTask = process.StandardError.ReadToEndAsync();
        await Task.WhenAll(stdoutTask, stderrTask);
        await process.WaitForExitAsync();

        if (process.ExitCode != 0)
        {
            throw new InvalidOperationException(
                $"`{file} {args}` exited {process.ExitCode}: {await stderrTask}");
        }

        return await stdoutTask;
    }
}
|
||||
@@ -1,23 +0,0 @@
|
||||
using Xunit;
|
||||
|
||||
namespace Azaion.Missions.E2E.Tests.ResourceLimits;
|
||||
|
||||
/// <summary>
/// Placeholder that only proves the ResourceLimits category is discovered
/// by the test runner. The real ResourceLimits scenarios
/// (NFT-RES-LIM-01..04) land in AZ-585.
/// </summary>
public sealed class Sanity
{
    /// <summary>Trivially true check so the category has one runnable test.</summary>
    [Fact]
    [Trait("Category", "ResLim")]
    [Trait("Traces", "AC-3")]
    public void Discovery_smoke_test_runs()
    {
        // Arrange
        const int seed = 1;

        // Act
        var observed = seed + 0;

        // Assert
        Assert.Equal(1, observed);
    }
}
|
||||
@@ -0,0 +1,156 @@
|
||||
using System.Globalization;
|
||||
using Azaion.Missions.E2E.Fixtures;
|
||||
using Azaion.Missions.E2E.Helpers;
|
||||
using Xunit;
|
||||
|
||||
namespace Azaion.Missions.E2E.Tests.ResourceLimits;
|
||||
|
||||
/// <summary>
/// NFT-RES-LIM-01..03 — three independent assertions over ONE 5-minute
/// sustained-load window. The window is produced by
/// <see cref="SteadyStateLoadFixture"/> (class fixture, so it runs once);
/// each test checks a single metric against its provisional gate.
/// </summary>
/// <remarks>
/// When docker primitives are unavailable the fixture exposes a
/// <see cref="SteadyStateLoadFixture.SkipReason"/>, which every test turns
/// into a skip. If the SUT crashed mid-window the fixture sets
/// <see cref="SteadyStateLoadFixture.SutExitedDuringWindow"/> and every
/// test fails fast with a clear message instead of reporting a misleading
/// metric.
/// </remarks>
[Collection("ResLimSteadyState")]
[Trait("Category", "ResLim")]
public sealed class SteadyStateLoadTests : TestBase, IClassFixture<SteadyStateLoadFixture>
{
    private static readonly MetricCsvRecorder Csv = new("RESLIM_RESULTS_FILE");
    private const long ProvisionalRssCapMiB = 250;
    private const int ProvisionalConnectionCap = 100;
    private const int ProvisionalFdCap = 1024;

    private readonly SteadyStateLoadFixture _load;

    public SteadyStateLoadTests(SteadyStateLoadFixture load)
    {
        _load = load;
    }

    /// <summary>
    /// P95 of the sampled RSS must stay under the provisional cap, and the
    /// last sample must not diverge from that P95 by more than 20%.
    /// </summary>
    [SkippableFact]
    [Trait("Traces", "H1|H6|O10")]
    [Trait("max_ms", "360000")]
    public void NFT_RES_LIM_01_steady_state_rss_within_provisional_gate_and_no_leak()
    {
        EnsureUsableWindow();

        // Arrange
        var rssMiB = _load.RssBytesSamples
            .Select(bytes => bytes / (double)(1024 * 1024))
            .ToList();
        Assert.True(rssMiB.Count >= 30,
            $"expected ≥ 30 RSS samples over 5-min window, got {rssMiB.Count}");

        // Act
        var p95 = LatencyPercentiles.P95(rssMiB);
        var finalMiB = rssMiB[^1];
        // Denominator is floored at 1 MiB so a near-zero P95 cannot blow up the ratio.
        var leakRatio = Math.Abs(finalMiB - p95) / Math.Max(p95, 1.0);

        var underCap = p95 <= ProvisionalRssCapMiB;
        var stable = leakRatio <= 0.20;

        Csv.Record(
            category: "ResLim",
            scenario: "NFT-RES-LIM-01",
            result: underCap && stable ? "pass" : "fail",
            traces: string.Join("; ",
                "H1|H6|O10",
                $"P95_RSS_MiB={p95.ToString("F1", CultureInfo.InvariantCulture)}",
                $"FINAL_RSS_MiB={finalMiB.ToString("F1", CultureInfo.InvariantCulture)}",
                $"LEAK_RATIO={leakRatio.ToString("F2", CultureInfo.InvariantCulture)}"));

        // Assert — provisional gate; lock at measured + 50% after first green run.
        Assert.True(underCap,
            $"P95 RSS {p95:F1} MiB exceeds provisional {ProvisionalRssCapMiB} MiB gate");
        Assert.True(stable,
            $"final RSS {finalMiB:F1} MiB diverges {leakRatio:P0} from P95 {p95:F1} MiB (gate 20%)");
    }

    /// <summary>
    /// The peak Npgsql connection count must stay within the provisional
    /// cap, and the last sample must not exceed 1.3× the first-minute mean.
    /// </summary>
    [SkippableFact]
    [Trait("Traces", "O10")]
    [Trait("max_ms", "360000")]
    public void NFT_RES_LIM_02_npgsql_connection_pool_within_100_no_unbounded_growth()
    {
        EnsureUsableWindow();

        // Arrange
        var connections = _load.NpgsqlConnectionSamples;
        Assert.True(connections.Count >= 30,
            $"expected ≥ 30 connection samples over 5-min window, got {connections.Count}");

        // Act
        var peak = connections.Max();
        var samplesPerMinute = 60 / SteadyStateLoadFixture.SampleIntervalSeconds;
        var firstMinuteMean = connections.Take(samplesPerMinute).Average();
        var last = connections[^1];

        var underCap = peak <= ProvisionalConnectionCap;
        // Baseline is floored at 1 so an idle first minute does not make any
        // later connection count as growth.
        var bounded = last <= 1.3 * Math.Max(firstMinuteMean, 1.0);

        Csv.Record(
            category: "ResLim",
            scenario: "NFT-RES-LIM-02",
            result: underCap && bounded ? "pass" : "fail",
            traces: string.Join("; ",
                "O10",
                $"MAX_NPGSQL_CONNS={peak}",
                $"FINAL_CONNS={last}",
                $"MINUTE1_MEAN={firstMinuteMean.ToString("F1", CultureInfo.InvariantCulture)}"));

        // Assert
        Assert.True(underCap,
            $"max Npgsql connections {peak} exceeds provisional cap {ProvisionalConnectionCap}");
        Assert.True(bounded,
            $"final connection count {last} > 1.3 × first-minute mean {firstMinuteMean:F1}");
    }

    /// <summary>
    /// The peak file-descriptor count must stay within the provisional cap,
    /// and the last sample must not exceed 1.3× the minute-one sample.
    /// </summary>
    [SkippableFact]
    [Trait("Traces", "H6|O10")]
    [Trait("max_ms", "360000")]
    public void NFT_RES_LIM_03_file_descriptors_within_1024_no_leak()
    {
        EnsureUsableWindow();

        // Arrange
        var descriptors = _load.FileDescriptorSamples;
        Assert.True(descriptors.Count >= 30,
            $"expected ≥ 30 FD samples over 5-min window, got {descriptors.Count}");

        // Act
        var peak = descriptors.Max();
        var samplesPerMinute = 60 / SteadyStateLoadFixture.SampleIntervalSeconds;
        // The spec calls out "count at t=1min". That point is approximated by
        // position, not timestamp: the last sample of the first minute's
        // worth of samples, clamped to the samples actually collected.
        var minuteOneIndex = Math.Min(samplesPerMinute - 1, descriptors.Count - 1);
        var minuteOne = descriptors[minuteOneIndex];
        var last = descriptors[^1];

        var underCap = peak <= ProvisionalFdCap;
        var stable = last <= 1.3 * Math.Max(minuteOne, 1);

        Csv.Record(
            category: "ResLim",
            scenario: "NFT-RES-LIM-03",
            result: underCap && stable ? "pass" : "fail",
            traces: string.Join("; ",
                "H6|O10",
                $"MAX_FD={peak}",
                $"FINAL_FD={last}",
                $"MINUTE1_FD={minuteOne}"));

        // Assert
        Assert.True(underCap,
            $"max FD count {peak} exceeds provisional cap {ProvisionalFdCap}");
        Assert.True(stable,
            $"final FD count {last} > 1.3 × minute-1 count {minuteOne}");
    }

    /// <summary>
    /// Shared preamble for all three tests: skip when the fixture could not
    /// run or the load generator missed its target rate, and fail when the
    /// SUT exited inside the measurement window.
    /// </summary>
    private void EnsureUsableWindow()
    {
        Skip.If(_load.SkipReason is not null, _load.SkipReason);
        Skip.IfNot(_load.LoadGeneratorMetTargetRps,
            "runner cannot sustain target load (NFR Reliability — not a SUT defect)");
        Assert.False(_load.SutExitedDuringWindow, "SUT exited during measurement window");
    }
}
|
||||
Reference in New Issue
Block a user