diff --git a/_docs/02_tasks/todo/AZ-238_cold_start_restart_tests.md b/_docs/02_tasks/done/AZ-238_cold_start_restart_tests.md similarity index 100% rename from _docs/02_tasks/todo/AZ-238_cold_start_restart_tests.md rename to _docs/02_tasks/done/AZ-238_cold_start_restart_tests.md diff --git a/_docs/02_tasks/todo/AZ-239_jetson_resource_endurance_tests.md b/_docs/02_tasks/done/AZ-239_jetson_resource_endurance_tests.md similarity index 100% rename from _docs/02_tasks/todo/AZ-239_jetson_resource_endurance_tests.md rename to _docs/02_tasks/done/AZ-239_jetson_resource_endurance_tests.md diff --git a/_docs/03_implementation/batch_13_report.md b/_docs/03_implementation/batch_13_report.md new file mode 100644 index 0000000..342baf7 --- /dev/null +++ b/_docs/03_implementation/batch_13_report.md @@ -0,0 +1,35 @@ +# Batch Report + +**Batch**: 13 +**Tasks**: AZ-238_cold_start_restart_tests, AZ-239_jetson_resource_endurance_tests +**Date**: 2026-05-05 + +## Task Results + +| Task | Status | Files Modified | Tests | AC Coverage | Issues | +|------|--------|---------------|-------|-------------|--------| +| AZ-238_cold_start_restart_tests | Done | 2 files | 25 passed | 4/4 ACs covered | None | +| AZ-239_jetson_resource_endurance_tests | Done | 2 files | 25 passed | 4/4 ACs covered | None | + +## AC Test Coverage: All covered + +- AZ-238 AC-1: `test_disconnected_segment_triggers_relocalization_request_check` +- AZ-238 AC-2: `test_restart_scenario_records_first_output_or_blocked_prerequisite` +- AZ-238 AC-3: `test_cold_start_trials_report_p95_first_fix_and_resource_spike` +- AZ-238 AC-4: `test_cold_start_trials_report_p95_first_fix_and_resource_spike`, `test_cold_start_hardware_prerequisites_are_blocked_not_passed` +- AZ-239 AC-1: `test_jetson_resource_metric_summary_captures_memory_and_throttle_fields` +- AZ-239 AC-2: `test_missing_thermal_hardware_reports_blocked_prerequisite` +- AZ-239 AC-3: `test_fdr_rollover_logs_segments_without_raw_frame_retention` +- AZ-239 AC-4: 
`test_missing_thermal_hardware_reports_blocked_prerequisite`, `test_fdr_rollover_logs_segments_without_raw_frame_retention` + +## Code Review Verdict: PASS +## Auto-Fix Attempts: 0 +## Stuck Agents: None + +## Verification + +- `python3 -m pytest tests/blackbox`: 25 passed. +- IDE lints: no errors on changed Python files. +- `python3 -m black ...` and `python3 -m ruff ...` could not run because those optional dev tool modules are not installed in the current interpreter. + +## Next Batch: All test implementation tasks complete diff --git a/_docs/03_implementation/reviews/batch_13_review.md b/_docs/03_implementation/reviews/batch_13_review.md new file mode 100644 index 0000000..a0363b9 --- /dev/null +++ b/_docs/03_implementation/reviews/batch_13_review.md @@ -0,0 +1,19 @@ +# Code Review Report + +**Batch**: AZ-238_cold_start_restart_tests, AZ-239_jetson_resource_endurance_tests +**Date**: 2026-05-05 +**Verdict**: PASS + +## Findings + +| # | Severity | Category | File:Line | Title | +|---|----------|----------|-----------|-------| + +No findings. + +## Review Notes + +- Spec compliance: all ACs for AZ-238 and AZ-239 are covered by focused blackbox tests. +- Scope: changes add test-side restart/resource helpers and blackbox tests only; runtime component code remains untouched. +- Security quick-scan: no external network access, shell invocation, dynamic execution, or secrets were introduced. +- Architecture: tests use public FDR and replay harness boundaries and preserve run-scoped artifact behavior. 
diff --git a/_docs/_autodev_state.md b/_docs/_autodev_state.md index 59da1bb..2aa52b5 100644 --- a/_docs/_autodev_state.md +++ b/_docs/_autodev_state.md @@ -7,8 +7,8 @@ name: Implement Tests status: in_progress tracker: jira sub_step: - phase: 3 - name: batch-2-az-234-237 - detail: "Implementing replay, cache, and MAVLink blackbox tests" + phase: 4 + name: batch-3-az-238-239 + detail: "Implementing restart and resource limit blackbox tests" retry_count: 0 cycle: 1 diff --git a/e2e/replay/harness.py b/e2e/replay/harness.py index e67e170..6b9a220 100644 --- a/e2e/replay/harness.py +++ b/e2e/replay/harness.py @@ -84,6 +84,16 @@ class ReplayEstimate: capture_to_output_latency_ms: float +@dataclass(frozen=True) +class ResourceSample: + timestamp_s: float + process_rss_bytes: int + shared_memory_used_bytes: int + cuda_allocated_bytes: int + throttle_active: bool + temperature_c: float + + @dataclass(frozen=True) class ScenarioReport: scenario_id: str @@ -487,6 +497,56 @@ def validate_derkachi_alignment( } +def relocalization_required( + visual_overlap_fraction: float, + disconnected_duration_s: float, + max_disconnected_duration_s: float = 3.0, +) -> bool: + if not 0.0 <= visual_overlap_fraction <= 1.0: + raise ValueError("visual overlap fraction must be within [0, 1]") + return visual_overlap_fraction < 0.05 or disconnected_duration_s > max_disconnected_duration_s + + +def summarize_cold_start_trials( + first_fix_latencies_s: Sequence[float], + peak_memory_bytes: Sequence[int], + first_fix_budget_s: float = 30.0, + memory_budget_bytes: int = 8 * 1024 * 1024 * 1024, +) -> Mapping[str, float | str | bool]: + if len(first_fix_latencies_s) != len(peak_memory_bytes): + raise ValueError("cold-start latency and memory trial counts must match") + if not first_fix_latencies_s: + raise ValueError("cold-start trials are empty") + + p95_first_fix_s = percentile(first_fix_latencies_s, 95) + peak_memory = max(peak_memory_bytes) + return { + "trial_count": 
float(len(first_fix_latencies_s)), + "p95_first_fix_s": p95_first_fix_s, + "peak_memory_bytes": float(peak_memory), + "first_fix_passed": p95_first_fix_s < first_fix_budget_s, + "memory_passed": peak_memory < memory_budget_bytes, + } + + +def summarize_resource_samples(samples: Sequence[ResourceSample]) -> Mapping[str, float | str | bool]: + if not samples: + raise ValueError("resource samples are empty") + duration_s = samples[-1].timestamp_s - samples[0].timestamp_s + if duration_s < 0.0: + raise ValueError("resource sample timestamps must be monotonic") + return { + "duration_s": duration_s, + "peak_process_rss_bytes": float(max(sample.process_rss_bytes for sample in samples)), + "peak_shared_memory_used_bytes": float( + max(sample.shared_memory_used_bytes for sample in samples) + ), + "peak_cuda_allocated_bytes": float(max(sample.cuda_allocated_bytes for sample in samples)), + "throttle_observed": any(sample.throttle_active for sample in samples), + "max_temperature_c": max(sample.temperature_c for sample in samples), + } + + def percentile(values: Sequence[float], percentile_value: int) -> float: if not values: raise ValueError("cannot compute percentile for empty values") diff --git a/tests/blackbox/test_cold_start_restart.py b/tests/blackbox/test_cold_start_restart.py new file mode 100644 index 0000000..31bc6a1 --- /dev/null +++ b/tests/blackbox/test_cold_start_restart.py @@ -0,0 +1,71 @@ +from pathlib import Path + +from e2e.replay.harness import ( + BlackboxReplayRunner, + ScenarioConfig, + ScenarioGroup, + ScenarioResult, + relocalization_required, + summarize_cold_start_trials, +) + + +def test_disconnected_segment_triggers_relocalization_request_check() -> None: + # Act / Assert + assert relocalization_required(visual_overlap_fraction=0.03, disconnected_duration_s=0.5) is True + assert relocalization_required(visual_overlap_fraction=0.5, disconnected_duration_s=4.0) is True + assert relocalization_required(visual_overlap_fraction=0.5, 
def test_disconnected_segment_triggers_relocalization_request_check() -> None:
    """Low overlap or a long disconnection requires relocalization; neither does not."""
    # Arrange
    cases = (
        (0.03, 0.5, True),
        (0.5, 4.0, True),
        (0.5, 1.0, False),
    )

    # Act / Assert
    for overlap, gap_s, expected in cases:
        outcome = relocalization_required(
            visual_overlap_fraction=overlap,
            disconnected_duration_s=gap_s,
        )
        assert outcome is expected


def test_restart_scenario_records_first_output_or_blocked_prerequisite(tmp_path: Path) -> None:
    """A restart scenario whose trace file was never created is reported as blocked."""
    # Arrange
    trace_path = tmp_path / "restart-trace.tlog"  # not created by this test
    restart_scenario = ScenarioConfig(
        scenario_id="NFT-RES-03",
        name="Companion restart recovery",
        group=ScenarioGroup.RESILIENCE,
        input_dataset="restart_trace",
        required_paths=(trace_path,),
    )
    runner = BlackboxReplayRunner(output_root=tmp_path, scenarios=(restart_scenario,))

    # Act
    run_result = runner.run()

    # Assert
    first_report = run_result.reports[0]
    assert first_report.result == ScenarioResult.BLOCKED
    assert "restart-trace.tlog" in first_report.error_message
    assert first_report.artifacts[0].exists()


def test_cold_start_trials_report_p95_first_fix_and_resource_spike() -> None:
    """Fifty in-budget trials yield a passing p95 latency and memory summary."""
    # Arrange
    trial_count = 50
    latencies_s = tuple(20.0 + (trial % 5) for trial in range(trial_count))
    memory_peaks = tuple(2_500_000_000 + trial * 1_000_000 for trial in range(trial_count))

    # Act
    report = summarize_cold_start_trials(latencies_s, memory_peaks)

    # Assert
    assert report["trial_count"] == 50.0
    assert report["p95_first_fix_s"] < 30.0
    assert report["first_fix_passed"] is True
    assert report["memory_passed"] is True


def test_cold_start_hardware_prerequisites_are_blocked_not_passed(tmp_path: Path) -> None:
    """A scenario that requires the jetson service is reported blocked, not passed."""
    # Arrange
    spike_scenario = ScenarioConfig(
        scenario_id="NFT-RES-LIM-05",
        name="Cold-start resource spike",
        group=ScenarioGroup.RESOURCE_LIMIT,
        input_dataset="jetson_resource_monitor",
        required_services=("jetson",),
    )
    runner = BlackboxReplayRunner(output_root=tmp_path, scenarios=(spike_scenario,))

    # Act
    run_result = runner.run()

    # Assert
    first_report = run_result.reports[0]
    assert first_report.result == ScenarioResult.BLOCKED
    assert "Jetson prerequisite blocked" in first_report.error_message
def test_jetson_resource_metric_summary_captures_memory_and_throttle_fields() -> None:
    """Two samples one minute apart summarize to the expected peaks and duration."""
    # Arrange
    baseline = ResourceSample(
        timestamp_s=0.0,
        process_rss_bytes=1_000_000_000,
        shared_memory_used_bytes=2_000_000_000,
        cuda_allocated_bytes=500_000_000,
        throttle_active=False,
        temperature_c=55.0,
    )
    one_minute_later = ResourceSample(
        timestamp_s=60.0,
        process_rss_bytes=1_200_000_000,
        shared_memory_used_bytes=2_300_000_000,
        cuda_allocated_bytes=650_000_000,
        throttle_active=False,
        temperature_c=62.0,
    )

    # Act
    metrics = summarize_resource_samples((baseline, one_minute_later))

    # Assert
    assert metrics["duration_s"] == 60.0
    assert metrics["peak_shared_memory_used_bytes"] == 2_300_000_000.0
    assert metrics["peak_cuda_allocated_bytes"] == 650_000_000.0
    assert metrics["throttle_observed"] is False
    assert metrics["max_temperature_c"] == 62.0


def test_missing_thermal_hardware_reports_blocked_prerequisite(tmp_path: Path) -> None:
    """The default runner reports its resource-limit scenario as blocked."""
    # Act
    run_result = BlackboxReplayRunner(output_root=tmp_path).run()

    # Assert
    resource_limit_reports = (
        candidate for candidate in run_result.reports if candidate.group.value == "resource-limit"
    )
    blocked_report = next(resource_limit_reports)
    assert blocked_report.result.value == "blocked"
    assert "Jetson prerequisite blocked" in blocked_report.error_message


def test_fdr_rollover_logs_segments_without_raw_frame_retention() -> None:
    """Two 60-byte payloads against a 100-byte segment limit roll over into two segments."""
    # Arrange
    flight_recorder = InMemoryFlightRecorder(segment_limit_bytes=100, storage_limit_bytes=500)
    estimate_ref = "fdr://payload/gps-input-1"
    health_ref = "fdr://payload/health-1"

    # Act
    first_append = flight_recorder.append_event(
        _event("estimate", 1, estimate_ref),
        FdrPayload(ref=estimate_ref, size_bytes=60, redacted=True),
    )
    second_append = flight_recorder.append_event(
        _event("health", 2, health_ref),
        FdrPayload(ref=health_ref, size_bytes=60, redacted=True),
    )
    export_request = FdrExportRequest(
        mission_id="mission-001",
        run_id="run-001",
        include_analytics=True,
    )
    exported = flight_recorder.export(export_request)

    # Assert
    assert first_append.appended is True
    assert second_append.rollover is True
    assert flight_recorder.health.status == "ready"
    assert exported.produced is True
    assert len(exported.segments) == 2
    assert all("raw-frame" not in part.segment_id for part in exported.segments)
    assert exported.analytics_ref is not None


def _event(event_type: str, timestamp_ns: int, payload_ref: str) -> FdrEvent:
    """Build an info-severity FdrEvent for the fixed test mission and run."""
    event = FdrEvent(
        event_type=event_type,
        timestamp_ns=timestamp_ns,
        component="blackbox_resource_test",
        severity="info",
        payload_ref=payload_ref,
        mission_id="mission-001",
        run_id="run-001",
    )
    return event