From ca0430a44d53a3f952e2f95261489c96e7b82da6 Mon Sep 17 00:00:00 2001 From: Oleksandr Bezdieniezhnykh Date: Wed, 13 May 2026 05:24:06 +0300 Subject: [PATCH] [AZ-515] Extract C10 canonical hash helpers to shared module Cumulative-review F1 (batches 34-36, carried into batch 37): manifest_verifier.py (AZ-324) imported the leading-underscore private _aggregate_tile_hash, and provisioner.py (AZ-325) imported both _aggregate_tile_hash and _compute_manifest_hash, from manifest_builder.py (AZ-323). The helpers encode the trust-chain formula shared across all three components; the import shape gave readers no static signal that a refactor would silently break two modules. Move the formula into c10_provisioning/_canonical_hash.py: - TileHashRecord (moved from manifest_builder) - aggregate_tile_hash (renamed, public) - compute_manifest_hash (renamed, public) - TAKEOFF_ORIGIN_DECIMALS constant (moved; renamed from _TAKEOFF_ORIGIN_DECIMALS) Callers updated to import directly from _canonical_hash; the package __init__ now also re-exports aggregate_tile_hash and compute_manifest_hash. Bodies unchanged; manifest hashes are byte-for-byte identical. Tests: c10_provisioning suite 86/86 pass; full project 1370/1370 pass. Co-authored-by: Cursor --- .../components/c10_provisioning/__init__.py | 8 +- .../c10_provisioning/_canonical_hash.py | 151 ++++++++++++++++++ .../c10_provisioning/manifest_builder.py | 93 +---------- .../c10_provisioning/manifest_verifier.py | 6 +- .../c10_provisioning/provisioner.py | 27 ++-- 5 files changed, 183 insertions(+), 102 deletions(-) create mode 100644 src/gps_denied_onboard/components/c10_provisioning/_canonical_hash.py diff --git a/src/gps_denied_onboard/components/c10_provisioning/__init__.py b/src/gps_denied_onboard/components/c10_provisioning/__init__.py index 773c4f1..90dde33 100644 --- a/src/gps_denied_onboard/components/c10_provisioning/__init__.py +++ b/src/gps_denied_onboard/components/c10_provisioning/__init__.py @@ -11,6 +11,11 @@ them through this single contract surface. 
from gps_denied_onboard._types.inference import EngineCacheEntry from gps_denied_onboard._types.manifests import Manifest +from gps_denied_onboard.components.c10_provisioning._canonical_hash import ( + TileHashRecord, + aggregate_tile_hash, + compute_manifest_hash, +) from gps_denied_onboard.components.c10_provisioning.c7_engine_embedder import ( C7EngineBackboneEmbedder, ) @@ -66,7 +71,6 @@ from gps_denied_onboard.components.c10_provisioning.manifest_builder import ( ManifestArtifact, ManifestBuilder, ManifestBuildInput, - TileHashRecord, TilesByBboxQuery, ) from gps_denied_onboard.components.c10_provisioning.manifest_verifier import ( @@ -140,4 +144,6 @@ __all__ = [ "VerificationResult", "VerifyFailReason", "VerifyOutcome", + "aggregate_tile_hash", + "compute_manifest_hash", ] diff --git a/src/gps_denied_onboard/components/c10_provisioning/_canonical_hash.py b/src/gps_denied_onboard/components/c10_provisioning/_canonical_hash.py new file mode 100644 index 0000000..beddb8c --- /dev/null +++ b/src/gps_denied_onboard/components/c10_provisioning/_canonical_hash.py @@ -0,0 +1,151 @@ +"""Canonical build-identity hash — shared between AZ-323 / AZ-324 / AZ-325. + +The build-identity hash is the trust-chain glue that lets three +independently-built C10 components agree byte-for-byte on whether two +build inputs are equivalent: + +* :class:`ManifestBuilder` (AZ-323) emits the hash into + ``Manifest.json``'s ``build.manifest_hash`` field. +* :class:`ManifestVerifier` (AZ-324) recomputes the tile-coverage + aggregate to confirm the on-disk Manifest still matches the C6 corpus. +* :class:`CacheProvisionerImpl` (AZ-325) recomputes the full hash to + decide whether a warm re-run is idempotent. + +Living in its own intra-component module makes that contract status +explicit. 
Resolves cumulative-review Finding F1 (batches 34–36) — the +verifier and provisioner used to import leading-underscore privates +from :mod:`.manifest_builder`, leaving readers no static signal that a +refactor of the builder's hash format would silently break two other +modules. + +The exported surface is intentionally narrow: + +* :class:`TileHashRecord` — the consumer-side DTO carrying the four + sort keys + per-tile digest. +* :func:`aggregate_tile_hash` — canonical SHA-256 over the sorted + ``TileHashRecord`` sequence. +* :func:`compute_manifest_hash` — canonical SHA-256 over the + build-identity tuple (engines + calibration + descriptor index + + tiles coverage + sector + bbox + zooms + takeoff origin + flight ID). + +Any change to the formats below is a breaking change to the cache +identity; bump :class:`ManifestArtifact.build.manifest_hash`'s schema +version in lockstep with the verifier and provisioner. +""" + +from __future__ import annotations + +import hashlib +from dataclasses import dataclass +from uuid import UUID + +import orjson + +from gps_denied_onboard._types.geo import BoundingBox, LatLonAlt +from gps_denied_onboard._types.inference import EngineCacheEntry + +__all__ = [ + "TAKEOFF_ORIGIN_DECIMALS", + "TileHashRecord", + "aggregate_tile_hash", + "compute_manifest_hash", +] + +TAKEOFF_ORIGIN_DECIMALS = 9 + + +@dataclass(frozen=True) +class TileHashRecord: + """Consumer-side DTO carrying the four sort keys + per-tile digest. + + AZ-323 only needs ``(zoom, lat, lon, source)`` for canonical + ordering and ``sha256_hex`` for the aggregate hash. The + composition-root adapter wraps C6's ``TileMetadata`` rows into + this shape so the AZ-270 lint stays green (no + ``components.c6_tile_cache`` import from C10). + """ + + zoom: int + lat: float + lon: float + source: str + sha256_hex: str + + +def aggregate_tile_hash(records: tuple[TileHashRecord, ...]) -> str: + """SHA-256 over the canonical newline-delimited tile encoding. 
+ + Records MUST be pre-sorted by ``(zoom, lat, lon, source)``; the + helper does NOT re-sort because callers in different invariants + sort in different scopes (verifier vs. provisioner). The encoding + matches the byte sequence AZ-323 first emitted; changing the + format here breaks every Manifest already on disk. + """ + + hasher = hashlib.sha256() + for r in records: + hasher.update( + ( + f"z{r.zoom}|lat{r.lat:.9f}|lon{r.lon:.9f}|src{r.source}" + f":{r.sha256_hex}\n" + ).encode("ascii") + ) + return hasher.hexdigest() + + +def compute_manifest_hash( + *, + engine_entries: tuple[EngineCacheEntry, ...], + calibration_sha256: str, + descriptor_index_sha256: str, + tiles_coverage_sha256: str, + sector_class: str, + bbox: BoundingBox, + zoom_levels: tuple[int, ...], + takeoff_origin: LatLonAlt | None, + flight_id: UUID | None, +) -> str: + """SHA-256 of the canonical build-identity JSON. + + Engine identity is ``(engine_path_str, sha256_hex)`` because path + encodes the AZ-281 filename schema fields (model_name, sm, + jetpack, trt, precision) modulo the precision axis (which fp16 vs + int8 makes load-bearing). ``takeoff_origin`` (CP-INV-8) and + ``flight_id`` (ADR-010) are first-class identity fields — a + re-planned route invalidates the cached build. 
+ """ + + model_ids = sorted( + ( + str(entry.engine_path), + entry.sha256_hex, + ) + for entry in engine_entries + ) + origin_tuple: tuple[float, float, float] | None + if takeoff_origin is not None: + origin_tuple = ( + round(takeoff_origin.lat_deg, TAKEOFF_ORIGIN_DECIMALS), + round(takeoff_origin.lon_deg, TAKEOFF_ORIGIN_DECIMALS), + round(takeoff_origin.alt_m, TAKEOFF_ORIGIN_DECIMALS), + ) + else: + origin_tuple = None + build_identity = { + "model_ids": [list(entry) for entry in model_ids], + "calibration_sha256": calibration_sha256, + "descriptor_index_sha256": descriptor_index_sha256, + "tiles_coverage_sha256": tiles_coverage_sha256, + "sector_class": sector_class, + "bbox": [ + bbox.min_lat_deg, + bbox.min_lon_deg, + bbox.max_lat_deg, + bbox.max_lon_deg, + ], + "zoom_levels": sorted(zoom_levels), + "takeoff_origin": list(origin_tuple) if origin_tuple is not None else None, + "flight_id": str(flight_id) if flight_id is not None else None, + } + canonical = orjson.dumps(build_identity, option=orjson.OPT_SORT_KEYS) + return hashlib.sha256(canonical).hexdigest() diff --git a/src/gps_denied_onboard/components/c10_provisioning/manifest_builder.py b/src/gps_denied_onboard/components/c10_provisioning/manifest_builder.py index e4f3d69..26780b2 100644 --- a/src/gps_denied_onboard/components/c10_provisioning/manifest_builder.py +++ b/src/gps_denied_onboard/components/c10_provisioning/manifest_builder.py @@ -34,6 +34,11 @@ from cryptography.hazmat.primitives.serialization import load_pem_private_key from gps_denied_onboard._types.geo import BoundingBox, LatLonAlt from gps_denied_onboard._types.inference import EngineCacheEntry from gps_denied_onboard.clock import Clock +from gps_denied_onboard.components.c10_provisioning._canonical_hash import ( + TileHashRecord, + aggregate_tile_hash, + compute_manifest_hash, +) from gps_denied_onboard.components.c10_provisioning.config import ( C10ManifestConfig, SigningMode, @@ -56,12 +61,10 @@ __all__ = [ "ManifestArtifact", 
"ManifestBuildInput", "ManifestBuilder", - "TileHashRecord", "TilesByBboxQuery", ] _BUILD_LOG_KIND_PREFIX = "c10.manifest" -_TAKEOFF_ORIGIN_DECIMALS = 9 _MANIFEST_FILENAME = "Manifest.json" _SIGNATURE_FILENAME = "Manifest.json.sig" _ED25519_PUBKEY_BYTES = 32 @@ -72,24 +75,6 @@ VALID_SECTOR_CLASSES: frozenset[str] = frozenset( ) -@dataclass(frozen=True) -class TileHashRecord: - """Consumer-side DTO carrying the four sort keys + the per-tile digest. - - AZ-323 only needs ``(zoom, lat, lon, source)`` for canonical - ordering and ``sha256_hex`` for the aggregate hash. The - composition-root adapter wraps C6's ``TileMetadata`` rows into - this shape so the AZ-270 lint stays green (no - ``components.c6_tile_cache`` import from C10). - """ - - zoom: int - lat: float - lon: float - source: str - sha256_hex: str - - @runtime_checkable class TilesByBboxQuery(Protocol): """Consumer-side structural cut over C6's ``TileMetadataStore``. @@ -294,7 +279,7 @@ class ManifestBuilder: zoom_levels=request.zoom_levels, sector_class=request.sector_class, ) - tiles_coverage_sha256 = _aggregate_tile_hash(sorted_tiles) + tiles_coverage_sha256 = aggregate_tile_hash(sorted_tiles) engine_artifacts = tuple( { @@ -304,7 +289,7 @@ class ManifestBuilder: for entry in request.engine_entries ) - manifest_hash = _compute_manifest_hash( + manifest_hash = compute_manifest_hash( engine_entries=request.engine_entries, calibration_sha256=calibration_sha256, descriptor_index_sha256=descriptor_index_sha256, @@ -589,18 +574,6 @@ class ManifestBuilder: ) from exc -def _aggregate_tile_hash(records: tuple[TileHashRecord, ...]) -> str: - hasher = hashlib.sha256() - for r in records: - hasher.update( - ( - f"z{r.zoom}|lat{r.lat:.9f}|lon{r.lon:.9f}|src{r.source}" - f":{r.sha256_hex}\n" - ).encode("ascii") - ) - return hasher.hexdigest() - - def _canonical_json_with_trailing_newline(payload: dict[str, object]) -> bytes: body = orjson.dumps( payload, @@ -611,58 +584,6 @@ def 
_canonical_json_with_trailing_newline(payload: dict[str, object]) -> bytes: return body -def _compute_manifest_hash( - *, - engine_entries: tuple[EngineCacheEntry, ...], - calibration_sha256: str, - descriptor_index_sha256: str, - tiles_coverage_sha256: str, - sector_class: str, - bbox: BoundingBox, - zoom_levels: tuple[int, ...], - takeoff_origin: LatLonAlt | None, - flight_id: UUID | None, -) -> str: - # Engine identity is `(model_name, precision, sm, jetpack, trt, sha256)` - # so a stale-host fp16 build never collides with a fresh int8 build — - # this matches the AZ-281 filename schema fields modulo the precision - # axis (which fp16 vs int8 makes load-bearing). - model_ids = sorted( - ( - str(entry.engine_path), - entry.sha256_hex, - ) - for entry in engine_entries - ) - origin_tuple: tuple[float, float, float] | None - if takeoff_origin is not None: - origin_tuple = ( - round(takeoff_origin.lat_deg, _TAKEOFF_ORIGIN_DECIMALS), - round(takeoff_origin.lon_deg, _TAKEOFF_ORIGIN_DECIMALS), - round(takeoff_origin.alt_m, _TAKEOFF_ORIGIN_DECIMALS), - ) - else: - origin_tuple = None - build_identity = { - "model_ids": [list(entry) for entry in model_ids], - "calibration_sha256": calibration_sha256, - "descriptor_index_sha256": descriptor_index_sha256, - "tiles_coverage_sha256": tiles_coverage_sha256, - "sector_class": sector_class, - "bbox": [ - bbox.min_lat_deg, - bbox.min_lon_deg, - bbox.max_lat_deg, - bbox.max_lon_deg, - ], - "zoom_levels": sorted(zoom_levels), - "takeoff_origin": list(origin_tuple) if origin_tuple is not None else None, - "flight_id": str(flight_id) if flight_id is not None else None, - } - canonical = orjson.dumps(build_identity, option=orjson.OPT_SORT_KEYS) - return hashlib.sha256(canonical).hexdigest() - - def _ns_to_iso_utc(time_ns: int) -> str: """Format ns-since-epoch as RFC 3339 UTC with second precision. 
diff --git a/src/gps_denied_onboard/components/c10_provisioning/manifest_verifier.py b/src/gps_denied_onboard/components/c10_provisioning/manifest_verifier.py index 0e53593..9323e70 100644 --- a/src/gps_denied_onboard/components/c10_provisioning/manifest_verifier.py +++ b/src/gps_denied_onboard/components/c10_provisioning/manifest_verifier.py @@ -32,9 +32,11 @@ from cryptography.hazmat.primitives.asymmetric.ed25519 import Ed25519PublicKey from gps_denied_onboard._types.geo import BoundingBox, LatLonAlt from gps_denied_onboard.clock import Clock +from gps_denied_onboard.components.c10_provisioning._canonical_hash import ( + aggregate_tile_hash, +) from gps_denied_onboard.components.c10_provisioning.manifest_builder import ( TilesByBboxQuery, - _aggregate_tile_hash, ) from gps_denied_onboard.helpers.sha256_sidecar import Sha256Sidecar @@ -444,7 +446,7 @@ class ManifestVerifierImpl: records = tuple( sorted(records, key=lambda r: (r.zoom, r.lat, r.lon, r.source)) ) - computed = _aggregate_tile_hash(records) + computed = aggregate_tile_hash(records) except Exception as exc: per_artifact_checks.append( ArtifactCheck( diff --git a/src/gps_denied_onboard/components/c10_provisioning/provisioner.py b/src/gps_denied_onboard/components/c10_provisioning/provisioner.py index f55e37e..da8a327 100644 --- a/src/gps_denied_onboard/components/c10_provisioning/provisioner.py +++ b/src/gps_denied_onboard/components/c10_provisioning/provisioner.py @@ -40,14 +40,13 @@ Cross-component imports: this module never imports (``runtime_root.c10_factory.build_cache_provisioner``) wires the real C6 store into the same adapter the AZ-323 builder consumes. -The build-identity hash formula matches AZ-323's -``_compute_manifest_hash`` byte-for-byte; both modules import the -canonical helper (currently a leading-underscore export from -``manifest_builder``). 
Cumulative-review Finding F1 (carryover from -batches 31–33) tracks promoting the helper to a shared -``_build_identity`` module so AZ-323 / AZ-324 / AZ-325 share a single -definition; that hygiene PBI is intentionally deferred — the import -is documented here so a reader sees the intent. +The build-identity hash formula matches AZ-323's emitted +``build.manifest_hash`` byte-for-byte. AZ-323 / AZ-324 / AZ-325 all +share a single definition by importing :func:`aggregate_tile_hash` and +:func:`compute_manifest_hash` from +``components.c10_provisioning._canonical_hash``. Resolves cumulative- +review Finding F1 (batches 34–36) — the verifier and provisioner used +to import leading-underscore privates from ``manifest_builder``. """ from __future__ import annotations @@ -89,13 +88,15 @@ from gps_denied_onboard.components.c10_provisioning.interface import ( BuildRequest, FileLockFactory, ) +from gps_denied_onboard.components.c10_provisioning._canonical_hash import ( + TileHashRecord, + aggregate_tile_hash, + compute_manifest_hash, +) from gps_denied_onboard.components.c10_provisioning.manifest_builder import ( ManifestBuildInput, ManifestBuilder, - TileHashRecord, TilesByBboxQuery, - _aggregate_tile_hash, - _compute_manifest_hash, ) from gps_denied_onboard.helpers.engine_filename_schema import ( EngineFilenameSchema, @@ -574,9 +575,9 @@ class CacheProvisionerImpl: return None calibration_sha256 = hashlib.sha256(calibration_bytes).hexdigest() - tiles_coverage_sha256 = _aggregate_tile_hash(sorted_tiles) + tiles_coverage_sha256 = aggregate_tile_hash(sorted_tiles) - request_hash = _compute_manifest_hash( + request_hash = compute_manifest_hash( engine_entries=tuple(engine_entries), calibration_sha256=calibration_sha256, descriptor_index_sha256=descriptor_index_sha256,