[AZ-515] Extract C10 canonical hash helpers to shared module

Cumulative-review F1 (batches 34-36, carried into batch 37): both
manifest_verifier.py (AZ-324) and provisioner.py (AZ-325) imported
leading-underscore privates _aggregate_tile_hash + _compute_manifest_hash
from manifest_builder.py (AZ-323). The helpers encode the trust-chain
formula shared across all three components; the import shape gave
readers no static signal that a refactor would silently break two
modules.

Move the formula into c10_provisioning/_canonical_hash.py:

- TileHashRecord (moved from manifest_builder)
- aggregate_tile_hash (renamed, public)
- compute_manifest_hash (renamed, public)
- TAKEOFF_ORIGIN_DECIMALS constant (moved)

Callers updated to import directly from _canonical_hash. Bodies
unchanged; manifest hashes are byte-for-byte identical.

Tests: c10_provisioning suite 86/86 pass; full project 1370/1370 pass.
Co-authored-by: Cursor <cursoragent@cursor.com>
This commit is contained in:
Oleksandr Bezdieniezhnykh
2026-05-13 05:24:06 +03:00
parent a9c8d60087
commit ca0430a44d
5 changed files with 183 additions and 102 deletions
@@ -11,6 +11,11 @@ them through this single contract surface.
from gps_denied_onboard._types.inference import EngineCacheEntry
from gps_denied_onboard._types.manifests import Manifest
from gps_denied_onboard.components.c10_provisioning._canonical_hash import (
TileHashRecord,
aggregate_tile_hash,
compute_manifest_hash,
)
from gps_denied_onboard.components.c10_provisioning.c7_engine_embedder import (
C7EngineBackboneEmbedder,
)
@@ -66,7 +71,6 @@ from gps_denied_onboard.components.c10_provisioning.manifest_builder import (
ManifestArtifact,
ManifestBuilder,
ManifestBuildInput,
TileHashRecord,
TilesByBboxQuery,
)
from gps_denied_onboard.components.c10_provisioning.manifest_verifier import (
@@ -140,4 +144,6 @@ __all__ = [
"VerificationResult",
"VerifyFailReason",
"VerifyOutcome",
"aggregate_tile_hash",
"compute_manifest_hash",
]
@@ -0,0 +1,151 @@
"""Canonical build-identity hash — shared between AZ-323 / AZ-324 / AZ-325.
The build-identity hash is the trust-chain glue that lets three
independently-built C10 components agree byte-for-byte on whether two
build inputs are equivalent:
* :class:`ManifestBuilder` (AZ-323) emits the hash into
``Manifest.json``'s ``build.manifest_hash`` field.
* :class:`ManifestVerifier` (AZ-324) recomputes the tile-coverage
aggregate to confirm the on-disk Manifest still matches the C6 corpus.
* :class:`CacheProvisionerImpl` (AZ-325) recomputes the full hash to
decide whether a warm re-run is idempotent.
Living in its own intra-component module makes that contract status
explicit. Resolves cumulative-review Finding F1 (batches 34–36) — the
verifier and provisioner used to import leading-underscore privates
from :mod:`.manifest_builder`, leaving readers no static signal that a
refactor of the builder's hash format would silently break two other
modules.
The exported surface is intentionally narrow:
* :class:`TileHashRecord` — the consumer-side DTO carrying the four
sort keys + per-tile digest.
* :func:`aggregate_tile_hash` — canonical SHA-256 over the sorted
``TileHashRecord`` sequence.
* :func:`compute_manifest_hash` — canonical SHA-256 over the
build-identity tuple (engines + calibration + descriptor index +
tiles coverage + sector + bbox + zooms + takeoff origin + flight ID).
Any change to the formats below is a breaking change to the cache
identity; bump :class:`ManifestArtifact.build.manifest_hash`'s schema
version in lockstep with the verifier and provisioner.
"""
from __future__ import annotations
import hashlib
from dataclasses import dataclass
from uuid import UUID
import orjson
from gps_denied_onboard._types.geo import BoundingBox, LatLonAlt
from gps_denied_onboard._types.inference import EngineCacheEntry
__all__ = [
"TAKEOFF_ORIGIN_DECIMALS",
"TileHashRecord",
"aggregate_tile_hash",
"compute_manifest_hash",
]
# Number of decimal places passed to ``round()`` for the takeoff-origin
# lat/lon/alt before they enter the build-identity hash. It is part of the
# hash format, so changing it can alter computed manifest hashes.
TAKEOFF_ORIGIN_DECIMALS = 9
@dataclass(frozen=True)
class TileHashRecord:
    """Immutable per-tile row consumed by the canonical tile-coverage hash.

    ``zoom``, ``lat``, ``lon`` and ``source`` are the four keys used for
    canonical ordering; ``sha256_hex`` is the tile's own digest that is
    folded into the aggregate hash. C10 declares this shape itself so it
    does not import ``components.c6_tile_cache`` (keeps the AZ-270 lint
    green); the composition-root adapter wraps C6 ``TileMetadata`` rows
    into this record.
    """

    # Canonical sort keys, listed in sort-priority order.
    zoom: int
    lat: float
    lon: float
    source: str
    # Per-tile digest (hex string) included in the aggregate hash.
    sha256_hex: str
def aggregate_tile_hash(records: tuple[TileHashRecord, ...]) -> str:
    """Return the hex SHA-256 of the canonical per-tile line encoding.

    Each record contributes exactly one newline-terminated ASCII line of
    the form ``z<zoom>|lat<lat>|lon<lon>|src<source>:<sha256_hex>``, with
    latitude and longitude rendered to nine decimal places. Lines are fed
    to the hash in the order given.

    The caller is responsible for ordering ``records`` by
    ``(zoom, lat, lon, source)`` beforehand; no sorting happens here
    because the verifier and the provisioner sort in different scopes.
    The byte layout is the one AZ-323 originally emitted, so any change
    to it invalidates Manifests already written to disk.

    Raises:
        UnicodeEncodeError: if a record's ``source`` or ``sha256_hex``
            contains non-ASCII characters.
    """
    digest = hashlib.sha256()
    for tile in records:
        line = (
            f"z{tile.zoom}|lat{tile.lat:.9f}|lon{tile.lon:.9f}|src{tile.source}"
            f":{tile.sha256_hex}\n"
        )
        digest.update(line.encode("ascii"))
    return digest.hexdigest()
def compute_manifest_hash(
    *,
    engine_entries: tuple[EngineCacheEntry, ...],
    calibration_sha256: str,
    descriptor_index_sha256: str,
    tiles_coverage_sha256: str,
    sector_class: str,
    bbox: BoundingBox,
    zoom_levels: tuple[int, ...],
    takeoff_origin: LatLonAlt | None,
    flight_id: UUID | None,
) -> str:
    """Return the hex SHA-256 of the canonical build-identity JSON.

    The identity document is serialised with ``orjson`` using sorted
    keys, so the digest does not depend on dict insertion order.

    Each engine is identified by ``(str(engine_path), sha256_hex)``: the
    path carries the AZ-281 filename schema fields (model_name, sm,
    jetpack, trt, precision), and precision is load-bearing because an
    fp16 and an int8 build must not collide. Engine pairs and zoom
    levels are sorted, so input order does not affect the result.

    ``takeoff_origin`` (CP-INV-8) and ``flight_id`` (ADR-010) are
    first-class identity fields: a re-planned route invalidates the
    cached build. The origin's lat/lon/alt are rounded to
    ``TAKEOFF_ORIGIN_DECIMALS`` places; either field may be ``None``,
    which is serialised as JSON ``null``.
    """
    # Sorting [path, sha] lists orders them exactly as the equivalent tuples.
    engine_ids = sorted(
        [str(engine.engine_path), engine.sha256_hex] for engine in engine_entries
    )

    rounded_origin: list[float] | None
    if takeoff_origin is None:
        rounded_origin = None
    else:
        rounded_origin = [
            round(component, TAKEOFF_ORIGIN_DECIMALS)
            for component in (
                takeoff_origin.lat_deg,
                takeoff_origin.lon_deg,
                takeoff_origin.alt_m,
            )
        ]

    bbox_corners = [
        bbox.min_lat_deg,
        bbox.min_lon_deg,
        bbox.max_lat_deg,
        bbox.max_lon_deg,
    ]
    ordered_zooms = sorted(zoom_levels)
    flight_id_text = None if flight_id is None else str(flight_id)

    identity_doc = {
        "model_ids": engine_ids,
        "calibration_sha256": calibration_sha256,
        "descriptor_index_sha256": descriptor_index_sha256,
        "tiles_coverage_sha256": tiles_coverage_sha256,
        "sector_class": sector_class,
        "bbox": bbox_corners,
        "zoom_levels": ordered_zooms,
        "takeoff_origin": rounded_origin,
        "flight_id": flight_id_text,
    }
    encoded = orjson.dumps(identity_doc, option=orjson.OPT_SORT_KEYS)
    return hashlib.sha256(encoded).hexdigest()
@@ -34,6 +34,11 @@ from cryptography.hazmat.primitives.serialization import load_pem_private_key
from gps_denied_onboard._types.geo import BoundingBox, LatLonAlt
from gps_denied_onboard._types.inference import EngineCacheEntry
from gps_denied_onboard.clock import Clock
from gps_denied_onboard.components.c10_provisioning._canonical_hash import (
TileHashRecord,
aggregate_tile_hash,
compute_manifest_hash,
)
from gps_denied_onboard.components.c10_provisioning.config import (
C10ManifestConfig,
SigningMode,
@@ -56,12 +61,10 @@ __all__ = [
"ManifestArtifact",
"ManifestBuildInput",
"ManifestBuilder",
"TileHashRecord",
"TilesByBboxQuery",
]
_BUILD_LOG_KIND_PREFIX = "c10.manifest"
_TAKEOFF_ORIGIN_DECIMALS = 9
_MANIFEST_FILENAME = "Manifest.json"
_SIGNATURE_FILENAME = "Manifest.json.sig"
_ED25519_PUBKEY_BYTES = 32
@@ -72,24 +75,6 @@ VALID_SECTOR_CLASSES: frozenset[str] = frozenset(
)
@dataclass(frozen=True)
class TileHashRecord:
"""Consumer-side DTO carrying the four sort keys + the per-tile digest.
AZ-323 only needs ``(zoom, lat, lon, source)`` for canonical
ordering and ``sha256_hex`` for the aggregate hash. The
composition-root adapter wraps C6's ``TileMetadata`` rows into
this shape so the AZ-270 lint stays green (no
``components.c6_tile_cache`` import from C10).
"""
zoom: int
lat: float
lon: float
source: str
sha256_hex: str
@runtime_checkable
class TilesByBboxQuery(Protocol):
"""Consumer-side structural cut over C6's ``TileMetadataStore``.
@@ -294,7 +279,7 @@ class ManifestBuilder:
zoom_levels=request.zoom_levels,
sector_class=request.sector_class,
)
tiles_coverage_sha256 = _aggregate_tile_hash(sorted_tiles)
tiles_coverage_sha256 = aggregate_tile_hash(sorted_tiles)
engine_artifacts = tuple(
{
@@ -304,7 +289,7 @@ class ManifestBuilder:
for entry in request.engine_entries
)
manifest_hash = _compute_manifest_hash(
manifest_hash = compute_manifest_hash(
engine_entries=request.engine_entries,
calibration_sha256=calibration_sha256,
descriptor_index_sha256=descriptor_index_sha256,
@@ -589,18 +574,6 @@ class ManifestBuilder:
) from exc
def _aggregate_tile_hash(records: tuple[TileHashRecord, ...]) -> str:
hasher = hashlib.sha256()
for r in records:
hasher.update(
(
f"z{r.zoom}|lat{r.lat:.9f}|lon{r.lon:.9f}|src{r.source}"
f":{r.sha256_hex}\n"
).encode("ascii")
)
return hasher.hexdigest()
def _canonical_json_with_trailing_newline(payload: dict[str, object]) -> bytes:
body = orjson.dumps(
payload,
@@ -611,58 +584,6 @@ def _canonical_json_with_trailing_newline(payload: dict[str, object]) -> bytes:
return body
def _compute_manifest_hash(
*,
engine_entries: tuple[EngineCacheEntry, ...],
calibration_sha256: str,
descriptor_index_sha256: str,
tiles_coverage_sha256: str,
sector_class: str,
bbox: BoundingBox,
zoom_levels: tuple[int, ...],
takeoff_origin: LatLonAlt | None,
flight_id: UUID | None,
) -> str:
# Engine identity is `(model_name, precision, sm, jetpack, trt, sha256)`
# so a stale-host fp16 build never collides with a fresh int8 build —
# this matches the AZ-281 filename schema fields modulo the precision
# axis (which fp16 vs int8 makes load-bearing).
model_ids = sorted(
(
str(entry.engine_path),
entry.sha256_hex,
)
for entry in engine_entries
)
origin_tuple: tuple[float, float, float] | None
if takeoff_origin is not None:
origin_tuple = (
round(takeoff_origin.lat_deg, _TAKEOFF_ORIGIN_DECIMALS),
round(takeoff_origin.lon_deg, _TAKEOFF_ORIGIN_DECIMALS),
round(takeoff_origin.alt_m, _TAKEOFF_ORIGIN_DECIMALS),
)
else:
origin_tuple = None
build_identity = {
"model_ids": [list(entry) for entry in model_ids],
"calibration_sha256": calibration_sha256,
"descriptor_index_sha256": descriptor_index_sha256,
"tiles_coverage_sha256": tiles_coverage_sha256,
"sector_class": sector_class,
"bbox": [
bbox.min_lat_deg,
bbox.min_lon_deg,
bbox.max_lat_deg,
bbox.max_lon_deg,
],
"zoom_levels": sorted(zoom_levels),
"takeoff_origin": list(origin_tuple) if origin_tuple is not None else None,
"flight_id": str(flight_id) if flight_id is not None else None,
}
canonical = orjson.dumps(build_identity, option=orjson.OPT_SORT_KEYS)
return hashlib.sha256(canonical).hexdigest()
def _ns_to_iso_utc(time_ns: int) -> str:
"""Format ns-since-epoch as RFC 3339 UTC with second precision.
@@ -32,9 +32,11 @@ from cryptography.hazmat.primitives.asymmetric.ed25519 import Ed25519PublicKey
from gps_denied_onboard._types.geo import BoundingBox, LatLonAlt
from gps_denied_onboard.clock import Clock
from gps_denied_onboard.components.c10_provisioning._canonical_hash import (
aggregate_tile_hash,
)
from gps_denied_onboard.components.c10_provisioning.manifest_builder import (
TilesByBboxQuery,
_aggregate_tile_hash,
)
from gps_denied_onboard.helpers.sha256_sidecar import Sha256Sidecar
@@ -444,7 +446,7 @@ class ManifestVerifierImpl:
records = tuple(
sorted(records, key=lambda r: (r.zoom, r.lat, r.lon, r.source))
)
computed = _aggregate_tile_hash(records)
computed = aggregate_tile_hash(records)
except Exception as exc:
per_artifact_checks.append(
ArtifactCheck(
@@ -40,14 +40,13 @@ Cross-component imports: this module never imports
(``runtime_root.c10_factory.build_cache_provisioner``) wires the real
C6 store into the same adapter the AZ-323 builder consumes.
The build-identity hash formula matches AZ-323's
``_compute_manifest_hash`` byte-for-byte; both modules import the
canonical helper (currently a leading-underscore export from
``manifest_builder``). Cumulative-review Finding F1 (carryover from
batches 31–33) tracks promoting the helper to a shared
``_build_identity`` module so AZ-323 / AZ-324 / AZ-325 share a single
definition; that hygiene PBI is intentionally deferred — the import
is documented here so a reader sees the intent.
The build-identity hash formula matches AZ-323's emitted
``build.manifest_hash`` byte-for-byte. AZ-323 / AZ-324 / AZ-325 all
share a single definition by importing :func:`aggregate_tile_hash` and
:func:`compute_manifest_hash` from
``components.c10_provisioning._canonical_hash``. Resolves cumulative-
review Finding F1 (batches 34–36) — the verifier and provisioner used
to import leading-underscore privates from ``manifest_builder``.
"""
from __future__ import annotations
@@ -89,13 +88,15 @@ from gps_denied_onboard.components.c10_provisioning.interface import (
BuildRequest,
FileLockFactory,
)
from gps_denied_onboard.components.c10_provisioning._canonical_hash import (
TileHashRecord,
aggregate_tile_hash,
compute_manifest_hash,
)
from gps_denied_onboard.components.c10_provisioning.manifest_builder import (
ManifestBuildInput,
ManifestBuilder,
TileHashRecord,
TilesByBboxQuery,
_aggregate_tile_hash,
_compute_manifest_hash,
)
from gps_denied_onboard.helpers.engine_filename_schema import (
EngineFilenameSchema,
@@ -574,9 +575,9 @@ class CacheProvisionerImpl:
return None
calibration_sha256 = hashlib.sha256(calibration_bytes).hexdigest()
tiles_coverage_sha256 = _aggregate_tile_hash(sorted_tiles)
tiles_coverage_sha256 = aggregate_tile_hash(sorted_tiles)
request_hash = _compute_manifest_hash(
request_hash = compute_manifest_hash(
engine_entries=tuple(engine_entries),
calibration_sha256=calibration_sha256,
descriptor_index_sha256=descriptor_index_sha256,