mirror of
https://github.com/azaion/gps-denied-onboard.git
synced 2026-06-22 21:31:13 +00:00
[AZ-304] C6 Postgres schema: additive 0002 migration + UUIDv5
Strictly additive Alembic migration on the AZ-263 baseline (data_model
.md § 6.1 / § 6.3): six new tiles columns (tile_uuid UNIQUE,
location_hash, content_sha256, disk_bytes, accessed_at, uploaded_at),
four new btree indices, one UNIQUE expression index over the
COALESCE-zero-uuid natural key, CHECK widening of
ck_tiles_freshness_status to the AZ-263 + AZ-303 vocabulary UNION,
four NULLable bbox columns on sector_classifications, and a new
tile_freshness_rules table seeded with the two default thresholds.
Pinned UUIDv5 namespace (TILE_NAMESPACE_UUID =
5b8d0c2e-1a4f-4b3a-8c9d-e7f6a3b2c1d0) + derive_tile_id /
derive_location_hash helpers cross-coordinated with
satellite-provider. Migration runner apply_migrations(config) drives
Alembic command.upgrade("head") against the AZ-263 env with one
retry on PG SQLSTATE 40001 and structured INFO logs on apply / no-op.
Contract bump tile_metadata_store.md v1.1.0 -> v1.2.0 adds
TileMetadata.location_hash: UUID | None = None (non-breaking).
module-layout.md updated so c6_tile_cache explicitly Owns
db/migrations/**.
Tier-1 tests: UUIDv5 determinism + locked vectors + DSN resolution +
retry mocked DBAPIError -> 1180 passed, 32 skipped. Tier-2 docker
schema tests gated by @pytest.mark.docker run against the existing
docker-compose.test.yml db service.
Co-authored-by: Cursor <cursoragent@cursor.com>
This commit is contained in:
@@ -17,6 +17,7 @@ from dataclasses import dataclass
|
||||
from datetime import datetime
|
||||
from enum import Enum
|
||||
from pathlib import Path
|
||||
from uuid import UUID
|
||||
|
||||
__all__ = [
|
||||
"Bbox",
|
||||
@@ -181,6 +182,12 @@ class TileMetadata:
|
||||
companion_id: str | None
|
||||
quality_metadata: TileQualityMetadata | None
|
||||
voting_status: VotingStatus
|
||||
# AZ-304: deterministic per-cell-bag identifier (`uuidv5` over
|
||||
# ``(zoom, tile_x, tile_y)`` from :mod:`_uuid_namespace`). Defaults to
|
||||
# ``None`` so existing AZ-303-era constructors keep working; the
|
||||
# ``PostgresFilesystemStore`` derives the value at insert time when
|
||||
# ``None`` and the DB NOT-NULL constraint is the safety net.
|
||||
location_hash: UUID | None = None
|
||||
|
||||
|
||||
@dataclass(frozen=True)
|
||||
|
||||
@@ -0,0 +1,101 @@
|
||||
"""C6 tile-cache UUIDv5 namespace + derivation helpers (AZ-304).
|
||||
|
||||
This module is the *cross-repo coordination point* between
|
||||
``gps-denied-onboard`` (Python) and ``satellite-provider`` (C#) for the
|
||||
deterministic per-tile and per-cell-bag identifiers used in:
|
||||
|
||||
- the ``tiles.tile_uuid`` column (per-``(zoom, tile_x, tile_y, source, flight_id)``
|
||||
identity);
|
||||
- the ``tiles.location_hash`` column (per-``(zoom, tile_x, tile_y)`` cell-bag
|
||||
identifier shared across sources and flights — Scenario 1 UI lookup,
|
||||
Scenario 6 voting query of the 2026-05-12 tile-schema scenario analysis).
|
||||
|
||||
The pinned :data:`TILE_NAMESPACE_UUID` constant MUST be byte-identical to
|
||||
the corresponding C# constant in
|
||||
``satellite-provider/SatelliteProvider.Common/Utils/Uuidv5.cs`` per
|
||||
``AZ-TBD_tile_identity_uuidv5_bulk_list``. Changing this value invalidates
|
||||
every existing tile identifier on both sides and requires a coordinated
|
||||
cross-repo release.
|
||||
|
||||
Name format passed into :func:`uuid.uuid5`:
|
||||
|
||||
- :func:`derive_tile_id`:
|
||||
``"{zoom_level}/{tile_x}/{tile_y}/{source}/{flight_id}"`` with
|
||||
``flight_id`` rendered as the canonical 8-4-4-4-12 lowercase UUID
|
||||
string. ``None`` collapses to ``"00000000-0000-0000-0000-000000000000"``
|
||||
so per-source googlemaps tiles (no flight) yield a single deterministic
|
||||
identity per cell + source.
|
||||
- :func:`derive_location_hash`: ``"{zoom_level}/{tile_x}/{tile_y}"`` — no
|
||||
source, no flight; shared across all rows for the cell.
|
||||
|
||||
The ``name`` is UTF-8 encoded inside :func:`uuid.uuid5` (CPython
|
||||
``uuid.py`` ``_uuid_generate_random`` path). The satellite-provider C#
|
||||
implementation MUST use the same UTF-8 encoding for the locked test
|
||||
vectors in ``tests/unit/c6_tile_cache/test_uuid_namespace.py`` to match
|
||||
byte-for-byte.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from enum import Enum
|
||||
from uuid import UUID, uuid5
|
||||
|
||||
TILE_NAMESPACE_UUID: UUID = UUID("5b8d0c2e-1a4f-4b3a-8c9d-e7f6a3b2c1d0")
|
||||
"""Pinned cross-repo UUIDv5 namespace; DO NOT regenerate (AZ-304 § Constraints)."""
|
||||
|
||||
_ZERO_UUID: UUID = UUID("00000000-0000-0000-0000-000000000000")
|
||||
|
||||
|
||||
def _normalize_source(source: object) -> str:
|
||||
if isinstance(source, Enum):
|
||||
value = source.value
|
||||
else:
|
||||
value = source
|
||||
if not isinstance(value, str):
|
||||
raise TypeError(
|
||||
"derive_tile_id: source must be a str-Enum (TileSource) or str; "
|
||||
f"got {type(source).__name__}"
|
||||
)
|
||||
return value
|
||||
|
||||
|
||||
def _normalize_flight_id(flight_id: object) -> str:
|
||||
if flight_id is None:
|
||||
return str(_ZERO_UUID)
|
||||
if isinstance(flight_id, UUID):
|
||||
return str(flight_id)
|
||||
if isinstance(flight_id, str):
|
||||
return str(UUID(flight_id))
|
||||
raise TypeError(
|
||||
f"derive_tile_id: flight_id must be UUID, str, or None; got {type(flight_id).__name__}"
|
||||
)
|
||||
|
||||
|
||||
def derive_tile_id(
|
||||
zoom_level: int,
|
||||
tile_x: int,
|
||||
tile_y: int,
|
||||
source: object,
|
||||
flight_id: object,
|
||||
) -> UUID:
|
||||
"""Compute the deterministic per-row ``tile_uuid``.
|
||||
|
||||
See module docstring for the name format and cross-repo invariants.
|
||||
"""
|
||||
source_str = _normalize_source(source)
|
||||
flight_str = _normalize_flight_id(flight_id)
|
||||
name = f"{int(zoom_level)}/{int(tile_x)}/{int(tile_y)}/{source_str}/{flight_str}"
|
||||
return uuid5(TILE_NAMESPACE_UUID, name)
|
||||
|
||||
|
||||
def derive_location_hash(zoom_level: int, tile_x: int, tile_y: int) -> UUID:
|
||||
"""Compute the per-cell-bag ``location_hash`` (AC-11 invariant)."""
|
||||
name = f"{int(zoom_level)}/{int(tile_x)}/{int(tile_y)}"
|
||||
return uuid5(TILE_NAMESPACE_UUID, name)
|
||||
|
||||
|
||||
__all__ = [
|
||||
"TILE_NAMESPACE_UUID",
|
||||
"derive_location_hash",
|
||||
"derive_tile_id",
|
||||
]
|
||||
@@ -0,0 +1,236 @@
|
||||
"""C6 tile-cache Alembic migration runner (AZ-304).
|
||||
|
||||
Public surface:
|
||||
|
||||
- :class:`MigrationResult` — frozen dataclass describing the outcome of a
|
||||
single :func:`apply_migrations` invocation.
|
||||
- :func:`apply_migrations` — invoked by the composition root at startup
|
||||
AFTER config load and BEFORE ``PostgresFilesystemStore`` construction.
|
||||
- :class:`MigrationError` — raised on terminal migration failure;
|
||||
deliberately NOT a member of the :class:`TileCacheError` family
|
||||
because migrations run before any runtime error consumer is wired.
|
||||
|
||||
Implementation notes:
|
||||
|
||||
- The runner drives Alembic's ``command.upgrade(cfg, "head")`` against the
|
||||
project-pinned env at ``db/migrations/`` (AZ-263 bootstrap). The env
|
||||
already wires ``engine_from_config(..., poolclass=pool.NullPool)`` so
|
||||
no separate connection pool is needed at apply time.
|
||||
- Connection resolution: prefer ``config.c6_tile_cache.postgres_dsn`` if
|
||||
non-empty; else fall back to the ``DB_URL`` env var (matches the
|
||||
fallback in ``db/migrations/env.py``).
|
||||
- One transient retry on serialization failures (PG SQLSTATE ``40001``) —
|
||||
more conservative than the AZ-263 bootstrap because migrations are a
|
||||
one-shot startup step where a second loud failure is the right signal.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import os
|
||||
from dataclasses import dataclass
|
||||
from pathlib import Path
|
||||
from typing import TYPE_CHECKING
|
||||
|
||||
from alembic import command
|
||||
from alembic.config import Config as AlembicConfig
|
||||
from alembic.runtime.migration import MigrationContext
|
||||
from alembic.script import ScriptDirectory
|
||||
from sqlalchemy import create_engine
|
||||
from sqlalchemy.exc import DBAPIError, OperationalError
|
||||
|
||||
from gps_denied_onboard.components.c6_tile_cache._uuid_namespace import (
|
||||
TILE_NAMESPACE_UUID,
|
||||
)
|
||||
from gps_denied_onboard.logging.structured import get_logger
|
||||
|
||||
if TYPE_CHECKING:
|
||||
from gps_denied_onboard.config.schema import Config
|
||||
|
||||
|
||||
_LOGGER = get_logger("c6_tile_cache.migrations")
|
||||
_PROJECT_ROOT = Path(__file__).resolve().parents[4]
|
||||
_ALEMBIC_INI = _PROJECT_ROOT / "alembic.ini"
|
||||
_ALEMBIC_SCRIPT_LOCATION = _PROJECT_ROOT / "db" / "migrations"
|
||||
|
||||
|
||||
class MigrationError(RuntimeError):
|
||||
"""Terminal failure applying c6_tile_cache Alembic migrations.
|
||||
|
||||
NOT a member of the ``TileCacheError`` family by design: migrations
|
||||
run during composition-root startup, before any runtime error
|
||||
consumer (`c6_tile_cache.errors.TileCacheError`) exists.
|
||||
"""
|
||||
|
||||
|
||||
@dataclass(frozen=True)
|
||||
class MigrationResult:
|
||||
"""Outcome of one :func:`apply_migrations` invocation."""
|
||||
|
||||
applied: list[str]
|
||||
current_revision: str
|
||||
no_op: bool
|
||||
|
||||
|
||||
def _resolve_dsn(config: Config) -> str:
|
||||
block = config.components.get("c6_tile_cache")
|
||||
dsn = getattr(block, "postgres_dsn", "") if block is not None else ""
|
||||
if dsn:
|
||||
return dsn
|
||||
env_dsn = os.environ.get("DB_URL")
|
||||
if env_dsn:
|
||||
return env_dsn
|
||||
raise MigrationError(
|
||||
"c6_tile_cache.apply_migrations: no DSN available — "
|
||||
"set config.components['c6_tile_cache'].postgres_dsn or "
|
||||
"the DB_URL environment variable"
|
||||
)
|
||||
|
||||
|
||||
def _to_sqlalchemy_url(raw_dsn: str) -> str:
|
||||
"""Normalise ``postgresql://`` → ``postgresql+psycopg://`` (psycopg3 driver).
|
||||
|
||||
Matches the same transformation ``db/migrations/env.py`` applies on
|
||||
the runtime path; doing it here as well so the pre-flight current-rev
|
||||
probe uses the same driver.
|
||||
"""
|
||||
if raw_dsn.startswith("postgresql://"):
|
||||
return raw_dsn.replace("postgresql://", "postgresql+psycopg://", 1)
|
||||
return raw_dsn
|
||||
|
||||
|
||||
def _alembic_config(sqlalchemy_url: str) -> AlembicConfig:
|
||||
cfg = AlembicConfig(str(_ALEMBIC_INI))
|
||||
cfg.set_main_option("script_location", str(_ALEMBIC_SCRIPT_LOCATION))
|
||||
cfg.set_main_option("sqlalchemy.url", sqlalchemy_url)
|
||||
return cfg
|
||||
|
||||
|
||||
def _current_revision(sqlalchemy_url: str) -> str | None:
|
||||
engine = create_engine(sqlalchemy_url, poolclass=None)
|
||||
try:
|
||||
with engine.connect() as conn:
|
||||
context = MigrationContext.configure(conn)
|
||||
return context.get_current_revision()
|
||||
finally:
|
||||
engine.dispose()
|
||||
|
||||
|
||||
def _head_revision(cfg: AlembicConfig) -> str:
|
||||
script = ScriptDirectory.from_config(cfg)
|
||||
head = script.get_current_head()
|
||||
if head is None:
|
||||
raise MigrationError(
|
||||
"c6_tile_cache.apply_migrations: Alembic script directory has no head; "
|
||||
f"check {_ALEMBIC_SCRIPT_LOCATION}"
|
||||
)
|
||||
return head
|
||||
|
||||
|
||||
def _is_serialization_failure(exc: BaseException) -> bool:
|
||||
"""SQLSTATE 40001 — PG serialization conflict.
|
||||
|
||||
SQLAlchemy wraps psycopg's ``SerializationFailure`` inside
|
||||
:class:`sqlalchemy.exc.DBAPIError`; we duck-type rather than import
|
||||
``psycopg.errors`` to keep this module independent of the underlying
|
||||
driver package layout.
|
||||
"""
|
||||
if isinstance(exc, DBAPIError):
|
||||
sqlstate = getattr(getattr(exc, "orig", None), "sqlstate", None)
|
||||
return sqlstate == "40001"
|
||||
return getattr(exc, "sqlstate", None) == "40001"
|
||||
|
||||
|
||||
def apply_migrations(config: Config) -> MigrationResult:
|
||||
"""Apply pending c6_tile_cache Alembic migrations against the configured DB.
|
||||
|
||||
Returns a :class:`MigrationResult` describing what was applied. Logs
|
||||
an INFO record on every revision applied; logs an INFO ``no_op``
|
||||
record when the DB is already at head. Retries once on a PG
|
||||
serialization failure (SQLSTATE 40001); raises :class:`MigrationError`
|
||||
on the second failure or any other terminal error.
|
||||
|
||||
The :data:`TILE_NAMESPACE_UUID` value is included in the structured
|
||||
log payload on every apply / no-op so post-mortem drift detection is
|
||||
trivial against the satellite-provider C# constant.
|
||||
"""
|
||||
raw_dsn = _resolve_dsn(config)
|
||||
sqlalchemy_url = _to_sqlalchemy_url(raw_dsn)
|
||||
cfg = _alembic_config(sqlalchemy_url)
|
||||
head_rev = _head_revision(cfg)
|
||||
|
||||
last_exc: BaseException | None = None
|
||||
for attempt in (0, 1):
|
||||
try:
|
||||
pre_rev = _current_revision(sqlalchemy_url)
|
||||
if pre_rev == head_rev:
|
||||
_LOGGER.info(
|
||||
"c6.migrations: no-op (already at head)",
|
||||
extra={
|
||||
"kind": "c6.migration.no_op",
|
||||
"kv": {
|
||||
"current_revision": head_rev,
|
||||
"namespace_uuid": str(TILE_NAMESPACE_UUID),
|
||||
},
|
||||
},
|
||||
)
|
||||
return MigrationResult(applied=[], current_revision=head_rev, no_op=True)
|
||||
|
||||
command.upgrade(cfg, "head")
|
||||
post_rev = _current_revision(sqlalchemy_url) or head_rev
|
||||
applied = _resolve_applied(cfg, pre_rev, post_rev)
|
||||
_LOGGER.info(
|
||||
"c6.migrations: applied",
|
||||
extra={
|
||||
"kind": "c6.migration.applied",
|
||||
"kv": {
|
||||
"revisions": applied,
|
||||
"from_revision": pre_rev,
|
||||
"to_revision": post_rev,
|
||||
"namespace_uuid": str(TILE_NAMESPACE_UUID),
|
||||
},
|
||||
},
|
||||
)
|
||||
return MigrationResult(applied=applied, current_revision=post_rev, no_op=False)
|
||||
except (DBAPIError, OperationalError) as exc:
|
||||
last_exc = exc
|
||||
if _is_serialization_failure(exc) and attempt == 0:
|
||||
# PG SQLSTATE 40001 — serialization conflict. Retry immediately:
|
||||
# migrations run as a one-shot startup step before the injected
|
||||
# Clock is constructed, so we cannot use `Clock.sleep`. Components
|
||||
# are forbidden from using `time.sleep` directly (replay-determinism
|
||||
# Invariant 2 / `tests/_meta/test_no_direct_time_in_components.py`),
|
||||
# and a 0-50 ms backoff buys nothing here in practice: Alembic's
|
||||
# NullPool starts a fresh connection on retry which already
|
||||
# introduces enough natural jitter.
|
||||
continue
|
||||
raise MigrationError(f"c6_tile_cache.apply_migrations: database error: {exc}") from exc
|
||||
except Exception as exc:
|
||||
raise MigrationError(f"c6_tile_cache.apply_migrations: terminal error: {exc}") from exc
|
||||
|
||||
# Defensive: loop must either return or raise; if we ever fall out, surface it.
|
||||
raise MigrationError(
|
||||
f"c6_tile_cache.apply_migrations: exhausted retry attempts (last_exc={last_exc!r})"
|
||||
)
|
||||
|
||||
|
||||
def _resolve_applied(cfg: AlembicConfig, pre_rev: str | None, post_rev: str) -> list[str]:
|
||||
"""Enumerate revisions between ``pre_rev`` (exclusive) and ``post_rev`` (inclusive)."""
|
||||
if pre_rev == post_rev:
|
||||
return []
|
||||
script = ScriptDirectory.from_config(cfg)
|
||||
descending = [r.revision for r in script.walk_revisions(base="base", head=post_rev)]
|
||||
if pre_rev is None:
|
||||
return list(reversed(descending))
|
||||
try:
|
||||
pre_idx = descending.index(pre_rev)
|
||||
except ValueError:
|
||||
# pre_rev is no longer in the chain (e.g., squashed) — return everything we walked.
|
||||
return list(reversed(descending))
|
||||
return list(reversed(descending[:pre_idx]))
|
||||
|
||||
|
||||
__all__ = [
|
||||
"MigrationError",
|
||||
"MigrationResult",
|
||||
"apply_migrations",
|
||||
]
|
||||
Reference in New Issue
Block a user