mirror of
https://github.com/azaion/gps-denied-onboard.git
synced 2026-06-22 18:31:13 +00:00
[AZ-308] c6 CacheBudgetEnforcer: 10 GB hard cap + LRU sweep
CacheBudgetEnforcer.reserve_headroom(needed_bytes) returns immediately when total_disk_bytes() + needed_bytes <= budget, otherwise iterates lru_candidates in eviction_batch_size batches, deletes via delete_tile, emits one INFO log per evicted tile (c6.evicted) and one FDR record per eviction batch (c6.eviction_batch, evicted_tile_ids capped to 5). Raises CacheBudgetExhaustedError AFTER a full sweep if the budget cannot be met. BudgetEnforcedTileStore decorates a TileStore so the policy stays separable from PostgresFilesystemStore. Composition root in storage_factory.build_tile_store wires the wrapper unconditionally. PostgresFilesystemStore now accepts lru_clock: Clock | None = None; when set, read_tile_pixels calls record_lru_access(tile_id, now) so eviction picks the right LRU candidates. Production wiring injects WallClock(); AZ-305 unit tests still construct without the clock and keep their pass-through semantics. Contract tile_store.md bumped to v1.1.0 to add CacheBudgetExhaustedError to the TileCacheError family; shared FDR schema bumped to v1.3.0 for the new c6.eviction_batch kind. Co-authored-by: Cursor <cursoragent@cursor.com>
This commit is contained in:
@@ -0,0 +1,448 @@
|
||||
"""C6 cache-budget enforcer (AZ-308).
|
||||
|
||||
RESTRICT-SAT-2 enforcement: the on-disk tile cache MUST NOT exceed a
|
||||
configurable hard cap (default 10 GiB). Every :meth:`TileStore.write_tile`
|
||||
goes through the :class:`BudgetEnforcedTileStore` decorator which calls
|
||||
:meth:`CacheBudgetEnforcer.reserve_headroom` BEFORE the underlying store
|
||||
writes a byte to disk. If the cap would be breached, the enforcer runs
|
||||
an LRU sweep using the store's :meth:`TileMetadataStore.lru_candidates`
|
||||
+ :meth:`TileStore.delete_tile` primitives (AZ-305) until enough
|
||||
head-room is freed; if even total eviction cannot fit ``needed_bytes``
|
||||
it raises :class:`CacheBudgetExhaustedError` AFTER the loop completes
|
||||
(partial eviction is preferable to no eviction — AC-5).
|
||||
|
||||
The enforcer is the SOLE eviction path during a flight: no other
|
||||
component evicts tiles (Reliability constraint of AZ-308). Per-eviction
|
||||
INFO logs (``kind="c6.evicted"``) carry the tile-level detail; the
|
||||
per-batch FDR record (``kind="c6.eviction_batch"``) is bounded to the
|
||||
first 5 evicted ids (AC-11) so the F4 producer never blows the FDR
|
||||
ring with a runaway sweep.
|
||||
|
||||
The decorator pattern is mandatory — moving the budget check inside
|
||||
``PostgresFilesystemStore.write_tile`` would couple policy to the
|
||||
filesystem impl and break the single-responsibility design that lets
|
||||
the store remain unit-testable in isolation.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import argparse
|
||||
import logging
|
||||
import os
|
||||
import sys
|
||||
from dataclasses import dataclass
|
||||
from datetime import datetime, timezone
|
||||
from typing import TYPE_CHECKING, Final
|
||||
|
||||
from gps_denied_onboard.components.c6_tile_cache._types import (
|
||||
TileId,
|
||||
TileMetadata,
|
||||
)
|
||||
from gps_denied_onboard.components.c6_tile_cache.errors import (
|
||||
CacheBudgetExhaustedError,
|
||||
TileCacheError,
|
||||
TileFsError,
|
||||
TileMetadataError,
|
||||
)
|
||||
from gps_denied_onboard.components.c6_tile_cache.interface import (
|
||||
TileMetadataStore,
|
||||
TileStore,
|
||||
)
|
||||
from gps_denied_onboard.fdr_client.records import CURRENT_SCHEMA_VERSION, FdrRecord
|
||||
|
||||
if TYPE_CHECKING:
|
||||
from gps_denied_onboard.components.c6_tile_cache._tile_pixel_handle import (
|
||||
TilePixelHandle,
|
||||
)
|
||||
from gps_denied_onboard.fdr_client.client import FdrClient
|
||||
|
||||
__all__ = [
|
||||
"BudgetEnforcedTileStore",
|
||||
"CacheBudgetEnforcer",
|
||||
"EvictionResult",
|
||||
]
|
||||
|
||||
|
||||
_PRODUCER_ID: Final[str] = "c6_tile_cache.budget"
|
||||
_FDR_TILE_IDS_CAP: Final[int] = 5
|
||||
|
||||
|
||||
@dataclass(frozen=True)
class EvictionResult:
    """Summary of what one :meth:`CacheBudgetEnforcer.reserve_headroom` call evicted.

    Returned both by the no-eviction fast path (empty ``evicted`` and
    ``freed_bytes == 0``) and after an LRU sweep that freed enough room.
    """

    # Metadata rows of the removed tiles, in the order they were evicted;
    # empty when no eviction was needed.
    evicted: list[TileMetadata]
    # Integer total reclaimed: the sum of each evicted candidate's ``disk_bytes``.
    freed_bytes: int
|
||||
|
||||
|
||||
def _iso_ts_now() -> str:
|
||||
"""RFC 3339 UTC timestamp with microsecond precision and ``Z`` suffix.
|
||||
|
||||
Used only on the FDR record envelope ``ts`` field — distinct from the
|
||||
per-row ``accessed_at`` / ``evicted_at`` datetimes which use the same
|
||||
wall-clock source but carry the operator-facing semantics.
|
||||
"""
|
||||
return datetime.now(timezone.utc).strftime("%Y-%m-%dT%H:%M:%S.%fZ")
|
||||
|
||||
|
||||
class CacheBudgetEnforcer:
    """LRU-driven hard-cap enforcer for the C6 tile cache.

    Construction reads :meth:`TileMetadataStore.total_disk_bytes` once
    to emit a startup INFO log (AC-12) and warns if the prior flight
    ended over-budget. Construction does NOT proactively evict — the
    first :meth:`reserve_headroom` call drives any required sweep.

    The enforcer holds no per-flight state of its own; the store is the
    source of truth, so every :meth:`reserve_headroom` call re-reads the
    current disk usage from it.
    """

    def __init__(
        self,
        *,
        store: TileMetadataStore,
        fdr_client: FdrClient,
        logger: logging.Logger,
        budget_bytes: int,
        eviction_batch_size: int = 32,
    ) -> None:
        """Validate the policy knobs and log the starting budget state.

        Args:
            store: Metadata store used for ``total_disk_bytes`` and
                ``lru_candidates``; the same object is also used for
                ``delete_tile`` (duck-typed as a :class:`TileStore`).
            fdr_client: Sink for the per-sweep ``c6.eviction_batch`` record.
            logger: Receives the structured ``c6.*`` log records.
            budget_bytes: Hard cap on total on-disk bytes; must be > 0.
            eviction_batch_size: Number of LRU candidates requested per
                ``lru_candidates`` call; must be > 0.

        Raises:
            TileMetadataError: If ``budget_bytes`` or
                ``eviction_batch_size`` is not strictly positive.
        """
        if budget_bytes <= 0:
            raise TileMetadataError(
                f"CacheBudgetEnforcer: budget_bytes must be > 0; got {budget_bytes}"
            )
        if eviction_batch_size <= 0:
            raise TileMetadataError(
                f"CacheBudgetEnforcer: eviction_batch_size must be > 0; got {eviction_batch_size}"
            )
        self._store = store
        # The runtime ``PostgresFilesystemStore`` instance satisfies both
        # the metadata-store and tile-store Protocols; the type hint stays
        # ``TileMetadataStore`` for clarity but we duck-type ``delete_tile``
        # off the same reference (AZ-308 spec § Outcome).
        self._tile_store: TileStore = store  # type: ignore[assignment]
        self._fdr_client = fdr_client
        self._logger = logger
        self._budget_bytes = budget_bytes
        self._eviction_batch_size = eviction_batch_size

        current = self._store.total_disk_bytes()
        headroom = max(self._budget_bytes - current, 0)
        self._logger.info(
            "c6.budget.loaded",
            extra={
                "kind": "c6.budget.loaded",
                "kv": {
                    "budget_bytes": self._budget_bytes,
                    "current_disk_bytes": current,
                    "headroom_bytes": headroom,
                    "eviction_batch_size": self._eviction_batch_size,
                },
            },
        )
        if current > self._budget_bytes:
            # AC-12: prior flight left the cache over-budget; surface
            # the overage so operators can choose whether to inspect
            # before the first F4 burst triggers cascade eviction.
            self._logger.warning(
                "c6.budget.over_budget_at_construction",
                extra={
                    "kind": "c6.budget.over_budget_at_construction",
                    "kv": {
                        "budget_bytes": self._budget_bytes,
                        "current_disk_bytes": current,
                        "overage_bytes": current - self._budget_bytes,
                    },
                },
            )

    # ------------------------------------------------------------------
    # Public API
    # ------------------------------------------------------------------

    @property
    def budget_bytes(self) -> int:
        """Configured hard cap, in bytes."""
        return self._budget_bytes

    @property
    def eviction_batch_size(self) -> int:
        """Number of LRU candidates requested per sweep batch."""
        return self._eviction_batch_size

    def reserve_headroom(
        self,
        needed_bytes: int,
        *,
        trigger_tile_id: TileId | None = None,
    ) -> EvictionResult:
        """Ensure at least ``needed_bytes`` of head-room.

        Reads :meth:`total_disk_bytes` once, computes the available
        head-room, and either:

        - returns an empty :class:`EvictionResult` if there is already
          room (AC-1, the no-evict fast path), or
        - sweeps LRU candidates in batches of ``eviction_batch_size``
          until ``freed_bytes >= shortfall`` (AC-2, AC-3, AC-4), or
        - raises :class:`CacheBudgetExhaustedError` AFTER the sweep
          exhausts all candidates without reaching the target (AC-5).

        Args:
            needed_bytes: Size of the pending write; must be >= 0.
            trigger_tile_id: Tile whose write triggered the call; only
                recorded in the FDR eviction record.

        Returns:
            The evicted metadata rows and the total bytes freed.

        Raises:
            TileMetadataError: If ``needed_bytes`` is negative.
            CacheBudgetExhaustedError: If the full sweep could not free
                enough room.
        """
        if needed_bytes < 0:
            raise TileMetadataError(
                f"reserve_headroom: needed_bytes must be >= 0; got {needed_bytes}"
            )
        current = self._store.total_disk_bytes()
        available = self._budget_bytes - current
        if available >= needed_bytes:
            return EvictionResult(evicted=[], freed_bytes=0)

        shortfall = needed_bytes - available
        evicted_metadata: list[TileMetadata] = []
        freed_bytes = 0
        try:
            while freed_bytes < shortfall:
                candidates = self._store.lru_candidates(max_count=self._eviction_batch_size)
                if not candidates:
                    break
                for persistent in candidates:
                    if freed_bytes >= shortfall:
                        break
                    evicted_at = datetime.now(timezone.utc)
                    self._delete_candidate(persistent)
                    evicted_metadata.append(persistent.metadata)
                    freed_bytes += persistent.disk_bytes
                    self._log_evicted(persistent, evicted_at)
        finally:
            # Emit the FDR record even when the sweep is aborted by an
            # unexpected store error: tiles deleted before the failure are
            # gone from disk and must still show up in the flight record.
            # If the enqueue itself fails here, its exception propagates
            # with the original one attached as ``__context__``.
            if evicted_metadata:
                self._emit_eviction_batch(
                    trigger_tile_id=trigger_tile_id,
                    evicted=evicted_metadata,
                    freed_bytes=freed_bytes,
                )

        if freed_bytes < shortfall:
            available_post = available + freed_bytes
            raise CacheBudgetExhaustedError(
                f"CacheBudgetEnforcer: cannot reserve {needed_bytes} bytes — "
                f"available_bytes={available_post} after evicting "
                f"{len(evicted_metadata)} tiles (freed {freed_bytes} bytes); "
                f"budget_bytes={self._budget_bytes}",
                needed_bytes=needed_bytes,
                available_bytes=available_post,
                evicted_count=len(evicted_metadata),
            )

        return EvictionResult(evicted=evicted_metadata, freed_bytes=freed_bytes)

    # ------------------------------------------------------------------
    # Internal
    # ------------------------------------------------------------------

    def _delete_candidate(self, persistent) -> None:
        """Delete one LRU candidate and log any abnormal delete outcome.

        Neither a filesystem failure nor an already-missing row stops the
        sweep; the caller counts the candidate's bytes in both cases.
        """
        md = persistent.metadata
        try:
            removed = self._tile_store.delete_tile(md.tile_id)
        except TileFsError as exc:
            # Row delete succeeded (AZ-305 contract) but the
            # filesystem unlink failed; the budget already
            # reflects the row's absence so we still count
            # disk_bytes per the spec § Exclusions.
            removed = True
            self._logger.warning(
                "c6.evict.fs_error",
                extra={
                    "kind": "c6.evict.fs_error",
                    "kv": {
                        "tile_id_str": str(md.tile_id),
                        "disk_bytes": persistent.disk_bytes,
                        "error": str(exc),
                    },
                },
            )
        if not removed:
            # NFR-reliability-delete-already-gone: a concurrent
            # path already evicted this row. Count the bytes
            # anyway (per spec § Exclusions) so the loop makes
            # progress; the next ``lru_candidates`` call won't
            # return this id.
            self._logger.info(
                "c6.evict.already_gone",
                extra={
                    "kind": "c6.evict.already_gone",
                    "kv": {
                        "tile_id_str": str(md.tile_id),
                        "disk_bytes": persistent.disk_bytes,
                    },
                },
            )

    def _log_evicted(self, persistent, evicted_at: datetime) -> None:
        """Emit the per-tile ``c6.evicted`` INFO record."""
        self._logger.info(
            "c6.evicted",
            extra={
                "kind": "c6.evicted",
                "kv": {
                    "tile_id_str": str(persistent.metadata.tile_id),
                    "disk_bytes": persistent.disk_bytes,
                    "accessed_at": persistent.accessed_at.isoformat(),
                    "evicted_at": evicted_at.isoformat(),
                },
            },
        )

    def _emit_eviction_batch(
        self,
        *,
        trigger_tile_id: TileId | None,
        evicted: list[TileMetadata],
        freed_bytes: int,
    ) -> None:
        """Enqueue one ``c6.eviction_batch`` FDR record for a sweep.

        ``evicted_count`` is the full count, while ``evicted_tile_ids``
        carries only the first ``_FDR_TILE_IDS_CAP`` ids so the record
        stays bounded (AC-11).
        """
        capped_ids = [str(md.tile_id) for md in evicted[:_FDR_TILE_IDS_CAP]]
        # Compare against ``None`` explicitly: the parameter is
        # ``TileId | None`` and a present id must never be dropped just
        # because it happens to evaluate as falsy.
        trigger = str(trigger_tile_id) if trigger_tile_id is not None else ""
        self._fdr_client.enqueue(
            FdrRecord(
                schema_version=CURRENT_SCHEMA_VERSION,
                ts=_iso_ts_now(),
                producer_id=_PRODUCER_ID,
                kind="c6.eviction_batch",
                payload={
                    "trigger_tile_id": trigger,
                    "freed_bytes": freed_bytes,
                    "evicted_count": len(evicted),
                    "evicted_tile_ids": capped_ids,
                },
            )
        )
|
||||
|
||||
|
||||
class BudgetEnforcedTileStore:
    """:class:`TileStore` decorator that reserves head-room before each write.

    Only :meth:`write_tile` adds behaviour: it calls
    :meth:`CacheBudgetEnforcer.reserve_headroom` and then delegates.
    :meth:`read_tile_pixels`, :meth:`tile_exists` and :meth:`delete_tile`
    forward straight to the wrapped store, so read-side consumers see no
    difference.

    No exception is caught or rewrapped here (AC-7): whatever the
    enforcer or the wrapped store raises reaches the caller unchanged.
    """

    def __init__(
        self,
        *,
        wrapped: TileStore,
        enforcer: CacheBudgetEnforcer,
    ) -> None:
        """Remember the store to delegate to and the enforcer to consult."""
        self._enforcer = enforcer
        self._wrapped = wrapped

    def read_tile_pixels(self, tile_id: TileId) -> TilePixelHandle:
        """Forward to the wrapped store."""
        return self._wrapped.read_tile_pixels(tile_id)

    def write_tile(self, tile_blob: bytes, metadata: TileMetadata) -> None:
        """Reserve ``len(tile_blob)`` bytes of head-room, then write.

        The budget check (AC-6) runs to completion before the wrapped
        store is asked to write, so any eviction has already happened by
        the time the write starts. If the reservation raises, the wrapped
        store is never called.
        """
        incoming_size = len(tile_blob)
        self._enforcer.reserve_headroom(incoming_size, trigger_tile_id=metadata.tile_id)
        self._wrapped.write_tile(tile_blob, metadata)

    def tile_exists(self, tile_id: TileId) -> bool:
        """Forward to the wrapped store."""
        return self._wrapped.tile_exists(tile_id)

    def delete_tile(self, tile_id: TileId) -> bool:
        """Forward to the wrapped store."""
        return self._wrapped.delete_tile(tile_id)
|
||||
|
||||
|
||||
# ----------------------------------------------------------------------
|
||||
# Operator CLI — `python -m gps_denied_onboard.components.c6_tile_cache.cache_budget_enforcer dry-run ...`
|
||||
# ----------------------------------------------------------------------
|
||||
|
||||
|
||||
def _build_parser() -> argparse.ArgumentParser:
|
||||
parser = argparse.ArgumentParser(
|
||||
prog="c6_tile_cache.cache_budget_enforcer",
|
||||
description=(
|
||||
"Operator-side dry-run of the cache-budget enforcer. Reports "
|
||||
"what the LRU sweep WOULD evict to make room for a hypothetical "
|
||||
"write of N bytes, without actually deleting anything."
|
||||
),
|
||||
)
|
||||
sub = parser.add_subparsers(dest="cmd", required=True)
|
||||
dry = sub.add_parser(
|
||||
"dry-run",
|
||||
help="Show what would be evicted for a given needed-bytes target.",
|
||||
)
|
||||
dry.add_argument(
|
||||
"--pretend-needed-bytes",
|
||||
type=int,
|
||||
required=True,
|
||||
help="Hypothetical incoming write size (bytes).",
|
||||
)
|
||||
return parser
|
||||
|
||||
|
||||
def _dry_run(args: argparse.Namespace) -> int:
    """Print what the enforcer would evict for a hypothetical write.

    Nothing is deleted: the function only reads ``total_disk_bytes`` and
    ``lru_candidates`` from a store built from the environment config and
    prints a ``key: value`` report to stdout.

    Args:
        args: Parsed CLI namespace; ``pretend_needed_bytes`` is the
            hypothetical incoming write size.

    Returns:
        ``0`` in every non-raising case, including when the report says
        the real sweep would raise.

    Raises:
        TileMetadataError: If ``pretend_needed_bytes`` is negative
            (same rule as :meth:`CacheBudgetEnforcer.reserve_headroom`).
    """
    from gps_denied_onboard.components.c6_tile_cache.postgres_filesystem_store import (
        PostgresFilesystemStore,
    )
    from gps_denied_onboard.config import load_config

    needed = args.pretend_needed_bytes
    if needed < 0:
        # The real enforcer rejects negative sizes; reporting a decision
        # for one would describe a sweep that can never happen.
        raise TileMetadataError(
            f"dry-run: --pretend-needed-bytes must be >= 0; got {needed}"
        )

    config = load_config(os.environ)
    block = config.components["c6_tile_cache"]
    store = PostgresFilesystemStore.from_config(config)

    current = store.total_disk_bytes()
    budget = block.lru_eviction_threshold_bytes
    available = budget - current
    print(f"budget_bytes: {budget}")
    print(f"current_disk_bytes: {current}")
    print(f"available_bytes: {available}")
    print(f"needed_bytes: {needed}")
    if available >= needed:
        print("decision: NO_EVICTION (already enough head-room)")
        return 0
    shortfall = needed - available
    print(f"shortfall_bytes: {shortfall}")

    # The real sweep keeps requesting batches until the shortfall is
    # covered or no candidates remain. A dry-run cannot delete, so asking
    # for the same batch again would return the same rows; instead widen
    # the requested window one batch at a time until it either covers the
    # shortfall or comes back short (no more candidates exist).
    # NOTE(review): assumes ``lru_candidates(max_count=k)`` returns the k
    # coldest tiles in eviction order, so a wider window extends the
    # narrower one — confirm against the store implementation.
    batch_size = block.eviction_batch_size
    window = batch_size
    while True:
        snapshot = list(store.lru_candidates(max_count=window))
        exhausted = len(snapshot) < window
        if exhausted or sum(p.disk_bytes for p in snapshot) >= shortfall:
            break
        window += batch_size

    print(f"eviction_batch_size: {batch_size}")
    print(f"would_evict (up to {window}):")
    freed = 0
    count = 0
    for persistent in snapshot:
        if freed >= shortfall:
            break
        print(
            f"  - tile_id={persistent.metadata.tile_id} "
            f"accessed_at={persistent.accessed_at.isoformat()} "
            f"disk_bytes={persistent.disk_bytes}"
        )
        freed += persistent.disk_bytes
        count += 1
    print(f"would_free_bytes: {freed}")
    print(f"would_evict_count: {count}")
    if freed < shortfall:
        print("decision: WOULD_RAISE_CacheBudgetExhaustedError (not enough candidates)")
    else:
        print("decision: EVICT (sweep stops as soon as shortfall is covered)")
    return 0
|
||||
|
||||
|
||||
def main(argv: list[str] | None = None) -> int:
    """Run the operator CLI and return a process exit code.

    Args:
        argv: Argument list to parse; ``None`` lets argparse read
            ``sys.argv``.

    Returns:
        The ``dry-run`` result on success, or ``1`` after printing
        ``error: ...`` to stderr when the dry-run raises a
        :class:`TileCacheError`.
    """
    parser = _build_parser()
    args = parser.parse_args(argv)

    if args.cmd != "dry-run":
        parser.error(f"unknown subcommand {args.cmd!r}")
        return 2  # unreachable; argparse exits non-zero on error

    try:
        exit_code = _dry_run(args)
    except TileCacheError as exc:
        print(f"error: {exc}", file=sys.stderr)
        exit_code = 1
    return exit_code


if __name__ == "__main__":
    sys.exit(main())
|
||||
@@ -59,6 +59,7 @@ class C6TileCacheConfig:
|
||||
postgres_dsn: str = ""
|
||||
postgres_pool_size: int = 4
|
||||
lru_eviction_threshold_bytes: int = 10 * 1024**3
|
||||
eviction_batch_size: int = 32
|
||||
|
||||
def __post_init__(self) -> None:
|
||||
if self.store_runtime not in KNOWN_TILE_STORE_RUNTIMES:
|
||||
@@ -88,3 +89,7 @@ class C6TileCacheConfig:
|
||||
f"C6TileCacheConfig.lru_eviction_threshold_bytes must be > 0; "
|
||||
f"got {self.lru_eviction_threshold_bytes}"
|
||||
)
|
||||
if self.eviction_batch_size <= 0:
|
||||
raise ConfigError(
|
||||
f"C6TileCacheConfig.eviction_batch_size must be > 0; got {self.eviction_batch_size}"
|
||||
)
|
||||
|
||||
@@ -22,6 +22,7 @@ if TYPE_CHECKING:
|
||||
)
|
||||
|
||||
__all__ = [
|
||||
"CacheBudgetExhaustedError",
|
||||
"ContentHashMismatchError",
|
||||
"FreshnessRejectionError",
|
||||
"IndexBuildError",
|
||||
@@ -107,6 +108,35 @@ class FreshnessRejectionError(TileCacheError):
|
||||
self.rule = rule
|
||||
|
||||
|
||||
class CacheBudgetExhaustedError(TileCacheError):
    """Signal that the budget enforcer could not free enough room for a write.

    :class:`CacheBudgetEnforcer.reserve_headroom` raises this only AFTER
    its LRU sweep has run to completion without freeing ``needed_bytes``;
    evicting part of the cache is preferred over evicting nothing, so the
    sweep always happens first (AZ-308 AC-5).

    The keyword-only diagnostics are stored as attributes for the F4
    producer / operator recovery path. Each defaults to ``None`` when the
    raiser does not supply it.
    """

    def __init__(
        self,
        message: str,
        *,
        needed_bytes: int | None = None,
        available_bytes: int | None = None,
        evicted_count: int | None = None,
    ) -> None:
        super().__init__(message)
        # What the failed write asked for.
        self.needed_bytes = needed_bytes
        # Head-room that was left once the sweep had finished.
        self.available_bytes = available_bytes
        # How many tiles the sweep removed before giving up.
        self.evicted_count = evicted_count
|
||||
|
||||
|
||||
class IndexUnavailableError(TileCacheError):
|
||||
"""The descriptor index could not satisfy a read.
|
||||
|
||||
|
||||
@@ -83,6 +83,7 @@ from gps_denied_onboard.helpers.sha256_sidecar import (
|
||||
from gps_denied_onboard.helpers.wgs_converter import WgsConverter
|
||||
|
||||
if TYPE_CHECKING:
|
||||
from gps_denied_onboard.clock.interface import Clock
|
||||
from gps_denied_onboard.config.schema import Config
|
||||
|
||||
__all__ = ["MmapTilePixelHandle", "PostgresFilesystemStore"]
|
||||
@@ -182,6 +183,7 @@ class PostgresFilesystemStore:
|
||||
fdr_client: FdrClient,
|
||||
logger: logging.Logger,
|
||||
freshness_gate: FreshnessGate | None = None,
|
||||
lru_clock: Clock | None = None,
|
||||
) -> None:
|
||||
self._root_dir = Path(root_dir)
|
||||
self._tiles_dir = self._root_dir / "tiles"
|
||||
@@ -194,6 +196,11 @@ class PostgresFilesystemStore:
|
||||
# ``None`` keeps the AZ-305-only test path working (no gate wiring
|
||||
# required for unit tests of the store in isolation).
|
||||
self._freshness_gate = freshness_gate
|
||||
# AZ-308 AC-8: when injected, every ``read_tile_pixels`` call
|
||||
# records an LRU access stamp so :meth:`lru_candidates` picks
|
||||
# accurate eviction targets. ``None`` keeps AZ-305 unit-test
|
||||
# wiring (no LRU UPDATE on read) intact.
|
||||
self._lru_clock = lru_clock
|
||||
try:
|
||||
self._tiles_dir.mkdir(parents=True, exist_ok=True)
|
||||
except OSError as exc:
|
||||
@@ -268,6 +275,7 @@ class PostgresFilesystemStore:
|
||||
fdr_client=fdr_client,
|
||||
logger=logger,
|
||||
freshness_gate=freshness_gate,
|
||||
lru_clock=WallClock(),
|
||||
)
|
||||
|
||||
# ------------------------------------------------------------------
|
||||
@@ -322,6 +330,15 @@ class PostgresFilesystemStore:
|
||||
},
|
||||
)
|
||||
raise TileMetadataError(msg)
|
||||
# AZ-308 AC-8: refresh the LRU clock on every read so eviction
|
||||
# picks the actually-coldest tiles. The UPDATE runs only when the
|
||||
# Clock was injected at construction (production wiring); AZ-305
|
||||
# unit tests pass ``lru_clock=None`` and pay zero cost.
|
||||
if self._lru_clock is not None:
|
||||
now_dt = datetime.fromtimestamp(
|
||||
self._lru_clock.time_ns() / 1_000_000_000, tz=timezone.utc
|
||||
)
|
||||
self.record_lru_access(tile_id, now_dt)
|
||||
return MmapTilePixelHandle(path)
|
||||
|
||||
def write_tile(self, tile_blob: bytes, metadata: TileMetadata) -> None:
|
||||
|
||||
@@ -145,6 +145,19 @@ KNOWN_PAYLOAD_KEYS: Final[dict[str, frozenset[str]]] = {
|
||||
"c6.freshness.downgraded": frozenset(
|
||||
{"tile_id", "age_seconds", "classification", "rule_action", "rule_max_age_seconds"}
|
||||
),
|
||||
# AZ-308 / E-C6: emitted by the CacheBudgetEnforcer at the end of every
|
||||
# LRU sweep that evicted at least one tile (RESTRICT-SAT-2). One record
|
||||
# per ``reserve_headroom`` call that hit the eviction path, never per
|
||||
# evicted tile (per-tile detail goes to logs). ``trigger_tile_id`` is
|
||||
# the canonical UUIDv5 of the tile whose write triggered the sweep;
|
||||
# ``freed_bytes`` is the integer total reclaimed; ``evicted_count`` is
|
||||
# the full count regardless of how many ids fit in
|
||||
# ``evicted_tile_ids`` (capped to 5 to keep the record bounded —
|
||||
# AC-11). The full eviction list is replayable from the per-tile
|
||||
# ``c6.evicted`` INFO log records.
|
||||
"c6.eviction_batch": frozenset(
|
||||
{"trigger_tile_id", "freed_bytes", "evicted_count", "evicted_tile_ids"}
|
||||
),
|
||||
}
|
||||
|
||||
KNOWN_KINDS: Final[frozenset[str]] = frozenset(KNOWN_PAYLOAD_KEYS.keys())
|
||||
|
||||
@@ -71,14 +71,26 @@ def build_tile_store(config: Config) -> TileStore:
|
||||
invoked via ``PostgresFilesystemStore.from_config(config)`` which
|
||||
wires the ``ConnectionPool`` / ``FdrClient`` / logger / static
|
||||
helper dependencies from the config block.
|
||||
|
||||
AZ-308: the returned :class:`TileStore` is wrapped in a
|
||||
:class:`BudgetEnforcedTileStore` so every ``write_tile`` first
|
||||
reserves head-room against the configured
|
||||
``lru_eviction_threshold_bytes`` budget (RESTRICT-SAT-2). The
|
||||
wrapper is transparent for read-side consumers.
|
||||
"""
|
||||
block = _c6_config(config)
|
||||
runtime = block.store_runtime
|
||||
if runtime == "postgres_filesystem":
|
||||
try:
|
||||
from gps_denied_onboard.components.c6_tile_cache.cache_budget_enforcer import (
|
||||
BudgetEnforcedTileStore,
|
||||
CacheBudgetEnforcer,
|
||||
)
|
||||
from gps_denied_onboard.components.c6_tile_cache.postgres_filesystem_store import (
|
||||
PostgresFilesystemStore,
|
||||
)
|
||||
from gps_denied_onboard.fdr_client.client import make_fdr_client
|
||||
from gps_denied_onboard.logging import get_logger
|
||||
except ModuleNotFoundError as exc:
|
||||
raise RuntimeNotAvailableError(
|
||||
f"TileStore runtime {runtime!r} is configured but its "
|
||||
@@ -86,7 +98,15 @@ def build_tile_store(config: Config) -> TileStore:
|
||||
"'c6_tile_cache.postgres_filesystem_store' has not been "
|
||||
"built into this binary yet (AZ-305 pending)."
|
||||
) from exc
|
||||
return PostgresFilesystemStore.from_config(config)
|
||||
store = PostgresFilesystemStore.from_config(config)
|
||||
enforcer = CacheBudgetEnforcer(
|
||||
store=store,
|
||||
fdr_client=make_fdr_client("c6_tile_cache.budget", config),
|
||||
logger=get_logger("c6_tile_cache.budget"),
|
||||
budget_bytes=block.lru_eviction_threshold_bytes,
|
||||
eviction_batch_size=block.eviction_batch_size,
|
||||
)
|
||||
return BudgetEnforcedTileStore(wrapped=store, enforcer=enforcer)
|
||||
raise RuntimeNotAvailableError(
|
||||
f"TileStore runtime {runtime!r} is not buildable in this binary."
|
||||
)
|
||||
|
||||
Reference in New Issue
Block a user