[AZ-505] Tile inventory endpoint + HTTP/2 + Leaflet covering index

Production code:
- POST /api/satellite/tiles/inventory (XOR body, 5000-cap,
  most-recent-per-location_hash select, present/absent shaping).
- Kestrel HttpProtocols.Http1AndHttp2 on every listener (AC-5).
- Migration 015 creates tiles_leaflet_path covering index over
  (location_hash, captured_at DESC, updated_at DESC, id DESC)
  INCLUDE (file_path, source); drops superseded idx_tiles_location_hash.
- TileRepository.GetByTileCoordinatesAsync rewired to filter by
  location_hash (Index Only Scan via tiles_leaflet_path).
- TileRepository.GetTilesByLocationHashesAsync added with Npgsql-
  direct ANY($1::uuid[]) binding (Dapper IEnumerable expansion is
  incompatible with the array form).
- Uuidv5.LocationHashForTile centralises the UUIDv5(TileNamespace,
  "{z}/{x}/{y}") formula — single source of truth for the cross-repo
  invariant (gps-denied-onboard parity).

Contracts:
- New: contracts/api/tile-inventory.md v1.0.0.
- Bumped: contracts/data-access/tile-storage.md to v2.0.0 (joint
  ownership by AZ-503-foundation + AZ-505: schema + covering index +
  GetByTileCoordinatesAsync rewrite).

Tests:
- TileInventoryTests covers AC-1, AC-2 (DB-level), AC-4, AC-6.
- Http2MultiplexingTests covers AC-5 (20 concurrent multiplexed GETs
  over h2c via SocketsHttpHandler + AppContext Http2Unencrypted switch).
- LeafletPathIndexOnlyTests covers AC-3 (EXPLAIN (ANALYZE, BUFFERS)
  asserts Index Only Scan over tiles_leaflet_path with heap_blocks=0).

Docs:
- architecture.md, system-flows.md, data_model.md, module-layout.md,
  glossary.md, modules/api_program.md, modules/dataaccess_tile_repository.md,
  components/02_data_access/description.md all updated to reference the
  v2.0.0 tile-storage contract + new tile-inventory contract + AC-7.

Reports:
- batch_01_cycle6_report.md, batch_01_cycle6_review.md,
  implementation_completeness_cycle6_report.md (PASS),
  implementation_report_tile_inventory_cycle6.md.

Task spec moved todo/ -> done/.

Co-authored-by: Cursor <cursoragent@cursor.com>
This commit is contained in:
Oleksandr Bezdieniezhnykh
2026-05-12 21:16:37 +03:00
parent 3c7cd4e56b
commit 909f69cb3a
26 changed files with 1780 additions and 65 deletions
@@ -0,0 +1,41 @@
-- AZ-505: Leaflet covering index on `tiles` keyed by location_hash.
--
-- Forward migration:
-- 1. Create `tiles_leaflet_path` covering index over (location_hash,
-- captured_at DESC, updated_at DESC, id DESC) with INCLUDE (file_path, source).
-- The leading column matches the equality predicate used by the AZ-505
-- Leaflet hot path (`SELECT file_path FROM tiles WHERE location_hash = $1
-- ORDER BY captured_at DESC, updated_at DESC, id DESC LIMIT 1`); the INCLUDE
-- columns make that exact projection an index-only scan once VACUUM ANALYZE
-- has set the visibility map.
-- 2. Drop the lightweight `idx_tiles_location_hash` introduced by migration
-- 014 — it is superseded because equality lookups by `location_hash` use
-- the leading column of the new covering index.
--
-- Back-migration (manual):
-- DROP INDEX IF EXISTS tiles_leaflet_path;
-- CREATE INDEX IF NOT EXISTS idx_tiles_location_hash ON tiles (location_hash);
--
-- INCLUDE columns are intentionally narrow (`file_path, source`). The richer
-- inventory endpoint legitimately requires extra columns that are NOT in the
-- INCLUDE list (`id, captured_at, flight_id, image_type, tile_size_meters,
-- tile_size_pixels, location_hash`); inventory queries therefore trigger a
-- bounded heap fetch, which is acceptable per the AZ-505 NFR-Perf-2 budget
-- (≤ 1000 ms p95 / 2500 tiles). See AZ-505 Risk 1 in the task spec.
--
-- Lock window: this migration runs inside DbUp's per-script transaction, which
-- is incompatible with `CREATE INDEX CONCURRENTLY`. On a populated `tiles`
-- table the plain `CREATE INDEX` holds a `SHARE` lock on the table for the
-- duration of the build: reads continue, but INSERT/UPDATE/DELETE block until
-- it finishes. The `DROP INDEX` that follows takes an `ACCESS EXCLUSIVE` lock
-- on `tiles`, which is held until the transaction commits. Schedule deploys to
-- a low-traffic window or pre-build the index out-of-band before running this
-- migration (the `IF NOT EXISTS` guard then turns the build into a no-op).
-- See AZ-505 Risk 2.
--
-- NOTE(review): the paragraph above says DbUp already wraps this script in a
-- transaction, yet the script issues its own BEGIN/COMMIT. Inside an open
-- transaction PostgreSQL only warns on the nested BEGIN, and the COMMIT below
-- would end the outer transaction — confirm this matches how the migration
-- runner is configured.
BEGIN;
-- Build the covering index; skipped when it was pre-built out-of-band.
CREATE INDEX IF NOT EXISTS tiles_leaflet_path
ON tiles (location_hash, captured_at DESC, updated_at DESC, id DESC)
INCLUDE (file_path, source);
-- Remove the single-column index from migration 014 now that the covering
-- index serves the same equality lookups.
DROP INDEX IF EXISTS idx_tiles_location_hash;
COMMIT;
@@ -7,6 +7,11 @@ public interface ITileRepository
Task<TileEntity?> GetByIdAsync(Guid id);
Task<TileEntity?> GetByTileCoordinatesAsync(int tileZoom, int tileX, int tileY);
Task<IEnumerable<TileEntity>> GetTilesByRegionAsync(double latitude, double longitude, double sizeMeters, int zoomLevel);
/// <summary>
/// AZ-505: backing query for the bulk-list (tile inventory) endpoint. Returns the
/// most-recent row across sources/flights for each requested <c>location_hash</c>.
/// </summary>
/// <remarks>
/// Result order is unspecified; callers (TileService.GetInventoryAsync) re-align
/// entries to the request order via dictionary lookup.
/// </remarks>
/// <param name="locationHashes">Location hashes to look up.</param>
/// <returns>The selected tiles keyed by their location hash.</returns>
Task<IReadOnlyDictionary<Guid, TileEntity>> GetTilesByLocationHashesAsync(IReadOnlyList<Guid> locationHashes);
Task<Guid> InsertAsync(TileEntity tile);
Task<int> UpdateAsync(TileEntity tile);
Task<int> DeleteAsync(Guid id);
@@ -44,16 +44,122 @@ public class TileRepository : ITileRepository
/// <summary>
/// Returns the most recent tile stored for the given tile coordinates, or
/// <c>null</c> when no row matches.
/// </summary>
/// <param name="tileZoom">Zoom level of the requested tile.</param>
/// <param name="tileX">X index of the requested tile.</param>
/// <param name="tileY">Y index of the requested tile.</param>
/// <returns>The selected <see cref="TileEntity"/>, or <c>null</c> if none exists.</returns>
public async Task<TileEntity?> GetByTileCoordinatesAsync(int tileZoom, int tileX, int tileY)
{
    await using var connection = new NpgsqlConnection(_connectionString);

    // AZ-505 read-rewrite: filter by `location_hash` so the
    // `tiles_leaflet_path` covering index drives the scan. Selection rule
    // is unchanged from AZ-484: most-recent across sources/flights with
    // deterministic tie-break on (captured_at DESC, updated_at DESC, id DESC).
    // Heap fetch is unavoidable here (the column list spans columns not in
    // the index INCLUDE list); the slim `SELECT file_path` Leaflet hot path
    // — which is what AC-3 measures — is index-only-scannable.
    var locationHash = Uuidv5.LocationHashForTile(tileZoom, tileX, tileY);

    // Exactly one WHERE clause: the former (tile_zoom, tile_x, tile_y)
    // predicate is superseded by the location_hash equality below.
    const string sql = $@"
SELECT {ColumnList}
FROM tiles
WHERE location_hash = @LocationHash
ORDER BY captured_at DESC, updated_at DESC, id DESC
LIMIT 1";

    return await connection.QuerySingleOrDefaultAsync<TileEntity>(sql, new { LocationHash = locationHash });
}
/// <summary>
/// AZ-505 bulk lookup: fetches, for every distinct hash in
/// <paramref name="locationHashes"/>, the single most-recent tile row
/// (tie-break: captured_at DESC, updated_at DESC, id DESC).
/// </summary>
/// <param name="locationHashes">Requested location hashes; duplicates are collapsed before querying.</param>
/// <returns>
/// Tiles keyed by location hash. An empty request yields an empty dictionary;
/// only hashes for which the query returned a row get an entry.
/// </returns>
/// <exception cref="ArgumentNullException"><paramref name="locationHashes"/> is <c>null</c>.</exception>
public async Task<IReadOnlyDictionary<Guid, TileEntity>> GetTilesByLocationHashesAsync(IReadOnlyList<Guid> locationHashes)
{
    ArgumentNullException.ThrowIfNull(locationHashes);

    // Nothing requested: answer without touching the database.
    if (locationHashes.Count == 0)
    {
        return new Dictionary<Guid, TileEntity>();
    }

    await using var connection = new NpgsqlConnection(_connectionString);
    await connection.OpenAsync();

    // `DISTINCT ON (location_hash)` keeps one row per hash, and the ORDER BY
    // makes that row the most-recent variant across sources/flights (same
    // tie-break as AZ-484). The result therefore holds at most one row per
    // distinct hash; aligning it with the request order is the caller's job.
    //
    // Dapper is bypassed on purpose. Per the original implementation note, its
    // IEnumerable parameter expansion would rewrite the array parameter into a
    // `(@p0, @p1, ...)` value list and break `ANY(@p)`; an NpgsqlParameter
    // typed `Array | Uuid` sends the whole set as one `uuid[]` value instead.
    const string sql = @"
SELECT id, tile_zoom AS TileZoom, tile_x AS TileX, tile_y AS TileY,
latitude, longitude,
tile_size_meters AS TileSizeMeters, tile_size_pixels AS TileSizePixels,
image_type AS ImageType, maps_version AS MapsVersion, version,
file_path AS FilePath, source, captured_at AS CapturedAt,
created_at AS CreatedAt, updated_at AS UpdatedAt,
flight_id AS FlightId, location_hash AS LocationHash,
content_sha256 AS ContentSha256, legacy_id AS LegacyId
FROM (
SELECT DISTINCT ON (location_hash)
id, tile_zoom, tile_x, tile_y,
latitude, longitude,
tile_size_meters, tile_size_pixels,
image_type, maps_version, version,
file_path, source, captured_at,
created_at, updated_at,
flight_id, location_hash,
content_sha256, legacy_id
FROM tiles
WHERE location_hash = ANY(@LocationHashes)
ORDER BY location_hash, captured_at DESC, updated_at DESC, id DESC
) most_recent";

    var uniqueHashes = locationHashes.Distinct().ToArray();

    await using var command = new NpgsqlCommand(sql, connection);
    command.Parameters.Add(
        new NpgsqlParameter("LocationHashes", NpgsqlTypes.NpgsqlDbType.Array | NpgsqlTypes.NpgsqlDbType.Uuid)
        {
            Value = uniqueHashes
        });

    // Time execution plus row materialisation; the reader is disposed before
    // the timer stops.
    var timer = Stopwatch.StartNew();
    var tilesByHash = new Dictionary<Guid, TileEntity>(uniqueHashes.Length);

    await using (var reader = await command.ExecuteReaderAsync())
    {
        while (await reader.ReadAsync())
        {
            var entity = ReadTile(reader);
            tilesByHash[entity.LocationHash] = entity;
        }
    }

    timer.Stop();

    var elapsedMs = timer.ElapsedMilliseconds;
    if (elapsedMs > SlowQueryThresholdMs)
    {
        _logger.LogWarning(
            "Slow GetTilesByLocationHashesAsync: {ElapsedMs} ms (threshold {ThresholdMs} ms) for {RequestedHashes} requested ({DistinctHashes} distinct) hashes",
            elapsedMs, SlowQueryThresholdMs, locationHashes.Count, uniqueHashes.Length);
    }

    return tilesByHash;

    // Materialises the current row. Ordinals follow the outer SELECT list of
    // the query above, so the two must be kept in sync.
    static TileEntity ReadTile(NpgsqlDataReader row) => new TileEntity
    {
        Id = row.GetGuid(0),
        TileZoom = row.GetInt32(1),
        TileX = row.GetInt32(2),
        TileY = row.GetInt32(3),
        Latitude = row.GetDouble(4),
        Longitude = row.GetDouble(5),
        TileSizeMeters = row.GetDouble(6),
        TileSizePixels = row.GetInt32(7),
        ImageType = row.GetString(8),
        MapsVersion = row.IsDBNull(9) ? null : row.GetString(9),
        Version = row.IsDBNull(10) ? null : row.GetInt32(10),
        FilePath = row.GetString(11),
        Source = row.GetString(12),
        CapturedAt = row.GetDateTime(13),
        CreatedAt = row.GetDateTime(14),
        UpdatedAt = row.GetDateTime(15),
        FlightId = row.IsDBNull(16) ? null : row.GetGuid(16),
        LocationHash = row.GetGuid(17),
        ContentSha256 = row.IsDBNull(18) ? null : (byte[])row.GetValue(18),
        LegacyId = row.IsDBNull(19) ? null : row.GetGuid(19)
    };
}
public async Task<IEnumerable<TileEntity>> GetTilesByRegionAsync(double latitude, double longitude, double sizeMeters, int zoomLevel)