feat: stage6 — Image Pipeline (F05) and Rotation Manager (F06)

This commit is contained in:
Yuzviak
2026-03-22 22:51:00 +02:00
parent a2fb9ab404
commit 9ef046d623
9 changed files with 653 additions and 26 deletions
+204
View File
@@ -0,0 +1,204 @@
"""Image Input Pipeline (Component F05)."""
import asyncio
import os
import re
from datetime import datetime, timezone
import cv2
import numpy as np
from gps_denied.schemas.image import (
ImageBatch, ImageData, ImageMetadata, ProcessedBatch, ProcessingStatus, ValidationResult
)
class QueueFullError(Exception):
    """Raised when a flight's batch queue has no free slot for another batch."""
class ValidationError(Exception):
    """Raised when an image batch is rejected by batch validation."""
class ImageInputPipeline:
    """Manages ingestion, disk storage, and queuing of UAV image batches.

    Each flight gets its own bounded ``asyncio.Queue`` of batches and a small
    in-memory status record. Decoded images are written below
    ``<storage_dir>/<flight_id>/`` and can be read back by sequence number.
    """

    # Inclusive bounds on the number of images accepted in a single batch.
    MIN_BATCH_SIZE = 10
    MAX_BATCH_SIZE = 100

    # Naming convention ADxxxxxx.jpg or similar: letters, digits, "_" and "-"
    # followed by a jpg/jpeg/png extension (case-insensitive).
    _FILENAME_PATTERN = re.compile(r"^[A-Za-z0-9_-]+\.(jpg|jpeg|png)$", re.IGNORECASE)

    # Used to pull whole runs of digits out of a stored file name.
    _DIGIT_RUN = re.compile(r"[0-9]+")

    def __init__(self, storage_dir: str = "image_storage", max_queue_size: int = 50):
        """Create a pipeline.

        Args:
            storage_dir: Root directory under which per-flight folders are created.
            max_queue_size: Maximum number of batches held per flight queue.
        """
        self.storage_dir = storage_dir
        self.max_queue_size = max_queue_size
        # flight_id -> asyncio.Queue of ImageBatch
        self._queues: dict[str, asyncio.Queue] = {}
        # In-memory tracking (in a real system, sync this with DB)
        self._status: dict[str, dict] = {}
        # flight_id -> {sequence -> file name written by store_images()}.
        # Lets lookups by sequence work even when the file name does not
        # contain the sequence number.
        self._sequence_index: dict[str, dict[int, str]] = {}

    def _get_queue(self, flight_id: str) -> asyncio.Queue:
        """Return the queue for ``flight_id``, creating it on first use."""
        if flight_id not in self._queues:
            self._queues[flight_id] = asyncio.Queue(maxsize=self.max_queue_size)
        return self._queues[flight_id]

    def _init_status(self, flight_id: str) -> None:
        """Create the status record for ``flight_id`` if it does not exist yet."""
        if flight_id not in self._status:
            self._status[flight_id] = {
                "total_images": 0,
                "processed_images": 0,
                "current_sequence": 1,
            }

    def validate_batch(self, batch: ImageBatch) -> ValidationResult:
        """Check batch size, filename/image count, file names and sequence order.

        Returns:
            A ``ValidationResult`` whose ``valid`` flag is True only when no
            error was recorded; ``errors`` lists every problem found (only the
            first invalid file name is reported).
        """
        errors: list[str] = []
        num_images = len(batch.images)

        # NOTE(review): the first message says "empty" although it fires for
        # any batch below MIN_BATCH_SIZE. The text is kept unchanged because
        # callers may match on it.
        if num_images < self.MIN_BATCH_SIZE:
            errors.append("Batch is empty")
        elif num_images > self.MAX_BATCH_SIZE:
            errors.append("Batch too large")

        if len(batch.filenames) != num_images:
            errors.append("Mismatch between filenames and images count")

        first_bad = next(
            (fn for fn in batch.filenames if not self._FILENAME_PATTERN.match(fn)),
            None,
        )
        if first_bad is not None:
            errors.append(f"Invalid filename: {first_bad}")

        if batch.start_sequence > batch.end_sequence:
            errors.append("Start sequence greater than end sequence")

        return ValidationResult(valid=not errors, errors=errors)

    def queue_batch(self, flight_id: str, batch: ImageBatch) -> bool:
        """Validate ``batch`` and append it to the flight's queue.

        Returns:
            True once the batch has been queued.

        Raises:
            ValidationError: If ``validate_batch`` reports any error.
            QueueFullError: If the flight's queue already holds
                ``max_queue_size`` batches.
        """
        val = self.validate_batch(batch)
        if not val.valid:
            raise ValidationError(f"Batch validation failed: {val.errors}")

        q = self._get_queue(flight_id)
        try:
            q.put_nowait(batch)
        except asyncio.QueueFull:
            raise QueueFullError(f"Queue for flight {flight_id} is full") from None

        self._init_status(flight_id)
        self._status[flight_id]["total_images"] += len(batch.images)
        return True

    async def process_next_batch(self, flight_id: str) -> ProcessedBatch | None:
        """Dequeue and process the next batch for ``flight_id``.

        Every payload is decoded with OpenCV; payloads that are empty or cannot
        be decoded are skipped. Decoded images are stored to disk and counted
        in the flight status.

        Returns:
            The processed batch, or None when the queue is empty.
        """
        q = self._get_queue(flight_id)
        try:
            batch: ImageBatch = q.get_nowait()
        except asyncio.QueueEmpty:
            return None

        try:
            processed_images = []
            for offset, (raw_bytes, fn) in enumerate(zip(batch.images, batch.filenames)):
                # An empty payload cannot be an image; skip it up front rather
                # than handing an empty buffer to the decoder.
                if len(raw_bytes) == 0:
                    continue
                img = cv2.imdecode(np.frombuffer(raw_bytes, np.uint8), cv2.IMREAD_COLOR)
                if img is None:
                    continue  # skip corrupted
                processed_images.append(
                    self._make_image_data(
                        flight_id,
                        batch.start_sequence + offset,
                        fn,
                        img,
                        len(raw_bytes),
                    )
                )

            # Store to disk
            self.store_images(flight_id, processed_images)
            self._init_status(flight_id)
            self._status[flight_id]["processed_images"] += len(processed_images)
        finally:
            # Always balance the get() so Queue.join() cannot hang after a failure.
            q.task_done()

        return ProcessedBatch(
            images=processed_images,
            batch_id=f"batch_{batch.batch_number}",
            start_sequence=batch.start_sequence,
            end_sequence=batch.end_sequence,
        )

    def store_images(self, flight_id: str, images: list[ImageData]) -> bool:
        """Persist images to ``<storage_dir>/<flight_id>/<filename>``.

        Successfully written images are also recorded in the in-memory
        sequence index used by ``get_image_by_sequence``.

        Returns:
            True if every image was written (also for an empty list), False if
            at least one write was reported as failed.
        """
        flight_dir = os.path.join(self.storage_dir, flight_id)
        os.makedirs(flight_dir, exist_ok=True)
        index = self._sequence_index.setdefault(flight_id, {})

        all_written = True
        for img in images:
            path = os.path.join(flight_dir, img.filename)
            if cv2.imwrite(path, img.image):
                index[img.sequence] = img.filename
            else:
                all_written = False
        return all_written

    def get_next_image(self, flight_id: str) -> ImageData | None:
        """Return the image at the flight's current sequence and advance it.

        The current sequence is only incremented when an image was found.
        """
        self._init_status(flight_id)
        status = self._status[flight_id]
        img = self.get_image_by_sequence(flight_id, status["current_sequence"])
        if img is not None:
            status["current_sequence"] += 1
        return img

    def get_image_by_sequence(self, flight_id: str, sequence: int) -> ImageData | None:
        """Retrieve a stored image by sequence number.

        The file recorded for this sequence by ``store_images`` is tried first.
        Otherwise the flight directory is scanned (in sorted order) for a file
        whose name contains a whole run of digits equal to ``sequence``, so
        sequence 1 matches ``frame_000001.jpg`` but not ``frame_000010.jpg``.
        The scan is a best-effort fallback: files with names unrelated to
        their sequence can only be found through the in-memory index.

        Returns:
            The loaded image, or None if no readable match exists.
        """
        flight_dir = os.path.join(self.storage_dir, flight_id)
        if not os.path.isdir(flight_dir):
            return None

        indexed = self._sequence_index.get(flight_id, {}).get(sequence)
        if indexed is not None:
            found = self._load_stored_image(flight_id, sequence, flight_dir, indexed)
            if found is not None:
                return found

        for fn in sorted(os.listdir(flight_dir)):
            if fn == indexed or not self._filename_has_sequence(fn, sequence):
                continue
            found = self._load_stored_image(flight_id, sequence, flight_dir, fn)
            if found is not None:
                return found
        return None

    def get_processing_status(self, flight_id: str) -> ProcessingStatus:
        """Return a snapshot of counters and queue depth for ``flight_id``."""
        self._init_status(flight_id)
        s = self._status[flight_id]
        q = self._get_queue(flight_id)
        return ProcessingStatus(
            flight_id=flight_id,
            total_images=s["total_images"],
            processed_images=s["processed_images"],
            current_sequence=s["current_sequence"],
            queued_batches=q.qsize(),
            processing_rate=0.0,  # mock
        )

    @classmethod
    def _filename_has_sequence(cls, filename: str, sequence: int) -> bool:
        """Return True if a whole digit run in the file stem equals ``sequence``."""
        stem, _ext = os.path.splitext(filename)
        return any(int(run) == sequence for run in cls._DIGIT_RUN.findall(stem))

    def _load_stored_image(
        self, flight_id: str, sequence: int, flight_dir: str, filename: str
    ) -> ImageData | None:
        """Read one stored file; return None if OpenCV cannot load it."""
        path = os.path.join(flight_dir, filename)
        img = cv2.imread(path)
        if img is None:
            return None
        return self._make_image_data(
            flight_id, sequence, filename, img, os.path.getsize(path)
        )

    @staticmethod
    def _make_image_data(
        flight_id: str, sequence: int, filename: str, image: np.ndarray, file_size: int
    ) -> ImageData:
        """Build an ``ImageData`` (with metadata stamped at the current UTC time)."""
        h, w = image.shape[:2]
        meta = ImageMetadata(
            sequence=sequence,
            filename=filename,
            dimensions=(w, h),
            file_size=file_size,
            timestamp=datetime.now(timezone.utc),
        )
        # Keyword arguments keep construction consistent across all call sites.
        return ImageData(
            flight_id=flight_id,
            sequence=sequence,
            filename=filename,
            image=image,
            metadata=meta,
        )