Initial commit

This commit is contained in:
Denys Zaitsev
2026-04-03 23:25:54 +03:00
parent 531a1301d5
commit d7e1066c60
3843 changed files with 1554468 additions and 0 deletions
+401
View File
@@ -0,0 +1,401 @@
import json
import logging
import os
import queue
import time
from abc import ABC, abstractmethod
from datetime import datetime, timezone
from typing import Any, Dict, List, Optional, Tuple

import cv2
import numpy as np
from pydantic import BaseModel

from h08_batch_validator import BatchValidator, ValidationResult
logger = logging.getLogger(__name__)
# --- Data Models ---
class ImageBatch(BaseModel):
    """A raw batch of encoded images submitted for ingestion."""

    images: List[bytes]   # encoded image payloads (bytes fed to cv2.imdecode)
    filenames: List[str]  # parallel list: filenames[i] names images[i]
    start_sequence: int   # sequence number of the first image in the batch
    end_sequence: int     # sequence number of the last image (inclusive)
    batch_number: int     # ordinal of this batch, used to build the batch_id
class ImageMetadata(BaseModel):
    """Lightweight per-image metadata persisted alongside the image file."""

    sequence: int                # sequence number within the flight
    filename: str                # on-disk filename of the image
    dimensions: Tuple[int, int]  # (width, height) in pixels
    file_size: int               # size of the encoded payload in bytes
    timestamp: datetime          # ingestion time
    exif_data: Optional[Dict[str, Any]] = None  # raw EXIF tags, if extracted
class ImageData(BaseModel):
    """A decoded image plus its metadata, bound to a flight."""

    flight_id: str
    sequence: int
    filename: str
    image: np.ndarray  # decoded pixel data as returned by cv2.imdecode/imread
    metadata: ImageMetadata
    # np.ndarray is not a pydantic-native type; allow it explicitly.
    model_config = {"arbitrary_types_allowed": True}
class ProcessedBatch(BaseModel):
    """Result of processing one queued batch: the successfully decoded images."""

    images: List[ImageData]  # may be shorter than the input batch if decodes failed
    batch_id: str            # "batch_<batch_number>"
    start_sequence: int
    end_sequence: int
class ProcessingStatus(BaseModel):
    """Mutable per-flight progress counters reported by the pipeline."""

    flight_id: str
    total_images: int       # cumulative count of successfully processed images
    processed_images: int   # images decoded and stored so far
    current_sequence: int   # read cursor used by get_next_image()
    queued_batches: int     # batches waiting in the FIFO queue
    processing_rate: float  # processed images per second since processing began
# --- Interface ---
class IImageInputPipeline(ABC):
    """Abstract contract for image ingestion, storage, and retrieval."""

    @abstractmethod
    def queue_batch(self, flight_id: str, batch: ImageBatch) -> bool:
        """Validate *batch* and enqueue it for *flight_id*; return success."""
        pass

    @abstractmethod
    def process_next_batch(self, flight_id: str) -> Optional[ProcessedBatch]:
        """Dequeue and process the oldest batch, or return None when empty."""
        pass

    @abstractmethod
    def validate_batch(self, batch: ImageBatch) -> ValidationResult:
        """Check batch integrity; return the first failure or a valid result."""
        pass

    @abstractmethod
    def store_images(self, flight_id: str, images: List[ImageData]) -> bool:
        """Persist decoded images and their metadata to disk."""
        pass

    @abstractmethod
    def get_next_image(self, flight_id: str) -> Optional[ImageData]:
        """Return the next image in sequence, advancing the read cursor."""
        pass

    @abstractmethod
    def get_image_by_sequence(self, flight_id: str, sequence: int) -> Optional[ImageData]:
        """Return a specific stored image by sequence number, or None."""
        pass

    @abstractmethod
    def get_image_metadata(self, flight_id: str, sequence: int) -> Optional[ImageMetadata]:
        """Return metadata only, without loading the image pixels."""
        pass

    @abstractmethod
    def get_processing_status(self, flight_id: str) -> ProcessingStatus:
        """Return current progress counters for *flight_id*."""
        pass
# --- Implementation ---
class ImageInputPipeline(IImageInputPipeline):
    """
    F05: Image Input Pipeline

    Handles unified image ingestion, validation, storage, and retrieval.
    Includes a simulation mode to stream sequential images from a local
    directory directly into the engine.
    """

    def __init__(self, storage_dir: str = "./image_storage", max_queue_size: int = 10):
        self.storage_dir = storage_dir
        self.max_queue_size = max_queue_size
        os.makedirs(self.storage_dir, exist_ok=True)
        # State tracking per flight.
        self.flight_queues: Dict[str, queue.Queue] = {}    # FIFO of pending ImageBatch
        self.flight_sequences: Dict[str, int] = {}         # read cursor for get_next_image()
        self.flight_status: Dict[str, ProcessingStatus] = {}
        self.expected_ingest_seq: Dict[str, int] = {}      # next expected batch start_sequence
        self.flight_start_times: Dict[str, float] = {}     # wall-clock start of processing
        self.validator = BatchValidator()

    def validate_batch(self, batch: ImageBatch) -> ValidationResult:
        """Validate batch integrity and sequence continuity.

        Runs checks in order and returns the first failure: image/filename
        count parity, batch size, naming convention, internal sequence
        continuity, then per-image format.
        """
        if len(batch.images) != len(batch.filenames):
            return ValidationResult(
                valid=False,
                errors=["Mismatch between images and filenames count."],
            )
        res = self.validator.validate_batch_size(batch)
        if not res.valid:
            return res
        res = self.validator.validate_naming_convention(batch.filenames)
        if not res.valid:
            return res
        res = self.validator.check_sequence_continuity(batch, batch.start_sequence)
        if not res.valid:
            return res
        for img in batch.images:
            res = self.validator.validate_format(img)
            if not res.valid:
                return res
        return ValidationResult(valid=True, errors=[])

    def _get_queue_capacity(self, flight_id: str) -> int:
        """Return how many more batches the flight's queue can accept."""
        if flight_id not in self.flight_queues:
            return self.max_queue_size
        return self.max_queue_size - self.flight_queues[flight_id].qsize()

    def _check_sequence_continuity(self, flight_id: str, batch: ImageBatch) -> bool:
        """True if *batch* starts exactly where the previously queued batch ended."""
        if flight_id not in self.expected_ingest_seq:
            return True  # first batch for this flight — any start accepted
        return batch.start_sequence == self.expected_ingest_seq[flight_id]

    def _add_to_queue(self, flight_id: str, batch: ImageBatch) -> bool:
        """Append *batch* to the flight's FIFO; False when the queue is full."""
        if self._get_queue_capacity(flight_id) <= 0:
            logger.error(f"Queue full for flight {flight_id}")
            return False
        self.flight_queues[flight_id].put(batch)
        self.expected_ingest_seq[flight_id] = batch.end_sequence + 1
        self.flight_status[flight_id].queued_batches += 1
        return True

    def queue_batch(self, flight_id: str, batch: ImageBatch) -> bool:
        """Queues a batch of images for processing (FIFO).

        Validates the batch, enforces inter-batch sequence continuity, and
        lazily initialises per-flight state on first use. Returns True on
        success.
        """
        validation = self.validate_batch(batch)
        if not validation.valid:
            logger.error(f"Batch validation failed: {validation.errors}")
            return False
        if not self._check_sequence_continuity(flight_id, batch):
            logger.error(f"Sequence gap detected for flight {flight_id}")
            return False
        if flight_id not in self.flight_queues:
            self.flight_queues[flight_id] = queue.Queue(maxsize=self.max_queue_size)
            self.flight_status[flight_id] = ProcessingStatus(
                flight_id=flight_id, total_images=0, processed_images=0,
                current_sequence=1, queued_batches=0, processing_rate=0.0
            )
        return self._add_to_queue(flight_id, batch)

    def _dequeue_batch(self, flight_id: str) -> Optional[ImageBatch]:
        """Pop the oldest queued batch for *flight_id*, or None when empty."""
        if flight_id not in self.flight_queues or self.flight_queues[flight_id].empty():
            return None
        batch: ImageBatch = self.flight_queues[flight_id].get()
        self.flight_status[flight_id].queued_batches -= 1
        return batch

    def _extract_metadata(self, img_bytes: bytes, filename: str, seq: int, img: np.ndarray) -> ImageMetadata:
        """Build ImageMetadata from the encoded payload and decoded pixels."""
        h, w = img.shape[:2]
        return ImageMetadata(
            sequence=seq,
            filename=filename,
            dimensions=(w, h),
            file_size=len(img_bytes),
            # Timezone-aware UTC; datetime.utcnow() is deprecated since 3.12.
            timestamp=datetime.now(timezone.utc)
        )

    def _decode_images(self, flight_id: str, batch: ImageBatch) -> List[ImageData]:
        """Decode every payload in *batch*, skipping (with a warning) failures."""
        processed_data = []
        for idx, img_bytes in enumerate(batch.images):
            filename = batch.filenames[idx]
            seq = batch.start_sequence + idx
            np_arr = np.frombuffer(img_bytes, np.uint8)
            img = cv2.imdecode(np_arr, cv2.IMREAD_COLOR)
            if img is None:
                # Fixed: log the actual filename instead of an "(unknown)" placeholder.
                logger.warning(f"Failed to decode image {filename}")
                continue
            # Rule 5: image dimensions must lie within 640x480 .. 6252x4168.
            h, w = img.shape[:2]
            if not (640 <= w <= 6252 and 480 <= h <= 4168):
                logger.warning(f"Image {filename} dimensions ({w}x{h}) out of bounds.")
                continue
            metadata = self._extract_metadata(img_bytes, filename, seq, img)
            img_data = ImageData(
                flight_id=flight_id, sequence=seq, filename=filename,
                image=img, metadata=metadata
            )
            processed_data.append(img_data)
        return processed_data

    def process_next_batch(self, flight_id: str) -> Optional[ProcessedBatch]:
        """Dequeues and processes the next batch from FIFO queue.

        Decodes, stores, and counts the batch's images; returns None when no
        batch is queued. The returned batch may contain fewer images than
        ingested if some payloads failed to decode.
        """
        batch = self._dequeue_batch(flight_id)
        if not batch:
            return None
        if flight_id not in self.flight_start_times:
            self.flight_start_times[flight_id] = time.time()
        processed_data = self._decode_images(flight_id, batch)
        if processed_data:
            self.store_images(flight_id, processed_data)
            self.flight_status[flight_id].processed_images += len(processed_data)
            self.flight_status[flight_id].total_images += len(processed_data)
        return ProcessedBatch(
            images=processed_data,
            batch_id=f"batch_{batch.batch_number}",
            start_sequence=batch.start_sequence,
            end_sequence=batch.end_sequence
        )

    def _create_flight_directory(self, flight_id: str) -> str:
        """Ensure the per-flight storage directory exists; return its path."""
        flight_dir = os.path.join(self.storage_dir, flight_id)
        os.makedirs(flight_dir, exist_ok=True)
        return flight_dir

    def _write_image(self, flight_id: str, filename: str, image: np.ndarray) -> bool:
        """Persist *image* under the flight directory; False on write failure."""
        flight_dir = self._create_flight_directory(flight_id)
        img_path = os.path.join(flight_dir, filename)
        try:
            return cv2.imwrite(img_path, image)
        except Exception as e:
            logger.error(f"Failed to write image {img_path}: {e}")
            return False

    def _update_metadata_index(self, flight_id: str, metadata_list: List[ImageMetadata]) -> bool:
        """Merge *metadata_list* into the flight's metadata.json index (keyed by sequence)."""
        flight_dir = self._create_flight_directory(flight_id)
        index_path = os.path.join(flight_dir, "metadata.json")
        index_data = {}
        if os.path.exists(index_path):
            try:
                with open(index_path, 'r') as f:
                    index_data = json.load(f)
            except json.JSONDecodeError:
                # Corrupt index: rebuild from scratch rather than fail ingestion.
                pass
        for meta in metadata_list:
            # Round-trip through pydantic's JSON encoder so datetimes serialize.
            index_data[str(meta.sequence)] = json.loads(meta.model_dump_json())
        try:
            with open(index_path, 'w') as f:
                json.dump(index_data, f)
            return True
        except Exception as e:
            logger.error(f"Failed to update metadata index {index_path}: {e}")
            return False

    def store_images(self, flight_id: str, images: List[ImageData]) -> bool:
        """Persists images to disk with indexed storage.

        Writes each image file plus a per-image ``<filename>.meta.json``
        backup, then updates the consolidated index. Returns False on the
        first write failure.
        """
        try:
            self._create_flight_directory(flight_id)
            metadata_list = []
            for img_data in images:
                if not self._write_image(flight_id, img_data.filename, img_data.image):
                    return False
                metadata_list.append(img_data.metadata)
                # Legacy individual meta file backup.
                flight_dir = os.path.join(self.storage_dir, flight_id)
                meta_path = os.path.join(flight_dir, f"{img_data.filename}.meta.json")
                with open(meta_path, 'w') as f:
                    f.write(img_data.metadata.model_dump_json())
            self._update_metadata_index(flight_id, metadata_list)
            return True
        except Exception as e:
            logger.error(f"Storage error for flight {flight_id}: {e}")
            return False

    def _load_image_from_disk(self, flight_id: str, filename: str) -> Optional[np.ndarray]:
        """Read a stored image back from disk, or None if the file is absent."""
        flight_dir = os.path.join(self.storage_dir, flight_id)
        img_path = os.path.join(flight_dir, filename)
        if not os.path.exists(img_path):
            return None
        return cv2.imread(img_path, cv2.IMREAD_COLOR)

    def _construct_filename(self, sequence: int) -> str:
        """Canonical on-disk filename for a sequence number, e.g. AD000042.jpg."""
        return f"AD{sequence:06d}.jpg"

    def get_image_by_sequence(self, flight_id: str, sequence: int) -> Optional[ImageData]:
        """Retrieves a specific image (pixels + metadata) by sequence number."""
        filename = self._construct_filename(sequence)
        img = self._load_image_from_disk(flight_id, filename)
        if img is None:
            return None
        metadata = self._load_metadata_from_index(flight_id, sequence)
        if not metadata:
            return None
        return ImageData(flight_id=flight_id, sequence=sequence, filename=filename, image=img, metadata=metadata)

    def _get_sequence_tracker(self, flight_id: str) -> int:
        """Return the flight's read cursor, initialising it to 1 on first use."""
        if flight_id not in self.flight_sequences:
            self.flight_sequences[flight_id] = 1
        return self.flight_sequences[flight_id]

    def _increment_sequence(self, flight_id: str) -> None:
        """Advance the read cursor after a successful retrieval."""
        if flight_id in self.flight_sequences:
            self.flight_sequences[flight_id] += 1

    def get_next_image(self, flight_id: str) -> Optional[ImageData]:
        """Gets the next image in sequence; advances the cursor only on success."""
        seq = self._get_sequence_tracker(flight_id)
        img_data = self.get_image_by_sequence(flight_id, seq)
        if img_data:
            self._increment_sequence(flight_id)
            return img_data
        return None

    def _load_metadata_from_index(self, flight_id: str, sequence: int) -> Optional[ImageMetadata]:
        """Load metadata from the consolidated index, falling back to the per-image file."""
        flight_dir = os.path.join(self.storage_dir, flight_id)
        index_path = os.path.join(flight_dir, "metadata.json")
        if os.path.exists(index_path):
            try:
                with open(index_path, 'r') as f:
                    index_data = json.load(f)
                if str(sequence) in index_data:
                    return ImageMetadata(**index_data[str(sequence)])
            except Exception:
                pass  # fall through to the per-image backup file
        # Fallback to individual file written by store_images().
        filename = self._construct_filename(sequence)
        # Fixed: path previously used a literal "(unknown)" placeholder, so the
        # fallback file written by store_images() was never found.
        meta_path = os.path.join(flight_dir, f"{filename}.meta.json")
        if os.path.exists(meta_path):
            with open(meta_path, 'r') as f:
                return ImageMetadata(**json.load(f))
        return None

    def get_image_metadata(self, flight_id: str, sequence: int) -> Optional[ImageMetadata]:
        """Retrieves metadata without loading full image (lightweight)."""
        return self._load_metadata_from_index(flight_id, sequence)

    def _calculate_processing_rate(self, flight_id: str) -> float:
        """Processed images per second since the flight's first processed batch."""
        if flight_id not in self.flight_start_times or flight_id not in self.flight_status:
            return 0.0
        elapsed = time.time() - self.flight_start_times[flight_id]
        if elapsed <= 0:
            return 0.0
        return self.flight_status[flight_id].processed_images / elapsed

    def get_processing_status(self, flight_id: str) -> ProcessingStatus:
        """Gets current processing status for a flight (zeroed status if unknown)."""
        if flight_id not in self.flight_status:
            return ProcessingStatus(
                flight_id=flight_id, total_images=0, processed_images=0,
                current_sequence=1, queued_batches=0, processing_rate=0.0
            )
        status = self.flight_status[flight_id]
        status.current_sequence = self._get_sequence_tracker(flight_id)
        status.processing_rate = self._calculate_processing_rate(flight_id)
        return status

    # --- Simulation Utility ---
    def simulate_directory_ingestion(self, flight_id: str, directory_path: str, engine: Any, fps: float = 2.0):
        """
        Simulates a flight by reading images sequentially from a local directory
        and pushing them directly into the Flight Processing Engine queue.
        """
        if not os.path.exists(directory_path):
            logger.error(f"Simulation directory not found: {directory_path}")
            return
        valid_exts = ('.jpg', '.jpeg', '.png')
        files = sorted([f for f in os.listdir(directory_path) if f.lower().endswith(valid_exts)])
        # Guard against fps <= 0: stream as fast as possible instead of crashing.
        delay = 1.0 / fps if fps > 0 else 0.0
        logger.info(f"Starting directory simulation for {flight_id}. Found {len(files)} frames.")
        for idx, filename in enumerate(files):
            img = cv2.imread(os.path.join(directory_path, filename), cv2.IMREAD_COLOR)
            if img is not None:
                engine.add_image(idx + 1, img)
            # Pace frames at the requested fps regardless of decode success.
            time.sleep(delay)