import json
import logging
import os
import queue
import time
from abc import ABC, abstractmethod
from datetime import datetime, timezone
from typing import Any, Dict, List, Optional, Tuple

import cv2
import numpy as np
from pydantic import BaseModel

from h08_batch_validator import BatchValidator, ValidationResult

logger = logging.getLogger(__name__)

# Rule 5: accepted image dimensions (inclusive bounds).
MIN_WIDTH, MAX_WIDTH = 640, 6252
MIN_HEIGHT, MAX_HEIGHT = 480, 4168


# --- Data Models ---

class ImageBatch(BaseModel):
    """A raw batch of encoded images with their sequence bookkeeping."""
    images: List[bytes]
    filenames: List[str]
    start_sequence: int
    end_sequence: int
    batch_number: int


class ImageMetadata(BaseModel):
    """Lightweight per-image metadata persisted alongside the image file."""
    sequence: int
    filename: str
    dimensions: Tuple[int, int]  # (width, height)
    file_size: int               # encoded size in bytes
    timestamp: datetime
    exif_data: Optional[Dict[str, Any]] = None


class ImageData(BaseModel):
    """A decoded image plus its metadata, ready for downstream processing."""
    flight_id: str
    sequence: int
    filename: str
    image: np.ndarray  # decoded BGR array (cv2.imdecode / cv2.imread)
    metadata: ImageMetadata

    model_config = {"arbitrary_types_allowed": True}


class ProcessedBatch(BaseModel):
    """Result of decoding and storing one queued ImageBatch."""
    images: List[ImageData]
    batch_id: str
    start_sequence: int
    end_sequence: int

    model_config = {"arbitrary_types_allowed": True}


class ProcessingStatus(BaseModel):
    """Per-flight progress counters reported by the pipeline."""
    flight_id: str
    total_images: int
    processed_images: int
    current_sequence: int
    queued_batches: int
    processing_rate: float  # images/second since first processed batch


# --- Interface ---

class IImageInputPipeline(ABC):
    """Contract for image ingestion, validation, storage and retrieval."""

    @abstractmethod
    def queue_batch(self, flight_id: str, batch: ImageBatch) -> bool:
        pass

    @abstractmethod
    def process_next_batch(self, flight_id: str) -> Optional[ProcessedBatch]:
        pass

    @abstractmethod
    def validate_batch(self, batch: ImageBatch) -> ValidationResult:
        pass

    @abstractmethod
    def store_images(self, flight_id: str, images: List[ImageData]) -> bool:
        pass

    @abstractmethod
    def get_next_image(self, flight_id: str) -> Optional[ImageData]:
        pass

    @abstractmethod
    def get_image_by_sequence(self, flight_id: str, sequence: int) -> Optional[ImageData]:
        pass

    @abstractmethod
    def get_image_metadata(self, flight_id: str, sequence: int) -> Optional[ImageMetadata]:
        pass

    @abstractmethod
    def get_processing_status(self, flight_id: str) -> ProcessingStatus:
        pass


# --- Implementation ---

class ImageInputPipeline(IImageInputPipeline):
    """
    F05: Image Input Pipeline

    Handles unified image ingestion, validation, storage, and retrieval.
    Includes a simulation mode to stream sequential images from a local
    directory directly into the engine.
    """

    def __init__(self, storage_dir: str = "./image_storage", max_queue_size: int = 10):
        """Create the pipeline and its on-disk storage root.

        Args:
            storage_dir: Root directory under which per-flight folders are created.
            max_queue_size: Maximum number of batches queued per flight.
        """
        self.storage_dir = storage_dir
        self.max_queue_size = max_queue_size
        os.makedirs(self.storage_dir, exist_ok=True)

        # State tracking per flight.
        self.flight_queues: Dict[str, queue.Queue] = {}
        self.flight_sequences: Dict[str, int] = {}       # next sequence to read back
        self.flight_status: Dict[str, ProcessingStatus] = {}
        self.expected_ingest_seq: Dict[str, int] = {}    # next expected start_sequence
        self.flight_start_times: Dict[str, float] = {}   # first-processing timestamp
        self.validator = BatchValidator()

    def validate_batch(self, batch: ImageBatch) -> ValidationResult:
        """Validate batch integrity and sequence continuity.

        Checks, in order: image/filename count match, batch size, naming
        convention, internal sequence continuity, and per-image format.
        Returns the first failing ValidationResult, or a valid one.
        """
        if len(batch.images) != len(batch.filenames):
            return ValidationResult(
                valid=False,
                errors=["Mismatch between images and filenames count."],
            )

        res = self.validator.validate_batch_size(batch)
        if not res.valid:
            return res

        res = self.validator.validate_naming_convention(batch.filenames)
        if not res.valid:
            return res

        res = self.validator.check_sequence_continuity(batch, batch.start_sequence)
        if not res.valid:
            return res

        for img in batch.images:
            res = self.validator.validate_format(img)
            if not res.valid:
                return res

        return ValidationResult(valid=True, errors=[])

    def _get_queue_capacity(self, flight_id: str) -> int:
        """Return remaining queue slots for the flight (full capacity if unknown)."""
        if flight_id not in self.flight_queues:
            return self.max_queue_size
        return self.max_queue_size - self.flight_queues[flight_id].qsize()

    def _check_sequence_continuity(self, flight_id: str, batch: ImageBatch) -> bool:
        """True if the batch starts exactly where the previous one ended."""
        if flight_id not in self.expected_ingest_seq:
            return True  # first batch for this flight: any start is accepted
        return batch.start_sequence == self.expected_ingest_seq[flight_id]

    def _add_to_queue(self, flight_id: str, batch: ImageBatch) -> bool:
        """Enqueue a validated batch; update ingest bookkeeping.

        Returns False (and logs) when the flight queue is full.
        """
        if self._get_queue_capacity(flight_id) <= 0:
            logger.error("Queue full for flight %s", flight_id)
            return False
        self.flight_queues[flight_id].put(batch)
        self.expected_ingest_seq[flight_id] = batch.end_sequence + 1
        self.flight_status[flight_id].queued_batches += 1
        return True

    def queue_batch(self, flight_id: str, batch: ImageBatch) -> bool:
        """Queue a batch of images for processing (FIFO).

        Validates the batch, enforces cross-batch sequence continuity, and
        lazily initializes per-flight state on first use.
        """
        validation = self.validate_batch(batch)
        if not validation.valid:
            logger.error("Batch validation failed: %s", validation.errors)
            return False

        if not self._check_sequence_continuity(flight_id, batch):
            logger.error("Sequence gap detected for flight %s", flight_id)
            return False

        if flight_id not in self.flight_queues:
            self.flight_queues[flight_id] = queue.Queue(maxsize=self.max_queue_size)
            self.flight_status[flight_id] = ProcessingStatus(
                flight_id=flight_id,
                total_images=0,
                processed_images=0,
                current_sequence=1,
                queued_batches=0,
                processing_rate=0.0,
            )

        return self._add_to_queue(flight_id, batch)

    def _dequeue_batch(self, flight_id: str) -> Optional[ImageBatch]:
        """Pop the oldest queued batch for the flight, or None if empty/unknown."""
        if flight_id not in self.flight_queues or self.flight_queues[flight_id].empty():
            return None
        batch: ImageBatch = self.flight_queues[flight_id].get()
        self.flight_status[flight_id].queued_batches -= 1
        return batch

    def _extract_metadata(
        self, img_bytes: bytes, filename: str, seq: int, img: np.ndarray
    ) -> ImageMetadata:
        """Build metadata from the encoded bytes and the decoded array."""
        h, w = img.shape[:2]
        return ImageMetadata(
            sequence=seq,
            filename=filename,
            dimensions=(w, h),
            file_size=len(img_bytes),
            # Timezone-aware UTC; datetime.utcnow() is deprecated since 3.12.
            timestamp=datetime.now(timezone.utc),
        )

    def _decode_images(self, flight_id: str, batch: ImageBatch) -> List[ImageData]:
        """Decode each image in the batch, skipping undecodable or out-of-bounds ones.

        Rule 5: accepted dimensions are MIN_WIDTH x MIN_HEIGHT through
        MAX_WIDTH x MAX_HEIGHT (inclusive).
        """
        processed_data: List[ImageData] = []
        for idx, img_bytes in enumerate(batch.images):
            filename = batch.filenames[idx]
            seq = batch.start_sequence + idx

            np_arr = np.frombuffer(img_bytes, np.uint8)
            img = cv2.imdecode(np_arr, cv2.IMREAD_COLOR)
            if img is None:
                logger.warning("Failed to decode image %s", filename)
                continue

            h, w = img.shape[:2]
            if not (MIN_WIDTH <= w <= MAX_WIDTH and MIN_HEIGHT <= h <= MAX_HEIGHT):
                logger.warning(
                    "Image %s dimensions (%dx%d) out of bounds.", filename, w, h
                )
                continue

            metadata = self._extract_metadata(img_bytes, filename, seq, img)
            processed_data.append(
                ImageData(
                    flight_id=flight_id,
                    sequence=seq,
                    filename=filename,
                    image=img,
                    metadata=metadata,
                )
            )
        return processed_data

    def process_next_batch(self, flight_id: str) -> Optional[ProcessedBatch]:
        """Dequeue and process the next batch from the FIFO queue.

        Decodes, persists, and counts the batch's images. Returns None when
        no batch is queued for the flight.
        """
        batch = self._dequeue_batch(flight_id)
        if not batch:
            return None

        # Start the rate clock on first processed batch for this flight.
        if flight_id not in self.flight_start_times:
            self.flight_start_times[flight_id] = time.time()

        processed_data = self._decode_images(flight_id, batch)
        if processed_data:
            self.store_images(flight_id, processed_data)
            self.flight_status[flight_id].processed_images += len(processed_data)
            self.flight_status[flight_id].total_images += len(processed_data)

        return ProcessedBatch(
            images=processed_data,
            batch_id=f"batch_{batch.batch_number}",
            start_sequence=batch.start_sequence,
            end_sequence=batch.end_sequence,
        )

    def _create_flight_directory(self, flight_id: str) -> str:
        """Ensure and return the per-flight storage directory."""
        flight_dir = os.path.join(self.storage_dir, flight_id)
        os.makedirs(flight_dir, exist_ok=True)
        return flight_dir

    def _write_image(self, flight_id: str, filename: str, image: np.ndarray) -> bool:
        """Write one image to the flight directory; False on any failure."""
        flight_dir = self._create_flight_directory(flight_id)
        img_path = os.path.join(flight_dir, filename)
        try:
            return cv2.imwrite(img_path, image)
        except Exception as e:
            logger.error("Failed to write image %s: %s", img_path, e)
            return False

    def _update_metadata_index(
        self, flight_id: str, metadata_list: List[ImageMetadata]
    ) -> bool:
        """Merge the given metadata entries into the flight's metadata.json index.

        The index maps str(sequence) -> metadata dict. A corrupt existing
        index is discarded and rebuilt from the new entries.
        """
        flight_dir = self._create_flight_directory(flight_id)
        index_path = os.path.join(flight_dir, "metadata.json")

        index_data: Dict[str, Any] = {}
        if os.path.exists(index_path):
            try:
                with open(index_path, 'r') as f:
                    index_data = json.load(f)
            except json.JSONDecodeError:
                pass  # corrupt index: start fresh rather than fail ingestion

        for meta in metadata_list:
            # Round-trip through model_dump_json so datetimes serialize cleanly.
            index_data[str(meta.sequence)] = json.loads(meta.model_dump_json())

        try:
            with open(index_path, 'w') as f:
                json.dump(index_data, f)
            return True
        except Exception as e:
            logger.error("Failed to update metadata index %s: %s", index_path, e)
            return False

    def store_images(self, flight_id: str, images: List[ImageData]) -> bool:
        """Persist images to disk with indexed storage.

        Writes each image file, a legacy per-image `<filename>.meta.json`
        backup, and updates the consolidated metadata index. Returns False
        on the first image write failure or any storage exception.
        """
        try:
            self._create_flight_directory(flight_id)
            metadata_list: List[ImageMetadata] = []
            for img_data in images:
                if not self._write_image(flight_id, img_data.filename, img_data.image):
                    return False
                metadata_list.append(img_data.metadata)

                # Legacy individual meta file backup (read by the index fallback).
                flight_dir = os.path.join(self.storage_dir, flight_id)
                meta_path = os.path.join(flight_dir, f"{img_data.filename}.meta.json")
                with open(meta_path, 'w') as f:
                    f.write(img_data.metadata.model_dump_json())

            self._update_metadata_index(flight_id, metadata_list)
            return True
        except Exception as e:
            logger.error("Storage error for flight %s: %s", flight_id, e)
            return False

    def _load_image_from_disk(self, flight_id: str, filename: str) -> Optional[np.ndarray]:
        """Load a stored image; None if the file does not exist."""
        flight_dir = os.path.join(self.storage_dir, flight_id)
        img_path = os.path.join(flight_dir, filename)
        if not os.path.exists(img_path):
            return None
        return cv2.imread(img_path, cv2.IMREAD_COLOR)

    def _construct_filename(self, sequence: int) -> str:
        """Canonical on-disk filename for a sequence number, e.g. AD000001.jpg."""
        return f"AD{sequence:06d}.jpg"

    def get_image_by_sequence(self, flight_id: str, sequence: int) -> Optional[ImageData]:
        """Retrieve a specific image by sequence number (image + metadata)."""
        filename = self._construct_filename(sequence)
        img = self._load_image_from_disk(flight_id, filename)
        if img is None:
            return None
        metadata = self._load_metadata_from_index(flight_id, sequence)
        if not metadata:
            return None
        return ImageData(
            flight_id=flight_id,
            sequence=sequence,
            filename=filename,
            image=img,
            metadata=metadata,
        )

    def _get_sequence_tracker(self, flight_id: str) -> int:
        """Return (initializing to 1 if needed) the next read-back sequence."""
        if flight_id not in self.flight_sequences:
            self.flight_sequences[flight_id] = 1
        return self.flight_sequences[flight_id]

    def _increment_sequence(self, flight_id: str) -> None:
        """Advance the read-back sequence after a successful retrieval."""
        if flight_id in self.flight_sequences:
            self.flight_sequences[flight_id] += 1

    def get_next_image(self, flight_id: str) -> Optional[ImageData]:
        """Get the next image in sequence for processing.

        The sequence tracker only advances when the image is found, so a
        missing image will be retried on the next call.
        """
        seq = self._get_sequence_tracker(flight_id)
        img_data = self.get_image_by_sequence(flight_id, seq)
        if img_data:
            self._increment_sequence(flight_id)
            return img_data
        return None

    def _load_metadata_from_index(
        self, flight_id: str, sequence: int
    ) -> Optional[ImageMetadata]:
        """Load metadata for a sequence, preferring the consolidated index.

        Falls back to the legacy per-image `<filename>.meta.json` file
        written by store_images. Returns None when neither source has it.
        """
        flight_dir = os.path.join(self.storage_dir, flight_id)
        index_path = os.path.join(flight_dir, "metadata.json")
        if os.path.exists(index_path):
            try:
                with open(index_path, 'r') as f:
                    index_data = json.load(f)
                if str(sequence) in index_data:
                    return ImageMetadata(**index_data[str(sequence)])
            except Exception:
                pass  # fall through to the legacy per-image file

        # Fallback to individual file. BUGFIX: previously built the path from
        # the literal "(unknown)" instead of the constructed filename, so the
        # fallback could never succeed.
        filename = self._construct_filename(sequence)
        meta_path = os.path.join(flight_dir, f"{filename}.meta.json")
        if os.path.exists(meta_path):
            with open(meta_path, 'r') as f:
                return ImageMetadata(**json.load(f))
        return None

    def get_image_metadata(self, flight_id: str, sequence: int) -> Optional[ImageMetadata]:
        """Retrieve metadata without loading the full image (lightweight)."""
        return self._load_metadata_from_index(flight_id, sequence)

    def _calculate_processing_rate(self, flight_id: str) -> float:
        """Images processed per second since the flight's first batch; 0.0 if unknown."""
        if flight_id not in self.flight_start_times or flight_id not in self.flight_status:
            return 0.0
        elapsed = time.time() - self.flight_start_times[flight_id]
        if elapsed <= 0:
            return 0.0
        return self.flight_status[flight_id].processed_images / elapsed

    def get_processing_status(self, flight_id: str) -> ProcessingStatus:
        """Get current processing status for a flight.

        Returns a zeroed status for unknown flights; otherwise refreshes the
        live status object's sequence and rate before returning it.
        """
        if flight_id not in self.flight_status:
            return ProcessingStatus(
                flight_id=flight_id,
                total_images=0,
                processed_images=0,
                current_sequence=1,
                queued_batches=0,
                processing_rate=0.0,
            )
        status = self.flight_status[flight_id]
        status.current_sequence = self._get_sequence_tracker(flight_id)
        status.processing_rate = self._calculate_processing_rate(flight_id)
        return status

    # --- Simulation Utility ---

    def simulate_directory_ingestion(
        self, flight_id: str, directory_path: str, engine: Any, fps: float = 2.0
    ):
        """Simulate a flight from a local directory.

        Reads images sequentially (sorted by filename) and pushes them
        directly into the Flight Processing Engine queue at roughly `fps`
        frames per second via `engine.add_image(sequence, image)`.
        """
        if not os.path.exists(directory_path):
            logger.error("Simulation directory not found: %s", directory_path)
            return

        valid_exts = ('.jpg', '.jpeg', '.png')
        files = sorted(
            f for f in os.listdir(directory_path) if f.lower().endswith(valid_exts)
        )
        delay = 1.0 / fps
        logger.info(
            "Starting directory simulation for %s. Found %d frames.",
            flight_id, len(files),
        )

        for idx, filename in enumerate(files):
            img = cv2.imread(os.path.join(directory_path, filename), cv2.IMREAD_COLOR)
            if img is not None:
                engine.add_image(idx + 1, img)
                time.sleep(delay)