Initial commit

This commit is contained in:
Denys Zaitsev
2026-04-03 23:25:54 +03:00
parent 531a1301d5
commit d7e1066c60
3843 changed files with 1554468 additions and 0 deletions
+401
View File
@@ -0,0 +1,401 @@
import json
import logging
import os
import queue
import time
from abc import ABC, abstractmethod
from datetime import datetime, timezone
from typing import Any, Dict, List, Optional, Tuple

import cv2
import numpy as np
from pydantic import BaseModel

from h08_batch_validator import BatchValidator, ValidationResult
logger = logging.getLogger(__name__)
# --- Data Models ---
class ImageBatch(BaseModel):
    """A raw batch of encoded images submitted for ingestion."""

    images: List[bytes]   # encoded image payloads (bytes fed to cv2.imdecode)
    filenames: List[str]  # parallel list: filenames[i] names images[i]
    start_sequence: int   # sequence number of the first image in the batch
    end_sequence: int     # sequence number of the last image (inclusive)
    batch_number: int     # ordinal of this batch, used to build the batch_id
class ImageMetadata(BaseModel):
    """Lightweight per-image metadata persisted alongside the image file."""

    sequence: int                # sequence number within the flight
    filename: str                # on-disk filename of the image
    dimensions: Tuple[int, int]  # (width, height) in pixels
    file_size: int               # size of the encoded payload in bytes
    timestamp: datetime          # ingestion time
    exif_data: Optional[Dict[str, Any]] = None  # raw EXIF tags, if extracted
class ImageData(BaseModel):
    """A decoded image plus its metadata, bound to a flight."""

    flight_id: str
    sequence: int
    filename: str
    image: np.ndarray  # decoded pixel data as returned by cv2.imdecode/imread
    metadata: ImageMetadata
    # np.ndarray is not a pydantic-native type; allow it explicitly.
    model_config = {"arbitrary_types_allowed": True}
class ProcessedBatch(BaseModel):
    """Result of processing one queued batch: the successfully decoded images."""

    images: List[ImageData]  # may be shorter than the input batch if decodes failed
    batch_id: str            # "batch_<batch_number>"
    start_sequence: int
    end_sequence: int
class ProcessingStatus(BaseModel):
    """Mutable per-flight progress counters reported by the pipeline."""

    flight_id: str
    total_images: int       # cumulative count of successfully processed images
    processed_images: int   # images decoded and stored so far
    current_sequence: int   # read cursor used by get_next_image()
    queued_batches: int     # batches waiting in the FIFO queue
    processing_rate: float  # processed images per second since processing began
# --- Interface ---
class IImageInputPipeline(ABC):
    """Abstract contract for image ingestion, storage, and retrieval."""

    @abstractmethod
    def queue_batch(self, flight_id: str, batch: ImageBatch) -> bool:
        """Validate *batch* and enqueue it for *flight_id*; return success."""
        pass

    @abstractmethod
    def process_next_batch(self, flight_id: str) -> Optional[ProcessedBatch]:
        """Dequeue and process the oldest batch, or return None when empty."""
        pass

    @abstractmethod
    def validate_batch(self, batch: ImageBatch) -> ValidationResult:
        """Check batch integrity; return the first failure or a valid result."""
        pass

    @abstractmethod
    def store_images(self, flight_id: str, images: List[ImageData]) -> bool:
        """Persist decoded images and their metadata to disk."""
        pass

    @abstractmethod
    def get_next_image(self, flight_id: str) -> Optional[ImageData]:
        """Return the next image in sequence, advancing the read cursor."""
        pass

    @abstractmethod
    def get_image_by_sequence(self, flight_id: str, sequence: int) -> Optional[ImageData]:
        """Return a specific stored image by sequence number, or None."""
        pass

    @abstractmethod
    def get_image_metadata(self, flight_id: str, sequence: int) -> Optional[ImageMetadata]:
        """Return metadata only, without loading the image pixels."""
        pass

    @abstractmethod
    def get_processing_status(self, flight_id: str) -> ProcessingStatus:
        """Return current progress counters for *flight_id*."""
        pass
# --- Implementation ---
class ImageInputPipeline(IImageInputPipeline):
    """
    F05: Image Input Pipeline

    Handles unified image ingestion, validation, storage, and retrieval.
    Includes a simulation mode to stream sequential images from a local
    directory directly into the engine.
    """

    def __init__(self, storage_dir: str = "./image_storage", max_queue_size: int = 10):
        self.storage_dir = storage_dir
        self.max_queue_size = max_queue_size
        os.makedirs(self.storage_dir, exist_ok=True)
        # State tracking per flight.
        self.flight_queues: Dict[str, queue.Queue] = {}    # FIFO of pending ImageBatch
        self.flight_sequences: Dict[str, int] = {}         # read cursor for get_next_image()
        self.flight_status: Dict[str, ProcessingStatus] = {}
        self.expected_ingest_seq: Dict[str, int] = {}      # next expected batch start_sequence
        self.flight_start_times: Dict[str, float] = {}     # wall-clock start of processing
        self.validator = BatchValidator()

    def validate_batch(self, batch: ImageBatch) -> ValidationResult:
        """Validate batch integrity and sequence continuity.

        Runs checks in order and returns the first failure: image/filename
        count parity, batch size, naming convention, internal sequence
        continuity, then per-image format.
        """
        if len(batch.images) != len(batch.filenames):
            return ValidationResult(
                valid=False,
                errors=["Mismatch between images and filenames count."],
            )
        res = self.validator.validate_batch_size(batch)
        if not res.valid:
            return res
        res = self.validator.validate_naming_convention(batch.filenames)
        if not res.valid:
            return res
        res = self.validator.check_sequence_continuity(batch, batch.start_sequence)
        if not res.valid:
            return res
        for img in batch.images:
            res = self.validator.validate_format(img)
            if not res.valid:
                return res
        return ValidationResult(valid=True, errors=[])

    def _get_queue_capacity(self, flight_id: str) -> int:
        """Return how many more batches the flight's queue can accept."""
        if flight_id not in self.flight_queues:
            return self.max_queue_size
        return self.max_queue_size - self.flight_queues[flight_id].qsize()

    def _check_sequence_continuity(self, flight_id: str, batch: ImageBatch) -> bool:
        """True if *batch* starts exactly where the previously queued batch ended."""
        if flight_id not in self.expected_ingest_seq:
            return True  # first batch for this flight — any start accepted
        return batch.start_sequence == self.expected_ingest_seq[flight_id]

    def _add_to_queue(self, flight_id: str, batch: ImageBatch) -> bool:
        """Append *batch* to the flight's FIFO; False when the queue is full."""
        if self._get_queue_capacity(flight_id) <= 0:
            logger.error(f"Queue full for flight {flight_id}")
            return False
        self.flight_queues[flight_id].put(batch)
        self.expected_ingest_seq[flight_id] = batch.end_sequence + 1
        self.flight_status[flight_id].queued_batches += 1
        return True

    def queue_batch(self, flight_id: str, batch: ImageBatch) -> bool:
        """Queues a batch of images for processing (FIFO).

        Validates the batch, enforces inter-batch sequence continuity, and
        lazily initialises per-flight state on first use. Returns True on
        success.
        """
        validation = self.validate_batch(batch)
        if not validation.valid:
            logger.error(f"Batch validation failed: {validation.errors}")
            return False
        if not self._check_sequence_continuity(flight_id, batch):
            logger.error(f"Sequence gap detected for flight {flight_id}")
            return False
        if flight_id not in self.flight_queues:
            self.flight_queues[flight_id] = queue.Queue(maxsize=self.max_queue_size)
            self.flight_status[flight_id] = ProcessingStatus(
                flight_id=flight_id, total_images=0, processed_images=0,
                current_sequence=1, queued_batches=0, processing_rate=0.0
            )
        return self._add_to_queue(flight_id, batch)

    def _dequeue_batch(self, flight_id: str) -> Optional[ImageBatch]:
        """Pop the oldest queued batch for *flight_id*, or None when empty."""
        if flight_id not in self.flight_queues or self.flight_queues[flight_id].empty():
            return None
        batch: ImageBatch = self.flight_queues[flight_id].get()
        self.flight_status[flight_id].queued_batches -= 1
        return batch

    def _extract_metadata(self, img_bytes: bytes, filename: str, seq: int, img: np.ndarray) -> ImageMetadata:
        """Build ImageMetadata from the encoded payload and decoded pixels."""
        h, w = img.shape[:2]
        return ImageMetadata(
            sequence=seq,
            filename=filename,
            dimensions=(w, h),
            file_size=len(img_bytes),
            # Timezone-aware UTC; datetime.utcnow() is deprecated since 3.12.
            timestamp=datetime.now(timezone.utc)
        )

    def _decode_images(self, flight_id: str, batch: ImageBatch) -> List[ImageData]:
        """Decode every payload in *batch*, skipping (with a warning) failures."""
        processed_data = []
        for idx, img_bytes in enumerate(batch.images):
            filename = batch.filenames[idx]
            seq = batch.start_sequence + idx
            np_arr = np.frombuffer(img_bytes, np.uint8)
            img = cv2.imdecode(np_arr, cv2.IMREAD_COLOR)
            if img is None:
                # Fixed: log the actual filename instead of an "(unknown)" placeholder.
                logger.warning(f"Failed to decode image {filename}")
                continue
            # Rule 5: image dimensions must lie within 640x480 .. 6252x4168.
            h, w = img.shape[:2]
            if not (640 <= w <= 6252 and 480 <= h <= 4168):
                logger.warning(f"Image {filename} dimensions ({w}x{h}) out of bounds.")
                continue
            metadata = self._extract_metadata(img_bytes, filename, seq, img)
            img_data = ImageData(
                flight_id=flight_id, sequence=seq, filename=filename,
                image=img, metadata=metadata
            )
            processed_data.append(img_data)
        return processed_data

    def process_next_batch(self, flight_id: str) -> Optional[ProcessedBatch]:
        """Dequeues and processes the next batch from FIFO queue.

        Decodes, stores, and counts the batch's images; returns None when no
        batch is queued. The returned batch may contain fewer images than
        ingested if some payloads failed to decode.
        """
        batch = self._dequeue_batch(flight_id)
        if not batch:
            return None
        if flight_id not in self.flight_start_times:
            self.flight_start_times[flight_id] = time.time()
        processed_data = self._decode_images(flight_id, batch)
        if processed_data:
            self.store_images(flight_id, processed_data)
            self.flight_status[flight_id].processed_images += len(processed_data)
            self.flight_status[flight_id].total_images += len(processed_data)
        return ProcessedBatch(
            images=processed_data,
            batch_id=f"batch_{batch.batch_number}",
            start_sequence=batch.start_sequence,
            end_sequence=batch.end_sequence
        )

    def _create_flight_directory(self, flight_id: str) -> str:
        """Ensure the per-flight storage directory exists; return its path."""
        flight_dir = os.path.join(self.storage_dir, flight_id)
        os.makedirs(flight_dir, exist_ok=True)
        return flight_dir

    def _write_image(self, flight_id: str, filename: str, image: np.ndarray) -> bool:
        """Persist *image* under the flight directory; False on write failure."""
        flight_dir = self._create_flight_directory(flight_id)
        img_path = os.path.join(flight_dir, filename)
        try:
            return cv2.imwrite(img_path, image)
        except Exception as e:
            logger.error(f"Failed to write image {img_path}: {e}")
            return False

    def _update_metadata_index(self, flight_id: str, metadata_list: List[ImageMetadata]) -> bool:
        """Merge *metadata_list* into the flight's metadata.json index (keyed by sequence)."""
        flight_dir = self._create_flight_directory(flight_id)
        index_path = os.path.join(flight_dir, "metadata.json")
        index_data = {}
        if os.path.exists(index_path):
            try:
                with open(index_path, 'r') as f:
                    index_data = json.load(f)
            except json.JSONDecodeError:
                # Corrupt index: rebuild from scratch rather than fail ingestion.
                pass
        for meta in metadata_list:
            # Round-trip through pydantic's JSON encoder so datetimes serialize.
            index_data[str(meta.sequence)] = json.loads(meta.model_dump_json())
        try:
            with open(index_path, 'w') as f:
                json.dump(index_data, f)
            return True
        except Exception as e:
            logger.error(f"Failed to update metadata index {index_path}: {e}")
            return False

    def store_images(self, flight_id: str, images: List[ImageData]) -> bool:
        """Persists images to disk with indexed storage.

        Writes each image file plus a per-image ``<filename>.meta.json``
        backup, then updates the consolidated index. Returns False on the
        first write failure.
        """
        try:
            self._create_flight_directory(flight_id)
            metadata_list = []
            for img_data in images:
                if not self._write_image(flight_id, img_data.filename, img_data.image):
                    return False
                metadata_list.append(img_data.metadata)
                # Legacy individual meta file backup.
                flight_dir = os.path.join(self.storage_dir, flight_id)
                meta_path = os.path.join(flight_dir, f"{img_data.filename}.meta.json")
                with open(meta_path, 'w') as f:
                    f.write(img_data.metadata.model_dump_json())
            self._update_metadata_index(flight_id, metadata_list)
            return True
        except Exception as e:
            logger.error(f"Storage error for flight {flight_id}: {e}")
            return False

    def _load_image_from_disk(self, flight_id: str, filename: str) -> Optional[np.ndarray]:
        """Read a stored image back from disk, or None if the file is absent."""
        flight_dir = os.path.join(self.storage_dir, flight_id)
        img_path = os.path.join(flight_dir, filename)
        if not os.path.exists(img_path):
            return None
        return cv2.imread(img_path, cv2.IMREAD_COLOR)

    def _construct_filename(self, sequence: int) -> str:
        """Canonical on-disk filename for a sequence number, e.g. AD000042.jpg."""
        return f"AD{sequence:06d}.jpg"

    def get_image_by_sequence(self, flight_id: str, sequence: int) -> Optional[ImageData]:
        """Retrieves a specific image (pixels + metadata) by sequence number."""
        filename = self._construct_filename(sequence)
        img = self._load_image_from_disk(flight_id, filename)
        if img is None:
            return None
        metadata = self._load_metadata_from_index(flight_id, sequence)
        if not metadata:
            return None
        return ImageData(flight_id=flight_id, sequence=sequence, filename=filename, image=img, metadata=metadata)

    def _get_sequence_tracker(self, flight_id: str) -> int:
        """Return the flight's read cursor, initialising it to 1 on first use."""
        if flight_id not in self.flight_sequences:
            self.flight_sequences[flight_id] = 1
        return self.flight_sequences[flight_id]

    def _increment_sequence(self, flight_id: str) -> None:
        """Advance the read cursor after a successful retrieval."""
        if flight_id in self.flight_sequences:
            self.flight_sequences[flight_id] += 1

    def get_next_image(self, flight_id: str) -> Optional[ImageData]:
        """Gets the next image in sequence; advances the cursor only on success."""
        seq = self._get_sequence_tracker(flight_id)
        img_data = self.get_image_by_sequence(flight_id, seq)
        if img_data:
            self._increment_sequence(flight_id)
            return img_data
        return None

    def _load_metadata_from_index(self, flight_id: str, sequence: int) -> Optional[ImageMetadata]:
        """Load metadata from the consolidated index, falling back to the per-image file."""
        flight_dir = os.path.join(self.storage_dir, flight_id)
        index_path = os.path.join(flight_dir, "metadata.json")
        if os.path.exists(index_path):
            try:
                with open(index_path, 'r') as f:
                    index_data = json.load(f)
                if str(sequence) in index_data:
                    return ImageMetadata(**index_data[str(sequence)])
            except Exception:
                pass  # fall through to the per-image backup file
        # Fallback to individual file written by store_images().
        filename = self._construct_filename(sequence)
        # Fixed: path previously used a literal "(unknown)" placeholder, so the
        # fallback file written by store_images() was never found.
        meta_path = os.path.join(flight_dir, f"{filename}.meta.json")
        if os.path.exists(meta_path):
            with open(meta_path, 'r') as f:
                return ImageMetadata(**json.load(f))
        return None

    def get_image_metadata(self, flight_id: str, sequence: int) -> Optional[ImageMetadata]:
        """Retrieves metadata without loading full image (lightweight)."""
        return self._load_metadata_from_index(flight_id, sequence)

    def _calculate_processing_rate(self, flight_id: str) -> float:
        """Processed images per second since the flight's first processed batch."""
        if flight_id not in self.flight_start_times or flight_id not in self.flight_status:
            return 0.0
        elapsed = time.time() - self.flight_start_times[flight_id]
        if elapsed <= 0:
            return 0.0
        return self.flight_status[flight_id].processed_images / elapsed

    def get_processing_status(self, flight_id: str) -> ProcessingStatus:
        """Gets current processing status for a flight (zeroed status if unknown)."""
        if flight_id not in self.flight_status:
            return ProcessingStatus(
                flight_id=flight_id, total_images=0, processed_images=0,
                current_sequence=1, queued_batches=0, processing_rate=0.0
            )
        status = self.flight_status[flight_id]
        status.current_sequence = self._get_sequence_tracker(flight_id)
        status.processing_rate = self._calculate_processing_rate(flight_id)
        return status

    # --- Simulation Utility ---
    def simulate_directory_ingestion(self, flight_id: str, directory_path: str, engine: Any, fps: float = 2.0):
        """
        Simulates a flight by reading images sequentially from a local directory
        and pushing them directly into the Flight Processing Engine queue.
        """
        if not os.path.exists(directory_path):
            logger.error(f"Simulation directory not found: {directory_path}")
            return
        valid_exts = ('.jpg', '.jpeg', '.png')
        files = sorted([f for f in os.listdir(directory_path) if f.lower().endswith(valid_exts)])
        # Guard against fps <= 0: stream as fast as possible instead of crashing.
        delay = 1.0 / fps if fps > 0 else 0.0
        logger.info(f"Starting directory simulation for {flight_id}. Found {len(files)} frames.")
        for idx, filename in enumerate(files):
            img = cv2.imread(os.path.join(directory_path, filename), cv2.IMREAD_COLOR)
            if img is not None:
                engine.add_image(idx + 1, img)
            # Pace frames at the requested fps regardless of decode success.
            time.sleep(delay)