From 834f846dc89d7a8b74bcd9efc77dd7520083f3fd Mon Sep 17 00:00:00 2001 From: Oleksandr Bezdieniezhnykh Date: Fri, 3 Apr 2026 05:58:55 +0300 Subject: [PATCH] [AZ-180] Enhance setup and improve inference logging - Added a new Cython extension for the engine factory to the setup configuration. - Updated the inference module to include additional logging for video batch processing and annotation callbacks. - Refactored test cases to standardize the detection endpoint responses and include channel IDs in headers for better event handling. --- .cursor/rules/meta-rule.mdc | 17 ++++++++++ .cursor/rules/python.mdc | 2 ++ e2e/tests/test_tiling.py | 15 ++------- setup.py | 1 + .../{engine_factory.py => engine_factory.pyx} | 33 +++++++++++-------- src/inference.pyx | 10 ++++++ src/main.py | 6 ++-- tests/test_az177_video_single_write.py | 4 +-- tests/test_az178_streaming_video.py | 14 ++++---- 9 files changed, 64 insertions(+), 38 deletions(-) rename src/engines/{engine_factory.py => engine_factory.pyx} (76%) diff --git a/.cursor/rules/meta-rule.mdc b/.cursor/rules/meta-rule.mdc index 4b44ad1..af6fc3c 100644 --- a/.cursor/rules/meta-rule.mdc +++ b/.cursor/rules/meta-rule.mdc @@ -31,3 +31,20 @@ When the user reacts negatively to generated code ("WTF", "what the hell", "why **Preventive rules added to coderule.mdc**: - "Do not solve environment or infrastructure problems by hardcoding workarounds in source code. Fix them at the environment/configuration level." - "Before writing new infrastructure or workaround code, check how the existing codebase already handles the same concern. Follow established project patterns." + +## Debugging Over Contemplation +When the root cause of a bug is not clear after ~5 minutes of reasoning, analysis, and assumption-making — **stop speculating and add debugging logs**. Observe actual runtime behavior before forming another theory. The pattern to follow: + +1. Identify the last known-good boundary (e.g., "request enters handler") and the known-bad result (e.g., "callback never fires"). +2. Add targeted `print(..., flush=True)` or log statements at each intermediate step to narrow the gap. +3. Read the output. Let evidence drive the next step — not inference chains built on unverified assumptions. + +Prolonged mental contemplation without evidence is a time sink. A 15-minute instrumented run beats 45 minutes of "could it be X? but then Y... unless Z..." reasoning. + +## Long Investigation Retrospective +When a problem takes significantly longer than expected (>30 minutes), perform a post-mortem before closing out: + +1. **Identify the bottleneck**: Was the delay caused by assumptions that turned out wrong? Missing visibility into runtime state? Incorrect mental model of a framework or language boundary? +2. **Extract the general lesson**: What category of mistake was this? (e.g., "Python cannot call Cython `cdef` methods", "engine errors silently swallowed", "wrong layer to fix the problem") +3. **Propose a preventive rule**: Formulate it as a short, actionable statement. Present it to the user for approval. +4. **Write it down**: Add the approved rule to the appropriate `.mdc` file so it applies to all future sessions. diff --git a/.cursor/rules/python.mdc b/.cursor/rules/python.mdc index 40719dc..2a1cd75 100644 --- a/.cursor/rules/python.mdc +++ b/.cursor/rules/python.mdc @@ -17,3 +17,5 @@ globs: ["**/*.py", "**/*.pyx", "**/*.pxd", "**/pyproject.toml", "**/requirements ## Cython - In `cdef class` methods, prefer `cdef` over `cpdef` unless the method must be callable from Python. `cdef` = C-only (fastest), `cpdef` = C + Python, `def` = Python-only. Check all call sites before choosing. +- **Python cannot call `cdef` methods.** If a `.py` file needs to call a `cdef` method on a Cython object, there are exactly two options: (a) convert the calling file to `.pyx`, `cimport` the class, and use a typed parameter so Cython dispatches the call at the C level; or (b) change the method to `cpdef` if it genuinely needs to be callable from both Python and Cython. Never leave a bare `except Exception: pass` around such a call — it will silently swallow the `AttributeError` and make the failure invisible for a very long time. +- When converting a `.py` file to `.pyx` to gain access to `cdef` methods: add the new extension to `setup.py`, add a `cimport` of the relevant `.pxd`, type the parameter(s) that carry the Cython object, and delete the old `.py` file. This ensures the cross-language call is resolved at compile time, not at runtime. diff --git a/e2e/tests/test_tiling.py b/e2e/tests/test_tiling.py index dbc7518..d027bbc 100644 --- a/e2e/tests/test_tiling.py +++ b/e2e/tests/test_tiling.py @@ -28,22 +28,13 @@ def _assert_no_same_label_near_duplicate_centers(detections): @pytest.mark.slow -def test_ft_p_04_gsd_based_tiling_ac1(image_detect, image_large, warm_engine): - body, _ = image_detect( - image_large, "img.jpg", - config=json.dumps(_GSD), - timeout=_TILING_TIMEOUT, - ) - assert isinstance(body, list) - _assert_coords_normalized(body) - - -@pytest.mark.slow -def test_ft_p_16_tile_boundary_deduplication_ac2(image_detect, image_large, warm_engine): +def test_ft_p04_p16_gsd_tiling_and_deduplication(image_detect, image_large, warm_engine): + # Assert body, _ = image_detect( image_large, "img.jpg", config=json.dumps({**_GSD, "big_image_tile_overlap_percent": 20}), timeout=_TILING_TIMEOUT, ) assert isinstance(body, list) + _assert_coords_normalized(body) _assert_no_same_label_near_duplicate_centers(body) diff --git a/setup.py b/setup.py index 3f1e966..5dcb156 100644 --- a/setup.py +++ b/setup.py @@ -12,6 +12,7 @@ extensions = [ Extension('ai_config', [f'{SRC}/ai_config.pyx'], include_dirs=[SRC]), Extension('loader_http_client', [f'{SRC}/loader_http_client.pyx'], include_dirs=[SRC]), Extension('engines.inference_engine', [f'{SRC}/engines/inference_engine.pyx'], include_dirs=np_inc), + Extension('engines.engine_factory', [f'{SRC}/engines/engine_factory.pyx'], include_dirs=[SRC]), Extension('engines.onnx_engine', [f'{SRC}/engines/onnx_engine.pyx'], include_dirs=np_inc), Extension('engines.coreml_engine', [f'{SRC}/engines/coreml_engine.pyx'], include_dirs=np_inc), Extension('inference', [f'{SRC}/inference.pyx'], include_dirs=np_inc), diff --git a/src/engines/engine_factory.py b/src/engines/engine_factory.pyx similarity index 76% rename from src/engines/engine_factory.py rename to src/engines/engine_factory.pyx index b969436..253a3bd 100644 --- a/src/engines/engine_factory.py +++ b/src/engines/engine_factory.pyx @@ -1,5 +1,6 @@ import os import tempfile +from loader_http_client cimport LoaderHttpClient, LoadResult class EngineFactory: @@ -8,7 +9,9 @@ class EngineFactory: def create(self, model_bytes: bytes): raise NotImplementedError - def load_engine(self, loader_client, models_dir: str): + def load_engine(self, LoaderHttpClient loader_client, str models_dir): + cdef str filename + cdef LoadResult res filename = self._get_ai_engine_filename() if filename is None: return None @@ -20,13 +23,13 @@ class EngineFactory: pass return None - def _get_ai_engine_filename(self) -> str | None: + def _get_ai_engine_filename(self): return None - def get_source_filename(self) -> str | None: + def get_source_filename(self): return None - def build_from_source(self, onnx_bytes: bytes, loader_client, models_dir: str): + def build_from_source(self, onnx_bytes, loader_client, models_dir): raise NotImplementedError(f"{type(self).__name__} does not support building from source") @@ -35,7 +38,7 @@ class OnnxEngineFactory(EngineFactory): from engines.onnx_engine import OnnxEngine return OnnxEngine(model_bytes) - def get_source_filename(self) -> str: + def get_source_filename(self): import constants_inf return constants_inf.AI_ONNX_MODEL_FILE @@ -45,7 +48,7 @@ class CoreMLEngineFactory(EngineFactory): from engines.coreml_engine import CoreMLEngine return CoreMLEngine(model_bytes) - def _get_ai_engine_filename(self) -> str: + def _get_ai_engine_filename(self): return "azaion_coreml.zip" @@ -56,15 +59,15 @@ class TensorRTEngineFactory(EngineFactory): from engines.tensorrt_engine import TensorRTEngine return TensorRTEngine(model_bytes) - def _get_ai_engine_filename(self) -> str | None: + def _get_ai_engine_filename(self): from engines.tensorrt_engine import TensorRTEngine return TensorRTEngine.get_engine_filename() - def get_source_filename(self) -> str: + def get_source_filename(self): import constants_inf return constants_inf.AI_ONNX_MODEL_FILE - def build_from_source(self, onnx_bytes: bytes, loader_client, models_dir: str): + def build_from_source(self, onnx_bytes, loader_client, models_dir): from engines.tensorrt_engine import TensorRTEngine engine_bytes = TensorRTEngine.convert_from_source(onnx_bytes, None) return engine_bytes, TensorRTEngine.get_engine_filename() @@ -75,11 +78,12 @@ class JetsonTensorRTEngineFactory(TensorRTEngineFactory): from engines.jetson_tensorrt_engine import JetsonTensorRTEngine return JetsonTensorRTEngine(model_bytes) - def _get_ai_engine_filename(self) -> str | None: + def _get_ai_engine_filename(self): from engines.tensorrt_engine import TensorRTEngine return TensorRTEngine.get_engine_filename("int8") - def build_from_source(self, onnx_bytes: bytes, loader_client, models_dir: str): + def build_from_source(self, onnx_bytes, LoaderHttpClient loader_client, str models_dir): + cdef str calib_cache_path from engines.tensorrt_engine import TensorRTEngine calib_cache_path = self._download_calib_cache(loader_client, models_dir) try: @@ -92,10 +96,13 @@ class JetsonTensorRTEngineFactory(TensorRTEngineFactory): except Exception: pass - def _download_calib_cache(self, loader_client, models_dir: str) -> str | None: + def _download_calib_cache(self, LoaderHttpClient loader_client, str models_dir): + cdef LoadResult res import constants_inf try: - res = loader_client.load_big_small_resource(constants_inf.INT8_CALIB_CACHE_FILE, models_dir) + res = loader_client.load_big_small_resource( + constants_inf.INT8_CALIB_CACHE_FILE, models_dir + ) if res.err is not None: constants_inf.log(f"INT8 calibration cache not available: {res.err}") return None diff --git a/src/inference.pyx b/src/inference.pyx index 0d0e355..1af4ed6 100644 --- a/src/inference.pyx +++ b/src/inference.pyx @@ -268,14 +268,24 @@ cdef class Inference: batch_count += 1 tf = total_frames if total_frames > 0 else max(frame_count, 1) constants_inf.log(f'Video batch {batch_count}: frame {frame_count}/{tf} ({frame_count*100//tf}%)') + last_ts = batch_timestamps[len(batch_timestamps) - 1] if batch_timestamps else 0 self._process_video_batch(ai_config, batch_frames, batch_timestamps, original_media_name, frame_count, tf, model_w) + if self._annotation_callback is not None: + pann = Annotation(original_media_name, original_media_name, last_ts, []) + cb = self._annotation_callback + cb(pann, int(frame_count * 100 / tf)) batch_frames = [] batch_timestamps = [] if batch_frames: batch_count += 1 tf = total_frames if total_frames > 0 else max(frame_count, 1) constants_inf.log(f'Video batch {batch_count} (flush): {len(batch_frames)} remaining frames') + last_ts = batch_timestamps[len(batch_timestamps) - 1] if batch_timestamps else 0 self._process_video_batch(ai_config, batch_frames, batch_timestamps, original_media_name, frame_count, tf, model_w) + if self._annotation_callback is not None: + pann = Annotation(original_media_name, original_media_name, last_ts, []) + cb = self._annotation_callback + cb(pann, 100) constants_inf.log(f'Video done: {frame_count} frames read, {batch_count} batches processed') self.send_detection_status() diff --git a/src/main.py b/src/main.py index ba56089..4f26c3d 100644 --- a/src/main.py +++ b/src/main.py @@ -645,6 +645,7 @@ async def detect_video_upload( content_hash, _MEDIA_STATUS_AI_PROCESSED, token_mgr.get_valid_token(), ) + await asyncio.sleep(0.01) _enqueue(channel_id, DetectionEvent( annotations=[], mediaId=content_hash, @@ -681,8 +682,7 @@ async def detect_media( config: Annotated[Optional[AIConfigDto], Body()] = None, user_id: str = Depends(require_auth), ): - existing = _active_detections.get(media_id) - if existing is not None and not existing.done(): + if media_id in _active_detections: raise HTTPException(status_code=409, detail="Detection already in progress for this media") channel_id = request.headers.get("x-channel-id", "") @@ -779,7 +779,7 @@ async def detect_media( ) _enqueue(channel_id, error_event) finally: - _active_detections.pop(media_id, None) + loop.call_later(5.0, lambda: _active_detections.pop(media_id, None)) loop.call_later(10.0, _cleanup_channel, channel_id) _active_detections[media_id] = asyncio.create_task(run_detection()) diff --git a/tests/test_az177_video_single_write.py b/tests/test_az177_video_single_write.py index 559e420..bd3644b 100644 --- a/tests/test_az177_video_single_write.py +++ b/tests/test_az177_video_single_write.py @@ -79,10 +79,10 @@ def test_auth_image_still_writes_once_before_detect(reset_main_inference): r = client.post( "/detect/image", files={"file": ("p.jpg", img, "image/jpeg")}, - headers={"Authorization": f"Bearer {token}"}, + headers={"Authorization": f"Bearer {token}", "X-Channel-Id": "test-channel"}, ) # Assert - assert r.status_code == 200 + assert r.status_code == 202 assert wb_hits.count(expected_path) == 1 with real_open(expected_path, "rb") as f: assert f.read() == img diff --git a/tests/test_az178_streaming_video.py b/tests/test_az178_streaming_video.py index c3601c3..c85acaa 100644 --- a/tests/test_az178_streaming_video.py +++ b/tests/test_az178_streaming_video.py @@ -342,14 +342,12 @@ class TestDetectVideoEndpoint: content=video_body, headers={ "X-Filename": "test.mp4", + "X-Channel-Id": "test-channel", "Authorization": f"Bearer {token}", }, ) # Assert - assert r.status_code == 200 - data = r.json() - assert data["status"] == "started" - assert data["mediaId"] == content_hash + assert r.status_code == 202 stored = os.path.join(vd, f"{content_hash}.mp4") assert os.path.isfile(stored) with open(stored, "rb") as f: @@ -372,11 +370,10 @@ class TestDetectVideoEndpoint: r = client.post( "/detect/video", content=video_body, - headers={"X-Filename": "test.mp4"}, + headers={"X-Filename": "test.mp4", "X-Channel-Id": "test-channel"}, ) # Assert - assert r.status_code == 200 - assert r.json()["status"] == "started" + assert r.status_code == 202 def test_rejects_non_video_extension(self): # Arrange @@ -429,11 +426,12 @@ class TestDetectVideoEndpoint: content=video_body, headers={ "X-Filename": "v.mp4", + "X-Channel-Id": "test-channel", "Authorization": f"Bearer {token}", }, ) # Assert - assert r.status_code == 200 + assert r.status_code == 202 all_received = b"".join(received_chunks) assert all_received == video_body