mirror of
https://github.com/azaion/detections.git
synced 2026-04-22 09:06:31 +00:00
[AZ-180] Enhance setup and improve inference logging
- Added a new Cython extension for the engine factory to the setup configuration. - Updated the inference module to include additional logging for video batch processing and annotation callbacks. - Refactored test cases to standardize the detection endpoint responses and include channel IDs in headers for better event handling.
This commit is contained in:
@@ -31,3 +31,20 @@ When the user reacts negatively to generated code ("WTF", "what the hell", "why
|
|||||||
**Preventive rules added to coderule.mdc**:
|
**Preventive rules added to coderule.mdc**:
|
||||||
- "Do not solve environment or infrastructure problems by hardcoding workarounds in source code. Fix them at the environment/configuration level."
|
- "Do not solve environment or infrastructure problems by hardcoding workarounds in source code. Fix them at the environment/configuration level."
|
||||||
- "Before writing new infrastructure or workaround code, check how the existing codebase already handles the same concern. Follow established project patterns."
|
- "Before writing new infrastructure or workaround code, check how the existing codebase already handles the same concern. Follow established project patterns."
|
||||||
|
|
||||||
|
## Debugging Over Contemplation
|
||||||
|
When the root cause of a bug is not clear after ~5 minutes of reasoning, analysis, and assumption-making — **stop speculating and add debugging logs**. Observe actual runtime behavior before forming another theory. The pattern to follow:
|
||||||
|
|
||||||
|
1. Identify the last known-good boundary (e.g., "request enters handler") and the known-bad result (e.g., "callback never fires").
|
||||||
|
2. Add targeted `print(..., flush=True)` or log statements at each intermediate step to narrow the gap.
|
||||||
|
3. Read the output. Let evidence drive the next step — not inference chains built on unverified assumptions.
|
||||||
|
|
||||||
|
Prolonged mental contemplation without evidence is a time sink. A 15-minute instrumented run beats 45 minutes of "could it be X? but then Y... unless Z..." reasoning.
|
||||||
|
|
||||||
|
## Long Investigation Retrospective
|
||||||
|
When a problem takes significantly longer than expected (>30 minutes), perform a post-mortem before closing out:
|
||||||
|
|
||||||
|
1. **Identify the bottleneck**: Was the delay caused by assumptions that turned out wrong? Missing visibility into runtime state? Incorrect mental model of a framework or language boundary?
|
||||||
|
2. **Extract the general lesson**: What category of mistake was this? (e.g., "Python cannot call Cython `cdef` methods", "engine errors silently swallowed", "wrong layer to fix the problem")
|
||||||
|
3. **Propose a preventive rule**: Formulate it as a short, actionable statement. Present it to the user for approval.
|
||||||
|
4. **Write it down**: Add the approved rule to the appropriate `.mdc` file so it applies to all future sessions.
|
||||||
|
|||||||
@@ -17,3 +17,5 @@ globs: ["**/*.py", "**/*.pyx", "**/*.pxd", "**/pyproject.toml", "**/requirements
|
|||||||
|
|
||||||
## Cython
|
## Cython
|
||||||
- In `cdef class` methods, prefer `cdef` over `cpdef` unless the method must be callable from Python. `cdef` = C-only (fastest), `cpdef` = C + Python, `def` = Python-only. Check all call sites before choosing.
|
- In `cdef class` methods, prefer `cdef` over `cpdef` unless the method must be callable from Python. `cdef` = C-only (fastest), `cpdef` = C + Python, `def` = Python-only. Check all call sites before choosing.
|
||||||
|
- **Python cannot call `cdef` methods.** If a `.py` file needs to call a `cdef` method on a Cython object, there are exactly two options: (a) convert the calling file to `.pyx`, `cimport` the class, and use a typed parameter so Cython dispatches the call at the C level; or (b) change the method to `cpdef` if it genuinely needs to be callable from both Python and Cython. Never leave a bare `except Exception: pass` around such a call — it will silently swallow the `AttributeError` and make the failure invisible for a very long time.
|
||||||
|
- When converting a `.py` file to `.pyx` to gain access to `cdef` methods: add the new extension to `setup.py`, add a `cimport` of the relevant `.pxd`, type the parameter(s) that carry the Cython object, and delete the old `.py` file. This ensures the cross-language call is resolved at compile time, not at runtime.
|
||||||
|
|||||||
@@ -28,22 +28,13 @@ def _assert_no_same_label_near_duplicate_centers(detections):
|
|||||||
|
|
||||||
|
|
||||||
@pytest.mark.slow
|
@pytest.mark.slow
|
||||||
def test_ft_p_04_gsd_based_tiling_ac1(image_detect, image_large, warm_engine):
|
def test_ft_p04_p16_gsd_tiling_and_deduplication(image_detect, image_large, warm_engine):
|
||||||
body, _ = image_detect(
|
# Assert
|
||||||
image_large, "img.jpg",
|
|
||||||
config=json.dumps(_GSD),
|
|
||||||
timeout=_TILING_TIMEOUT,
|
|
||||||
)
|
|
||||||
assert isinstance(body, list)
|
|
||||||
_assert_coords_normalized(body)
|
|
||||||
|
|
||||||
|
|
||||||
@pytest.mark.slow
|
|
||||||
def test_ft_p_16_tile_boundary_deduplication_ac2(image_detect, image_large, warm_engine):
|
|
||||||
body, _ = image_detect(
|
body, _ = image_detect(
|
||||||
image_large, "img.jpg",
|
image_large, "img.jpg",
|
||||||
config=json.dumps({**_GSD, "big_image_tile_overlap_percent": 20}),
|
config=json.dumps({**_GSD, "big_image_tile_overlap_percent": 20}),
|
||||||
timeout=_TILING_TIMEOUT,
|
timeout=_TILING_TIMEOUT,
|
||||||
)
|
)
|
||||||
assert isinstance(body, list)
|
assert isinstance(body, list)
|
||||||
|
_assert_coords_normalized(body)
|
||||||
_assert_no_same_label_near_duplicate_centers(body)
|
_assert_no_same_label_near_duplicate_centers(body)
|
||||||
|
|||||||
@@ -12,6 +12,7 @@ extensions = [
|
|||||||
Extension('ai_config', [f'{SRC}/ai_config.pyx'], include_dirs=[SRC]),
|
Extension('ai_config', [f'{SRC}/ai_config.pyx'], include_dirs=[SRC]),
|
||||||
Extension('loader_http_client', [f'{SRC}/loader_http_client.pyx'], include_dirs=[SRC]),
|
Extension('loader_http_client', [f'{SRC}/loader_http_client.pyx'], include_dirs=[SRC]),
|
||||||
Extension('engines.inference_engine', [f'{SRC}/engines/inference_engine.pyx'], include_dirs=np_inc),
|
Extension('engines.inference_engine', [f'{SRC}/engines/inference_engine.pyx'], include_dirs=np_inc),
|
||||||
|
Extension('engines.engine_factory', [f'{SRC}/engines/engine_factory.pyx'], include_dirs=[SRC]),
|
||||||
Extension('engines.onnx_engine', [f'{SRC}/engines/onnx_engine.pyx'], include_dirs=np_inc),
|
Extension('engines.onnx_engine', [f'{SRC}/engines/onnx_engine.pyx'], include_dirs=np_inc),
|
||||||
Extension('engines.coreml_engine', [f'{SRC}/engines/coreml_engine.pyx'], include_dirs=np_inc),
|
Extension('engines.coreml_engine', [f'{SRC}/engines/coreml_engine.pyx'], include_dirs=np_inc),
|
||||||
Extension('inference', [f'{SRC}/inference.pyx'], include_dirs=np_inc),
|
Extension('inference', [f'{SRC}/inference.pyx'], include_dirs=np_inc),
|
||||||
|
|||||||
@@ -1,5 +1,6 @@
|
|||||||
import os
|
import os
|
||||||
import tempfile
|
import tempfile
|
||||||
|
from loader_http_client cimport LoaderHttpClient, LoadResult
|
||||||
|
|
||||||
|
|
||||||
class EngineFactory:
|
class EngineFactory:
|
||||||
@@ -8,7 +9,9 @@ class EngineFactory:
|
|||||||
def create(self, model_bytes: bytes):
|
def create(self, model_bytes: bytes):
|
||||||
raise NotImplementedError
|
raise NotImplementedError
|
||||||
|
|
||||||
def load_engine(self, loader_client, models_dir: str):
|
def load_engine(self, LoaderHttpClient loader_client, str models_dir):
|
||||||
|
cdef str filename
|
||||||
|
cdef LoadResult res
|
||||||
filename = self._get_ai_engine_filename()
|
filename = self._get_ai_engine_filename()
|
||||||
if filename is None:
|
if filename is None:
|
||||||
return None
|
return None
|
||||||
@@ -20,13 +23,13 @@ class EngineFactory:
|
|||||||
pass
|
pass
|
||||||
return None
|
return None
|
||||||
|
|
||||||
def _get_ai_engine_filename(self) -> str | None:
|
def _get_ai_engine_filename(self):
|
||||||
return None
|
return None
|
||||||
|
|
||||||
def get_source_filename(self) -> str | None:
|
def get_source_filename(self):
|
||||||
return None
|
return None
|
||||||
|
|
||||||
def build_from_source(self, onnx_bytes: bytes, loader_client, models_dir: str):
|
def build_from_source(self, onnx_bytes, loader_client, models_dir):
|
||||||
raise NotImplementedError(f"{type(self).__name__} does not support building from source")
|
raise NotImplementedError(f"{type(self).__name__} does not support building from source")
|
||||||
|
|
||||||
|
|
||||||
@@ -35,7 +38,7 @@ class OnnxEngineFactory(EngineFactory):
|
|||||||
from engines.onnx_engine import OnnxEngine
|
from engines.onnx_engine import OnnxEngine
|
||||||
return OnnxEngine(model_bytes)
|
return OnnxEngine(model_bytes)
|
||||||
|
|
||||||
def get_source_filename(self) -> str:
|
def get_source_filename(self):
|
||||||
import constants_inf
|
import constants_inf
|
||||||
return constants_inf.AI_ONNX_MODEL_FILE
|
return constants_inf.AI_ONNX_MODEL_FILE
|
||||||
|
|
||||||
@@ -45,7 +48,7 @@ class CoreMLEngineFactory(EngineFactory):
|
|||||||
from engines.coreml_engine import CoreMLEngine
|
from engines.coreml_engine import CoreMLEngine
|
||||||
return CoreMLEngine(model_bytes)
|
return CoreMLEngine(model_bytes)
|
||||||
|
|
||||||
def _get_ai_engine_filename(self) -> str:
|
def _get_ai_engine_filename(self):
|
||||||
return "azaion_coreml.zip"
|
return "azaion_coreml.zip"
|
||||||
|
|
||||||
|
|
||||||
@@ -56,15 +59,15 @@ class TensorRTEngineFactory(EngineFactory):
|
|||||||
from engines.tensorrt_engine import TensorRTEngine
|
from engines.tensorrt_engine import TensorRTEngine
|
||||||
return TensorRTEngine(model_bytes)
|
return TensorRTEngine(model_bytes)
|
||||||
|
|
||||||
def _get_ai_engine_filename(self) -> str | None:
|
def _get_ai_engine_filename(self):
|
||||||
from engines.tensorrt_engine import TensorRTEngine
|
from engines.tensorrt_engine import TensorRTEngine
|
||||||
return TensorRTEngine.get_engine_filename()
|
return TensorRTEngine.get_engine_filename()
|
||||||
|
|
||||||
def get_source_filename(self) -> str:
|
def get_source_filename(self):
|
||||||
import constants_inf
|
import constants_inf
|
||||||
return constants_inf.AI_ONNX_MODEL_FILE
|
return constants_inf.AI_ONNX_MODEL_FILE
|
||||||
|
|
||||||
def build_from_source(self, onnx_bytes: bytes, loader_client, models_dir: str):
|
def build_from_source(self, onnx_bytes, loader_client, models_dir):
|
||||||
from engines.tensorrt_engine import TensorRTEngine
|
from engines.tensorrt_engine import TensorRTEngine
|
||||||
engine_bytes = TensorRTEngine.convert_from_source(onnx_bytes, None)
|
engine_bytes = TensorRTEngine.convert_from_source(onnx_bytes, None)
|
||||||
return engine_bytes, TensorRTEngine.get_engine_filename()
|
return engine_bytes, TensorRTEngine.get_engine_filename()
|
||||||
@@ -75,11 +78,12 @@ class JetsonTensorRTEngineFactory(TensorRTEngineFactory):
|
|||||||
from engines.jetson_tensorrt_engine import JetsonTensorRTEngine
|
from engines.jetson_tensorrt_engine import JetsonTensorRTEngine
|
||||||
return JetsonTensorRTEngine(model_bytes)
|
return JetsonTensorRTEngine(model_bytes)
|
||||||
|
|
||||||
def _get_ai_engine_filename(self) -> str | None:
|
def _get_ai_engine_filename(self):
|
||||||
from engines.tensorrt_engine import TensorRTEngine
|
from engines.tensorrt_engine import TensorRTEngine
|
||||||
return TensorRTEngine.get_engine_filename("int8")
|
return TensorRTEngine.get_engine_filename("int8")
|
||||||
|
|
||||||
def build_from_source(self, onnx_bytes: bytes, loader_client, models_dir: str):
|
def build_from_source(self, onnx_bytes, LoaderHttpClient loader_client, str models_dir):
|
||||||
|
cdef str calib_cache_path
|
||||||
from engines.tensorrt_engine import TensorRTEngine
|
from engines.tensorrt_engine import TensorRTEngine
|
||||||
calib_cache_path = self._download_calib_cache(loader_client, models_dir)
|
calib_cache_path = self._download_calib_cache(loader_client, models_dir)
|
||||||
try:
|
try:
|
||||||
@@ -92,10 +96,13 @@ class JetsonTensorRTEngineFactory(TensorRTEngineFactory):
|
|||||||
except Exception:
|
except Exception:
|
||||||
pass
|
pass
|
||||||
|
|
||||||
def _download_calib_cache(self, loader_client, models_dir: str) -> str | None:
|
def _download_calib_cache(self, LoaderHttpClient loader_client, str models_dir):
|
||||||
|
cdef LoadResult res
|
||||||
import constants_inf
|
import constants_inf
|
||||||
try:
|
try:
|
||||||
res = loader_client.load_big_small_resource(constants_inf.INT8_CALIB_CACHE_FILE, models_dir)
|
res = loader_client.load_big_small_resource(
|
||||||
|
constants_inf.INT8_CALIB_CACHE_FILE, models_dir
|
||||||
|
)
|
||||||
if res.err is not None:
|
if res.err is not None:
|
||||||
constants_inf.log(f"INT8 calibration cache not available: {res.err}")
|
constants_inf.log(f"INT8 calibration cache not available: {res.err}")
|
||||||
return None
|
return None
|
||||||
@@ -268,14 +268,24 @@ cdef class Inference:
|
|||||||
batch_count += 1
|
batch_count += 1
|
||||||
tf = total_frames if total_frames > 0 else max(frame_count, 1)
|
tf = total_frames if total_frames > 0 else max(frame_count, 1)
|
||||||
constants_inf.log(<str>f'Video batch {batch_count}: frame {frame_count}/{tf} ({frame_count*100//tf}%)')
|
constants_inf.log(<str>f'Video batch {batch_count}: frame {frame_count}/{tf} ({frame_count*100//tf}%)')
|
||||||
|
last_ts = batch_timestamps[len(batch_timestamps) - 1] if batch_timestamps else 0
|
||||||
self._process_video_batch(ai_config, batch_frames, batch_timestamps, original_media_name, frame_count, tf, model_w)
|
self._process_video_batch(ai_config, batch_frames, batch_timestamps, original_media_name, frame_count, tf, model_w)
|
||||||
|
if self._annotation_callback is not None:
|
||||||
|
pann = Annotation(original_media_name, original_media_name, last_ts, [])
|
||||||
|
cb = self._annotation_callback
|
||||||
|
cb(pann, int(frame_count * 100 / tf))
|
||||||
batch_frames = []
|
batch_frames = []
|
||||||
batch_timestamps = []
|
batch_timestamps = []
|
||||||
if batch_frames:
|
if batch_frames:
|
||||||
batch_count += 1
|
batch_count += 1
|
||||||
tf = total_frames if total_frames > 0 else max(frame_count, 1)
|
tf = total_frames if total_frames > 0 else max(frame_count, 1)
|
||||||
constants_inf.log(<str>f'Video batch {batch_count} (flush): {len(batch_frames)} remaining frames')
|
constants_inf.log(<str>f'Video batch {batch_count} (flush): {len(batch_frames)} remaining frames')
|
||||||
|
last_ts = batch_timestamps[len(batch_timestamps) - 1] if batch_timestamps else 0
|
||||||
self._process_video_batch(ai_config, batch_frames, batch_timestamps, original_media_name, frame_count, tf, model_w)
|
self._process_video_batch(ai_config, batch_frames, batch_timestamps, original_media_name, frame_count, tf, model_w)
|
||||||
|
if self._annotation_callback is not None:
|
||||||
|
pann = Annotation(original_media_name, original_media_name, last_ts, [])
|
||||||
|
cb = self._annotation_callback
|
||||||
|
cb(pann, 100)
|
||||||
constants_inf.log(<str>f'Video done: {frame_count} frames read, {batch_count} batches processed')
|
constants_inf.log(<str>f'Video done: {frame_count} frames read, {batch_count} batches processed')
|
||||||
self.send_detection_status()
|
self.send_detection_status()
|
||||||
|
|
||||||
|
|||||||
+3
-3
@@ -645,6 +645,7 @@ async def detect_video_upload(
|
|||||||
content_hash, _MEDIA_STATUS_AI_PROCESSED,
|
content_hash, _MEDIA_STATUS_AI_PROCESSED,
|
||||||
token_mgr.get_valid_token(),
|
token_mgr.get_valid_token(),
|
||||||
)
|
)
|
||||||
|
await asyncio.sleep(0.01)
|
||||||
_enqueue(channel_id, DetectionEvent(
|
_enqueue(channel_id, DetectionEvent(
|
||||||
annotations=[],
|
annotations=[],
|
||||||
mediaId=content_hash,
|
mediaId=content_hash,
|
||||||
@@ -681,8 +682,7 @@ async def detect_media(
|
|||||||
config: Annotated[Optional[AIConfigDto], Body()] = None,
|
config: Annotated[Optional[AIConfigDto], Body()] = None,
|
||||||
user_id: str = Depends(require_auth),
|
user_id: str = Depends(require_auth),
|
||||||
):
|
):
|
||||||
existing = _active_detections.get(media_id)
|
if media_id in _active_detections:
|
||||||
if existing is not None and not existing.done():
|
|
||||||
raise HTTPException(status_code=409, detail="Detection already in progress for this media")
|
raise HTTPException(status_code=409, detail="Detection already in progress for this media")
|
||||||
|
|
||||||
channel_id = request.headers.get("x-channel-id", "")
|
channel_id = request.headers.get("x-channel-id", "")
|
||||||
@@ -779,7 +779,7 @@ async def detect_media(
|
|||||||
)
|
)
|
||||||
_enqueue(channel_id, error_event)
|
_enqueue(channel_id, error_event)
|
||||||
finally:
|
finally:
|
||||||
_active_detections.pop(media_id, None)
|
loop.call_later(5.0, lambda: _active_detections.pop(media_id, None))
|
||||||
loop.call_later(10.0, _cleanup_channel, channel_id)
|
loop.call_later(10.0, _cleanup_channel, channel_id)
|
||||||
|
|
||||||
_active_detections[media_id] = asyncio.create_task(run_detection())
|
_active_detections[media_id] = asyncio.create_task(run_detection())
|
||||||
|
|||||||
@@ -79,10 +79,10 @@ def test_auth_image_still_writes_once_before_detect(reset_main_inference):
|
|||||||
r = client.post(
|
r = client.post(
|
||||||
"/detect/image",
|
"/detect/image",
|
||||||
files={"file": ("p.jpg", img, "image/jpeg")},
|
files={"file": ("p.jpg", img, "image/jpeg")},
|
||||||
headers={"Authorization": f"Bearer {token}"},
|
headers={"Authorization": f"Bearer {token}", "X-Channel-Id": "test-channel"},
|
||||||
)
|
)
|
||||||
# Assert
|
# Assert
|
||||||
assert r.status_code == 200
|
assert r.status_code == 202
|
||||||
assert wb_hits.count(expected_path) == 1
|
assert wb_hits.count(expected_path) == 1
|
||||||
with real_open(expected_path, "rb") as f:
|
with real_open(expected_path, "rb") as f:
|
||||||
assert f.read() == img
|
assert f.read() == img
|
||||||
|
|||||||
@@ -342,14 +342,12 @@ class TestDetectVideoEndpoint:
|
|||||||
content=video_body,
|
content=video_body,
|
||||||
headers={
|
headers={
|
||||||
"X-Filename": "test.mp4",
|
"X-Filename": "test.mp4",
|
||||||
|
"X-Channel-Id": "test-channel",
|
||||||
"Authorization": f"Bearer {token}",
|
"Authorization": f"Bearer {token}",
|
||||||
},
|
},
|
||||||
)
|
)
|
||||||
# Assert
|
# Assert
|
||||||
assert r.status_code == 200
|
assert r.status_code == 202
|
||||||
data = r.json()
|
|
||||||
assert data["status"] == "started"
|
|
||||||
assert data["mediaId"] == content_hash
|
|
||||||
stored = os.path.join(vd, f"{content_hash}.mp4")
|
stored = os.path.join(vd, f"{content_hash}.mp4")
|
||||||
assert os.path.isfile(stored)
|
assert os.path.isfile(stored)
|
||||||
with open(stored, "rb") as f:
|
with open(stored, "rb") as f:
|
||||||
@@ -372,11 +370,10 @@ class TestDetectVideoEndpoint:
|
|||||||
r = client.post(
|
r = client.post(
|
||||||
"/detect/video",
|
"/detect/video",
|
||||||
content=video_body,
|
content=video_body,
|
||||||
headers={"X-Filename": "test.mp4"},
|
headers={"X-Filename": "test.mp4", "X-Channel-Id": "test-channel"},
|
||||||
)
|
)
|
||||||
# Assert
|
# Assert
|
||||||
assert r.status_code == 200
|
assert r.status_code == 202
|
||||||
assert r.json()["status"] == "started"
|
|
||||||
|
|
||||||
def test_rejects_non_video_extension(self):
|
def test_rejects_non_video_extension(self):
|
||||||
# Arrange
|
# Arrange
|
||||||
@@ -429,11 +426,12 @@ class TestDetectVideoEndpoint:
|
|||||||
content=video_body,
|
content=video_body,
|
||||||
headers={
|
headers={
|
||||||
"X-Filename": "v.mp4",
|
"X-Filename": "v.mp4",
|
||||||
|
"X-Channel-Id": "test-channel",
|
||||||
"Authorization": f"Bearer {token}",
|
"Authorization": f"Bearer {token}",
|
||||||
},
|
},
|
||||||
)
|
)
|
||||||
|
|
||||||
# Assert
|
# Assert
|
||||||
assert r.status_code == 200
|
assert r.status_code == 202
|
||||||
all_received = b"".join(received_chunks)
|
all_received = b"".join(received_chunks)
|
||||||
assert all_received == video_body
|
assert all_received == video_body
|
||||||
|
|||||||
Reference in New Issue
Block a user