Resize input image to 640x640 for ONNX inference in RTSP demo app.

2026-04-23 01:26:34 +00:00 · 2024-07-09 21:11:23 +02:00
parent 607ac22b46
commit 896307d296
1 changed files with 43 additions and 4 deletions
@@ -11,15 +11,54 @@ AiEngineInferenceOnnx::AiEngineInferenceOnnx(QString modelPath, QObject *parent)
 }


+// Fits src onto a 640x640 canvas: shrinks it (never enlarges) preserving the aspect
+// ratio, then pads right/bottom with black. The image stays anchored at the top-left.
+// Throws cv::Exception if src is empty.
+cv::Mat resizeAndPad(const cv::Mat& src) {
+    const int targetWidth = 640;
+    const int targetHeight = 640;
+
+    // Fail early with a clear message; an empty Mat would give a 0/0 aspect ratio and
+    // an obscure size assertion inside cv::resize.
+    CV_Assert(!src.empty());
+
+    // Largest uniform scale that fits both sides, capped at 1 so small frames are only padded.
+    double scale = 1.0;
+    const double scaleX = static_cast<double>(targetWidth) / src.cols;
+    const double scaleY = static_cast<double>(targetHeight) / src.rows;
+    if (scaleX < scale) scale = scaleX;
+    if (scaleY < scale) scale = scaleY;
+
+    // Round to nearest (truncation could drop a pixel) and clamp to [1, target]: extreme
+    // aspect ratios would otherwise yield a 0-pixel side, which cv::resize rejects.
+    int newWidth = cvRound(src.cols * scale);
+    int newHeight = cvRound(src.rows * scale);
+    newWidth = newWidth < 1 ? 1 : (newWidth > targetWidth ? targetWidth : newWidth);
+    newHeight = newHeight < 1 ? 1 : (newHeight > targetHeight ? targetHeight : newHeight);
+
+    // Scale the frame to the computed size.
+    cv::Mat resized;
+    cv::resize(src, resized, cv::Size(newWidth, newHeight));
+
+    // Black canvas of the same pixel type as the input.
+    cv::Mat output(targetHeight, targetWidth, src.type(), cv::Scalar(0, 0, 0));
+    // Place the scaled frame in the top-left corner; the rest stays black padding.
+    resized.copyTo(output(cv::Rect(0, 0, resized.cols, resized.rows)));
+    return output;
+}
+
+
 void AiEngineInferenceOnnx::performInferenceSlot(cv::Mat frame)
 {
    //qDebug() << "performInferenceSlot() in thread: " << QThread::currentThreadId();

    mActive = true;

-    int orig_width                         = frame.cols;
-    int orig_height                        = frame.rows;
-    std::vector<float> input_tensor_values = mEngine->preprocessImage(frame);
+    cv::Mat scaledImage = resizeAndPad(frame);
+
+    int orig_width                         = scaledImage.cols;
+    int orig_height                        = scaledImage.rows;
+    std::vector<float> input_tensor_values = mEngine->preprocessImage(scaledImage);
    std::vector<float> results             = mEngine->runInference(input_tensor_values);
    float confidence_threshold             = 0.4;
    std::vector<Detection> detections      = mEngine->filterDetections(results, confidence_threshold, mEngine->input_shape[2], mEngine->input_shape[3], orig_width, orig_height);
@@ -38,7 +77,7 @@ void AiEngineInferenceOnnx::performInferenceSlot(cv::Mat frame)
        result.objects.append(object);
    }

-    result.frame = mEngine->draw_labels(frame.clone(), detections);
+    result.frame = mEngine->draw_labels(scaledImage.clone(), detections);
    emit resultsReady(result);

    mActive = false;