Changed directory structure and renamed applications

- autopilot -> drone_controller - rtsp_ai_player -> ai_controller - added top-level qmake project file - updated documentation - moved small demo applications from tmp/ to misc/
2026-04-22 12:36:35 +00:00 · 2024-10-19 14:44:34 +02:00
parent 54b7dc41ca
commit 45c19baa45
94 changed files with 149 additions and 204 deletions
@@ -0,0 +1,67 @@
+#include <QDebug>
+#include <QThread>
+#include "aiengineinferenceopencvonnx.h"
+
+
+/**
+ * Creates the OpenCV/ONNX inference backend.
+ *
+ * @param modelPath Path of the ONNX model; forwarded to the base class and,
+ *                  converted to std::string, to the embedded Inference object.
+ * @param parent    Optional QObject parent, forwarded to the base class.
+ *
+ * The detector is created with a fixed 640x640 input size and the class file
+ * name "classes.txt".
+ */
+AiEngineInferenceOpencvOnnx::AiEngineInferenceOpencvOnnx(QString modelPath, QObject *parent)
+    : AiEngineInference{modelPath, parent}
+    , mInference(modelPath.toStdString(), cv::Size(640, 640), "classes.txt")
+{
+}
+
+
+/**
+ * Runs object detection on one frame and publishes the result.
+ *
+ * The frame is passed through resizeAndPad(), handed to the detector, and every
+ * detection is converted into an AiEngineObject. resultsReady() is emitted only
+ * when at least one object was found.
+ *
+ * @param frame Image to analyse.
+ *
+ * mActive is raised for the duration of the call and is always lowered again,
+ * including when OpenCV throws; previously a cv::Exception left it stuck at true.
+ */
+void AiEngineInferenceOpencvOnnx::performInferenceSlot(cv::Mat frame)
+{
+    mActive = true;
+
+    try {
+        qDebug() << "performInferenceSlot() in thread: " << QThread::currentThreadId();
+
+        cv::Mat scaledImage               = resizeAndPad(frame);
+        std::vector<Detection> detections = mInference.runInference(scaledImage);
+        AiEngineInferenceResult result;
+
+        qDebug() << "performInferenceSlot() found " << detections.size() << " objects";
+
+        // Every detection, whatever its class, is reported in the result objects.
+        for (const Detection &detection : detections) {
+            AiEngineObject object;
+            object.classId          = detection.class_id;
+            object.propability      = detection.confidence;
+            object.rectangle.top    = detection.box.y;
+            object.rectangle.left   = detection.box.x;
+            object.rectangle.bottom = detection.box.y + detection.box.height;
+            object.rectangle.right  = detection.box.x + detection.box.width;
+            result.objects.append(object);
+        }
+
+        // Only detections with class id 0 are drawn onto the annotated frame;
+        // the object list built above is intentionally left unfiltered.
+        auto end = std::remove_if(detections.begin(), detections.end(), [](const Detection& detection) {
+            return detection.class_id != 0;
+        });
+        detections.erase(end, detections.end());
+
+        if (!result.objects.empty()) {
+            result.frame = mInference.drawLabels(scaledImage, detections);
+            emit resultsReady(result);
+        }
+    }
+    catch (const cv::Exception& e) {
+        std::cerr << "performInferenceSlot() Error: " << e.what() << std::endl;
+    }
+
+    // Reached on both the normal and the cv::Exception path, so the engine
+    // never stays flagged as busy after a failed frame.
+    mActive = false;
+}
+
+
+/**
+ * Initialization hook; this backend has nothing to set up.
+ *
+ * @param number Ignored.
+ */
+void AiEngineInferenceOpencvOnnx::initialize(int number)
+{
+    static_cast<void>(number);
+}
@@ -0,0 +1,19 @@
+#pragma once
+
+#include <QObject>
+#include "aiengineinference.h"
+#include "src-opencv-onnx/inference.h"
+
+class AiEngineInferenceOpencvOnnx : public AiEngineInference  // AiEngineInference backend that runs an ONNX model through the Inference helper
+{
+    Q_OBJECT
+public:
+    explicit AiEngineInferenceOpencvOnnx(QString modelPath, QObject *parent = nullptr);  // modelPath: ONNX model file handed to the Inference member
+    void initialize(int number);  // no-op in this backend; 'number' is ignored
+
+public slots:
+    void performInferenceSlot(cv::Mat frame) override;  // detects objects in one frame; emits resultsReady() when any are found
+
+private:
+    Inference mInference;  // detector, constructed from modelPath in the constructor
+};
@@ -0,0 +1,227 @@
+#include "inference.h"
+
+/**
+ * Stores the configuration and loads the ONNX network immediately.
+ *
+ * @param onnxModelPath   Path of the ONNX model file.
+ * @param modelInputShape Input size used when building the network blob.
+ * @param classesTxtFile  Path of a class-name file; stored but not read here.
+ * @param runWithCuda     Selects the CUDA backend/target instead of the CPU.
+ */
+Inference::Inference(const std::string &onnxModelPath, const cv::Size &modelInputShape, const std::string &classesTxtFile, const bool &runWithCuda)
+    : modelPath(onnxModelPath)
+    , classesPath(classesTxtFile)
+    , cudaEnabled(runWithCuda)
+    , modelShape(modelInputShape)
+{
+    std::cout << "SIZE = " << modelInputShape.width << "x" << modelInputShape.height << std::endl;
+
+    loadOnnxNetwork();
+
+    // loadClassesFromFile() is deliberately not called: the class names are
+    // hard-coded for this example.
+}
+
+/**
+ * Runs the network on one image and returns the detections that survive
+ * non-maximum suppression.
+ *
+ * @param input Image to analyse. It is padded to a square first only when
+ *              letterBoxForSquare is set and the model input shape is square.
+ * @return One Detection per kept box, with box coordinates scaled by the ratio
+ *         between the (possibly padded) input image and the model input shape.
+ *         Empty when the network produces no usable output tensor.
+ *
+ * The output tensor is read as `rows` consecutive records of `dimensions`
+ * floats: [x, y, w, h, class scores...] for yolov8, or
+ * [x, y, w, h, objectness, class scores...] for yolov5.
+ */
+std::vector<Detection> Inference::runInference(const cv::Mat &input)
+{
+    cv::Mat modelInput = input;
+    if (letterBoxForSquare && modelShape.width == modelShape.height)
+        modelInput = formatToSquare(modelInput);
+
+    cv::Mat blob;
+    cv::dnn::blobFromImage(modelInput, blob, 1.0/255.0, modelShape, cv::Scalar(), true, false);
+    net.setInput(blob);
+
+    std::vector<cv::Mat> outputs;
+    net.forward(outputs, net.getUnconnectedOutLayersNames());
+
+    // Without a 3-dimensional output tensor there is nothing to index below.
+    if (outputs.empty() || outputs[0].dims < 3)
+        return {};
+
+    const int rows = outputs[0].size[1];
+    const int dimensions = outputs[0].size[2];
+
+    // yolov5 has an output of shape (batchSize, 25200, 85) (Num classes + box[x,y,w,h] + confidence[c])
+    // yolov8 has an output of shape (batchSize, 84,  8400) (Num classes + box[x,y,w,h])
+    // The layout is fixed to yolov8 records here; no automatic detection or
+    // transposition of the tensor is performed.
+    const bool yolov8 = true;
+
+    // Never read more class scores than one record actually holds, even when
+    // the class-name list is longer than the model's class count.
+    const int scoreOffset = yolov8 ? 4 : 5;
+    const int availableScores = dimensions - scoreOffset;
+    const int knownClasses = static_cast<int>(classes.size());
+    const int scoreCount = knownClasses < availableScores ? knownClasses : availableScores;
+    if (rows <= 0 || scoreCount <= 0)
+        return {};
+
+    // modelShape is a cv::Size2f, so these are floating-point divisions.
+    const float x_factor = modelInput.cols / modelShape.width;
+    const float y_factor = modelInput.rows / modelShape.height;
+
+    // Converts a centre/size record into a top-left anchored pixel rectangle.
+    const auto decodeBox = [x_factor, y_factor](const float *record) {
+        const float x = record[0];
+        const float y = record[1];
+        const float w = record[2];
+        const float h = record[3];
+
+        const int left = static_cast<int>((x - 0.5 * w) * x_factor);
+        const int top = static_cast<int>((y - 0.5 * h) * y_factor);
+        const int width = static_cast<int>(w * x_factor);
+        const int height = static_cast<int>(h * y_factor);
+
+        return cv::Rect(left, top, width, height);
+    };
+
+    std::vector<int> class_ids;
+    std::vector<float> confidences;
+    std::vector<cv::Rect> boxes;
+
+    float *data = outputs[0].ptr<float>();
+    for (int i = 0; i < rows; ++i, data += dimensions)
+    {
+        // yolov5 records carry an objectness value that must pass first.
+        if (!yolov8 && !(data[4] >= modelConfidenceThreshold))
+            continue;
+
+        cv::Mat scores(1, scoreCount, CV_32FC1, data + scoreOffset);
+        cv::Point class_id;
+        double maxClassScore = 0.0;
+        cv::minMaxLoc(scores, nullptr, &maxClassScore, nullptr, &class_id);
+
+        if (maxClassScore > modelScoreThreshold)
+        {
+            // yolov8 reports the best class score, yolov5 the objectness.
+            confidences.push_back(yolov8 ? static_cast<float>(maxClassScore) : data[4]);
+            class_ids.push_back(class_id.x);
+            boxes.push_back(decodeBox(data));
+        }
+    }
+
+    std::vector<int> nms_result;
+    cv::dnn::NMSBoxes(boxes, confidences, modelScoreThreshold, modelNMSThreshold, nms_result);
+
+    // One generator per call instead of one per detection.
+    std::random_device rd;
+    std::mt19937 gen(rd());
+    std::uniform_int_distribution<int> dis(100, 255);
+
+    std::vector<Detection> detections;
+    detections.reserve(nms_result.size());
+    for (const int idx : nms_result)
+    {
+        Detection result;
+        result.class_id = class_ids[idx];
+        result.confidence = confidences[idx];
+        result.color = cv::Scalar(dis(gen),
+                                  dis(gen),
+                                  dis(gen));
+
+        // class_id is below scoreCount, which never exceeds classes.size().
+        result.className = classes[result.class_id];
+        result.box = boxes[idx];
+
+        detections.push_back(result);
+    }
+
+    return detections;
+}
+
+/**
+ * Replaces the class-name list with the lines of the file at classesPath.
+ *
+ * Each line is one class name, in class-id order. A trailing '\r' is removed
+ * so files with CRLF line endings give clean names. If the file cannot be
+ * opened or contains no lines, the current list is left untouched.
+ *
+ * Previously the file entries were appended after the built-in names, which
+ * shifted every loaded name away from its class id.
+ */
+void Inference::loadClassesFromFile()
+{
+    std::ifstream inputFile(classesPath);
+    if (!inputFile.is_open())
+        return;
+
+    std::vector<std::string> loaded;
+    std::string classLine;
+    while (std::getline(inputFile, classLine))
+    {
+        if (!classLine.empty() && classLine.back() == '\r')
+            classLine.pop_back();
+        loaded.push_back(classLine);
+    }
+
+    // Keep the existing names when the file turned out to be empty.
+    if (!loaded.empty())
+        classes.swap(loaded);
+}
+
+/**
+ * Reads the ONNX model at modelPath into `net` and selects where it runs:
+ * the CUDA backend/target when cudaEnabled is set, otherwise the OpenCV
+ * backend on the CPU.
+ */
+void Inference::loadOnnxNetwork()
+{
+    printf("loadOnnxNetwork() starts\n");
+
+    net = cv::dnn::readNetFromONNX(modelPath);
+
+    const char *banner = cudaEnabled ? "\nRunning on CUDA" : "\nRunning on CPU";
+    const int backend = cudaEnabled ? cv::dnn::DNN_BACKEND_CUDA : cv::dnn::DNN_BACKEND_OPENCV;
+    const int target = cudaEnabled ? cv::dnn::DNN_TARGET_CUDA : cv::dnn::DNN_TARGET_CPU;
+
+    std::cout << banner << std::endl;
+    net.setPreferableBackend(backend);
+    net.setPreferableTarget(target);
+}
+
+/**
+ * Pads an image to a square by placing it in the top-left corner of a
+ * zero-filled canvas whose side is the larger of the image's width and height.
+ *
+ * @param source Image to pad.
+ * @return New square image of the same type as source.
+ *
+ * The canvas uses source.type() rather than a fixed CV_8UC3: with a mismatched
+ * type, copyTo() would reallocate the temporary ROI header and leave the
+ * returned canvas black.
+ */
+cv::Mat Inference::formatToSquare(const cv::Mat &source)
+{
+    const int col = source.cols;
+    const int row = source.rows;
+    const int side = MAX(col, row);
+
+    cv::Mat result = cv::Mat::zeros(side, side, source.type());
+    source.copyTo(result(cv::Rect(0, 0, col, row)));
+    return result;
+}
+
+/**
+ * Returns a copy of the image with every detection drawn on it.
+ *
+ * @param image      Image to annotate; it is cloned, not modified.
+ * @param detections Detections to draw.
+ * @return Annotated copy. Each detection gets a green box plus a caption
+ *         "<className>: <confidence>" in black text on a white background
+ *         anchored at the box's top-left corner.
+ */
+cv::Mat Inference::drawLabels(const cv::Mat &image, const std::vector<Detection> &detections)
+{
+    const cv::Scalar boxColor(0, 255, 0);
+    const cv::Scalar captionBackground(255, 255, 255);
+    const cv::Scalar captionColor(0, 0, 0);
+    const int fontFace = cv::FONT_HERSHEY_SIMPLEX;
+    const double fontScale = 0.5;
+    const int textThickness = 1;
+
+    cv::Mat annotated = image.clone();
+
+    for (const Detection &hit : detections)
+    {
+        cv::rectangle(annotated, hit.box, boxColor, 2);
+
+        const std::string caption = hit.className + ": " + std::to_string(hit.confidence);
+
+        int baseLine = 0;
+        const cv::Size captionSize = cv::getTextSize(caption, fontFace, fontScale, textThickness, &baseLine);
+
+        // The caption sits on the box's top-left corner; its background spans
+        // from the text height above that corner to the baseline below it.
+        const cv::Point anchor(hit.box.x, hit.box.y);
+        const cv::Point backgroundTopLeft(anchor.x, anchor.y - captionSize.height);
+        const cv::Point backgroundBottomRight(anchor.x + captionSize.width, anchor.y + baseLine);
+
+        cv::rectangle(annotated, backgroundTopLeft, backgroundBottomRight, captionBackground, cv::FILLED);
+        cv::putText(annotated, caption, anchor, fontFace, fontScale, captionColor, textThickness);
+    }
+
+    return annotated;
+}
@@ -0,0 +1,51 @@
+#pragma once
+
+// Cpp native
+#include <fstream>
+#include <vector>
+#include <string>
+#include <random>
+
+// OpenCV / DNN / Inference
+#include <opencv2/imgproc.hpp>
+#include <opencv2/opencv.hpp>
+#include <opencv2/dnn.hpp>
+
+struct Detection  // One object reported by Inference::runInference()
+{
+    int class_id{0};  // index of the best-scoring class in the class-name list
+    std::string className{};  // name looked up from class_id
+    float confidence{0.0};  // score stored for this detection by runInference()
+    cv::Scalar color{};  // random per-detection colour, each channel drawn from [100, 255]
+    cv::Rect box{};  // bounding box in pixels, scaled back from model coordinates by runInference()
+};
+
+class Inference  // Object detector built on cv::dnn with an ONNX model
+{
+public:
+    Inference(const std::string &onnxModelPath, const cv::Size &modelInputShape = {640, 640}, const std::string &classesTxtFile = "", const bool &runWithCuda = false);  // stores the settings and loads the network right away
+    std::vector<Detection> runInference(const cv::Mat &input);  // forward pass + NMS; returns the kept detections
+    cv::Mat drawLabels(const cv::Mat &image, const std::vector<Detection> &detections);  // returns an annotated copy of image
+
+private:
+    void loadClassesFromFile();  // reads class names from classesPath; not called by the constructor
+    void loadOnnxNetwork();    // reads modelPath into net and selects the CUDA or CPU backend
+
+    cv::Mat formatToSquare(const cv::Mat &source);  // pads source to a square with the image in the top-left corner
+
+    std::string modelPath{};  // ONNX model file
+    std::string classesPath{};  // class-name file, one name per line
+    bool cudaEnabled{};  // true selects the CUDA backend/target
+    // Default class names, indexed by class id; used because the class file is not loaded.
+    std::vector<std::string> classes{"person", "bicycle", "car", "motorcycle", "airplane", "bus", "train", "truck", "boat", "traffic light", "fire hydrant", "stop sign", "parking meter", "bench", "bird", "cat", "dog", "horse", "sheep", "cow", "elephant", "bear", "zebra", "giraffe", "backpack", "umbrella", "handbag", "tie", "suitcase", "frisbee", "skis", "snowboard", "sports ball", "kite", "baseball bat", "baseball glove", "skateboard", "surfboard", "tennis racket", "bottle", "wine glass", "cup", "fork", "knife", "spoon", "bowl", "banana", "apple", "sandwich", "orange", "broccoli", "carrot", "hot dog", "pizza", "donut", "cake", "chair", "couch", "potted plant", "bed", "dining table", "toilet", "tv", "laptop", "mouse", "remote", "keyboard", "cell phone", "microwave", "oven", "toaster", "sink", "refrigerator", "book", "clock", "vase", "scissors", "teddy bear", "hair drier", "toothbrush"};
+
+    cv::Size2f modelShape{};  // network input size; float so the scale factors in runInference() use floating-point division
+
+    float modelConfidenceThreshold {0.25};  // minimum objectness; only consulted in the yolov5 branch
+    float modelScoreThreshold      {0.45};  // minimum class score; also passed to NMSBoxes as the score threshold
+    float modelNMSThreshold        {0.50};  // overlap threshold passed to NMSBoxes
+
+    bool letterBoxForSquare = false;  // when true and the model shape is square, inputs are padded with formatToSquare()
+
+    cv::dnn::Net net;  // loaded network
+};