mirror of
https://github.com/azaion/autopilot.git
synced 2026-04-23 00:06:35 +00:00
working yolo default model with opencv
This commit is contained in:
@@ -0,0 +1,262 @@
|
||||
#include <QDebug>
|
||||
#include <QElapsedTimer>
|
||||
#include <opencv2/imgproc.hpp>
|
||||
#include <opencv2/opencv.hpp>
|
||||
#include <opencv2/dnn.hpp>
|
||||
|
||||
#define CONFIDENCE_THRESHOLD 0.2
|
||||
|
||||
using namespace std;
|
||||
using namespace cv;
|
||||
|
||||
// function to create vector of class names
|
||||
// Build the Pascal VOC class-name list: "background" plus the 20 VOC
// object classes, in the conventional (alphabetical) order.
//
// Fix: the original list was truncated after "sheep" — "sofa", "train"
// and "tvmonitor" were missing, so any detection with class id >= 18
// would have been mislabeled or indexed out of range.
// (Function name kept as-is, including the "Clase" typo, so the existing
// caller in main() continues to compile.)
//
// @return vector of 21 class names; index 0 is the background class.
std::vector<std::string> createClaseNames() {
    return {
        "background",
        "aeroplane", "bicycle", "bird",      "boat",      "bottle",
        "bus",       "car",     "cat",       "chair",     "cow",
        "diningtable", "dog",   "horse",     "motorbike", "person",
        "pottedplant", "sheep", "sofa",      "train",     "tvmonitor",
    };
}
|
||||
|
||||
int main(int argc, char *argv[])
|
||||
{
|
||||
if (argc < 3) {
|
||||
qDebug() << "Give ONNX model as first argument and image as second one.";
|
||||
return 1;
|
||||
}
|
||||
std::vector<String> classNames = createClaseNames();
|
||||
|
||||
std::string model_filename = argv[1];
|
||||
std::string image_filename = argv[2];
|
||||
|
||||
// Load the ONNX model
|
||||
cv::dnn::Net net = cv::dnn::readNetFromONNX(model_filename);
|
||||
|
||||
// Set backend to OpenCL
|
||||
net.setPreferableBackend(cv::dnn::DNN_BACKEND_DEFAULT);
|
||||
//net.setPreferableTarget(cv::dnn::DNN_TARGET_OPENCL);
|
||||
|
||||
// Load an image and preprocess it
|
||||
cv::Mat image = cv::imread(image_filename);
|
||||
cv::Mat blob = cv::dnn::blobFromImage(image, 1/255.0, cv::Size(640, 640), cv::Scalar(), true, false);
|
||||
cv::imwrite("tmpInput.png", image);
|
||||
|
||||
// Set the input for the network
|
||||
net.setInput(blob);
|
||||
std::vector<cv::Mat> outputs;
|
||||
net.forward(outputs);
|
||||
cv::Mat output = outputs[0];
|
||||
|
||||
std::vector<cv::String> layerNames = net.getLayerNames();
|
||||
std::vector<int> outLayerIndices = net.getUnconnectedOutLayers();
|
||||
for (int index : outLayerIndices) {
|
||||
std::cout << layerNames[index - 1] << std::endl;
|
||||
}
|
||||
|
||||
|
||||
cv::Mat detections = net.forward("output0");
|
||||
|
||||
// print some information about detections
|
||||
std::cout << "dims: " << outputs[0].dims << std::endl;
|
||||
std::cout << "size: " << outputs[0].size << std::endl;
|
||||
|
||||
int num_detections0 = outputs.size();
|
||||
int num_classes0 = outputs[0].size[1];
|
||||
int rows = outputs[0].size[1];
|
||||
qDebug() << "num_detections0:" << num_detections0 << "num_classes0:" << num_classes0 << "rows:" << rows;
|
||||
|
||||
int num_detections = detections.size[2]; // 8400
|
||||
int num_attributes = detections.size[1]; // 14
|
||||
qDebug() << "num_detections:" << num_detections << "num_attributes:" << num_attributes;
|
||||
|
||||
|
||||
detections = detections.reshape(1, num_detections);
|
||||
|
||||
for (int i = 0; i < num_detections; i++) {
|
||||
Mat row = detections.row(i);
|
||||
float box_confidence = row.at<float>(4);
|
||||
float cls_confidence = row.at<float>(5);
|
||||
if (box_confidence > 0.5 && cls_confidence > 0.5) {
|
||||
std::cout << "box_confidence " << box_confidence << " cls_confidence: " << cls_confidence << std::endl;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
|
||||
|
||||
/*
|
||||
// Output tensor processing
|
||||
// Assuming output tensor has shape [1, 14, 8400]
|
||||
int num_detections = detections.size[2]; // 8400
|
||||
int num_attributes = detections.size[1]; // 14
|
||||
qDebug() << "num_detections:" << num_detections << "num_attributes:" << num_attributes;
|
||||
|
||||
// Extract and print confidence for each detection
|
||||
const float* data = (float*)output.data;
|
||||
for (int i = 0; i < num_detections; i++) {
|
||||
// Confidence value is at index 4 (based on YOLO-like model output format)
|
||||
float confidence = data[i * num_attributes + 3];
|
||||
// Print confidence value
|
||||
if (confidence > 0.5f) {
|
||||
std::cout << "Detection " << i << " confidence: " << confidence << std::endl;
|
||||
}
|
||||
}
|
||||
*/
|
||||
|
||||
/*
|
||||
// Assuming outputs is a vector of cv::Mat containing the model's output
|
||||
for (int i = 0; i < outputs[0].size[0]; ++i) {
|
||||
for (int j = 0; j < outputs[0].size[1]; ++j) {
|
||||
float confidence = outputs[0].at<float>(i, j);
|
||||
// Print or use the confidence value
|
||||
std::cout << "Confidence: " << confidence << std::endl;
|
||||
}
|
||||
}
|
||||
*/
|
||||
|
||||
/*
|
||||
// Output processing variables
|
||||
std::vector<int> class_ids;
|
||||
std::vector<float> confidences;
|
||||
std::vector<cv::Rect> boxes;
|
||||
|
||||
// Analyze each output
|
||||
for (const auto& output : outputs) {
|
||||
// Each row is a detection: [center_x, center_y, width, height, conf, class1_score, class2_score, ...]
|
||||
const auto* data = (float*)output.data;
|
||||
|
||||
for (int i = 0; i < output.rows; i++, data += output.cols) {
|
||||
float confidence = data[4]; // Objectness confidence
|
||||
|
||||
if (confidence >= 0.5) { // Apply a confidence threshold
|
||||
cv::Mat scores = output.row(i).colRange(5, output.cols);
|
||||
cv::Point class_id_point;
|
||||
double max_class_score;
|
||||
|
||||
cv::minMaxLoc(scores, 0, &max_class_score, 0, &class_id_point);
|
||||
int class_id = class_id_point.x;
|
||||
|
||||
if (max_class_score >= 0.5) { // Apply a class score threshold
|
||||
// Get bounding box
|
||||
int center_x = (int)(data[0] * image.cols);
|
||||
int center_y = (int)(data[1] * image.rows);
|
||||
int width = (int)(data[2] * image.cols);
|
||||
int height = (int)(data[3] * image.rows);
|
||||
int left = center_x - width / 2;
|
||||
int top = center_y - height / 2;
|
||||
|
||||
// Store the results
|
||||
class_ids.push_back(class_id);
|
||||
confidences.push_back(confidence);
|
||||
boxes.push_back(cv::Rect(left, top, width, height));
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
*/
|
||||
|
||||
/*
|
||||
for (int i = 0; i < num_detections; ++i) {
|
||||
for (int a = 0; a < 10; a++) {
|
||||
qDebug() << "confidence:" << confidence << "class_id" << class_id;
|
||||
}
|
||||
//float confidence = outputs[0].at<float>(i, 4);
|
||||
//int class_id = outputs[0].at<float>(i, 5);
|
||||
qDebug() << "confidence:" << confidence << "class_id" << class_id;
|
||||
}
|
||||
*/
|
||||
|
||||
/*
|
||||
std::vector<int> class_ids;
|
||||
std::vector<float> confidences;
|
||||
std::vector<cv::Rect> boxes;
|
||||
// Output tensor processing
|
||||
for (size_t i = 0; i < outputs.size(); i++) {
|
||||
float* data = (float*)outputs[i].data;
|
||||
for (int j = 0; j < outputs[i].rows; ++j) {
|
||||
float confidence = data[4]; // Objectness confidence
|
||||
qDebug() <<" Confidence: " << confidence;
|
||||
|
||||
if (confidence >= CONFIDENCE_THRESHOLD) {
|
||||
// Get class with the highest score
|
||||
cv::Mat scores = outputs[i].row(j).colRange(5, num_classes + 5);
|
||||
cv::Point class_id_point;
|
||||
double max_class_score;
|
||||
cv::minMaxLoc(scores, 0, &max_class_score, 0, &class_id_point);
|
||||
|
||||
qDebug() << "Class ID: " << class_id_point.x << " Confidence: " << confidence;
|
||||
|
||||
if (max_class_score > CONFIDENCE_THRESHOLD) {
|
||||
int center_x = (int)(data[0] * image.cols);
|
||||
int center_y = (int)(data[1] * image.rows);
|
||||
int width = (int)(data[2] * image.cols);
|
||||
int height = (int)(data[3] * image.rows);
|
||||
int left = center_x - width / 2;
|
||||
int top = center_y - height / 2;
|
||||
|
||||
// Store the results
|
||||
class_ids.push_back(class_id_point.x);
|
||||
confidences.push_back((float)max_class_score);
|
||||
boxes.push_back(cv::Rect(left, top, width, height));
|
||||
}
|
||||
}
|
||||
data += num_classes + 5; // Move to the next detection
|
||||
}
|
||||
}
|
||||
*/
|
||||
|
||||
/*
|
||||
// Process the output
|
||||
for (int i = 0; i < num_detections; ++i) {
|
||||
float confidence = outputs[0].at<float>(i, 4);
|
||||
int class_id = outputs[0].at<float>(i, 5);
|
||||
// ... extract bounding box coordinates
|
||||
float x_center = outputs[0].at<float>(i, 0);
|
||||
float y_center = outputs[0].at<float>(i, 1);
|
||||
float width = outputs[0].at<float>(i, 2);
|
||||
float height = outputs[0].at<float>(i, 3);
|
||||
|
||||
// Calculate bounding box corners
|
||||
int x = (int)(x_center - width / 2.0);
|
||||
int y = (int)(y_center - height / 2.0);
|
||||
int width_int = (int)width;
|
||||
int height_int = (int)height;
|
||||
|
||||
// Print or use the detected information
|
||||
qDebug() << "Class ID: " << class_id << " Confidence: " << confidence << " Bounding Box: (" << x << ", " << y << ", " << width_int << ", " << height_int << ")";
|
||||
}
|
||||
*/
|
||||
|
||||
|
||||
/*
|
||||
// Perform forward pass
|
||||
for (int i = 0; i < 10; i++) {
|
||||
std::vector<cv::Mat> outputs;
|
||||
QElapsedTimer timer;
|
||||
timer.start();
|
||||
net.forward(outputs);
|
||||
qDebug() << "Inference completed in" << timer.elapsed() << "ms.";
|
||||
}
|
||||
*/
|
||||
|
||||
// Process the output (YOLO-specific post-processing like NMS is needed)
|
||||
// Outputs contain class scores, bounding boxes, etc.
|
||||
|
||||
|
||||
return 0;
|
||||
}
|
||||
Reference in New Issue
Block a user