Files
autopilot/misc/opencv-dnn-onnx-inference/main.cpp
T
2024-08-25 18:26:19 +03:00

263 lines
9.2 KiB
C++

#include <QDebug>
#include <QElapsedTimer>
#include <opencv2/imgproc.hpp>
#include <opencv2/opencv.hpp>
#include <opencv2/dnn.hpp>
#define CONFIDENCE_THRESHOLD 0.2
using namespace std;
using namespace cv;
// function to create vector of class names
// Build the Pascal VOC class-name list (index 0 is "background", followed by
// the 20 VOC object classes, in the canonical order).
// NOTE(review): the function name carries a historical typo ("Clase"); it is
// kept unchanged because existing callers use it.
// Returns: a 21-element vector of class names.
std::vector<std::string> createClaseNames() {
    // The original list was truncated after "sheep"; the full VOC list also
    // contains "sofa", "train" and "tvmonitor".
    return {
        "background", "aeroplane", "bicycle", "bird",       "boat",
        "bottle",     "bus",       "car",     "cat",        "chair",
        "cow",        "diningtable", "dog",   "horse",      "motorbike",
        "person",     "pottedplant", "sheep", "sofa",       "train",
        "tvmonitor",
    };
}
int main(int argc, char *argv[])
{
if (argc < 3) {
qDebug() << "Give ONNX model as first argument and image as second one.";
return 1;
}
std::vector<String> classNames = createClaseNames();
std::string model_filename = argv[1];
std::string image_filename = argv[2];
// Load the ONNX model
cv::dnn::Net net = cv::dnn::readNetFromONNX(model_filename);
// Set backend to OpenCL
net.setPreferableBackend(cv::dnn::DNN_BACKEND_DEFAULT);
//net.setPreferableTarget(cv::dnn::DNN_TARGET_OPENCL);
// Load an image and preprocess it
cv::Mat image = cv::imread(image_filename);
cv::Mat blob = cv::dnn::blobFromImage(image, 1/255.0, cv::Size(640, 640), cv::Scalar(), true, false);
cv::imwrite("tmpInput.png", image);
// Set the input for the network
net.setInput(blob);
std::vector<cv::Mat> outputs;
net.forward(outputs);
cv::Mat output = outputs[0];
std::vector<cv::String> layerNames = net.getLayerNames();
std::vector<int> outLayerIndices = net.getUnconnectedOutLayers();
for (int index : outLayerIndices) {
std::cout << layerNames[index - 1] << std::endl;
}
cv::Mat detections = net.forward("output0");
// print some information about detections
std::cout << "dims: " << outputs[0].dims << std::endl;
std::cout << "size: " << outputs[0].size << std::endl;
int num_detections0 = outputs.size();
int num_classes0 = outputs[0].size[1];
int rows = outputs[0].size[1];
qDebug() << "num_detections0:" << num_detections0 << "num_classes0:" << num_classes0 << "rows:" << rows;
int num_detections = detections.size[2]; // 8400
int num_attributes = detections.size[1]; // 14
qDebug() << "num_detections:" << num_detections << "num_attributes:" << num_attributes;
detections = detections.reshape(1, num_detections);
for (int i = 0; i < num_detections; i++) {
Mat row = detections.row(i);
float box_confidence = row.at<float>(4);
float cls_confidence = row.at<float>(5);
if (box_confidence > 0.5 && cls_confidence > 0.5) {
std::cout << "box_confidence " << box_confidence << " cls_confidence: " << cls_confidence << std::endl;
}
}
/*
// Output tensor processing
// Assuming output tensor has shape [1, 14, 8400]
int num_detections = detections.size[2]; // 8400
int num_attributes = detections.size[1]; // 14
qDebug() << "num_detections:" << num_detections << "num_attributes:" << num_attributes;
// Extract and print confidence for each detection
const float* data = (float*)output.data;
for (int i = 0; i < num_detections; i++) {
// Confidence value is at index 4 (based on YOLO-like model output format)
float confidence = data[i * num_attributes + 3];
// Print confidence value
if (confidence > 0.5f) {
std::cout << "Detection " << i << " confidence: " << confidence << std::endl;
}
}
*/
/*
// Assuming outputs is a vector of cv::Mat containing the model's output
for (int i = 0; i < outputs[0].size[0]; ++i) {
for (int j = 0; j < outputs[0].size[1]; ++j) {
float confidence = outputs[0].at<float>(i, j);
// Print or use the confidence value
std::cout << "Confidence: " << confidence << std::endl;
}
}
*/
/*
// Output processing variables
std::vector<int> class_ids;
std::vector<float> confidences;
std::vector<cv::Rect> boxes;
// Analyze each output
for (const auto& output : outputs) {
// Each row is a detection: [center_x, center_y, width, height, conf, class1_score, class2_score, ...]
const auto* data = (float*)output.data;
for (int i = 0; i < output.rows; i++, data += output.cols) {
float confidence = data[4]; // Objectness confidence
if (confidence >= 0.5) { // Apply a confidence threshold
cv::Mat scores = output.row(i).colRange(5, output.cols);
cv::Point class_id_point;
double max_class_score;
cv::minMaxLoc(scores, 0, &max_class_score, 0, &class_id_point);
int class_id = class_id_point.x;
if (max_class_score >= 0.5) { // Apply a class score threshold
// Get bounding box
int center_x = (int)(data[0] * image.cols);
int center_y = (int)(data[1] * image.rows);
int width = (int)(data[2] * image.cols);
int height = (int)(data[3] * image.rows);
int left = center_x - width / 2;
int top = center_y - height / 2;
// Store the results
class_ids.push_back(class_id);
confidences.push_back(confidence);
boxes.push_back(cv::Rect(left, top, width, height));
}
}
}
}
*/
/*
for (int i = 0; i < num_detections; ++i) {
for (int a = 0; a < 10; a++) {
qDebug() << "confidence:" << confidence << "class_id" << class_id;
}
//float confidence = outputs[0].at<float>(i, 4);
//int class_id = outputs[0].at<float>(i, 5);
qDebug() << "confidence:" << confidence << "class_id" << class_id;
}
*/
/*
std::vector<int> class_ids;
std::vector<float> confidences;
std::vector<cv::Rect> boxes;
// Output tensor processing
for (size_t i = 0; i < outputs.size(); i++) {
float* data = (float*)outputs[i].data;
for (int j = 0; j < outputs[i].rows; ++j) {
float confidence = data[4]; // Objectness confidence
qDebug() <<" Confidence: " << confidence;
if (confidence >= CONFIDENCE_THRESHOLD) {
// Get class with the highest score
cv::Mat scores = outputs[i].row(j).colRange(5, num_classes + 5);
cv::Point class_id_point;
double max_class_score;
cv::minMaxLoc(scores, 0, &max_class_score, 0, &class_id_point);
qDebug() << "Class ID: " << class_id_point.x << " Confidence: " << confidence;
if (max_class_score > CONFIDENCE_THRESHOLD) {
int center_x = (int)(data[0] * image.cols);
int center_y = (int)(data[1] * image.rows);
int width = (int)(data[2] * image.cols);
int height = (int)(data[3] * image.rows);
int left = center_x - width / 2;
int top = center_y - height / 2;
// Store the results
class_ids.push_back(class_id_point.x);
confidences.push_back((float)max_class_score);
boxes.push_back(cv::Rect(left, top, width, height));
}
}
data += num_classes + 5; // Move to the next detection
}
}
*/
/*
// Process the output
for (int i = 0; i < num_detections; ++i) {
float confidence = outputs[0].at<float>(i, 4);
int class_id = outputs[0].at<float>(i, 5);
// ... extract bounding box coordinates
float x_center = outputs[0].at<float>(i, 0);
float y_center = outputs[0].at<float>(i, 1);
float width = outputs[0].at<float>(i, 2);
float height = outputs[0].at<float>(i, 3);
// Calculate bounding box corners
int x = (int)(x_center - width / 2.0);
int y = (int)(y_center - height / 2.0);
int width_int = (int)width;
int height_int = (int)height;
// Print or use the detected information
qDebug() << "Class ID: " << class_id << " Confidence: " << confidence << " Bounding Box: (" << x << ", " << y << ", " << width_int << ", " << height_int << ")";
}
*/
/*
// Perform forward pass
for (int i = 0; i < 10; i++) {
std::vector<cv::Mat> outputs;
QElapsedTimer timer;
timer.start();
net.forward(outputs);
qDebug() << "Inference completed in" << timer.elapsed() << "ms.";
}
*/
// Process the output (YOLO-specific post-processing like NMS is needed)
// Outputs contain class scores, bounding boxes, etc.
return 0;
}