mirror of
https://github.com/azaion/autopilot.git
synced 2026-04-23 00:06:35 +00:00
working yolo default model with opencv
This commit is contained in:
@@ -0,0 +1,262 @@
|
||||
#include <QDebug>
|
||||
#include <QElapsedTimer>
|
||||
#include <opencv2/imgproc.hpp>
|
||||
#include <opencv2/opencv.hpp>
|
||||
#include <opencv2/dnn.hpp>
|
||||
|
||||
#define CONFIDENCE_THRESHOLD 0.2
|
||||
|
||||
using namespace std;
|
||||
using namespace cv;
|
||||
|
||||
// function to create vector of class names
|
||||
// Build the Pascal VOC class-name list: "background" plus the 20 VOC
// object classes, in the conventional (alphabetical) order.
//
// Fix: the original list was truncated after "sheep" — "sofa", "train"
// and "tvmonitor" were missing, so any detection with class id >= 18
// would have been mislabeled or indexed out of range.
// (Function name kept as-is, including the "Clase" typo, so the existing
// caller in main() continues to compile.)
//
// @return vector of 21 class names; index 0 is the background class.
std::vector<std::string> createClaseNames() {
    return {
        "background",
        "aeroplane", "bicycle", "bird",      "boat",      "bottle",
        "bus",       "car",     "cat",       "chair",     "cow",
        "diningtable", "dog",   "horse",     "motorbike", "person",
        "pottedplant", "sheep", "sofa",      "train",     "tvmonitor",
    };
}
|
||||
|
||||
int main(int argc, char *argv[])
|
||||
{
|
||||
if (argc < 3) {
|
||||
qDebug() << "Give ONNX model as first argument and image as second one.";
|
||||
return 1;
|
||||
}
|
||||
std::vector<String> classNames = createClaseNames();
|
||||
|
||||
std::string model_filename = argv[1];
|
||||
std::string image_filename = argv[2];
|
||||
|
||||
// Load the ONNX model
|
||||
cv::dnn::Net net = cv::dnn::readNetFromONNX(model_filename);
|
||||
|
||||
// Set backend to OpenCL
|
||||
net.setPreferableBackend(cv::dnn::DNN_BACKEND_DEFAULT);
|
||||
//net.setPreferableTarget(cv::dnn::DNN_TARGET_OPENCL);
|
||||
|
||||
// Load an image and preprocess it
|
||||
cv::Mat image = cv::imread(image_filename);
|
||||
cv::Mat blob = cv::dnn::blobFromImage(image, 1/255.0, cv::Size(640, 640), cv::Scalar(), true, false);
|
||||
cv::imwrite("tmpInput.png", image);
|
||||
|
||||
// Set the input for the network
|
||||
net.setInput(blob);
|
||||
std::vector<cv::Mat> outputs;
|
||||
net.forward(outputs);
|
||||
cv::Mat output = outputs[0];
|
||||
|
||||
std::vector<cv::String> layerNames = net.getLayerNames();
|
||||
std::vector<int> outLayerIndices = net.getUnconnectedOutLayers();
|
||||
for (int index : outLayerIndices) {
|
||||
std::cout << layerNames[index - 1] << std::endl;
|
||||
}
|
||||
|
||||
|
||||
cv::Mat detections = net.forward("output0");
|
||||
|
||||
// print some information about detections
|
||||
std::cout << "dims: " << outputs[0].dims << std::endl;
|
||||
std::cout << "size: " << outputs[0].size << std::endl;
|
||||
|
||||
int num_detections0 = outputs.size();
|
||||
int num_classes0 = outputs[0].size[1];
|
||||
int rows = outputs[0].size[1];
|
||||
qDebug() << "num_detections0:" << num_detections0 << "num_classes0:" << num_classes0 << "rows:" << rows;
|
||||
|
||||
int num_detections = detections.size[2]; // 8400
|
||||
int num_attributes = detections.size[1]; // 14
|
||||
qDebug() << "num_detections:" << num_detections << "num_attributes:" << num_attributes;
|
||||
|
||||
|
||||
detections = detections.reshape(1, num_detections);
|
||||
|
||||
for (int i = 0; i < num_detections; i++) {
|
||||
Mat row = detections.row(i);
|
||||
float box_confidence = row.at<float>(4);
|
||||
float cls_confidence = row.at<float>(5);
|
||||
if (box_confidence > 0.5 && cls_confidence > 0.5) {
|
||||
std::cout << "box_confidence " << box_confidence << " cls_confidence: " << cls_confidence << std::endl;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
|
||||
|
||||
/*
|
||||
// Output tensor processing
|
||||
// Assuming output tensor has shape [1, 14, 8400]
|
||||
int num_detections = detections.size[2]; // 8400
|
||||
int num_attributes = detections.size[1]; // 14
|
||||
qDebug() << "num_detections:" << num_detections << "num_attributes:" << num_attributes;
|
||||
|
||||
// Extract and print confidence for each detection
|
||||
const float* data = (float*)output.data;
|
||||
for (int i = 0; i < num_detections; i++) {
|
||||
// Confidence value is at index 4 (based on YOLO-like model output format)
|
||||
float confidence = data[i * num_attributes + 3];
|
||||
// Print confidence value
|
||||
if (confidence > 0.5f) {
|
||||
std::cout << "Detection " << i << " confidence: " << confidence << std::endl;
|
||||
}
|
||||
}
|
||||
*/
|
||||
|
||||
/*
|
||||
// Assuming outputs is a vector of cv::Mat containing the model's output
|
||||
for (int i = 0; i < outputs[0].size[0]; ++i) {
|
||||
for (int j = 0; j < outputs[0].size[1]; ++j) {
|
||||
float confidence = outputs[0].at<float>(i, j);
|
||||
// Print or use the confidence value
|
||||
std::cout << "Confidence: " << confidence << std::endl;
|
||||
}
|
||||
}
|
||||
*/
|
||||
|
||||
/*
|
||||
// Output processing variables
|
||||
std::vector<int> class_ids;
|
||||
std::vector<float> confidences;
|
||||
std::vector<cv::Rect> boxes;
|
||||
|
||||
// Analyze each output
|
||||
for (const auto& output : outputs) {
|
||||
// Each row is a detection: [center_x, center_y, width, height, conf, class1_score, class2_score, ...]
|
||||
const auto* data = (float*)output.data;
|
||||
|
||||
for (int i = 0; i < output.rows; i++, data += output.cols) {
|
||||
float confidence = data[4]; // Objectness confidence
|
||||
|
||||
if (confidence >= 0.5) { // Apply a confidence threshold
|
||||
cv::Mat scores = output.row(i).colRange(5, output.cols);
|
||||
cv::Point class_id_point;
|
||||
double max_class_score;
|
||||
|
||||
cv::minMaxLoc(scores, 0, &max_class_score, 0, &class_id_point);
|
||||
int class_id = class_id_point.x;
|
||||
|
||||
if (max_class_score >= 0.5) { // Apply a class score threshold
|
||||
// Get bounding box
|
||||
int center_x = (int)(data[0] * image.cols);
|
||||
int center_y = (int)(data[1] * image.rows);
|
||||
int width = (int)(data[2] * image.cols);
|
||||
int height = (int)(data[3] * image.rows);
|
||||
int left = center_x - width / 2;
|
||||
int top = center_y - height / 2;
|
||||
|
||||
// Store the results
|
||||
class_ids.push_back(class_id);
|
||||
confidences.push_back(confidence);
|
||||
boxes.push_back(cv::Rect(left, top, width, height));
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
*/
|
||||
|
||||
/*
|
||||
for (int i = 0; i < num_detections; ++i) {
|
||||
for (int a = 0; a < 10; a++) {
|
||||
qDebug() << "confidence:" << confidence << "class_id" << class_id;
|
||||
}
|
||||
//float confidence = outputs[0].at<float>(i, 4);
|
||||
//int class_id = outputs[0].at<float>(i, 5);
|
||||
qDebug() << "confidence:" << confidence << "class_id" << class_id;
|
||||
}
|
||||
*/
|
||||
|
||||
/*
|
||||
std::vector<int> class_ids;
|
||||
std::vector<float> confidences;
|
||||
std::vector<cv::Rect> boxes;
|
||||
// Output tensor processing
|
||||
for (size_t i = 0; i < outputs.size(); i++) {
|
||||
float* data = (float*)outputs[i].data;
|
||||
for (int j = 0; j < outputs[i].rows; ++j) {
|
||||
float confidence = data[4]; // Objectness confidence
|
||||
qDebug() <<" Confidence: " << confidence;
|
||||
|
||||
if (confidence >= CONFIDENCE_THRESHOLD) {
|
||||
// Get class with the highest score
|
||||
cv::Mat scores = outputs[i].row(j).colRange(5, num_classes + 5);
|
||||
cv::Point class_id_point;
|
||||
double max_class_score;
|
||||
cv::minMaxLoc(scores, 0, &max_class_score, 0, &class_id_point);
|
||||
|
||||
qDebug() << "Class ID: " << class_id_point.x << " Confidence: " << confidence;
|
||||
|
||||
if (max_class_score > CONFIDENCE_THRESHOLD) {
|
||||
int center_x = (int)(data[0] * image.cols);
|
||||
int center_y = (int)(data[1] * image.rows);
|
||||
int width = (int)(data[2] * image.cols);
|
||||
int height = (int)(data[3] * image.rows);
|
||||
int left = center_x - width / 2;
|
||||
int top = center_y - height / 2;
|
||||
|
||||
// Store the results
|
||||
class_ids.push_back(class_id_point.x);
|
||||
confidences.push_back((float)max_class_score);
|
||||
boxes.push_back(cv::Rect(left, top, width, height));
|
||||
}
|
||||
}
|
||||
data += num_classes + 5; // Move to the next detection
|
||||
}
|
||||
}
|
||||
*/
|
||||
|
||||
/*
|
||||
// Process the output
|
||||
for (int i = 0; i < num_detections; ++i) {
|
||||
float confidence = outputs[0].at<float>(i, 4);
|
||||
int class_id = outputs[0].at<float>(i, 5);
|
||||
// ... extract bounding box coordinates
|
||||
float x_center = outputs[0].at<float>(i, 0);
|
||||
float y_center = outputs[0].at<float>(i, 1);
|
||||
float width = outputs[0].at<float>(i, 2);
|
||||
float height = outputs[0].at<float>(i, 3);
|
||||
|
||||
// Calculate bounding box corners
|
||||
int x = (int)(x_center - width / 2.0);
|
||||
int y = (int)(y_center - height / 2.0);
|
||||
int width_int = (int)width;
|
||||
int height_int = (int)height;
|
||||
|
||||
// Print or use the detected information
|
||||
qDebug() << "Class ID: " << class_id << " Confidence: " << confidence << " Bounding Box: (" << x << ", " << y << ", " << width_int << ", " << height_int << ")";
|
||||
}
|
||||
*/
|
||||
|
||||
|
||||
/*
|
||||
// Perform forward pass
|
||||
for (int i = 0; i < 10; i++) {
|
||||
std::vector<cv::Mat> outputs;
|
||||
QElapsedTimer timer;
|
||||
timer.start();
|
||||
net.forward(outputs);
|
||||
qDebug() << "Inference completed in" << timer.elapsed() << "ms.";
|
||||
}
|
||||
*/
|
||||
|
||||
// Process the output (YOLO-specific post-processing like NMS is needed)
|
||||
// Outputs contain class scores, bounding boxes, etc.
|
||||
|
||||
|
||||
return 0;
|
||||
}
|
||||
Reference in New Issue
Block a user