mirror of
https://github.com/azaion/autopilot.git
synced 2026-04-22 22:36:34 +00:00
Fully working opi_rtsp on PC with YOLOv8 ONNX models
This commit is contained in:
@@ -24,6 +24,7 @@ opi5 {
|
|||||||
INCLUDEPATH += /opt/onnxruntime-linux-x64-1.18.0/include
|
INCLUDEPATH += /opt/onnxruntime-linux-x64-1.18.0/include
|
||||||
LIBS += /opt/onnxruntime-linux-x64-1.18.0/lib/libonnxruntime.so.1.18.0
|
LIBS += /opt/onnxruntime-linux-x64-1.18.0/lib/libonnxruntime.so.1.18.0
|
||||||
QMAKE_LFLAGS += -Wl,-rpath,/opt/onnxruntime-linux-x64-1.18.0/lib
|
QMAKE_LFLAGS += -Wl,-rpath,/opt/onnxruntime-linux-x64-1.18.0/lib
|
||||||
|
QMAKE_LFLAGS += -Wl,-rpath,/usr/local/lib
|
||||||
SOURCES += $$PWD/src-onnx/*.cpp
|
SOURCES += $$PWD/src-onnx/*.cpp
|
||||||
HEADERS += $$PWD/src-onnx/*.h
|
HEADERS += $$PWD/src-onnx/*.h
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -3,34 +3,39 @@
|
|||||||
#include "aiengineinferenceonnx.h"
|
#include "aiengineinferenceonnx.h"
|
||||||
|
|
||||||
|
|
||||||
|
const int INFERENCE_SQUARE_WIDTH = 640;
|
||||||
|
const int INFERENCE_SQUARE_HEIGHT = 640;
|
||||||
|
|
||||||
|
|
||||||
AiEngineInferenceOnnx::AiEngineInferenceOnnx(QString modelPath, QObject *parent)
|
AiEngineInferenceOnnx::AiEngineInferenceOnnx(QString modelPath, QObject *parent)
|
||||||
: AiEngineInference{modelPath, parent}
|
: AiEngineInference{modelPath, parent},
|
||||||
|
mInference(modelPath.toStdString(), cv::Size(640, 640), "classes.txt")
|
||||||
{
|
{
|
||||||
//qDebug() << "TUOMAS test mModelPath=" << mModelPath;
|
//qDebug() << "TUOMAS test mModelPath=" << mModelPath;
|
||||||
mEngine = new InferenceEngine(modelPath.toStdString());
|
//mEngine = new InferenceEngine(modelPath.toStdString());
|
||||||
|
//mInference = new Inference(modelPath.toStdString(), cv::Size(INFERENCE_SQUARE_WIDTH, INFERENCE_SQUARE_HEIGHT), "classes.txt");
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
cv::Mat resizeAndPad(const cv::Mat& src) {
|
cv::Mat resizeAndPad(const cv::Mat& src)
|
||||||
// Desired size
|
{
|
||||||
const int targetWidth = 640;
|
|
||||||
const int targetHeight = 640;
|
|
||||||
|
|
||||||
// Calculate the aspect ratio
|
// Calculate the aspect ratio
|
||||||
float aspectRatio = static_cast<float>(src.cols) / src.rows;
|
float aspectRatio = static_cast<float>(src.cols) / src.rows;
|
||||||
|
|
||||||
// Determine new size while maintaining aspect ratio
|
// Determine new size while maintaining aspect ratio
|
||||||
int newWidth = src.cols;
|
int newWidth = src.cols;
|
||||||
int newHeight = src.rows;
|
int newHeight = src.rows;
|
||||||
if (src.cols > targetWidth || src.rows > targetHeight) {
|
if (src.cols > INFERENCE_SQUARE_WIDTH || src.rows > INFERENCE_SQUARE_HEIGHT) {
|
||||||
if (aspectRatio > 1) {
|
if (aspectRatio > 1)
|
||||||
|
{
|
||||||
// Width is greater than height
|
// Width is greater than height
|
||||||
newWidth = targetWidth;
|
newWidth = INFERENCE_SQUARE_WIDTH;
|
||||||
newHeight = static_cast<int>(targetWidth / aspectRatio);
|
newHeight = static_cast<int>(INFERENCE_SQUARE_WIDTH / aspectRatio);
|
||||||
} else {
|
}
|
||||||
|
else {
|
||||||
// Height is greater than or equal to width
|
// Height is greater than or equal to width
|
||||||
newHeight = targetHeight;
|
newHeight = INFERENCE_SQUARE_HEIGHT;
|
||||||
newWidth = static_cast<int>(targetHeight * aspectRatio);
|
newWidth = static_cast<int>(INFERENCE_SQUARE_HEIGHT * aspectRatio);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -39,7 +44,7 @@ cv::Mat resizeAndPad(const cv::Mat& src) {
|
|||||||
cv::resize(src, resized, cv::Size(newWidth, newHeight));
|
cv::resize(src, resized, cv::Size(newWidth, newHeight));
|
||||||
|
|
||||||
// Create a new 640x640 image with a black background
|
// Create a new 640x640 image with a black background
|
||||||
cv::Mat output(targetHeight, targetWidth, src.type(), cv::Scalar(0, 0, 0));
|
cv::Mat output(INFERENCE_SQUARE_HEIGHT, INFERENCE_SQUARE_WIDTH, src.type(), cv::Scalar(0, 0, 0));
|
||||||
|
|
||||||
// Copy the resized image to the top-left corner of the new image
|
// Copy the resized image to the top-left corner of the new image
|
||||||
resized.copyTo(output(cv::Rect(0, 0, resized.cols, resized.rows)));
|
resized.copyTo(output(cv::Rect(0, 0, resized.cols, resized.rows)));
|
||||||
@@ -50,35 +55,50 @@ cv::Mat resizeAndPad(const cv::Mat& src) {
|
|||||||
|
|
||||||
void AiEngineInferenceOnnx::performInferenceSlot(cv::Mat frame)
|
void AiEngineInferenceOnnx::performInferenceSlot(cv::Mat frame)
|
||||||
{
|
{
|
||||||
//qDebug() << "performInferenceSlot() in thread: " << QThread::currentThreadId();
|
try {
|
||||||
|
//qDebug() << "performInferenceSlot() in thread: " << QThread::currentThreadId();
|
||||||
|
|
||||||
mActive = true;
|
mActive = true;
|
||||||
|
|
||||||
cv::Mat scaledImage = resizeAndPad(frame);
|
cv::Mat scaledImage = resizeAndPad(frame);
|
||||||
|
std::vector<Detection> detections = mInference.runInference(scaledImage);
|
||||||
|
AiEngineInferenceResult result;
|
||||||
|
|
||||||
int orig_width = scaledImage.cols;
|
for (uint i = 0; i < detections.size(); ++i) {
|
||||||
int orig_height = scaledImage.rows;
|
const Detection &detection = detections[i];
|
||||||
std::vector<float> input_tensor_values = mEngine->preprocessImage(scaledImage);
|
|
||||||
std::vector<float> results = mEngine->runInference(input_tensor_values);
|
|
||||||
float confidence_threshold = 0.4;
|
|
||||||
std::vector<Detection> detections = mEngine->filterDetections(results, confidence_threshold, mEngine->input_shape[2], mEngine->input_shape[3], orig_width, orig_height);
|
|
||||||
|
|
||||||
AiEngineInferenceResult result;
|
// Add detected objects to the results
|
||||||
for (uint32_t i = 0; i < detections.size(); i++) {
|
AiEngineObject object;
|
||||||
const Detection &detection = detections[i];
|
object.classId = detection.class_id;
|
||||||
|
object.propability = detection.confidence;
|
||||||
|
object.rectangle.top = detection.box.y;
|
||||||
|
object.rectangle.left = detection.box.x;
|
||||||
|
object.rectangle.bottom = detection.box.y + detection.box.height;
|
||||||
|
object.rectangle.right = detection.box.x + detection.box.width;
|
||||||
|
result.objects.append(object);
|
||||||
|
|
||||||
AiEngineObject object;
|
/*
|
||||||
object.classId = detection.class_id;
|
// Draw box and text
|
||||||
object.propability = detection.confidence;
|
cv::Rect box = detection.box;
|
||||||
object.rectangle.top = detection.bbox.y;
|
cv::Scalar color = detection.color;
|
||||||
object.rectangle.left = detection.bbox.x;
|
cv::rectangle(frame, box, color, 2);
|
||||||
object.rectangle.bottom = detection.bbox.y + detection.bbox.height;
|
std::string classString = detection.className + ' ' + std::to_string(detection.confidence).substr(0, 4);
|
||||||
object.rectangle.right = detection.bbox.x + detection.bbox.width;
|
//std::cout << "classString:" << classString << std::endl;
|
||||||
result.objects.append(object);
|
cv::Size textSize = cv::getTextSize(classString, cv::FONT_HERSHEY_DUPLEX, 1, 2, 0);
|
||||||
|
cv::Rect textBox(box.x, box.y - 40, textSize.width + 10, textSize.height + 20);
|
||||||
|
cv::rectangle(scaledImage, textBox, color, cv::FILLED);
|
||||||
|
cv::putText(scaledImage, classString, cv::Point(box.x + 5, box.y - 10), cv::FONT_HERSHEY_DUPLEX, 1, cv::Scalar(0, 0, 0), 2, 0);
|
||||||
|
*/
|
||||||
|
}
|
||||||
|
|
||||||
|
if (result.objects.empty() == false) {
|
||||||
|
result.frame = mInference.drawLabels(scaledImage, detections);
|
||||||
|
emit resultsReady(result);
|
||||||
|
}
|
||||||
|
|
||||||
|
mActive = false;
|
||||||
|
}
|
||||||
|
catch (const cv::Exception& e) {
|
||||||
|
std::cerr << "performInferenceSlot() Error: " << e.what() << std::endl;
|
||||||
}
|
}
|
||||||
|
|
||||||
result.frame = mEngine->draw_labels(scaledImage.clone(), detections);
|
|
||||||
emit resultsReady(result);
|
|
||||||
|
|
||||||
mActive = false;
|
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -14,5 +14,6 @@ public slots:
|
|||||||
void performInferenceSlot(cv::Mat frame) override;
|
void performInferenceSlot(cv::Mat frame) override;
|
||||||
|
|
||||||
private:
|
private:
|
||||||
InferenceEngine *mEngine;
|
//InferenceEngine *mEngine;
|
||||||
|
Inference mInference;
|
||||||
};
|
};
|
||||||
|
|||||||
+173
-150
@@ -1,162 +1,209 @@
|
|||||||
#include "inference.h"
|
#include "inference.h"
|
||||||
#include <algorithm>
|
|
||||||
#include <iostream>
|
|
||||||
|
|
||||||
const std::vector<std::string> InferenceEngine::CLASS_NAMES = {
|
Inference::Inference(const std::string &onnxModelPath, const cv::Size &modelInputShape, const std::string &classesTxtFile, const bool &runWithCuda)
|
||||||
"person", "bicycle", "car", "motorcycle", "airplane", "bus", "train", "truck", "boat", "traffic light",
|
|
||||||
"fire hydrant", "stop sign", "parking meter", "bench", "bird", "cat", "dog", "horse", "sheep", "cow",
|
|
||||||
"elephant", "bear", "zebra", "giraffe", "backpack", "umbrella", "handbag", "tie", "suitcase", "frisbee",
|
|
||||||
"skis", "snowboard", "sports ball", "kite", "baseball bat", "baseball glove", "skateboard", "surfboard",
|
|
||||||
"tennis racket", "bottle", "wine glass", "cup", "fork", "knife", "spoon", "bowl", "banana", "apple",
|
|
||||||
"sandwich", "orange", "broccoli", "carrot", "hot dog", "pizza", "donut", "cake", "chair", "couch",
|
|
||||||
"potted plant", "bed", "dining table", "toilet", "tv", "laptop", "mouse", "remote", "keyboard",
|
|
||||||
"cell phone", "microwave", "oven", "toaster", "sink", "refrigerator", "book", "clock", "vase",
|
|
||||||
"scissors", "teddy bear", "hair drier", "toothbrush"};
|
|
||||||
|
|
||||||
InferenceEngine::InferenceEngine(const std::string &model_path)
|
|
||||||
: env(ORT_LOGGING_LEVEL_WARNING, "ONNXRuntime"),
|
|
||||||
session_options(),
|
|
||||||
session(env, model_path.c_str(), session_options),
|
|
||||||
input_shape{1, 3, 640, 640}
|
|
||||||
{
|
{
|
||||||
session_options.SetIntraOpNumThreads(1);
|
modelPath = onnxModelPath;
|
||||||
session_options.SetGraphOptimizationLevel(GraphOptimizationLevel::ORT_ENABLE_BASIC);
|
modelShape = modelInputShape;
|
||||||
|
classesPath = classesTxtFile;
|
||||||
|
cudaEnabled = runWithCuda;
|
||||||
|
|
||||||
|
std::cout << "SIZE = " << modelInputShape.width << "x" << modelInputShape.height << std::endl;
|
||||||
|
loadOnnxNetwork();
|
||||||
|
// loadClassesFromFile(); The classes are hard-coded for this example
|
||||||
}
|
}
|
||||||
|
|
||||||
InferenceEngine::~InferenceEngine() {}
|
std::vector<Detection> Inference::runInference(const cv::Mat &input)
|
||||||
|
|
||||||
/*
|
|
||||||
* Function to preprocess the image
|
|
||||||
*
|
|
||||||
* @param image_path: path to the image
|
|
||||||
* @param orig_width: original width of the image
|
|
||||||
* @param orig_height: original height of the image
|
|
||||||
*
|
|
||||||
* @return: vector of floats representing the preprocessed image
|
|
||||||
*/
|
|
||||||
std::vector<float> InferenceEngine::preprocessImage(const cv::Mat &image)
|
|
||||||
{
|
{
|
||||||
if (image.empty())
|
cv::Mat modelInput = input;
|
||||||
|
if (letterBoxForSquare && modelShape.width == modelShape.height)
|
||||||
|
modelInput = formatToSquare(modelInput);
|
||||||
|
|
||||||
|
cv::Mat blob;
|
||||||
|
cv::dnn::blobFromImage(modelInput, blob, 1.0/255.0, modelShape, cv::Scalar(), true, false);
|
||||||
|
net.setInput(blob);
|
||||||
|
|
||||||
|
std::vector<cv::Mat> outputs;
|
||||||
|
net.forward(outputs, net.getUnconnectedOutLayersNames());
|
||||||
|
|
||||||
|
int rows = outputs[0].size[1];
|
||||||
|
int dimensions = outputs[0].size[2];
|
||||||
|
|
||||||
|
bool yolov8 = false;
|
||||||
|
// yolov5 has an output of shape (batchSize, 25200, 85) (Num classes + box[x,y,w,h] + confidence[c])
|
||||||
|
// yolov8 has an output of shape (batchSize, 84, 8400) (Num classes + box[x,y,w,h])
|
||||||
|
if (dimensions > rows) // Check if the shape[2] is more than shape[1] (yolov8)
|
||||||
{
|
{
|
||||||
throw std::runtime_error("Could not read the image");
|
yolov8 = true;
|
||||||
|
rows = outputs[0].size[2];
|
||||||
|
dimensions = outputs[0].size[1];
|
||||||
|
|
||||||
|
outputs[0] = outputs[0].reshape(1, dimensions);
|
||||||
|
cv::transpose(outputs[0], outputs[0]);
|
||||||
}
|
}
|
||||||
|
float *data = (float *)outputs[0].data;
|
||||||
|
|
||||||
cv::Mat resized_image;
|
float x_factor = modelInput.cols / modelShape.width;
|
||||||
cv::resize(image, resized_image, cv::Size(input_shape[2], input_shape[3]));
|
float y_factor = modelInput.rows / modelShape.height;
|
||||||
|
|
||||||
resized_image.convertTo(resized_image, CV_32F, 1.0 / 255);
|
std::vector<int> class_ids;
|
||||||
|
std::vector<float> confidences;
|
||||||
|
std::vector<cv::Rect> boxes;
|
||||||
|
|
||||||
std::vector<cv::Mat> channels(3);
|
for (int i = 0; i < rows; ++i)
|
||||||
cv::split(resized_image, channels);
|
|
||||||
|
|
||||||
std::vector<float> input_tensor_values;
|
|
||||||
for (int c = 0; c < 3; ++c)
|
|
||||||
{
|
{
|
||||||
input_tensor_values.insert(input_tensor_values.end(), (float *)channels[c].data, (float *)channels[c].data + input_shape[2] * input_shape[3]);
|
if (yolov8)
|
||||||
}
|
|
||||||
|
|
||||||
return input_tensor_values;
|
|
||||||
}
|
|
||||||
|
|
||||||
/*
|
|
||||||
* Function to filter the detections based on the confidence threshold
|
|
||||||
*
|
|
||||||
* @param results: vector of floats representing the output tensor
|
|
||||||
* @param confidence_threshold: minimum confidence threshold
|
|
||||||
* @param img_width: width of the input image
|
|
||||||
* @param img_height: height of the input image
|
|
||||||
* @param orig_width: original width of the image
|
|
||||||
* @param orig_height: original height of the image
|
|
||||||
*
|
|
||||||
* @return: vector of Detection objects
|
|
||||||
|
|
||||||
*/
|
|
||||||
std::vector<Detection> InferenceEngine::filterDetections(const std::vector<float> &results, float confidence_threshold, int img_width, int img_height, int orig_width, int orig_height)
|
|
||||||
{
|
|
||||||
std::vector<Detection> detections;
|
|
||||||
const int num_detections = results.size() / 6;
|
|
||||||
|
|
||||||
for (int i = 0; i < num_detections; ++i)
|
|
||||||
{
|
|
||||||
float left = results[i * 6 + 0];
|
|
||||||
float top = results[i * 6 + 1];
|
|
||||||
float right = results[i * 6 + 2];
|
|
||||||
float bottom = results[i * 6 + 3];
|
|
||||||
float confidence = results[i * 6 + 4];
|
|
||||||
int class_id = results[i * 6 + 5];
|
|
||||||
|
|
||||||
if (confidence >= confidence_threshold)
|
|
||||||
{
|
{
|
||||||
int x = static_cast<int>(left * orig_width / img_width);
|
float *classes_scores = data+4;
|
||||||
int y = static_cast<int>(top * orig_height / img_height);
|
|
||||||
int width = static_cast<int>((right - left) * orig_width / img_width);
|
|
||||||
int height = static_cast<int>((bottom - top) * orig_height / img_height);
|
|
||||||
|
|
||||||
detections.push_back(
|
cv::Mat scores(1, classes.size(), CV_32FC1, classes_scores);
|
||||||
{confidence,
|
cv::Point class_id;
|
||||||
cv::Rect(x, y, width, height),
|
double maxClassScore;
|
||||||
class_id,
|
|
||||||
CLASS_NAMES[class_id]});
|
minMaxLoc(scores, 0, &maxClassScore, 0, &class_id);
|
||||||
|
|
||||||
|
if (maxClassScore > modelScoreThreshold)
|
||||||
|
{
|
||||||
|
confidences.push_back(maxClassScore);
|
||||||
|
class_ids.push_back(class_id.x);
|
||||||
|
|
||||||
|
float x = data[0];
|
||||||
|
float y = data[1];
|
||||||
|
float w = data[2];
|
||||||
|
float h = data[3];
|
||||||
|
|
||||||
|
int left = int((x - 0.5 * w) * x_factor);
|
||||||
|
int top = int((y - 0.5 * h) * y_factor);
|
||||||
|
|
||||||
|
int width = int(w * x_factor);
|
||||||
|
int height = int(h * y_factor);
|
||||||
|
|
||||||
|
boxes.push_back(cv::Rect(left, top, width, height));
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
else // yolov5
|
||||||
|
{
|
||||||
|
float confidence = data[4];
|
||||||
|
|
||||||
|
if (confidence >= modelConfidenceThreshold)
|
||||||
|
{
|
||||||
|
float *classes_scores = data+5;
|
||||||
|
|
||||||
|
cv::Mat scores(1, classes.size(), CV_32FC1, classes_scores);
|
||||||
|
cv::Point class_id;
|
||||||
|
double max_class_score;
|
||||||
|
|
||||||
|
minMaxLoc(scores, 0, &max_class_score, 0, &class_id);
|
||||||
|
|
||||||
|
if (max_class_score > modelScoreThreshold)
|
||||||
|
{
|
||||||
|
confidences.push_back(confidence);
|
||||||
|
class_ids.push_back(class_id.x);
|
||||||
|
|
||||||
|
float x = data[0];
|
||||||
|
float y = data[1];
|
||||||
|
float w = data[2];
|
||||||
|
float h = data[3];
|
||||||
|
|
||||||
|
int left = int((x - 0.5 * w) * x_factor);
|
||||||
|
int top = int((y - 0.5 * h) * y_factor);
|
||||||
|
|
||||||
|
int width = int(w * x_factor);
|
||||||
|
int height = int(h * y_factor);
|
||||||
|
|
||||||
|
boxes.push_back(cv::Rect(left, top, width, height));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
data += dimensions;
|
||||||
|
}
|
||||||
|
|
||||||
|
std::vector<int> nms_result;
|
||||||
|
cv::dnn::NMSBoxes(boxes, confidences, modelScoreThreshold, modelNMSThreshold, nms_result);
|
||||||
|
|
||||||
|
std::vector<Detection> detections{};
|
||||||
|
for (unsigned long i = 0; i < nms_result.size(); ++i)
|
||||||
|
{
|
||||||
|
int idx = nms_result[i];
|
||||||
|
|
||||||
|
Detection result;
|
||||||
|
result.class_id = class_ids[idx];
|
||||||
|
result.confidence = confidences[idx];
|
||||||
|
|
||||||
|
std::random_device rd;
|
||||||
|
std::mt19937 gen(rd());
|
||||||
|
std::uniform_int_distribution<int> dis(100, 255);
|
||||||
|
result.color = cv::Scalar(dis(gen),
|
||||||
|
dis(gen),
|
||||||
|
dis(gen));
|
||||||
|
|
||||||
|
result.className = classes[result.class_id];
|
||||||
|
result.box = boxes[idx];
|
||||||
|
|
||||||
|
detections.push_back(result);
|
||||||
}
|
}
|
||||||
|
|
||||||
return detections;
|
return detections;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
void Inference::loadClassesFromFile()
|
||||||
/*
|
|
||||||
* Function to run inference
|
|
||||||
*
|
|
||||||
* @param input_tensor_values: vector of floats representing the input tensor
|
|
||||||
*
|
|
||||||
* @return: vector of floats representing the output tensor
|
|
||||||
*/
|
|
||||||
std::vector<float> InferenceEngine::runInference(const std::vector<float> &input_tensor_values)
|
|
||||||
{
|
{
|
||||||
Ort::AllocatorWithDefaultOptions allocator;
|
std::ifstream inputFile(classesPath);
|
||||||
|
if (inputFile.is_open())
|
||||||
std::string input_name = getInputName();
|
{
|
||||||
std::string output_name = getOutputName();
|
std::string classLine;
|
||||||
|
while (std::getline(inputFile, classLine))
|
||||||
const char *input_name_ptr = input_name.c_str();
|
classes.push_back(classLine);
|
||||||
const char *output_name_ptr = output_name.c_str();
|
inputFile.close();
|
||||||
|
}
|
||||||
Ort::MemoryInfo memory_info = Ort::MemoryInfo::CreateCpu(OrtArenaAllocator, OrtMemTypeDefault);
|
|
||||||
Ort::Value input_tensor = Ort::Value::CreateTensor<float>(memory_info, const_cast<float *>(input_tensor_values.data()), input_tensor_values.size(), input_shape.data(), input_shape.size());
|
|
||||||
|
|
||||||
auto output_tensors = session.Run(Ort::RunOptions{nullptr}, &input_name_ptr, &input_tensor, 1, &output_name_ptr, 1);
|
|
||||||
|
|
||||||
float *floatarr = output_tensors[0].GetTensorMutableData<float>();
|
|
||||||
size_t output_tensor_size = output_tensors[0].GetTensorTypeAndShapeInfo().GetElementCount();
|
|
||||||
|
|
||||||
return std::vector<float>(floatarr, floatarr + output_tensor_size);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/*
|
void Inference::loadOnnxNetwork()
|
||||||
* Function to draw the labels on the image
|
{
|
||||||
*
|
printf("loadOnnxNetwork() starts\n");
|
||||||
* @param image: input image
|
|
||||||
* @param detections: vector of Detection objects
|
|
||||||
*
|
|
||||||
* @return: image with labels drawn
|
|
||||||
|
|
||||||
*/
|
net = cv::dnn::readNetFromONNX(modelPath);
|
||||||
cv::Mat InferenceEngine::draw_labels(const cv::Mat &image, const std::vector<Detection> &detections)
|
|
||||||
|
if (cudaEnabled)
|
||||||
|
{
|
||||||
|
std::cout << "\nRunning on CUDA" << std::endl;
|
||||||
|
net.setPreferableBackend(cv::dnn::DNN_BACKEND_CUDA);
|
||||||
|
net.setPreferableTarget(cv::dnn::DNN_TARGET_CUDA);
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
std::cout << "\nRunning on CPU" << std::endl;
|
||||||
|
net.setPreferableBackend(cv::dnn::DNN_BACKEND_OPENCV);
|
||||||
|
net.setPreferableTarget(cv::dnn::DNN_TARGET_CPU);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
cv::Mat Inference::formatToSquare(const cv::Mat &source)
|
||||||
|
{
|
||||||
|
int col = source.cols;
|
||||||
|
int row = source.rows;
|
||||||
|
int _max = MAX(col, row);
|
||||||
|
cv::Mat result = cv::Mat::zeros(_max, _max, CV_8UC3);
|
||||||
|
source.copyTo(result(cv::Rect(0, 0, col, row)));
|
||||||
|
return result;
|
||||||
|
}
|
||||||
|
|
||||||
|
cv::Mat Inference::drawLabels(const cv::Mat &image, const std::vector<Detection> &detections)
|
||||||
{
|
{
|
||||||
cv::Mat result = image.clone();
|
cv::Mat result = image.clone();
|
||||||
|
|
||||||
for (const auto &detection : detections)
|
for (const auto &detection : detections)
|
||||||
{
|
{
|
||||||
cv::rectangle(result, detection.bbox, cv::Scalar(0, 255, 0), 2);
|
cv::rectangle(result, detection.box, cv::Scalar(0, 255, 0), 2);
|
||||||
std::string label = detection.class_name + ": " + std::to_string(detection.confidence);
|
std::string label = detection.className + ": " + std::to_string(detection.confidence);
|
||||||
|
|
||||||
int baseLine;
|
int baseLine;
|
||||||
cv::Size labelSize = cv::getTextSize(label, cv::FONT_HERSHEY_SIMPLEX, 0.5, 1, &baseLine);
|
cv::Size labelSize = cv::getTextSize(label, cv::FONT_HERSHEY_SIMPLEX, 0.5, 1, &baseLine);
|
||||||
|
|
||||||
cv::rectangle(
|
cv::rectangle(
|
||||||
result,
|
result,
|
||||||
cv::Point(detection.bbox.x, detection.bbox.y - labelSize.height),
|
cv::Point(detection.box.x, detection.box.y - labelSize.height),
|
||||||
cv::Point(detection.bbox.x + labelSize.width, detection.bbox.y + baseLine),
|
cv::Point(detection.box.x + labelSize.width, detection.box.y + baseLine),
|
||||||
cv::Scalar(255, 255, 255),
|
cv::Scalar(255, 255, 255),
|
||||||
cv::FILLED);
|
cv::FILLED);
|
||||||
|
|
||||||
@@ -164,8 +211,8 @@ cv::Mat InferenceEngine::draw_labels(const cv::Mat &image, const std::vector<Det
|
|||||||
result,
|
result,
|
||||||
label,
|
label,
|
||||||
cv::Point(
|
cv::Point(
|
||||||
detection.bbox.x,
|
detection.box.x,
|
||||||
detection.bbox.y),
|
detection.box.y),
|
||||||
cv::FONT_HERSHEY_SIMPLEX,
|
cv::FONT_HERSHEY_SIMPLEX,
|
||||||
0.5,
|
0.5,
|
||||||
cv::Scalar(0, 0, 0),
|
cv::Scalar(0, 0, 0),
|
||||||
@@ -174,27 +221,3 @@ cv::Mat InferenceEngine::draw_labels(const cv::Mat &image, const std::vector<Det
|
|||||||
|
|
||||||
return result;
|
return result;
|
||||||
}
|
}
|
||||||
|
|
||||||
/*
|
|
||||||
* Function to get the input name
|
|
||||||
*
|
|
||||||
* @return: name of the input tensor
|
|
||||||
*/
|
|
||||||
std::string InferenceEngine::getInputName()
|
|
||||||
{
|
|
||||||
Ort::AllocatorWithDefaultOptions allocator;
|
|
||||||
Ort::AllocatedStringPtr name_allocator = session.GetInputNameAllocated(0, allocator);
|
|
||||||
return std::string(name_allocator.get());
|
|
||||||
}
|
|
||||||
|
|
||||||
/*
|
|
||||||
* Function to get the output name
|
|
||||||
*
|
|
||||||
* @return: name of the output tensor
|
|
||||||
*/
|
|
||||||
std::string InferenceEngine::getOutputName()
|
|
||||||
{
|
|
||||||
Ort::AllocatorWithDefaultOptions allocator;
|
|
||||||
Ort::AllocatedStringPtr name_allocator = session.GetOutputNameAllocated(0, allocator);
|
|
||||||
return std::string(name_allocator.get());
|
|
||||||
}
|
|
||||||
|
|||||||
@@ -1,44 +1,51 @@
|
|||||||
#ifndef INFERENCE_H
|
#pragma once
|
||||||
#define INFERENCE_H
|
|
||||||
|
|
||||||
#include <onnxruntime_cxx_api.h>
|
// Cpp native
|
||||||
#include <opencv2/opencv.hpp>
|
#include <fstream>
|
||||||
#include <vector>
|
#include <vector>
|
||||||
#include <string>
|
#include <string>
|
||||||
|
#include <random>
|
||||||
|
|
||||||
|
// OpenCV / DNN / Inference
|
||||||
|
#include <opencv2/imgproc.hpp>
|
||||||
|
#include <opencv2/opencv.hpp>
|
||||||
|
#include <opencv2/dnn.hpp>
|
||||||
|
|
||||||
struct Detection
|
struct Detection
|
||||||
{
|
{
|
||||||
float confidence;
|
int class_id{0};
|
||||||
cv::Rect bbox;
|
std::string className{};
|
||||||
int class_id;
|
float confidence{0.0};
|
||||||
std::string class_name;
|
cv::Scalar color{};
|
||||||
|
cv::Rect box{};
|
||||||
};
|
};
|
||||||
|
|
||||||
|
class Inference
|
||||||
class InferenceEngine
|
|
||||||
{
|
{
|
||||||
public:
|
public:
|
||||||
InferenceEngine(const std::string &model_path);
|
Inference(const std::string &onnxModelPath, const cv::Size &modelInputShape = {640, 640}, const std::string &classesTxtFile = "", const bool &runWithCuda = false);
|
||||||
~InferenceEngine();
|
std::vector<Detection> runInference(const cv::Mat &input);
|
||||||
|
cv::Mat drawLabels(const cv::Mat &image, const std::vector<Detection> &detections);
|
||||||
std::vector<float> preprocessImage(const cv::Mat &image);
|
|
||||||
std::vector<Detection> filterDetections(const std::vector<float> &results, float confidence_threshold, int img_width, int img_height, int orig_width, int orig_height);
|
|
||||||
std::vector<float> runInference(const std::vector<float> &input_tensor_values);
|
|
||||||
|
|
||||||
cv::Mat draw_labels(const cv::Mat &image, const std::vector<Detection> &detections);
|
|
||||||
|
|
||||||
std::vector<int64_t> input_shape;
|
|
||||||
|
|
||||||
private:
|
private:
|
||||||
Ort::Env env;
|
void loadClassesFromFile();
|
||||||
Ort::SessionOptions session_options;
|
void loadOnnxNetwork();
|
||||||
Ort::Session session;
|
|
||||||
|
|
||||||
std::string getInputName();
|
cv::Mat formatToSquare(const cv::Mat &source);
|
||||||
std::string getOutputName();
|
|
||||||
|
|
||||||
static const std::vector<std::string> CLASS_NAMES;
|
std::string modelPath{};
|
||||||
|
std::string classesPath{};
|
||||||
|
bool cudaEnabled{};
|
||||||
|
|
||||||
|
std::vector<std::string> classes{"person", "bicycle", "car", "motorcycle", "airplane", "bus", "train", "truck", "boat", "traffic light", "fire hydrant", "stop sign", "parking meter", "bench", "bird", "cat", "dog", "horse", "sheep", "cow", "elephant", "bear", "zebra", "giraffe", "backpack", "umbrella", "handbag", "tie", "suitcase", "frisbee", "skis", "snowboard", "sports ball", "kite", "baseball bat", "baseball glove", "skateboard", "surfboard", "tennis racket", "bottle", "wine glass", "cup", "fork", "knife", "spoon", "bowl", "banana", "apple", "sandwich", "orange", "broccoli", "carrot", "hot dog", "pizza", "donut", "cake", "chair", "couch", "potted plant", "bed", "dining table", "toilet", "tv", "laptop", "mouse", "remote", "keyboard", "cell phone", "microwave", "oven", "toaster", "sink", "refrigerator", "book", "clock", "vase", "scissors", "teddy bear", "hair drier", "toothbrush"};
|
||||||
|
|
||||||
|
cv::Size2f modelShape{};
|
||||||
|
|
||||||
|
float modelConfidenceThreshold {0.25};
|
||||||
|
float modelScoreThreshold {0.45};
|
||||||
|
float modelNMSThreshold {0.50};
|
||||||
|
|
||||||
|
bool letterBoxForSquare = false;
|
||||||
|
|
||||||
|
cv::dnn::Net net;
|
||||||
};
|
};
|
||||||
|
|
||||||
|
|
||||||
#endif // INFERENCE_H
|
|
||||||
|
|||||||
Reference in New Issue
Block a user