Chaneges to opi_rtsp test application

- refactoring
  - can use normal YOLOv8 files converted to ONNX format
  - does not work with azaion ONNX files!
This commit is contained in:
Tuomas Järvinen
2024-07-10 19:53:19 +02:00
parent 3d39d8fd99
commit 683f2d538b
6 changed files with 13 additions and 24 deletions
@@ -0,0 +1,93 @@
#include <QDebug>
#include <QThread>
#include "aiengineinferenceopencvonnx.h"
const int INFERENCE_SQUARE_WIDTH = 640;
const int INFERENCE_SQUARE_HEIGHT = 640;
AiEngineInferenceOpencvOnnx::AiEngineInferenceOpencvOnnx(QString modelPath, QObject *parent)
: AiEngineInference{modelPath, parent},
mInference(modelPath.toStdString(), cv::Size(640, 640), "classes.txt")
{
//qDebug() << "TUOMAS test mModelPath=" << mModelPath;
//mEngine = new InferenceEngine(modelPath.toStdString());
//mInference = new Inference(modelPath.toStdString(), cv::Size(INFERENCE_SQUARE_WIDTH, INFERENCE_SQUARE_HEIGHT), "classes.txt");
}
cv::Mat resizeAndPad(const cv::Mat& src)
{
// Calculate the aspect ratio
float aspectRatio = static_cast<float>(src.cols) / src.rows;
// Determine new size while maintaining aspect ratio
int newWidth = src.cols;
int newHeight = src.rows;
if (src.cols > INFERENCE_SQUARE_WIDTH || src.rows > INFERENCE_SQUARE_HEIGHT) {
if (aspectRatio > 1)
{
// Width is greater than height
newWidth = INFERENCE_SQUARE_WIDTH;
newHeight = static_cast<int>(INFERENCE_SQUARE_WIDTH / aspectRatio);
}
else {
// Height is greater than or equal to width
newHeight = INFERENCE_SQUARE_HEIGHT;
newWidth = static_cast<int>(INFERENCE_SQUARE_HEIGHT * aspectRatio);
}
}
// Resize the original image if needed
cv::Mat resized;
cv::resize(src, resized, cv::Size(newWidth, newHeight));
// Create a new 640x640 image with a black background
cv::Mat output(INFERENCE_SQUARE_HEIGHT, INFERENCE_SQUARE_WIDTH, src.type(), cv::Scalar(0, 0, 0));
// Copy the resized image to the top-left corner of the new image
resized.copyTo(output(cv::Rect(0, 0, resized.cols, resized.rows)));
return output;
}
void AiEngineInferenceOpencvOnnx::performInferenceSlot(cv::Mat frame)
{
try {
//qDebug() << "performInferenceSlot() in thread: " << QThread::currentThreadId();
mActive = true;
cv::Mat scaledImage = resizeAndPad(frame);
std::vector<Detection> detections = mInference.runInference(scaledImage);
AiEngineInferenceResult result;
//qDebug() << "performInferenceSlot() found " << detections.size() << " objects";
for (uint i = 0; i < detections.size(); ++i) {
const Detection &detection = detections[i];
// Add detected objects to the results
AiEngineObject object;
object.classId = detection.class_id;
object.propability = detection.confidence;
object.rectangle.top = detection.box.y;
object.rectangle.left = detection.box.x;
object.rectangle.bottom = detection.box.y + detection.box.height;
object.rectangle.right = detection.box.x + detection.box.width;
result.objects.append(object);
}
if (result.objects.empty() == false) {
result.frame = mInference.drawLabels(scaledImage, detections);
emit resultsReady(result);
}
mActive = false;
}
catch (const cv::Exception& e) {
std::cerr << "performInferenceSlot() Error: " << e.what() << std::endl;
}
}
@@ -0,0 +1,19 @@
#pragma once
#include <QObject>
#include "aiengineinference.h"
#include "src-opencv-onnx/inference.h"
class AiEngineInferenceOpencvOnnx : public AiEngineInference
{
Q_OBJECT
public:
explicit AiEngineInferenceOpencvOnnx(QString modelPath, QObject *parent = nullptr);
public slots:
void performInferenceSlot(cv::Mat frame) override;
private:
//InferenceEngine *mEngine;
Inference mInference;
};
+223
View File
@@ -0,0 +1,223 @@
#include "inference.h"
Inference::Inference(const std::string &onnxModelPath, const cv::Size &modelInputShape, const std::string &classesTxtFile, const bool &runWithCuda)
{
modelPath = onnxModelPath;
modelShape = modelInputShape;
classesPath = classesTxtFile;
cudaEnabled = runWithCuda;
std::cout << "SIZE = " << modelInputShape.width << "x" << modelInputShape.height << std::endl;
loadOnnxNetwork();
// loadClassesFromFile(); The classes are hard-coded for this example
}
std::vector<Detection> Inference::runInference(const cv::Mat &input)
{
cv::Mat modelInput = input;
if (letterBoxForSquare && modelShape.width == modelShape.height)
modelInput = formatToSquare(modelInput);
cv::Mat blob;
cv::dnn::blobFromImage(modelInput, blob, 1.0/255.0, modelShape, cv::Scalar(), true, false);
net.setInput(blob);
std::vector<cv::Mat> outputs;
net.forward(outputs, net.getUnconnectedOutLayersNames());
int rows = outputs[0].size[1];
int dimensions = outputs[0].size[2];
bool yolov8 = false;
// yolov5 has an output of shape (batchSize, 25200, 85) (Num classes + box[x,y,w,h] + confidence[c])
// yolov8 has an output of shape (batchSize, 84, 8400) (Num classes + box[x,y,w,h])
if (dimensions > rows) // Check if the shape[2] is more than shape[1] (yolov8)
{
yolov8 = true;
rows = outputs[0].size[2];
dimensions = outputs[0].size[1];
outputs[0] = outputs[0].reshape(1, dimensions);
cv::transpose(outputs[0], outputs[0]);
}
float *data = (float *)outputs[0].data;
float x_factor = modelInput.cols / modelShape.width;
float y_factor = modelInput.rows / modelShape.height;
std::vector<int> class_ids;
std::vector<float> confidences;
std::vector<cv::Rect> boxes;
for (int i = 0; i < rows; ++i)
{
if (yolov8)
{
float *classes_scores = data+4;
cv::Mat scores(1, classes.size(), CV_32FC1, classes_scores);
cv::Point class_id;
double maxClassScore;
minMaxLoc(scores, 0, &maxClassScore, 0, &class_id);
if (maxClassScore > modelScoreThreshold)
{
confidences.push_back(maxClassScore);
class_ids.push_back(class_id.x);
float x = data[0];
float y = data[1];
float w = data[2];
float h = data[3];
int left = int((x - 0.5 * w) * x_factor);
int top = int((y - 0.5 * h) * y_factor);
int width = int(w * x_factor);
int height = int(h * y_factor);
boxes.push_back(cv::Rect(left, top, width, height));
}
}
else // yolov5
{
float confidence = data[4];
if (confidence >= modelConfidenceThreshold)
{
float *classes_scores = data+5;
cv::Mat scores(1, classes.size(), CV_32FC1, classes_scores);
cv::Point class_id;
double max_class_score;
minMaxLoc(scores, 0, &max_class_score, 0, &class_id);
if (max_class_score > modelScoreThreshold)
{
confidences.push_back(confidence);
class_ids.push_back(class_id.x);
float x = data[0];
float y = data[1];
float w = data[2];
float h = data[3];
int left = int((x - 0.5 * w) * x_factor);
int top = int((y - 0.5 * h) * y_factor);
int width = int(w * x_factor);
int height = int(h * y_factor);
boxes.push_back(cv::Rect(left, top, width, height));
}
}
}
data += dimensions;
}
std::vector<int> nms_result;
cv::dnn::NMSBoxes(boxes, confidences, modelScoreThreshold, modelNMSThreshold, nms_result);
std::vector<Detection> detections{};
for (unsigned long i = 0; i < nms_result.size(); ++i)
{
int idx = nms_result[i];
Detection result;
result.class_id = class_ids[idx];
result.confidence = confidences[idx];
std::random_device rd;
std::mt19937 gen(rd());
std::uniform_int_distribution<int> dis(100, 255);
result.color = cv::Scalar(dis(gen),
dis(gen),
dis(gen));
result.className = classes[result.class_id];
result.box = boxes[idx];
detections.push_back(result);
}
return detections;
}
void Inference::loadClassesFromFile()
{
std::ifstream inputFile(classesPath);
if (inputFile.is_open())
{
std::string classLine;
while (std::getline(inputFile, classLine))
classes.push_back(classLine);
inputFile.close();
}
}
void Inference::loadOnnxNetwork()
{
printf("loadOnnxNetwork() starts\n");
net = cv::dnn::readNetFromONNX(modelPath);
if (cudaEnabled)
{
std::cout << "\nRunning on CUDA" << std::endl;
net.setPreferableBackend(cv::dnn::DNN_BACKEND_CUDA);
net.setPreferableTarget(cv::dnn::DNN_TARGET_CUDA);
}
else
{
std::cout << "\nRunning on CPU" << std::endl;
net.setPreferableBackend(cv::dnn::DNN_BACKEND_OPENCV);
net.setPreferableTarget(cv::dnn::DNN_TARGET_CPU);
}
}
cv::Mat Inference::formatToSquare(const cv::Mat &source)
{
int col = source.cols;
int row = source.rows;
int _max = MAX(col, row);
cv::Mat result = cv::Mat::zeros(_max, _max, CV_8UC3);
source.copyTo(result(cv::Rect(0, 0, col, row)));
return result;
}
cv::Mat Inference::drawLabels(const cv::Mat &image, const std::vector<Detection> &detections)
{
cv::Mat result = image.clone();
for (const auto &detection : detections)
{
cv::rectangle(result, detection.box, cv::Scalar(0, 255, 0), 2);
std::string label = detection.className + ": " + std::to_string(detection.confidence);
int baseLine;
cv::Size labelSize = cv::getTextSize(label, cv::FONT_HERSHEY_SIMPLEX, 0.5, 1, &baseLine);
cv::rectangle(
result,
cv::Point(detection.box.x, detection.box.y - labelSize.height),
cv::Point(detection.box.x + labelSize.width, detection.box.y + baseLine),
cv::Scalar(255, 255, 255),
cv::FILLED);
cv::putText(
result,
label,
cv::Point(
detection.box.x,
detection.box.y),
cv::FONT_HERSHEY_SIMPLEX,
0.5,
cv::Scalar(0, 0, 0),
1);
}
return result;
}
+51
View File
@@ -0,0 +1,51 @@
#pragma once
// Cpp native
#include <fstream>
#include <vector>
#include <string>
#include <random>
// OpenCV / DNN / Inference
#include <opencv2/imgproc.hpp>
#include <opencv2/opencv.hpp>
#include <opencv2/dnn.hpp>
struct Detection
{
int class_id{0};
std::string className{};
float confidence{0.0};
cv::Scalar color{};
cv::Rect box{};
};
class Inference
{
public:
Inference(const std::string &onnxModelPath, const cv::Size &modelInputShape = {640, 640}, const std::string &classesTxtFile = "", const bool &runWithCuda = false);
std::vector<Detection> runInference(const cv::Mat &input);
cv::Mat drawLabels(const cv::Mat &image, const std::vector<Detection> &detections);
private:
void loadClassesFromFile();
void loadOnnxNetwork();
cv::Mat formatToSquare(const cv::Mat &source);
std::string modelPath{};
std::string classesPath{};
bool cudaEnabled{};
std::vector<std::string> classes{"person", "bicycle", "car", "motorcycle", "airplane", "bus", "train", "truck", "boat", "traffic light", "fire hydrant", "stop sign", "parking meter", "bench", "bird", "cat", "dog", "horse", "sheep", "cow", "elephant", "bear", "zebra", "giraffe", "backpack", "umbrella", "handbag", "tie", "suitcase", "frisbee", "skis", "snowboard", "sports ball", "kite", "baseball bat", "baseball glove", "skateboard", "surfboard", "tennis racket", "bottle", "wine glass", "cup", "fork", "knife", "spoon", "bowl", "banana", "apple", "sandwich", "orange", "broccoli", "carrot", "hot dog", "pizza", "donut", "cake", "chair", "couch", "potted plant", "bed", "dining table", "toilet", "tv", "laptop", "mouse", "remote", "keyboard", "cell phone", "microwave", "oven", "toaster", "sink", "refrigerator", "book", "clock", "vase", "scissors", "teddy bear", "hair drier", "toothbrush"};
cv::Size2f modelShape{};
float modelConfidenceThreshold {0.25};
float modelScoreThreshold {0.45};
float modelNMSThreshold {0.50};
bool letterBoxForSquare = false;
cv::dnn::Net net;
};