mirror of
https://github.com/azaion/autopilot.git
synced 2026-04-22 09:06:33 +00:00
Fully working YOLOv8 detection with ONNX runtime.
This commit is contained in:
@@ -3,10 +3,12 @@
|
|||||||
#include "aiengine.h"
|
#include "aiengine.h"
|
||||||
#include "aiengineinference.h"
|
#include "aiengineinference.h"
|
||||||
|
|
||||||
#ifdef OPI5_BUILD
|
#if defined(OPI5_BUILD)
|
||||||
#include "src-opi5/aiengineinferenceopi5.h"
|
#include "src-opi5/aiengineinferenceopi5.h"
|
||||||
#else
|
#elif defined(OPENCV_BUILD)
|
||||||
#include "src-opencv-onnx/aiengineinferenceopencvonnx.h"
|
#include "src-opencv-onnx/aiengineinferenceopencvonnx.h"
|
||||||
|
#else
|
||||||
|
#include "src-onnx-runtime/aiengineinferenceonnxruntime.h"
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
AiEngine::AiEngine(QString modelPath, QObject *parent)
|
AiEngine::AiEngine(QString modelPath, QObject *parent)
|
||||||
@@ -15,10 +17,12 @@ AiEngine::AiEngine(QString modelPath, QObject *parent)
|
|||||||
mRtspListener = new AiEngineRtspListener(this);
|
mRtspListener = new AiEngineRtspListener(this);
|
||||||
connect(mRtspListener, &AiEngineRtspListener::frameReceived, this, &AiEngine::frameReceivedSlot);
|
connect(mRtspListener, &AiEngineRtspListener::frameReceived, this, &AiEngine::frameReceivedSlot);
|
||||||
|
|
||||||
#ifdef OPI5_BUILD
|
#if defined(OPI5_BUILD)
|
||||||
mInference = new AiEngineInferenceOpi5(modelPath);
|
mInference = new AiEngineInferenceOpi5(modelPath);
|
||||||
#else
|
#elif defined(OPENCV_BUILD)
|
||||||
mInference = new AiEngineInferenceOpencvOnnx(modelPath);
|
mInference = new AiEngineInferenceOpencvOnnx(modelPath);
|
||||||
|
#else
|
||||||
|
mInference = new AiEngineInferencevOnnxRuntime(modelPath);
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
QThread *inferenceThread = new QThread(this);
|
QThread *inferenceThread = new QThread(this);
|
||||||
|
|||||||
@@ -12,3 +12,39 @@ bool AiEngineInference::isActive(void)
|
|||||||
{
|
{
|
||||||
return mActive;
|
return mActive;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
cv::Mat AiEngineInference::resizeAndPad(const cv::Mat& src)
|
||||||
|
{
|
||||||
|
// Calculate the aspect ratio
|
||||||
|
float aspectRatio = static_cast<float>(src.cols) / src.rows;
|
||||||
|
|
||||||
|
// Determine new size while maintaining aspect ratio
|
||||||
|
int newWidth = src.cols;
|
||||||
|
int newHeight = src.rows;
|
||||||
|
if (src.cols > INFERENCE_SQUARE_WIDTH || src.rows > INFERENCE_SQUARE_HEIGHT) {
|
||||||
|
if (aspectRatio > 1)
|
||||||
|
{
|
||||||
|
// Width is greater than height
|
||||||
|
newWidth = INFERENCE_SQUARE_WIDTH;
|
||||||
|
newHeight = static_cast<int>(INFERENCE_SQUARE_WIDTH / aspectRatio);
|
||||||
|
}
|
||||||
|
else {
|
||||||
|
// Height is greater than or equal to width
|
||||||
|
newHeight = INFERENCE_SQUARE_HEIGHT;
|
||||||
|
newWidth = static_cast<int>(INFERENCE_SQUARE_HEIGHT * aspectRatio);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Resize the original image if needed
|
||||||
|
cv::Mat resized;
|
||||||
|
cv::resize(src, resized, cv::Size(newWidth, newHeight));
|
||||||
|
|
||||||
|
// Create a new 640x640 image with a black background
|
||||||
|
cv::Mat output(INFERENCE_SQUARE_HEIGHT, INFERENCE_SQUARE_WIDTH, src.type(), cv::Scalar(0, 0, 0));
|
||||||
|
|
||||||
|
// Copy the resized image to the top-left corner of the new image
|
||||||
|
resized.copyTo(output(cv::Rect(0, 0, resized.cols, resized.rows)));
|
||||||
|
|
||||||
|
return output;
|
||||||
|
}
|
||||||
|
|||||||
@@ -4,6 +4,11 @@
|
|||||||
#include <QString>
|
#include <QString>
|
||||||
#include <QVector>
|
#include <QVector>
|
||||||
#include <opencv2/core.hpp>
|
#include <opencv2/core.hpp>
|
||||||
|
#include <opencv2/imgproc.hpp>
|
||||||
|
|
||||||
|
|
||||||
|
const int INFERENCE_SQUARE_WIDTH = 640;
|
||||||
|
const int INFERENCE_SQUARE_HEIGHT = 640;
|
||||||
|
|
||||||
|
|
||||||
class AiEngineRectangle {
|
class AiEngineRectangle {
|
||||||
@@ -38,6 +43,7 @@ public:
|
|||||||
bool isActive(void);
|
bool isActive(void);
|
||||||
|
|
||||||
protected:
|
protected:
|
||||||
|
cv::Mat resizeAndPad(const cv::Mat& src);
|
||||||
QString mModelPath;
|
QString mModelPath;
|
||||||
bool mActive;
|
bool mActive;
|
||||||
|
|
||||||
|
|||||||
@@ -17,14 +17,23 @@ opi5 {
|
|||||||
LIBS += /usr/local/lib/librknnrt.so
|
LIBS += /usr/local/lib/librknnrt.so
|
||||||
SOURCES += $$PWD/src-opi5/*.c $$PWD/src-opi5/*.cpp $$PWD/src-opi5/*.cc
|
SOURCES += $$PWD/src-opi5/*.c $$PWD/src-opi5/*.cpp $$PWD/src-opi5/*.cc
|
||||||
HEADERS += $$PWD/src-opi5/*.h
|
HEADERS += $$PWD/src-opi5/*.h
|
||||||
} else {
|
} else:opencv {
|
||||||
|
message("OpenCV build")
|
||||||
|
message("You must use YOLOv8 ONNX files. Azaion model does not work!")
|
||||||
|
message("OpenCV must be version 4.10.0 installed to /usr/local/")
|
||||||
|
QMAKE_CXXFLAGS += -DOPENCV_BUILD
|
||||||
|
QMAKE_LFLAGS += -Wl,-rpath,/usr/local/lib
|
||||||
|
SOURCES += $$PWD/src-opencv-onnx/*.cpp
|
||||||
|
HEADERS += $$PWD/src-opencv-onnx/*.h
|
||||||
|
}
|
||||||
|
else {
|
||||||
message("ONNX build")
|
message("ONNX build")
|
||||||
message("You must use YOLOv8 ONNX files")
|
message("You must use YOLOv8 ONNX files. Azaion ONNX model also works fine.")
|
||||||
QMAKE_CXXFLAGS += -DONNX_BUILD
|
QMAKE_CXXFLAGS += -DONNX_BUILD
|
||||||
INCLUDEPATH += /opt/onnxruntime-linux-x64-1.18.0/include
|
INCLUDEPATH += /opt/onnxruntime-linux-x64-1.18.0/include
|
||||||
LIBS += /opt/onnxruntime-linux-x64-1.18.0/lib/libonnxruntime.so.1.18.0
|
LIBS += /opt/onnxruntime-linux-x64-1.18.0/lib/libonnxruntime.so.1.18.0
|
||||||
QMAKE_LFLAGS += -Wl,-rpath,/opt/onnxruntime-linux-x64-1.18.0/lib
|
QMAKE_LFLAGS += -Wl,-rpath,/opt/onnxruntime-linux-x64-1.18.0/lib
|
||||||
QMAKE_LFLAGS += -Wl,-rpath,/usr/local/lib
|
QMAKE_LFLAGS += -Wl,-rpath,/usr/local/lib
|
||||||
SOURCES += $$PWD/src-opencv-onnx/*.cpp
|
SOURCES += $$PWD/src-onnx-runtime/*.cpp
|
||||||
HEADERS += $$PWD/src-opencv-onnx/*.h
|
HEADERS += $$PWD/src-onnx-runtime/*.h
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -0,0 +1,97 @@
|
|||||||
|
#include <QDebug>
|
||||||
|
#include <QThread>
|
||||||
|
#include <vector>
|
||||||
|
#include "aiengineinferenceonnxruntime.h"
|
||||||
|
|
||||||
|
|
||||||
|
static const float confThreshold = 0.4f;
|
||||||
|
static const float iouThreshold = 0.4f;
|
||||||
|
static const float maskThreshold = 0.5f;
|
||||||
|
|
||||||
|
|
||||||
|
AiEngineInferencevOnnxRuntime::AiEngineInferencevOnnxRuntime(QString modelPath, QObject *parent) :
|
||||||
|
AiEngineInference{modelPath, parent},
|
||||||
|
mPredictor(modelPath.toStdString(), confThreshold, iouThreshold, maskThreshold)
|
||||||
|
{
|
||||||
|
qDebug() << "TUOMAS AiEngineInferencevOnnxRuntime() mModelPath=" << mModelPath;
|
||||||
|
|
||||||
|
mClassNames = {
|
||||||
|
"Armoured vehicle",
|
||||||
|
"Truck",
|
||||||
|
"Vehicle",
|
||||||
|
"Artillery",
|
||||||
|
"Shadow artillery",
|
||||||
|
"Trenches",
|
||||||
|
"Military man",
|
||||||
|
"Tyre tracks",
|
||||||
|
"Additional protection tank",
|
||||||
|
"Smoke"
|
||||||
|
};
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
cv::Mat AiEngineInferencevOnnxRuntime::drawLabels(const cv::Mat &image, const std::vector<Yolov8Result> &detections)
|
||||||
|
{
|
||||||
|
cv::Mat result = image.clone();
|
||||||
|
|
||||||
|
for (const auto &detection : detections)
|
||||||
|
{
|
||||||
|
cv::rectangle(result, detection.box, cv::Scalar(0, 255, 0), 2);
|
||||||
|
std::string label = mClassNames[detection.classId] + ": " + std::to_string(detection.conf);
|
||||||
|
|
||||||
|
int baseLine;
|
||||||
|
cv::Size labelSize = cv::getTextSize(label, cv::FONT_HERSHEY_SIMPLEX, 0.5, 1, &baseLine);
|
||||||
|
|
||||||
|
cv::rectangle(
|
||||||
|
result,
|
||||||
|
cv::Point(detection.box.x, detection.box.y - labelSize.height),
|
||||||
|
cv::Point(detection.box.x + labelSize.width, detection.box.y + baseLine),
|
||||||
|
cv::Scalar(255, 255, 255),
|
||||||
|
cv::FILLED);
|
||||||
|
|
||||||
|
cv::putText(
|
||||||
|
result,
|
||||||
|
label,
|
||||||
|
cv::Point(
|
||||||
|
detection.box.x,
|
||||||
|
detection.box.y),
|
||||||
|
cv::FONT_HERSHEY_SIMPLEX,
|
||||||
|
0.5,
|
||||||
|
cv::Scalar(0, 0, 0),
|
||||||
|
1);
|
||||||
|
}
|
||||||
|
|
||||||
|
return result;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
void AiEngineInferencevOnnxRuntime::performInferenceSlot(cv::Mat frame)
|
||||||
|
{
|
||||||
|
mActive = true;
|
||||||
|
|
||||||
|
cv::Mat scaledImage = resizeAndPad(frame);
|
||||||
|
std::vector<Yolov8Result> detections = mPredictor.predict(scaledImage);
|
||||||
|
AiEngineInferenceResult result;
|
||||||
|
|
||||||
|
for (uint i = 0; i < detections.size(); i++) {
|
||||||
|
const Yolov8Result &detection = detections[i];
|
||||||
|
|
||||||
|
// Add detected objects to the results
|
||||||
|
AiEngineObject object;
|
||||||
|
object.classId = detection.classId;
|
||||||
|
object.propability = detection.conf;
|
||||||
|
object.rectangle.top = detection.box.y;
|
||||||
|
object.rectangle.left = detection.box.x;
|
||||||
|
object.rectangle.bottom = detection.box.y + detection.box.height;
|
||||||
|
object.rectangle.right = detection.box.x + detection.box.width;
|
||||||
|
result.objects.append(object);
|
||||||
|
}
|
||||||
|
|
||||||
|
if (result.objects.empty() == false) {
|
||||||
|
qDebug() << __PRETTY_FUNCTION__ << "detections:" << detections.size();
|
||||||
|
result.frame = drawLabels(scaledImage, detections);
|
||||||
|
emit resultsReady(result);
|
||||||
|
}
|
||||||
|
|
||||||
|
mActive = false;
|
||||||
|
}
|
||||||
@@ -0,0 +1,20 @@
|
|||||||
|
#pragma once
|
||||||
|
|
||||||
|
#include <QObject>
|
||||||
|
#include "aiengineinference.h"
|
||||||
|
#include "yolov8Predictor.h"
|
||||||
|
|
||||||
|
class AiEngineInferencevOnnxRuntime : public AiEngineInference
|
||||||
|
{
|
||||||
|
Q_OBJECT
|
||||||
|
public:
|
||||||
|
explicit AiEngineInferencevOnnxRuntime(QString modelPath, QObject *parent = nullptr);
|
||||||
|
|
||||||
|
public slots:
|
||||||
|
void performInferenceSlot(cv::Mat frame) override;
|
||||||
|
|
||||||
|
private:
|
||||||
|
cv::Mat drawLabels(const cv::Mat &image, const std::vector<Yolov8Result> &detections);
|
||||||
|
YOLOPredictor mPredictor;
|
||||||
|
std::vector<std::string> mClassNames;
|
||||||
|
};
|
||||||
@@ -0,0 +1,167 @@
|
|||||||
|
#include "utils.h"
|
||||||
|
|
||||||
|
size_t utils::vectorProduct(const std::vector<int64_t> &vector)
|
||||||
|
{
|
||||||
|
if (vector.empty())
|
||||||
|
return 0;
|
||||||
|
|
||||||
|
size_t product = 1;
|
||||||
|
for (const auto &element : vector)
|
||||||
|
product *= element;
|
||||||
|
|
||||||
|
return product;
|
||||||
|
}
|
||||||
|
|
||||||
|
std::wstring utils::charToWstring(const char *str)
|
||||||
|
{
|
||||||
|
typedef std::codecvt_utf8<wchar_t> convert_type;
|
||||||
|
std::wstring_convert<convert_type, wchar_t> converter;
|
||||||
|
|
||||||
|
return converter.from_bytes(str);
|
||||||
|
}
|
||||||
|
|
||||||
|
std::vector<std::string> utils::loadNames(const std::string &path)
|
||||||
|
{
|
||||||
|
// load class names
|
||||||
|
std::vector<std::string> classNames;
|
||||||
|
std::ifstream infile(path);
|
||||||
|
if (infile.good())
|
||||||
|
{
|
||||||
|
std::string line;
|
||||||
|
while (getline(infile, line))
|
||||||
|
{
|
||||||
|
if (line.back() == '\r')
|
||||||
|
line.pop_back();
|
||||||
|
classNames.emplace_back(line);
|
||||||
|
}
|
||||||
|
infile.close();
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
std::cerr << "ERROR: Failed to access class name path: " << path << std::endl;
|
||||||
|
}
|
||||||
|
// set color
|
||||||
|
srand(time(0));
|
||||||
|
|
||||||
|
for (int i = 0; i < (int)(2 * classNames.size()); i++)
|
||||||
|
{
|
||||||
|
int b = rand() % 256;
|
||||||
|
int g = rand() % 256;
|
||||||
|
int r = rand() % 256;
|
||||||
|
colors.push_back(cv::Scalar(b, g, r));
|
||||||
|
}
|
||||||
|
return classNames;
|
||||||
|
}
|
||||||
|
|
||||||
|
void utils::visualizeDetection(cv::Mat &im, std::vector<Yolov8Result> &results,
|
||||||
|
const std::vector<std::string> &classNames)
|
||||||
|
{
|
||||||
|
cv::Mat image = im.clone();
|
||||||
|
for (const Yolov8Result &result : results)
|
||||||
|
{
|
||||||
|
|
||||||
|
int x = result.box.x;
|
||||||
|
int y = result.box.y;
|
||||||
|
|
||||||
|
int conf = (int)std::round(result.conf * 100);
|
||||||
|
int classId = result.classId;
|
||||||
|
std::string label = classNames[classId] + " 0." + std::to_string(conf);
|
||||||
|
|
||||||
|
int baseline = 0;
|
||||||
|
cv::Size size = cv::getTextSize(label, cv::FONT_ITALIC, 0.4, 1, &baseline);
|
||||||
|
image(result.box).setTo(colors[classId + classNames.size()], result.boxMask);
|
||||||
|
cv::rectangle(image, result.box, colors[classId], 2);
|
||||||
|
cv::rectangle(image,
|
||||||
|
cv::Point(x, y), cv::Point(x + size.width, y + 12),
|
||||||
|
colors[classId], -1);
|
||||||
|
cv::putText(image, label,
|
||||||
|
cv::Point(x, y - 3 + 12), cv::FONT_ITALIC,
|
||||||
|
0.4, cv::Scalar(0, 0, 0), 1);
|
||||||
|
}
|
||||||
|
cv::addWeighted(im, 0.4, image, 0.6, 0, im);
|
||||||
|
}
|
||||||
|
|
||||||
|
void utils::letterbox(const cv::Mat &image, cv::Mat &outImage,
|
||||||
|
const cv::Size &newShape = cv::Size(640, 640),
|
||||||
|
const cv::Scalar &color = cv::Scalar(114, 114, 114),
|
||||||
|
bool auto_ = true,
|
||||||
|
bool scaleFill = false,
|
||||||
|
bool scaleUp = true,
|
||||||
|
int stride = 32)
|
||||||
|
{
|
||||||
|
cv::Size shape = image.size();
|
||||||
|
float r = std::min((float)newShape.height / (float)shape.height,
|
||||||
|
(float)newShape.width / (float)shape.width);
|
||||||
|
if (!scaleUp)
|
||||||
|
r = std::min(r, 1.0f);
|
||||||
|
|
||||||
|
float ratio[2]{r, r};
|
||||||
|
|
||||||
|
int newUnpad[2]{(int)std::round((float)shape.width * r),
|
||||||
|
(int)std::round((float)shape.height * r)};
|
||||||
|
|
||||||
|
auto dw = (float)(newShape.width - newUnpad[0]);
|
||||||
|
auto dh = (float)(newShape.height - newUnpad[1]);
|
||||||
|
|
||||||
|
if (auto_)
|
||||||
|
{
|
||||||
|
dw = (float)((int)dw % stride);
|
||||||
|
dh = (float)((int)dh % stride);
|
||||||
|
}
|
||||||
|
else if (scaleFill)
|
||||||
|
{
|
||||||
|
dw = 0.0f;
|
||||||
|
dh = 0.0f;
|
||||||
|
newUnpad[0] = newShape.width;
|
||||||
|
newUnpad[1] = newShape.height;
|
||||||
|
ratio[0] = (float)newShape.width / (float)shape.width;
|
||||||
|
ratio[1] = (float)newShape.height / (float)shape.height;
|
||||||
|
}
|
||||||
|
|
||||||
|
dw /= 2.0f;
|
||||||
|
dh /= 2.0f;
|
||||||
|
|
||||||
|
if (shape.width != newUnpad[0] && shape.height != newUnpad[1])
|
||||||
|
{
|
||||||
|
cv::resize(image, outImage, cv::Size(newUnpad[0], newUnpad[1]));
|
||||||
|
}
|
||||||
|
|
||||||
|
int top = int(std::round(dh - 0.1f));
|
||||||
|
int bottom = int(std::round(dh + 0.1f));
|
||||||
|
int left = int(std::round(dw - 0.1f));
|
||||||
|
int right = int(std::round(dw + 0.1f));
|
||||||
|
cv::copyMakeBorder(outImage, outImage, top, bottom, left, right, cv::BORDER_CONSTANT, color);
|
||||||
|
}
|
||||||
|
|
||||||
|
void utils::scaleCoords(cv::Rect &coords,
|
||||||
|
cv::Mat &mask,
|
||||||
|
const float maskThreshold,
|
||||||
|
const cv::Size &imageShape,
|
||||||
|
const cv::Size &imageOriginalShape)
|
||||||
|
{
|
||||||
|
float gain = std::min((float)imageShape.height / (float)imageOriginalShape.height,
|
||||||
|
(float)imageShape.width / (float)imageOriginalShape.width);
|
||||||
|
|
||||||
|
int pad[2] = {(int)(((float)imageShape.width - (float)imageOriginalShape.width * gain) / 2.0f),
|
||||||
|
(int)(((float)imageShape.height - (float)imageOriginalShape.height * gain) / 2.0f)};
|
||||||
|
|
||||||
|
coords.x = (int)std::round(((float)(coords.x - pad[0]) / gain));
|
||||||
|
coords.x = std::max(0, coords.x);
|
||||||
|
coords.y = (int)std::round(((float)(coords.y - pad[1]) / gain));
|
||||||
|
coords.y = std::max(0, coords.y);
|
||||||
|
|
||||||
|
coords.width = (int)std::round(((float)coords.width / gain));
|
||||||
|
coords.width = std::min(coords.width, imageOriginalShape.width - coords.x);
|
||||||
|
coords.height = (int)std::round(((float)coords.height / gain));
|
||||||
|
coords.height = std::min(coords.height, imageOriginalShape.height - coords.y);
|
||||||
|
mask = mask(cv::Rect(pad[0], pad[1], imageShape.width - 2 * pad[0], imageShape.height - 2 * pad[1]));
|
||||||
|
|
||||||
|
cv::resize(mask, mask, imageOriginalShape, cv::INTER_LINEAR);
|
||||||
|
|
||||||
|
mask = mask(coords) > maskThreshold;
|
||||||
|
}
|
||||||
|
template <typename T>
|
||||||
|
T utils::clip(const T &n, const T &lower, const T &upper)
|
||||||
|
{
|
||||||
|
return std::max(lower, std::min(n, upper));
|
||||||
|
}
|
||||||
@@ -0,0 +1,38 @@
|
|||||||
|
#pragma once
|
||||||
|
#include <codecvt>
|
||||||
|
#include <fstream>
|
||||||
|
#include <opencv2/opencv.hpp>
|
||||||
|
|
||||||
|
struct Yolov8Result
|
||||||
|
{
|
||||||
|
cv::Rect box;
|
||||||
|
cv::Mat boxMask; // mask in box
|
||||||
|
float conf{};
|
||||||
|
int classId{};
|
||||||
|
};
|
||||||
|
|
||||||
|
namespace utils
|
||||||
|
{
|
||||||
|
static std::vector<cv::Scalar> colors;
|
||||||
|
|
||||||
|
size_t vectorProduct(const std::vector<int64_t> &vector);
|
||||||
|
std::wstring charToWstring(const char *str);
|
||||||
|
std::vector<std::string> loadNames(const std::string &path);
|
||||||
|
void visualizeDetection(cv::Mat &image, std::vector<Yolov8Result> &results,
|
||||||
|
const std::vector<std::string> &classNames);
|
||||||
|
|
||||||
|
void letterbox(const cv::Mat &image, cv::Mat &outImage,
|
||||||
|
const cv::Size &newShape,
|
||||||
|
const cv::Scalar &color,
|
||||||
|
bool auto_,
|
||||||
|
bool scaleFill,
|
||||||
|
bool scaleUp,
|
||||||
|
int stride);
|
||||||
|
|
||||||
|
void scaleCoords(cv::Rect &coords, cv::Mat &mask,
|
||||||
|
const float maskThreshold,
|
||||||
|
const cv::Size &imageShape, const cv::Size &imageOriginalShape);
|
||||||
|
|
||||||
|
template <typename T>
|
||||||
|
T clip(const T &n, const T &lower, const T &upper);
|
||||||
|
}
|
||||||
@@ -0,0 +1,243 @@
|
|||||||
|
#include "yolov8Predictor.h"
|
||||||
|
|
||||||
|
YOLOPredictor::YOLOPredictor(const std::string &modelPath,
|
||||||
|
float confThreshold,
|
||||||
|
float iouThreshold,
|
||||||
|
float maskThreshold)
|
||||||
|
{
|
||||||
|
this->confThreshold = confThreshold;
|
||||||
|
this->iouThreshold = iouThreshold;
|
||||||
|
this->maskThreshold = maskThreshold;
|
||||||
|
env = Ort::Env(OrtLoggingLevel::ORT_LOGGING_LEVEL_WARNING, "YOLOV8");
|
||||||
|
sessionOptions = Ort::SessionOptions();
|
||||||
|
|
||||||
|
std::vector<std::string> availableProviders = Ort::GetAvailableProviders();
|
||||||
|
std::cout << "Inference device: CPU" << std::endl;
|
||||||
|
|
||||||
|
session = Ort::Session(env, modelPath.c_str(), sessionOptions);
|
||||||
|
|
||||||
|
const size_t num_input_nodes = session.GetInputCount(); //==1
|
||||||
|
const size_t num_output_nodes = session.GetOutputCount(); //==1,2
|
||||||
|
if (num_output_nodes > 1)
|
||||||
|
{
|
||||||
|
this->hasMask = true;
|
||||||
|
std::cout << "Instance Segmentation" << std::endl;
|
||||||
|
}
|
||||||
|
else
|
||||||
|
std::cout << "Object Detection" << std::endl;
|
||||||
|
|
||||||
|
Ort::AllocatorWithDefaultOptions allocator;
|
||||||
|
for (int i = 0; i < (int)num_input_nodes; i++)
|
||||||
|
{
|
||||||
|
auto input_name = session.GetInputNameAllocated(i, allocator);
|
||||||
|
this->inputNames.push_back(input_name.get());
|
||||||
|
input_names_ptr.push_back(std::move(input_name));
|
||||||
|
|
||||||
|
Ort::TypeInfo inputTypeInfo = session.GetInputTypeInfo(i);
|
||||||
|
std::vector<int64_t> inputTensorShape = inputTypeInfo.GetTensorTypeAndShapeInfo().GetShape();
|
||||||
|
this->inputShapes.push_back(inputTensorShape);
|
||||||
|
this->isDynamicInputShape = true;
|
||||||
|
// checking if width and height are dynamic
|
||||||
|
if (inputTensorShape[2] == -1 && inputTensorShape[3] == -1)
|
||||||
|
{
|
||||||
|
std::cout << "Dynamic input shape" << std::endl;
|
||||||
|
this->isDynamicInputShape = true;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
for (int i = 0; i < (int)num_output_nodes; i++)
|
||||||
|
{
|
||||||
|
auto output_name = session.GetOutputNameAllocated(i, allocator);
|
||||||
|
this->outputNames.push_back(output_name.get());
|
||||||
|
output_names_ptr.push_back(std::move(output_name));
|
||||||
|
|
||||||
|
Ort::TypeInfo outputTypeInfo = session.GetOutputTypeInfo(i);
|
||||||
|
std::vector<int64_t> outputTensorShape = outputTypeInfo.GetTensorTypeAndShapeInfo().GetShape();
|
||||||
|
this->outputShapes.push_back(outputTensorShape);
|
||||||
|
if (i == 0)
|
||||||
|
{
|
||||||
|
if (!this->hasMask)
|
||||||
|
classNums = outputTensorShape[1] - 4;
|
||||||
|
else
|
||||||
|
classNums = outputTensorShape[1] - 4 - 32;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
// for (const char *x : this->inputNames)
|
||||||
|
// {
|
||||||
|
// std::cout << x << std::endl;
|
||||||
|
// }
|
||||||
|
// for (const char *x : this->outputNames)
|
||||||
|
// {
|
||||||
|
// std::cout << x << std::endl;
|
||||||
|
// }
|
||||||
|
// std::cout << classNums << std::endl;
|
||||||
|
}
|
||||||
|
|
||||||
|
void YOLOPredictor::getBestClassInfo(std::vector<float>::iterator it,
|
||||||
|
float &bestConf,
|
||||||
|
int &bestClassId,
|
||||||
|
const int _classNums)
|
||||||
|
{
|
||||||
|
// first 4 element are box
|
||||||
|
bestClassId = 4;
|
||||||
|
bestConf = 0;
|
||||||
|
|
||||||
|
for (int i = 4; i < _classNums + 4; i++)
|
||||||
|
{
|
||||||
|
if (it[i] > bestConf)
|
||||||
|
{
|
||||||
|
bestConf = it[i];
|
||||||
|
bestClassId = i - 4;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
cv::Mat YOLOPredictor::getMask(const cv::Mat &maskProposals,
|
||||||
|
const cv::Mat &maskProtos)
|
||||||
|
{
|
||||||
|
cv::Mat protos = maskProtos.reshape(0, {(int)this->outputShapes[1][1], (int)this->outputShapes[1][2] * (int)this->outputShapes[1][3]});
|
||||||
|
|
||||||
|
cv::Mat matmul_res = (maskProposals * protos).t();
|
||||||
|
cv::Mat masks = matmul_res.reshape(1, {(int)this->outputShapes[1][2], (int)this->outputShapes[1][3]});
|
||||||
|
cv::Mat dest;
|
||||||
|
|
||||||
|
// sigmoid
|
||||||
|
cv::exp(-masks, dest);
|
||||||
|
dest = 1.0 / (1.0 + dest);
|
||||||
|
cv::resize(dest, dest, cv::Size((int)this->inputShapes[0][2], (int)this->inputShapes[0][3]), cv::INTER_LINEAR);
|
||||||
|
return dest;
|
||||||
|
}
|
||||||
|
|
||||||
|
void YOLOPredictor::preprocessing(cv::Mat &image, float *&blob, std::vector<int64_t> &inputTensorShape)
|
||||||
|
{
|
||||||
|
cv::Mat resizedImage, floatImage;
|
||||||
|
cv::cvtColor(image, resizedImage, cv::COLOR_BGR2RGB);
|
||||||
|
utils::letterbox(resizedImage, resizedImage, cv::Size((int)this->inputShapes[0][2], (int)this->inputShapes[0][3]),
|
||||||
|
cv::Scalar(114, 114, 114), this->isDynamicInputShape,
|
||||||
|
false, true, 32);
|
||||||
|
|
||||||
|
inputTensorShape[2] = resizedImage.rows;
|
||||||
|
inputTensorShape[3] = resizedImage.cols;
|
||||||
|
|
||||||
|
resizedImage.convertTo(floatImage, CV_32FC3, 1 / 255.0);
|
||||||
|
blob = new float[floatImage.cols * floatImage.rows * floatImage.channels()];
|
||||||
|
cv::Size floatImageSize{floatImage.cols, floatImage.rows};
|
||||||
|
|
||||||
|
// hwc -> chw
|
||||||
|
std::vector<cv::Mat> chw(floatImage.channels());
|
||||||
|
for (int i = 0; i < floatImage.channels(); ++i)
|
||||||
|
{
|
||||||
|
chw[i] = cv::Mat(floatImageSize, CV_32FC1, blob + i * floatImageSize.width * floatImageSize.height);
|
||||||
|
}
|
||||||
|
cv::split(floatImage, chw);
|
||||||
|
}
|
||||||
|
|
||||||
|
std::vector<Yolov8Result> YOLOPredictor::postprocessing(const cv::Size &resizedImageShape,
|
||||||
|
const cv::Size &originalImageShape,
|
||||||
|
std::vector<Ort::Value> &outputTensors)
|
||||||
|
{
|
||||||
|
|
||||||
|
// for box
|
||||||
|
std::vector<cv::Rect> boxes;
|
||||||
|
std::vector<float> confs;
|
||||||
|
std::vector<int> classIds;
|
||||||
|
|
||||||
|
float *boxOutput = outputTensors[0].GetTensorMutableData<float>();
|
||||||
|
//[1,4+n,8400]=>[1,8400,4+n] or [1,4+n+32,8400]=>[1,8400,4+n+32]
|
||||||
|
cv::Mat output0 = cv::Mat(cv::Size((int)this->outputShapes[0][2], (int)this->outputShapes[0][1]), CV_32F, boxOutput).t();
|
||||||
|
float *output0ptr = (float *)output0.data;
|
||||||
|
int rows = (int)this->outputShapes[0][2];
|
||||||
|
int cols = (int)this->outputShapes[0][1];
|
||||||
|
// std::cout << rows << cols << std::endl;
|
||||||
|
// if hasMask
|
||||||
|
std::vector<std::vector<float>> picked_proposals;
|
||||||
|
cv::Mat mask_protos;
|
||||||
|
|
||||||
|
for (int i = 0; i < rows; i++)
|
||||||
|
{
|
||||||
|
std::vector<float> it(output0ptr + i * cols, output0ptr + (i + 1) * cols);
|
||||||
|
float confidence;
|
||||||
|
int classId;
|
||||||
|
this->getBestClassInfo(it.begin(), confidence, classId, classNums);
|
||||||
|
|
||||||
|
if (confidence > this->confThreshold)
|
||||||
|
{
|
||||||
|
if (this->hasMask)
|
||||||
|
{
|
||||||
|
std::vector<float> temp(it.begin() + 4 + classNums, it.end());
|
||||||
|
picked_proposals.push_back(temp);
|
||||||
|
}
|
||||||
|
int centerX = (int)(it[0]);
|
||||||
|
int centerY = (int)(it[1]);
|
||||||
|
int width = (int)(it[2]);
|
||||||
|
int height = (int)(it[3]);
|
||||||
|
int left = centerX - width / 2;
|
||||||
|
int top = centerY - height / 2;
|
||||||
|
boxes.emplace_back(left, top, width, height);
|
||||||
|
confs.emplace_back(confidence);
|
||||||
|
classIds.emplace_back(classId);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
std::vector<int> indices;
|
||||||
|
cv::dnn::NMSBoxes(boxes, confs, this->confThreshold, this->iouThreshold, indices);
|
||||||
|
|
||||||
|
if (this->hasMask)
|
||||||
|
{
|
||||||
|
float *maskOutput = outputTensors[1].GetTensorMutableData<float>();
|
||||||
|
std::vector<int> mask_protos_shape = {1, (int)this->outputShapes[1][1], (int)this->outputShapes[1][2], (int)this->outputShapes[1][3]};
|
||||||
|
mask_protos = cv::Mat(mask_protos_shape, CV_32F, maskOutput);
|
||||||
|
}
|
||||||
|
|
||||||
|
std::vector<Yolov8Result> results;
|
||||||
|
for (int idx : indices)
|
||||||
|
{
|
||||||
|
Yolov8Result res;
|
||||||
|
res.box = cv::Rect(boxes[idx]);
|
||||||
|
if (this->hasMask)
|
||||||
|
res.boxMask = this->getMask(cv::Mat(picked_proposals[idx]).t(), mask_protos);
|
||||||
|
else
|
||||||
|
res.boxMask = cv::Mat::zeros((int)this->inputShapes[0][2], (int)this->inputShapes[0][3], CV_8U);
|
||||||
|
|
||||||
|
utils::scaleCoords(res.box, res.boxMask, this->maskThreshold, resizedImageShape, originalImageShape);
|
||||||
|
res.conf = confs[idx];
|
||||||
|
res.classId = classIds[idx];
|
||||||
|
results.emplace_back(res);
|
||||||
|
}
|
||||||
|
|
||||||
|
return results;
|
||||||
|
}
|
||||||
|
|
||||||
|
std::vector<Yolov8Result> YOLOPredictor::predict(cv::Mat &image)
|
||||||
|
{
|
||||||
|
float *blob = nullptr;
|
||||||
|
std::vector<int64_t> inputTensorShape{1, 3, -1, -1};
|
||||||
|
this->preprocessing(image, blob, inputTensorShape);
|
||||||
|
|
||||||
|
size_t inputTensorSize = utils::vectorProduct(inputTensorShape);
|
||||||
|
|
||||||
|
std::vector<float> inputTensorValues(blob, blob + inputTensorSize);
|
||||||
|
|
||||||
|
std::vector<Ort::Value> inputTensors;
|
||||||
|
|
||||||
|
Ort::MemoryInfo memoryInfo = Ort::MemoryInfo::CreateCpu(
|
||||||
|
OrtAllocatorType::OrtArenaAllocator, OrtMemType::OrtMemTypeDefault);
|
||||||
|
|
||||||
|
inputTensors.push_back(Ort::Value::CreateTensor<float>(
|
||||||
|
memoryInfo, inputTensorValues.data(), inputTensorSize,
|
||||||
|
inputTensorShape.data(), inputTensorShape.size()));
|
||||||
|
|
||||||
|
std::vector<Ort::Value> outputTensors = this->session.Run(Ort::RunOptions{nullptr},
|
||||||
|
this->inputNames.data(),
|
||||||
|
inputTensors.data(),
|
||||||
|
1,
|
||||||
|
this->outputNames.data(),
|
||||||
|
this->outputNames.size());
|
||||||
|
|
||||||
|
cv::Size resizedShape = cv::Size((int)inputTensorShape[3], (int)inputTensorShape[2]);
|
||||||
|
std::vector<Yolov8Result> result = this->postprocessing(resizedShape,
|
||||||
|
image.size(),
|
||||||
|
outputTensors);
|
||||||
|
|
||||||
|
delete[] blob;
|
||||||
|
|
||||||
|
return result;
|
||||||
|
}
|
||||||
@@ -0,0 +1,50 @@
|
|||||||
|
#pragma once
|
||||||
|
#include <opencv2/opencv.hpp>
|
||||||
|
#include <onnxruntime_cxx_api.h>
|
||||||
|
#include <utility>
|
||||||
|
|
||||||
|
#include "utils.h"
|
||||||
|
|
||||||
|
class YOLOPredictor
|
||||||
|
{
|
||||||
|
public:
|
||||||
|
explicit YOLOPredictor(std::nullptr_t){};
|
||||||
|
YOLOPredictor(const std::string &modelPath,
|
||||||
|
float confThreshold,
|
||||||
|
float iouThreshold,
|
||||||
|
float maskThreshold);
|
||||||
|
// ~YOLOPredictor();
|
||||||
|
std::vector<Yolov8Result> predict(cv::Mat &image);
|
||||||
|
int classNums = 10;
|
||||||
|
|
||||||
|
private:
|
||||||
|
Ort::Env env{nullptr};
|
||||||
|
Ort::SessionOptions sessionOptions{nullptr};
|
||||||
|
Ort::Session session{nullptr};
|
||||||
|
|
||||||
|
void preprocessing(cv::Mat &image, float *&blob, std::vector<int64_t> &inputTensorShape);
|
||||||
|
std::vector<Yolov8Result> postprocessing(const cv::Size &resizedImageShape,
|
||||||
|
const cv::Size &originalImageShape,
|
||||||
|
std::vector<Ort::Value> &outputTensors);
|
||||||
|
|
||||||
|
static void getBestClassInfo(std::vector<float>::iterator it,
|
||||||
|
float &bestConf,
|
||||||
|
int &bestClassId,
|
||||||
|
const int _classNums);
|
||||||
|
cv::Mat getMask(const cv::Mat &maskProposals, const cv::Mat &maskProtos);
|
||||||
|
bool isDynamicInputShape{};
|
||||||
|
|
||||||
|
std::vector<const char *> inputNames;
|
||||||
|
std::vector<Ort::AllocatedStringPtr> input_names_ptr;
|
||||||
|
|
||||||
|
std::vector<const char *> outputNames;
|
||||||
|
std::vector<Ort::AllocatedStringPtr> output_names_ptr;
|
||||||
|
|
||||||
|
std::vector<std::vector<int64_t>> inputShapes;
|
||||||
|
std::vector<std::vector<int64_t>> outputShapes;
|
||||||
|
float confThreshold = 0.3f;
|
||||||
|
float iouThreshold = 0.4f;
|
||||||
|
|
||||||
|
bool hasMask = false;
|
||||||
|
float maskThreshold = 0.5f;
|
||||||
|
};
|
||||||
@@ -3,10 +3,6 @@
|
|||||||
#include "aiengineinferenceopencvonnx.h"
|
#include "aiengineinferenceopencvonnx.h"
|
||||||
|
|
||||||
|
|
||||||
const int INFERENCE_SQUARE_WIDTH = 640;
|
|
||||||
const int INFERENCE_SQUARE_HEIGHT = 640;
|
|
||||||
|
|
||||||
|
|
||||||
AiEngineInferenceOpencvOnnx::AiEngineInferenceOpencvOnnx(QString modelPath, QObject *parent)
|
AiEngineInferenceOpencvOnnx::AiEngineInferenceOpencvOnnx(QString modelPath, QObject *parent)
|
||||||
: AiEngineInference{modelPath, parent},
|
: AiEngineInference{modelPath, parent},
|
||||||
mInference(modelPath.toStdString(), cv::Size(640, 640), "classes.txt")
|
mInference(modelPath.toStdString(), cv::Size(640, 640), "classes.txt")
|
||||||
@@ -17,42 +13,6 @@ AiEngineInferenceOpencvOnnx::AiEngineInferenceOpencvOnnx(QString modelPath, QObj
|
|||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
cv::Mat resizeAndPad(const cv::Mat& src)
|
|
||||||
{
|
|
||||||
// Calculate the aspect ratio
|
|
||||||
float aspectRatio = static_cast<float>(src.cols) / src.rows;
|
|
||||||
|
|
||||||
// Determine new size while maintaining aspect ratio
|
|
||||||
int newWidth = src.cols;
|
|
||||||
int newHeight = src.rows;
|
|
||||||
if (src.cols > INFERENCE_SQUARE_WIDTH || src.rows > INFERENCE_SQUARE_HEIGHT) {
|
|
||||||
if (aspectRatio > 1)
|
|
||||||
{
|
|
||||||
// Width is greater than height
|
|
||||||
newWidth = INFERENCE_SQUARE_WIDTH;
|
|
||||||
newHeight = static_cast<int>(INFERENCE_SQUARE_WIDTH / aspectRatio);
|
|
||||||
}
|
|
||||||
else {
|
|
||||||
// Height is greater than or equal to width
|
|
||||||
newHeight = INFERENCE_SQUARE_HEIGHT;
|
|
||||||
newWidth = static_cast<int>(INFERENCE_SQUARE_HEIGHT * aspectRatio);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// Resize the original image if needed
|
|
||||||
cv::Mat resized;
|
|
||||||
cv::resize(src, resized, cv::Size(newWidth, newHeight));
|
|
||||||
|
|
||||||
// Create a new 640x640 image with a black background
|
|
||||||
cv::Mat output(INFERENCE_SQUARE_HEIGHT, INFERENCE_SQUARE_WIDTH, src.type(), cv::Scalar(0, 0, 0));
|
|
||||||
|
|
||||||
// Copy the resized image to the top-left corner of the new image
|
|
||||||
resized.copyTo(output(cv::Rect(0, 0, resized.cols, resized.rows)));
|
|
||||||
|
|
||||||
return output;
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
void AiEngineInferenceOpencvOnnx::performInferenceSlot(cv::Mat frame)
|
void AiEngineInferenceOpencvOnnx::performInferenceSlot(cv::Mat frame)
|
||||||
{
|
{
|
||||||
try {
|
try {
|
||||||
|
|||||||
@@ -14,6 +14,5 @@ public slots:
|
|||||||
void performInferenceSlot(cv::Mat frame) override;
|
void performInferenceSlot(cv::Mat frame) override;
|
||||||
|
|
||||||
private:
|
private:
|
||||||
//InferenceEngine *mEngine;
|
|
||||||
Inference mInference;
|
Inference mInference;
|
||||||
};
|
};
|
||||||
|
|||||||
Reference in New Issue
Block a user