Renamed opi_player to rtsp_ai_player

This commit is contained in:
Tuomas Järvinen
2024-07-14 19:52:45 +02:00
parent 813251b170
commit 8a7d681e5b
40 changed files with 43 additions and 0 deletions
-71
View File
@@ -1,71 +0,0 @@
#include <QDebug>
#include <opencv2/highgui.hpp>
#include "aiengine.h"
#include "aiengineinference.h"
#if defined(OPI5_BUILD)
#include "src-opi5/aiengineinferenceopi5.h"
#elif defined(OPENCV_BUILD)
#include "src-opencv-onnx/aiengineinferenceopencvonnx.h"
#else
#include "src-onnx-runtime/aiengineinferenceonnxruntime.h"
#endif
// Constructs the AI pipeline: an RTSP listener feeding frames to an inference
// backend that runs on its own worker thread, plus a gimbal controller that
// reacts to detection results. The concrete inference backend is chosen at
// compile time by the build flags (OPI5 / OpenCV-DNN / ONNX Runtime).
AiEngine::AiEngine(QString modelPath, QObject *parent)
: QObject{parent}
{
mRtspListener = new AiEngineRtspListener(this);
connect(mRtspListener, &AiEngineRtspListener::frameReceived, this, &AiEngine::frameReceivedSlot);
#if defined(OPI5_BUILD)
mInference = new AiEngineInferenceOpi5(modelPath);
#elif defined(OPENCV_BUILD)
mInference = new AiEngineInferenceOpencvOnnx(modelPath);
#else
mInference = new AiEngineInferencevOnnxRuntime(modelPath);
#endif
// Inference runs in a dedicated thread: mInference is created without a
// parent so it can be moved there, and both connections are explicitly
// queued to marshal frames/results across the thread boundary.
QThread *inferenceThread = new QThread(this);
mInference->moveToThread(inferenceThread);
connect(mInference, &AiEngineInference::resultsReady, this, &AiEngine::inferenceResultsReceivedSlot, Qt::QueuedConnection);
connect(this, &AiEngine::inferenceFrame, mInference, &AiEngineInference::performInferenceSlot, Qt::QueuedConnection);
inferenceThread->start();
// NOTE(review): inferenceThread is never quit()/wait()ed and mInference is
// never deleted; acceptable for an app-lifetime object, but confirm shutdown.
mGimbalControl = new AiEngineGimbalControl(this);
}
// Begin consuming the RTSP stream; frames will start arriving via
// frameReceivedSlot().
void AiEngine::start(void)
{
    mRtspListener->startListening();
}
// Stop consuming the RTSP stream.
void AiEngine::stop(void)
{
    mRtspListener->stopListening();
}
// Receives detection results from the inference thread, forwards them to the
// gimbal controller (OPi5 builds only) and displays the annotated frame.
void AiEngine::inferenceResultsReceivedSlot(AiEngineInferenceResult result)
{
#ifdef OPI5_BUILD
    mGimbalControl->inferenceResultSlot(result);
#endif
    cv::imshow("Received Frame", result.frame);
    // imshow() only queues the image; waitKey() pumps the HighGUI event loop
    // so the window actually repaints. Without it nothing is shown.
    cv::waitKey(1);
}
// A frame arrived from the RTSP listener (listener thread). Frames are
// dropped while the inference backend is busy so the queue never backs up.
void AiEngine::frameReceivedSlot(cv::Mat frame)
{
    if (!mInference->isActive()) {
        // Deep copy: the listener keeps reusing its own capture buffer.
        emit inferenceFrame(frame.clone());
    }
}
-29
View File
@@ -1,29 +0,0 @@
#pragma once
#include <QObject>
#include <opencv2/core.hpp>
#include <opencv2/videoio.hpp>
#include "aienginertsplistener.h"
#include "aiengineinference.h"
#include "aienginegimbalcontrol.h"
// Top-level coordinator: receives frames from the RTSP listener, dispatches
// them to the inference backend (running in its own thread) and forwards
// detection results to the gimbal controller.
class AiEngine : public QObject
{
Q_OBJECT
public:
explicit AiEngine(QString modelPath, QObject *parent = nullptr);
void start(void);   // begin consuming the RTSP stream
void stop(void);    // stop consuming the RTSP stream
public slots:
// New frame from the listener; forwarded to inference unless it is busy.
void frameReceivedSlot(cv::Mat frame);
// Detection results arriving back from the inference thread.
void inferenceResultsReceivedSlot(AiEngineInferenceResult result);
signals:
// Queued across the thread boundary to the inference backend.
void inferenceFrame(cv::Mat frame);
private:
AiEngineRtspListener *mRtspListener;
AiEngineInference *mInference;
AiEngineGimbalControl *mGimbalControl;
};
-73
View File
@@ -1,73 +0,0 @@
#include <QDebug>
#include <QVector>
#include "aienginegimbalcontrol.h"
// Nothing to set up: mRemoteControl is default-constructed.
AiEngineGimbalControl::AiEngineGimbalControl(QObject *parent)
: QObject{parent}
{}
// Computes the smallest rectangle enclosing every detected object.
// Returns a zeroed rectangle for an empty list (the only caller,
// inferenceResultSlot(), already rejects empty results before calling).
AiEngineRectangle AiEngineGimbalControl::getGroupCoordinates(QVector<AiEngineObject> &objects)
{
    AiEngineRectangle groupRectangle;
    if (objects.isEmpty()) {
        groupRectangle.top = 0;
        groupRectangle.left = 0;
        groupRectangle.bottom = 0;
        groupRectangle.right = 0;
        return groupRectangle;
    }
    // Seed with the first object instead of the previous 1000000/0 magic
    // sentinels, so the result is correct for any coordinate range.
    groupRectangle = objects[0].rectangle;
    for (int i = 1; i < objects.size(); i++) {
        const AiEngineRectangle &objectRectangle = objects[i].rectangle;
        groupRectangle.top = qMin(groupRectangle.top, objectRectangle.top);
        groupRectangle.left = qMin(groupRectangle.left, objectRectangle.left);
        groupRectangle.bottom = qMax(groupRectangle.bottom, objectRectangle.bottom);
        groupRectangle.right = qMax(groupRectangle.right, objectRectangle.right);
    }
    return groupRectangle;
}
// Reacts to a batch of detections: computes one rectangle around all objects,
// scales it from the inference resolution back to camera resolution, sanity
// checks it and sends it to the camera as a zoom command.
void AiEngineGimbalControl::inferenceResultSlot(AiEngineInferenceResult result)
{
    if (result.objects.size() == 0) {
        return;
    }
    // Camera (A8) output resolution and the downscale factor used before
    // inference (inference ran at 640x360 per the original note).
    const int frameWidth = 1280;
    const int frameHeight = 720;
    const int inferenceScale = 2;
    // We got a list of all recognized objects, but at least for now we zoom
    // to all objects at once and not to each individually: take the minimal
    // rectangle that contains them all.
    AiEngineRectangle groupRect = getGroupCoordinates(result.objects);
    groupRect.top *= inferenceScale;
    groupRect.left *= inferenceScale;
    groupRect.bottom *= inferenceScale;
    groupRect.right *= inferenceScale;
    // Discard rectangles that fall outside the frame or are inverted.
    if (groupRect.top > frameHeight || groupRect.bottom > frameHeight) {
        qDebug() << "ERROR! inferenceResultSlot() groupRect.top > 720 || groupRect.bottom > 720";
        return;
    }
    if (groupRect.left > frameWidth || groupRect.right > frameWidth) {
        qDebug() << "ERROR! inferenceResultSlot() groupRect.left > 1280 || groupRect.right > 1280";
        return;
    }
    if ((groupRect.bottom <= groupRect.top) || (groupRect.right <= groupRect.left)) {
        qDebug() << "ERROR! (groupRect.bottom <= groupRect.top) || (groupRect.right <= groupRect.left)";
        return;
    }
    // Leftover personal debug tag removed; keep a neutral trace message.
    qDebug() << "Zooming to rectangle top=" << groupRect.top << "left=" << groupRect.left
             << "bottom=" << groupRect.bottom << "right=" << groupRect.right;
    mRemoteControl.sendData(groupRect.top, groupRect.left, groupRect.bottom, groupRect.right);
}
-20
View File
@@ -1,20 +0,0 @@
#pragma once
#include <QObject>
#include <QElapsedTimer>
#include "aiengineinference.h"
#include "remoteControl.hpp"
// Converts detection results into camera gimbal/zoom commands sent over the
// remote-control link.
class AiEngineGimbalControl : public QObject
{
Q_OBJECT
public:
explicit AiEngineGimbalControl(QObject *parent = nullptr);
private:
// Smallest rectangle enclosing all given objects.
AiEngineRectangle getGroupCoordinates(QVector<AiEngineObject> &objects);
RemoteControl mRemoteControl;   // link to the camera (A8) remote control
public slots:
// Receives one inference result batch and issues a zoom command.
void inferenceResultSlot(AiEngineInferenceResult results);
};
-50
View File
@@ -1,50 +0,0 @@
#include "aiengineinference.h"
// Stores the model path for concrete backends; backends start out idle.
AiEngineInference::AiEngineInference(QString modelPath, QObject *parent)
: QObject{parent},
mModelPath(modelPath),
mActive(false)
{}
// True while performInferenceSlot() is processing a frame; producers use this
// to drop frames instead of queueing them.
// NOTE(review): mActive is written from the inference thread and read from
// the producer's thread with no synchronization - consider std::atomic<bool>.
bool AiEngineInference::isActive(void)
{
return mActive;
}
// Letterboxes `src` into an INFERENCE_SQUARE_WIDTH x INFERENCE_SQUARE_HEIGHT
// canvas: downscales (preserving aspect ratio) only when the source exceeds
// the canvas, then pads the remainder with black. The image sits in the
// top-left corner of the returned canvas.
cv::Mat AiEngineInference::resizeAndPad(const cv::Mat& src)
{
    // An empty input would otherwise divide by zero (src.rows == 0) below
    // and make cv::resize() throw; return an all-black canvas instead.
    if (src.empty()) {
        return cv::Mat(INFERENCE_SQUARE_HEIGHT, INFERENCE_SQUARE_WIDTH, CV_8UC3, cv::Scalar(0, 0, 0));
    }
    // Calculate the aspect ratio.
    float aspectRatio = static_cast<float>(src.cols) / src.rows;
    // Determine new size while maintaining aspect ratio.
    int newWidth = src.cols;
    int newHeight = src.rows;
    if (src.cols > INFERENCE_SQUARE_WIDTH || src.rows > INFERENCE_SQUARE_HEIGHT) {
        if (aspectRatio > 1) {
            // Landscape: clamp width, derive height.
            newWidth = INFERENCE_SQUARE_WIDTH;
            newHeight = static_cast<int>(INFERENCE_SQUARE_WIDTH / aspectRatio);
        } else {
            // Portrait or square: clamp height, derive width.
            newHeight = INFERENCE_SQUARE_HEIGHT;
            newWidth = static_cast<int>(INFERENCE_SQUARE_HEIGHT * aspectRatio);
        }
    }
    // Skip the copying resize when the size is already right.
    cv::Mat resized;
    if (newWidth == src.cols && newHeight == src.rows) {
        resized = src;
    } else {
        cv::resize(src, resized, cv::Size(newWidth, newHeight));
    }
    // Black square canvas; copy the (possibly resized) image into its
    // top-left corner.
    cv::Mat output(INFERENCE_SQUARE_HEIGHT, INFERENCE_SQUARE_WIDTH, src.type(), cv::Scalar(0, 0, 0));
    resized.copyTo(output(cv::Rect(0, 0, resized.cols, resized.rows)));
    return output;
}
-55
View File
@@ -1,55 +0,0 @@
#pragma once
#include <QObject>
#include <QString>
#include <QVector>
#include <opencv2/core.hpp>
#include <opencv2/imgproc.hpp>
// Side length of the square image fed to the detector.
const int INFERENCE_SQUARE_WIDTH = 640;
const int INFERENCE_SQUARE_HEIGHT = 640;
// Axis-aligned bounding box in pixel coordinates.
class AiEngineRectangle {
public:
int left;
int top;
int right;
int bottom;
};
// One detected object.
class AiEngineObject {
public:
AiEngineRectangle rectangle;
float propability;   // detection confidence (sic: historical spelling kept for API compatibility)
int classId;         // index into the backend's class-name list
};
// Everything produced by one inference pass over a single frame.
class AiEngineInferenceResult {
public:
cv::Mat frame;                     // the (annotated) inference input frame
QVector<AiEngineObject> objects;   // detections found in `frame`
};
// Abstract base class for inference backends. Concrete subclasses implement
// performInferenceSlot() and are moved to a dedicated worker thread by the
// owner (see AiEngine).
class AiEngineInference : public QObject
{
Q_OBJECT
public:
explicit AiEngineInference(QString modelPath, QObject *parent = nullptr);
// True while a frame is being processed; producers drop frames meanwhile.
bool isActive(void);
protected:
// Letterbox helper: fit an image into the square inference input size.
cv::Mat resizeAndPad(const cv::Mat& src);
QString mModelPath;   // model file path given at construction
bool mActive;         // set/cleared by subclasses around each inference run
public slots:
// Run the model on one frame; emit resultsReady() when objects are found.
virtual void performInferenceSlot(cv::Mat frame) = 0;
signals:
void resultsReady(AiEngineInferenceResult results);
};
-54
View File
@@ -1,54 +0,0 @@
#include <QDebug>
#include <QtConcurrent/QtConcurrent>
#include "aienginertsplistener.h"
#include "config.h"
// Listener starts idle; call startListening() to begin capturing.
AiEngineRtspListener::AiEngineRtspListener(QObject *parent)
: QObject{parent},
mIsListening(false)
{
}
// Requests the capture loop to stop before the object goes away.
// NOTE(review): the QtConcurrent task may still be executing listenLoop()
// when destruction completes - there is no join/wait here; confirm the
// application's shutdown order makes this safe.
AiEngineRtspListener::~AiEngineRtspListener()
{
stopListening();
}
// Starts the background capture loop on Qt's global thread pool.
// Calling this twice previously spawned two concurrent capture loops fighting
// over the same cv::VideoCapture; guard against re-entry.
void AiEngineRtspListener::startListening(void)
{
    if (mIsListening) {
        return;
    }
    mIsListening = true;
    (void)QtConcurrent::run([this](){
        listenLoop();
    });
}
// Asks the capture loop to stop and releases the capture device.
// NOTE(review): mCap.release() runs while listenLoop() may still be blocked
// inside `mCap >> frame` on another thread. Releasing is presumably what
// unblocks that read, but cv::VideoCapture is not documented as thread-safe;
// confirm this is acceptable.
void AiEngineRtspListener::stopListening()
{
mIsListening = false;
if (mCap.isOpened()) {
mCap.release();
}
}
// Blocking capture loop; runs on a thread-pool thread until stopListening()
// clears the flag (or the stream cannot be opened).
void AiEngineRtspListener::listenLoop(void)
{
    qDebug() << "AiEngineRtspListener loop running in thread: " << QThread::currentThreadId();
    mCap.open(rtspVideoUrl.toStdString());
    // Without this check a failed open() left the loop spinning at 100% CPU,
    // endlessly reading empty frames.
    if (mCap.isOpened() == false) {
        qDebug() << "AiEngineRtspListener: failed to open stream" << rtspVideoUrl;
        mIsListening = false;
        return;
    }
    cv::Mat frame;
    while (mIsListening) {
        mCap >> frame;
        if (frame.empty() == false) {
            // Deep copy: receivers may hold the frame past this iteration.
            emit frameReceived(frame.clone());
        }
    }
}
-23
View File
@@ -1,23 +0,0 @@
#pragma once
#include <QThread>
#include <opencv2/core.hpp>
#include <opencv2/videoio.hpp>
// Pulls frames from the configured RTSP stream on a thread-pool thread and
// republishes each non-empty frame as a Qt signal.
class AiEngineRtspListener : public QObject
{
Q_OBJECT
public:
explicit AiEngineRtspListener(QObject *parent = nullptr);
~AiEngineRtspListener();
void startListening(void);   // spawn the background capture loop
void stopListening(void);    // request loop exit and release the capture
private:
// Blocking capture loop body; runs until mIsListening goes false.
void listenLoop(void);
bool mIsListening;       // loop-control flag (accessed from two threads)
cv::VideoCapture mCap;   // OpenCV RTSP capture handle
signals:
// Emitted for every non-empty frame read from the stream (deep copy).
void frameReceived(cv::Mat frame);
};
-10
View File
@@ -1,10 +0,0 @@
Armoured_vehicle
Truck
Vehicle
Artillery
Shadow_artillery
Trenches
Military man
Tyre_tracks
Additional_protection_tank
Smoke
-80
View File
@@ -1,80 +0,0 @@
person
bicycle
car
motorcycle
airplane
bus
train
truck
boat
traffic light
fire hydrant
stop sign
parking meter
bench
bird
cat
dog
horse
sheep
cow
elephant
bear
zebra
giraffe
backpack
umbrella
handbag
tie
suitcase
frisbee
skis
snowboard
sports ball
kite
baseball bat
baseball glove
skateboard
surfboard
tennis racket
bottle
wine glass
cup
fork
knife
spoon
bowl
banana
apple
sandwich
orange
broccoli
carrot
hot dog
pizza
donut
cake
chair
couch
potted plant
bed
dining table
toilet
tv
laptop
mouse
remote
keyboard
cell phone
microwave
oven
toaster
sink
refrigerator
book
clock
vase
scissors
teddy bear
hair drier
toothbrush
-10
View File
@@ -1,10 +0,0 @@
#pragma once
#include <QString>
// RTSP stream to consume. `const` gives these variables internal linkage so
// including this header from more than one translation unit no longer
// violates the one-definition rule (a non-const namespace-scope QString has
// external linkage and would be defined once per including .cpp).
#ifdef OPI5_BUILD
//const QString rtspVideoUrl = "rtsp://192.168.168.11:8554/live.stream";
const QString rtspVideoUrl = "rtsp://192.168.0.25:8554/main.264";
#else
const QString rtspVideoUrl = "rtsp://localhost:8554/live.stream";
#endif
-1
View File
@@ -1 +0,0 @@
yolo export model=yolov8n.pt opset=12 simplify=True dynamic=False format=onnx imgsz=640,640
-27
View File
@@ -1,27 +0,0 @@
#include <QCoreApplication>
#include <QDebug>
#include <QFile>
#include <opencv2/opencv.hpp>
#include "aiengine.h"
// Entry point: validates the command line (one argument: path to the model
// file), then starts the AI engine and enters the Qt event loop.
int main(int argc, char *argv[])
{
    QCoreApplication app(argc, argv);
    if (argc != 2) {
        qDebug() << "\nUsage: " << argv[0] << " <model_file>";
        return 1;
    }
    QString modelFile = argv[1];
    if (QFile::exists(modelFile) == false) {
        // Previously this printed the usage text again (with the file name in
        // place of the program name), hiding the real problem.
        qDebug() << "\nError: model file does not exist:" << modelFile;
        return 1;
    }
    AiEngine aiEngine(modelFile);
    aiEngine.start();
    return app.exec();
}
-39
View File
@@ -1,39 +0,0 @@
# Qt console application (no GUI module); OpenCV is located via pkg-config.
QT += core network
QT -= gui
CONFIG += c++11 link_pkgconfig concurrent console
PKGCONFIG += opencv4
# Keep generated moc/object files out of the source tree.
MOC_DIR = moc
OBJECTS_DIR = obj
# Common sources plus the shared A8 camera remote-control helper.
SOURCES = $$PWD/*.cpp $$PWD/../../misc/camera/a8_remote/remoteControl.cpp
HEADERS = $$PWD/*.h
INCLUDEPATH += $$PWD/../../misc/camera/a8_remote
# Backend selection: run qmake with CONFIG+=opi5 or CONFIG+=opencv;
# the default (no flag) is the ONNX Runtime backend.
opi5 {
message("OPI5 build")
PKGCONFIG += opencv4 librga stb libturbojpeg
INCLUDEPATH += /usr/include/rga
QMAKE_CXXFLAGS += -DOPI5_BUILD
LIBS += /usr/local/lib/librknnrt.so
SOURCES += $$PWD/src-opi5/*.c $$PWD/src-opi5/*.cpp $$PWD/src-opi5/*.cc
HEADERS += $$PWD/src-opi5/*.h
} else:opencv {
message("OpenCV build")
message("You must use YOLOv8 ONNX files. Azaion model does not work!")
message("OpenCV must be version 4.10.0 installed to /usr/local/")
QMAKE_CXXFLAGS += -DOPENCV_BUILD
QMAKE_LFLAGS += -Wl,-rpath,/usr/local/lib
SOURCES += $$PWD/src-opencv-onnx/*.cpp
HEADERS += $$PWD/src-opencv-onnx/*.h
}
else {
message("ONNX build")
message("You must use YOLOv8 ONNX files. Azaion ONNX model also works fine.")
QMAKE_CXXFLAGS += -DONNX_BUILD
INCLUDEPATH += /opt/onnxruntime-linux-x64-1.18.0/include
LIBS += /opt/onnxruntime-linux-x64-1.18.0/lib/libonnxruntime.so.1.18.0
QMAKE_LFLAGS += -Wl,-rpath,/opt/onnxruntime-linux-x64-1.18.0/lib
QMAKE_LFLAGS += -Wl,-rpath,/usr/local/lib
SOURCES += $$PWD/src-onnx-runtime/*.cpp
HEADERS += $$PWD/src-onnx-runtime/*.h
}
@@ -1,97 +0,0 @@
#include <QDebug>
#include <QThread>
#include <vector>
#include "aiengineinferenceonnxruntime.h"
// Detection thresholds: minimum class confidence, NMS IoU overlap, and the
// cutoff used when binarizing segmentation masks.
static const float confThreshold = 0.4f;
static const float iouThreshold = 0.4f;
static const float maskThreshold = 0.5f;
// Creates the ONNX Runtime backend: builds the predictor for the given model
// and installs the hard-coded class-name list used for labeling.
AiEngineInferencevOnnxRuntime::AiEngineInferencevOnnxRuntime(QString modelPath, QObject *parent) :
    AiEngineInference{modelPath, parent},
    mPredictor(modelPath.toStdString(), confThreshold, iouThreshold, maskThreshold)
{
    // Leftover personal debug tag replaced with a neutral trace message.
    qDebug() << "AiEngineInferencevOnnxRuntime: loading model" << mModelPath;
    mClassNames = {
        "Armoured vehicle",
        "Truck",
        "Vehicle",
        "Artillery",
        "Shadow artillery",
        "Trenches",
        "Military man",
        "Tyre tracks",
        "Additional protection tank",
        "Smoke"
    };
}
// Draws detection boxes with "name: confidence" labels on a copy of `image`
// and returns the annotated copy.
cv::Mat AiEngineInferencevOnnxRuntime::drawLabels(const cv::Mat &image, const std::vector<Yolov8Result> &detections)
{
    cv::Mat result = image.clone();
    for (const auto &detection : detections)
    {
        cv::rectangle(result, detection.box, cv::Scalar(0, 255, 0), 2);
        // Guard the class-name lookup: a model emitting more classes than
        // mClassNames holds would previously index out of range (UB).
        std::string className;
        if (detection.classId >= 0 && detection.classId < (int)mClassNames.size()) {
            className = mClassNames[detection.classId];
        } else {
            className = "class " + std::to_string(detection.classId);
        }
        std::string label = className + ": " + std::to_string(detection.conf);
        int baseLine;
        cv::Size labelSize = cv::getTextSize(label, cv::FONT_HERSHEY_SIMPLEX, 0.5, 1, &baseLine);
        // White background behind the text for readability.
        cv::rectangle(
            result,
            cv::Point(detection.box.x, detection.box.y - labelSize.height),
            cv::Point(detection.box.x + labelSize.width, detection.box.y + baseLine),
            cv::Scalar(255, 255, 255),
            cv::FILLED);
        cv::putText(
            result,
            label,
            cv::Point(
                detection.box.x,
                detection.box.y),
            cv::FONT_HERSHEY_SIMPLEX,
            0.5,
            cv::Scalar(0, 0, 0),
            1);
    }
    return result;
}
// Runs the detector on one frame (in the inference thread). mActive is held
// true for the duration so producers drop frames instead of queueing them.
void AiEngineInferencevOnnxRuntime::performInferenceSlot(cv::Mat frame)
{
    mActive = true;
    try {
        cv::Mat scaledImage = resizeAndPad(frame);
        std::vector<Yolov8Result> detections = mPredictor.predict(scaledImage);
        AiEngineInferenceResult result;
        for (uint i = 0; i < detections.size(); i++) {
            const Yolov8Result &detection = detections[i];
            // Add detected objects to the results.
            AiEngineObject object;
            object.classId = detection.classId;
            object.propability = detection.conf;
            object.rectangle.top = detection.box.y;
            object.rectangle.left = detection.box.x;
            object.rectangle.bottom = detection.box.y + detection.box.height;
            object.rectangle.right = detection.box.x + detection.box.width;
            result.objects.append(object);
        }
        if (result.objects.empty() == false) {
            qDebug() << __PRETTY_FUNCTION__ << "detections:" << detections.size();
            result.frame = drawLabels(scaledImage, detections);
            emit resultsReady(result);
        }
    } catch (const std::exception &e) {
        // Without this, an exception from predict() (Ort::Exception and
        // cv::Exception both derive std::exception) would leave mActive stuck
        // at true and the pipeline would silently drop every later frame.
        qDebug() << __PRETTY_FUNCTION__ << "inference failed:" << e.what();
    }
    mActive = false;
}
@@ -1,20 +0,0 @@
#pragma once
#include <QObject>
#include "aiengineinference.h"
#include "yolov8Predictor.h"
// ONNX Runtime based detection backend.
// NOTE(review): the stray 'v' in "InferencevOnnxRuntime" looks like a typo,
// but the name is referenced from aiengine.cpp, so it is kept as-is.
class AiEngineInferencevOnnxRuntime : public AiEngineInference
{
Q_OBJECT
public:
explicit AiEngineInferencevOnnxRuntime(QString modelPath, QObject *parent = nullptr);
public slots:
// Runs YOLOv8 detection on one frame; emits resultsReady() on hits.
void performInferenceSlot(cv::Mat frame) override;
private:
// Renders boxes and labels onto a copy of `image`.
cv::Mat drawLabels(const cv::Mat &image, const std::vector<Yolov8Result> &detections);
YOLOPredictor mPredictor;             // ONNX Runtime session wrapper
std::vector<std::string> mClassNames; // display names indexed by classId
};
-167
View File
@@ -1,167 +0,0 @@
#include "utils.h"
namespace utils
{
// Product of all dimensions in a tensor shape; an empty shape yields 0.
size_t vectorProduct(const std::vector<int64_t> &vector)
{
    if (vector.empty())
        return 0;
    size_t product = 1;
    for (size_t i = 0; i < vector.size(); ++i)
        product *= vector[i];
    return product;
}
} // namespace utils
namespace utils
{
// Decodes a UTF-8 encoded C string into a std::wstring.
std::wstring charToWstring(const char *str)
{
    std::wstring_convert<std::codecvt_utf8<wchar_t>, wchar_t> converter;
    return converter.from_bytes(str);
}
} // namespace utils
// Reads one class name per line from `path` and fills the shared `colors`
// table with two random display colors per class.
// Returns an empty list (after logging) when the file cannot be read.
std::vector<std::string> utils::loadNames(const std::string &path)
{
    // load class names
    std::vector<std::string> classNames;
    std::ifstream infile(path);
    if (infile.good())
    {
        std::string line;
        while (getline(infile, line))
        {
            // Strip a trailing '\r' from files with Windows line endings.
            // The empty() check matters: back() on an empty line is UB.
            if (!line.empty() && line.back() == '\r')
                line.pop_back();
            classNames.emplace_back(line);
        }
        infile.close();
    }
    else
    {
        std::cerr << "ERROR: Failed to access class name path: " << path << std::endl;
    }
    // Two colors per class: box color and mask color.
    srand(time(0));
    for (int i = 0; i < (int)(2 * classNames.size()); i++)
    {
        int b = rand() % 256;
        int g = rand() % 256;
        int r = rand() % 256;
        colors.push_back(cv::Scalar(b, g, r));
    }
    return classNames;
}
// Draws each detection (mask tint, box, label with the confidence rendered as
// "0.NN") onto a copy of `im`, then alpha-blends the copy back into `im`
// with weights 0.4/0.6.
// NOTE(review): indexes colors[classId + classNames.size()] - requires
// loadNames() to have populated `colors` in THIS translation unit first
// (`colors` is a per-TU static declared in utils.h).
void utils::visualizeDetection(cv::Mat &im, std::vector<Yolov8Result> &results,
const std::vector<std::string> &classNames)
{
cv::Mat image = im.clone();
for (const Yolov8Result &result : results)
{
int x = result.box.x;
int y = result.box.y;
// Confidence as an integer percentage, rendered "0.NN" by concatenation.
int conf = (int)std::round(result.conf * 100);
int classId = result.classId;
std::string label = classNames[classId] + " 0." + std::to_string(conf);
int baseline = 0;
cv::Size size = cv::getTextSize(label, cv::FONT_ITALIC, 0.4, 1, &baseline);
// Tint the masked pixels, then draw the box and a filled label strip.
image(result.box).setTo(colors[classId + classNames.size()], result.boxMask);
cv::rectangle(image, result.box, colors[classId], 2);
cv::rectangle(image,
cv::Point(x, y), cv::Point(x + size.width, y + 12),
colors[classId], -1);
cv::putText(image, label,
cv::Point(x, y - 3 + 12), cv::FONT_ITALIC,
0.4, cv::Scalar(0, 0, 0), 1);
}
cv::addWeighted(im, 0.4, image, 0.6, 0, im);
}
// Letterboxes `image` into `newShape`: scales preserving aspect ratio, then
// pads with `color` so the output matches the target size. `auto_` rounds the
// padding down to a multiple of `stride`; `scaleFill` stretches with no
// padding; `scaleUp` permits enlarging images smaller than the target.
void utils::letterbox(const cv::Mat &image, cv::Mat &outImage,
                      const cv::Size &newShape = cv::Size(640, 640),
                      const cv::Scalar &color = cv::Scalar(114, 114, 114),
                      bool auto_ = true,
                      bool scaleFill = false,
                      bool scaleUp = true,
                      int stride = 32)
{
    cv::Size shape = image.size();
    float r = std::min((float)newShape.height / (float)shape.height,
                       (float)newShape.width / (float)shape.width);
    if (!scaleUp)
        r = std::min(r, 1.0f);
    float ratio[2]{r, r};
    int newUnpad[2]{(int)std::round((float)shape.width * r),
                    (int)std::round((float)shape.height * r)};
    auto dw = (float)(newShape.width - newUnpad[0]);
    auto dh = (float)(newShape.height - newUnpad[1]);
    if (auto_)
    {
        dw = (float)((int)dw % stride);
        dh = (float)((int)dh % stride);
    }
    else if (scaleFill)
    {
        dw = 0.0f;
        dh = 0.0f;
        newUnpad[0] = newShape.width;
        newUnpad[1] = newShape.height;
        ratio[0] = (float)newShape.width / (float)shape.width;
        ratio[1] = (float)newShape.height / (float)shape.height;
    }
    dw /= 2.0f;
    dh /= 2.0f;
    // Bug fix: the resize must run when EITHER dimension changed (this was
    // '&&', which skipped the resize when only one dimension differed after
    // rounding and produced a wrongly sized output). Also initialize outImage
    // when no resize is needed and the caller passed a distinct output Mat.
    if (shape.width != newUnpad[0] || shape.height != newUnpad[1])
    {
        cv::resize(image, outImage, cv::Size(newUnpad[0], newUnpad[1]));
    }
    else
    {
        outImage = image;
    }
    int top = int(std::round(dh - 0.1f));
    int bottom = int(std::round(dh + 0.1f));
    int left = int(std::round(dw - 0.1f));
    int right = int(std::round(dw + 0.1f));
    cv::copyMakeBorder(outImage, outImage, top, bottom, left, right, cv::BORDER_CONSTANT, color);
}
// Maps a detection box (and its mask) from the letterboxed inference image
// back into the original image's coordinate system: undo padding, undo scale,
// clamp to the original bounds. The mask is cropped to the un-padded region,
// resized to the original image size and binarized at maskThreshold.
void utils::scaleCoords(cv::Rect &coords,
cv::Mat &mask,
const float maskThreshold,
const cv::Size &imageShape,
const cv::Size &imageOriginalShape)
{
// Reconstruct the scale factor and symmetric padding used by letterbox().
float gain = std::min((float)imageShape.height / (float)imageOriginalShape.height,
(float)imageShape.width / (float)imageOriginalShape.width);
int pad[2] = {(int)(((float)imageShape.width - (float)imageOriginalShape.width * gain) / 2.0f),
(int)(((float)imageShape.height - (float)imageOriginalShape.height * gain) / 2.0f)};
// Remove padding and scaling from the box origin, clamping at 0.
coords.x = (int)std::round(((float)(coords.x - pad[0]) / gain));
coords.x = std::max(0, coords.x);
coords.y = (int)std::round(((float)(coords.y - pad[1]) / gain));
coords.y = std::max(0, coords.y);
// Rescale the box size and clamp it inside the original image.
coords.width = (int)std::round(((float)coords.width / gain));
coords.width = std::min(coords.width, imageOriginalShape.width - coords.x);
coords.height = (int)std::round(((float)coords.height / gain));
coords.height = std::min(coords.height, imageOriginalShape.height - coords.y);
// Crop the mask to the un-padded area, resize to original, binarize.
// NOTE(review): the 4th argument of cv::resize binds to `fx`, not the
// interpolation flag; harmless because dsize is given (fx is ignored) and
// the interpolation therefore defaults to INTER_LINEAR anyway.
mask = mask(cv::Rect(pad[0], pad[1], imageShape.width - 2 * pad[0], imageShape.height - 2 * pad[1]));
cv::resize(mask, mask, imageOriginalShape, cv::INTER_LINEAR);
mask = mask(coords) > maskThreshold;
}
namespace utils
{
// Constrains n to the inclusive range [lower, upper].
template <typename T>
T utils_clip_impl_unused(); // (no-op forward declaration removed below)
template <typename T>
T clip(const T &n, const T &lower, const T &upper)
{
    const T boundedAbove = std::min(n, upper);
    return std::max(lower, boundedAbove);
}
} // namespace utils
-38
View File
@@ -1,38 +0,0 @@
#pragma once
#include <codecvt>
#include <fstream>
#include <opencv2/opencv.hpp>
// One detection: bounding box, optional instance mask, confidence and class.
struct Yolov8Result
{
cv::Rect box;
cv::Mat boxMask; // mask in box
float conf{};    // class confidence
int classId{};   // index into the model's class list
};
namespace utils
{
// WARNING(review): `static` at namespace scope in a header gives every
// translation unit its OWN copy of `colors`; loadNames() only fills the copy
// of the TU that called it. Consider `extern` + a single .cpp definition.
static std::vector<cv::Scalar> colors;
// Product of all dimensions; an empty input yields 0.
size_t vectorProduct(const std::vector<int64_t> &vector);
// Decodes a UTF-8 C string into a wide string.
std::wstring charToWstring(const char *str);
// Loads one class name per line and populates `colors` (two per class).
std::vector<std::string> loadNames(const std::string &path);
// Draws boxes, masks and labels onto `image` (alpha-blended in place).
void visualizeDetection(cv::Mat &image, std::vector<Yolov8Result> &results,
const std::vector<std::string> &classNames);
// Aspect-preserving resize plus padding to `newShape` (defaults are on the
// definition in utils.cpp).
void letterbox(const cv::Mat &image, cv::Mat &outImage,
const cv::Size &newShape,
const cv::Scalar &color,
bool auto_,
bool scaleFill,
bool scaleUp,
int stride);
// Maps a box + mask from letterboxed coordinates back to the original image.
void scaleCoords(cv::Rect &coords, cv::Mat &mask,
const float maskThreshold,
const cv::Size &imageShape, const cv::Size &imageOriginalShape);
// Constrains n to [lower, upper].
template <typename T>
T clip(const T &n, const T &lower, const T &upper);
}
@@ -1,243 +0,0 @@
#include <memory>

#include "yolov8Predictor.h"
// Loads the ONNX model and caches input/output metadata. A second output
// tensor means the model has a segmentation head (masks); the number of
// classes is derived from the first output's shape.
YOLOPredictor::YOLOPredictor(const std::string &modelPath,
                             float confThreshold,
                             float iouThreshold,
                             float maskThreshold)
{
    this->confThreshold = confThreshold;
    this->iouThreshold = iouThreshold;
    this->maskThreshold = maskThreshold;
    env = Ort::Env(OrtLoggingLevel::ORT_LOGGING_LEVEL_WARNING, "YOLOV8");
    sessionOptions = Ort::SessionOptions();
    std::vector<std::string> availableProviders = Ort::GetAvailableProviders();
    std::cout << "Inference device: CPU" << std::endl;
    session = Ort::Session(env, modelPath.c_str(), sessionOptions);
    const size_t num_input_nodes = session.GetInputCount();   //==1
    const size_t num_output_nodes = session.GetOutputCount(); //==1,2
    if (num_output_nodes > 1)
    {
        this->hasMask = true;
        std::cout << "Instance Segmentation" << std::endl;
    }
    else
        std::cout << "Object Detection" << std::endl;
    Ort::AllocatorWithDefaultOptions allocator;
    for (int i = 0; i < (int)num_input_nodes; i++)
    {
        auto input_name = session.GetInputNameAllocated(i, allocator);
        this->inputNames.push_back(input_name.get());
        input_names_ptr.push_back(std::move(input_name));
        Ort::TypeInfo inputTypeInfo = session.GetInputTypeInfo(i);
        std::vector<int64_t> inputTensorShape = inputTypeInfo.GetTensorTypeAndShapeInfo().GetShape();
        this->inputShapes.push_back(inputTensorShape);
        // Bug fix: this was unconditionally set to true, which made the
        // dynamic-shape check below dead code (every model was treated as
        // dynamic). Default to false; enable only when width and height are
        // really dynamic (-1 in the shape).
        this->isDynamicInputShape = false;
        if (inputTensorShape[2] == -1 && inputTensorShape[3] == -1)
        {
            std::cout << "Dynamic input shape" << std::endl;
            this->isDynamicInputShape = true;
        }
    }
    for (int i = 0; i < (int)num_output_nodes; i++)
    {
        auto output_name = session.GetOutputNameAllocated(i, allocator);
        this->outputNames.push_back(output_name.get());
        output_names_ptr.push_back(std::move(output_name));
        Ort::TypeInfo outputTypeInfo = session.GetOutputTypeInfo(i);
        std::vector<int64_t> outputTensorShape = outputTypeInfo.GetTensorTypeAndShapeInfo().GetShape();
        this->outputShapes.push_back(outputTensorShape);
        if (i == 0)
        {
            // Output 0 layout is [1, 4 + numClasses (+ 32 mask coeffs), anchors].
            if (!this->hasMask)
                classNums = outputTensorShape[1] - 4;
            else
                classNums = outputTensorShape[1] - 4 - 32;
        }
    }
}
// Scans the class-score section of one prediction row (elements 4..4+n; the
// first 4 elements are the box) and returns the best confidence and its class
// index via the out-parameters.
void YOLOPredictor::getBestClassInfo(std::vector<float>::iterator it,
                                     float &bestConf,
                                     int &bestClassId,
                                     const int _classNums)
{
    // Bug fix: bestClassId used to be initialized to 4, which is not a valid
    // class index - if every score were 0 the caller would have received
    // class id 4. Initialize to 0 instead.
    bestClassId = 0;
    bestConf = 0;
    for (int i = 4; i < _classNums + 4; i++)
    {
        if (it[i] > bestConf)
        {
            bestConf = it[i];
            bestClassId = i - 4;
        }
    }
}
// Combines one detection's mask coefficients with the prototype masks:
// (coefficients x prototypes) -> sigmoid -> upscale to the network input size.
cv::Mat YOLOPredictor::getMask(const cv::Mat &maskProposals,
const cv::Mat &maskProtos)
{
// Flatten the prototypes to [channels, h*w] so a single matrix multiply
// mixes them per detection.
cv::Mat protos = maskProtos.reshape(0, {(int)this->outputShapes[1][1], (int)this->outputShapes[1][2] * (int)this->outputShapes[1][3]});
cv::Mat matmul_res = (maskProposals * protos).t();
cv::Mat masks = matmul_res.reshape(1, {(int)this->outputShapes[1][2], (int)this->outputShapes[1][3]});
cv::Mat dest;
// sigmoid: 1 / (1 + e^-x)
cv::exp(-masks, dest);
dest = 1.0 / (1.0 + dest);
// NOTE(review): the 4th cv::resize argument binds to fx, not interpolation;
// harmless because dsize is given and INTER_LINEAR is the default anyway.
cv::resize(dest, dest, cv::Size((int)this->inputShapes[0][2], (int)this->inputShapes[0][3]), cv::INTER_LINEAR);
return dest;
}
// Converts a BGR frame into the float CHW blob the network expects:
// BGR->RGB, letterbox to the model input size, scale to [0,1], HWC->CHW.
// Allocates `blob` with new[]; ownership passes to the caller (predict()
// frees it).
void YOLOPredictor::preprocessing(cv::Mat &image, float *&blob, std::vector<int64_t> &inputTensorShape)
{
cv::Mat resizedImage, floatImage;
cv::cvtColor(image, resizedImage, cv::COLOR_BGR2RGB);
utils::letterbox(resizedImage, resizedImage, cv::Size((int)this->inputShapes[0][2], (int)this->inputShapes[0][3]),
cv::Scalar(114, 114, 114), this->isDynamicInputShape,
false, true, 32);
// Report the actual post-letterbox size back to the caller.
inputTensorShape[2] = resizedImage.rows;
inputTensorShape[3] = resizedImage.cols;
resizedImage.convertTo(floatImage, CV_32FC3, 1 / 255.0);
blob = new float[floatImage.cols * floatImage.rows * floatImage.channels()];
cv::Size floatImageSize{floatImage.cols, floatImage.rows};
// hwc -> chw: split the channels directly into the blob's channel planes.
std::vector<cv::Mat> chw(floatImage.channels());
for (int i = 0; i < floatImage.channels(); ++i)
{
chw[i] = cv::Mat(floatImageSize, CV_32FC1, blob + i * floatImageSize.width * floatImageSize.height);
}
cv::split(floatImage, chw);
}
// Converts the raw output tensors into final detections: transpose the
// [1, 4+n(+32), anchors] output to per-anchor rows, keep rows whose best
// class score passes confThreshold, decode center/size boxes to top-left
// form, run NMS, then build masks (segmentation models only) and map
// boxes/masks back to original-image coordinates.
std::vector<Yolov8Result> YOLOPredictor::postprocessing(const cv::Size &resizedImageShape,
const cv::Size &originalImageShape,
std::vector<Ort::Value> &outputTensors)
{
// for box
std::vector<cv::Rect> boxes;
std::vector<float> confs;
std::vector<int> classIds;
float *boxOutput = outputTensors[0].GetTensorMutableData<float>();
//[1,4+n,8400]=>[1,8400,4+n] or [1,4+n+32,8400]=>[1,8400,4+n+32]
cv::Mat output0 = cv::Mat(cv::Size((int)this->outputShapes[0][2], (int)this->outputShapes[0][1]), CV_32F, boxOutput).t();
float *output0ptr = (float *)output0.data;
int rows = (int)this->outputShapes[0][2];
int cols = (int)this->outputShapes[0][1];
// if hasMask: per-detection mask coefficients are collected here
std::vector<std::vector<float>> picked_proposals;
cv::Mat mask_protos;
for (int i = 0; i < rows; i++)
{
std::vector<float> it(output0ptr + i * cols, output0ptr + (i + 1) * cols);
float confidence;
int classId;
this->getBestClassInfo(it.begin(), confidence, classId, classNums);
if (confidence > this->confThreshold)
{
if (this->hasMask)
{
// Trailing 32 values of the row are this anchor's mask coefficients.
std::vector<float> temp(it.begin() + 4 + classNums, it.end());
picked_proposals.push_back(temp);
}
// Decode (cx, cy, w, h) -> top-left corner + size.
int centerX = (int)(it[0]);
int centerY = (int)(it[1]);
int width = (int)(it[2]);
int height = (int)(it[3]);
int left = centerX - width / 2;
int top = centerY - height / 2;
boxes.emplace_back(left, top, width, height);
confs.emplace_back(confidence);
classIds.emplace_back(classId);
}
}
// Non-maximum suppression over all candidate boxes.
std::vector<int> indices;
cv::dnn::NMSBoxes(boxes, confs, this->confThreshold, this->iouThreshold, indices);
if (this->hasMask)
{
float *maskOutput = outputTensors[1].GetTensorMutableData<float>();
std::vector<int> mask_protos_shape = {1, (int)this->outputShapes[1][1], (int)this->outputShapes[1][2], (int)this->outputShapes[1][3]};
mask_protos = cv::Mat(mask_protos_shape, CV_32F, maskOutput);
}
std::vector<Yolov8Result> results;
for (int idx : indices)
{
Yolov8Result res;
res.box = cv::Rect(boxes[idx]);
if (this->hasMask)
res.boxMask = this->getMask(cv::Mat(picked_proposals[idx]).t(), mask_protos);
else
res.boxMask = cv::Mat::zeros((int)this->inputShapes[0][2], (int)this->inputShapes[0][3], CV_8U);
// Map the box (and mask) back into original-image coordinates.
utils::scaleCoords(res.box, res.boxMask, this->maskThreshold, resizedImageShape, originalImageShape);
res.conf = confs[idx];
res.classId = classIds[idx];
results.emplace_back(res);
}
return results;
}
// Runs the full pipeline on one image: preprocess -> ONNX session run ->
// postprocess. Returns the final, NMS-filtered detections.
std::vector<Yolov8Result> YOLOPredictor::predict(cv::Mat &image)
{
    float *blob = nullptr;
    std::vector<int64_t> inputTensorShape{1, 3, -1, -1};
    this->preprocessing(image, blob, inputTensorShape);
    // preprocessing() allocates `blob` with new[]; take ownership immediately
    // so it is freed even if tensor creation or Run() throws (the manual
    // delete[] at the end of the original leaked on any exception).
    std::unique_ptr<float[]> blobGuard(blob);
    size_t inputTensorSize = utils::vectorProduct(inputTensorShape);
    std::vector<float> inputTensorValues(blob, blob + inputTensorSize);
    std::vector<Ort::Value> inputTensors;
    Ort::MemoryInfo memoryInfo = Ort::MemoryInfo::CreateCpu(
        OrtAllocatorType::OrtArenaAllocator, OrtMemType::OrtMemTypeDefault);
    inputTensors.push_back(Ort::Value::CreateTensor<float>(
        memoryInfo, inputTensorValues.data(), inputTensorSize,
        inputTensorShape.data(), inputTensorShape.size()));
    std::vector<Ort::Value> outputTensors = this->session.Run(Ort::RunOptions{nullptr},
                                                              this->inputNames.data(),
                                                              inputTensors.data(),
                                                              1,
                                                              this->outputNames.data(),
                                                              this->outputNames.size());
    // preprocessing() wrote the actual letterboxed size into the shape.
    cv::Size resizedShape = cv::Size((int)inputTensorShape[3], (int)inputTensorShape[2]);
    std::vector<Yolov8Result> result = this->postprocessing(resizedShape,
                                                            image.size(),
                                                            outputTensors);
    return result;
}
@@ -1,50 +0,0 @@
#pragma once
#include <opencv2/opencv.hpp>
#include <onnxruntime_cxx_api.h>
#include <utility>
#include "utils.h"
// Thin C++ wrapper around an ONNX Runtime session for YOLOv8 detection and
// (optionally) instance-segmentation models.
class YOLOPredictor
{
public:
explicit YOLOPredictor(std::nullptr_t){};
YOLOPredictor(const std::string &modelPath,
float confThreshold,
float iouThreshold,
float maskThreshold);
// ~YOLOPredictor();
// Full pipeline: preprocess, run the session, postprocess with NMS.
std::vector<Yolov8Result> predict(cv::Mat &image);
int classNums = 10; // overwritten from the model's output shape in the ctor
private:
Ort::Env env{nullptr};
Ort::SessionOptions sessionOptions{nullptr};
Ort::Session session{nullptr};
// Image -> float CHW blob (allocated with new[]; caller frees).
void preprocessing(cv::Mat &image, float *&blob, std::vector<int64_t> &inputTensorShape);
// Raw output tensors -> NMS-filtered detections in original-image coords.
std::vector<Yolov8Result> postprocessing(const cv::Size &resizedImageShape,
const cv::Size &originalImageShape,
std::vector<Ort::Value> &outputTensors);
// Best class score/index within one prediction row.
static void getBestClassInfo(std::vector<float>::iterator it,
float &bestConf,
int &bestClassId,
const int _classNums);
// Mask coefficients x prototypes -> sigmoid mask at network input size.
cv::Mat getMask(const cv::Mat &maskProposals, const cv::Mat &maskProtos);
bool isDynamicInputShape{};
std::vector<const char *> inputNames;                 // borrowed from input_names_ptr
std::vector<Ort::AllocatedStringPtr> input_names_ptr; // owns the name strings
std::vector<const char *> outputNames;
std::vector<Ort::AllocatedStringPtr> output_names_ptr;
std::vector<std::vector<int64_t>> inputShapes;
std::vector<std::vector<int64_t>> outputShapes;
float confThreshold = 0.3f;
float iouThreshold = 0.4f;
bool hasMask = false;
float maskThreshold = 0.5f;
};
@@ -1,53 +0,0 @@
#include <QDebug>
#include <QThread>
#include "aiengineinferenceopencvonnx.h"
// Builds the OpenCV DNN wrapper with a fixed 640x640 input size.
// NOTE(review): "classes.txt" is passed but the wrapper's class list is
// hard-coded (see inference.cpp); the commented lines below are earlier
// construction variants kept for reference.
AiEngineInferenceOpencvOnnx::AiEngineInferenceOpencvOnnx(QString modelPath, QObject *parent)
: AiEngineInference{modelPath, parent},
mInference(modelPath.toStdString(), cv::Size(640, 640), "classes.txt")
{
//qDebug() << "TUOMAS test mModelPath=" << mModelPath;
//mEngine = new InferenceEngine(modelPath.toStdString());
//mInference = new Inference(modelPath.toStdString(), cv::Size(INFERENCE_SQUARE_WIDTH, INFERENCE_SQUARE_HEIGHT), "classes.txt");
}
// Runs the OpenCV-DNN detector on one frame (inference thread). mActive stays
// true while working so producers drop frames instead of queueing them.
void AiEngineInferenceOpencvOnnx::performInferenceSlot(cv::Mat frame)
{
    mActive = true;
    try {
        cv::Mat scaledImage = resizeAndPad(frame);
        std::vector<Detection> detections = mInference.runInference(scaledImage);
        AiEngineInferenceResult result;
        for (uint i = 0; i < detections.size(); ++i) {
            const Detection &detection = detections[i];
            // Add detected objects to the results.
            AiEngineObject object;
            object.classId = detection.class_id;
            object.propability = detection.confidence;
            object.rectangle.top = detection.box.y;
            object.rectangle.left = detection.box.x;
            object.rectangle.bottom = detection.box.y + detection.box.height;
            object.rectangle.right = detection.box.x + detection.box.width;
            result.objects.append(object);
        }
        if (result.objects.empty() == false) {
            result.frame = mInference.drawLabels(scaledImage, detections);
            emit resultsReady(result);
        }
    }
    catch (const cv::Exception& e) {
        std::cerr << "performInferenceSlot() Error: " << e.what() << std::endl;
    }
    // Bug fix: mActive was only reset inside the try block, so a cv::Exception
    // left it stuck at true and every subsequent frame was silently dropped.
    mActive = false;
}
@@ -1,18 +0,0 @@
#pragma once
#include <QObject>
#include "aiengineinference.h"
#include "src-opencv-onnx/inference.h"
// OpenCV-DNN based detection backend (YOLOv8 ONNX via cv::dnn).
class AiEngineInferenceOpencvOnnx : public AiEngineInference
{
Q_OBJECT
public:
explicit AiEngineInferenceOpencvOnnx(QString modelPath, QObject *parent = nullptr);
public slots:
// Runs detection on one frame; emits resultsReady() when objects are found.
void performInferenceSlot(cv::Mat frame) override;
private:
Inference mInference; // OpenCV DNN session wrapper
};
-223
View File
@@ -1,223 +0,0 @@
#include "inference.h"
// Stores configuration and loads the ONNX network via OpenCV DNN.
// NOTE(review): classesTxtFile and runWithCuda are recorded here; within the
// visible code only the model is loaded and the input size is logged - the
// class list is hard-coded rather than read from the file.
Inference::Inference(const std::string &onnxModelPath, const cv::Size &modelInputShape, const std::string &classesTxtFile, const bool &runWithCuda)
{
modelPath = onnxModelPath;
modelShape = modelInputShape;
classesPath = classesTxtFile;
cudaEnabled = runWithCuda;
std::cout << "SIZE = " << modelInputShape.width << "x" << modelInputShape.height << std::endl;
loadOnnxNetwork();
// loadClassesFromFile(); The classes are hard-coded for this example
}
// Forward `input` through the network and return the NMS-filtered detections.
// Supports both YOLOv5 and YOLOv8 output tensor layouts (detected at runtime).
std::vector<Detection> Inference::runInference(const cv::Mat &input)
{
    cv::Mat modelInput = input;
    if (letterBoxForSquare && modelShape.width == modelShape.height)
        modelInput = formatToSquare(modelInput);
    cv::Mat blob;
    // Normalize to [0,1], resize to the model input size, swap R/B channels.
    cv::dnn::blobFromImage(modelInput, blob, 1.0/255.0, modelShape, cv::Scalar(), true, false);
    net.setInput(blob);
    std::vector<cv::Mat> outputs;
    net.forward(outputs, net.getUnconnectedOutLayersNames());
    int rows = outputs[0].size[1];
    int dimensions = outputs[0].size[2];
    bool yolov8 = false;
    // yolov5 has an output of shape (batchSize, 25200, 85) (Num classes + box[x,y,w,h] + confidence[c])
    // yolov8 has an output of shape (batchSize, 84, 8400) (Num classes + box[x,y,w,h])
    if (dimensions > rows) // Check if the shape[2] is more than shape[1] (yolov8)
    {
        yolov8 = true;
        rows = outputs[0].size[2];
        dimensions = outputs[0].size[1];
        // Transpose so that each row is one candidate detection.
        outputs[0] = outputs[0].reshape(1, dimensions);
        cv::transpose(outputs[0], outputs[0]);
    }
    float *data = (float *)outputs[0].data;
    // Scale factors mapping network coordinates back to the input image
    // (modelShape is cv::Size2f, so these are float divisions).
    float x_factor = modelInput.cols / modelShape.width;
    float y_factor = modelInput.rows / modelShape.height;
    std::vector<int> class_ids;
    std::vector<float> confidences;
    std::vector<cv::Rect> boxes;
    for (int i = 0; i < rows; ++i)
    {
        if (yolov8)
        {
            // YOLOv8 row layout: [x, y, w, h, class scores...] — no objectness.
            float *classes_scores = data+4;
            cv::Mat scores(1, classes.size(), CV_32FC1, classes_scores);
            cv::Point class_id;
            double maxClassScore;
            minMaxLoc(scores, 0, &maxClassScore, 0, &class_id);
            if (maxClassScore > modelScoreThreshold)
            {
                confidences.push_back(maxClassScore);
                class_ids.push_back(class_id.x);
                // Center/size box -> top-left box in input-image pixels.
                float x = data[0];
                float y = data[1];
                float w = data[2];
                float h = data[3];
                int left = int((x - 0.5 * w) * x_factor);
                int top = int((y - 0.5 * h) * y_factor);
                int width = int(w * x_factor);
                int height = int(h * y_factor);
                boxes.push_back(cv::Rect(left, top, width, height));
            }
        }
        else // yolov5
        {
            // YOLOv5 row layout: [x, y, w, h, objectness, class scores...].
            float confidence = data[4];
            if (confidence >= modelConfidenceThreshold)
            {
                float *classes_scores = data+5;
                cv::Mat scores(1, classes.size(), CV_32FC1, classes_scores);
                cv::Point class_id;
                double max_class_score;
                minMaxLoc(scores, 0, &max_class_score, 0, &class_id);
                if (max_class_score > modelScoreThreshold)
                {
                    confidences.push_back(confidence);
                    class_ids.push_back(class_id.x);
                    float x = data[0];
                    float y = data[1];
                    float w = data[2];
                    float h = data[3];
                    int left = int((x - 0.5 * w) * x_factor);
                    int top = int((y - 0.5 * h) * y_factor);
                    int width = int(w * x_factor);
                    int height = int(h * y_factor);
                    boxes.push_back(cv::Rect(left, top, width, height));
                }
            }
        }
        data += dimensions;
    }
    // Suppress overlapping boxes; surviving indices land in nms_result.
    std::vector<int> nms_result;
    cv::dnn::NMSBoxes(boxes, confidences, modelScoreThreshold, modelNMSThreshold, nms_result);
    std::vector<Detection> detections{};
    for (unsigned long i = 0; i < nms_result.size(); ++i)
    {
        int idx = nms_result[i];
        Detection result;
        result.class_id = class_ids[idx];
        result.confidence = confidences[idx];
        // Random display color per detection.
        // NOTE(review): std::random_device is re-opened for every detection;
        // hoisting the generator out of the loop would be cheaper.
        std::random_device rd;
        std::mt19937 gen(rd());
        std::uniform_int_distribution<int> dis(100, 255);
        result.color = cv::Scalar(dis(gen),
                                  dis(gen),
                                  dis(gen));
        result.className = classes[result.class_id];
        result.box = boxes[idx];
        detections.push_back(result);
    }
    return detections;
}
// Populate `classes` with one entry per line of the file at classesPath.
// Leaves `classes` untouched when the file cannot be opened.
void Inference::loadClassesFromFile()
{
    std::ifstream inputFile(classesPath);
    if (!inputFile.is_open())
        return;
    std::string classLine;
    while (std::getline(inputFile, classLine))
        classes.push_back(classLine);
    inputFile.close();
}
// Load the ONNX model from modelPath and select the execution backend
// (CUDA when cudaEnabled, plain OpenCV CPU otherwise).
void Inference::loadOnnxNetwork()
{
    printf("loadOnnxNetwork() starts\n");
    net = cv::dnn::readNetFromONNX(modelPath);
    if (!cudaEnabled)
    {
        std::cout << "\nRunning on CPU" << std::endl;
        net.setPreferableBackend(cv::dnn::DNN_BACKEND_OPENCV);
        net.setPreferableTarget(cv::dnn::DNN_TARGET_CPU);
        return;
    }
    std::cout << "\nRunning on CUDA" << std::endl;
    net.setPreferableBackend(cv::dnn::DNN_BACKEND_CUDA);
    net.setPreferableTarget(cv::dnn::DNN_TARGET_CUDA);
}
// Letterbox `source` onto a side x side black canvas (side = longer edge),
// top-left aligned, so the aspect ratio is preserved for square models.
cv::Mat Inference::formatToSquare(const cv::Mat &source)
{
    const int col = source.cols;
    const int row = source.rows;
    // Use a plain conditional instead of the OpenCV MAX() macro: macros
    // evaluate arguments twice and shadow std::max semantics.
    const int side = (col > row) ? col : row;
    cv::Mat result = cv::Mat::zeros(side, side, CV_8UC3);
    source.copyTo(result(cv::Rect(0, 0, col, row)));
    return result;
}
// Return a copy of `image` with a green box and a "<class>: <confidence>"
// caption (black text on a white strip) drawn for every detection.
cv::Mat Inference::drawLabels(const cv::Mat &image, const std::vector<Detection> &detections)
{
    cv::Mat annotated = image.clone();
    for (const Detection &det : detections)
    {
        // Bounding box.
        cv::rectangle(annotated, det.box, cv::Scalar(0, 255, 0), 2);
        // Caption with a filled white background sized to the text.
        const std::string label = det.className + ": " + std::to_string(det.confidence);
        int baseLine = 0;
        const cv::Size labelSize = cv::getTextSize(label, cv::FONT_HERSHEY_SIMPLEX, 0.5, 1, &baseLine);
        cv::rectangle(annotated,
                      cv::Point(det.box.x, det.box.y - labelSize.height),
                      cv::Point(det.box.x + labelSize.width, det.box.y + baseLine),
                      cv::Scalar(255, 255, 255),
                      cv::FILLED);
        cv::putText(annotated,
                    label,
                    cv::Point(det.box.x, det.box.y),
                    cv::FONT_HERSHEY_SIMPLEX,
                    0.5,
                    cv::Scalar(0, 0, 0),
                    1);
    }
    return annotated;
}
-51
View File
@@ -1,51 +0,0 @@
#pragma once
// Cpp native
#include <fstream>
#include <vector>
#include <string>
#include <random>
// OpenCV / DNN / Inference
#include <opencv2/imgproc.hpp>
#include <opencv2/opencv.hpp>
#include <opencv2/dnn.hpp>
// One detected object as produced by Inference::runInference().
struct Detection
{
    int class_id{0};          // index into Inference's class list
    std::string className{};  // human-readable class name
    float confidence{0.0};    // detection confidence
    cv::Scalar color{};       // random display color assigned per detection
    cv::Rect box{};           // bounding box in input-image coordinates
};
// Thin wrapper around cv::dnn for running YOLOv5/YOLOv8 ONNX detectors.
class Inference
{
public:
    // onnxModelPath: .onnx file to load; modelInputShape: network input size;
    // classesTxtFile: optional class-list file (currently unused — see below);
    // runWithCuda: select the CUDA backend instead of the CPU backend.
    Inference(const std::string &onnxModelPath, const cv::Size &modelInputShape = {640, 640}, const std::string &classesTxtFile = "", const bool &runWithCuda = false);
    // Runs the network on `input` and returns NMS-filtered detections.
    std::vector<Detection> runInference(const cv::Mat &input);
    // Returns a copy of `image` with boxes and captions drawn on it.
    cv::Mat drawLabels(const cv::Mat &image, const std::vector<Detection> &detections);
private:
    void loadClassesFromFile();  // fills `classes` from classesPath (call is commented out in the ctor)
    void loadOnnxNetwork();      // builds `net` and picks CPU/CUDA backend
    cv::Mat formatToSquare(const cv::Mat &source);  // letterbox helper
    std::string modelPath{};
    std::string classesPath{};
    bool cudaEnabled{};
    // COCO class names, hard-coded instead of loaded from classesPath.
    std::vector<std::string> classes{"person", "bicycle", "car", "motorcycle", "airplane", "bus", "train", "truck", "boat", "traffic light", "fire hydrant", "stop sign", "parking meter", "bench", "bird", "cat", "dog", "horse", "sheep", "cow", "elephant", "bear", "zebra", "giraffe", "backpack", "umbrella", "handbag", "tie", "suitcase", "frisbee", "skis", "snowboard", "sports ball", "kite", "baseball bat", "baseball glove", "skateboard", "surfboard", "tennis racket", "bottle", "wine glass", "cup", "fork", "knife", "spoon", "bowl", "banana", "apple", "sandwich", "orange", "broccoli", "carrot", "hot dog", "pizza", "donut", "cake", "chair", "couch", "potted plant", "bed", "dining table", "toilet", "tv", "laptop", "mouse", "remote", "keyboard", "cell phone", "microwave", "oven", "toaster", "sink", "refrigerator", "book", "clock", "vase", "scissors", "teddy bear", "hair drier", "toothbrush"};
    cv::Size2f modelShape{};
    float modelConfidenceThreshold {0.25};  // YOLOv5 objectness threshold
    float modelScoreThreshold {0.45};       // per-class score threshold (also passed to NMSBoxes)
    float modelNMSThreshold {0.50};         // NMS IoU threshold
    bool letterBoxForSquare = false;        // pad input to square before inference
    cv::dnn::Net net;
};
@@ -1,168 +0,0 @@
#include <QDebug>
#include <QThread>
#include <opencv2/imgproc.hpp>
#include <opencv2/highgui.hpp>
#include <iomanip>
#include "aiengineinferenceopi5.h"
#include "file_utils.h"
#include "image_drawing.h"
// Loads the RKNN YOLOv8 model and initializes post-processing tables for
// the Orange Pi 5 NPU backend.
AiEngineInferenceOpi5::AiEngineInferenceOpi5(QString modelPath, QObject *parent)
    : AiEngineInference{modelPath, parent}
{
    qDebug() << "AiEngineInferenceOpi5() test mModelPath=" << mModelPath;
    memset(&mRrknnAppCtx, 0, sizeof(rknn_app_context_t));
    init_post_process();
    int ret = init_yolov8_model(modelPath.toLocal8Bit(), &mRrknnAppCtx);
    if (ret != 0) {
        // NOTE(review): on failure the object remains constructed with a
        // zeroed model context and performInferenceSlot() is still callable
        // — confirm callers guard against this.
        qDebug() << "init_yolov8_model() failure! ret: " << ret << "modelPath = " << modelPath;
        return;
    }
}
// Releases the post-processing tables and the RKNN model context.
AiEngineInferenceOpi5::~AiEngineInferenceOpi5()
{
    deinit_post_process();
    release_yolov8_model(&mRrknnAppCtx);
}
// Deep-copies an OpenCV BGR frame into a heap-allocated RGB888
// image_buffer_t for the RKNN API. The caller must release the buffer
// with freeImageBuffer().
image_buffer_t AiEngineInferenceOpi5::convertCV2FrameToImageBuffer(const cv::Mat& bgrFrame)
{
    // Convert BGR to RGB
    cv::Mat rgbFrame;
    cv::cvtColor(bgrFrame, rgbFrame, cv::COLOR_BGR2RGB);
    image_buffer_t imgBuffer;
    memset(&imgBuffer, 0, sizeof(image_buffer_t));
    imgBuffer.width = rgbFrame.cols;
    imgBuffer.height = rgbFrame.rows;
    // NOTE(review): cv::Mat::step is in bytes (width * 3 for RGB888) while
    // height_stride is set in rows — confirm which units the RKNN API
    // expects for width_stride (pixels vs bytes).
    imgBuffer.width_stride = rgbFrame.step;
    imgBuffer.height_stride = rgbFrame.rows;
    imgBuffer.format = IMAGE_FORMAT_RGB888;
    imgBuffer.size = rgbFrame.total() * rgbFrame.elemSize();
    imgBuffer.virt_addr = new unsigned char[imgBuffer.size];
    std::memcpy(imgBuffer.virt_addr, rgbFrame.data, imgBuffer.size);
    return imgBuffer;
}
// Release the pixel buffer allocated by convertCV2FrameToImageBuffer().
// Safe to call more than once: virt_addr is reset to null afterwards.
void AiEngineInferenceOpi5::freeImageBuffer(image_buffer_t& imgBuffer)
{
    if (imgBuffer.virt_addr == nullptr) {
        return;
    }
    delete[] imgBuffer.virt_addr;
    imgBuffer.virt_addr = nullptr;
}
// Shrink the frame to half size and paste it into the top-left corner of a
// black 640x640 canvas (the model's expected input layout).
cv::Mat AiEngineInferenceOpi5::resizeToHalfAndAssigntoTopLeft640x640(const cv::Mat& inputFrame)
{
    // Resize input frame to half size
    cv::Mat resizedFrame;
    cv::resize(inputFrame, resizedFrame, cv::Size(), 0.5, 0.5);
    // Create a 640x640 frame to place the resized frame
    cv::Mat outputFrame = cv::Mat::zeros(640, 640, inputFrame.type());
    // BUGFIX: clamp the ROI so inputs wider/taller than 1280 px cannot
    // overflow the 640x640 canvas (copyTo would throw on an invalid ROI).
    const int roiWidth  = (resizedFrame.cols < 640) ? resizedFrame.cols : 640;
    const int roiHeight = (resizedFrame.rows < 640) ? resizedFrame.rows : 640;
    cv::Rect roi(0, 0, roiWidth, roiHeight);
    resizedFrame(roi).copyTo(outputFrame(roi));
    return outputFrame;
}
// Draw a blue box and a red "<class> <prob>%" caption on `image` for every
// entry in `result_list`. Coordinates come straight from the RKNN results.
void AiEngineInferenceOpi5::drawObjects(cv::Mat& image, const object_detect_result_list& result_list)
{
    for (int i = 0; i < result_list.count; i++) {
        const object_detect_result& result = result_list.results[i];
        int left = result.box.left;
        int top = result.box.top;
        int right = result.box.right;
        int bottom = result.box.bottom;
        cv::rectangle(image, cv::Point(left, top), cv::Point(right, bottom), cv::Scalar(255, 0, 0), 2);
        // Text
        char c_text[256];
        // BUGFIX: snprintf instead of sprintf — a long class name cannot
        // overflow the stack buffer.
        snprintf(c_text, sizeof(c_text), "%s %.1f%%", coco_cls_to_name(result.cls_id), result.prop * 100);
        cv::Point textOrg(left, top - 5);
        cv::putText(image, std::string(c_text), textOrg, cv::FONT_HERSHEY_SIMPLEX, 0.5, cv::Scalar(0, 0, 255), 1);
    }
}
// Runs one frame through the RKNN YOLOv8 model on the NPU, converts the
// detections into AiEngineObjects and emits resultsReady() with an
// annotated copy of the half-size letterboxed frame.
// Sets mActive while inference is in progress.
void AiEngineInferenceOpi5::performInferenceSlot(cv::Mat frame)
{
    //qDebug() << "performInferenceSlot() in thread: " << QThread::currentThreadId();
    mActive = true;
    cv::Mat scaledFrame = resizeToHalfAndAssigntoTopLeft640x640(frame);
    image_buffer_t imgBuffer = convertCV2FrameToImageBuffer(scaledFrame);
    object_detect_result_list od_results;
    int ret = inference_yolov8_model(&mRrknnAppCtx, &imgBuffer, &od_results);
    if (ret != 0) {
        qDebug() << "inference_yolov8_model() failure! ret: " << ret;
        // BUGFIX: the original early return leaked imgBuffer and left
        // mActive stuck at true, permanently marking the engine busy.
        freeImageBuffer(imgBuffer);
        mActive = false;
        return;
    }
    AiEngineInferenceResult result;
    for (int i = 0; i < od_results.count; i++) {
        object_detect_result *det_result = &(od_results.results[i]);
        AiEngineObject object;
        object.classId = det_result->cls_id;
        object.propability = det_result->prop;
        object.rectangle.top = det_result->box.top;
        object.rectangle.left = det_result->box.left;
        object.rectangle.bottom = det_result->box.bottom;
        object.rectangle.right = det_result->box.right;
        result.objects.append(object);
    }
    // Annotate the frame for display, then release the RKNN pixel buffer.
    drawObjects(scaledFrame, od_results);
    freeImageBuffer(imgBuffer);
    result.frame = scaledFrame.clone();
    emit resultsReady(result);
    mActive = false;
}
@@ -1,25 +0,0 @@
#pragma once
#include <QObject>
#include "aiengineinference.h"
#include "yolov8.h"
#include "image_utils.h"
// RKNN (Orange Pi 5 NPU) backend for AiEngineInference: runs a YOLOv8
// model via the rknn-model-zoo C API.
class AiEngineInferenceOpi5 : public AiEngineInference
{
    Q_OBJECT
public:
    // modelPath: path to the .rknn model file.
    explicit AiEngineInferenceOpi5(QString modelPath, QObject *parent = nullptr);
    ~AiEngineInferenceOpi5();
public slots:
    // Runs one frame on the NPU; emits resultsReady() with detections.
    void performInferenceSlot(cv::Mat frame) override;
private:
    // Converts a BGR cv::Mat into an RGB888 image_buffer_t (heap copy).
    image_buffer_t convertCV2FrameToImageBuffer(const cv::Mat& bgrFrame);
    // Frees the buffer returned by convertCV2FrameToImageBuffer().
    void freeImageBuffer(image_buffer_t& imgBuffer);
    // Half-size resize pasted top-left onto a 640x640 black canvas.
    cv::Mat resizeToHalfAndAssigntoTopLeft640x640(const cv::Mat& inputFrame);
    // Draws detection boxes and captions onto `image`.
    void drawObjects(cv::Mat& image, const object_detect_result_list& result_list);
    rknn_app_context_t mRrknnAppCtx; // RKNN runtime context, zeroed in the ctor
};
Binary file not shown.

Before

Width:  |  Height:  |  Size: 602 KiB

-42
View File
@@ -1,42 +0,0 @@
#ifndef _RKNN_MODEL_ZOO_COMMON_H_
#define _RKNN_MODEL_ZOO_COMMON_H_
/**
 * @brief Image pixel format
 *
 */
typedef enum {
    IMAGE_FORMAT_GRAY8,
    IMAGE_FORMAT_RGB888,
    IMAGE_FORMAT_RGBA8888,
    IMAGE_FORMAT_YUV420SP_NV21,
    IMAGE_FORMAT_YUV420SP_NV12,
} image_format_t;
/**
 * @brief Image buffer
 *
 */
typedef struct {
    int width;                 // visible width in pixels
    int height;                // visible height in pixels
    int width_stride;          // row stride; NOTE(review): producers disagree on pixels vs bytes — verify
    int height_stride;         // column stride in rows
    image_format_t format;     // pixel layout of the data at virt_addr
    unsigned char* virt_addr;  // CPU-visible pixel data
    int size;                  // total buffer size in bytes
    int fd;                    // driver/DMA file descriptor, if any
} image_buffer_t;
/**
 * @brief Image rectangle
 *
 * Corner coordinates are inclusive: width = right - left + 1.
 */
typedef struct {
    int left;
    int top;
    int right;
    int bottom;
} image_rect_t;
#endif //_RKNN_MODEL_ZOO_COMMON_H_
-129
View File
@@ -1,129 +0,0 @@
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#define MAX_TEXT_LINE_LENGTH 1024
// Read an entire model file into a freshly malloc()ed buffer.
// On success returns the buffer (caller frees) and stores its size in
// *model_size; returns NULL on open/alloc/read failure.
unsigned char* load_model(const char* filename, int* model_size)
{
    FILE* fp = fopen(filename, "rb");
    if (fp == NULL) {
        printf("fopen %s fail!\n", filename);
        return NULL;
    }
    fseek(fp, 0, SEEK_END);
    int model_len = ftell(fp);
    unsigned char* model = (unsigned char*)malloc(model_len);
    if (model == NULL) {
        // BUGFIX: malloc was unchecked — a huge/corrupt file size would
        // have crashed in fread().
        printf("malloc %d bytes fail!\n", model_len);
        fclose(fp);
        return NULL;
    }
    fseek(fp, 0, SEEK_SET);
    if (model_len != fread(model, 1, model_len, fp)) {
        printf("fread %s fail!\n", filename);
        free(model);
        fclose(fp);
        return NULL;
    }
    *model_size = model_len;
    fclose(fp);
    return model;
}
// Read an entire file into a NUL-terminated malloc()ed buffer stored in
// *out_data (caller frees). Returns the file size in bytes, or -1 on error.
int read_data_from_file(const char *path, char **out_data)
{
    FILE *fp = fopen(path, "rb");
    if(fp == NULL) {
        printf("fopen %s fail!\n", path);
        return -1;
    }
    fseek(fp, 0, SEEK_END);
    int file_size = ftell(fp);
    char *data = (char *)malloc(file_size+1);
    if (data == NULL) {
        // BUGFIX: malloc was unchecked before writing data[file_size].
        printf("malloc %d bytes fail!\n", file_size + 1);
        fclose(fp);
        return -1;
    }
    data[file_size] = 0;
    fseek(fp, 0, SEEK_SET);
    if(file_size != fread(data, 1, file_size, fp)) {
        printf("fread %s fail!\n", path);
        free(data);
        fclose(fp);
        return -1;
    }
    fclose(fp); // fp is known non-NULL here; the old `if(fp)` guard was redundant
    *out_data = data;
    return file_size;
}
// Write `size` bytes to `path`, truncating any existing file.
// Returns 0 on success, -1 on open failure or short write.
int write_data_to_file(const char *path, const char *data, unsigned int size)
{
    FILE *fp;
    // BUGFIX: open in binary mode — the readers in this file use "rb", and
    // text mode would mangle the payload on platforms that translate '\n'.
    fp = fopen(path, "wb");
    if(fp == NULL) {
        printf("open error: %s\n", path);
        return -1;
    }
    size_t written = fwrite(data, 1, size, fp);
    fflush(fp);
    fclose(fp);
    // BUGFIX: report short writes instead of silently returning success.
    return (written == size) ? 0 : -1;
}
// Count the lines in `file`: number of '\n' characters plus one for the
// final (possibly unterminated) line. Rewinds the stream before returning.
int count_lines(FILE* file)
{
    int count = 0;
    // BUGFIX: fgetc() returns an int; storing it in a char and looping on
    // feof() processed one bogus extra character after end-of-file. Use the
    // idiomatic int/EOF read loop instead.
    int ch;
    while ((ch = fgetc(file)) != EOF)
    {
        if (ch == '\n')
        {
            count++;
        }
    }
    count += 1;
    rewind(file);
    return count;
}
// Read every line of `filename` (newlines stripped) into a malloc()ed
// NULL-padded string array of *line_count entries.
// Caller releases the result with free_lines(). Returns NULL on error.
char** read_lines_from_file(const char* filename, int* line_count)
{
    FILE* file = fopen(filename, "r");
    if (file == NULL) {
        printf("Failed to open the file.\n");
        return NULL;
    }
    int num_lines = count_lines(file);
    printf("num_lines=%d\n", num_lines);
    char** lines = (char**)malloc(num_lines * sizeof(char*));
    if (lines == NULL) {
        // BUGFIX: malloc was unchecked before the memset below.
        fclose(file);
        return NULL;
    }
    memset(lines, 0, num_lines * sizeof(char*));
    char buffer[MAX_TEXT_LINE_LENGTH];
    int line_index = 0;
    // Guard on line_index too, so a miscount can never overrun the array.
    while (line_index < num_lines && fgets(buffer, sizeof(buffer), file) != NULL) {
        buffer[strcspn(buffer, "\n")] = '\0'; // strip the trailing newline
        lines[line_index] = (char*)malloc(strlen(buffer) + 1);
        if (lines[line_index] == NULL) {
            break; // out of memory — remaining entries stay NULL (free_lines-safe)
        }
        strcpy(lines[line_index], buffer);
        line_index++;
    }
    fclose(file);
    *line_count = num_lines;
    return lines;
}
// Release a string array produced by read_lines_from_file().
// Tolerates a NULL array and NULL entries.
void free_lines(char** lines, int line_count)
{
    if (lines == NULL) {
        // BUGFIX: guard against NULL, which read_lines_from_file() can
        // legitimately return on error.
        return;
    }
    for (int i = 0; i < line_count; i++) {
        if (lines[i] != NULL) {
            free(lines[i]);
        }
    }
    free(lines);
}
-48
View File
@@ -1,48 +0,0 @@
#ifndef _RKNN_MODEL_ZOO_FILE_UTILS_H_
#define _RKNN_MODEL_ZOO_FILE_UTILS_H_
#ifdef __cplusplus
extern "C" {
#endif
/**
 * @brief Read data from file
 *
 * @param path [in] File path
 * @param out_data [out] Read data (malloc()ed, NUL-terminated; caller frees)
 * @return int -1: error; > 0: Read data size
 */
int read_data_from_file(const char *path, char **out_data);
/**
 * @brief Write data to file
 *
 * @param path [in] File path
 * @param data [in] Write data
 * @param size [in] Write data size
 * @return int 0: success; -1: error
 */
int write_data_to_file(const char *path, const char *data, unsigned int size);
/**
 * @brief Read all lines from text file
 *
 * @param path [in] File path
 * @param line_count [out] File line count
 * @return char** String array of all lines, remember to call free_lines() to release it after use
 */
char** read_lines_from_file(const char* path, int* line_count);
/**
 * @brief Free lines string array
 *
 * @param lines [in] String array (NULL is tolerated)
 * @param line_count [in] Line count
 */
void free_lines(char** lines, int line_count);
#ifdef __cplusplus
} // extern "C"
#endif
#endif //_RKNN_MODEL_ZOO_FILE_UTILS_H_
File diff suppressed because it is too large Load Diff
File diff suppressed because it is too large Load Diff
-89
View File
@@ -1,89 +0,0 @@
#ifndef _RKNN_MODEL_ZOO_IMAGE_DRAWING_H_
#define _RKNN_MODEL_ZOO_IMAGE_DRAWING_H_
#ifdef __cplusplus
extern "C" {
#endif
#include "common.h"
// CPU drawing helpers that write directly into image_buffer_t pixel data.
// Color Format ARGB8888 (alpha in the high byte)
#define COLOR_GREEN 0xFF00FF00
#define COLOR_BLUE 0xFF0000FF
#define COLOR_RED 0xFFFF0000
#define COLOR_YELLOW 0xFFFFFF00
#define COLOR_ORANGE 0xFFFF4500
#define COLOR_BLACK 0xFF000000
#define COLOR_WHITE 0xFFFFFFFF
/**
 * @brief Draw rectangle
 *
 * @param image [in] Image buffer
 * @param rx [in] Rectangle top left x
 * @param ry [in] Rectangle top left y
 * @param rw [in] Rectangle width
 * @param rh [in] Rectangle height
 * @param color [in] Rectangle line color (ARGB8888, see defines above)
 * @param thickness [in] Rectangle line thickness
 */
void draw_rectangle(image_buffer_t* image, int rx, int ry, int rw, int rh, unsigned int color,
                    int thickness);
/**
 * @brief Draw line
 *
 * @param image [in] Image buffer
 * @param x0 [in] Line begin point x
 * @param y0 [in] Line begin point y
 * @param x1 [in] Line end point x
 * @param y1 [in] Line end point y
 * @param color [in] Line color (ARGB8888)
 * @param thickness [in] Line thickness
 */
void draw_line(image_buffer_t* image, int x0, int y0, int x1, int y1, unsigned int color,
               int thickness);
/**
 * @brief Draw text (only support ASCII char)
 *
 * @param image [in] Image buffer
 * @param text [in] Text
 * @param x [in] Text position x
 * @param y [in] Text position y
 * @param color [in] Text color (ARGB8888)
 * @param fontsize [in] Text fontsize
 */
void draw_text(image_buffer_t* image, const char* text, int x, int y, unsigned int color,
               int fontsize);
/**
 * @brief Draw circle
 *
 * @param image [in] Image buffer
 * @param cx [in] Circle center x
 * @param cy [in] Circle center y
 * @param radius [in] Circle radius
 * @param color [in] Circle color (ARGB8888)
 * @param thickness [in] Circle thickness
 */
void draw_circle(image_buffer_t* image, int cx, int cy, int radius, unsigned int color,
                 int thickness);
/**
 * @brief Draw image
 *
 * @param image [in] Target Image buffer
 * @param draw_img [in] Image for drawing
 * @param x [in] Target Image draw position x
 * @param y [in] Target Image draw position y
 * @param rw [in] Width of image for drawing
 * @param rh [in] Height of image for drawing
 */
void draw_image(image_buffer_t* image, unsigned char* draw_img, int x, int y, int rw, int rh);
#ifdef __cplusplus
} // extern "C"
#endif
#endif // _RKNN_MODEL_ZOO_IMAGE_DRAWING_H_
-782
View File
@@ -1,782 +0,0 @@
#include <stdio.h>
#include <stdlib.h>
#include <dirent.h>
#include <math.h>
#include <sys/time.h>
#include "im2d.h"
#include "drmrga.h"
#define STB_IMAGE_IMPLEMENTATION
#define STBI_NO_THREAD_LOCALS
#define STBI_ONLY_JPEG
#define STBI_ONLY_PNG
#include "stb_image.h"
#define STB_IMAGE_WRITE_IMPLEMENTATION
#include "stb_image_write.h"
#include "turbojpeg.h"
#include "image_utils.h"
#include "file_utils.h"
// File-name substrings that identify loadable image files.
static const char* filter_image_names[] = {
    "jpg",
    "jpeg",
    "JPG",
    "JPEG",
    "png",
    "PNG",
    "data",
    NULL
};
// Human-readable names for the turbojpeg subsampling / colorspace enums.
static const char* subsampName[TJ_NUMSAMP] = {"4:4:4", "4:2:2", "4:2:0", "Grayscale", "4:4:0", "4:1:1"};
static const char* colorspaceName[TJ_NUMCS] = {"RGB", "YCbCr", "GRAY", "CMYK", "YCCK"};
// Directory-entry filter: accept names containing any of the substrings
// above. NOTE(review): strstr() matches anywhere in the name, so e.g.
// "notes.jpg.bak" is also accepted — confirm this is intended.
static int image_file_filter(const struct dirent *entry)
{
    const char ** filter;
    for (filter = filter_image_names; *filter; ++filter) {
        if(strstr(entry->d_name, *filter) != NULL) {
            return 1;
        }
    }
    return 0;
}
// Decode a JPEG file at `path` into image->virt_addr as RGB888 using
// turbojpeg. If image->virt_addr is NULL a buffer is malloc()ed and
// ownership passes to the caller via the image struct.
// NOTE(review): fopen/fseek/malloc/fread failures are printed but not
// acted on, and the two early `return -1` paths leak jpegBuf and the
// tjhandle — flagging for a follow-up fix.
static int read_image_jpeg(const char* path, image_buffer_t* image)
{
    FILE* jpegFile = NULL;
    unsigned long jpegSize;
    int flags = 0;
    int width, height;
    int origin_width, origin_height;
    unsigned char* imgBuf = NULL;
    unsigned char* jpegBuf = NULL;
    unsigned long size;
    unsigned short orientation = 1;
    struct timeval tv1, tv2;
    if ((jpegFile = fopen(path, "rb")) == NULL) {
        printf("open input file failure\n");
    }
    if (fseek(jpegFile, 0, SEEK_END) < 0 || (size = ftell(jpegFile)) < 0 || fseek(jpegFile, 0, SEEK_SET) < 0) {
        printf("determining input file size failure\n");
    }
    if (size == 0) {
        printf("determining input file size, Input file contains no data\n");
    }
    jpegSize = (unsigned long)size;
    if ((jpegBuf = (unsigned char*)malloc(jpegSize * sizeof(unsigned char))) == NULL) {
        printf("allocating JPEG buffer\n");
    }
    if (fread(jpegBuf, jpegSize, 1, jpegFile) < 1) {
        printf("reading input file");
    }
    fclose(jpegFile);
    jpegFile = NULL;
    tjhandle handle = NULL;
    int subsample, colorspace;
    int padding = 1;
    int ret = 0;
    handle = tjInitDecompress();
    ret = tjDecompressHeader3(handle, jpegBuf, size, &origin_width, &origin_height, &subsample, &colorspace);
    if (ret < 0) {
        printf("header file error, errorStr:%s, errorCode:%d\n", tjGetErrorStr(), tjGetErrorCode(handle));
        return -1;
    }
    // Crop the image to 16-pixel alignment to simplify later RGA operations.
    int crop_width = origin_width / 16 * 16;
    int crop_height = origin_height / 16 * 16;
    printf("origin size=%dx%d crop size=%dx%d\n", origin_width, origin_height, crop_width, crop_height);
    // gettimeofday(&tv1, NULL);
    ret = tjDecompressHeader3(handle, jpegBuf, size, &width, &height, &subsample, &colorspace);
    if (ret < 0) {
        printf("header file error, errorStr:%s, errorCode:%d\n", tjGetErrorStr(), tjGetErrorCode(handle));
        return -1;
    }
    printf("input image: %d x %d, subsampling: %s, colorspace: %s, orientation: %d\n",
           width, height, subsampName[subsample], colorspaceName[colorspace], orientation);
    int sw_out_size = width * height * 3;
    unsigned char* sw_out_buf = image->virt_addr;
    if (sw_out_buf == NULL) {
        sw_out_buf = (unsigned char*)malloc(sw_out_size * sizeof(unsigned char));
    }
    if (sw_out_buf == NULL) {
        printf("sw_out_buf is NULL\n");
        goto out;
    }
    flags |= 0;
    // turbojpeg convention: error code 0 means a warning, -1 a real error.
    int pixelFormat = TJPF_RGB;
    ret = tjDecompress2(handle, jpegBuf, size, sw_out_buf, width, 0, height, pixelFormat, flags);
    // ret = tjDecompressToYUV2(handle, jpeg_buf, size, dst_buf, *width, padding, *height, flags);
    if ((0 != tjGetErrorCode(handle)) && (ret < 0)) {
        printf("error : decompress to yuv failed, errorStr:%s, errorCode:%d\n", tjGetErrorStr(),
               tjGetErrorCode(handle));
        goto out;
    }
    if ((0 == tjGetErrorCode(handle)) && (ret < 0)) {
        printf("warning : errorStr:%s, errorCode:%d\n", tjGetErrorStr(), tjGetErrorCode(handle));
    }
    tjDestroy(handle);
    // gettimeofday(&tv2, NULL);
    // printf("decode time %ld ms\n", (tv2.tv_sec-tv1.tv_sec)*1000 + (tv2.tv_usec-tv1.tv_usec)/1000);
    image->width = width;
    image->height = height;
    image->format = IMAGE_FORMAT_RGB888;
    image->virt_addr = sw_out_buf;
    image->size = sw_out_size;
out:
    if (jpegBuf) {
        free(jpegBuf);
    }
    return 0;
}
// Read a raw ".data" dump into image->virt_addr. When the caller did not
// supply a buffer, one is malloc()ed (NUL-terminated) and its ownership and
// size are recorded in the image struct. Returns 0 on success, -1 on error.
static int read_image_raw(const char* path, image_buffer_t* image)
{
    FILE *fp = fopen(path, "rb");
    if(fp == NULL) {
        printf("fopen %s fail!\n", path);
        return -1;
    }
    fseek(fp, 0, SEEK_END);
    int file_size = ftell(fp);
    // Track whether we allocated the buffer or the caller did.
    int allocated_here = (image->virt_addr == NULL);
    unsigned char *data = image->virt_addr;
    if (allocated_here) {
        data = (unsigned char *)malloc(file_size+1);
        if (data == NULL) {
            fclose(fp);
            return -1;
        }
    }
    data[file_size] = 0;
    fseek(fp, 0, SEEK_SET);
    if(file_size != fread(data, 1, file_size, fp)) {
        printf("fread %s fail!\n", path);
        // BUGFIX: only free the buffer we allocated — the original also
        // free()d a caller-supplied image->virt_addr — and close the file
        // on this path instead of leaking the FILE*.
        if (allocated_here) {
            free(data);
        }
        fclose(fp);
        return -1;
    }
    fclose(fp);
    if (allocated_here) {
        image->virt_addr = data;
        image->size = file_size;
    }
    return 0;
}
// Encode an RGB888 image to a JPEG file at the given quality via turbojpeg.
// Returns 0 on success, -1 for unsupported pixel formats.
static int write_image_jpeg(const char* path, int quality, const image_buffer_t* image)
{
    int ret;
    int jpegSubsamp = TJSAMP_422;
    unsigned char* jpegBuf = NULL;
    unsigned long jpegSize = 0;
    int flags = 0;
    const unsigned char* data = image->virt_addr;
    int width = image->width;
    int height = image->height;
    int pixelFormat = TJPF_RGB;
    tjhandle handle = tjInitCompress();
    if (image->format == IMAGE_FORMAT_RGB888) {
        ret = tjCompress2(handle, data, width, 0, height, pixelFormat, &jpegBuf, &jpegSize, jpegSubsamp, quality, flags);
    } else {
        printf("write_image_jpeg: pixel format %d not support\n", image->format);
        // BUGFIX: destroy the compressor handle before bailing out; the
        // original early return leaked it on every unsupported-format call.
        tjDestroy(handle);
        return -1;
    }
    // printf("ret=%d jpegBuf=%p jpegSize=%d\n", ret, jpegBuf, jpegSize);
    if (jpegBuf != NULL && jpegSize > 0) {
        write_data_to_file(path, (const char*)jpegBuf, jpegSize);
        tjFree(jpegBuf);
    }
    tjDestroy(handle);
    return 0;
}
// Decode an image file via stb_image, keeping the file's own channel count.
// Fills in the dimensions and format of `image`; uses the caller's buffer
// when image->virt_addr is set, otherwise hands over the stb allocation.
// Returns 0 on success, -1 on decode failure.
static int read_image_stb(const char* path, image_buffer_t* image)
{
    // Last argument 0 = keep the file's channel count (typically 3).
    int w, h, c;
    unsigned char* pixeldata = stbi_load(path, &w, &h, &c, 0);
    if (!pixeldata) {
        printf("error: read image %s fail\n", path);
        return -1;
    }
    // printf("load image wxhxc=%dx%dx%d path=%s\n", w, h, c, path);
    int size = w * h * c;
    // Store the pixel data: copy into the caller's buffer when one was
    // provided, otherwise transfer ownership of the stb allocation.
    if (image->virt_addr != NULL) {
        memcpy(image->virt_addr, pixeldata, size);
        stbi_image_free(pixeldata);
    } else {
        image->virt_addr = pixeldata;
    }
    image->width = w;
    image->height = h;
    if (c == 4) {
        image->format = IMAGE_FORMAT_RGBA8888;
    } else if (c == 1) {
        image->format = IMAGE_FORMAT_GRAY8;
    } else {
        image->format = IMAGE_FORMAT_RGB888;
    }
    return 0;
}
// Dispatch on the file extension: raw ".data" dumps and JPEGs have
// dedicated readers, everything else goes through stb_image.
// Returns the reader's result, or -666 when the path has no extension.
int read_image(const char* path, image_buffer_t* image)
{
    const char* ext = strrchr(path, '.');
    if (ext == NULL) {
        return -666; // missing extension
    }
    if (strcmp(ext, ".data") == 0) {
        return read_image_raw(path, image);
    }
    const int is_jpeg = (strcmp(ext, ".jpg") == 0) || (strcmp(ext, ".jpeg") == 0) ||
                        (strcmp(ext, ".JPG") == 0) || (strcmp(ext, ".JPEG") == 0);
    if (is_jpeg) {
        return read_image_jpeg(path, image);
    }
    return read_image_stb(path, image);
}
// Write `img` to `path`, choosing the encoder from the file extension:
// JPEG via turbojpeg, PNG via stb, ".data" as a raw dump.
// Returns the encoder's result, or -1 for a missing/unknown extension.
int write_image(const char* path, const image_buffer_t* img)
{
    int ret;
    int width = img->width;
    int height = img->height;
    int channel = 3;
    void* data = img->virt_addr;
    printf("write_image path: %s width=%d height=%d channel=%d data=%p\n",
           path, width, height, channel, data);
    const char* _ext = strrchr(path, '.');
    if (!_ext) {
        // missing extension
        return -1;
    }
    if (strcmp(_ext, ".jpg") == 0 || strcmp(_ext, ".jpeg") == 0 || strcmp(_ext, ".JPG") == 0 ||
        strcmp(_ext, ".JPEG") == 0) {
        int quality = 95;
        ret = write_image_jpeg(path, quality, img);
    // BUGFIX: the PNG/DATA branches used bitwise `|` instead of logical `||`
    // between the strcmp comparisons.
    } else if (strcmp(_ext, ".png") == 0 || strcmp(_ext, ".PNG") == 0) {
        ret = stbi_write_png(path, width, height, channel, data, 0);
    } else if (strcmp(_ext, ".data") == 0 || strcmp(_ext, ".DATA") == 0) {
        int size = get_image_size(img);
        ret = write_data_to_file(path, (const char*)data, size);
    } else {
        // unknown extension type
        return -1;
    }
    return ret;
}
// Bilinearly sample the crop region of `src` into the destination box of
// `dst`, channel-interleaved with `channel` components per pixel.
// Returns 0 on success, -1 when dst is NULL.
static int crop_and_scale_image_c(int channel, unsigned char *src, int src_width, int src_height,
                                  int crop_x, int crop_y, int crop_width, int crop_height,
                                  unsigned char *dst, int dst_width, int dst_height,
                                  int dst_box_x, int dst_box_y, int dst_box_width, int dst_box_height) {
    if (dst == NULL) {
        printf("dst buffer is null\n");
        return -1;
    }
    float x_ratio = (float)crop_width / (float)dst_box_width;
    float y_ratio = (float)crop_height / (float)dst_box_height;
    // printf("src_width=%d src_height=%d crop_x=%d crop_y=%d crop_width=%d crop_height=%d\n",
    //     src_width, src_height, crop_x, crop_y, crop_width, crop_height);
    // printf("dst_width=%d dst_height=%d dst_box_x=%d dst_box_y=%d dst_box_width=%d dst_box_height=%d\n",
    //     dst_width, dst_height, dst_box_x, dst_box_y, dst_box_width, dst_box_height);
    // printf("channel=%d x_ratio=%f y_ratio=%f\n", channel, x_ratio, y_ratio);
    // Sample the source crop region and bilinearly scale it into the
    // destination box.
    for (int dst_y = dst_box_y; dst_y < dst_box_y + dst_box_height; dst_y++) {
        for (int dst_x = dst_box_x; dst_x < dst_box_x + dst_box_width; dst_x++) {
            int dst_x_offset = dst_x - dst_box_x;
            int dst_y_offset = dst_y - dst_box_y;
            int src_x = (int)(dst_x_offset * x_ratio) + crop_x;
            int src_y = (int)(dst_y_offset * y_ratio) + crop_y;
            float x_diff = (dst_x_offset * x_ratio) - (src_x - crop_x);
            float y_diff = (dst_y_offset * y_ratio) - (src_y - crop_y);
            int index1 = src_y * src_width * channel + src_x * channel;
            int index2 = index1 + src_width * channel; // down
            if (src_y == src_height - 1) {
                // At the bottom edge, reuse the row above instead of
                // reading out of bounds.
                index2 = index1 - src_width * channel;
            }
            int index3 = index1 + 1 * channel; // right
            int index4 = index2 + 1 * channel; // down right
            if (src_x == src_width - 1) {
                // At the right edge, reuse the pixel to the left instead of
                // reading out of bounds.
                index3 = index1 - 1 * channel;
                index4 = index2 - 1 * channel;
            }
            // printf("dst_x=%d dst_y=%d dst_x_offset=%d dst_y_offset=%d src_x=%d src_y=%d x_diff=%f y_diff=%f src index=%d %d %d %d\n",
            //     dst_x, dst_y, dst_x_offset, dst_y_offset,
            //     src_x, src_y, x_diff, y_diff,
            //     index1, index2, index3, index4);
            // Weighted average of the 2x2 neighborhood, per channel.
            for (int c = 0; c < channel; c++) {
                unsigned char A = src[index1+c];
                unsigned char B = src[index3+c];
                unsigned char C = src[index2+c];
                unsigned char D = src[index4+c];
                unsigned char pixel = (unsigned char)(
                    A * (1 - x_diff) * (1 - y_diff) +
                    B * x_diff * (1 - y_diff) +
                    C * y_diff * (1 - x_diff) +
                    D * x_diff * y_diff
                );
                dst[(dst_y * dst_width + dst_x) * channel + c] = pixel;
            }
        }
    }
    return 0;
}
// Crop/scale a YUV420SP (NV12/NV21) image by processing the Y plane and the
// interleaved UV plane separately with the generic bilinear helper.
static int crop_and_scale_image_yuv420sp(unsigned char *src, int src_width, int src_height,
                                         int crop_x, int crop_y, int crop_width, int crop_height,
                                         unsigned char *dst, int dst_width, int dst_height,
                                         int dst_box_x, int dst_box_y, int dst_box_width, int dst_box_height) {
    unsigned char* src_y = src;
    unsigned char* src_uv = src + src_width * src_height;
    unsigned char* dst_y = dst;
    unsigned char* dst_uv = dst + dst_width * dst_height;
    // Full-resolution luma plane, one byte per pixel.
    crop_and_scale_image_c(1, src_y, src_width, src_height, crop_x, crop_y, crop_width, crop_height,
                           dst_y, dst_width, dst_height, dst_box_x, dst_box_y, dst_box_width, dst_box_height);
    // Half-resolution interleaved chroma plane, two bytes per sample.
    // NOTE(review): the source/crop/dst dimensions are halved here but the
    // dst_box_* arguments are passed through unhalved — confirm this is
    // correct for off-origin destination boxes.
    crop_and_scale_image_c(2, src_uv, src_width / 2, src_height / 2, crop_x / 2, crop_y / 2, crop_width / 2, crop_height / 2,
                           dst_uv, dst_width / 2, dst_height / 2, dst_box_x, dst_box_y, dst_box_width, dst_box_height);
    return 0;
}
// CPU path for image conversion: crop src_box out of `src` and bilinearly
// scale it into dst_box inside `dst`, padding the rest of `dst` with
// `color`. Source and destination must share the same pixel format.
// Returns 0 on success, -1 on error.
static int convert_image_cpu(image_buffer_t *src, image_buffer_t *dst, image_rect_t *src_box, image_rect_t *dst_box, char color) {
    if (dst->virt_addr == NULL) {
        return -1;
    }
    if (src->virt_addr == NULL) {
        return -1;
    }
    if (src->format != dst->format) {
        return -1;
    }
    // Default to the whole source image when no source box is given.
    int src_box_x = 0;
    int src_box_y = 0;
    int src_box_w = src->width;
    int src_box_h = src->height;
    if (src_box != NULL) {
        src_box_x = src_box->left;
        src_box_y = src_box->top;
        src_box_w = src_box->right - src_box->left + 1;
        src_box_h = src_box->bottom - src_box->top + 1;
    }
    // Default to the whole destination image when no destination box is given.
    int dst_box_x = 0;
    int dst_box_y = 0;
    int dst_box_w = dst->width;
    int dst_box_h = dst->height;
    if (dst_box != NULL) {
        dst_box_x = dst_box->left;
        dst_box_y = dst_box->top;
        dst_box_w = dst_box->right - dst_box->left + 1;
        dst_box_h = dst_box->bottom - dst_box->top + 1;
    }
    // fill pad color
    if (dst_box_w != dst->width || dst_box_h != dst->height) {
        int dst_size = get_image_size(dst);
        memset(dst->virt_addr, color, dst_size);
    }
    int reti = 0;
    if (src->format == IMAGE_FORMAT_RGB888) {
        reti = crop_and_scale_image_c(3, src->virt_addr, src->width, src->height,
                                      src_box_x, src_box_y, src_box_w, src_box_h,
                                      dst->virt_addr, dst->width, dst->height,
                                      dst_box_x, dst_box_y, dst_box_w, dst_box_h);
    } else if (src->format == IMAGE_FORMAT_RGBA8888) {
        reti = crop_and_scale_image_c(4, src->virt_addr, src->width, src->height,
                                      src_box_x, src_box_y, src_box_w, src_box_h,
                                      dst->virt_addr, dst->width, dst->height,
                                      dst_box_x, dst_box_y, dst_box_w, dst_box_h);
    } else if (src->format == IMAGE_FORMAT_GRAY8) {
        reti = crop_and_scale_image_c(1, src->virt_addr, src->width, src->height,
                                      src_box_x, src_box_y, src_box_w, src_box_h,
                                      dst->virt_addr, dst->width, dst->height,
                                      dst_box_x, dst_box_y, dst_box_w, dst_box_h);
    // BUGFIX: the second comparison previously tested NV12 twice, so NV21
    // frames always fell through to the "no support" branch.
    } else if (src->format == IMAGE_FORMAT_YUV420SP_NV12 || src->format == IMAGE_FORMAT_YUV420SP_NV21) {
        reti = crop_and_scale_image_yuv420sp(src->virt_addr, src->width, src->height,
                                             src_box_x, src_box_y, src_box_w, src_box_h,
                                             dst->virt_addr, dst->width, dst->height,
                                             dst_box_x, dst_box_y, dst_box_w, dst_box_h);
    } else {
        printf("no support format %d\n", src->format);
    }
    if (reti != 0) {
        printf("convert_image_cpu fail %d\n", reti);
        return -1;
    }
    printf("finish\n"); // TODO(review): leftover debug print — consider removing
    return 0;
}
/**
 * @brief Map an image_format_t value to the matching librga RK_FORMAT_* code.
 *
 * @param fmt [in] Pixel format as used by image_buffer_t
 * @return int RK_FORMAT_* constant, or -1 when the format has no RGA
 *         equivalent here (e.g. IMAGE_FORMAT_GRAY8), which makes the RGA
 *         path fail and the caller fall back to the CPU implementation.
 */
static int get_rga_fmt(image_format_t fmt) {
    if (fmt == IMAGE_FORMAT_RGB888) {
        return RK_FORMAT_RGB_888;
    }
    if (fmt == IMAGE_FORMAT_RGBA8888) {
        return RK_FORMAT_RGBA_8888;
    }
    if (fmt == IMAGE_FORMAT_YUV420SP_NV12) {
        return RK_FORMAT_YCbCr_420_SP;
    }
    if (fmt == IMAGE_FORMAT_YUV420SP_NV21) {
        return RK_FORMAT_YCrCb_420_SP;
    }
    return -1;
}
/**
 * @brief Compute the byte size of an image buffer from its dimensions and
 *        pixel format.
 *
 * @param image [in] Image descriptor (may be NULL)
 * @return int Size in bytes; 0 for NULL input or an unsupported format
 */
int get_image_size(image_buffer_t* image)
{
    if (image == NULL) {
        return 0;
    }
    switch (image->format)
    {
    case IMAGE_FORMAT_GRAY8:
        return image->width * image->height;
    case IMAGE_FORMAT_RGB888:
        return image->width * image->height * 3;
    case IMAGE_FORMAT_RGBA8888:
        return image->width * image->height * 4;
    case IMAGE_FORMAT_YUV420SP_NV12:
    case IMAGE_FORMAT_YUV420SP_NV21:
        // 4:2:0: full-resolution Y plane plus a half-size interleaved UV plane.
        return image->width * image->height * 3 / 2;
    default:
        // BUG FIX: the original fell off the end of the function here
        // (undefined behavior for a non-void function); report 0 instead.
        return 0;
    }
}
/**
 * @brief Convert/crop/scale an image using the RGA 2D hardware accelerator.
 *
 * Buffers are handed to RGA by physical address, dma-buf fd, or virtual
 * address — whichever the source/target descriptors provide. When dst_box
 * does not cover the whole target image, the target is first filled with
 * `color` so the border acts as padding.
 *
 * @param src_img [in]  Source image
 * @param dst_img [out] Target image
 * @param src_box [in]  Crop rectangle on the source (NULL = whole image)
 * @param dst_box [in]  Placement rectangle on the target (NULL = whole image)
 * @param color   [in]  Padding color byte (replicated into all 4 channels)
 * @return int 0 on success, -1 on failure (caller may fall back to CPU)
 */
static int convert_image_rga(image_buffer_t* src_img, image_buffer_t* dst_img, image_rect_t* src_box, image_rect_t* dst_box, char color)
{
    int ret = 0;

    int srcWidth = src_img->width;
    int srcHeight = src_img->height;
    void *src = src_img->virt_addr;
    int src_fd = src_img->fd;
    void *src_phy = NULL;
    int srcFmt = get_rga_fmt(src_img->format);

    int dstWidth = dst_img->width;
    int dstHeight = dst_img->height;
    void *dst = dst_img->virt_addr;
    int dst_fd = dst_img->fd;
    void *dst_phy = NULL;
    int dstFmt = get_rga_fmt(dst_img->format);

    int rotate = 0;

    // Newer librga builds require buffers to be imported as handles first.
    int use_handle = 0;
#if defined(LIBRGA_IM2D_HANDLE)
    use_handle = 1;
#endif

    // printf("src width=%d height=%d fmt=0x%x virAddr=0x%p fd=%d\n",
    //        srcWidth, srcHeight, srcFmt, src, src_fd);
    // printf("dst width=%d height=%d fmt=0x%x virAddr=0x%p fd=%d\n",
    //        dstWidth, dstHeight, dstFmt, dst, dst_fd);
    // printf("rotate=%d\n", rotate);

    int usage = 0;
    IM_STATUS ret_rga = IM_STATUS_NOERROR;

    // set rga usage
    usage |= rotate;

    // set rga rect (inclusive box coordinates -> width = right - left + 1)
    im_rect srect;
    im_rect drect;
    im_rect prect;
    memset(&prect, 0, sizeof(im_rect));

    if (src_box != NULL) {
        srect.x = src_box->left;
        srect.y = src_box->top;
        srect.width = src_box->right - src_box->left + 1;
        srect.height = src_box->bottom - src_box->top + 1;
    } else {
        srect.x = 0;
        srect.y = 0;
        srect.width = srcWidth;
        srect.height = srcHeight;
    }

    if (dst_box != NULL) {
        drect.x = dst_box->left;
        drect.y = dst_box->top;
        drect.width = dst_box->right - dst_box->left + 1;
        drect.height = dst_box->bottom - dst_box->top + 1;
    } else {
        drect.x = 0;
        drect.y = 0;
        drect.width = dstWidth;
        drect.height = dstHeight;
    }

    // set rga buffer
    rga_buffer_t rga_buf_src;
    rga_buffer_t rga_buf_dst;
    rga_buffer_t pat;
    rga_buffer_handle_t rga_handle_src = 0;
    rga_buffer_handle_t rga_handle_dst = 0;
    memset(&pat, 0, sizeof(rga_buffer_t));

    im_handle_param_t in_param;
    in_param.width = srcWidth;
    in_param.height = srcHeight;
    in_param.format = srcFmt;

    im_handle_param_t dst_param;
    dst_param.width = dstWidth;
    dst_param.height = dstHeight;
    dst_param.format = dstFmt;

    if (use_handle) {
        // Prefer physical address, then dma-buf fd, then a virtual mapping.
        if (src_phy != NULL) {
            rga_handle_src = importbuffer_physicaladdr((uint64_t)src_phy, &in_param);
        } else if (src_fd > 0) {
            rga_handle_src = importbuffer_fd(src_fd, &in_param);
        } else {
            rga_handle_src = importbuffer_virtualaddr(src, &in_param);
        }
        if (rga_handle_src <= 0) {
            printf("src handle error %d\n", rga_handle_src);
            ret = -1;
            goto err;
        }
        rga_buf_src = wrapbuffer_handle(rga_handle_src, srcWidth, srcHeight, srcFmt, srcWidth, srcHeight);
    } else {
        if (src_phy != NULL) {
            rga_buf_src = wrapbuffer_physicaladdr(src_phy, srcWidth, srcHeight, srcFmt, srcWidth, srcHeight);
        } else if (src_fd > 0) {
            rga_buf_src = wrapbuffer_fd(src_fd, srcWidth, srcHeight, srcFmt, srcWidth, srcHeight);
        } else {
            rga_buf_src = wrapbuffer_virtualaddr(src, srcWidth, srcHeight, srcFmt, srcWidth, srcHeight);
        }
    }

    if (use_handle) {
        if (dst_phy != NULL) {
            rga_handle_dst = importbuffer_physicaladdr((uint64_t)dst_phy, &dst_param);
        } else if (dst_fd > 0) {
            rga_handle_dst = importbuffer_fd(dst_fd, &dst_param);
        } else {
            rga_handle_dst = importbuffer_virtualaddr(dst, &dst_param);
        }
        if (rga_handle_dst <= 0) {
            printf("dst handle error %d\n", rga_handle_dst);
            ret = -1;
            goto err;
        }
        rga_buf_dst = wrapbuffer_handle(rga_handle_dst, dstWidth, dstHeight, dstFmt, dstWidth, dstHeight);
    } else {
        if (dst_phy != NULL) {
            rga_buf_dst = wrapbuffer_physicaladdr(dst_phy, dstWidth, dstHeight, dstFmt, dstWidth, dstHeight);
        } else if (dst_fd > 0) {
            rga_buf_dst = wrapbuffer_fd(dst_fd, dstWidth, dstHeight, dstFmt, dstWidth, dstHeight);
        } else {
            rga_buf_dst = wrapbuffer_virtualaddr(dst, dstWidth, dstHeight, dstFmt, dstWidth, dstHeight);
        }
    }

    // When the placement rectangle does not cover the whole target, pre-fill
    // the target with the padding color.
    if (drect.width != dstWidth || drect.height != dstHeight) {
        im_rect dst_whole_rect = {0, 0, dstWidth, dstHeight};
        int imcolor;
        // BUG FIX: &imcolor is int*, which does not implicitly convert to
        // char* — an explicit cast is required to poke the color byte-wise.
        char* p_imcolor = (char*)&imcolor;
        p_imcolor[0] = color;
        p_imcolor[1] = color;
        p_imcolor[2] = color;
        p_imcolor[3] = color;
        printf("fill dst image (x y w h)=(%d %d %d %d) with color=0x%x\n",
               dst_whole_rect.x, dst_whole_rect.y, dst_whole_rect.width, dst_whole_rect.height, imcolor);
        ret_rga = imfill(rga_buf_dst, dst_whole_rect, imcolor);
        if (ret_rga <= 0) {
            // Hardware fill failed: fall back to memset when the target has a
            // CPU-visible mapping.
            if (dst != NULL) {
                size_t dst_size = get_image_size(dst_img);
                memset(dst, color, dst_size);
            } else {
                printf("Warning: Can not fill color on target image\n");
            }
        }
    }

    // rga process
    ret_rga = improcess(rga_buf_src, rga_buf_dst, pat, srect, drect, prect, usage);
    if (ret_rga <= 0) {
        printf("Error on improcess STATUS=%d\n", ret_rga);
        printf("RGA error message: %s\n", imStrError((IM_STATUS)ret_rga));
        ret = -1;
    }

err:
    if (rga_handle_src > 0) {
        releasebuffer_handle(rga_handle_src);
    }
    if (rga_handle_dst > 0) {
        releasebuffer_handle(rga_handle_dst);
    }
    // printf("finish\n");
    return ret;
}
/**
 * @brief Convert/resize/crop an image, preferring the RGA hardware path and
 *        falling back to the CPU implementation when RGA fails.
 *
 * @param src_img [in]  Source image
 * @param dst_img [out] Target image
 * @param src_box [in]  Crop rectangle on source image (may be NULL)
 * @param dst_box [in]  Placement rectangle on target image (may be NULL)
 * @param color   [in]  Padding color byte
 * @return int 0 on success, -1 on error
 */
int convert_image(image_buffer_t* src_img, image_buffer_t* dst_img, image_rect_t* src_box, image_rect_t* dst_box, char color)
{
    // Trace the conversion request for debugging.
    printf("src width=%d height=%d fmt=0x%x virAddr=0x%p fd=%d\n",
           src_img->width, src_img->height, src_img->format, src_img->virt_addr, src_img->fd);
    printf("dst width=%d height=%d fmt=0x%x virAddr=0x%p fd=%d\n",
           dst_img->width, dst_img->height, dst_img->format, dst_img->virt_addr, dst_img->fd);
    if (src_box != NULL) {
        printf("src_box=(%d %d %d %d)\n", src_box->left, src_box->top, src_box->right, src_box->bottom);
    }
    if (dst_box != NULL) {
        printf("dst_box=(%d %d %d %d)\n", dst_box->left, dst_box->top, dst_box->right, dst_box->bottom);
    }
    printf("color=0x%x\n", color);

    // Fast path: 2D accelerator.
    int status = convert_image_rga(src_img, dst_img, src_box, dst_box, color);
    if (status == 0) {
        return 0;
    }

    // Slow path: software conversion.
    printf("try convert image use cpu\n");
    return convert_image_cpu(src_img, dst_img, src_box, dst_box, color);
}
/**
 * @brief Aspect-ratio-preserving resize of src_image into dst_image
 *        (letterbox): the scaled content is centered and the remaining
 *        border is filled with `color`.
 *
 * Allocates dst_image->virt_addr with malloc() when the caller supplied
 * neither a buffer nor an fd — the caller is responsible for freeing it.
 *
 * @param src_image [in]  Source image
 * @param dst_image [out] Target image (width/height/format set by the caller)
 * @param letterbox [out] Receives the applied scale and x/y padding (may be NULL)
 * @param color     [in]  Border fill color byte
 * @return int 0 on success, -1 on error
 */
int convert_image_with_letterbox(image_buffer_t* src_image, image_buffer_t* dst_image, letterbox_t* letterbox, char color)
{
    int ret = 0;
    // allow_slight_change: permit shrinking the resized area by a few pixels
    // so width stays 4-aligned and height 2-aligned (hardware-friendly).
    int allow_slight_change = 1;
    int src_w = src_image->width;
    int src_h = src_image->height;
    int dst_w = dst_image->width;
    int dst_h = dst_image->height;
    int resize_w = dst_w;
    int resize_h = dst_h;
    int padding_w = 0;
    int padding_h = 0;
    int _left_offset = 0;
    int _top_offset = 0;
    float scale = 1.0;
    // Source box: always the full source image.
    image_rect_t src_box;
    src_box.left = 0;
    src_box.top = 0;
    src_box.right = src_image->width - 1;
    src_box.bottom = src_image->height - 1;
    // Destination box: shrunk/offset below to preserve the aspect ratio.
    image_rect_t dst_box;
    dst_box.left = 0;
    dst_box.top = 0;
    dst_box.right = dst_image->width - 1;
    dst_box.bottom = dst_image->height - 1;
    // Use the smaller of the two axis ratios so the whole source fits.
    float _scale_w = (float)dst_w / src_w;
    float _scale_h = (float)dst_h / src_h;
    if(_scale_w < _scale_h) {
        scale = _scale_w;
        resize_h = (int) src_h*scale;
    } else {
        scale = _scale_h;
        resize_w = (int) src_w*scale;
    }
    // slight change image size for align
    if (allow_slight_change == 1 && (resize_w % 4 != 0)) {
        resize_w -= resize_w % 4;
    }
    if (allow_slight_change == 1 && (resize_h % 2 != 0)) {
        resize_h -= resize_h % 2;
    }
    // padding
    padding_h = dst_h - resize_h;
    padding_w = dst_w - resize_w;
    // center: split the padding evenly, keeping the top/left offset 2-aligned
    if (_scale_w < _scale_h) {
        dst_box.top = padding_h / 2;
        if (dst_box.top % 2 != 0) {
            dst_box.top -= dst_box.top % 2;
            if (dst_box.top < 0) {
                dst_box.top = 0;
            }
        }
        dst_box.bottom = dst_box.top + resize_h - 1;
        _top_offset = dst_box.top;
    } else {
        dst_box.left = padding_w / 2;
        if (dst_box.left % 2 != 0) {
            dst_box.left -= dst_box.left % 2;
            if (dst_box.left < 0) {
                dst_box.left = 0;
            }
        }
        dst_box.right = dst_box.left + resize_w - 1;
        _left_offset = dst_box.left;
    }
    printf("scale=%f dst_box=(%d %d %d %d) allow_slight_change=%d _left_offset=%d _top_offset=%d padding_w=%d padding_h=%d\n",
           scale, dst_box.left, dst_box.top, dst_box.right, dst_box.bottom, allow_slight_change,
           _left_offset, _top_offset, padding_w, padding_h);
    //set offset and scale so callers can map detections back to source space
    if(letterbox != NULL){
        letterbox->scale = scale;
        letterbox->x_pad = _left_offset;
        letterbox->y_pad = _top_offset;
    }
    // alloc memory buffer for dst image,
    // remember to free
    if (dst_image->virt_addr == NULL && dst_image->fd <= 0) {
        int dst_size = get_image_size(dst_image);
        dst_image->virt_addr = (uint8_t *)malloc(dst_size);
        if (dst_image->virt_addr == NULL) {
            printf("malloc size %d error\n", dst_size);
            return -1;
        }
    }
    ret = convert_image(src_image, dst_image, &src_box, &dst_box, color);
    return ret;
}
-73
View File
@@ -1,73 +0,0 @@
#ifndef _RKNN_MODEL_ZOO_IMAGE_UTILS_H_
#define _RKNN_MODEL_ZOO_IMAGE_UTILS_H_
#ifdef __cplusplus
extern "C" {
#endif
#include "common.h"
/**
* @brief LetterBox
*
*/
typedef struct {
    int x_pad;   // horizontal padding (pixels) added left of the resized content
    int y_pad;   // vertical padding (pixels) added above the resized content
    float scale; // resize ratio applied to the source (min of dst/src per axis)
} letterbox_t;
/**
* @brief Read image file (support png/jpeg/bmp)
*
* @param path [in] Image path
* @param image [out] Read image
* @return int 0: success; -1: error
*/
int read_image(const char* path, image_buffer_t* image);
/**
* @brief Write image file (support jpg/png)
*
* @param path [in] Image path
* @param image [in] Image for write (only support IMAGE_FORMAT_RGB888)
* @return int 0: success; -1: error
*/
int write_image(const char* path, const image_buffer_t* image);
/**
* @brief Convert image for resize and pixel format change
*
* @param src_image [in] Source Image
* @param dst_image [out] Target Image
* @param src_box [in] Crop rectangle on source image
* @param dst_box [in] Crop rectangle on target image
* @param color [in] Padding color if dst_box can not fill target image
* @return int
*/
int convert_image(image_buffer_t* src_image, image_buffer_t* dst_image, image_rect_t* src_box, image_rect_t* dst_box, char color);
/**
* @brief Convert image with letterbox
*
* @param src_image [in] Source Image
* @param dst_image [out] Target Image
* @param letterbox [out] Letterbox
* @param color [in] Fill color on target image
* @return int
*/
int convert_image_with_letterbox(image_buffer_t* src_image, image_buffer_t* dst_image, letterbox_t* letterbox, char color);
/**
* @brief Get the image size
*
* @param image [in] Image
* @return int image size
*/
int get_image_size(image_buffer_t* image);
#ifdef __cplusplus
} // extern "C"
#endif
#endif // _RKNN_MODEL_ZOO_IMAGE_UTILS_H_
-500
View File
@@ -1,500 +0,0 @@
// Copyright (c) 2021 by Rockchip Electronics Co., Ltd. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "yolov8.h"
#include <math.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/time.h>
#include <set>
#include <vector>
#ifdef OPI5_BUILD
#define LABEL_NALE_TXT_PATH "azaion_10_labels_list.txt"
#else
#define LABEL_NALE_TXT_PATH "coco_80_labels_list.txt"
#endif
static char *labels[OBJ_CLASS_NUM];
/// Clamp @p val to the closed range [@p min, @p max], truncating to int.
/// (NaN input falls through to @p min, matching the original comparison order.)
inline static int clamp(float val, int min, int max)
{
    if (val > min) {
        return (val < max) ? (int)val : max;
    }
    return min;
}
/**
 * @brief Read one '\n'-terminated line from @p fp into a freshly allocated
 *        buffer (the '\n' is not stored).
 *
 * @param fp     Open stream to read from
 * @param buffer Ignored — kept only for signature compatibility; the function
 *               always allocates its own buffer (as the original did)
 * @param len    [out] Length of the returned line, excluding the terminator
 * @return char* malloc'd NUL-terminated line (caller frees), or NULL on
 *         end-of-file with nothing read, read error, or out of memory
 */
static char *readLine(FILE *fp, char *buffer, int *len)
{
    (void)buffer; // never read; a fresh buffer is always allocated

    // BUG FIX (perf): the original called realloc() once per character,
    // making line reads O(n^2); grow the buffer geometrically instead.
    size_t cap = 16;
    size_t n = 0;
    char *line = (char *)malloc(cap);
    if (!line)
        return NULL; // Out of memory

    int ch;
    while ((ch = fgetc(fp)) != '\n' && ch != EOF)
    {
        if (n + 1 >= cap)
        {
            size_t new_cap = cap * 2;
            void *tmp = realloc(line, new_cap);
            if (tmp == NULL)
            {
                free(line);
                return NULL; // Out of memory
            }
            line = (char *)tmp;
            cap = new_cap;
        }
        line[n++] = (char)ch;
    }
    line[n] = '\0';
    *len = (int)n;

    // Detect end: EOF with nothing read (or a stream error) yields no line.
    if (ch == EOF && (n == 0 || ferror(fp)))
    {
        free(line);
        return NULL;
    }
    return line;
}
/**
 * @brief Read up to @p max_line lines from @p fileName into @p lines.
 *
 * Each entry is malloc'd by readLine(); the caller owns and frees them.
 *
 * @param fileName Path of the text file
 * @param lines    [out] Array receiving up to max_line malloc'd strings
 * @param max_line Capacity of @p lines
 * @return int Number of lines read, or -1 if the file cannot be opened
 */
static int readLines(const char *fileName, char *lines[], int max_line)
{
    FILE *file = fopen(fileName, "r");
    char *s = NULL; // BUG FIX: was passed to readLine() uninitialized
    int i = 0;
    int n = 0;

    if (file == NULL)
    {
        printf("Open %s fail!\n", fileName);
        return -1;
    }

    while ((s = readLine(file, s, &n)) != NULL)
    {
        lines[i++] = s;
        if (i >= max_line)
            break;
    }
    fclose(file);
    return i;
}
/**
 * @brief Load up to OBJ_CLASS_NUM class-name strings from a label file.
 *
 * @param locationFilename Path of the label list file
 * @param label            [out] Receives malloc'd label strings
 * @return int 0 on success, -1 when the file cannot be read
 */
static int loadLabelName(const char *locationFilename, char *label[])
{
    printf("load lable %s\n", locationFilename);
    // BUG FIX: propagate readLines() failure — the original ignored the
    // return value and always reported success, so init_post_process()'s
    // error check could never trigger.
    int line_count = readLines(locationFilename, label, OBJ_CLASS_NUM);
    if (line_count < 0)
    {
        return -1;
    }
    return 0;
}
/**
 * @brief Intersection-over-union of two axis-aligned boxes given as corner
 *        coordinates (inclusive pixel convention: width = xmax - xmin + 1).
 * @return float IoU in [0, 1]; 0 when the union is degenerate
 */
static float CalculateOverlap(float xmin0, float ymin0, float xmax0, float ymax0, float xmin1, float ymin1, float xmax1,
                              float ymax1)
{
    float overlap_w = fmax(0.f, fmin(xmax0, xmax1) - fmax(xmin0, xmin1) + 1.0);
    float overlap_h = fmax(0.f, fmin(ymax0, ymax1) - fmax(ymin0, ymin1) + 1.0);
    float intersection = overlap_w * overlap_h;
    float union_area = (xmax0 - xmin0 + 1.0) * (ymax0 - ymin0 + 1.0) + (xmax1 - xmin1 + 1.0) * (ymax1 - ymin1 + 1.0) - intersection;
    if (union_area <= 0.f) {
        return 0.f;
    }
    return intersection / union_area;
}
/**
 * @brief Greedy non-maximum suppression for one class.
 *
 * `order` holds box indices sorted by descending confidence; suppressed
 * entries are marked -1 in place.
 *
 * @param validCount      Number of candidate boxes
 * @param outputLocations Box coordinates as [x, y, w, h] per box
 * @param classIds        Class id per box, in ORIGINAL box order (not sorted)
 * @param order           [in/out] Score-sorted box indices; -1 = suppressed
 * @param filterId        Class currently being suppressed
 * @param threshold       IoU above which the lower-scored box is dropped
 * @return int Always 0
 */
static int nms(int validCount, std::vector<float> &outputLocations, std::vector<int> classIds, std::vector<int> &order,
               int filterId, float threshold)
{
    for (int i = 0; i < validCount; ++i)
    {
        int n = order[i];
        // BUG FIX: class membership must be tested on the BOX index order[i],
        // not the loop position i — order[] is permuted by the score sort
        // while classIds[] stays in original box order.
        if (n == -1 || classIds[n] != filterId)
        {
            continue;
        }
        for (int j = i + 1; j < validCount; ++j)
        {
            int m = order[j];
            // BUG FIX: likewise check classIds[m]; the original tested
            // classIds[i], which is a tautology here and caused boxes of
            // OTHER classes to be suppressed by this class's boxes.
            if (m == -1 || classIds[m] != filterId)
            {
                continue;
            }
            float xmin0 = outputLocations[n * 4 + 0];
            float ymin0 = outputLocations[n * 4 + 1];
            float xmax0 = outputLocations[n * 4 + 0] + outputLocations[n * 4 + 2];
            float ymax0 = outputLocations[n * 4 + 1] + outputLocations[n * 4 + 3];

            float xmin1 = outputLocations[m * 4 + 0];
            float ymin1 = outputLocations[m * 4 + 1];
            float xmax1 = outputLocations[m * 4 + 0] + outputLocations[m * 4 + 2];
            float ymax1 = outputLocations[m * 4 + 1] + outputLocations[m * 4 + 3];

            float iou = CalculateOverlap(xmin0, ymin0, xmax0, ymax0, xmin1, ymin1, xmax1, ymax1);

            if (iou > threshold)
            {
                order[j] = -1;
            }
        }
    }
    return 0;
}
/**
 * @brief In-place quicksort of `input` in DESCENDING order, applying the
 *        identical permutation to `indices` so indices[k] keeps tracking the
 *        original position of input[k].
 *
 * @param input   [in/out] Values to sort (descending)
 * @param left    Left bound (inclusive)
 * @param right   Right bound (inclusive)
 * @param indices [in/out] Parallel index array, permuted alongside input
 * @return int Final position of the pivot (internal recursion value)
 */
static int quick_sort_indice_inverse(std::vector<float> &input, int left, int right, std::vector<int> &indices)
{
    float pivot;
    int pivot_idx;
    int lo = left;
    int hi = right;
    if (left < right)
    {
        // Classic hole-based partition: lift the leftmost element out as the
        // pivot, then shuttle out-of-place elements into the hole from both ends.
        pivot_idx = indices[left];
        pivot = input[left];
        while (lo < hi)
        {
            while (lo < hi && input[hi] <= pivot)
            {
                hi--;
            }
            input[lo] = input[hi];
            indices[lo] = indices[hi];
            while (lo < hi && input[lo] >= pivot)
            {
                lo++;
            }
            input[hi] = input[lo];
            indices[hi] = indices[lo];
        }
        input[lo] = pivot;
        indices[lo] = pivot_idx;
        quick_sort_indice_inverse(input, left, lo - 1, indices);
        quick_sort_indice_inverse(input, lo + 1, right, indices);
    }
    return lo;
}
/// Logistic function: maps any real x into (0, 1).
static float sigmoid(float x)
{
    double denom = 1.0 + expf(-x);
    return 1.0 / denom;
}
/// Inverse of the logistic function (logit): recovers x from y in (0, 1).
static float unsigmoid(float y)
{
    float ratio = (1.0 / y) - 1.0;
    return -logf(ratio);
}
/// Saturate a float into [min, max], then truncate to int32.
inline static int32_t __clip(float val, float min, float max)
{
    float f;
    if (val <= min) {
        f = min;
    } else if (val >= max) {
        f = max;
    } else {
        f = val;
    }
    return f;
}
/// Affine-quantize a float to INT8: q = clip(f / scale + zp, -128, 127).
static int8_t qnt_f32_to_affine(float f32, int32_t zp, float scale)
{
    float shifted = (f32 / scale) + zp;
    return (int8_t)__clip(shifted, -128, 127);
}
/// Dequantize an INT8 affine value back to float: (q - zp) * scale.
static float deqnt_affine_to_f32(int8_t qnt, int32_t zp, float scale)
{
    float centered = (float)qnt - (float)zp;
    return centered * scale;
}
/**
 * @brief Distribution Focal Loss decode: for each of the 4 box sides, take
 *        the softmax-weighted expectation over dfl_len distance bins.
 *
 * @param tensor  Input logits, laid out as 4 consecutive groups of dfl_len
 * @param dfl_len Number of bins per box side
 * @param box     [out] 4 decoded side distances (in bin units)
 */
void compute_dfl(float* tensor, int dfl_len, float* box){
    // BUG FIX (portability): `float exp_t[dfl_len]` is a VLA — a GCC
    // extension, not standard C++. Use std::vector (already included) and
    // hoist it out of the loop so it is allocated once.
    std::vector<float> exp_t(dfl_len);
    for (int b = 0; b < 4; b++){
        float exp_sum = 0;
        float acc_sum = 0;
        // softmax over this side's bins
        for (int i = 0; i < dfl_len; i++){
            exp_t[i] = exp(tensor[i + b * dfl_len]);
            exp_sum += exp_t[i];
        }
        // expectation: sum of bin index weighted by its softmax probability
        for (int i = 0; i < dfl_len; i++){
            acc_sum += exp_t[i] / exp_sum * i;
        }
        box[b] = acc_sum;
    }
}
/**
 * @brief Decode one INT8-quantized YOLOv8 output branch into box candidates.
 *
 * Tensors are channel-major: element (c, y, x) lives at
 * c * grid_len + y * grid_w + x, hence the `offset += grid_len` walks.
 *
 * @param box_tensor        Quantized DFL box distribution (4 * dfl_len channels)
 * @param box_zp,box_scale  Affine quantization parameters of box_tensor
 * @param score_tensor      Quantized per-class scores (OBJ_CLASS_NUM channels)
 * @param score_sum_tensor  Optional per-cell score sum used as a fast reject
 *                          (may be nullptr)
 * @param grid_h,grid_w     Grid size of this branch
 * @param stride            Pixel stride of this branch w.r.t. the model input
 * @param dfl_len           Number of DFL bins per box side
 * @param boxes             [out] Appended [x, y, w, h] boxes (model-input pixels)
 * @param objProbs          [out] Appended dequantized confidences
 * @param classId           [out] Appended class indices
 * @param threshold         Confidence threshold (float domain)
 * @return int Number of candidates appended
 */
static int process_i8(int8_t *box_tensor, int32_t box_zp, float box_scale,
                      int8_t *score_tensor, int32_t score_zp, float score_scale,
                      int8_t *score_sum_tensor, int32_t score_sum_zp, float score_sum_scale,
                      int grid_h, int grid_w, int stride, int dfl_len,
                      std::vector<float> &boxes,
                      std::vector<float> &objProbs,
                      std::vector<int> &classId,
                      float threshold)
{
    int validCount = 0;
    int grid_len = grid_h * grid_w;
    // Quantize the thresholds once so the per-cell comparisons stay in int8.
    int8_t score_thres_i8 = qnt_f32_to_affine(threshold, score_zp, score_scale);
    int8_t score_sum_thres_i8 = qnt_f32_to_affine(threshold, score_sum_zp, score_sum_scale);
    for (int i = 0; i < grid_h; i++)
    {
        for (int j = 0; j < grid_w; j++)
        {
            int offset = i* grid_w + j;
            int max_class_id = -1;
            // score_sum acts as a cheap pre-filter: skip cells whose summed
            // class score cannot pass the threshold.
            if (score_sum_tensor != nullptr){
                if (score_sum_tensor[offset] < score_sum_thres_i8){
                    continue;
                }
            }
            // Find the best-scoring class for this cell.
            // NOTE(review): -score_zp as the initial sentinel looks intended
            // as "quantized zero score"; confirm against the quantizer.
            int8_t max_score = -score_zp;
            for (int c= 0; c< OBJ_CLASS_NUM; c++){
                if ((score_tensor[offset] > score_thres_i8) && (score_tensor[offset] > max_score))
                {
                    max_score = score_tensor[offset];
                    max_class_id = c;
                }
                offset += grid_len;
            }
            // compute box
            if (max_score> score_thres_i8){
                offset = i* grid_w + j;
                float box[4];
                float before_dfl[dfl_len*4];
                // Dequantize the 4*dfl_len DFL logits for this cell.
                for (int k=0; k< dfl_len*4; k++){
                    before_dfl[k] = deqnt_affine_to_f32(box_tensor[offset], box_zp, box_scale);
                    offset += grid_len;
                }
                compute_dfl(before_dfl, dfl_len, box);
                // DFL values are distances from the cell center to the four
                // sides; convert to an [x, y, w, h] box in model-input pixels.
                float x1,y1,x2,y2,w,h;
                x1 = (-box[0] + j + 0.5)*stride;
                y1 = (-box[1] + i + 0.5)*stride;
                x2 = (box[2] + j + 0.5)*stride;
                y2 = (box[3] + i + 0.5)*stride;
                w = x2 - x1;
                h = y2 - y1;
                boxes.push_back(x1);
                boxes.push_back(y1);
                boxes.push_back(w);
                boxes.push_back(h);
                objProbs.push_back(deqnt_affine_to_f32(max_score, score_zp, score_scale));
                classId.push_back(max_class_id);
                validCount ++;
            }
        }
    }
    return validCount;
}
/**
 * @brief Decode one float32 YOLOv8 output branch into box candidates
 *        (dequantized counterpart of the INT8 branch decoder).
 *
 * Tensors are channel-major: element (c, y, x) lives at
 * c * grid_len + y * grid_w + x.
 *
 * @param box_tensor       DFL box distribution (4 * dfl_len channels)
 * @param score_tensor     Per-class scores (OBJ_CLASS_NUM channels)
 * @param score_sum_tensor Optional per-cell score sum fast reject (may be nullptr)
 * @param grid_h,grid_w    Grid size of this branch
 * @param stride           Pixel stride of this branch w.r.t. the model input
 * @param dfl_len          Number of DFL bins per box side
 * @param boxes            [out] Appended [x, y, w, h] boxes (model-input pixels)
 * @param objProbs         [out] Appended confidences
 * @param classId          [out] Appended class indices
 * @param threshold        Confidence threshold
 * @return int Number of candidates appended
 */
static int process_fp32(float *box_tensor, float *score_tensor, float *score_sum_tensor,
                        int grid_h, int grid_w, int stride, int dfl_len,
                        std::vector<float> &boxes,
                        std::vector<float> &objProbs,
                        std::vector<int> &classId,
                        float threshold)
{
    int validCount = 0;
    int grid_len = grid_h * grid_w;
    for (int i = 0; i < grid_h; i++)
    {
        for (int j = 0; j < grid_w; j++)
        {
            int offset = i* grid_w + j;
            int max_class_id = -1;
            // score_sum acts as a cheap pre-filter: skip cells whose summed
            // class score cannot pass the threshold.
            if (score_sum_tensor != nullptr){
                if (score_sum_tensor[offset] < threshold){
                    continue;
                }
            }
            // Find the best-scoring class for this cell.
            float max_score = 0;
            for (int c= 0; c< OBJ_CLASS_NUM; c++){
                if ((score_tensor[offset] > threshold) && (score_tensor[offset] > max_score))
                {
                    max_score = score_tensor[offset];
                    max_class_id = c;
                }
                offset += grid_len;
            }
            // compute box
            if (max_score> threshold){
                offset = i* grid_w + j;
                float box[4];
                float before_dfl[dfl_len*4];
                for (int k=0; k< dfl_len*4; k++){
                    before_dfl[k] = box_tensor[offset];
                    offset += grid_len;
                }
                compute_dfl(before_dfl, dfl_len, box);
                // DFL values are distances from the cell center to the four
                // sides; convert to an [x, y, w, h] box in model-input pixels.
                float x1,y1,x2,y2,w,h;
                x1 = (-box[0] + j + 0.5)*stride;
                y1 = (-box[1] + i + 0.5)*stride;
                x2 = (box[2] + j + 0.5)*stride;
                y2 = (box[3] + i + 0.5)*stride;
                w = x2 - x1;
                h = y2 - y1;
                boxes.push_back(x1);
                boxes.push_back(y1);
                boxes.push_back(w);
                boxes.push_back(h);
                objProbs.push_back(max_score);
                classId.push_back(max_class_id);
                validCount ++;
            }
        }
    }
    return validCount;
}
/**
 * @brief Full YOLOv8 post-processing: decode all three output branches,
 *        sort candidates by confidence, run per-class NMS, then map the
 *        surviving boxes back to source-image space by undoing the
 *        letterbox transform.
 *
 * @param app_ctx        Model context (tensor attrs, geometry, quant flag)
 * @param outputs        rknn output tensors of one inference
 * @param letter_box     Letterbox parameters applied during pre-processing
 * @param conf_threshold Minimum confidence to keep a candidate
 * @param nms_threshold  IoU threshold for NMS
 * @param od_results     [out] Final detection list (count + boxes)
 * @return int Always 0 (also when nothing was detected)
 */
int post_process(rknn_app_context_t *app_ctx, rknn_output *outputs, letterbox_t *letter_box, float conf_threshold, float nms_threshold, object_detect_result_list *od_results)
{
    std::vector<float> filterBoxes;
    std::vector<float> objProbs;
    std::vector<int> classId;
    int validCount = 0;
    int stride = 0;
    int grid_h = 0;
    int grid_w = 0;
    int model_in_w = app_ctx->model_width;
    int model_in_h = app_ctx->model_height;
    memset(od_results, 0, sizeof(object_detect_result_list));
    // default 3 branch
    int dfl_len = app_ctx->output_attrs[0].dims[1] /4;
    int output_per_branch = app_ctx->io_num.n_output / 3;
    for (int i = 0; i < 3; i++)
    {
        // Branches with 3 tensors carry an extra per-cell score sum used as
        // a fast reject; with 2 tensors it is absent.
        void *score_sum = nullptr;
        int32_t score_sum_zp = 0;
        float score_sum_scale = 1.0;
        if (output_per_branch == 3){
            score_sum = outputs[i*output_per_branch + 2].buf;
            score_sum_zp = app_ctx->output_attrs[i*output_per_branch + 2].zp;
            score_sum_scale = app_ctx->output_attrs[i*output_per_branch + 2].scale;
        }
        int box_idx = i*output_per_branch;
        int score_idx = i*output_per_branch + 1;
        grid_h = app_ctx->output_attrs[box_idx].dims[2];
        grid_w = app_ctx->output_attrs[box_idx].dims[3];
        // Each branch covers the model input at a fixed down-sampling stride.
        stride = model_in_h / grid_h;
        if (app_ctx->is_quant)
        {
            validCount += process_i8((int8_t *)outputs[box_idx].buf, app_ctx->output_attrs[box_idx].zp, app_ctx->output_attrs[box_idx].scale,
                                     (int8_t *)outputs[score_idx].buf, app_ctx->output_attrs[score_idx].zp, app_ctx->output_attrs[score_idx].scale,
                                     (int8_t *)score_sum, score_sum_zp, score_sum_scale,
                                     grid_h, grid_w, stride, dfl_len,
                                     filterBoxes, objProbs, classId, conf_threshold);
        }
        else
        {
            validCount += process_fp32((float *)outputs[box_idx].buf, (float *)outputs[score_idx].buf, (float *)score_sum,
                                       grid_h, grid_w, stride, dfl_len,
                                       filterBoxes, objProbs, classId, conf_threshold);
        }
    }
    // no object detect
    if (validCount <= 0)
    {
        return 0;
    }
    std::vector<int> indexArray;
    for (int i = 0; i < validCount; ++i)
    {
        indexArray.push_back(i);
    }
    // Sort candidate indices by descending confidence.
    quick_sort_indice_inverse(objProbs, 0, validCount - 1, indexArray);
    // Per-class NMS over the score-sorted candidates.
    std::set<int> class_set(std::begin(classId), std::end(classId));
    for (auto c : class_set)
    {
        nms(validCount, filterBoxes, classId, indexArray, c, nms_threshold);
    }
    int last_count = 0;
    od_results->count = 0;
    /* box valid detect target */
    for (int i = 0; i < validCount; ++i)
    {
        if (indexArray[i] == -1 || last_count >= OBJ_NUMB_MAX_SIZE)
        {
            continue;
        }
        int n = indexArray[i];
        // Undo the letterbox: remove padding, clamp to the model input, then
        // divide by the resize scale to land in source-image pixels.
        float x1 = filterBoxes[n * 4 + 0] - letter_box->x_pad;
        float y1 = filterBoxes[n * 4 + 1] - letter_box->y_pad;
        float x2 = x1 + filterBoxes[n * 4 + 2];
        float y2 = y1 + filterBoxes[n * 4 + 3];
        int id = classId[n];
        float obj_conf = objProbs[i];
        od_results->results[last_count].box.left = (int)(clamp(x1, 0, model_in_w) / letter_box->scale);
        od_results->results[last_count].box.top = (int)(clamp(y1, 0, model_in_h) / letter_box->scale);
        od_results->results[last_count].box.right = (int)(clamp(x2, 0, model_in_w) / letter_box->scale);
        od_results->results[last_count].box.bottom = (int)(clamp(y2, 0, model_in_h) / letter_box->scale);
        od_results->results[last_count].prop = obj_conf;
        od_results->results[last_count].cls_id = id;
        last_count++;
    }
    od_results->count = last_count;
    return 0;
}
/**
 * @brief Load the class-label table from LABEL_NALE_TXT_PATH into the
 *        module-global `labels` array.
 * @return int 0 on success, -1 when the label file cannot be loaded
 */
int init_post_process()
{
    if (loadLabelName(LABEL_NALE_TXT_PATH, labels) < 0)
    {
        printf("Load %s failed!\n", LABEL_NALE_TXT_PATH);
        return -1;
    }
    return 0;
}
/**
 * @brief Look up the label string for a class id.
 *
 * @param cls_id Class index produced by post_process()
 * @return char* Label string, or "null" for out-of-range or unloaded ids
 */
char *coco_cls_to_name(int cls_id)
{
    // BUG FIX: also reject negative ids — the original only checked the
    // upper bound, so a negative cls_id indexed labels[] out of bounds.
    if (cls_id < 0 || cls_id >= OBJ_CLASS_NUM)
    {
        return "null";
    }
    if (labels[cls_id])
    {
        return labels[cls_id];
    }
    return "null";
}
/**
 * @brief Free every label string loaded by init_post_process() and reset the
 *        table so a later re-init starts clean.
 */
void deinit_post_process()
{
    for (int idx = 0; idx < OBJ_CLASS_NUM; idx++)
    {
        if (labels[idx] == nullptr)
        {
            continue;
        }
        free(labels[idx]);
        labels[idx] = nullptr;
    }
}
-36
View File
@@ -1,36 +0,0 @@
#ifndef _RKNN_YOLOV8_DEMO_POSTPROCESS_H_
#define _RKNN_YOLOV8_DEMO_POSTPROCESS_H_
#include <stdint.h>
#include <vector>
#include "rknn_api.h"
#include "common.h"
#include "image_utils.h"
#define OBJ_NAME_MAX_SIZE 64
#define OBJ_NUMB_MAX_SIZE 128
#define OBJ_CLASS_NUM 80
#define NMS_THRESH 0.45
#define BOX_THRESH 0.25
// class rknn_app_context_t;
typedef struct {
    image_rect_t box; // detection rectangle in source-image pixel coordinates
    float prop;       // confidence score of the detection
    int cls_id;       // class index (resolve to a label via coco_cls_to_name())
} object_detect_result;
typedef struct {
    int id;    // NOTE(review): zeroed by post_process(); presumably set by callers — confirm
    int count; // number of valid entries in results[]
    object_detect_result results[OBJ_NUMB_MAX_SIZE]; // detections, capped at OBJ_NUMB_MAX_SIZE
} object_detect_result_list;
int init_post_process();
void deinit_post_process();
char *coco_cls_to_name(int cls_id);
int post_process(rknn_app_context_t *app_ctx, rknn_output *outputs, letterbox_t *letter_box, float conf_threshold, float nms_threshold, object_detect_result_list *od_results);
// NOTE(review): deinitPostProcess() appears to have no definition in this
// module — deinit_post_process() above is the implemented cleanup entry
// point. Confirm it is unused and drop this stale declaration.
void deinitPostProcess();
#endif //_RKNN_YOLOV8_DEMO_POSTPROCESS_H_
-250
View File
@@ -1,250 +0,0 @@
// Copyright (c) 2023 by Rockchip Electronics Co., Ltd. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <math.h>
#include "yolov8.h"
#include "common.h"
#include "file_utils.h"
#include "image_utils.h"
/**
 * @brief Print one rknn tensor attribute (index, name, shape, element count,
 *        size, layout, type and quantization parameters) on a single line.
 */
static void dump_tensor_attr(rknn_tensor_attr *attr)
{
    printf(" index=%d, name=%s, n_dims=%d, dims=[%d, %d, %d, %d], n_elems=%d, size=%d, fmt=%s, type=%s, qnt_type=%s, "
           "zp=%d, scale=%f\n",
           attr->index, attr->name, attr->n_dims, attr->dims[0], attr->dims[1], attr->dims[2], attr->dims[3],
           attr->n_elems, attr->size, get_format_string(attr->fmt), get_type_string(attr->type),
           get_qnt_type_string(attr->qnt_type), attr->zp, attr->scale);
}
int init_yolov8_model(const char *model_path, rknn_app_context_t *app_ctx)
{
int ret;
int model_len = 0;
char *model;
rknn_context ctx = 0;
// Load RKNN Model
model_len = read_data_from_file(model_path, &model);
if (model == NULL)
{
printf("load_model fail!\n");
return -1;
}
ret = rknn_init(&ctx, model, model_len, 0, NULL);
free(model);
if (ret < 0)
{
printf("rknn_init fail! ret=%d\n", ret);
return -1;
}
// Get Model Input Output Number
rknn_input_output_num io_num;
ret = rknn_query(ctx, RKNN_QUERY_IN_OUT_NUM, &io_num, sizeof(io_num));
if (ret != RKNN_SUCC)
{
printf("rknn_query fail! ret=%d\n", ret);
return -1;
}
printf("model input num: %d, output num: %d\n", io_num.n_input, io_num.n_output);
// Get Model Input Info
printf("input tensors:\n");
rknn_tensor_attr input_attrs[io_num.n_input];
memset(input_attrs, 0, sizeof(input_attrs));
for (int i = 0; i < io_num.n_input; i++)
{
input_attrs[i].index = i;
ret = rknn_query(ctx, RKNN_QUERY_INPUT_ATTR, &(input_attrs[i]), sizeof(rknn_tensor_attr));
if (ret != RKNN_SUCC)
{
printf("rknn_query fail! ret=%d\n", ret);
return -1;
}
dump_tensor_attr(&(input_attrs[i]));
}
// Get Model Output Info
printf("output tensors:\n");
rknn_tensor_attr output_attrs[io_num.n_output];
memset(output_attrs, 0, sizeof(output_attrs));
for (int i = 0; i < io_num.n_output; i++)
{
output_attrs[i].index = i;
ret = rknn_query(ctx, RKNN_QUERY_OUTPUT_ATTR, &(output_attrs[i]), sizeof(rknn_tensor_attr));
if (ret != RKNN_SUCC)
{
printf("rknn_query fail! ret=%d\n", ret);
return -1;
}
dump_tensor_attr(&(output_attrs[i]));
}
// Set to context
app_ctx->rknn_ctx = ctx;
// TODO
if (output_attrs[0].qnt_type == RKNN_TENSOR_QNT_AFFINE_ASYMMETRIC && output_attrs[0].type == RKNN_TENSOR_INT8)
{
app_ctx->is_quant = true;
}
else
{
app_ctx->is_quant = false;
}
app_ctx->io_num = io_num;
app_ctx->input_attrs = (rknn_tensor_attr *)malloc(io_num.n_input * sizeof(rknn_tensor_attr));
memcpy(app_ctx->input_attrs, input_attrs, io_num.n_input * sizeof(rknn_tensor_attr));
app_ctx->output_attrs = (rknn_tensor_attr *)malloc(io_num.n_output * sizeof(rknn_tensor_attr));
memcpy(app_ctx->output_attrs, output_attrs, io_num.n_output * sizeof(rknn_tensor_attr));
if (input_attrs[0].fmt == RKNN_TENSOR_NCHW)
{
printf("model is NCHW input fmt\n");
app_ctx->model_channel = input_attrs[0].dims[1];
app_ctx->model_height = input_attrs[0].dims[2];
app_ctx->model_width = input_attrs[0].dims[3];
}
else
{
printf("model is NHWC input fmt\n");
app_ctx->model_height = input_attrs[0].dims[1];
app_ctx->model_width = input_attrs[0].dims[2];
app_ctx->model_channel = input_attrs[0].dims[3];
}
printf("model input height=%d, width=%d, channel=%d\n",
app_ctx->model_height, app_ctx->model_width, app_ctx->model_channel);
return 0;
}
/**
 * @brief Release every resource held in app_ctx: the rknn runtime context
 *        and the heap copies of the tensor attributes. Safe to call on an
 *        already-released context.
 * @return int Always 0
 */
int release_yolov8_model(rknn_app_context_t *app_ctx)
{
    // Destroy the runtime context first.
    if (app_ctx->rknn_ctx != 0)
    {
        rknn_destroy(app_ctx->rknn_ctx);
        app_ctx->rknn_ctx = 0;
    }
    // free(NULL) is a no-op, so the attr arrays need no guard.
    free(app_ctx->input_attrs);
    app_ctx->input_attrs = NULL;
    free(app_ctx->output_attrs);
    app_ctx->output_attrs = NULL;
    return 0;
}
/**
 * @brief Run one YOLOv8 inference on `img`: letterbox to the model input
 *        size, set inputs, run, fetch outputs, post-process into od_results.
 *
 * @param app_ctx    Initialized model context
 * @param img        Input image
 * @param od_results [out] Detections mapped back to source-image coordinates
 * @return int 0 on success, negative on error
 */
int inference_yolov8_model(rknn_app_context_t *app_ctx, image_buffer_t *img, object_detect_result_list *od_results)
{
    int ret;
    image_buffer_t dst_img;
    letterbox_t letter_box;
    rknn_input inputs[app_ctx->io_num.n_input];
    rknn_output outputs[app_ctx->io_num.n_output];
    const float nms_threshold = NMS_THRESH;      // default NMS IoU threshold
    const float box_conf_threshold = BOX_THRESH; // default confidence threshold
    int bg_color = 114; // conventional letterbox padding gray

    if ((!app_ctx) || !(img) || (!od_results))
    {
        return -1;
    }

    memset(od_results, 0x00, sizeof(*od_results));
    memset(&letter_box, 0, sizeof(letterbox_t));
    memset(&dst_img, 0, sizeof(image_buffer_t));
    memset(inputs, 0, sizeof(inputs));
    memset(outputs, 0, sizeof(outputs));

    // Pre Process
    dst_img.width = app_ctx->model_width;
    dst_img.height = app_ctx->model_height;
    dst_img.format = IMAGE_FORMAT_RGB888;
    dst_img.size = get_image_size(&dst_img);
    dst_img.virt_addr = (unsigned char *)malloc(dst_img.size);
    if (dst_img.virt_addr == NULL)
    {
        printf("malloc buffer size:%d fail!\n", dst_img.size);
        return -1;
    }

    // letterbox
    ret = convert_image_with_letterbox(img, &dst_img, &letter_box, bg_color);
    if (ret < 0)
    {
        printf("convert_image_with_letterbox fail! ret=%d\n", ret);
        ret = -1;
        goto out; // BUG FIX: was `return -1`, leaking dst_img.virt_addr
    }

    // Set Input Data
    inputs[0].index = 0;
    inputs[0].type = RKNN_TENSOR_UINT8;
    inputs[0].fmt = RKNN_TENSOR_NHWC;
    inputs[0].size = app_ctx->model_width * app_ctx->model_height * app_ctx->model_channel;
    inputs[0].buf = dst_img.virt_addr;
    ret = rknn_inputs_set(app_ctx->rknn_ctx, app_ctx->io_num.n_input, inputs);
    if (ret < 0)
    {
        printf("rknn_input_set fail! ret=%d\n", ret);
        ret = -1;
        goto out; // BUG FIX: was `return -1`, leaking dst_img.virt_addr
    }

    // Run
    printf("rknn_run\n");
    ret = rknn_run(app_ctx->rknn_ctx, nullptr);
    if (ret < 0)
    {
        printf("rknn_run fail! ret=%d\n", ret);
        ret = -1;
        goto out; // BUG FIX: was `return -1`, leaking dst_img.virt_addr
    }

    // Get Output
    memset(outputs, 0, sizeof(outputs));
    for (int i = 0; i < app_ctx->io_num.n_output; i++)
    {
        outputs[i].index = i;
        // Quantized models post-process the raw INT8 tensors directly.
        outputs[i].want_float = (!app_ctx->is_quant);
    }
    ret = rknn_outputs_get(app_ctx->rknn_ctx, app_ctx->io_num.n_output, outputs, NULL);
    if (ret < 0)
    {
        printf("rknn_outputs_get fail! ret=%d\n", ret);
        goto out;
    }

    // Post Process
    post_process(app_ctx, outputs, &letter_box, box_conf_threshold, nms_threshold, od_results);

    // Remember to release the rknn outputs
    rknn_outputs_release(app_ctx->rknn_ctx, app_ctx->io_num.n_output, outputs);

out:
    if (dst_img.virt_addr != NULL)
    {
        free(dst_img.virt_addr);
    }

    return ret;
}
-42
View File
@@ -1,42 +0,0 @@
// Copyright (c) 2023 by Rockchip Electronics Co., Ltd. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#ifndef _RKNN_DEMO_YOLOV8_H_
#define _RKNN_DEMO_YOLOV8_H_
#include "rknn_api.h"
#include "common.h"
typedef struct {
    rknn_context rknn_ctx;          // RKNN runtime handle (0 when released)
    rknn_input_output_num io_num;   // input/output tensor counts
    rknn_tensor_attr* input_attrs;  // malloc'd copies of the input tensor attributes
    rknn_tensor_attr* output_attrs; // malloc'd copies of the output tensor attributes
    int model_channel;              // model input channel count
    int model_width;                // model input width (pixels)
    int model_height;               // model input height (pixels)
    bool is_quant;                  // true when outputs are INT8 affine-quantized
} rknn_app_context_t;
#include "postprocess.h"
int init_yolov8_model(const char* model_path, rknn_app_context_t* app_ctx);
int release_yolov8_model(rknn_app_context_t* app_ctx);
int inference_yolov8_model(rknn_app_context_t* app_ctx, image_buffer_t* img, object_detect_result_list* od_results);
#endif //_RKNN_DEMO_YOLOV8_H_