mirror of
https://github.com/azaion/autopilot.git
synced 2026-04-22 06:46:34 +00:00
Changed directory structure and renamed applications
- autopilot -> drone_controller - rtsp_ai_player -> ai_controller - added top level qmake project file - updated documentation - moved small demo applications from tmp/ to misc/
This commit is contained in:
@@ -0,0 +1,82 @@
|
||||
# qmake project file for the AI controller.
# Qt modules: core + network (RTSP/UDP sockets) + serialport (gimbal link); no GUI.
QT += core network serialport
QT -= gui

# Console C++17 application; 'concurrent' is a project CONFIG flag.
CONFIG += concurrent console c++17

# Keep generated moc/object files out of the source tree.
MOC_DIR = moc
OBJECTS_DIR = obj

# Wildcard globs pick up every source/header next to this .pro file.
SOURCES = $$PWD/*.cpp
HEADERS = $$PWD/*.h

# Optional build flavours below; enable with: qmake "CONFIG+=<flag>"

# Benchmark instrumentation.
ai_bench {
QMAKE_CXXFLAGS += -DAI_BENCH
}

# Real gimbal camera hardware attached (default: none).
gimbal {
message("Using real gimbal camera.")
QMAKE_CXXFLAGS += -DGIMBAL
}
else {
message("Not using real gimbal camera.")
}

# YOLOv8 ONNX models for indoor testing.
yolo_onnx {
message("Using YOLOv8 ONNX models for indoor testing.")
QMAKE_CXXFLAGS += -DYOLO_ONNX
}

# Dump every inference frame to /tmp for debugging.
save_images {
message("Saving inference images to /tmp.")
QMAKE_CXXFLAGS += -DSAVE_IMAGES
}

# Inference backend selection (first match wins):
# opi5 (Rockchip RKNN NPU) > ncnn > opencv > ONNX-runtime (default).
opi5 {
message("OPI5 build")
CONFIG += link_pkgconfig
PKGCONFIG += opencv4 librga stb libturbojpeg
INCLUDEPATH += /usr/include/rga
QMAKE_CXXFLAGS += -DOPI5_BUILD
# RKNN runtime is linked directly and found via rpath at runtime.
LIBS += /usr/local/lib/librknnrt.so
QMAKE_RPATHDIR += /usr/local/lib
SOURCES += $$PWD/src-opi5/*.c $$PWD/src-opi5/*.cpp $$PWD/src-opi5/*.cc
HEADERS += $$PWD/src-opi5/*.h
}
else:ncnn {
message("NCNN build")
CONFIG += link_pkgconfig
PKGCONFIG += opencv4
# NCNN is a static library built with OpenMP; -lgomp supplies the runtime.
QMAKE_CXXFLAGS += -DNCNN_BUILD -fopenmp
QMAKE_LFLAGS += -fopenmp
INCLUDEPATH += /opt/ncnn/include
LIBS += /opt/ncnn/lib/libncnn.a -lgomp
SOURCES += $$PWD/src-ncnn/*.cpp
HEADERS += $$PWD/src-ncnn/*.h
}
else:opencv {
message("OpenCV build")
message("You must use YOLOv8 ONNX files. Azaion model does not work!")
message("OpenCV must be version 4.10.0 installed to /usr/local/")
INCLUDEPATH += /opt/opencv-4.10.0/include/opencv4/
LIBS += /opt/opencv-4.10.0/lib/libopencv_world.so
QMAKE_LFLAGS += -Wl,-rpath,/opt/opencv-4.10.0/lib
QMAKE_CXXFLAGS += -DOPENCV_BUILD
SOURCES += $$PWD/src-opencv-onnx/*.cpp
HEADERS += $$PWD/src-opencv-onnx/*.h
}
else {
message("ONNX build")
message("You must use YOLOv8 ONNX files. Azaion ONNX model also works fine.")
QMAKE_CXXFLAGS += -DONNX_BUILD

# ONNX-runtime
INCLUDEPATH += /opt/onnxruntime-linux-x64-1.18.0/include
QMAKE_LFLAGS += -Wl,-rpath,/opt/onnxruntime-linux-x64-1.18.0/lib
LIBS += /opt/onnxruntime-linux-x64-1.18.0/lib/libonnxruntime.so.1.18.0

# OpenCV 4.10.0
INCLUDEPATH += /opt/opencv-4.10.0/include/opencv4/
QMAKE_LFLAGS += -Wl,-rpath,/opt/opencv-4.10.0/lib
LIBS += /opt/opencv-4.10.0/lib/libopencv_world.so

SOURCES += $$PWD/src-onnx-runtime/*.cpp
HEADERS += $$PWD/src-onnx-runtime/*.h
}
||||
@@ -0,0 +1,128 @@
|
||||
#include <QDebug>
|
||||
#include <opencv2/highgui.hpp>
|
||||
|
||||
#include "aiengine.h"
|
||||
#include "aiengineinference.h"
|
||||
#include "aiengineimagesaver.h"
|
||||
|
||||
#if defined(OPI5_BUILD)
|
||||
#include "src-opi5/aiengineinferenceopi5.h"
|
||||
#elif defined(OPENCV_BUILD)
|
||||
#include "src-opencv-onnx/aiengineinferenceopencvonnx.h"
|
||||
#elif defined(NCNN_BUILD)
|
||||
#include "src-ncnn/aiengineinferencencnn.h"
|
||||
#else
|
||||
#include "src-onnx-runtime/aiengineinferenceonnxruntime.h"
|
||||
#endif
|
||||
|
||||
|
||||
|
||||
// Builds the full pipeline: RTSP listener, a compile-time-selected inference
// backend (three workers on OPI5, one elsewhere) each in its own QThread,
// and optionally the gimbal client.  All frame/result traffic between this
// thread and the workers uses queued signal/slot connections.
AiEngine::AiEngine(QString modelPath, QObject *parent)
    : QObject{parent}
{
    // Frames arrive from the RTSP listener via frameReceivedSlot().
    mRtspListener = new AiEngineRtspListener(this);
    connect(mRtspListener, &AiEngineRtspListener::frameReceived, this, &AiEngine::frameReceivedSlot);

#if defined(OPI5_BUILD)
    // Three parallel inference workers; initialize(0/1/2) presumably selects
    // an NPU core — TODO confirm against AiEngineInferenceOpi5.
    mInference = new AiEngineInferenceOpi5(modelPath);
    mInference->initialize(0);
    mInference2 = new AiEngineInferenceOpi5(modelPath);
    mInference2->initialize(1);
    mInference3 = new AiEngineInferenceOpi5(modelPath);
    mInference3->initialize(2);
#elif defined(OPENCV_BUILD)
    mInference = new AiEngineInferenceOpencvOnnx(modelPath);
#elif defined(NCNN_BUILD)
    // NOTE(review): class name carries a stray 'v' (AiEngineInferencevNcnn)
    // unlike the Opi5/OpencvOnnx spellings — confirm it matches the header.
    mInference = new AiEngineInferencevNcnn(modelPath);
#else
    // NOTE(review): same stray-'v' spelling here — confirm against the header.
    mInference = new AiEngineInferencevOnnxRuntime(modelPath);
#endif

    // Worker thread for the (first) inference object; both directions are
    // queued so inference never blocks this thread.
    QThread *inferenceThread = new QThread(this);
    mInference->moveToThread(inferenceThread);
    connect(mInference, &AiEngineInference::resultsReady, this, &AiEngine::inferenceResultsReceivedSlot, Qt::QueuedConnection);
    connect(this, &AiEngine::inferenceFrame, mInference, &AiEngineInference::performInferenceSlot, Qt::QueuedConnection);
    inferenceThread->start();

#ifdef OPI5_BUILD
    // Second and third workers (OPI5 only; mInference2/3 remain
    // uninitialized on other builds and are only touched inside
    // OPI5_BUILD sections).
    QThread *inferenceThread2 = new QThread(this);
    mInference2->moveToThread(inferenceThread2);
    connect(mInference2, &AiEngineInference::resultsReady, this, &AiEngine::inferenceResultsReceivedSlot, Qt::QueuedConnection);
    connect(this, &AiEngine::inferenceFrame2, mInference2, &AiEngineInference::performInferenceSlot, Qt::QueuedConnection);
    inferenceThread2->start();

    QThread *inferenceThread3 = new QThread(this);
    mInference3->moveToThread(inferenceThread3);
    connect(mInference3, &AiEngineInference::resultsReady, this, &AiEngine::inferenceResultsReceivedSlot, Qt::QueuedConnection);
    connect(this, &AiEngine::inferenceFrame3, mInference3, &AiEngineInference::performInferenceSlot, Qt::QueuedConnection);
    inferenceThread3->start();
#endif

#ifdef GIMBAL
    mGimbalClient = new AiEngineGimbalClient(this);
#else
    // No gimbal hardware: results are only displayed/saved.
    mGimbalClient = nullptr;
#endif
}
|
||||
|
||||
|
||||
void AiEngine::start(void)
|
||||
{
|
||||
mRtspListener->startListening();
|
||||
mElapsedTimer.start();
|
||||
}
|
||||
|
||||
|
||||
void AiEngine::stop(void)
|
||||
{
|
||||
mRtspListener->stopListening();
|
||||
}
|
||||
|
||||
|
||||
// Receives one finished inference result from any of the inference workers
// (queued back onto this object's thread).  Logs the running FPS, forwards
// the detections to the gimbal client (when one exists) and shows the frame.
void AiEngine::inferenceResultsReceivedSlot(AiEngineInferenceResult result)
{
    mFrameCounter++;
    // Average FPS since start().  Guard against a 0 ms elapsed time on the
    // very first result, which would print 'inf'.
    const float elapsedSec = mElapsedTimer.elapsed() / 1000.0f;
    if (elapsedSec > 0.0f) {
        qDebug() << "FPS = " << (mFrameCounter / elapsedSec);
    }
    //qDebug() << "DEBUG. inference frame counter:" << mFrameCounter;

    //qDebug() << "AiEngine got inference results in thread: " << QThread::currentThreadId();
    // mGimbalClient is nullptr on non-GIMBAL builds.
    if (mGimbalClient != nullptr) {
        mGimbalClient->inferenceResultSlot(result);
    }

    cv::imshow("Received Frame", result.frame);

#ifdef SAVE_IMAGES
    static int imageCounter = 0;
    AiEngineImageSaver *saver = new AiEngineImageSaver(result.frame, ++imageCounter);
    // Bug fix: connect BEFORE start().  Connecting after start() races with
    // a fast saver whose finished() could fire first, so deleteLater() would
    // never run and the saver object would leak.
    connect(saver, &AiEngineImageSaver::finished, saver, &QObject::deleteLater);
    saver->start();
#endif
}
|
||||
|
||||
|
||||
// New decoded frame from the RTSP stream.  Dispatches it to the first idle
// inference worker; if every worker is busy the frame is silently dropped
// (there is no queueing — presumably intentional so inference always works
// on recent frames).
void AiEngine::frameReceivedSlot(cv::Mat frame)
{
    //qDebug() << "AiEngine got frame from RTSP listener in thread: " << QThread::currentThreadId();
    //cv::imshow("Received Frame", frame);
    // Counts only frames actually handed to a worker.
    static int framecounter = 0;
    //qDebug() << "DEBUG. RTSP frame counter:" << framecounter;

    if (mInference->isActive() == false) {
        //qDebug() << "AiEngine. Inference thread is free. Sending frame to it.";
        emit inferenceFrame(frame);
        framecounter++;
    }
#ifdef OPI5_BUILD
    // OPI5 runs three workers; try the others before giving up on the frame.
    else if (mInference2->isActive() == false) {
        //qDebug() << "AiEngine. Inference thread is free. Sending frame to it.";
        emit inferenceFrame2(frame);
        framecounter++;
    }
    else if (mInference3->isActive() == false) {
        //qDebug() << "AiEngine. Inference thread is free. Sending frame to it.";
        emit inferenceFrame3(frame);
        framecounter++;
    }
#endif
}
|
||||
@@ -0,0 +1,36 @@
|
||||
#pragma once
|
||||
|
||||
#include <QObject>
|
||||
#include <QElapsedTimer>
|
||||
#include <opencv2/core.hpp>
|
||||
#include <opencv2/videoio.hpp>
|
||||
#include "aienginertsplistener.h"
|
||||
#include "aiengineinference.h"
|
||||
#include "aienginegimbalclient.h"
|
||||
|
||||
// Top-level AI pipeline object: receives RTSP frames, fans them out to one
// (or, on OPI5 builds, three) inference workers running in their own
// QThreads, and forwards results to the gimbal client and a preview window.
class AiEngine : public QObject
{
    Q_OBJECT
public:
    // modelPath is handed to the inference backend chosen at compile time.
    explicit AiEngine(QString modelPath, QObject *parent = nullptr);
    // Starts RTSP listening and the FPS reference timer.
    void start(void);
    // Stops RTSP listening; inference threads keep running.
    void stop(void);

public slots:
    // New decoded frame from the RTSP listener.
    void frameReceivedSlot(cv::Mat frame);
    // Finished detection result from any inference worker.
    void inferenceResultsReceivedSlot(AiEngineInferenceResult result);

signals:
    // One signal per worker; each is queued into its worker's thread.
    void inferenceFrame(cv::Mat frame);
    void inferenceFrame2(cv::Mat frame);
    void inferenceFrame3(cv::Mat frame);

private:
    QElapsedTimer mElapsedTimer;          // started in start(); used for FPS logging
    uint32_t mFrameCounter = 0;           // inference results seen so far
    AiEngineRtspListener *mRtspListener;
    AiEngineInference *mInference;
    AiEngineInference *mInference2;       // only initialized on OPI5 builds
    AiEngineInference *mInference3;       // only initialized on OPI5 builds
    AiEngineGimbalClient *mGimbalClient;  // nullptr on non-GIMBAL builds
};
|
||||
@@ -0,0 +1,10 @@
|
||||
#pragma once
|
||||
|
||||
#include <QString>
|
||||
|
||||
#ifdef OPI5_BUILD
|
||||
QString rtspVideoUrl = "rtsp://192.168.0.25:8554/main.264";
|
||||
#else
|
||||
QString rtspVideoUrl = "rtsp://localhost:8554/live.stream";
|
||||
#endif
|
||||
|
||||
@@ -0,0 +1,93 @@
|
||||
#pragma once
|
||||
|
||||
#include <QByteArray>
|
||||
#include <QString>
|
||||
#include "aienginegimbalserverdefines.h"
|
||||
|
||||
|
||||
// Common geoposition shared by the drone- and target-position messages.
struct AiEngineGeoPosition {
    float lat;  // latitude
    float lon;  // longitude
    float alt;  // altitude
};
|
||||
|
||||
|
||||
// AiEngineGimbalClient -> AiEngineGimbalServer is sent when location or position of the drone changes.
|
||||
typedef struct {
|
||||
AiEngineGeoPosition position;
|
||||
float pitch;
|
||||
float yaw;
|
||||
} AiEngineDronePosition;
|
||||
|
||||
|
||||
// Axis-aligned bounding box (pixel coordinates).
struct AiEngineRectangle {
    int left;
    int top;
    int right;
    int bottom;
};
|
||||
|
||||
|
||||
// AiEngineGimbalClient -> AiEngineGimbalServer when AI finds the target. Index is a new variable to
|
||||
// keep track of the targets.
|
||||
typedef struct {
|
||||
int index;
|
||||
AiEngineRectangle rectangle;
|
||||
} AiEngineCameraTarget;
|
||||
|
||||
|
||||
// AiEngineGimbalServer -> AiEngineGimbalClient when AiEngineGimbalServer has zoomed to the target
|
||||
// defined in AiEngineCameraTarget. Position variable contains calculated geoposition of the target.
|
||||
typedef struct {
|
||||
int targetIndex;
|
||||
AiEngineGeoPosition position;
|
||||
} AiEngineTargetPosition;
|
||||
|
||||
|
||||
// Two-way message.  Client -> server: request a camera direction change
// (e.g. nothing interesting was found).  Server -> client: sent every few
// seconds so the AI knows where the camera points.
struct AiEngineCameraPosition {
    float yaw;
    float pitch;
    float zoom;
};
|
||||
|
||||
|
||||
// Derived geometry of an on-screen rectangle, produced by
// AiEngineGimbalServerActions::calculateRectangleProperties() and consumed
// by the turn/zoom actions.
struct AiEngineRectangleProperties
{
    int width;    // right - left, in video pixels
    int height;   // bottom - top, in video pixels
    int middleX;  // horizontal centre of the rectangle
    int middleY;  // vertical centre of the rectangle
};
|
||||
|
||||
|
||||
typedef struct {
|
||||
// Get these only once
|
||||
uint resolutionX;
|
||||
uint resolutionY;
|
||||
uint hardwareID;
|
||||
float maxZoom;
|
||||
float aiEngineCameraFOVHMin;
|
||||
float aiEngineCameraFOVHMax;
|
||||
float aiEngineCameraFOVVMin;
|
||||
float aiEngineCameraFOVVMax;
|
||||
float aiEngineCameraFLMin;
|
||||
float aiEngineCameraFLMax;
|
||||
|
||||
// Update these before every command
|
||||
float currentYaw;
|
||||
float currentPitch;
|
||||
float currentRoll;
|
||||
float currentZoom;
|
||||
} AiEngineGimbalStatus;
|
||||
|
||||
|
||||
// Associates a UDP command id with its raw byte frame and a human-readable
// description (used for logging/debugging).
struct AiEngineServerSerialCommandStructure
{
    UDP_COMMAND_ID id;
    QByteArray command;   // raw command frame bytes sent to the gimbal
    QString description;  // human-readable label
};
|
||||
@@ -0,0 +1,138 @@
|
||||
#include <QDebug>
|
||||
#include <QThread>
|
||||
#include <QVector>
|
||||
#include "aienginegimbalclient.h"
|
||||
#include "aiengineinference.h"
|
||||
|
||||
AiEngineGimbalClient::AiEngineGimbalClient(QObject *parent)
    : QObject{parent}
{
    // Create server and run it in a new thread.
    // Connect all signals and slots here; no need to do the same in the
    // AiEngineGimbalServer class.
    // NOTE(review): mGimbalServer is created parentless (required for
    // moveToThread) and is never deleted, nor is the thread stopped —
    // confirm whether cleanup matters for this app's lifetime.

    mGimbalServer = new AiEngineGimbalServer();
    QThread *gimbalServerThread = new QThread(this);
    mGimbalServer->moveToThread(gimbalServerThread);

    // Client -> Server communication. Only zoomToAiTarget() signal is emitted ATM.
    // Queued connections: each slot runs in the server's thread.
    connect(this, &AiEngineGimbalClient::setDronePosition, mGimbalServer, &AiEngineGimbalServer::dronePositionSlot, Qt::QueuedConnection);
    connect(this, &AiEngineGimbalClient::zoomToAiTarget, mGimbalServer, &AiEngineGimbalServer::zoomToAiTargetSlot, Qt::QueuedConnection);
    connect(this, &AiEngineGimbalClient::setCameraPosition, mGimbalServer, &AiEngineGimbalServer::cameraPositionSlot, Qt::QueuedConnection);

    // Server -> Client communication
    connect(mGimbalServer, &AiEngineGimbalServer::aiTargetZoomed, this, &AiEngineGimbalClient::aiTargetZoomedSlot, Qt::QueuedConnection);
    connect(mGimbalServer, &AiEngineGimbalServer::newCameraPosition, this, &AiEngineGimbalClient::cameraPositionSlot, Qt::QueuedConnection);

    gimbalServerThread->start();
}
|
||||
|
||||
|
||||
// The server reports that the camera finished zooming to a previously
// requested AI target; log the target index and its computed geoposition.
void AiEngineGimbalClient::aiTargetZoomedSlot(AiEngineTargetPosition targetPosition)
{
    const auto &geo = targetPosition.position;
    qDebug() << "AiEngineGimbalClient::aiTargetZoomedSlot() Server zoomed to index:"
             << targetPosition.targetIndex
             << "Geopos:" << geo.lat << geo.lon << geo.alt;
}
|
||||
|
||||
|
||||
// Camera orientation/zoom report from the server; currently only logged.
void AiEngineGimbalClient::cameraPositionSlot(AiEngineCameraPosition cameraPosition)
{
    qDebug() << "AiEngineGimbalClient::cameraPositionSlot() Camera moved to:"
             << cameraPosition.pitch << cameraPosition.yaw << cameraPosition.zoom;
}
|
||||
|
||||
|
||||
// Computes the tightest axis-aligned rectangle enclosing every detected
// object's bounding box.
//
// Bug fixed: the previous version initialized the result with sentinel
// extremes (top/left = 1000000, bottom/right = 0), so an empty object list
// produced an inverted nonsense rectangle, and coordinates outside the
// sentinel range would have been mishandled.  The result is now seeded from
// the first object, and an empty list yields an all-zero rectangle.
AiEngineRectangle AiEngineGimbalClient::getGroupCoordinates(QVector<AiEngineObject> &objects)
{
    AiEngineRectangle groupRectangle{0, 0, 0, 0};
    if (objects.size() == 0) {
        return groupRectangle;
    }

    groupRectangle = objects[0].rectangle;
    for (const AiEngineObject &object : objects) {
        const AiEngineRectangle &objectRectangle = object.rectangle;

        if (objectRectangle.top < groupRectangle.top) {
            groupRectangle.top = objectRectangle.top;
        }
        if (objectRectangle.left < groupRectangle.left) {
            groupRectangle.left = objectRectangle.left;
        }
        if (objectRectangle.bottom > groupRectangle.bottom) {
            groupRectangle.bottom = objectRectangle.bottom;
        }
        if (objectRectangle.right > groupRectangle.right) {
            groupRectangle.right = objectRectangle.right;
        }
    }

    return groupRectangle;
}
|
||||
|
||||
|
||||
// Called directly (from AiEngine's thread) with each inference result.
// Picks the highest-probability detection and, when the gimbal server is
// idle, asks it (via a queued signal) to zoom to that target.
void AiEngineGimbalClient::inferenceResultSlot(AiEngineInferenceResult result)
{
    if (result.objects.size() == 0) {
        return;
    }

    // TODO!! Just increasing number for testing purposes ATM.
    static int index = 0;

    // Find best possible target ...
    // ('propability' is the field's spelling in AiEngineObject.)
    int bestObjectIndex = -1;
    float bestObjectProb = -1;
    for (int i = 0; i < result.objects.size(); i++) {
        const AiEngineObject &object = result.objects[i];
        if (object.propability > bestObjectProb) {
            bestObjectIndex = i;
            bestObjectProb = object.propability;
        }
    }

    // ... if found, then ask camera to zoom to it.
    if (bestObjectIndex >= 0) {
        const AiEngineObject &object = result.objects[bestObjectIndex];

        AiEngineCameraTarget target;
        target.rectangle = object.rectangle;
        target.index = index++;

        qDebug() << "Found best target from index" << bestObjectIndex
                 << "Name:" << object.classStr
                 << "Probability:" << bestObjectProb;

        // isAvailable() is a direct cross-thread read of a bool the server
        // mutates in its own thread — NOTE(review): unsynchronized; confirm.
        // When the server is busy the target is simply dropped.
        if (mGimbalServer->isAvailable()) {
            emit zoomToAiTarget(target);
        }
    }

    /*
    // We got list of all recognized objects, but at least for now we will zoom to all objects at
    // once and not for each invidually. Got minimal coordinates which contains the all objects.
    AiEngineRectangle groupRect = getGroupCoordinates(result.objects);

    if (groupRect.top > 600 || groupRect.bottom > 600) {
        qDebug() << "ERROR! inferenceResultSlot() groupRect.top > 600 || groupRect.bottom > 600";
        return;
    }

    if (groupRect.left > 600 || groupRect.right > 600) {
        qDebug() << "ERROR! inferenceResultSlot() groupRect.left > 600 || groupRect.right > 600";
        return;
    }

    if ((groupRect.bottom <= groupRect.top) || (groupRect.right <= groupRect.left)) {
        qDebug() << "ERROR! (groupRect.bottom <= groupRect.top) || (groupRect.right <= groupRect.left)";
        return;
    }

    qDebug() << "inferenceResultSlot() Zooming to square top=" << groupRect.top << "x" << groupRect.left << "and bottom:" << groupRect.bottom << "x" << groupRect.right;
    */
}
|
||||
@@ -0,0 +1,30 @@
|
||||
#pragma once
|
||||
|
||||
#include <QObject>
|
||||
#include <QElapsedTimer>
|
||||
#include "aiengineinference.h"
|
||||
#include "aienginegimbalserver.h"
|
||||
#include "aienginedefinitions.h"
|
||||
|
||||
// Bridges AI inference results to the gimbal: picks the best detection and
// asks the AiEngineGimbalServer (which it creates and moves to its own
// QThread) to zoom to it.
class AiEngineGimbalClient : public QObject
{
    Q_OBJECT
public:
    explicit AiEngineGimbalClient(QObject *parent = nullptr);

private slots:
    // Server confirms it zoomed to a target and reports its geoposition.
    void aiTargetZoomedSlot(AiEngineTargetPosition targetPosition);
    // Server reports the current camera orientation/zoom.
    void cameraPositionSlot(AiEngineCameraPosition cameraPosition);

public slots:
    // Entry point called by AiEngine with each detection result.
    void inferenceResultSlot(AiEngineInferenceResult results);

signals:
    // Client -> server requests (queued across the thread boundary).
    void setDronePosition(AiEngineDronePosition);
    void zoomToAiTarget(AiEngineCameraTarget);
    void setCameraPosition(AiEngineCameraPosition);

private:
    // Smallest rectangle enclosing all detected objects.
    AiEngineRectangle getGroupCoordinates(QVector<AiEngineObject> &objects);
    AiEngineGimbalServer *mGimbalServer;  // lives in its own thread
};
|
||||
@@ -0,0 +1,103 @@
|
||||
#include <QDebug>
|
||||
#include <QTimer>
|
||||
#include "aienginegimbalserver.h"
|
||||
#include "aienginegimbalserveractions.h"
|
||||
|
||||
|
||||
AiEngineGimbalServer::AiEngineGimbalServer(QObject *parent)
    : QObject{parent}
{
    // TODO!! Setup and use serial port....
    // Start unavailable and simulate the hardware becoming ready after 5 s.
    mIsAvailable = false;
    qDebug() << "Initial is available: " << mIsAvailable;

    QTimer::singleShot(5000, this, [this]() {
        mIsAvailable = true;
        qDebug() << "Initial is available: " << mIsAvailable;
    });

    // Hand the actions helper non-owning pointers to the shared UDP plumbing
    // and the cached gimbal status; setup() also queries the camera and
    // drives it to its initial orientation and zoom.
    mActions.setup(&mUdpSocket, &mUdpCommand, &mUdpResponse, &mGimbalStatus);

    // TODO: Remove after testing — hard-coded test drone position.
    mDronePosition.position.alt = 1000;
    mDronePosition.position.lat = 49.8397;
    mDronePosition.position.lon = 24.0319;
    mDronePosition.pitch = 0.0;
    mDronePosition.yaw = 90.0;

    // Relay the action helper's completion signal to the client.
    connect(&mActions, &AiEngineGimbalServerActions::aiTargetZoomed, this, &AiEngineGimbalServer::aiTargetZoomed);
}
|
||||
|
||||
|
||||
// TODO!! Client doesn't really send any signal yet to this slot.
|
||||
void AiEngineGimbalServer::dronePositionSlot(AiEngineDronePosition dronePosition)
|
||||
{
|
||||
qDebug() << "AiEngineGimbalServer::dronePositionSlot() Server got new drone position:"
|
||||
<< dronePosition.position.lat
|
||||
<< dronePosition.position.lon
|
||||
<< dronePosition.position.alt
|
||||
<< dronePosition.pitch
|
||||
<< dronePosition.yaw;
|
||||
|
||||
mDronePosition = dronePosition;
|
||||
}
|
||||
|
||||
|
||||
// This is actually called from the client (via the queued zoomToAiTarget
// signal), so it runs in the server's thread.
//
// Sequence timeline (delays relative to this call):
//   t=0        turn the camera toward the target
//   t=3.0 s    compute the target's geolocation
//   t=3.1 s    zoom in on the target
//   t=13.1 s   restore the pre-zoom orientation/zoom
//   t=16.1 s   become available for the next target
// While the sequence runs mIsAvailable is false and new requests are
// silently ignored.
void AiEngineGimbalServer::zoomToAiTargetSlot(AiEngineCameraTarget target)
{
    qDebug() << "zoomToAiTargetSlot called";
    if (mIsAvailable == true) {
        mIsAvailable = false;
        qDebug() << "Is available: " << mIsAvailable;

        qDebug() << "AiEngineGimbalServer::zoomToAiTargetSlot() Move camera to the new target:"
                 << "index:" << target.index
                 << "pos:" << target.rectangle.top << target.rectangle.left << target.rectangle.bottom << target.rectangle.right;

        // Rectangle calculation for having proper zoom on group / target
        AiEngineRectangleProperties rectangle = mActions.calculateRectangleProperties(target.rectangle.top, target.rectangle.left, target.rectangle.bottom, target.rectangle.right);
        qDebug() << "rectangle.middleX: " << rectangle.middleX;
        qDebug() << "rectangle.middleY: " << rectangle.middleY;
        qDebug() << "rectangle.width: " << rectangle.width;
        qDebug() << "rectangle.height: " << rectangle.height;

        // Turn
        mActions.turnToTarget(rectangle);

        // Calculate location
        int delay1 = 3000; // Adjust this value as needed
        // Copies are taken now so the lambdas see the state at request time,
        // not whatever it is when the timers fire.
        AiEngineDronePosition dronePosition = mDronePosition;
        int targetIndex = target.index;
        QTimer::singleShot(delay1, this, [this, dronePosition, targetIndex]() { mActions.getLocation(dronePosition, targetIndex); });

        // Zoom
        int delay2 = delay1 + 100; // Adjust this value as needed
        QTimer::singleShot(delay2, this, [this, rectangle]() { mActions.zoomToTarget(rectangle); });

        // Return to previous position (the status captured now, not at fire time)
        int delay3 = delay2 + 10000; // Adjust this value as needed
        AiEngineGimbalStatus gimbalStatus = mGimbalStatus;
        QTimer::singleShot(delay3, this, [this, gimbalStatus]() { mActions.restoreOrientationAndZoom(gimbalStatus); });

        // Allow calls
        int delay4 = delay3 + 3000; // Adjust this value as needed
        QTimer::singleShot(delay4, this, [this]() {
            mIsAvailable = true;
            qDebug() << "Is available: " << mIsAvailable;
        });
    }
}
|
||||
|
||||
|
||||
bool AiEngineGimbalServer::isAvailable(void)
|
||||
{
|
||||
return mIsAvailable;
|
||||
}
|
||||
|
||||
|
||||
// TODO!! Not sent from the client yet.
|
||||
void AiEngineGimbalServer::cameraPositionSlot(AiEngineCameraPosition position)
|
||||
{
|
||||
qDebug() << "AiEngineGimbalServer::cameraPositionSlot() Move camera to:" << position.pitch << position.yaw << "zoom:" << position.zoom;
|
||||
}
|
||||
@@ -0,0 +1,37 @@
|
||||
#pragma once
|
||||
|
||||
#include <QObject>
|
||||
#include <QMap>
|
||||
#include "aienginedefinitions.h"
|
||||
#include "aienginegimbalserveractions.h"
|
||||
#include "aienginegimbalserverudpcommand.h"
|
||||
#include "aienginegimbalserverudpresponse.h"
|
||||
#include "aienginegimbalserverudp.h"
|
||||
|
||||
|
||||
// Owns the gimbal hardware communication (UDP command/response via the
// actions helper) and serializes zoom requests: only one zoom-to-target
// sequence runs at a time (see mIsAvailable).  Created by the client and
// moved into its own QThread.
class AiEngineGimbalServer : public QObject
{
    Q_OBJECT
public:
    explicit AiEngineGimbalServer(QObject *parent = nullptr);
    // True while idle.  NOTE(review): called directly from the client's
    // thread while mIsAvailable is written in the server thread —
    // unsynchronized; consider std::atomic<bool>.
    bool isAvailable(void);

public slots:
    // Latest drone geoposition/attitude (stored for target geolocation).
    void dronePositionSlot(AiEngineDronePosition);
    // Runs the full turn -> locate -> zoom -> restore sequence for a target.
    void zoomToAiTargetSlot(AiEngineCameraTarget);
    // TODO: not sent from the client yet.
    void cameraPositionSlot(AiEngineCameraPosition);

signals:
    void aiTargetZoomed(AiEngineTargetPosition);
    void newCameraPosition(AiEngineCameraPosition);

private:
    AiEngineGimbalServerUDP mUdpSocket;           // UDP link to the camera
    AiEngineGimbalStatus mGimbalStatus;           // cached camera capabilities/state

    AiEngineDronePosition mDronePosition;         // last reported drone position
    AiEngineGimbalServerUDPCommand mUdpCommand;   // command frame builder
    AiEngineGimbalServerUDPResponse mUdpResponse; // response frame parser
    AiEngineGimbalServerActions mActions;         // high-level camera actions
    bool mIsAvailable;                            // false while a zoom sequence runs
};
|
||||
@@ -0,0 +1,356 @@
|
||||
#include <QUdpSocket>
|
||||
#include <QVariant>
|
||||
#include "aienginegimbalserveractions.h"
|
||||
#include "aienginegimbalserverudp.h"
|
||||
|
||||
|
||||
// All real initialization happens later in setup(); construction only
// forwards the parent to QObject.
AiEngineGimbalServerActions::AiEngineGimbalServerActions(QObject *parent)
    : QObject(parent)
{
}
|
||||
|
||||
|
||||
// One-time initialization: stores the non-owning collaborator pointers,
// queries the camera over UDP (blocking send/read request-response pairs),
// caches its capabilities in *gimbalStatus, then drives the gimbal to its
// initial orientation and zoom.
void AiEngineGimbalServerActions::setup(AiEngineGimbalServerUDP *udpSocket, AiEngineGimbalServerUDPCommand *udpCommand, AiEngineGimbalServerUDPResponse *udpResponse, AiEngineGimbalStatus *gimbalStatus)
{
    // Non-owning pointers; AiEngineGimbalServer owns all four objects.
    mUdpSocket = udpSocket;
    mUdpCommand = udpCommand;
    mUdpResponse = udpResponse;
    mGimbalStatus = gimbalStatus;

    // Set initial position and update status
    QByteArray tempCommand;
    QByteArray tempResponse;
    QHash<QString, QVariant> responseValues;

    // Get camera ID
    tempCommand = mUdpCommand->getCommand(UDP_COMMAND_ID::ACQUIRE_HW_INFO);
    mUdpSocket->sendCommand(tempCommand);
    tempResponse = mUdpSocket->readResponse();
    responseValues = mUdpResponse->getResponceValues(tempResponse);
    mGimbalStatus->hardwareID = responseValues["hardware_id"].toInt();

    // Fill in the per-model optics constants (FOV and focal-length ranges).
    switch (mGimbalStatus->hardwareID) {
    case AI_ENGINE_CAMERA_ZR10_HID:
        mGimbalStatus->aiEngineCameraFOVHMin = AI_ENGINE_CAMERA_ZR10_FIELD_OF_VIEW_HORIZONTAL_MIN;
        mGimbalStatus->aiEngineCameraFOVHMax = AI_ENGINE_CAMERA_ZR10_FIELD_OF_VIEW_HORIZONTAL_MAX;
        mGimbalStatus->aiEngineCameraFOVVMin = AI_ENGINE_CAMERA_ZR10_FIELD_OF_VIEW_VERTICAL_MIN;
        mGimbalStatus->aiEngineCameraFOVVMax = AI_ENGINE_CAMERA_ZR10_FIELD_OF_VIEW_VERTICAL_MAX;
        mGimbalStatus->aiEngineCameraFLMin = AI_ENGINE_CAMERA_ZR10_FOCAL_LENGTH_MIN;
        mGimbalStatus->aiEngineCameraFLMax = AI_ENGINE_CAMERA_ZR10_FOCAL_LENGTH_MAX;
        break;
    case AI_ENGINE_CAMERA_A8_HID:
        mGimbalStatus->aiEngineCameraFOVHMin = AI_ENGINE_CAMERA_A8_FIELD_OF_VIEW_HORIZONTAL_MIN;
        mGimbalStatus->aiEngineCameraFOVHMax = AI_ENGINE_CAMERA_A8_FIELD_OF_VIEW_HORIZONTAL_MAX;
        mGimbalStatus->aiEngineCameraFOVVMin = AI_ENGINE_CAMERA_A8_FIELD_OF_VIEW_VERTICAL_MIN;
        mGimbalStatus->aiEngineCameraFOVVMax = AI_ENGINE_CAMERA_A8_FIELD_OF_VIEW_VERTICAL_MAX;
        mGimbalStatus->aiEngineCameraFLMin = AI_ENGINE_CAMERA_A8_FOCAL_LENGTH_MIN;
        mGimbalStatus->aiEngineCameraFLMax = AI_ENGINE_CAMERA_A8_FOCAL_LENGTH_MAX;
        break;
    default:
        // Unknown camera: the optics fields stay uninitialized, so the
        // angle/zoom math would be garbage.  NOTE(review): consider safe
        // defaults here.
        qDebug().noquote().nospace() << "ERROR: Unknown HardwareID " << mGimbalStatus->hardwareID;
        break;
    }

    // Get resolution to reduce calls to camera
    tempCommand = mUdpCommand->getCommand(UDP_COMMAND_ID::ACQUIRE_CAMERA_CODEC_SPECS);
    mUdpSocket->sendCommand(tempCommand);
    tempResponse = mUdpSocket->readResponse();
    responseValues = mUdpResponse->getResponceValues(tempResponse);
    mGimbalStatus->resolutionX = responseValues["width"].toInt();
    mGimbalStatus->resolutionY = responseValues["height"].toInt();

    // Get max zoom value to reduce calls to camera
    tempCommand = mUdpCommand->getCommand(UDP_COMMAND_ID::ACQUIRE_MAX_ZOOM_VALUE);
    mUdpSocket->sendCommand(tempCommand);
    tempResponse = mUdpSocket->readResponse();
    responseValues = mUdpResponse->getResponceValues(tempResponse);
    float maxZoom = responseValues["zoom"].toFloat();
    // Cap at 10x; getAnglesToOnScreenTarget() hard-codes a 1..10 range.
    mGimbalStatus->maxZoom = maxZoom > 10 ? 10 : maxZoom;

    // Go to initial orientation.
    // Yaw/pitch are packed as little-endian int16 values in 0.1-degree
    // units at byte offsets 8..11 of the command frame.
    tempCommand = mUdpCommand->getCommand(UDP_COMMAND_ID::TURN_TO_DEGREES);
    int16_t degreesVal = AI_ENGINE_GIMBAL_INITIAL_YAW * 10;
    tempCommand[8] = degreesVal & 0xFF;
    tempCommand[9] = degreesVal >> 8;
    degreesVal = AI_ENGINE_GIMBAL_INITIAL_PITCH * 10;
    tempCommand[10] = degreesVal & 0xFF;
    tempCommand[11] = degreesVal >> 8;
    mUdpSocket->sendCommand(tempCommand);

    // Go to initial zoom.
    // The zoom factor is split into an integer part (byte 8) and one
    // fractional decimal digit (byte 9).
    tempCommand = mUdpCommand->getCommand(UDP_COMMAND_ID::ZOOM_TO_X);
    uint8_t integerPart = static_cast<uint8_t>(AI_ENGINE_CAMERA_INITIAL_ZOOM);
    float fractionalPart = AI_ENGINE_CAMERA_INITIAL_ZOOM - integerPart;
    uint8_t scaledFractional = uint8_t(fractionalPart * 10);
    tempCommand[8] = integerPart;
    tempCommand[9] = scaledFractional;
    mUdpSocket->sendCommand(tempCommand);

    // Cache the commanded orientation/zoom as the current state (no readback
    // from the hardware).
    mGimbalStatus->currentPitch = AI_ENGINE_GIMBAL_INITIAL_PITCH;
    mGimbalStatus->currentRoll = AI_ENGINE_GIMBAL_INITIAL_ROLL;
    mGimbalStatus->currentYaw = AI_ENGINE_GIMBAL_INITIAL_YAW;
    mGimbalStatus->currentZoom = AI_ENGINE_CAMERA_INITIAL_ZOOM;

    qDebug().noquote().nospace() << "mGimbalStatus->hardwareID: " << mGimbalStatus->hardwareID;
    qDebug().noquote().nospace() << "mGimbalStatus->maxZoom: " << mGimbalStatus->maxZoom;
    qDebug().noquote().nospace() << "mGimbalStatus->currentZoom: " << mGimbalStatus->currentZoom;
    qDebug().noquote().nospace() << "mGimbalStatus->currentPitch: " << mGimbalStatus->currentPitch;
    qDebug().noquote().nospace() << "mGimbalStatus->currentYaw: " << mGimbalStatus->currentYaw;
    qDebug().noquote().nospace() << "mGimbalStatus->currentRoll: " << mGimbalStatus->currentRoll;
    qDebug().noquote().nospace() << "mGimbalStatus->resolutionX: " << mGimbalStatus->resolutionX;
    qDebug().noquote().nospace() << "mGimbalStatus->resolutionY: " << mGimbalStatus->resolutionY;
}
|
||||
|
||||
|
||||
// Scales an AI-space bounding box (640x360) up to video coordinates
// (1280x720) and derives the width/height/centre used by the turn & zoom
// actions.
//
// Defensive behaviour: swapped edges are corrected (with a warning), and a
// degenerate rectangle (any derived value 0) falls back to "full frame,
// centred", which makes the subsequent turn/zoom a no-op.
AiEngineRectangleProperties AiEngineGimbalServerActions::calculateRectangleProperties(int top, int left, int bottom, int right) {
    // AI resolution is 640 * 360, which is half of 1280 * 720.
    // Multiply by 2 to get the corresponding position on video.
    // TODO(review): could be derived from mGimbalStatus->resolutionX/Y
    // instead of assuming exactly 2x.
    top = top * 2;
    left = left * 2;
    bottom = bottom * 2;
    right = right * 2;

    // Sanity check:
    // top cannot be greater than bottom,
    // left cannot be greater than right.
    if (top > bottom) {
        int temp = top;
        top = bottom;
        bottom = temp;
        qWarning().noquote().nospace() << "calculateRectangleProperties(): top and bottom mixed?";
    }
    if (left > right) {
        int temp = left;
        left = right;
        right = temp;
        qWarning().noquote().nospace() << "calculateRectangleProperties(): left and right mixed?";
    }

    AiEngineRectangleProperties properties;
    properties.width = right - left;
    properties.height = bottom - top;
    // All operands are already int — the previous static_cast<int> was
    // redundant.
    properties.middleX = left + properties.width / 2;
    properties.middleY = top + properties.height / 2;

    // Sanity check: none of the values may be 0.
    // If any is, we will neither turn nor zoom.
    if (properties.height == 0 || properties.width == 0 || properties.middleX == 0 || properties.middleY == 0) {
        properties.height = mGimbalStatus->resolutionY;
        properties.width = mGimbalStatus->resolutionX;
        properties.middleX = mGimbalStatus->resolutionX / 2;
        properties.middleY = mGimbalStatus->resolutionY / 2;
        qWarning().noquote().nospace() << "calculateRectangleProperties(): Something was zero -> No zoom, no turn!";
    }

    return properties;
}
|
||||
|
||||
|
||||
// Converts an on-screen pixel (targetX, targetY) into absolute gimbal
// yaw/pitch angles in degrees, starting from the current camera
// orientation.  Results are written to resultYaw / resultPitch.
void AiEngineGimbalServerActions::getAnglesToOnScreenTarget(int targetX, int targetY, float &resultYaw, float &resultPitch)
{
    // Get current yaw and pitch
    resultYaw = mGimbalStatus->currentYaw;
    resultPitch = mGimbalStatus->currentPitch;

    // Normalize target pixel location.
    // NOTE(review): the formula yields [-1, 1] over the frame, not the
    // [-0.5, 0.5] the original comment claimed — confirm which is intended.
    float normPixelX = (targetX - mGimbalStatus->resolutionX / 2.0f) / (mGimbalStatus->resolutionX / 2.0f);
    float normPixelY = (targetY - mGimbalStatus->resolutionY / 2.0f) / (mGimbalStatus->resolutionY / 2.0f);

    // Adjust field of view and focal length for the current zoom.
    // NOTE(review): the interpolation hard-codes a 1..10 zoom range rather
    // than using mGimbalStatus->maxZoom — confirm (maxZoom is capped to 10
    // in setup(), so they currently coincide).
    float horizontalFov = mGimbalStatus->aiEngineCameraFOVHMin + (10 - mGimbalStatus->currentZoom) * (mGimbalStatus->aiEngineCameraFOVHMax - mGimbalStatus->aiEngineCameraFOVHMin) / 9;
    float verticalFov = mGimbalStatus->aiEngineCameraFOVVMin + (10 - mGimbalStatus->currentZoom) * (mGimbalStatus->aiEngineCameraFOVVMax - mGimbalStatus->aiEngineCameraFOVVMin) / 9;
    float focalLength = mGimbalStatus->aiEngineCameraFLMin + (mGimbalStatus->currentZoom - 1) * (mGimbalStatus->aiEngineCameraFLMax - mGimbalStatus->aiEngineCameraFLMin) / 9;

    // Calculate image plane dimensions based on focal length and field of view.
    float imagePlaneWidth = 2.0f * focalLength * tan(degreesToRadians(horizontalFov) / 2.0f);
    float imagePlaneHeight = 2.0f * focalLength * tan(degreesToRadians(verticalFov) / 2.0f);

    // Calculate angle offsets based on normalized pixel location and image
    // plane dimensions.
    // NOTE(review): the atan2 denominator uses aiEngineCameraFLMin while the
    // image plane above was computed from the zoom-interpolated focalLength —
    // looks inconsistent; confirm whether focalLength was intended here.
    float turnX = atan2(normPixelX * imagePlaneWidth / 2.0f, mGimbalStatus->aiEngineCameraFLMin) * 180.0f / M_PI;
    float turnY = atan2(normPixelY * imagePlaneHeight / 2.0f, mGimbalStatus->aiEngineCameraFLMin) * 180.0f / M_PI;

    // Apply the offsets: both axes move in the negative direction for a
    // target right of / below the frame centre.
    resultYaw -= turnX;
    resultPitch -= turnY;
}
|
||||
|
||||
|
||||
// Point the gimbal at the center of the given detection rectangle by sending
// a TURN_TO_PIXEL command over UDP.
void AiEngineGimbalServerActions::turnToTarget(AiEngineRectangleProperties rectangle)
{
    qDebug().noquote().nospace() << "Turning to target";

    float resultYaw = 0.0f;
    float resultPitch = 0.0f;
    getAnglesToOnScreenTarget(rectangle.middleX, rectangle.middleY, resultYaw, resultPitch);

    QByteArray serialCommandTurn = mUdpCommand->getCommand(UDP_COMMAND_ID::TURN_TO_PIXEL);

    // The protocol encodes angles as int16 in 0.1-degree units, low byte first:
    // payload bytes 8-9 carry yaw, bytes 10-11 carry pitch.
    int16_t degreesVal = resultYaw * 10;
    serialCommandTurn[8] = degreesVal & 0xFF;
    serialCommandTurn[9] = degreesVal >> 8;

    degreesVal = resultPitch * 10;
    serialCommandTurn[10] = degreesVal & 0xFF;
    serialCommandTurn[11] = degreesVal >> 8;

    mUdpSocket->sendCommand(serialCommandTurn);
}
|
||||
|
||||
|
||||
// Zoom the camera so the given detection rectangle fills roughly half of the
// frame, by sending a ZOOM_TO_X command over UDP.
void AiEngineGimbalServerActions::zoomToTarget(AiEngineRectangleProperties rectangle)
{
    qDebug().noquote().nospace() << "Zooming to target";

    // Target should cover fillRatio of the frame after zooming.
    const float fillRatio = 0.5f;
    float targetPixelWidth = rectangle.width;
    float targetPixelHeight = rectangle.height;

    // Guard: a degenerate rectangle would make the divisions below produce
    // infinity, and casting an infinite zoom to uint8_t is undefined behavior.
    if (targetPixelWidth <= 0.0f || targetPixelHeight <= 0.0f) {
        qWarning().noquote().nospace() << "zoomToTarget(): Degenerate rectangle -> no zoom";
        return;
    }

    // Size the target would need at zoom 1 to end up at fillRatio of the frame.
    float adjustedObjectWidth = targetPixelWidth / fillRatio;
    float adjustedObjectHeight = targetPixelHeight / fillRatio;

    // Pick the smaller zoom factor so the target fits in both dimensions,
    // and never zoom out past 1x.
    float zoomWidth = static_cast<float>(mGimbalStatus->resolutionX) / adjustedObjectWidth;
    float zoomHeight = static_cast<float>(mGimbalStatus->resolutionY) / adjustedObjectHeight;
    float zoom = std::min(zoomWidth, zoomHeight);
    if (zoom < 1.0f) {
        zoom = 1.0f;
    }

    QByteArray serialCommandNewZoom = mUdpCommand->getCommand(UDP_COMMAND_ID::ZOOM_TO_X);

    // The protocol encodes zoom as integer part plus fractional part scaled
    // by 10 (e.g. 4.3x -> bytes 4 and 3).
    uint8_t integerPart = static_cast<uint8_t>(zoom);
    float fractionalPart = zoom - integerPart;
    uint8_t scaledFractional = static_cast<uint8_t>(fractionalPart * 10);

    serialCommandNewZoom[8] = integerPart;
    serialCommandNewZoom[9] = scaledFractional;

    mUdpSocket->sendCommand(serialCommandNewZoom);
}
|
||||
|
||||
|
||||
// Estimate the GPS position of the target at index targetIndex from the
// drone's own GPS fix plus the gimbal camera orientation, then publish the
// result via the aiTargetZoomed() signal.
void AiEngineGimbalServerActions::getLocation(AiEngineDronePosition dronePosition, int targetIndex)
{
    // From the drone and camera
    CameraData cameraData = getCameraData();
    GPSData gpsData = {dronePosition.position.alt, dronePosition.position.lat, dronePosition.position.lon};
    DroneData droneData = {gpsData, dronePosition.yaw, dronePosition.pitch, 0.0f /* Roll */}; // GPS (latitude, longitude, altitude) and heading

    // Calculate the GPS location of the target
    AiEngineGeoPosition targetPosition = calculateTargetLocation(droneData, cameraData);

    // TODO: Send position
    AiEngineTargetPosition targetReturn;
    targetReturn.position = targetPosition;
    targetReturn.targetIndex = targetIndex;
    qDebug() << "targetReturn.targetIndex: " << targetReturn.targetIndex;
    qDebug() << "targetReturn.position.alt: " << targetReturn.position.alt;
    qDebug() << "targetReturn.position.lat: " << targetReturn.position.lat;
    qDebug() << "targetReturn.position.lon: " << targetReturn.position.lon;
    emit aiTargetZoomed(targetReturn);
}
|
||||
|
||||
|
||||
// Restore the gimbal to the orientation and zoom captured in gimbalStatus by
// sending a TURN_TO_DEGREES command followed by a ZOOM_TO_X command.
void AiEngineGimbalServerActions::restoreOrientationAndZoom(AiEngineGimbalStatus gimbalStatus)
{
    QByteArray tempCommand;

    // Go to initial orientation.
    // Angles are encoded as int16 in 0.1-degree units, low byte first
    // (bytes 8-9 = yaw, bytes 10-11 = pitch).
    tempCommand = mUdpCommand->getCommand(UDP_COMMAND_ID::TURN_TO_DEGREES);
    int16_t degreesVal = gimbalStatus.currentYaw * 10;
    tempCommand[8] = degreesVal & 0xFF;
    tempCommand[9] = degreesVal >> 8;
    degreesVal = gimbalStatus.currentPitch * 10;
    tempCommand[10] = degreesVal & 0xFF;
    tempCommand[11] = degreesVal >> 8;
    mUdpSocket->sendCommand(tempCommand);

    // Go to initial zoom.
    // Zoom is encoded as integer part plus fractional part scaled by 10.
    tempCommand = mUdpCommand->getCommand(UDP_COMMAND_ID::ZOOM_TO_X);
    uint8_t integerPart = static_cast<uint8_t>(gimbalStatus.currentZoom);
    float fractionalPart = gimbalStatus.currentZoom - integerPart;
    uint8_t scaledFractional = uint8_t(fractionalPart * 10);
    tempCommand[8] = integerPart;
    tempCommand[9] = scaledFractional;
    mUdpSocket->sendCommand(tempCommand);

    // TODO: Maybe send signal that all done?
}
|
||||
|
||||
|
||||
// Snapshot the current camera resolution and attitude for the target-location
// math. Yaw and pitch are sign-flipped because the calculation uses the
// opposite angle convention from the gimbal status.
CameraData AiEngineGimbalServerActions::getCameraData()
{
    CameraData data;
    data.height = mGimbalStatus->resolutionY;
    data.width = mGimbalStatus->resolutionX;
    data.pitch = -mGimbalStatus->currentPitch;
    data.yaw = -mGimbalStatus->currentYaw;
    return data;
}
|
||||
|
||||
|
||||
// Calculate the GPS position (lat/lon/alt) of the target the camera is
// pointing at, given the drone's own GPS fix/heading and the camera attitude.
// Projects the camera ray to the ground, then offsets the drone's coordinates
// by the resulting ground distance along the combined drone+camera bearing.
AiEngineGeoPosition AiEngineGimbalServerActions::calculateTargetLocation(DroneData drone, CameraData camera)
{
    // Calculate altitude and distance to the target
    //float targetDistance = calculateTargetDistanceFromTargetSize(targetTrueSize, targetPixelSize, cameraData.width, degreesToRadians(cameraData.fow));
    float slantDistance = 0;
    float horizontalDistance = 0;
    calculateDistancesToTarget(drone.gps.altitude, camera.pitch, slantDistance, horizontalDistance);

    // Calculate new altitude using the slant distance and angle
    float pitchRad = degreesToRadians(camera.pitch);
    float sinPitchRad = std::sin(pitchRad);
    float altitudeDifference = std::round(slantDistance * sinPitchRad * 100.0) / 100.0; // Rounding to avoid weird targetAltitude
    float targetAltitude = (std::round(drone.gps.altitude * 100.0) / 100.0) - altitudeDifference; // Rounding to avoid weird targetAltitude

    // Calculate the bearing from the drone orientation and camera orientation
    float targetBearing = std::fmod(drone.yaw + camera.yaw, 360.0f);

    // Convert bearing and drone's latitude/longitude to radians
    float bearingRad = degreesToRadians(targetBearing);
    float latRad = degreesToRadians(drone.gps.latitude);
    float lonRad = degreesToRadians(drone.gps.longitude);

    // Calculate new latitude using Haversine formula
    // (great-circle destination point: horizontalDistance meters along
    // bearingRad on a sphere of radius EARTH_RADIUS)
    float newLatRad = std::asin(std::sin(latRad) * std::cos(horizontalDistance / EARTH_RADIUS) + std::cos(latRad) * std::sin(horizontalDistance / EARTH_RADIUS) * std::cos(bearingRad));

    // Calculate new longitude using Haversine formula
    float newLonRad = lonRad + std::atan2(std::sin(bearingRad) * std::sin(horizontalDistance / EARTH_RADIUS) * std::cos(latRad), std::cos(horizontalDistance / EARTH_RADIUS) - std::sin(latRad) * std::sin(newLatRad));

    // Convert back to degrees for latitude and longitude
    AiEngineGeoPosition newLocation;
    newLocation.alt = targetAltitude;
    newLocation.lat = newLatRad * 180.0f / static_cast<float>(M_PI);
    newLocation.lon = newLonRad * 180.0f / static_cast<float>(M_PI);

    return newLocation;
}
|
||||
|
||||
|
||||
void AiEngineGimbalServerActions::calculateDistancesToTarget(float altitude, float cameraPitch, float &slantDistance, float &horizontalDistance)
|
||||
{
|
||||
// Convert pitch angle from degrees to radians
|
||||
float cameraPitchRadians = degreesToRadians(cameraPitch);
|
||||
|
||||
// Value has to be between 0-90
|
||||
cameraPitchRadians = std::clamp((double)cameraPitchRadians, (double)0.0f, M_PI_2);
|
||||
|
||||
// Calculate horizontal distance
|
||||
horizontalDistance = altitude / tan(cameraPitchRadians);
|
||||
|
||||
// Adjust for Earth's curvature:
|
||||
// We need to find the horizontal distance on the curved Earth surface that corresponds to this flat distance
|
||||
double centralAngle = horizontalDistance / EARTH_RADIUS; // in radians
|
||||
|
||||
// Calculate the arc length on Earth's surface
|
||||
horizontalDistance = EARTH_RADIUS * centralAngle;
|
||||
|
||||
// Calculate slant distance considering the Earth's curvature
|
||||
slantDistance = sqrt(altitude * altitude + horizontalDistance * horizontalDistance);
|
||||
}
|
||||
|
||||
|
||||
// Function to convert degrees to radians
|
||||
float AiEngineGimbalServerActions::degreesToRadians(float degrees)
|
||||
{
|
||||
return degrees * M_PI / 180.0f;
|
||||
}
|
||||
@@ -0,0 +1,68 @@
|
||||
#pragma once
|
||||
|
||||
#include <QDebug>
|
||||
#include "aienginedefinitions.h"
|
||||
#include "aienginegimbalserverudpcommand.h"
|
||||
#include "aienginegimbalserverudpresponse.h"
|
||||
#include "aienginegimbalserverudp.h"
|
||||
|
||||
|
||||
// Earth's mean radius in meters, used by the target-location geodesy math.
// inline constexpr (C++17): a single compile-time constant shared by every
// translation unit that includes this header, instead of one internal-linkage
// copy per translation unit.
inline constexpr double EARTH_RADIUS = 6371000.0;
|
||||
|
||||
|
||||
// A GPS fix in WGS84-style decimal degrees plus altitude in meters.
struct GPSData
{
    float altitude;  // Meters
    float latitude;  // Decimal degrees
    float longitude; // Decimal degrees
};
|
||||
|
||||
|
||||
// Camera resolution and attitude snapshot used as input to the
// target-location calculation (see getCameraData()).
struct CameraData
{
    uint16_t height; // Pixels
    uint16_t width;  // Pixels
    float pitch;     // Degrees
    float yaw;       // Degrees
};
|
||||
|
||||
|
||||
// The drone's own GPS fix and attitude, used as input to the target-location
// calculation.
struct DroneData
{
    GPSData gps;
    float yaw;   // Degrees
    float pitch; // Degrees
    float roll;  // Degrees
};
|
||||
|
||||
|
||||
class AiEngineGimbalServerActions : public QObject
|
||||
{
|
||||
Q_OBJECT
|
||||
public:
|
||||
explicit AiEngineGimbalServerActions(QObject *parent = nullptr);
|
||||
|
||||
public slots:
|
||||
void setup(AiEngineGimbalServerUDP *udpSocket, AiEngineGimbalServerUDPCommand *udpCommand, AiEngineGimbalServerUDPResponse *udpResponse, AiEngineGimbalStatus *gimbalStatus);
|
||||
AiEngineRectangleProperties calculateRectangleProperties(int top, int left, int bottom, int right);
|
||||
void turnToTarget(AiEngineRectangleProperties rectangle);
|
||||
void zoomToTarget(AiEngineRectangleProperties rectangle);
|
||||
void getLocation(AiEngineDronePosition dronePosition, int targetIndex);
|
||||
void restoreOrientationAndZoom(AiEngineGimbalStatus gimbalStatus);
|
||||
|
||||
signals:
|
||||
void aiTargetZoomed(AiEngineTargetPosition);
|
||||
|
||||
private:
|
||||
AiEngineGimbalServerUDP *mUdpSocket;
|
||||
AiEngineGimbalServerUDPCommand *mUdpCommand;
|
||||
AiEngineGimbalServerUDPResponse *mUdpResponse;
|
||||
AiEngineGimbalStatus *mGimbalStatus;
|
||||
|
||||
private slots:
|
||||
CameraData getCameraData(void);
|
||||
void getAnglesToOnScreenTarget(int targetX, int targetY, float &resultYaw, float &resultPitch);
|
||||
AiEngineGeoPosition calculateTargetLocation(DroneData drone, CameraData camera);
|
||||
void calculateDistancesToTarget(float altitude, float cameraPitch, float &slantDistance, float &horizontalDistance);
|
||||
float degreesToRadians(float degrees);
|
||||
};
|
||||
@@ -0,0 +1,31 @@
|
||||
#include "aienginegimbalservercrc16.h"
|
||||
|
||||
|
||||
// Precomputed lookup table for a table-driven CRC-16 with polynomial 0x1021
// (CCITT/XMODEM family), one entry per possible byte value.
static const uint16_t crc16Table[256] = {0x0, 0x1021, 0x2042, 0x3063, 0x4084, 0x50a5, 0x60c6, 0x70e7, 0x8108, 0x9129, 0xa14a, 0xb16b, 0xc18c, 0xd1ad, 0xe1ce, 0xf1ef, 0x1231, 0x210, 0x3273, 0x2252, 0x52b5, 0x4294, 0x72f7, 0x62d6, 0x9339, 0x8318, 0xb37b, 0xa35a, 0xd3bd, 0xc39c, 0xf3ff, 0xe3de, 0x2462, 0x3443, 0x420, 0x1401, 0x64e6, 0x74c7, 0x44a4, 0x5485, 0xa56a, 0xb54b, 0x8528, 0x9509, 0xe5ee, 0xf5cf, 0xc5ac, 0xd58d, 0x3653, 0x2672, 0x1611, 0x630,
0x76d7, 0x66f6, 0x5695, 0x46b4, 0xb75b, 0xa77a, 0x9719, 0x8738, 0xf7df, 0xe7fe, 0xd79d, 0xc7bc, 0x48c4, 0x58e5, 0x6886, 0x78a7, 0x840, 0x1861, 0x2802, 0x3823, 0xc9cc, 0xd9ed, 0xe98e, 0xf9af, 0x8948, 0x9969, 0xa90a, 0xb92b, 0x5af5, 0x4ad4, 0x7ab7, 0x6a96, 0x1a71, 0xa50, 0x3a33, 0x2a12, 0xdbfd, 0xcbdc, 0xfbbf, 0xeb9e, 0x9b79, 0x8b58, 0xbb3b, 0xab1a, 0x6ca6, 0x7c87, 0x4ce4, 0x5cc5, 0x2c22, 0x3c03, 0xc60, 0x1c41,
0xedae, 0xfd8f, 0xcdec, 0xddcd, 0xad2a, 0xbd0b, 0x8d68, 0x9d49, 0x7e97, 0x6eb6, 0x5ed5, 0x4ef4, 0x3e13, 0x2e32, 0x1e51, 0xe70, 0xff9f, 0xefbe, 0xdfdd, 0xcffc, 0xbf1b, 0xaf3a, 0x9f59, 0x8f78, 0x9188, 0x81a9, 0xb1ca, 0xa1eb, 0xd10c, 0xc12d, 0xf14e, 0xe16f, 0x1080, 0xa1, 0x30c2, 0x20e3, 0x5004, 0x4025, 0x7046, 0x6067, 0x83b9, 0x9398, 0xa3fb, 0xb3da, 0xc33d, 0xd31c, 0xe37f, 0xf35e, 0x2b1, 0x1290, 0x22f3, 0x32d2,
0x4235, 0x5214, 0x6277, 0x7256, 0xb5ea, 0xa5cb, 0x95a8, 0x8589, 0xf56e, 0xe54f, 0xd52c, 0xc50d, 0x34e2, 0x24c3, 0x14a0, 0x481, 0x7466, 0x6447, 0x5424, 0x4405, 0xa7db, 0xb7fa, 0x8799, 0x97b8, 0xe75f, 0xf77e, 0xc71d, 0xd73c, 0x26d3, 0x36f2, 0x691, 0x16b0, 0x6657, 0x7676, 0x4615, 0x5634, 0xd94c, 0xc96d, 0xf90e, 0xe92f, 0x99c8, 0x89e9, 0xb98a, 0xa9ab, 0x5844, 0x4865, 0x7806, 0x6827, 0x18c0, 0x8e1, 0x3882, 0x28a3,
0xcb7d, 0xdb5c, 0xeb3f, 0xfb1e, 0x8bf9, 0x9bd8, 0xabbb, 0xbb9a, 0x4a75, 0x5a54, 0x6a37, 0x7a16, 0xaf1, 0x1ad0, 0x2ab3, 0x3a92, 0xfd2e, 0xed0f, 0xdd6c, 0xcd4d, 0xbdaa, 0xad8b, 0x9de8, 0x8dc9, 0x7c26, 0x6c07, 0x5c64, 0x4c45, 0x3ca2, 0x2c83, 0x1ce0, 0xcc1, 0xef1f, 0xff3e, 0xcf5d, 0xdf7c, 0xaf9b, 0xbfba, 0x8fd9, 0x9ff8, 0x6e17, 0x7e36, 0x4e55, 0x5e74, 0x2e93, 0x3eb2, 0xed1, 0x1ef0};
|
||||
|
||||
|
||||
uint16_t AiEngineGimbalServerCrc16::calculateCRC(const uint8_t *ptr, uint32_t len, uint16_t crcInit)
|
||||
{
|
||||
uint16_t crc = crcInit;
|
||||
uint8_t temp;
|
||||
|
||||
while (len-- != 0) {
|
||||
temp = (crc >> 8) & 0xFF;
|
||||
crc = (crc << 8) ^ crc16Table[*ptr ^ temp];
|
||||
ptr++;
|
||||
}
|
||||
|
||||
return crc;
|
||||
}
|
||||
|
||||
|
||||
// Compute the CRC16 of data (seeded with 0) and write it into bytes[0..1],
// low byte first, matching the gimbal protocol's trailing CRC field.
void AiEngineGimbalServerCrc16::getCRCBytes(const QByteArray &data, int8_t *bytes)
{
    uint16_t crc16 = calculateCRC(reinterpret_cast<const uint8_t *>(data.constData()), data.size(), 0);
    bytes[0] = crc16 & 0xFF; // Set LSB
    bytes[1] = crc16 >> 8;   // Set MSB
}
|
||||
@@ -0,0 +1,14 @@
|
||||
#pragma once
|
||||
|
||||
#include <QByteArray>
|
||||
#include <cstdint>
|
||||
|
||||
|
||||
// Stateless helper computing the CRC16 checksum used by the gimbal UDP
// protocol (table-driven, polynomial 0x1021 family, init 0).
class AiEngineGimbalServerCrc16
{
public:
    // Write the CRC16 of data into bytes[0..1], low byte first.
    static void getCRCBytes(const QByteArray &data, int8_t *bytes);

private:
    static uint16_t calculateCRC(const uint8_t *ptr, uint32_t len, uint16_t crcInit);
};
|
||||
@@ -0,0 +1,69 @@
|
||||
/**
|
||||
* This is a defines header for Siyi Gimbal Cameras.
|
||||
* Other cameras might need their own defines header.
|
||||
*/
|
||||
|
||||
#pragma once
|
||||
|
||||
|
||||
#define AI_ENGINE_CAMERA_ZR10_HID 0x6B // 107
|
||||
#define AI_ENGINE_CAMERA_ZR10_FIELD_OF_VIEW_HORIZONTAL_MIN 6.7f
|
||||
#define AI_ENGINE_CAMERA_ZR10_FIELD_OF_VIEW_HORIZONTAL_MAX 47.0f
|
||||
#define AI_ENGINE_CAMERA_ZR10_FIELD_OF_VIEW_VERTICAL_MIN 3.8f
|
||||
#define AI_ENGINE_CAMERA_ZR10_FIELD_OF_VIEW_VERTICAL_MAX 30.0f
|
||||
#define AI_ENGINE_CAMERA_ZR10_FOCAL_LENGTH_MIN 5.2f
|
||||
#define AI_ENGINE_CAMERA_ZR10_FOCAL_LENGTH_MAX 47.5f
|
||||
|
||||
#define AI_ENGINE_CAMERA_A8_HID 0x73 // 115
|
||||
#define AI_ENGINE_CAMERA_A8_FIELD_OF_VIEW_HORIZONTAL_MIN 85.0f
|
||||
#define AI_ENGINE_CAMERA_A8_FIELD_OF_VIEW_HORIZONTAL_MAX 85.0f
|
||||
#define AI_ENGINE_CAMERA_A8_FIELD_OF_VIEW_VERTICAL_MIN 58.0f
|
||||
#define AI_ENGINE_CAMERA_A8_FIELD_OF_VIEW_VERTICAL_MAX 58.0f
|
||||
#define AI_ENGINE_CAMERA_A8_FOCAL_LENGTH_MIN 21.0f
|
||||
#define AI_ENGINE_CAMERA_A8_FOCAL_LENGTH_MAX 21.0f
|
||||
|
||||
#define AI_ENGINE_CAMERA_ASPECT_RATIO 1.777777778f
|
||||
#define AI_ENGINE_GIMBAL_YAW_MIN -135.0f
|
||||
#define AI_ENGINE_GIMBAL_YAW_MAX 135.0f
|
||||
#define AI_ENGINE_GIMBAL_PITCH_MIN -90.0f
|
||||
#define AI_ENGINE_GIMBAL_PITCH_MAX 25.0f
|
||||
#define AI_ENGINE_CAMERA_INITIAL_ZOOM 1.0f
|
||||
#define AI_ENGINE_GIMBAL_INITIAL_PITCH -20.0f
|
||||
#define AI_ENGINE_GIMBAL_INITIAL_ROLL 0.0f
|
||||
#define AI_ENGINE_GIMBAL_INITIAL_YAW 0.0f
|
||||
|
||||
|
||||
// Logical IDs for the gimbal UDP commands. Each ID maps to a prebuilt Siyi
// protocol frame in AiEngineGimbalServerUDPCommand; values start at 1 and are
// sequential.
enum UDP_COMMAND_ID {
    TURN_TO_DEGREES = 1,
    TURN_TO_PIXEL,
    ZOOM_TO_X,
    ACQUIRE_CAMERA_CODEC_SPECS,
    ACQUIRE_CURRENT_ZOOM,
    ACQUIRE_ATTITUDE_DATA,
    AUTO_CENTER,
    ZOOM_MOST,
    ZOOM_LEAST,
    FOCUS_MOST,
    FOCUS_LEAST,
    FOCUS_AUTO,
    ROTATE_UP,
    ROTATE_DOWN,
    ROTATE_RIGHT,
    ROTATE_LEFT,
    ROTATE_STOP,
    ACQUIRE_MAX_ZOOM_VALUE,
    TAKE_PICTURES,
    TAKE_VIDEO,
    ROTATE_100_100,
    ACQUIRE_GIMBAL_STATUS,
    ACQUIRE_HW_INFO,
    ACQUIRE_FIRMWARE_VERSION,
    MODE_LOCK,
    MODE_FOLLOW,
    MODE_FPV,
    ENABLE_HDMI,
    ENABLE_CVBS,
    DISABLE_HDMI_CVBS,
    ACQUIRE_RANGE_DATA,
    RUN_TARGET_LOCATION_TEST
};
|
||||
@@ -0,0 +1,73 @@
|
||||
#include <QDebug>
|
||||
#include <QTimer>
|
||||
#include "aienginegimbalserverudp.h"
|
||||
#include "aienginegimbalservercrc16.h"
|
||||
|
||||
|
||||
// Open the UDP socket to the gimbal at SERVER_IP:SERVER_PORT. Exits the whole
// process if the socket cannot be opened.
AiEngineGimbalServerUDP::AiEngineGimbalServerUDP(QObject *parent)
    : QObject{parent}
{
    // Open UDP socket.
    // NOTE(review): for UDP, connectToHost() only sets the default peer for
    // write()/read() -- it does not verify the gimbal is reachable. Confirm
    // that isOpen() is the intended success check here.
    mUdpSocket = new QUdpSocket();
    mUdpSocket->connectToHost(QHostAddress(SERVER_IP), SERVER_PORT);

    if (mUdpSocket->isOpen() == false) {
        qCritical().noquote().nospace() << "AiEngineGimbalServerUDP(): Unable to connect " << SERVER_IP << ":" << SERVER_PORT;
        delete mUdpSocket;
        exit(EXIT_FAILURE);
    }

    qDebug().noquote().nospace() << "AiEngineGimbalServerUDP(): Connected " << SERVER_IP << ":" << SERVER_PORT;
}
|
||||
|
||||
|
||||
// Close the socket if it is still open, then release it.
AiEngineGimbalServerUDP::~AiEngineGimbalServerUDP()
{
    if (mUdpSocket->isOpen()) {
        mUdpSocket->close();
    }

    delete mUdpSocket;
}
|
||||
|
||||
|
||||
void AiEngineGimbalServerUDP::sendCommand(const QByteArray &command)
|
||||
{
|
||||
QByteArray toSend = command;
|
||||
int8_t crcBytes[2];
|
||||
AiEngineGimbalServerCrc16::getCRCBytes(toSend, crcBytes);
|
||||
toSend.resize(toSend.size() + 2); // Increase array size to accommodate CRC bytes
|
||||
toSend[toSend.size() - 2] = crcBytes[0]; // Set LSB
|
||||
toSend[toSend.size() - 1] = crcBytes[1]; // Set MSB
|
||||
|
||||
qDebug().noquote().nospace() << "AiEngineGimbalServerUDP(): Sent HEX data: " << toSend.toHex();
|
||||
|
||||
if (mUdpSocket->isOpen() == false) {
|
||||
qCritical().noquote().nospace() << "AiEngineGimbalServerUDP(): Connection not open (sendCommand)";
|
||||
return;
|
||||
}
|
||||
|
||||
if (mUdpSocket->write(toSend) == -1) {
|
||||
qCritical().noquote().nospace() << "AiEngineGimbalServerUDP(): Failed to send data: " << mUdpSocket->errorString();
|
||||
}
|
||||
}
|
||||
|
||||
// Block up to 3 seconds waiting for a datagram from the gimbal and return its
// bytes. Returns an empty QByteArray on timeout or read failure.
QByteArray AiEngineGimbalServerUDP::readResponse()
{
    QByteArray response;
    QByteArray recvBuff(RECV_BUF_SIZE, 0);

    if (mUdpSocket->waitForReadyRead(3000)) {
        qint64 recvLen = mUdpSocket->read(recvBuff.data(), RECV_BUF_SIZE);
        if (recvLen > 0) {
            // Trim the fixed-size buffer down to the bytes actually received.
            response = recvBuff.left(recvLen);
            qDebug().noquote().nospace() << "AiEngineGimbalServerUDP(): Received HEX data: " << response.toHex();
        } else {
            qCritical().noquote().nospace() << "AiEngineGimbalServerUDP(): Failed to receive data: " << mUdpSocket->errorString();
        }
    } else {
        qCritical().noquote().nospace() << "AiEngineGimbalServerUDP(): No data received.";
    }

    return response;
}
|
||||
@@ -0,0 +1,28 @@
|
||||
#pragma once
|
||||
|
||||
|
||||
#include <QObject>
|
||||
#include <QUdpSocket>
|
||||
|
||||
|
||||
#define SERVER_IP "192.168.0.25"
|
||||
#define SERVER_PORT 37260
|
||||
#define RECV_BUF_SIZE 1024
|
||||
|
||||
|
||||
// Thin UDP transport to the gimbal camera at SERVER_IP:SERVER_PORT.
// sendCommand() appends the protocol CRC16 before transmitting;
// readResponse() blocks briefly for a reply. Owns its QUdpSocket.
class AiEngineGimbalServerUDP : public QObject
{
    Q_OBJECT
public:
    explicit AiEngineGimbalServerUDP(QObject *parent = nullptr);

public:
    ~AiEngineGimbalServerUDP();

public slots:
    // Append CRC16 to command and send it to the gimbal.
    void sendCommand(const QByteArray &command);
    // Wait up to 3 s for a datagram; empty QByteArray on timeout/failure.
    QByteArray readResponse();

private:
    QUdpSocket *mUdpSocket; // Owned; created in the constructor
};
|
||||
@@ -0,0 +1,102 @@
|
||||
/**
|
||||
* This is a serial command class for Siyi Gimbal Cameras.
|
||||
* Other cameras might need their own serial command class.
|
||||
*/
|
||||
|
||||
#include <QDebug>
|
||||
#include "aienginegimbalserverudpcommand.h"
|
||||
|
||||
|
||||
// Build the table of prebuilt Siyi protocol command frames (without the
// trailing CRC16, which AiEngineGimbalServerUDP::sendCommand() appends), then
// sort it by command ID for lookup via getCommandIndex().
AiEngineGimbalServerUDPCommand::AiEngineGimbalServerUDPCommand(QObject *parent)
    : QObject{parent}
{
    /*
    Field Index Bytes Description
    STX 0 2 0x6655: starting mark. Low byte in the front
    CTRL 2 1 0: need_ack (if the current data pack need “ack”)
    1: ack_pack (if the current data pack is an “ack” package) 2-7: reserved
    Data_len 3 2 Data field byte length. Low byte in the front
    SEQ 5 2 Frame sequence (0 ~ 65535). Low byte in the front
    CMD_ID 7 1 Command ID
    DATA 8 Data_len Data
    CRC16 2 CRC16 check to the complete data package. Low
    byte in the front
    */

    // Commands with variable payloads (angles, zoom) carry placeholder zero
    // bytes that the caller overwrites before sending.
    mSerialCommands.push_back({UDP_COMMAND_ID::TURN_TO_DEGREES, createByteArray({0x55, 0x66, 0x01, 0x04, 0x00, 0x00, 0x00, 0x0E, 0x00, 0x00, 0x00, 0x00}), "Turn to degrees"});
    mSerialCommands.push_back({UDP_COMMAND_ID::TURN_TO_PIXEL, createByteArray({0x55, 0x66, 0x01, 0x04, 0x00, 0x00, 0x00, 0x0E, 0x00, 0x00, 0x00, 0x00}), "Turn to pixel"});
    mSerialCommands.push_back({UDP_COMMAND_ID::ZOOM_TO_X, createByteArray({0x55, 0x66, 0x01, 0x02, 0x00, 0x01, 0x00, 0x0F, 0x00, 0x00}), "Zoom to X"});
    mSerialCommands.push_back({UDP_COMMAND_ID::AUTO_CENTER, createByteArray({0x55, 0x66, 0x01, 0x01, 0x00, 0x00, 0x00, 0x08, 0x01}), "Auto Centering"});
    mSerialCommands.push_back({UDP_COMMAND_ID::ACQUIRE_ATTITUDE_DATA, createByteArray({0x55, 0x66, 0x01, 0x00, 0x00, 0x00, 0x00, 0x0D}), "Acquire Attitude Data"});
    mSerialCommands.push_back({UDP_COMMAND_ID::ACQUIRE_CURRENT_ZOOM, createByteArray({0x55, 0x66, 0x01, 0x00, 0x00, 0x00, 0x00, 0x18}), "Acquire current zoom"});
    mSerialCommands.push_back({UDP_COMMAND_ID::ACQUIRE_CAMERA_CODEC_SPECS, createByteArray({0x55, 0x66, 0x01, 0x01, 0x00, 0x00, 0x00, 0x20, 0x01}), "Acquire Camera Codec Specs"});
    mSerialCommands.push_back({UDP_COMMAND_ID::ZOOM_MOST, createByteArray({0x55, 0x66, 0x01, 0x01, 0x00, 0x00, 0x00, 0x05, 0x01}), "Zoom +1"});
    mSerialCommands.push_back({UDP_COMMAND_ID::ZOOM_LEAST, createByteArray({0x55, 0x66, 0x01, 0x01, 0x00, 0x00, 0x00, 0x05, 0xFF}), "Zoom -1"});
    mSerialCommands.push_back({UDP_COMMAND_ID::FOCUS_MOST, createByteArray({0x55, 0x66, 0x01, 0x01, 0x00, 0x00, 0x00, 0x06, 0x01}), "Manual Focus +1"});
    mSerialCommands.push_back({UDP_COMMAND_ID::FOCUS_LEAST, createByteArray({0x55, 0x66, 0x01, 0x01, 0x00, 0x00, 0x00, 0x06, 0xFF}), "Manual Focus -1"});
    mSerialCommands.push_back({UDP_COMMAND_ID::FOCUS_AUTO, createByteArray({0x55, 0x66, 0x01, 0x01, 0x00, 0x00, 0x00, 0x04, 0x01}), "Auto Focus"});
    mSerialCommands.push_back({UDP_COMMAND_ID::ROTATE_UP, createByteArray({0x55, 0x66, 0x01, 0x02, 0x00, 0x00, 0x00, 0x07, 0x00, 0x2D}), "Rotate Up"});
    mSerialCommands.push_back({UDP_COMMAND_ID::ROTATE_DOWN, createByteArray({0x55, 0x66, 0x01, 0x02, 0x00, 0x00, 0x00, 0x07, 0x00, -0x2D}), "Rotate Down"});
    mSerialCommands.push_back({UDP_COMMAND_ID::ROTATE_RIGHT, createByteArray({0x55, 0x66, 0x01, 0x02, 0x00, 0x00, 0x00, 0x07, 0x2D, 0x00}), "Rotate Right"});
    mSerialCommands.push_back({UDP_COMMAND_ID::ROTATE_LEFT, createByteArray({0x55, 0x66, 0x01, 0x02, 0x00, 0x00, 0x00, 0x07, -0x2D, 0x00}), "Rotate Left"});
    mSerialCommands.push_back({UDP_COMMAND_ID::ROTATE_STOP, createByteArray({0x55, 0x66, 0x01, 0x02, 0x00, 0x00, 0x00, 0x07, 0x00, 0x00}), "Stop rotation"});
    mSerialCommands.push_back({UDP_COMMAND_ID::ACQUIRE_MAX_ZOOM_VALUE, createByteArray({0x55, 0x66, 0x01, 0x00, 0x00, 0x00, 0x00, 0x16}), "Acquire the Max Zoom Value"});
    mSerialCommands.push_back({UDP_COMMAND_ID::TAKE_PICTURES, createByteArray({0x55, 0x66, 0x01, 0x01, 0x00, 0x00, 0x00, 0x0C, 0x00}), "Take Pictures"});
    mSerialCommands.push_back({UDP_COMMAND_ID::TAKE_VIDEO, createByteArray({0x55, 0x66, 0x01, 0x01, 0x00, 0x00, 0x00, 0x0C, 0x02}), "Record Video"});
    mSerialCommands.push_back({UDP_COMMAND_ID::ROTATE_100_100, createByteArray({0x55, 0x66, 0x01, 0x02, 0x00, 0x00, 0x00, 0x07, 0x64, 0x64}), "Rotate 100 100"});
    mSerialCommands.push_back({UDP_COMMAND_ID::ACQUIRE_GIMBAL_STATUS, createByteArray({0x55, 0x66, 0x01, 0x00, 0x00, 0x00, 0x00, 0x0A}), "Gimbal Status Information"});
    mSerialCommands.push_back({UDP_COMMAND_ID::ACQUIRE_HW_INFO, createByteArray({0x55, 0x66, 0x01, 0x00, 0x00, 0x00, 0x00, 0x02}), "Acquire Hardware ID"});
    mSerialCommands.push_back({UDP_COMMAND_ID::ACQUIRE_FIRMWARE_VERSION, createByteArray({0x55, 0x66, 0x01, 0x00, 0x00, 0x00, 0x00, 0x01}), "Acquire Firmware Version"});
    mSerialCommands.push_back({UDP_COMMAND_ID::MODE_LOCK, createByteArray({0x55, 0x66, 0x01, 0x01, 0x00, 0x00, 0x00, 0x0C, 0x03}), "Lock Mode"});
    mSerialCommands.push_back({UDP_COMMAND_ID::MODE_FOLLOW, createByteArray({0x55, 0x66, 0x01, 0x01, 0x00, 0x00, 0x00, 0x0C, 0x04}), "Follow Mode"});
    mSerialCommands.push_back({UDP_COMMAND_ID::MODE_FPV, createByteArray({0x55, 0x66, 0x01, 0x01, 0x00, 0x00, 0x00, 0x0C, 0x05}), "FPV Mode"});
    mSerialCommands.push_back({UDP_COMMAND_ID::ENABLE_HDMI,
                               createByteArray({0x55, 0x66, 0x01, 0x01, 0x00, 0x00, 0x00, 0x0C, 0x06}),
                               "Set Video Output as HDMI (Only available on A8 mini, restart to take "
                               "effect)"});
    mSerialCommands.push_back({UDP_COMMAND_ID::ENABLE_CVBS,
                               createByteArray({0x55, 0x66, 0x01, 0x01, 0x00, 0x00, 0x00, 0x0C, 0x07}),
                               "Set Video Output as CVBS (Only available on A8 mini, restart to take "
                               "effect)"});
    mSerialCommands.push_back({UDP_COMMAND_ID::DISABLE_HDMI_CVBS,
                               createByteArray({0x55, 0x66, 0x01, 0x01, 0x00, 0x00, 0x00, 0x0C, 0x08}),
                               "Turn Off both CVBS and HDMI Output (Only available on A8 mini, restart "
                               "to take effect)"});
    mSerialCommands.push_back({UDP_COMMAND_ID::ACQUIRE_RANGE_DATA,
                               createByteArray({0x55, 0x66, 0x01, 0x00, 0x00, 0x00, 0x00, 0x15}),
                               "Read Range from Laser Rangefinder(Low byte in the front, high byte in "
                               "the back, available on ZT30)"});
    mSerialCommands.push_back({UDP_COMMAND_ID::RUN_TARGET_LOCATION_TEST, createByteArray({0x00, 0x00}), "TEST target location calculations"});

    // Sort vector by SERIAL_COMMAND_ID
    std::sort(mSerialCommands.begin(), mSerialCommands.end(), [](const AiEngineServerSerialCommandStructure &a, const AiEngineServerSerialCommandStructure &b) { return a.id < b.id; });
}
|
||||
|
||||
// Pack the given integer values into a QByteArray, one byte per value.
QByteArray AiEngineGimbalServerUDPCommand::createByteArray(const std::initializer_list<int> &bytes)
{
    QByteArray result;
    result.reserve(static_cast<int>(bytes.size()));
    for (const int value : bytes) {
        result.append(static_cast<char>(value));
    }
    return result;
}
|
||||
|
||||
// Linear scan for the table entry whose id matches commandId.
// Returns the entry's index, or -1 when the command is not in the table.
int AiEngineGimbalServerUDPCommand::getCommandIndex(UDP_COMMAND_ID commandId)
{
    int index = 0;
    for (const auto &entry : mSerialCommands) {
        if (entry.id == commandId) {
            return index;
        }
        index++;
    }

    return -1;
}
|
||||
|
||||
// Look up the prebuilt command frame for commandId.
// Returns an empty QByteArray when the command has no table entry; previously
// the lookup fell through to mSerialCommands.at(-1), which throws
// std::out_of_range and crashes the process.
QByteArray AiEngineGimbalServerUDPCommand::getCommand(UDP_COMMAND_ID commandId)
{
    int commandIndex = getCommandIndex(commandId);
    if (commandIndex == -1) {
        qCritical().noquote().nospace() << "Command not found for command: " << commandId;
        return QByteArray();
    }
    return mSerialCommands.at(commandIndex).command;
}
|
||||
@@ -0,0 +1,27 @@
|
||||
/**
|
||||
* This is a serial command class for Siyi Gimbal Cameras.
|
||||
* Other cameras might need their own serial command class.
|
||||
*/
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <QByteArray>
|
||||
#include <QList>
|
||||
#include <QObject>
|
||||
#include <QString>
|
||||
#include "aienginedefinitions.h"
|
||||
|
||||
// Holds the table of prebuilt Siyi gimbal command frames, keyed by
// UDP_COMMAND_ID, and hands out copies for callers to patch and send.
class AiEngineGimbalServerUDPCommand : public QObject
{
    Q_OBJECT
public:
    explicit AiEngineGimbalServerUDPCommand(QObject *parent = nullptr);

public:
    // Returns the frame (without trailing CRC) for commandId.
    QByteArray getCommand(UDP_COMMAND_ID commandId);

private:
    // Pack an initializer list of ints into a QByteArray, one byte each.
    QByteArray createByteArray(const std::initializer_list<int> &bytes);
    // Index of commandId in mSerialCommands, or -1 when absent.
    int getCommandIndex(UDP_COMMAND_ID commandId);
    std::vector<AiEngineServerSerialCommandStructure> mSerialCommands;
};
|
||||
@@ -0,0 +1,101 @@
|
||||
/**
|
||||
* This is a serial response class for Siyi Gimbal Cameras.
|
||||
* Other cameras might need their own serial response class.
|
||||
*/
|
||||
|
||||
#include <QDebug>
|
||||
#include <QHash>
|
||||
#include <QString>
|
||||
#include <QVariant>
|
||||
#include "aienginegimbalservercrc16.h"
|
||||
#include "aienginegimbalserverudpresponse.h"
|
||||
|
||||
|
||||
// No state to initialize; this class only parses response frames.
AiEngineGimbalServerUDPResponse::AiEngineGimbalServerUDPResponse(QObject *parent)
    : QObject{parent}
{

}
|
||||
|
||||
// Parse a Siyi gimbal response frame into a hash of named values
// ("yaw", "pitch", "zoom", ...), keyed by the frame's CMD_ID byte.
// Returns an empty hash for frames that are empty, too short, or of an
// unrecognized command type. Multi-byte fields are little endian (low byte
// first); angles and angular speeds arrive in 0.1-degree units.
QHash<QString, QVariant> AiEngineGimbalServerUDPResponse::getResponceValues(QByteArray response)
{
    QHash<QString, QVariant> results;

    if (response.size() == 0) {
        qWarning().noquote().nospace() << "Response is empty, exiting...";
        return results;
    }

    // A frame must at least hold the header through CMD_ID plus two CRC bytes.
    if (response.size() < MESSAGE_IDX::DATA + 2) {
        qWarning().noquote().nospace() << "Response too short, exiting...";
        return results;
    }

    // Check response data validity: recompute the CRC16 over the frame minus
    // its trailing two CRC bytes. The length is int, not uint8_t: a uint8_t
    // silently truncated frames longer than 257 bytes.
    int8_t crcCheck[2];
    int desiredLength = response.size() - 2;
    QByteArray subData(response.data(), desiredLength);
    AiEngineGimbalServerCrc16::getCRCBytes(subData, crcCheck);

    int8_t crcOriginal[2];
    crcOriginal[0] = response.at(response.size() - 2);
    crcOriginal[1] = response.at(response.size() - 1);

    // Data not OK.
    // NOTE(review): parsing still continues on a CRC mismatch to preserve the
    // previous behavior -- confirm whether an early return is wanted here.
    if (crcCheck[0] != crcOriginal[0] || crcCheck[1] != crcOriginal[1]) {
        qWarning().noquote().nospace() << "Response data INVALID";
        QString responseCRC = QString("0x%1,0x%2").arg(crcOriginal[0], 2, 16, QLatin1Char('0')).arg(crcOriginal[1], 2, 16, QLatin1Char('0')).toUpper();
        QString recalcCRC = QString("0x%1,0x%2").arg(crcCheck[0], 2, 16, QLatin1Char('0')).arg(crcCheck[1], 2, 16, QLatin1Char('0')).toUpper();
        qWarning().noquote().nospace() << responseCRC << "!=" << recalcCRC;
    }

    uint8_t command = response.at(MESSAGE_IDX::CMD_ID);

    // Per-branch size checks guard the indexed reads below against short
    // frames. Angle values are divided by 10.0f so the 0.1-degree fraction is
    // kept (the previous integer division truncated it).
    if (command == 0x0E && response.size() >= 16) { // Attitude angles
        int16_t yaw = ((uint8_t) response.at(9) << 8) | (uint8_t) response.at(8);
        int16_t pitch = ((uint8_t) response.at(11) << 8) | (uint8_t) response.at(10);
        int16_t roll = ((uint8_t) response.at(13) << 8) | (uint8_t) response.at(12);
        results.insert("yaw", yaw / 10.0f);
        results.insert("pitch", pitch / 10.0f);
        results.insert("roll", roll / 10.0f);
    } else if (command == 0x0D && response.size() >= 22) { // Attitude angles + angular speeds
        int16_t yaw = ((uint8_t) response.at(9) << 8) | (uint8_t) response.at(8);
        int16_t pitch = ((uint8_t) response.at(11) << 8) | (uint8_t) response.at(10);
        int16_t roll = ((uint8_t) response.at(13) << 8) | (uint8_t) response.at(12);
        int16_t yawSpeed = ((uint8_t) response.at(15) << 8) | (uint8_t) response.at(14);
        int16_t pitchSpeed = ((uint8_t) response.at(17) << 8) | (uint8_t) response.at(16);
        int16_t rollSpeed = ((uint8_t) response.at(19) << 8) | (uint8_t) response.at(18);
        results.insert("yaw", yaw / 10.0f);
        results.insert("pitch", pitch / 10.0f);
        results.insert("roll", roll / 10.0f);
        results.insert("yaw_speed", yawSpeed / 10.0f);
        results.insert("pitch_speed", pitchSpeed / 10.0f);
        results.insert("roll_speed", rollSpeed / 10.0f);
    } else if (command == 0x0F && response.size() >= 11) { // Zoom acknowledgement
        int8_t zoom = (int8_t) response.at(8);
        results.insert("zoom", zoom);
    } else if ((command == 0x16 || command == 0x18) && response.size() >= 12) { // Zoom: integer + tenths
        float zoomInt = (float) response.at(8);
        float zoomFloat = (float) ((float) response.at(9) / 10);
        results.insert("zoom", zoomInt + zoomFloat);
    } else if (command == 0x20 && response.size() >= 16) { // Codec specs: resolution
        uint16_t width = ((uint8_t) response.at(11) << 8) | (uint8_t) response.at(10);
        uint16_t height = ((uint8_t) response.at(13) << 8) | (uint8_t) response.at(12);
        results.insert("width", width);
        results.insert("height", height);
    } else if (command == 0x02 && response.size() >= 12) { // Hardware ID
        QByteArray hidArray;
        hidArray.append((char)response.at(8));
        hidArray.append((char)response.at(9));
        uint8_t hid = static_cast<uint8_t>(hidArray.toUShort(nullptr, 16));
        results.insert("hardware_id", hid);
    } else {
        // Unknown or too-short frame: dump the raw bytes for debugging.
        qWarning().noquote().nospace() << "Getting responce values not implemented yet for command " << QString("0x%1").arg(command, 2, 16, QLatin1Char('0'));
        QString responseStr;
        for (int i = 0; i < response.size(); i++) {
            if (i > 0) {
                responseStr += ",";
            }
            responseStr += QString("0x%1").arg(response.at(i), 2, 16, QChar('0')).toUpper();
            responseStr.replace("0X", "0x");
        }
        qWarning().noquote().nospace() << "Responce byte array: " << responseStr;
    }

    return results;
}
|
||||
@@ -0,0 +1,24 @@
|
||||
/**
|
||||
* This is a serial response class for Siyi Gimbal Cameras.
|
||||
* Other cameras might need their own serial response class.
|
||||
*/
|
||||
|
||||
#pragma once

#include <QString>
#include <QByteArray>
#include <QObject>
#include <QHash>
#include <QVariant>


// Byte offsets of the fields inside a Siyi serial protocol frame.
enum MESSAGE_IDX { STX = 0, CTRL = 2, Data_len = 3, SEQ = 5, CMD_ID = 7, DATA = 8 };


/**
 * Serial response parser for Siyi gimbal cameras.
 * Other cameras might need their own serial response class.
 */
class AiEngineGimbalServerUDPResponse : public QObject
{
    Q_OBJECT
public:
    explicit AiEngineGimbalServerUDPResponse(QObject *parent = nullptr);

public:
    // Decodes the payload of one response frame into named values
    // (yaw/pitch/roll and speeds, zoom, resolution, hardware id, ...).
    // QHash and QVariant are now included explicitly instead of relying
    // on transitive Qt includes.
    QHash<QString, QVariant> getResponceValues(QByteArray response);
};
|
||||
@@ -0,0 +1,31 @@
|
||||
#include <QFile>
|
||||
#include <QDataStream>
|
||||
#include <QDebug>
|
||||
#include "aiengineimagesaver.h"
|
||||
|
||||
// Deep-copies the frame (clone) so the caller may reuse or free its Mat
// while this worker thread is still running.
AiEngineImageSaver::AiEngineImageSaver(const cv::Mat &image, int imageNumber, QObject *parent)
    : QThread(parent), image(image.clone()), imageNumber(imageNumber)
{
}
|
||||
|
||||
// Blocks until run() has finished so the thread never outlives the object.
AiEngineImageSaver::~AiEngineImageSaver()
{
    wait();
}
|
||||
|
||||
void AiEngineImageSaver::run()
|
||||
{
|
||||
if (image.empty() || image.channels() == 0) {
|
||||
qWarning() << "AiEngineImageSaver. Empty image or no channels, nothing to save.";
|
||||
return;
|
||||
}
|
||||
|
||||
// Calculate the size of the upper half
|
||||
int halfHeight = image.rows / 2;
|
||||
cv::Mat upperHalf = image(cv::Rect(0, 0, image.cols, halfHeight));
|
||||
|
||||
// Use bpm to reduce encoding time.
|
||||
QString filePath = QString("/tmp/image-%1.bmp").arg(imageNumber, 5, 10, QChar('0'));
|
||||
|
||||
cv::imwrite(filePath.toStdString(), upperHalf);
|
||||
}
|
||||
@@ -0,0 +1,21 @@
|
||||
#pragma once

#include <QThread>
#include <QString>
#include <opencv2/opencv.hpp>

/**
 * One-shot worker thread that saves an inference frame to disk (see run()).
 * The constructor deep-copies the image, so each instance owns its pixels.
 */
class AiEngineImageSaver : public QThread
{
    Q_OBJECT

public:
    AiEngineImageSaver(const cv::Mat &image, int imageNumber, QObject *parent = nullptr);
    ~AiEngineImageSaver();

protected:
    // QThread entry point; executed once in the worker thread.
    void run() override;

private:
    cv::Mat image;     // private deep copy of the frame to write
    int imageNumber;   // sequence number used in the output file name
};
|
||||
@@ -0,0 +1,148 @@
|
||||
#include "aiengineinference.h"
|
||||
|
||||
|
||||
AiEngineInference::AiEngineInference(QString modelPath, QObject *parent)
|
||||
: QObject{parent},
|
||||
mModelPath(modelPath),
|
||||
mActive(false)
|
||||
{
|
||||
#ifdef YOLO_ONNX
|
||||
mClassNames = {
|
||||
"person",
|
||||
"bicycle",
|
||||
"car",
|
||||
"motorcycle",
|
||||
"airplane",
|
||||
"bus",
|
||||
"train",
|
||||
"truck",
|
||||
"boat",
|
||||
"traffic light",
|
||||
"fire hydrant",
|
||||
"stop sign",
|
||||
"parking meter",
|
||||
"bench",
|
||||
"bird",
|
||||
"cat",
|
||||
"dog",
|
||||
"horse",
|
||||
"sheep",
|
||||
"cow",
|
||||
"elephant",
|
||||
"bear",
|
||||
"zebra",
|
||||
"giraffe",
|
||||
"backpack",
|
||||
"umbrella",
|
||||
"handbag",
|
||||
"tie",
|
||||
"suitcase",
|
||||
"frisbee",
|
||||
"skis",
|
||||
"snowboard",
|
||||
"sports ball",
|
||||
"kite",
|
||||
"baseball bat",
|
||||
"baseball glove",
|
||||
"skateboard",
|
||||
"surfboard",
|
||||
"tennis racket",
|
||||
"bottle",
|
||||
"wine glass",
|
||||
"cup",
|
||||
"fork",
|
||||
"knife",
|
||||
"spoon",
|
||||
"bowl",
|
||||
"banana",
|
||||
"apple",
|
||||
"sandwich",
|
||||
"orange",
|
||||
"broccoli",
|
||||
"carrot",
|
||||
"hot dog",
|
||||
"pizza",
|
||||
"donut",
|
||||
"cake",
|
||||
"chair",
|
||||
"couch",
|
||||
"potted plant",
|
||||
"bed",
|
||||
"dining table",
|
||||
"toilet",
|
||||
"tv",
|
||||
"laptop",
|
||||
"mouse",
|
||||
"remote",
|
||||
"keyboard",
|
||||
"cell phone",
|
||||
"microwave",
|
||||
"oven",
|
||||
"toaster",
|
||||
"sink",
|
||||
"refrigerator",
|
||||
"book",
|
||||
"clock",
|
||||
"vase",
|
||||
"scissors",
|
||||
"teddy bear",
|
||||
"hair drier",
|
||||
"toothbrush"
|
||||
};
|
||||
#else
|
||||
mClassNames = {
|
||||
"Armoured vehicle",
|
||||
"Truck",
|
||||
"Vehicle",
|
||||
"Artillery",
|
||||
"Shadow artillery",
|
||||
"Trenches",
|
||||
"Military man",
|
||||
"Tyre tracks",
|
||||
"Additional protection tank",
|
||||
"Smoke"
|
||||
};
|
||||
#endif
|
||||
}
|
||||
|
||||
|
||||
// Returns true while performInferenceSlot() is running, letting producers
// drop frames instead of queueing them behind a busy engine.
// NOTE(review): mActive is a plain bool written from the inference thread —
// confirm cross-thread visibility is acceptable here.
bool AiEngineInference::isActive(void)
{
    return mActive;
}
|
||||
|
||||
|
||||
/**
 * Scales @p src down (never up) so it fits inside the inference square,
 * preserving aspect ratio, then pastes it into the top-left corner of a
 * black INFERENCE_SQUARE_WIDTH x INFERENCE_SQUARE_HEIGHT canvas.
 */
cv::Mat AiEngineInference::resizeAndPad(const cv::Mat& src)
{
    const float aspectRatio = static_cast<float>(src.cols) / src.rows;

    // Shrink only when the source exceeds the inference square in either axis.
    int targetWidth = src.cols;
    int targetHeight = src.rows;
    const bool tooLarge =
        src.cols > INFERENCE_SQUARE_WIDTH || src.rows > INFERENCE_SQUARE_HEIGHT;
    if (tooLarge) {
        if (aspectRatio > 1) {
            // Landscape: width is the limiting dimension.
            targetWidth = INFERENCE_SQUARE_WIDTH;
            targetHeight = static_cast<int>(INFERENCE_SQUARE_WIDTH / aspectRatio);
        } else {
            // Portrait or square: height is the limiting dimension.
            targetHeight = INFERENCE_SQUARE_HEIGHT;
            targetWidth = static_cast<int>(INFERENCE_SQUARE_HEIGHT * aspectRatio);
        }
    }

    cv::Mat resized;
    cv::resize(src, resized, cv::Size(targetWidth, targetHeight));

    // Black square canvas; the resized frame occupies its top-left corner.
    cv::Mat padded(INFERENCE_SQUARE_HEIGHT, INFERENCE_SQUARE_WIDTH, src.type(), cv::Scalar(0, 0, 0));
    resized.copyTo(padded(cv::Rect(0, 0, resized.cols, resized.rows)));

    return padded;
}
|
||||
@@ -0,0 +1,50 @@
|
||||
#pragma once

#include <QObject>
#include <QString>
#include <QVector>
#include <opencv2/core.hpp>
#include <opencv2/imgproc.hpp>
#include "aienginedefinitions.h"


// Side lengths of the square input image expected by the YOLO models.
const int INFERENCE_SQUARE_WIDTH = 640;
const int INFERENCE_SQUARE_HEIGHT = 640;


// One detected object: bounding box, confidence and class.
class AiEngineObject {
public:
    AiEngineRectangle rectangle;
    float propability;   // detection confidence in [0, 1] (sic — interface name kept)
    int classId;         // index into the model's label list
    QString classStr;    // human-readable label for classId
};


// Result of one inference pass: the annotated frame plus all detections.
class AiEngineInferenceResult {
public:
    cv::Mat frame;
    QVector<AiEngineObject> objects;
};


/**
 * Abstract base class for the inference backends (ncnn, ONNX Runtime, ...).
 * Subclasses implement performInferenceSlot() and initialize() and emit
 * resultsReady() when detections are available.
 */
class AiEngineInference : public QObject
{
    Q_OBJECT
public:
    explicit AiEngineInference(QString modelPath, QObject *parent = nullptr);
    // True while an inference pass is in progress.
    bool isActive(void);

protected:
    // Letterboxes a frame into the 640x640 inference square (top-left anchored).
    cv::Mat resizeAndPad(const cv::Mat& src);
    QString mModelPath;            // model file path given at construction
    bool mActive;                  // set for the duration of performInferenceSlot()
    int mNumber;                   // NOTE(review): not initialized here — confirm subclass usage
    QVector<QString> mClassNames;  // label list selected at construction

public slots:
    virtual void performInferenceSlot(cv::Mat frame) = 0;
    virtual void initialize(int number) = 0;
signals:
    void resultsReady(AiEngineInferenceResult results);
};
|
||||
@@ -0,0 +1,96 @@
|
||||
#include <QDebug>
|
||||
#include <QtConcurrent/QtConcurrent>
|
||||
#include <opencv2/core.hpp>
|
||||
#include <opencv2/imgcodecs.hpp>
|
||||
#include "aienginertsplistener.h"
|
||||
#include "aiengineconfig.h"
|
||||
|
||||
|
||||
// Listener starts idle; call startListening() to begin capturing frames.
AiEngineRtspListener::AiEngineRtspListener(QObject *parent)
    : QObject{parent},
      mIsListening(false)
{
}
|
||||
|
||||
|
||||
// Stops the capture loop and releases the stream on destruction.
AiEngineRtspListener::~AiEngineRtspListener()
{
    stopListening();
}
|
||||
|
||||
|
||||
void AiEngineRtspListener::startListening(void)
|
||||
{
|
||||
mIsListening = true;
|
||||
|
||||
(void)QtConcurrent::run([this](){
|
||||
listenLoop();
|
||||
});
|
||||
}
|
||||
|
||||
|
||||
// Asks the capture loop to exit and releases the stream handle.
// NOTE(review): mCap.release() runs on the caller's thread while
// listenLoop() may still be reading frames on the pool thread — confirm
// this teardown order is safe for the VideoCapture backend in use.
void AiEngineRtspListener::stopListening()
{
    mIsListening = false;

    if (mCap.isOpened()) {
        mCap.release();
    }
}
|
||||
|
||||
|
||||
/**
 * Frame-source loop, executed on a QtConcurrent pool thread.
 *
 * AI_BENCH builds replay still images from a fixed directory instead of a
 * live stream and terminate the process when done; normal builds pull
 * frames from the RTSP stream until stopListening() clears mIsListening.
 * Each frame is cloned before being emitted so receivers own their copy.
 */
void AiEngineRtspListener::listenLoop(void)
{
#ifdef AI_BENCH
    QThread::msleep(2000);

    // NOTE(review): hard-coded developer path — bench builds only work on
    // a machine with this directory layout.
    QString directoryPath = "/home/pama/tmp/photos";
    QDir directory(directoryPath);

    // Ensure the directory exists
    if (!directory.exists()) {
        qDebug() << "Directory does not exist!";
        exit(1);
    }

    QStringList filters;
    filters << "*.jpg" << "*.jpeg" << "*.png" << "*.bmp";
    directory.setNameFilters(filters);

    QFileInfoList fileInfoList = directory.entryInfoList(QDir::Files, QDir::Name);
    // entryInfoList is already name-sorted; this explicit re-sort keeps the
    // replay order deterministic regardless of locale settings.
    std::sort(fileInfoList.begin(), fileInfoList.end(), [](const QFileInfo &a, const QFileInfo &b) {
        return a.fileName() < b.fileName();
    });

    qDebug() << "Images in folder:" << fileInfoList.size();

    for (const QFileInfo &fileInfo : fileInfoList) {
        QString filePath = fileInfo.absoluteFilePath();
        cv::Mat image = cv::imread(filePath.toStdString());
        if (image.empty()) {
            qDebug() << "Failed to load image";
            exit(1);
        }

        emit frameReceived(image.clone());
        // Pace playback so downstream inference can keep up.
        QThread::msleep(1500);
    }

    QThread::msleep(2000);
    qDebug() << "Sleep 2000ms and exit";
    exit(0);
#else
    qDebug() << "AiEngineRtspListener loop running in thread: " << QThread::currentThreadId();

    // rtspVideoUrl — presumably defined in aiengineconfig.h (included above);
    // verify when changing the stream source.
    mCap.open(rtspVideoUrl.toStdString());
    cv::Mat frame;

    while (mIsListening) {
        mCap >> frame;

        if (frame.empty() == false) {
            emit frameReceived(frame.clone());
        }
    }
#endif
}
|
||||
@@ -0,0 +1,23 @@
|
||||
#pragma once

#include <QThread>
#include <opencv2/core.hpp>
#include <opencv2/videoio.hpp>

/**
 * Pulls frames from an RTSP stream (or, in AI_BENCH builds, replays still
 * images from disk) and emits each frame through frameReceived().
 */
class AiEngineRtspListener : public QObject
{
    Q_OBJECT
public:
    explicit AiEngineRtspListener(QObject *parent = nullptr);
    ~AiEngineRtspListener();
    // Starts the capture loop on a thread-pool thread.
    void startListening(void);
    // Signals the loop to exit and releases the stream.
    void stopListening(void);

private:
    // Blocking capture loop; runs until mIsListening is cleared.
    void listenLoop(void);
    bool mIsListening;       // loop run flag, written from other threads
    cv::VideoCapture mCap;   // underlying OpenCV stream handle

signals:
    // Emitted once per captured frame; the Mat is a deep copy.
    void frameReceived(cv::Mat frame);
};
|
||||
@@ -0,0 +1,10 @@
|
||||
Armoured_vehicle
|
||||
Truck
|
||||
Vehicle
|
||||
Artillery
|
||||
Shadow_artillery
|
||||
Trenches
|
||||
Military man
|
||||
Tyre_tracks
|
||||
Additional_protection_tank
|
||||
Smoke
|
||||
@@ -0,0 +1,80 @@
|
||||
person
|
||||
bicycle
|
||||
car
|
||||
motorcycle
|
||||
airplane
|
||||
bus
|
||||
train
|
||||
truck
|
||||
boat
|
||||
traffic light
|
||||
fire hydrant
|
||||
stop sign
|
||||
parking meter
|
||||
bench
|
||||
bird
|
||||
cat
|
||||
dog
|
||||
horse
|
||||
sheep
|
||||
cow
|
||||
elephant
|
||||
bear
|
||||
zebra
|
||||
giraffe
|
||||
backpack
|
||||
umbrella
|
||||
handbag
|
||||
tie
|
||||
suitcase
|
||||
frisbee
|
||||
skis
|
||||
snowboard
|
||||
sports ball
|
||||
kite
|
||||
baseball bat
|
||||
baseball glove
|
||||
skateboard
|
||||
surfboard
|
||||
tennis racket
|
||||
bottle
|
||||
wine glass
|
||||
cup
|
||||
fork
|
||||
knife
|
||||
spoon
|
||||
bowl
|
||||
banana
|
||||
apple
|
||||
sandwich
|
||||
orange
|
||||
broccoli
|
||||
carrot
|
||||
hot dog
|
||||
pizza
|
||||
donut
|
||||
cake
|
||||
chair
|
||||
couch
|
||||
potted plant
|
||||
bed
|
||||
dining table
|
||||
toilet
|
||||
tv
|
||||
laptop
|
||||
mouse
|
||||
remote
|
||||
keyboard
|
||||
cell phone
|
||||
microwave
|
||||
oven
|
||||
toaster
|
||||
sink
|
||||
refrigerator
|
||||
book
|
||||
clock
|
||||
vase
|
||||
scissors
|
||||
teddy bear
|
||||
hair drier
|
||||
toothbrush
|
||||
@@ -0,0 +1 @@
|
||||
yolo export model=yolov8n.pt opset=12 simplify=True dynamic=False format=onnx imgsz=640,640
|
||||
@@ -0,0 +1,27 @@
|
||||
#include <QCoreApplication>
|
||||
#include <QDebug>
|
||||
#include <QFile>
|
||||
#include <opencv2/opencv.hpp>
|
||||
#include "aiengine.h"
|
||||
|
||||
|
||||
int main(int argc, char *argv[])
|
||||
{
|
||||
QCoreApplication app(argc, argv);
|
||||
|
||||
if (argc != 2) {
|
||||
qDebug() << "\nUsage: " << argv[0] << " <model_file>";
|
||||
return 1;
|
||||
}
|
||||
|
||||
QString modelFile = argv[1];
|
||||
if (QFile::exists(modelFile) == false) {
|
||||
qDebug() << "\nUsage: " << modelFile << " <model_file>";
|
||||
return 1;
|
||||
}
|
||||
|
||||
AiEngine aiEngine(modelFile);
|
||||
aiEngine.start();
|
||||
|
||||
return app.exec();
|
||||
}
|
||||
@@ -0,0 +1,408 @@
|
||||
#include <QDebug>
|
||||
#include <QThread>
|
||||
#include <iostream>
|
||||
#include <vector>
|
||||
#include "aiengineinferencencnn.h"
|
||||
|
||||
|
||||
const int target_size = 640;
|
||||
const float prob_threshold = 0.25f;
|
||||
const float nms_threshold = 0.45f;
|
||||
const int num_labels = 10; // 80 object labels in COCO, 10 in Azaion
|
||||
const int MAX_STRIDE = 32;
|
||||
|
||||
|
||||
char* getCharPointerCopy(const QString& modelPath) {
|
||||
QByteArray byteArray = modelPath.toUtf8();
|
||||
char* cString = new char[byteArray.size() + 1]; // Allocate memory
|
||||
std::strcpy(cString, byteArray.constData()); // Copy the data
|
||||
return cString; // Remember to delete[] this when done!
|
||||
}
|
||||
|
||||
|
||||
AiEngineInferencevNcnn::AiEngineInferencevNcnn(QString modelPath, QObject *parent) :
|
||||
AiEngineInference{modelPath, parent}
|
||||
{
|
||||
qDebug() << "TUOMAS AiEngineInferencevNcnn() mModelPath=" << mModelPath;
|
||||
|
||||
yolov8.opt.num_threads = 4;
|
||||
yolov8.opt.use_vulkan_compute = false;
|
||||
|
||||
QString paramPath = modelPath.chopped(3).append("param");
|
||||
char *model = getCharPointerCopy(modelPath);
|
||||
char *param = getCharPointerCopy(paramPath);
|
||||
|
||||
qDebug() << "model:" << model;
|
||||
qDebug() << "param:" << param;
|
||||
|
||||
yolov8.load_param(param);
|
||||
yolov8.load_model(model);
|
||||
}
|
||||
|
||||
|
||||
static inline float intersection_area(const Object& a, const Object& b)
|
||||
{
|
||||
cv::Rect_<float> inter = a.rect & b.rect;
|
||||
return inter.area();
|
||||
}
|
||||
|
||||
|
||||
/**
 * In-place quicksort partition step ordering detections by descending
 * probability; the two resulting partitions recurse inside parallel
 * OpenMP sections.
 */
static void qsort_descent_inplace(std::vector<Object>& objects, int left, int right)
{
    int i = left;
    int j = right;
    // Pivot: probability of the middle element.
    float p = objects[(left + right) / 2].prob;

    while (i <= j)
    {
        // Advance past elements already on the correct side of the pivot.
        while (objects[i].prob > p)
            i++;

        while (objects[j].prob < p)
            j--;

        if (i <= j)
        {
            // swap
            std::swap(objects[i], objects[j]);

            i++;
            j--;
        }
    }

    #pragma omp parallel sections
    {
        #pragma omp section
        {
            if (left < j) qsort_descent_inplace(objects, left, j);
        }
        #pragma omp section
        {
            if (i < right) qsort_descent_inplace(objects, i, right);
        }
    }
}
||||
|
||||
|
||||
// Sorts detections by probability, highest first. No-op for empty input.
static void qsort_descent_inplace(std::vector<Object>& objects)
{
    if (!objects.empty())
        qsort_descent_inplace(objects, 0, objects.size() - 1);
}
|
||||
|
||||
|
||||
/**
 * Greedy non-maximum suppression over score-sorted boxes.
 * A box is kept unless it overlaps an already-kept box of the same class
 * (any class when @p agnostic) with IoU above @p nms_threshold.
 * Indices of the surviving boxes are written to @p picked.
 */
static void nms_sorted_bboxes(const std::vector<Object>& faceobjects, std::vector<int>& picked, float nms_threshold, bool agnostic = false)
{
    picked.clear();

    const int n = faceobjects.size();

    // Precompute box areas once; reused in every IoU test below.
    std::vector<float> areas(n);
    for (int i = 0; i < n; i++)
    {
        areas[i] = faceobjects[i].rect.area();
    }

    for (int i = 0; i < n; i++)
    {
        const Object& a = faceobjects[i];

        int keep = 1;
        for (int j = 0; j < (int)picked.size(); j++)
        {
            const Object& b = faceobjects[picked[j]];

            if (!agnostic && a.label != b.label)
                continue;

            // intersection over union
            float inter_area = intersection_area(a, b);
            float union_area = areas[i] + areas[picked[j]] - inter_area;
            // float IoU = inter_area / union_area
            if (inter_area / union_area > nms_threshold)
                keep = 0;
        }

        if (keep)
            picked.push_back(i);
    }
}
|
||||
|
||||
|
||||
// Logistic function: maps any real input into (0, 1).
static inline float sigmoid(float x)
{
    return static_cast<float>(1.f / (1.f + exp(-x)));
}
|
||||
|
||||
|
||||
// Clamps d into [min, max]: raise to min first, then cap at max
// (same contract as the original two-ternary form).
static inline float clampf(float d, float min, float max)
{
    float result = d;
    if (result < min)
        result = min;
    if (result > max)
        result = max;
    return result;
}
|
||||
|
||||
|
||||
/**
 * Decodes the raw YOLOv8 output tensor into Object detections.
 *
 * @param inputs        raw output buffer (num_channels x num_anchors, row-major)
 * @param confidence_threshold  minimum class score to keep a candidate
 * @param num_channels  4 box values + num_labels class scores per anchor
 * @param num_anchors   number of candidate boxes
 * @param num_labels    number of classes in the score block
 * @param infer_img_width,infer_img_height  bounds used to clamp the boxes
 * @param objects       receives the surviving detections (overwritten)
 */
static void parse_yolov8_detections(
    float* inputs, float confidence_threshold,
    int num_channels, int num_anchors, int num_labels,
    int infer_img_width, int infer_img_height,
    std::vector<Object>& objects)
{
    std::vector<Object> detections;
    // Transpose so each row holds one anchor: [cx, cy, w, h, score0..scoreN-1].
    cv::Mat output = cv::Mat((int)num_channels, (int)num_anchors, CV_32F, inputs).t();

    for (int i = 0; i < num_anchors; i++)
    {
        const float* row_ptr = output.row(i).ptr<float>();
        const float* bboxes_ptr = row_ptr;
        const float* scores_ptr = row_ptr + 4;
        // Best-scoring class for this anchor.
        const float* max_s_ptr = std::max_element(scores_ptr, scores_ptr + num_labels);
        float score = *max_s_ptr;
        if (score > confidence_threshold)
        {
            // Center/size box -> clamped corner coordinates.
            float x = *bboxes_ptr++;
            float y = *bboxes_ptr++;
            float w = *bboxes_ptr++;
            float h = *bboxes_ptr;

            float x0 = clampf((x - 0.5f * w), 0.f, (float)infer_img_width);
            float y0 = clampf((y - 0.5f * h), 0.f, (float)infer_img_height);
            float x1 = clampf((x + 0.5f * w), 0.f, (float)infer_img_width);
            float y1 = clampf((y + 0.5f * h), 0.f, (float)infer_img_height);

            cv::Rect_<float> bbox;
            bbox.x = x0;
            bbox.y = y0;
            bbox.width = x1 - x0;
            bbox.height = y1 - y0;
            Object object;
            // Offset of the maximum within the score block == class index.
            object.label = max_s_ptr - scores_ptr;
            object.prob = score;
            object.rect = bbox;
            detections.push_back(object);
        }
    }
    objects = detections;
}
|
||||
|
||||
|
||||
/**
 * Runs YOLOv8 on a 640x640 BGR frame and fills @p objects with the
 * NMS-filtered detections (coordinates in input-image space).
 * Logs the inference time to stdout. Always returns 0.
 *
 * The input is expected to be pre-letterboxed by resizeAndPad(), so the
 * previously commented-out generic rescale path was removed and the
 * scale is fixed at 1.
 */
int AiEngineInferencevNcnn::detect_yolov8(const cv::Mat& bgr, std::vector<Object>& objects)
{
    int w = 640;
    int h = 640;
    int img_w = 640;
    int img_h = 640;
    float scale = 1.f;

    ncnn::Mat in = ncnn::Mat::from_pixels_resize(bgr.data, ncnn::Mat::PIXEL_BGR2RGB, img_w, img_h, w, h);

    // Pad up to a multiple of MAX_STRIDE with the YOLO grey value (114).
    int wpad = (target_size + MAX_STRIDE - 1) / MAX_STRIDE * MAX_STRIDE - w;
    int hpad = (target_size + MAX_STRIDE - 1) / MAX_STRIDE * MAX_STRIDE - h;
    ncnn::Mat in_pad;
    ncnn::copy_make_border(in, in_pad, hpad / 2, hpad - hpad / 2, wpad / 2, wpad - wpad / 2, ncnn::BORDER_CONSTANT, 114.f);

    // Normalize pixel values to [0, 1].
    const float norm_vals[3] = {1 / 255.f, 1 / 255.f, 1 / 255.f};
    in_pad.substract_mean_normalize(0, norm_vals);

    auto start = std::chrono::high_resolution_clock::now();

    ncnn::Extractor ex = yolov8.create_extractor();

    ex.input("in0", in_pad);

    std::vector<Object> proposals;

    // stride 32
    {
        ncnn::Mat out;
        ex.extract("out0", out);

        std::vector<Object> objects32;
        parse_yolov8_detections(
            (float*)out.data, prob_threshold,
            out.h, out.w, num_labels,
            in_pad.w, in_pad.h,
            objects32);
        proposals.insert(proposals.end(), objects32.begin(), objects32.end());
    }

    // sort all proposals by score from highest to lowest
    qsort_descent_inplace(proposals);

    // apply nms with nms_threshold
    std::vector<int> picked;
    nms_sorted_bboxes(proposals, picked, nms_threshold);

    int count = picked.size();

    objects.resize(count);
    for (int i = 0; i < count; i++)
    {
        objects[i] = proposals[picked[i]];

        // adjust offset to original unpadded
        float x0 = (objects[i].rect.x - (wpad / 2)) / scale;
        float y0 = (objects[i].rect.y - (hpad / 2)) / scale;
        float x1 = (objects[i].rect.x + objects[i].rect.width - (wpad / 2)) / scale;
        float y1 = (objects[i].rect.y + objects[i].rect.height - (hpad / 2)) / scale;

        // clip to image bounds
        x0 = std::max(std::min(x0, (float)(img_w - 1)), 0.f);
        y0 = std::max(std::min(y0, (float)(img_h - 1)), 0.f);
        x1 = std::max(std::min(x1, (float)(img_w - 1)), 0.f);
        y1 = std::max(std::min(y1, (float)(img_h - 1)), 0.f);

        objects[i].rect.x = x0;
        objects[i].rect.y = y0;
        objects[i].rect.width = x1 - x0;
        objects[i].rect.height = y1 - y0;
    }

    auto end = std::chrono::high_resolution_clock::now();
    auto duration = std::chrono::duration_cast<std::chrono::milliseconds>(end - start);
    std::cout << "Time taken: " << duration.count() << " milliseconds" << std::endl;
    return 0;
}
|
||||
|
||||
|
||||
/**
 * Draws bounding boxes and "<class> <prob>%" labels onto a copy of the
 * frame and returns the annotated copy; @p bgr itself is left untouched.
 * Each detection is also dumped to stderr for debugging.
 */
static cv::Mat draw_objects(const cv::Mat& bgr, const std::vector<Object>& objects)
{
    // Azaion label set; indices must match the model's class ids.
    static const char* class_names[] = {
        "Armoured vehicles",
        "Truck",
        "Passenger car",
        "Artillery",
        "Shadow of the vehicle",
        "Trenches",
        "Military",
        "Ramps",
        "Tank with additional protection",
        "Smoke"
    };

    // Box colors, cycled per detection.
    static const unsigned char colors[19][3] = {
        {54, 67, 244},
        {99, 30, 233},
        {176, 39, 156},
        {183, 58, 103},
        {181, 81, 63},
        {243, 150, 33},
        {244, 169, 3},
        {212, 188, 0},
        {136, 150, 0},
        {80, 175, 76},
        {74, 195, 139},
        {57, 220, 205},
        {59, 235, 255},
        {7, 193, 255},
        {0, 152, 255},
        {34, 87, 255},
        {72, 85, 121},
        {158, 158, 158},
        {139, 125, 96}
    };

    int color_index = 0;

    cv::Mat image = bgr.clone();

    for (size_t i = 0; i < objects.size(); i++)
    {
        const Object& obj = objects[i];

        const unsigned char* color = colors[color_index % 19];
        color_index++;

        cv::Scalar cc(color[0], color[1], color[2]);

        fprintf(stderr, "%d = %.5f at %.2f %.2f %.2f x %.2f\n", obj.label, obj.prob,
                obj.rect.x, obj.rect.y, obj.rect.width, obj.rect.height);

        cv::rectangle(image, obj.rect, cc, 2);

        // snprintf instead of sprintf so a long class name cannot overflow
        // the label buffer.
        char text[256];
        snprintf(text, sizeof(text), "%s %.1f%%", class_names[obj.label], obj.prob * 100);

        int baseLine = 0;
        cv::Size label_size = cv::getTextSize(text, cv::FONT_HERSHEY_SIMPLEX, 0.5, 1, &baseLine);

        // Keep the label inside the image: above the box when possible,
        // shifted left when it would run off the right edge.
        int x = obj.rect.x;
        int y = obj.rect.y - label_size.height - baseLine;
        if (y < 0)
            y = 0;
        if (x + label_size.width > image.cols)
            x = image.cols - label_size.width;

        cv::rectangle(image, cv::Rect(cv::Point(x, y), cv::Size(label_size.width, label_size.height + baseLine)),
                      cc, -1);

        cv::putText(image, text, cv::Point(x, y + label_size.height),
                    cv::FONT_HERSHEY_SIMPLEX, 0.5, cv::Scalar(255, 255, 255));
    }

    return image;
}
|
||||
|
||||
|
||||
/**
 * Slot: runs one ncnn inference pass on @p frame.
 * Sets mActive for the duration so producers can drop frames, letterboxes
 * the frame, detects objects and — only when something was found — emits
 * resultsReady() with the annotated frame and the converted detections.
 */
void AiEngineInferencevNcnn::performInferenceSlot(cv::Mat frame)
{
    mActive = true;

    cv::Mat scaledImage = resizeAndPad(frame);

    std::vector<Object> objects;
    detect_yolov8(scaledImage, objects);

    if (objects.empty() == false) {
        AiEngineInferenceResult result;
        result.frame = draw_objects(scaledImage, objects);

        // Convert ncnn Object (x/y/width/height) to AiEngineObject
        // (top/left/bottom/right edge coordinates).
        for (uint i = 0; i < objects.size(); i++) {
            const Object &detection = objects[i];
            AiEngineObject object;
            object.classId = detection.label;
            object.classStr = mClassNames[detection.label];
            object.propability = detection.prob;
            object.rectangle.top = detection.rect.y;
            object.rectangle.left = detection.rect.x;
            object.rectangle.bottom = detection.rect.y + detection.rect.height;
            object.rectangle.right = detection.rect.x + detection.rect.width;
            result.objects.append(object);
        }

        emit resultsReady(result);
    }

    mActive = false;
}
|
||||
|
||||
|
||||
// Base-class hook; the ncnn backend needs no per-instance setup.
void AiEngineInferencevNcnn::initialize(int number)
{
    (void)number;
}
|
||||
@@ -0,0 +1,31 @@
|
||||
#pragma once

#include <QObject>
#include <vector>
#include <opencv2/core.hpp>
#include <ncnn/net.h>
#include "aiengineinference.h"


// One raw ncnn detection before conversion to AiEngineObject.
struct Object
{
    cv::Rect_<float> rect;  // box in inference-image coordinates
    int label;              // class index
    float prob;             // confidence in [0, 1]
};


/**
 * Inference backend running YOLOv8 through ncnn.
 */
class AiEngineInferencevNcnn : public AiEngineInference
{
    Q_OBJECT
public:
    explicit AiEngineInferencevNcnn(QString modelPath, QObject *parent = nullptr);
    // `override` added: implements the pure virtual in AiEngineInference.
    void initialize(int number) override;

public slots:
    void performInferenceSlot(cv::Mat frame) override;

private:
    // Runs the network on a 640x640 BGR image and fills `objects`.
    int detect_yolov8(const cv::Mat& bgr, std::vector<Object>& objects);
    ncnn::Net yolov8;
};
|
||||
|
||||
@@ -0,0 +1,131 @@
|
||||
#include <QDebug>
|
||||
#include <QThread>
|
||||
#include <vector>
|
||||
#include "aiengineinferenceonnxruntime.h"
|
||||
|
||||
|
||||
static const float confThreshold = 0.25f;
|
||||
static const float iouThreshold = 0.45f;
|
||||
static const float maskThreshold = 0.45f;
|
||||
|
||||
|
||||
AiEngineInferencevOnnxRuntime::AiEngineInferencevOnnxRuntime(QString modelPath, QObject *parent) :
|
||||
AiEngineInference{modelPath, parent},
|
||||
mPredictor(modelPath.toStdString(), confThreshold, iouThreshold, maskThreshold)
|
||||
{
|
||||
qDebug() << "TUOMAS AiEngineInferencevOnnxRuntime() mModelPath=" << mModelPath;
|
||||
qDebug() << "AiEngineInferencevOnnxRuntime() mClassNames.size() =" << mClassNames.size();
|
||||
}
|
||||
|
||||
|
||||
/**
 * Returns a copy of @p image with a green box and a "<class>: <conf>%"
 * label on a white background drawn for every detection.
 */
cv::Mat AiEngineInferencevOnnxRuntime::drawLabels(const cv::Mat &image, const std::vector<Yolov8Result> &detections)
{
    cv::Mat result = image.clone();

    for (const auto &detection : detections)
    {
        cv::rectangle(result, detection.box, cv::Scalar(0, 255, 0), 2);

        // Confidence as a whole-number percentage.
        int confidence = roundf(detection.conf * 100);
        std::string label = mClassNames[detection.classId].toStdString() + ": " + std::to_string(confidence) + "%";

        // White background rectangle sized to the rendered text, placed
        // just above the detection box.
        int baseLine;
        cv::Size labelSize = cv::getTextSize(label, cv::FONT_HERSHEY_COMPLEX, 0.5, 1, &baseLine);
        cv::Point labelOrigin(detection.box.x, detection.box.y - labelSize.height - baseLine);

        cv::rectangle(
            result,
            labelOrigin,
            cv::Point(detection.box.x + labelSize.width, detection.box.y),
            cv::Scalar(255, 255, 255),
            cv::FILLED);

        cv::putText(
            result,
            label,
            cv::Point(detection.box.x, detection.box.y - baseLine + 2),
            cv::FONT_HERSHEY_COMPLEX,
            0.5,
            cv::Scalar(0, 0, 0),
            1,
            cv::LINE_AA);

    }

    return result;
}
|
||||
|
||||
|
||||
/**
 * Slot: runs one ONNX Runtime inference pass on @p frame.
 * Letterboxes the frame, predicts, filters detections (YOLO_ONNX builds
 * keep only car/train/cup/banana for indoor testing), converts them to
 * AiEngineObjects and emits resultsReady() when anything survived.
 * All exceptions are caught and logged so a bad frame cannot kill the app.
 */
void AiEngineInferencevOnnxRuntime::performInferenceSlot(cv::Mat frame)
{
    qDebug() << __PRETTY_FUNCTION__;

    try {
        mActive = true;
        cv::Mat scaledImage = resizeAndPad(frame);
        std::vector<Yolov8Result> detections = mPredictor.predict(scaledImage);

#ifdef YOLO_ONNX
        // Only keep following detected objects.
        // car = 2
        // train = 6
        // cup = 41
        // banana = 46
        auto it = std::remove_if(detections.begin(), detections.end(),
                                 [](const Yolov8Result& result) {
                                     return result.classId != 2 &&
                                            result.classId != 6 &&
                                            result.classId != 41 &&
                                            result.classId != 46;
                                 });
        detections.erase(it, detections.end());
#endif

        AiEngineInferenceResult result;

        for (uint i = 0; i < detections.size(); i++) {
            const Yolov8Result &detection = detections[i];

            // Guard against class ids outside the loaded label list
            // (now also rejects negative ids).
            // NOTE: the previous per-detection cv::imwrite("scaledImage.png")
            // debug dump was removed — it wrote a file for every detection
            // of every frame.
            if (detection.classId < 0 || detection.classId >= mClassNames.size()) {
                qDebug() << "performInferenceSlot() invalid classId =" << detection.classId;
                continue;
            }

            // Add detected objects to the results
            AiEngineObject object;
            object.classId = detection.classId;
            object.classStr = mClassNames[detection.classId];
            object.propability = detection.conf;
            object.rectangle.top = detection.box.y;
            object.rectangle.left = detection.box.x;
            object.rectangle.bottom = detection.box.y + detection.box.height;
            object.rectangle.right = detection.box.x + detection.box.width;
            result.objects.append(object);
        }

        if (result.objects.empty() == false) {
            result.frame = drawLabels(scaledImage, detections);
            emit resultsReady(result);
        }

        mActive = false;
    }
    catch (const cv::Exception& e) {
        std::cout << "OpenCV exception caught: " << e.what() << std::endl;
    }
    catch (const std::exception& e) {
        std::cout << "Standard exception caught: " << e.what() << std::endl;
    }
    catch (...) {
        std::cout << "Unknown exception caught" << std::endl;
    }
}
|
||||
|
||||
|
||||
// Base-class hook; the ONNX Runtime backend needs no per-instance setup.
void AiEngineInferencevOnnxRuntime::initialize(int number)
{
    (void)number;
}
|
||||
@@ -0,0 +1,20 @@
|
||||
#pragma once

#include <QObject>
#include "aiengineinference.h"
#include "yolov8Predictor.h"

/**
 * Inference backend running YOLOv8 through ONNX Runtime (YOLOPredictor).
 */
class AiEngineInferencevOnnxRuntime : public AiEngineInference
{
    Q_OBJECT
public:
    explicit AiEngineInferencevOnnxRuntime(QString modelPath, QObject *parent = nullptr);
    // `override` added: implements the pure virtual in AiEngineInference.
    void initialize(int number) override;

public slots:
    void performInferenceSlot(cv::Mat frame) override;

private:
    // Draws boxes and class/confidence labels onto a copy of the image.
    cv::Mat drawLabels(const cv::Mat &image, const std::vector<Yolov8Result> &detections);
    YOLOPredictor mPredictor;
};
|
||||
@@ -0,0 +1,167 @@
|
||||
#include "utils.h"
|
||||
|
||||
// Product of all elements (used for tensor element counts).
// An empty input yields 0, not the usual multiplicative identity 1.
size_t utils::vectorProduct(const std::vector<int64_t> &vector)
{
    if (vector.empty())
        return 0;

    size_t result = 1;
    for (size_t i = 0; i < vector.size(); ++i)
        result *= vector[i];

    return result;
}
|
||||
|
||||
// Converts a UTF-8 C string to a wide string (for APIs that take wchar_t
// paths, e.g. ONNX Runtime on Windows).
// NOTE(review): std::codecvt_utf8 / std::wstring_convert are deprecated
// since C++17 — consider a platform conversion API when touching this.
std::wstring utils::charToWstring(const char *str)
{
    typedef std::codecvt_utf8<wchar_t> convert_type;
    std::wstring_convert<convert_type, wchar_t> converter;

    return converter.from_bytes(str);
}
|
||||
|
||||
/**
 * Reads one class name per line from @p path (tolerating CRLF endings)
 * and seeds the global `colors` palette with two random colors per class.
 * Returns an empty list (and logs to stderr) when the file cannot be read.
 */
std::vector<std::string> utils::loadNames(const std::string &path)
{
    // load class names
    std::vector<std::string> classNames;
    std::ifstream infile(path);
    if (infile.good())
    {
        std::string line;
        while (getline(infile, line))
        {
            // Strip a trailing '\r' from CRLF files. Calling back() on an
            // empty line was undefined behavior, so guard for blank lines.
            if (!line.empty() && line.back() == '\r')
                line.pop_back();
            classNames.emplace_back(line);
        }
        infile.close();
    }
    else
    {
        std::cerr << "ERROR: Failed to access class name path: " << path << std::endl;
    }
    // set color
    srand(time(0));

    // Two colors per class: box color [classId] and mask color
    // [classId + classNames.size()] (see visualizeDetection()).
    for (int i = 0; i < (int)(2 * classNames.size()); i++)
    {
        int b = rand() % 256;
        int g = rand() % 256;
        int r = rand() % 256;
        colors.push_back(cv::Scalar(b, g, r));
    }
    return classNames;
}
|
||||
|
||||
// Draws boxes, masks and confidence captions for 'results' onto 'im'.
// All drawing happens on a clone which is then alpha-blended back
// (40% original + 60% annotated), so the overlay appears semi-transparent.
void utils::visualizeDetection(cv::Mat &im, std::vector<Yolov8Result> &results,
                               const std::vector<std::string> &classNames)
{
    cv::Mat image = im.clone();
    for (const Yolov8Result &result : results)
    {

        int x = result.box.x;
        int y = result.box.y;

        // NOTE(review): the caption is built as "0." + round(conf * 100), so
        // a confidence of 0.05 renders as "0.5" — this looks like a
        // formatting bug; confirm intended display before changing.
        int conf = (int)std::round(result.conf * 100);
        int classId = result.classId;
        std::string label = classNames[classId] + " 0." + std::to_string(conf);

        int baseline = 0;
        cv::Size size = cv::getTextSize(label, cv::FONT_ITALIC, 0.4, 1, &baseline);
        // Mask colors live in the second half of the palette filled by
        // loadNames() (index classId + classNames.size()).
        image(result.box).setTo(colors[classId + classNames.size()], result.boxMask);
        cv::rectangle(image, result.box, colors[classId], 2);
        // Filled strip behind the caption, then the caption text itself.
        cv::rectangle(image,
                      cv::Point(x, y), cv::Point(x + size.width, y + 12),
                      colors[classId], -1);
        cv::putText(image, label,
                    cv::Point(x, y - 3 + 12), cv::FONT_ITALIC,
                    0.4, cv::Scalar(0, 0, 0), 1);
    }
    cv::addWeighted(im, 0.4, image, 0.6, 0, im);
}
|
||||
|
||||
// Resizes 'image' into 'outImage' preserving aspect ratio, then pads with
// 'color' to reach 'newShape' (or, when auto_ is set, only up to the next
// multiple of 'stride' — the minimal padding a dynamic-shape model needs).
// scaleFill stretches without padding; scaleUp=false never enlarges.
// Defaults here apply only within this translation unit (the header
// declaration carries none).
void utils::letterbox(const cv::Mat &image, cv::Mat &outImage,
                      const cv::Size &newShape = cv::Size(640, 640),
                      const cv::Scalar &color = cv::Scalar(114, 114, 114),
                      bool auto_ = true,
                      bool scaleFill = false,
                      bool scaleUp = true,
                      int stride = 32)
{
    cv::Size shape = image.size();
    float r = std::min((float)newShape.height / (float)shape.height,
                       (float)newShape.width / (float)shape.width);
    if (!scaleUp)
        r = std::min(r, 1.0f);

    float ratio[2]{r, r};

    int newUnpad[2]{(int)std::round((float)shape.width * r),
                    (int)std::round((float)shape.height * r)};

    auto dw = (float)(newShape.width - newUnpad[0]);
    auto dh = (float)(newShape.height - newUnpad[1]);

    if (auto_)
    {
        // Minimal padding: only up to the next stride multiple.
        dw = (float)((int)dw % stride);
        dh = (float)((int)dh % stride);
    }
    else if (scaleFill)
    {
        dw = 0.0f;
        dh = 0.0f;
        newUnpad[0] = newShape.width;
        newUnpad[1] = newShape.height;
        ratio[0] = (float)newShape.width / (float)shape.width;
        ratio[1] = (float)newShape.height / (float)shape.height;
    }

    // Split padding evenly between the two sides.
    dw /= 2.0f;
    dh /= 2.0f;

    // BUG FIX: a resize is needed when EITHER dimension changed ('||');
    // the previous '&&' skipped the resize when only one dimension differed,
    // and left 'outImage' unassigned when the caller passed a distinct Mat.
    if (shape.width != newUnpad[0] || shape.height != newUnpad[1])
    {
        cv::resize(image, outImage, cv::Size(newUnpad[0], newUnpad[1]));
    }
    else
    {
        // No rescale needed — still make sure outImage holds the source
        // pixels (callers often alias image and outImage, but must not rely
        // on it).
        outImage = image;
    }

    int top = int(std::round(dh - 0.1f));
    int bottom = int(std::round(dh + 0.1f));
    int left = int(std::round(dw - 0.1f));
    int right = int(std::round(dw + 0.1f));
    cv::copyMakeBorder(outImage, outImage, top, bottom, left, right, cv::BORDER_CONSTANT, color);
}
|
||||
|
||||
// Maps a detection from letterboxed model coordinates ('imageShape') back to
// the original image ('imageOriginalShape'): removes the letterbox padding,
// divides by the scale factor, clamps the box inside the image, crops and
// resizes the mask accordingly, and finally binarizes the mask against
// 'maskThreshold'. 'coords' and 'mask' are modified in place.
void utils::scaleCoords(cv::Rect &coords,
                        cv::Mat &mask,
                        const float maskThreshold,
                        const cv::Size &imageShape,
                        const cv::Size &imageOriginalShape)
{
    // Same scale factor letterbox() applied: min of the two axis ratios.
    float gain = std::min((float)imageShape.height / (float)imageOriginalShape.height,
                          (float)imageShape.width / (float)imageOriginalShape.width);

    // Padding added on each side by letterbox(): {x pad, y pad}.
    int pad[2] = {(int)(((float)imageShape.width - (float)imageOriginalShape.width * gain) / 2.0f),
                  (int)(((float)imageShape.height - (float)imageOriginalShape.height * gain) / 2.0f)};

    // Un-pad and un-scale the box origin, clamped to the image.
    coords.x = (int)std::round(((float)(coords.x - pad[0]) / gain));
    coords.x = std::max(0, coords.x);
    coords.y = (int)std::round(((float)(coords.y - pad[1]) / gain));
    coords.y = std::max(0, coords.y);

    // Un-scale the box size, clamped so the box stays inside the image.
    coords.width = (int)std::round(((float)coords.width / gain));
    coords.width = std::min(coords.width, imageOriginalShape.width - coords.x);
    coords.height = (int)std::round(((float)coords.height / gain));
    coords.height = std::min(coords.height, imageOriginalShape.height - coords.y);
    // Drop the padded border from the mask, scale it to the original image
    // size, then keep only the pixels inside the (already clamped) box that
    // exceed the threshold.
    mask = mask(cv::Rect(pad[0], pad[1], imageShape.width - 2 * pad[0], imageShape.height - 2 * pad[1]));

    cv::resize(mask, mask, imageOriginalShape, cv::INTER_LINEAR);

    mask = mask(coords) > maskThreshold;
}
|
||||
template <typename T>
|
||||
T utils::clip(const T &n, const T &lower, const T &upper)
|
||||
{
|
||||
return std::max(lower, std::min(n, upper));
|
||||
}
|
||||
@@ -0,0 +1,38 @@
|
||||
#pragma once
#include <codecvt>
#include <fstream>
#include <opencv2/opencv.hpp>

// One detection produced by the YOLOv8 post-processing step.
struct Yolov8Result
{
    cv::Rect box;    // bounding box in original-image coordinates
    cv::Mat boxMask; // mask in box (segmentation models; binary after scaleCoords)
    float conf{};    // best class confidence
    int classId{};   // index into the class-name list
};

namespace utils
{
    // Shared visualization palette, filled by loadNames() with two colors per
    // class (box color + mask color) and read by visualizeDetection().
    // BUG FIX: declared 'inline' (C++17) so every translation unit shares one
    // vector; the previous namespace-scope 'static' gave each TU its own
    // private — and therefore possibly empty — copy.
    inline std::vector<cv::Scalar> colors;

    // Product of all elements of a tensor shape; 0 for an empty shape.
    size_t vectorProduct(const std::vector<int64_t> &vector);
    // Widens a UTF-8 C string to std::wstring.
    std::wstring charToWstring(const char *str);
    // Loads class names (one per line) and fills 'colors'.
    std::vector<std::string> loadNames(const std::string &path);
    // Alpha-blends boxes/masks/captions for 'results' onto 'image'.
    void visualizeDetection(cv::Mat &image, std::vector<Yolov8Result> &results,
                            const std::vector<std::string> &classNames);

    // Aspect-preserving resize + padding (defaults live on the definition).
    void letterbox(const cv::Mat &image, cv::Mat &outImage,
                   const cv::Size &newShape,
                   const cv::Scalar &color,
                   bool auto_,
                   bool scaleFill,
                   bool scaleUp,
                   int stride);

    // Maps a box + mask from letterboxed coordinates back to the original image.
    void scaleCoords(cv::Rect &coords, cv::Mat &mask,
                     const float maskThreshold,
                     const cv::Size &imageShape, const cv::Size &imageOriginalShape);

    // NOTE(review): defined in utils.cpp, so this template can only be
    // instantiated from that translation unit.
    template <typename T>
    T clip(const T &n, const T &lower, const T &upper);
}
|
||||
@@ -0,0 +1,243 @@
|
||||
#include "yolov8Predictor.h"

#include <memory>
|
||||
|
||||
// Loads an ONNX YOLOv8 model and caches everything post-processing needs:
// input/output names and shapes, whether the model has a segmentation head
// (two outputs), whether the input H/W are dynamic, and the class count
// derived from the first output's channel dimension.
YOLOPredictor::YOLOPredictor(const std::string &modelPath,
                             float confThreshold,
                             float iouThreshold,
                             float maskThreshold)
{
    // Post-processing thresholds.
    this->confThreshold = confThreshold;
    this->iouThreshold = iouThreshold;
    this->maskThreshold = maskThreshold;
    env = Ort::Env(OrtLoggingLevel::ORT_LOGGING_LEVEL_WARNING, "YOLOV8");
    sessionOptions = Ort::SessionOptions();

    std::cout << "Inference device: CPU" << std::endl;

    session = Ort::Session(env, modelPath.c_str(), sessionOptions);

    const size_t num_input_nodes = session.GetInputCount();   // == 1
    const size_t num_output_nodes = session.GetOutputCount(); // == 1 (detect) or 2 (segment)
    if (num_output_nodes > 1)
    {
        this->hasMask = true;
        std::cout << "Instance Segmentation" << std::endl;
    }
    else
        std::cout << "Object Detection" << std::endl;

    Ort::AllocatorWithDefaultOptions allocator;
    for (int i = 0; i < (int)num_input_nodes; i++)
    {
        auto input_name = session.GetInputNameAllocated(i, allocator);
        this->inputNames.push_back(input_name.get());
        // Keep the allocated string alive; inputNames stores raw pointers.
        input_names_ptr.push_back(std::move(input_name));

        Ort::TypeInfo inputTypeInfo = session.GetInputTypeInfo(i);
        std::vector<int64_t> inputTensorShape = inputTypeInfo.GetTensorTypeAndShapeInfo().GetShape();
        this->inputShapes.push_back(inputTensorShape);
        // BUG FIX: start from 'false' and only flag dynamic input when the
        // model actually declares -1 for H and W. The old code assigned
        // 'true' unconditionally right before this check, which made the
        // check dead and treated fixed-shape models as dynamic.
        this->isDynamicInputShape = false;
        // checking if width and height are dynamic
        if (inputTensorShape[2] == -1 && inputTensorShape[3] == -1)
        {
            std::cout << "Dynamic input shape" << std::endl;
            this->isDynamicInputShape = true;
        }
    }
    for (int i = 0; i < (int)num_output_nodes; i++)
    {
        auto output_name = session.GetOutputNameAllocated(i, allocator);
        this->outputNames.push_back(output_name.get());
        output_names_ptr.push_back(std::move(output_name));

        Ort::TypeInfo outputTypeInfo = session.GetOutputTypeInfo(i);
        std::vector<int64_t> outputTensorShape = outputTypeInfo.GetTensorTypeAndShapeInfo().GetShape();
        this->outputShapes.push_back(outputTensorShape);
        if (i == 0)
        {
            // First output channels = 4 box values + classes (+ 32 mask
            // coefficients for segmentation models).
            if (!this->hasMask)
                classNums = outputTensorShape[1] - 4;
            else
                classNums = outputTensorShape[1] - 4 - 32;
        }
    }
}
|
||||
|
||||
// Scans the class-score section of one output row (elements 4..4+_classNums)
// and reports the best score and its class index. If no score is positive,
// bestConf stays 0 (the caller's confidence threshold then rejects the
// candidate) and bestClassId stays 0.
void YOLOPredictor::getBestClassInfo(std::vector<float>::iterator it,
                                     float &bestConf,
                                     int &bestClassId,
                                     const int _classNums)
{
    // first 4 elements are the box (cx, cy, w, h)
    // BUG FIX: initialize to a valid class index; the previous value of 4
    // could leak an out-of-range class id when no score was positive.
    bestClassId = 0;
    bestConf = 0;

    for (int i = 4; i < _classNums + 4; i++)
    {
        if (it[i] > bestConf)
        {
            bestConf = it[i];
            bestClassId = i - 4;
        }
    }
}
|
||||
// Combines one detection's mask coefficients ('maskProposals', one row) with
// the prototype tensor ('maskProtos', shape taken from outputShapes[1]) into
// a single sigmoid-activated mask resized to the model input resolution.
cv::Mat YOLOPredictor::getMask(const cv::Mat &maskProposals,
                               const cv::Mat &maskProtos)
{
    // Flatten protos to (C, H*W) so the combination is one matrix multiply.
    cv::Mat protos = maskProtos.reshape(0, {(int)this->outputShapes[1][1], (int)this->outputShapes[1][2] * (int)this->outputShapes[1][3]});

    cv::Mat matmul_res = (maskProposals * protos).t();
    cv::Mat masks = matmul_res.reshape(1, {(int)this->outputShapes[1][2], (int)this->outputShapes[1][3]});
    cv::Mat dest;

    // sigmoid
    cv::exp(-masks, dest);
    dest = 1.0 / (1.0 + dest);
    // Upscale from proto resolution to the network input resolution.
    cv::resize(dest, dest, cv::Size((int)this->inputShapes[0][2], (int)this->inputShapes[0][3]), cv::INTER_LINEAR);
    return dest;
}
|
||||
|
||||
// Converts a BGR frame into the CHW float blob the network expects:
// BGR->RGB, letterbox to the model input size, scale to [0,1], HWC->CHW.
// Allocates 'blob' with new[] — the caller (predict()) owns and frees it.
// The H/W entries of 'inputTensorShape' are updated to the letterboxed size.
void YOLOPredictor::preprocessing(cv::Mat &image, float *&blob, std::vector<int64_t> &inputTensorShape)
{
    cv::Mat resizedImage, floatImage;
    cv::cvtColor(image, resizedImage, cv::COLOR_BGR2RGB);
    // auto_ = isDynamicInputShape: dynamic models get minimal (stride-aligned)
    // padding, fixed models get padded to the full declared input size.
    utils::letterbox(resizedImage, resizedImage, cv::Size((int)this->inputShapes[0][2], (int)this->inputShapes[0][3]),
                     cv::Scalar(114, 114, 114), this->isDynamicInputShape,
                     false, true, 32);

    inputTensorShape[2] = resizedImage.rows;
    inputTensorShape[3] = resizedImage.cols;

    resizedImage.convertTo(floatImage, CV_32FC3, 1 / 255.0);
    blob = new float[floatImage.cols * floatImage.rows * floatImage.channels()];
    cv::Size floatImageSize{floatImage.cols, floatImage.rows};

    // hwc -> chw: each cv::Mat in 'chw' is a view into 'blob', so cv::split
    // writes the channel planes directly into the output buffer.
    std::vector<cv::Mat> chw(floatImage.channels());
    for (int i = 0; i < floatImage.channels(); ++i)
    {
        chw[i] = cv::Mat(floatImageSize, CV_32FC1, blob + i * floatImageSize.width * floatImageSize.height);
    }
    cv::split(floatImage, chw);
}
|
||||
|
||||
// Decodes the raw output tensors into final detections:
//  1) transpose output0 so each row is one candidate,
//  2) keep candidates whose best class score exceeds confThreshold,
//  3) run NMS over the surviving boxes,
//  4) for segmentation models, rebuild each kept mask from its coefficients
//     and the prototype tensor (output1),
//  5) map boxes/masks back to the original image via utils::scaleCoords().
std::vector<Yolov8Result> YOLOPredictor::postprocessing(const cv::Size &resizedImageShape,
                                                        const cv::Size &originalImageShape,
                                                        std::vector<Ort::Value> &outputTensors)
{

    // for box
    std::vector<cv::Rect> boxes;
    std::vector<float> confs;
    std::vector<int> classIds;

    float *boxOutput = outputTensors[0].GetTensorMutableData<float>();
    //[1,4+n,8400]=>[1,8400,4+n] or [1,4+n+32,8400]=>[1,8400,4+n+32]
    cv::Mat output0 = cv::Mat(cv::Size((int)this->outputShapes[0][2], (int)this->outputShapes[0][1]), CV_32F, boxOutput).t();
    float *output0ptr = (float *)output0.data;
    int rows = (int)this->outputShapes[0][2];
    int cols = (int)this->outputShapes[0][1];
    // if hasMask
    std::vector<std::vector<float>> picked_proposals;
    cv::Mat mask_protos;

    for (int i = 0; i < rows; i++)
    {
        // One candidate row: [cx, cy, w, h, class scores..., mask coeffs...].
        std::vector<float> it(output0ptr + i * cols, output0ptr + (i + 1) * cols);
        float confidence;
        int classId;
        this->getBestClassInfo(it.begin(), confidence, classId, classNums);

        if (confidence > this->confThreshold)
        {
            if (this->hasMask)
            {
                // Stash the mask coefficients; masks are built after NMS.
                std::vector<float> temp(it.begin() + 4 + classNums, it.end());
                picked_proposals.push_back(temp);
            }
            // Convert center/size to top-left/size (still letterboxed coords).
            int centerX = (int)(it[0]);
            int centerY = (int)(it[1]);
            int width = (int)(it[2]);
            int height = (int)(it[3]);
            int left = centerX - width / 2;
            int top = centerY - height / 2;
            boxes.emplace_back(left, top, width, height);
            confs.emplace_back(confidence);
            classIds.emplace_back(classId);
        }
    }

    // Class-agnostic non-maximum suppression.
    std::vector<int> indices;
    cv::dnn::NMSBoxes(boxes, confs, this->confThreshold, this->iouThreshold, indices);

    if (this->hasMask)
    {
        // Wrap output1 (prototype masks) without copying.
        float *maskOutput = outputTensors[1].GetTensorMutableData<float>();
        std::vector<int> mask_protos_shape = {1, (int)this->outputShapes[1][1], (int)this->outputShapes[1][2], (int)this->outputShapes[1][3]};
        mask_protos = cv::Mat(mask_protos_shape, CV_32F, maskOutput);
    }

    std::vector<Yolov8Result> results;
    for (int idx : indices)
    {
        Yolov8Result res;
        res.box = cv::Rect(boxes[idx]);
        if (this->hasMask)
            res.boxMask = this->getMask(cv::Mat(picked_proposals[idx]).t(), mask_protos);
        else
            // Detection-only models get an all-zero placeholder mask.
            res.boxMask = cv::Mat::zeros((int)this->inputShapes[0][2], (int)this->inputShapes[0][3], CV_8U);

        // Translate box + mask from letterboxed to original-image coordinates.
        utils::scaleCoords(res.box, res.boxMask, this->maskThreshold, resizedImageShape, originalImageShape);
        res.conf = confs[idx];
        res.classId = classIds[idx];
        results.emplace_back(res);
    }

    return results;
}
|
||||
|
||||
// Runs one full inference pass on 'image' (preprocess -> ONNX Runtime ->
// postprocess) and returns detections in original-image coordinates.
std::vector<Yolov8Result> YOLOPredictor::predict(cv::Mat &image)
{
    float *blob = nullptr;
    std::vector<int64_t> inputTensorShape{1, 3, -1, -1};
    this->preprocessing(image, blob, inputTensorShape);
    // BUG FIX: preprocessing() allocates 'blob' with new[]; take ownership
    // immediately so the buffer is released even if session.Run (or any code
    // below) throws. The old explicit delete[] leaked on exceptions.
    std::unique_ptr<float[]> blobGuard(blob);

    size_t inputTensorSize = utils::vectorProduct(inputTensorShape);

    std::vector<float> inputTensorValues(blob, blob + inputTensorSize);

    std::vector<Ort::Value> inputTensors;

    Ort::MemoryInfo memoryInfo = Ort::MemoryInfo::CreateCpu(
        OrtAllocatorType::OrtArenaAllocator, OrtMemType::OrtMemTypeDefault);

    inputTensors.push_back(Ort::Value::CreateTensor<float>(
        memoryInfo, inputTensorValues.data(), inputTensorSize,
        inputTensorShape.data(), inputTensorShape.size()));

    std::vector<Ort::Value> outputTensors = this->session.Run(Ort::RunOptions{nullptr},
                                                              this->inputNames.data(),
                                                              inputTensors.data(),
                                                              1,
                                                              this->outputNames.data(),
                                                              this->outputNames.size());

    // inputTensorShape was updated by preprocessing() to the letterboxed size.
    cv::Size resizedShape = cv::Size((int)inputTensorShape[3], (int)inputTensorShape[2]);
    std::vector<Yolov8Result> result = this->postprocessing(resizedShape,
                                                            image.size(),
                                                            outputTensors);

    return result;
}
|
||||
@@ -0,0 +1,50 @@
|
||||
#pragma once
#include <opencv2/opencv.hpp>
#include <onnxruntime_cxx_api.h>
#include <utility>

#include "utils.h"

// ONNX Runtime wrapper for YOLOv8 detection/segmentation models.
// Typical use: construct with a model path + thresholds, then call
// predict() once per frame.
class YOLOPredictor
{
public:
    // Null-state constructor so the predictor can be a default member and
    // assigned later.
    explicit YOLOPredictor(std::nullptr_t){};
    YOLOPredictor(const std::string &modelPath,
                  float confThreshold,
                  float iouThreshold,
                  float maskThreshold);
    // ~YOLOPredictor();
    // Full pipeline: preprocess -> run -> postprocess.
    std::vector<Yolov8Result> predict(cv::Mat &image);
    // Overwritten from the model's output shape in the path-based ctor;
    // NOTE(review): 10 is only the fallback for the nullptr ctor — confirm.
    int classNums = 10;

private:
    Ort::Env env{nullptr};
    Ort::SessionOptions sessionOptions{nullptr};
    Ort::Session session{nullptr};

    // Builds the CHW float blob (allocated with new[]; predict() frees it).
    void preprocessing(cv::Mat &image, float *&blob, std::vector<int64_t> &inputTensorShape);
    // Threshold + NMS + mask reconstruction + coordinate rescaling.
    std::vector<Yolov8Result> postprocessing(const cv::Size &resizedImageShape,
                                             const cv::Size &originalImageShape,
                                             std::vector<Ort::Value> &outputTensors);

    // Best class score/index within one output row.
    static void getBestClassInfo(std::vector<float>::iterator it,
                                 float &bestConf,
                                 int &bestClassId,
                                 const int _classNums);
    // Mask coefficients x prototype masks -> sigmoid mask at input resolution.
    cv::Mat getMask(const cv::Mat &maskProposals, const cv::Mat &maskProtos);
    bool isDynamicInputShape{};

    // Raw name pointers passed to Ort::Session::Run; the AllocatedStringPtr
    // vectors below keep the underlying strings alive.
    std::vector<const char *> inputNames;
    std::vector<Ort::AllocatedStringPtr> input_names_ptr;

    std::vector<const char *> outputNames;
    std::vector<Ort::AllocatedStringPtr> output_names_ptr;

    std::vector<std::vector<int64_t>> inputShapes;
    std::vector<std::vector<int64_t>> outputShapes;
    float confThreshold = 0.3f;
    float iouThreshold = 0.4f;

    bool hasMask = false;  // true when the model has a second (mask) output
    float maskThreshold = 0.5f;
};
|
||||
@@ -0,0 +1,67 @@
|
||||
#include <QDebug>
|
||||
#include <QThread>
|
||||
#include "aiengineinferenceopencvonnx.h"
|
||||
|
||||
|
||||
AiEngineInferenceOpencvOnnx::AiEngineInferenceOpencvOnnx(QString modelPath, QObject *parent)
|
||||
: AiEngineInference{modelPath, parent},
|
||||
mInference(modelPath.toStdString(), cv::Size(640, 640), "classes.txt")
|
||||
{
|
||||
//qDebug() << "TUOMAS test mModelPath=" << mModelPath;
|
||||
//mEngine = new InferenceEngine(modelPath.toStdString());
|
||||
//mInference = new Inference(modelPath.toStdString(), cv::Size(INFERENCE_SQUARE_WIDTH, INFERENCE_SQUARE_HEIGHT), "classes.txt");
|
||||
}
|
||||
|
||||
|
||||
// Runs one inference pass on 'frame' in the receiving thread: scales the
// frame, runs the OpenCV-DNN model, reports every detection via
// resultsReady(), and draws only class-id-0 detections on the debug frame.
void AiEngineInferenceOpencvOnnx::performInferenceSlot(cv::Mat frame)
{
    try {
        qDebug() << "performInferenceSlot() in thread: " << QThread::currentThreadId();

        mActive = true;

        cv::Mat scaledImage = resizeAndPad(frame);
        std::vector<Detection> detections = mInference.runInference(scaledImage);
        AiEngineInferenceResult result;

        qDebug() << "performInferenceSlot() found " << detections.size() << " objects";

        // All classes are reported to listeners.
        for (uint i = 0; i < detections.size(); ++i) {
            const Detection &detection = detections[i];

            // Add detected objects to the results
            AiEngineObject object;
            object.classId = detection.class_id;
            object.propability = detection.confidence;
            object.rectangle.top = detection.box.y;
            object.rectangle.left = detection.box.x;
            object.rectangle.bottom = detection.box.y + detection.box.height;
            object.rectangle.right = detection.box.x + detection.box.width;
            result.objects.append(object);
        }

        // Only class id 0 detections are kept for the annotated debug frame.
        auto end = std::remove_if(detections.begin(), detections.end(), [](const Detection& detection) {
            return detection.class_id != 0;
        });
        detections.erase(end, detections.end());

        if (result.objects.empty() == false) {
            result.frame = mInference.drawLabels(scaledImage, detections);
            emit resultsReady(result);
        }

        mActive = false;
    }
    catch (const cv::Exception& e) {
        std::cerr << "performInferenceSlot() Error: " << e.what() << std::endl;
    }
    // BUG FIX: mirror the ONNX Runtime backend (which also catches
    // std::exception and ...) so no exception escapes into the Qt event loop.
    catch (const std::exception& e) {
        std::cerr << "performInferenceSlot() Standard exception caught: " << e.what() << std::endl;
    }
    catch (...) {
        std::cerr << "performInferenceSlot() Unknown exception caught" << std::endl;
    }
}
|
||||
|
||||
|
||||
void AiEngineInferenceOpencvOnnx::initialize(int /*number*/)
{
    // Nothing to set up for the OpenCV-DNN backend: the model is loaded in
    // the constructor, so the instance number is unused.
}
|
||||
@@ -0,0 +1,19 @@
|
||||
#pragma once

#include <QObject>
#include "aiengineinference.h"
#include "src-opencv-onnx/inference.h"

// AiEngineInference backend that runs YOLO ONNX models through OpenCV's
// DNN module (see Inference). Frames arrive via performInferenceSlot();
// results are published through the base class (resultsReady signal).
class AiEngineInferenceOpencvOnnx : public AiEngineInference
{
    Q_OBJECT
public:
    explicit AiEngineInferenceOpencvOnnx(QString modelPath, QObject *parent = nullptr);
    // Backend-specific setup hook; this backend ignores 'number'.
    void initialize(int number);

public slots:
    // Runs one inference pass on 'frame' in the receiving thread.
    void performInferenceSlot(cv::Mat frame) override;

private:
    Inference mInference;  // OpenCV-DNN model wrapper, built in the ctor
};
|
||||
@@ -0,0 +1,227 @@
|
||||
#include "inference.h"
|
||||
|
||||
// Stores the configuration and immediately loads the ONNX network.
// The class-name list is baked into the header; loadClassesFromFile() is
// intentionally not called here.
Inference::Inference(const std::string &onnxModelPath, const cv::Size &modelInputShape, const std::string &classesTxtFile, const bool &runWithCuda)
    : modelPath(onnxModelPath),
      classesPath(classesTxtFile),
      cudaEnabled(runWithCuda),
      modelShape(modelInputShape)
{
    std::cout << "SIZE = " << modelInputShape.width << "x" << modelInputShape.height << std::endl;
    loadOnnxNetwork();
    // loadClassesFromFile(); The classes are hard-coded for this example
}
|
||||
|
||||
// Runs the loaded network on 'input' and returns NMS-filtered detections.
// Supports both layouts: YOLOv8-style decoding when 'yolov8' is true,
// YOLOv5-style (objectness + per-class scores) otherwise.
std::vector<Detection> Inference::runInference(const cv::Mat &input)
{
    cv::Mat modelInput = input;
    if (letterBoxForSquare && modelShape.width == modelShape.height)
        modelInput = formatToSquare(modelInput);

    // Normalize to [0,1], resize to the model input, swap BGR->RGB.
    cv::Mat blob;
    cv::dnn::blobFromImage(modelInput, blob, 1.0/255.0, modelShape, cv::Scalar(), true, false);
    net.setInput(blob);

    std::vector<cv::Mat> outputs;
    net.forward(outputs, net.getUnconnectedOutLayersNames());

    int rows = outputs[0].size[1];
    int dimensions = outputs[0].size[2];

    bool yolov8 = true;
    // yolov5 has an output of shape (batchSize, 25200, 85) (Num classes + box[x,y,w,h] + confidence[c])
    // yolov8 has an output of shape (batchSize, 84, 8400) (Num classes + box[x,y,w,h])
    // NOTE(review): the auto-detect + transpose below is commented out, so
    // 'rows'/'dimensions' are taken from the raw output as-is. That only
    // works if the exported model already emits (batch, boxes, attrs) —
    // confirm against the ONNX files this build actually uses.
    /*
    if (dimensions > rows) // Check if the shape[2] is more than shape[1] (yolov8)
    {
        std::cout << "yolov8 = " << yolov8 << std::endl;
        yolov8 = true;
        rows = outputs[0].size[2];
        dimensions = outputs[0].size[1];

        outputs[0] = outputs[0].reshape(1, dimensions);
        cv::transpose(outputs[0], outputs[0]);
    }
    */

    float *data = (float *)outputs[0].data;

    // Scale factors from model input space back to the (possibly squared)
    // input frame. modelShape is Size2f, so this is float division.
    float x_factor = modelInput.cols / modelShape.width;
    float y_factor = modelInput.rows / modelShape.height;

    std::vector<int> class_ids;
    std::vector<float> confidences;
    std::vector<cv::Rect> boxes;

    for (int i = 0; i < rows; ++i)
    {
        if (yolov8)
        {
            // Row layout: [cx, cy, w, h, class scores...].
            float *classes_scores = data+4;

            cv::Mat scores(1, classes.size(), CV_32FC1, classes_scores);
            cv::Point class_id;
            double maxClassScore;

            minMaxLoc(scores, 0, &maxClassScore, 0, &class_id);

            if (maxClassScore > modelScoreThreshold)
            {
                confidences.push_back(maxClassScore);
                class_ids.push_back(class_id.x);

                float x = data[0];
                float y = data[1];
                float w = data[2];
                float h = data[3];

                // Center/size -> top-left, scaled back to frame coordinates.
                int left = int((x - 0.5 * w) * x_factor);
                int top = int((y - 0.5 * h) * y_factor);

                int width = int(w * x_factor);
                int height = int(h * y_factor);

                boxes.push_back(cv::Rect(left, top, width, height));
            }
        }
        else // yolov5
        {
            // Row layout: [cx, cy, w, h, objectness, class scores...].
            float confidence = data[4];

            if (confidence >= modelConfidenceThreshold)
            {
                float *classes_scores = data+5;

                cv::Mat scores(1, classes.size(), CV_32FC1, classes_scores);
                cv::Point class_id;
                double max_class_score;

                minMaxLoc(scores, 0, &max_class_score, 0, &class_id);

                if (max_class_score > modelScoreThreshold)
                {
                    confidences.push_back(confidence);
                    class_ids.push_back(class_id.x);

                    float x = data[0];
                    float y = data[1];
                    float w = data[2];
                    float h = data[3];

                    int left = int((x - 0.5 * w) * x_factor);
                    int top = int((y - 0.5 * h) * y_factor);

                    int width = int(w * x_factor);
                    int height = int(h * y_factor);

                    boxes.push_back(cv::Rect(left, top, width, height));
                }
            }
        }

        data += dimensions;
    }

    // Non-maximum suppression over all candidate boxes.
    std::vector<int> nms_result;
    cv::dnn::NMSBoxes(boxes, confidences, modelScoreThreshold, modelNMSThreshold, nms_result);

    std::vector<Detection> detections{};
    for (unsigned long i = 0; i < nms_result.size(); ++i)
    {
        int idx = nms_result[i];

        Detection result;
        result.class_id = class_ids[idx];
        result.confidence = confidences[idx];

        // Random display color per detection (visualization only).
        std::random_device rd;
        std::mt19937 gen(rd());
        std::uniform_int_distribution<int> dis(100, 255);
        result.color = cv::Scalar(dis(gen),
                                  dis(gen),
                                  dis(gen));

        result.className = classes[result.class_id];
        result.box = boxes[idx];

        detections.push_back(result);
    }

    return detections;
}
|
||||
|
||||
void Inference::loadClassesFromFile()
|
||||
{
|
||||
std::ifstream inputFile(classesPath);
|
||||
if (inputFile.is_open())
|
||||
{
|
||||
std::string classLine;
|
||||
while (std::getline(inputFile, classLine))
|
||||
classes.push_back(classLine);
|
||||
inputFile.close();
|
||||
}
|
||||
}
|
||||
|
||||
void Inference::loadOnnxNetwork()
|
||||
{
|
||||
printf("loadOnnxNetwork() starts\n");
|
||||
|
||||
net = cv::dnn::readNetFromONNX(modelPath);
|
||||
|
||||
if (cudaEnabled)
|
||||
{
|
||||
std::cout << "\nRunning on CUDA" << std::endl;
|
||||
net.setPreferableBackend(cv::dnn::DNN_BACKEND_CUDA);
|
||||
net.setPreferableTarget(cv::dnn::DNN_TARGET_CUDA);
|
||||
}
|
||||
else
|
||||
{
|
||||
std::cout << "\nRunning on CPU" << std::endl;
|
||||
net.setPreferableBackend(cv::dnn::DNN_BACKEND_OPENCV);
|
||||
net.setPreferableTarget(cv::dnn::DNN_TARGET_CPU);
|
||||
}
|
||||
}
|
||||
|
||||
// Embeds the frame in the top-left corner of a black square whose side
// equals the larger of the two input dimensions.
cv::Mat Inference::formatToSquare(const cv::Mat &source)
{
    const int side = MAX(source.cols, source.rows);
    cv::Mat squared = cv::Mat::zeros(side, side, CV_8UC3);
    source.copyTo(squared(cv::Rect(0, 0, source.cols, source.rows)));
    return squared;
}
|
||||
|
||||
// Returns a copy of 'image' with a green box and a "<class>: <confidence>"
// caption (black text on a white strip) for every detection.
cv::Mat Inference::drawLabels(const cv::Mat &image, const std::vector<Detection> &detections)
{
    cv::Mat annotated = image.clone();

    for (const Detection &det : detections)
    {
        cv::rectangle(annotated, det.box, cv::Scalar(0, 255, 0), 2);

        const std::string label = det.className + ": " + std::to_string(det.confidence);
        int baseLine = 0;
        const cv::Size labelSize = cv::getTextSize(label, cv::FONT_HERSHEY_SIMPLEX, 0.5, 1, &baseLine);

        // White background strip sized to the caption.
        const cv::Point stripTopLeft(det.box.x, det.box.y - labelSize.height);
        const cv::Point stripBottomRight(det.box.x + labelSize.width, det.box.y + baseLine);
        cv::rectangle(annotated, stripTopLeft, stripBottomRight,
                      cv::Scalar(255, 255, 255), cv::FILLED);

        cv::putText(annotated, label,
                    cv::Point(det.box.x, det.box.y),
                    cv::FONT_HERSHEY_SIMPLEX, 0.5, cv::Scalar(0, 0, 0), 1);
    }

    return annotated;
}
|
||||
@@ -0,0 +1,51 @@
|
||||
#pragma once

// Cpp native
#include <fstream>
#include <vector>
#include <string>
#include <random>

// OpenCV / DNN / Inference
#include <opencv2/imgproc.hpp>
#include <opencv2/opencv.hpp>
#include <opencv2/dnn.hpp>

// One detection returned by Inference::runInference().
struct Detection
{
    int class_id{0};          // index into the class list
    std::string className{};  // human-readable class name
    float confidence{0.0};    // detection confidence
    cv::Scalar color{};       // random display color (visualization only)
    cv::Rect box{};           // bounding box in (possibly squared) frame coords
};

// OpenCV-DNN wrapper for YOLO ONNX models: loads the network in the
// constructor, decodes + NMS-filters detections in runInference(), and can
// draw annotated frames via drawLabels().
class Inference
{
public:
    Inference(const std::string &onnxModelPath, const cv::Size &modelInputShape = {640, 640}, const std::string &classesTxtFile = "", const bool &runWithCuda = false);
    std::vector<Detection> runInference(const cv::Mat &input);
    cv::Mat drawLabels(const cv::Mat &image, const std::vector<Detection> &detections);

private:
    // Appends names from classesPath; currently unused (classes hard-coded).
    void loadClassesFromFile();
    // Reads the ONNX file and selects the CUDA/CPU backend.
    void loadOnnxNetwork();

    // Pads the frame to a black square (top-left anchored).
    cv::Mat formatToSquare(const cv::Mat &source);

    std::string modelPath{};
    std::string classesPath{};
    bool cudaEnabled{};

    // COCO class names, hard-coded (see ctor comment).
    std::vector<std::string> classes{"person", "bicycle", "car", "motorcycle", "airplane", "bus", "train", "truck", "boat", "traffic light", "fire hydrant", "stop sign", "parking meter", "bench", "bird", "cat", "dog", "horse", "sheep", "cow", "elephant", "bear", "zebra", "giraffe", "backpack", "umbrella", "handbag", "tie", "suitcase", "frisbee", "skis", "snowboard", "sports ball", "kite", "baseball bat", "baseball glove", "skateboard", "surfboard", "tennis racket", "bottle", "wine glass", "cup", "fork", "knife", "spoon", "bowl", "banana", "apple", "sandwich", "orange", "broccoli", "carrot", "hot dog", "pizza", "donut", "cake", "chair", "couch", "potted plant", "bed", "dining table", "toilet", "tv", "laptop", "mouse", "remote", "keyboard", "cell phone", "microwave", "oven", "toaster", "sink", "refrigerator", "book", "clock", "vase", "scissors", "teddy bear", "hair drier", "toothbrush"};

    // Float size so runInference() gets float scale factors.
    cv::Size2f modelShape{};

    float modelConfidenceThreshold {0.25};  // yolov5 objectness threshold
    float modelScoreThreshold {0.45};       // per-class score threshold
    float modelNMSThreshold {0.50};         // NMS IoU threshold

    bool letterBoxForSquare = false;  // enable formatToSquare() in runInference()

    cv::dnn::Net net;
};
|
||||
@@ -0,0 +1,175 @@
|
||||
#include <QDebug>
|
||||
#include <QThread>
|
||||
#include <opencv2/imgproc.hpp>
|
||||
#include <opencv2/highgui.hpp>
|
||||
#include <iomanip>
|
||||
#include "aiengineinferenceopi5.h"
|
||||
#include "file_utils.h"
|
||||
#include "image_drawing.h"
|
||||
|
||||
|
||||
// Stores the model path via the base class; the actual RKNN model/context
// setup happens later in initialize().
AiEngineInferenceOpi5::AiEngineInferenceOpi5(QString modelPath, QObject *parent)
    : AiEngineInference{modelPath, parent}
{
    qDebug() << "AiEngineInferenceOpi5() test mModelPath=" << mModelPath;
}
|
||||
|
||||
|
||||
// Loads the RKNN YOLOv8 model and the shared post-processing tables for this
// inference instance ('number' is kept in mNumber for logging/identification).
// On model-load failure the error is logged and the method returns early,
// leaving the context unusable — callers get no error indication.
void AiEngineInferenceOpi5::initialize(int number)
{
    mNumber = number;

    // Zero the context before init, as the RKNN helper API expects.
    memset(&mRrknnAppCtx0, 0, sizeof(rknn_app_context_t));
    init_post_process();

    int ret = init_yolov8_model(mModelPath.toLocal8Bit(), &mRrknnAppCtx0);
    if (ret != 0) {
        qDebug() << "init_yolov8_model() failure! ret: " << ret << "modelPath = " << mModelPath << "number:" << number;
        return;
    }
}
|
||||
|
||||
|
||||
// Releases the post-processing tables and the RKNN model context acquired
// in initialize().
AiEngineInferenceOpi5::~AiEngineInferenceOpi5()
{
    deinit_post_process();
    release_yolov8_model(&mRrknnAppCtx0);
}
|
||||
|
||||
|
||||
// Deep-copies a BGR cv::Mat into a freshly allocated RGB888 image_buffer_t
// for the RKNN API. Ownership of virt_addr transfers to the caller, who MUST
// release it with freeImageBuffer() — it is allocated with new[].
image_buffer_t AiEngineInferenceOpi5::convertCV2FrameToImageBuffer(const cv::Mat& bgrFrame)
{
    // Convert BGR to RGB
    cv::Mat rgbFrame;
    cv::cvtColor(bgrFrame, rgbFrame, cv::COLOR_BGR2RGB);

    image_buffer_t imgBuffer;
    memset(&imgBuffer, 0, sizeof(image_buffer_t));
    imgBuffer.width = rgbFrame.cols;
    imgBuffer.height = rgbFrame.rows;
    // NOTE(review): width_stride is set in bytes (Mat::step) while
    // height_stride is in rows — confirm the units image_buffer_t expects.
    imgBuffer.width_stride = rgbFrame.step;
    imgBuffer.height_stride = rgbFrame.rows;
    imgBuffer.format = IMAGE_FORMAT_RGB888;
    imgBuffer.size = rgbFrame.total() * rgbFrame.elemSize();

    // Deep copy: rgbFrame is a local and its pixel data dies with this scope.
    imgBuffer.virt_addr = new unsigned char[imgBuffer.size];
    std::memcpy(imgBuffer.virt_addr, rgbFrame.data, imgBuffer.size);

    return imgBuffer;
}
|
||||
|
||||
|
||||
// Releases the pixel buffer allocated by convertCV2FrameToImageBuffer() and
// nulls the pointer so a second call is harmless.
void AiEngineInferenceOpi5::freeImageBuffer(image_buffer_t& imgBuffer)
{
    if (!imgBuffer.virt_addr)
        return;

    delete[] imgBuffer.virt_addr;
    imgBuffer.virt_addr = nullptr;
}
|
||||
|
||||
|
||||
// Scales the frame so it fits inside 640x640 while preserving its aspect
// ratio, then pastes it into the top-left corner of a black 640x640 canvas.
// (Despite the name, the scale factor depends on the input size, not 1/2.)
cv::Mat AiEngineInferenceOpi5::resizeToHalfAndAssigntoTopLeft640x640(const cv::Mat& inputFrame)
{
    const int targetWidth = 640;
    const int targetHeight = 640;

    // Fit to the target width first; fall back to fitting the height when
    // the frame is taller than wide.
    float aspectRatio = static_cast<float>(inputFrame.cols) / static_cast<float>(inputFrame.rows);
    int newWidth = targetWidth;
    int newHeight = static_cast<int>(targetWidth / aspectRatio);
    if (newHeight > targetHeight) {
        newHeight = targetHeight;
        newWidth = static_cast<int>(targetHeight * aspectRatio);
    }

    cv::Mat scaled;
    cv::resize(inputFrame, scaled, cv::Size(newWidth, newHeight));

    cv::Mat canvas = cv::Mat::zeros(targetHeight, targetWidth, inputFrame.type());
    scaled.copyTo(canvas(cv::Rect(cv::Point(0, 0), scaled.size())));

    return canvas;
}
|
||||
|
||||
|
||||
// Draw detection boxes and "<class> <prob>%" labels onto the image.
// A class id outside mClassNames is logged and wrapped with modulo when
// indexing, so a bad id never reads out of range.
void AiEngineInferenceOpi5::drawObjects(cv::Mat& image, const object_detect_result_list& result_list)
{
    for (int i = 0; i < result_list.count; i++) {
        const object_detect_result& result = result_list.results[i];

        if (result.cls_id >= mClassNames.size()) {
            qDebug() << "Class id >= mClassNames.size() Reducing it.";
        }

        fprintf(stderr, "Inference[%d] probability = %f\n", i, result.prop * 100);

        const int left = result.box.left;
        const int top = result.box.top;
        const int right = result.box.right;
        const int bottom = result.box.bottom;

        cv::rectangle(image, cv::Point(left, top), cv::Point(right, bottom), cv::Scalar(255, 0, 0), 2);

        // Label text. snprintf (not sprintf) so a long class name cannot
        // overflow the fixed-size buffer.
        char c_text[256];
        snprintf(c_text, sizeof(c_text), "%s %d%%",
                 mClassNames[result.cls_id % mClassNames.size()].toStdString().c_str(),
                 (int)(round(result.prop * 100)));
        cv::Point textOrg(left, top - 5);
        cv::putText(image, std::string(c_text), textOrg, cv::FONT_HERSHEY_COMPLEX, 0.70, cv::Scalar(0, 0, 255), 1, cv::LINE_AA);
    }
}
|
||||
|
||||
|
||||
// Run one YOLOv8 inference pass on a frame: letterbox to 640x640, convert
// to an RGB image buffer, run the RKNN model, then emit resultsReady() with
// the detections and an annotated copy of the scaled frame. mActive flags
// that an inference is in flight for the duration of the call.
void AiEngineInferenceOpi5::performInferenceSlot(cv::Mat frame)
{
    //qDebug() << "performInferenceSlot() in thread: " << QThread::currentThreadId();

    mActive = true;

    cv::Mat scaledFrame = resizeToHalfAndAssigntoTopLeft640x640(frame);
    image_buffer_t imgBuffer = convertCV2FrameToImageBuffer(scaledFrame);

    object_detect_result_list od_results;
    const int status = inference_yolov8_model(&mRrknnAppCtx0, &imgBuffer, &od_results, mNumber);
    freeImageBuffer(imgBuffer);
    if (status != 0) {
        qDebug() << "AiEngineInferenceOpi5::performInferenceSlot() failure! ret: " << status;
        mActive = false;
        return;
    }

    // Translate the raw detections into the engine's result type.
    AiEngineInferenceResult result;
    for (int i = 0; i < od_results.count; i++) {
        const object_detect_result& det = od_results.results[i];
        qDebug() << "____box:" << det.box.top << det.box.left << det.box.bottom << det.box.right << "\n";

        AiEngineObject object;
        object.classId = det.cls_id;
        object.propability = det.prop;
        object.rectangle.top = det.box.top;
        object.rectangle.left = det.box.left;
        object.rectangle.bottom = det.box.bottom;
        object.rectangle.right = det.box.right;
        result.objects.append(object);
    }

    drawObjects(scaledFrame, od_results);
    result.frame = scaledFrame.clone();
    emit resultsReady(result);

    mActive = false;
}
|
||||
@@ -0,0 +1,26 @@
|
||||
#pragma once

#include <QObject>
#include "aiengineinference.h"
#include "yolov8.h"
#include "image_utils.h"

// RKNN-backed implementation of AiEngineInference for the Orange Pi 5.
// Wraps a YOLOv8 model context (rknn_app_context_t) and converts between
// OpenCV frames and the rknn_model_zoo image_buffer_t representation.
class AiEngineInferenceOpi5 : public AiEngineInference
{
    Q_OBJECT
public:
    explicit AiEngineInferenceOpi5(QString modelPath, QObject *parent = nullptr);
    ~AiEngineInferenceOpi5();
    // Loads the YOLOv8 model and post-processing state; 'number' tags this
    // engine instance for the underlying inference calls.
    void initialize(int number) override;

public slots:
    // Runs one inference pass on 'frame' and emits resultsReady().
    void performInferenceSlot(cv::Mat frame) override;

private:
    // Copies a BGR cv::Mat into a heap-allocated RGB888 image_buffer_t.
    image_buffer_t convertCV2FrameToImageBuffer(const cv::Mat& bgrFrame);
    // Frees pixel data allocated by convertCV2FrameToImageBuffer().
    void freeImageBuffer(image_buffer_t& imgBuffer);
    // Letterboxes a frame into a 640x640 canvas, top-left aligned.
    cv::Mat resizeToHalfAndAssigntoTopLeft640x640(const cv::Mat& inputFrame);
    // Draws detection boxes and class labels onto the image.
    void drawObjects(cv::Mat& image, const object_detect_result_list& result_list);

    rknn_app_context_t mRrknnAppCtx0;
};
|
||||
Binary file not shown.
|
After Width: | Height: | Size: 602 KiB |
@@ -0,0 +1,42 @@
|
||||
#ifndef _RKNN_MODEL_ZOO_COMMON_H_
#define _RKNN_MODEL_ZOO_COMMON_H_

/**
 * @brief Image pixel format
 *
 */
typedef enum {
    IMAGE_FORMAT_GRAY8,
    IMAGE_FORMAT_RGB888,
    IMAGE_FORMAT_RGBA8888,
    IMAGE_FORMAT_YUV420SP_NV21,
    IMAGE_FORMAT_YUV420SP_NV12,
} image_format_t;

/**
 * @brief Image buffer
 *
 */
typedef struct {
    int width;                 // visible width in pixels
    int height;                // visible height in pixels
    int width_stride;          // row stride; may exceed width
    int height_stride;         // vertical stride; may exceed height
    image_format_t format;     // pixel layout of virt_addr
    unsigned char* virt_addr;  // CPU-visible pixel data
    int size;                  // total byte size of virt_addr
    int fd;                    // buffer fd (used for RGA import when > 0)
} image_buffer_t;

/**
 * @brief Image rectangle. Edges are inclusive: users compute
 *        width as right - left + 1.
 *
 */
typedef struct {
    int left;
    int top;
    int right;
    int bottom;
} image_rect_t;

#endif //_RKNN_MODEL_ZOO_COMMON_H_
|
||||
@@ -0,0 +1,129 @@
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
|
||||
#define MAX_TEXT_LINE_LENGTH 1024
|
||||
|
||||
/*
 * Load a binary model file fully into memory.
 *
 * filename   - path of the model file
 * model_size - out: number of bytes read (untouched on failure)
 *
 * Returns a malloc()ed buffer the caller must free(), or NULL on any
 * open/size/alloc/read failure. malloc and ftell results are now checked.
 */
unsigned char* load_model(const char* filename, int* model_size)
{
    FILE* fp = fopen(filename, "rb");
    if (fp == NULL) {
        printf("fopen %s fail!\n", filename);
        return NULL;
    }
    fseek(fp, 0, SEEK_END);
    long model_len = ftell(fp);
    if (model_len < 0) {
        printf("ftell %s fail!\n", filename);
        fclose(fp);
        return NULL;
    }
    unsigned char* model = (unsigned char*)malloc(model_len);
    if (model == NULL) {
        printf("malloc %ld bytes fail!\n", model_len);
        fclose(fp);
        return NULL;
    }
    fseek(fp, 0, SEEK_SET);
    if ((size_t)model_len != fread(model, 1, model_len, fp)) {
        printf("fread %s fail!\n", filename);
        free(model);
        fclose(fp);
        return NULL;
    }
    *model_size = (int)model_len;
    fclose(fp);
    return model;
}
|
||||
|
||||
/*
 * Read an entire file into a NUL-terminated heap buffer.
 *
 * path     - file to read
 * out_data - out: malloc()ed buffer holding the file bytes plus a trailing
 *            '\0'; caller must free(). Untouched on failure.
 *
 * Returns the file size in bytes, or -1 on failure.
 * malloc and ftell results are now checked.
 */
int read_data_from_file(const char *path, char **out_data)
{
    FILE *fp = fopen(path, "rb");
    if (fp == NULL) {
        printf("fopen %s fail!\n", path);
        return -1;
    }
    fseek(fp, 0, SEEK_END);
    long file_size = ftell(fp);
    if (file_size < 0) {
        printf("ftell %s fail!\n", path);
        fclose(fp);
        return -1;
    }
    char *data = (char *)malloc(file_size + 1);
    if (data == NULL) {
        printf("malloc %ld bytes fail!\n", file_size + 1);
        fclose(fp);
        return -1;
    }
    data[file_size] = 0;
    fseek(fp, 0, SEEK_SET);
    if ((size_t)file_size != fread(data, 1, file_size, fp)) {
        printf("fread %s fail!\n", path);
        free(data);
        fclose(fp);
        return -1;
    }
    fclose(fp);
    *out_data = data;
    return (int)file_size;
}
|
||||
|
||||
/*
 * Write a byte buffer to a file, replacing any existing content.
 *
 * Opened in binary mode ("wb") — callers pass binary data (e.g. the JPEG
 * bytes from write_image_jpeg), and text mode would corrupt them on
 * platforms that translate line endings. The fwrite count is now checked.
 *
 * Returns 0 on success, -1 on open or short-write failure.
 */
int write_data_to_file(const char *path, const char *data, unsigned int size)
{
    FILE *fp = fopen(path, "wb");
    if (fp == NULL) {
        printf("open error: %s\n", path);
        return -1;
    }

    size_t written = fwrite(data, 1, size, fp);
    fflush(fp);
    fclose(fp);

    if (written != size) {
        printf("write error: %s\n", path);
        return -1;
    }
    return 0;
}
|
||||
|
||||
/*
 * Count the lines in an open text stream: number of '\n' characters plus
 * one, so a final line without a trailing newline still counts.
 * Rewinds the stream before returning so the caller can read from the start.
 *
 * BUG FIX: fgetc() must be stored in an int. The previous 'char ch' made
 * EOF (-1) indistinguishable from a valid 0xFF byte and, on platforms where
 * char is unsigned, never matched EOF semantics cleanly.
 */
int count_lines(FILE* file)
{
    int count = 0;
    int ch;

    while ((ch = fgetc(file)) != EOF)
    {
        if (ch == '\n')
        {
            count++;
        }
    }
    count += 1;

    rewind(file);
    return count;
}
|
||||
|
||||
/*
 * Read every line of a text file into a heap string array.
 *
 * filename   - file to read
 * line_count - out: allocated slot count (newlines + 1). If the file ends
 *              with '\n' the last slot stays NULL; free_lines() handles it.
 *
 * Returns a malloc()ed array of malloc()ed, newline-stripped strings;
 * release with free_lines(). Returns NULL on open/alloc failure.
 *
 * BUG FIX: a line longer than MAX_TEXT_LINE_LENGTH-1 bytes makes fgets()
 * return more chunks than count_lines() counted, which previously wrote
 * past the end of 'lines'. The loop is now bounded by num_lines (excess
 * chunks are dropped). mallocs are also checked.
 */
char** read_lines_from_file(const char* filename, int* line_count)
{
    FILE* file = fopen(filename, "r");
    if (file == NULL) {
        printf("Failed to open the file.\n");
        return NULL;
    }

    int num_lines = count_lines(file);
    printf("num_lines=%d\n", num_lines);
    char** lines = (char**)malloc(num_lines * sizeof(char*));
    if (lines == NULL) {
        fclose(file);
        return NULL;
    }
    memset(lines, 0, num_lines * sizeof(char*));

    char buffer[MAX_TEXT_LINE_LENGTH];
    int line_index = 0;

    while (line_index < num_lines && fgets(buffer, sizeof(buffer), file) != NULL) {
        buffer[strcspn(buffer, "\n")] = '\0'; // strip the trailing newline

        lines[line_index] = (char*)malloc(strlen(buffer) + 1);
        if (lines[line_index] == NULL) {
            free_lines(lines, line_index);
            fclose(file);
            return NULL;
        }
        strcpy(lines[line_index], buffer);

        line_index++;
    }

    fclose(file);

    *line_count = num_lines;
    return lines;
}
|
||||
|
||||
/*
 * Free a string array produced by read_lines_from_file(): every entry,
 * then the array itself. NULL entries are tolerated (free(NULL) is a no-op).
 */
void free_lines(char** lines, int line_count)
{
    for (int i = 0; i < line_count; ++i) {
        free(lines[i]);
    }
    free(lines);
}
|
||||
@@ -0,0 +1,48 @@
|
||||
#ifndef _RKNN_MODEL_ZOO_FILE_UTILS_H_
#define _RKNN_MODEL_ZOO_FILE_UTILS_H_

#ifdef __cplusplus
extern "C" {
#endif

/**
 * @brief Read data from file
 *
 * @param path [in] File path
 * @param out_data [out] Read data (heap allocated, caller frees)
 * @return int -1: error; > 0: Read data size
 */
int read_data_from_file(const char *path, char **out_data);

/**
 * @brief Write data to file
 *
 * @param path [in] File path
 * @param data [in] Write data
 * @param size [in] Write data size
 * @return int 0: success; -1: error
 */
int write_data_to_file(const char *path, const char *data, unsigned int size);

/**
 * @brief Read all lines from text file
 *
 * @param path [in] File path
 * @param line_count [out] File line count
 * @return char** String array of all lines, remember to call free_lines() to release after use
 */
char** read_lines_from_file(const char* path, int* line_count);

/**
 * @brief Free lines string array
 *
 * @param lines [in] String array
 * @param line_count [in] Line count
 */
void free_lines(char** lines, int line_count);

#ifdef __cplusplus
} // extern "C"
#endif

#endif //_RKNN_MODEL_ZOO_FILE_UTILS_H_
|
||||
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
@@ -0,0 +1,89 @@
|
||||
#ifndef _RKNN_MODEL_ZOO_IMAGE_DRAWING_H_
#define _RKNN_MODEL_ZOO_IMAGE_DRAWING_H_

#ifdef __cplusplus
extern "C" {
#endif

#include "common.h"

// Simple CPU drawing primitives operating directly on image_buffer_t pixels.

// Color Format ARGB8888
#define COLOR_GREEN 0xFF00FF00
#define COLOR_BLUE 0xFF0000FF
#define COLOR_RED 0xFFFF0000
#define COLOR_YELLOW 0xFFFFFF00
#define COLOR_ORANGE 0xFFFF4500
#define COLOR_BLACK 0xFF000000
#define COLOR_WHITE 0xFFFFFFFF

/**
 * @brief Draw rectangle
 *
 * @param image [in] Image buffer
 * @param rx [in] Rectangle top left x
 * @param ry [in] Rectangle top left y
 * @param rw [in] Rectangle width
 * @param rh [in] Rectangle height
 * @param color [in] Rectangle line color (ARGB8888)
 * @param thickness [in] Rectangle line thickness
 */
void draw_rectangle(image_buffer_t* image, int rx, int ry, int rw, int rh, unsigned int color,
                    int thickness);

/**
 * @brief Draw line
 *
 * @param image [in] Image buffer
 * @param x0 [in] Line begin point x
 * @param y0 [in] Line begin point y
 * @param x1 [in] Line end point x
 * @param y1 [in] Line end point y
 * @param color [in] Line color (ARGB8888)
 * @param thickness [in] Line thickness
 */
void draw_line(image_buffer_t* image, int x0, int y0, int x1, int y1, unsigned int color,
               int thickness);

/**
 * @brief Draw text (only support ASCII char)
 *
 * @param image [in] Image buffer
 * @param text [in] Text
 * @param x [in] Text position x
 * @param y [in] Text position y
 * @param color [in] Text color (ARGB8888)
 * @param fontsize [in] Text fontsize
 */
void draw_text(image_buffer_t* image, const char* text, int x, int y, unsigned int color,
               int fontsize);

/**
 * @brief Draw circle
 *
 * @param image [in] Image buffer
 * @param cx [in] Circle center x
 * @param cy [in] Circle center y
 * @param radius [in] Circle radius
 * @param color [in] Circle color (ARGB8888)
 * @param thickness [in] Circle thickness
 */
void draw_circle(image_buffer_t* image, int cx, int cy, int radius, unsigned int color,
                 int thickness);

/**
 * @brief Draw image
 *
 * @param image [in] Target Image buffer
 * @param draw_img [in] Image for drawing
 * @param x [in] Target Image draw position x
 * @param y [in] Target Image draw position y
 * @param rw [in] Width of image for drawing
 * @param rh [in] Height of image for drawing
 */
void draw_image(image_buffer_t* image, unsigned char* draw_img, int x, int y, int rw, int rh);

#ifdef __cplusplus
} // extern "C"
#endif

#endif // _RKNN_MODEL_ZOO_IMAGE_DRAWING_H_
|
||||
@@ -0,0 +1,782 @@
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <dirent.h>
|
||||
#include <math.h>
|
||||
#include <sys/time.h>
|
||||
|
||||
#include "im2d.h"
|
||||
#include "drmrga.h"
|
||||
|
||||
#define STB_IMAGE_IMPLEMENTATION
|
||||
#define STBI_NO_THREAD_LOCALS
|
||||
#define STBI_ONLY_JPEG
|
||||
#define STBI_ONLY_PNG
|
||||
#include "stb_image.h"
|
||||
|
||||
#define STB_IMAGE_WRITE_IMPLEMENTATION
|
||||
#include "stb_image_write.h"
|
||||
|
||||
#include "turbojpeg.h"
|
||||
|
||||
#include "image_utils.h"
|
||||
#include "file_utils.h"
|
||||
|
||||
// File-name patterns accepted by image_file_filter(); NULL-terminated list.
static const char* filter_image_names[] = {
    "jpg",
    "jpeg",
    "JPG",
    "JPEG",
    "png",
    "PNG",
    "data",
    NULL
};

// Human-readable names for TurboJPEG subsampling / colorspace codes.
static const char* subsampName[TJ_NUMSAMP] = {"4:4:4", "4:2:2", "4:2:0", "Grayscale", "4:4:0", "4:1:1"};

static const char* colorspaceName[TJ_NUMCS] = {"RGB", "YCbCr", "GRAY", "CMYK", "YCCK"};

// scandir() filter: accept a directory entry whose name contains any of the
// patterns above. Note this is a substring match, not a strict extension
// check (e.g. "jpg_notes.txt" also matches).
static int image_file_filter(const struct dirent *entry)
{
    for (const char** pattern = filter_image_names; *pattern != NULL; ++pattern) {
        if (strstr(entry->d_name, *pattern) != NULL) {
            return 1;
        }
    }
    return 0;
}
|
||||
|
||||
/*
 * Decode a JPEG file into image->virt_addr as RGB888 using TurboJPEG.
 * If image->virt_addr is NULL an RGB buffer is malloc()ed and handed to the
 * caller via the image struct; otherwise the caller's buffer is filled.
 * Returns 0 on success (including some partially-failed paths, see notes)
 * and -1 when the JPEG header cannot be parsed.
 *
 * NOTE(review): fopen/fseek/malloc/fread failures are only logged and then
 * execution continues with the bad handle/buffer — confirm callers always
 * pass valid paths. jpegBuf and the tj handle leak on the early 'return -1'
 * paths. 'size' is unsigned long, so '(size = ftell(...)) < 0' can never be
 * true. imgBuf, padding, orientation and tv1/tv2 are unused.
 */
static int read_image_jpeg(const char* path, image_buffer_t* image)
{
    FILE* jpegFile = NULL;
    unsigned long jpegSize;
    int flags = 0;
    int width, height;
    int origin_width, origin_height;
    unsigned char* imgBuf = NULL;
    unsigned char* jpegBuf = NULL;
    unsigned long size;
    unsigned short orientation = 1;
    struct timeval tv1, tv2;

    if ((jpegFile = fopen(path, "rb")) == NULL) {
        printf("open input file failure\n");
    }
    if (fseek(jpegFile, 0, SEEK_END) < 0 || (size = ftell(jpegFile)) < 0 || fseek(jpegFile, 0, SEEK_SET) < 0) {
        printf("determining input file size failure\n");
    }
    if (size == 0) {
        printf("determining input file size, Input file contains no data\n");
    }
    jpegSize = (unsigned long)size;
    if ((jpegBuf = (unsigned char*)malloc(jpegSize * sizeof(unsigned char))) == NULL) {
        printf("allocating JPEG buffer\n");
    }
    if (fread(jpegBuf, jpegSize, 1, jpegFile) < 1) {
        printf("reading input file");
    }
    fclose(jpegFile);
    jpegFile = NULL;

    tjhandle handle = NULL;
    int subsample, colorspace;
    int padding = 1;
    int ret = 0;

    // First header parse: original dimensions.
    handle = tjInitDecompress();
    ret = tjDecompressHeader3(handle, jpegBuf, size, &origin_width, &origin_height, &subsample, &colorspace);
    if (ret < 0) {
        printf("header file error, errorStr:%s, errorCode:%d\n", tjGetErrorStr(), tjGetErrorCode(handle));
        return -1;
    }

    // Crop the image to 16-pixel alignment, which helps later RGA operations.
    int crop_width = origin_width / 16 * 16;
    int crop_height = origin_height / 16 * 16;

    printf("origin size=%dx%d crop size=%dx%d\n", origin_width, origin_height, crop_width, crop_height);

    // gettimeofday(&tv1, NULL);
    // Second header parse fills width/height actually used below.
    ret = tjDecompressHeader3(handle, jpegBuf, size, &width, &height, &subsample, &colorspace);
    if (ret < 0) {
        printf("header file error, errorStr:%s, errorCode:%d\n", tjGetErrorStr(), tjGetErrorCode(handle));
        return -1;
    }
    printf("input image: %d x %d, subsampling: %s, colorspace: %s, orientation: %d\n",
        width, height, subsampName[subsample], colorspaceName[colorspace], orientation);
    int sw_out_size = width * height * 3;
    // Reuse the caller's buffer when provided; otherwise allocate one.
    unsigned char* sw_out_buf = image->virt_addr;
    if (sw_out_buf == NULL) {
        sw_out_buf = (unsigned char*)malloc(sw_out_size * sizeof(unsigned char));
    }
    if (sw_out_buf == NULL) {
        printf("sw_out_buf is NULL\n");
        goto out;
    }

    flags |= 0;

    // TurboJPEG convention: error code 0 means warning, -1 means error.
    int pixelFormat = TJPF_RGB;
    ret = tjDecompress2(handle, jpegBuf, size, sw_out_buf, width, 0, height, pixelFormat, flags);
    // ret = tjDecompressToYUV2(handle, jpeg_buf, size, dst_buf, *width, padding, *height, flags);
    if ((0 != tjGetErrorCode(handle)) && (ret < 0)) {
        printf("error : decompress to yuv failed, errorStr:%s, errorCode:%d\n", tjGetErrorStr(),
            tjGetErrorCode(handle));
        goto out;
    }
    if ((0 == tjGetErrorCode(handle)) && (ret < 0)) {
        printf("warning : errorStr:%s, errorCode:%d\n", tjGetErrorStr(), tjGetErrorCode(handle));
    }
    tjDestroy(handle);
    // gettimeofday(&tv2, NULL);
    // printf("decode time %ld ms\n", (tv2.tv_sec-tv1.tv_sec)*1000 + (tv2.tv_usec-tv1.tv_usec)/1000);

    image->width = width;
    image->height = height;
    image->format = IMAGE_FORMAT_RGB888;
    image->virt_addr = sw_out_buf;
    image->size = sw_out_size;
out:
    if (jpegBuf) {
        free(jpegBuf);
    }
    return 0;
}
|
||||
|
||||
/*
 * Read a raw ".data" image file straight into image->virt_addr.
 * If image->virt_addr is NULL, a buffer of file size + 1 bytes is allocated
 * and ownership passes to the caller (image->size is set); otherwise the
 * caller's buffer is filled in place.
 * Returns 0 on success, -1 on failure.
 *
 * BUG FIX: on a short read the old code free()d 'data' even when it was the
 * caller's buffer, and leaked the FILE handle. Only buffers allocated here
 * are freed now, and fclose() runs on every path. malloc is checked.
 */
static int read_image_raw(const char* path, image_buffer_t* image)
{
    FILE *fp = fopen(path, "rb");
    if (fp == NULL) {
        printf("fopen %s fail!\n", path);
        return -1;
    }
    fseek(fp, 0, SEEK_END);
    int file_size = ftell(fp);
    int allocated = 0;
    unsigned char *data = image->virt_addr;
    if (data == NULL) {
        data = (unsigned char *)malloc(file_size + 1);
        if (data == NULL) {
            printf("malloc fail!\n");
            fclose(fp);
            return -1;
        }
        allocated = 1;
    }
    // NOTE(review): writing the terminator assumes a caller-provided buffer
    // has at least file_size + 1 bytes — confirm against callers.
    data[file_size] = 0;
    fseek(fp, 0, SEEK_SET);
    if (file_size != (int)fread(data, 1, file_size, fp)) {
        printf("fread %s fail!\n", path);
        if (allocated) {
            free(data); // never free the caller's buffer
        }
        fclose(fp);
        return -1;
    }
    fclose(fp);
    if (allocated) {
        image->virt_addr = data;
        image->size = file_size;
    }

    return 0;
}
|
||||
|
||||
/*
 * Compress an RGB888 image buffer to a JPEG file via TurboJPEG (4:2:2
 * subsampling) and write it with write_data_to_file().
 * Only IMAGE_FORMAT_RGB888 input is supported.
 * Returns 0 on success (the write is skipped silently when compression
 * produced no data), -1 for unsupported pixel formats.
 *
 * BUG FIX: the tj handle was leaked on the unsupported-format path; it is
 * now destroyed before returning.
 */
static int write_image_jpeg(const char* path, int quality, const image_buffer_t* image)
{
    int ret;
    int jpegSubsamp = TJSAMP_422;
    unsigned char* jpegBuf = NULL;
    unsigned long jpegSize = 0;
    int flags = 0;

    const unsigned char* data = image->virt_addr;
    int width = image->width;
    int height = image->height;
    int pixelFormat = TJPF_RGB;

    tjhandle handle = tjInitCompress();

    if (image->format == IMAGE_FORMAT_RGB888) {
        ret = tjCompress2(handle, data, width, 0, height, pixelFormat, &jpegBuf, &jpegSize, jpegSubsamp, quality, flags);
    } else {
        printf("write_image_jpeg: pixel format %d not support\n", image->format);
        tjDestroy(handle);
        return -1;
    }
    (void)ret; // compression status is reflected by jpegBuf/jpegSize below

    // printf("ret=%d jpegBuf=%p jpegSize=%d\n", ret, jpegBuf, jpegSize);
    if (jpegBuf != NULL && jpegSize > 0) {
        write_data_to_file(path, (const char*)jpegBuf, jpegSize);
        tjFree(jpegBuf);
    }
    tjDestroy(handle);

    return 0;
}
|
||||
|
||||
/*
 * Decode an image file with stb_image.
 * When image->virt_addr is pre-set, the decoded pixels are copied into it
 * and the stb allocation is released; otherwise ownership of the stb
 * allocation passes to the caller. The buffer format is derived from the
 * decoded channel count (1 -> GRAY8, 4 -> RGBA8888, otherwise RGB888).
 * Returns 0 on success, -1 on decode failure.
 */
static int read_image_stb(const char* path, image_buffer_t* image)
{
    int w, h, c;
    unsigned char* pixeldata = stbi_load(path, &w, &h, &c, 0);
    if (pixeldata == NULL) {
        printf("error: read image %s fail\n", path);
        return -1;
    }
    // printf("load image wxhxc=%dx%dx%d path=%s\n", w, h, c, path);
    const int size = w * h * c;

    if (image->virt_addr == NULL) {
        image->virt_addr = pixeldata;
    } else {
        memcpy(image->virt_addr, pixeldata, size);
        stbi_image_free(pixeldata);
    }

    image->width = w;
    image->height = h;
    switch (c) {
    case 4:
        image->format = IMAGE_FORMAT_RGBA8888;
        break;
    case 1:
        image->format = IMAGE_FORMAT_GRAY8;
        break;
    default:
        image->format = IMAGE_FORMAT_RGB888;
        break;
    }
    return 0;
}
|
||||
|
||||
/*
 * Dispatch image loading by file extension:
 *   .data                      -> raw buffer read
 *   .jpg/.jpeg/.JPG/.JPEG      -> TurboJPEG decoder
 *   anything else              -> stb_image
 * Returns the chosen reader's result, or -666 when the path has no
 * extension at all.
 */
int read_image(const char* path, image_buffer_t* image)
{
    const char* ext = strrchr(path, '.');
    if (ext == NULL) {
        // missing extension
        return -666;
    }
    if (strcmp(ext, ".data") == 0) {
        return read_image_raw(path, image);
    }
    if (strcmp(ext, ".jpg") == 0 || strcmp(ext, ".jpeg") == 0 ||
        strcmp(ext, ".JPG") == 0 || strcmp(ext, ".JPEG") == 0) {
        return read_image_jpeg(path, image);
    }
    return read_image_stb(path, image);
}
|
||||
|
||||
/*
 * Write an image buffer to disk, choosing the encoder by file extension:
 * JPEG via TurboJPEG, PNG via stb_image_write, ".data" as a raw dump.
 * Assumes 3-channel data for the PNG path.
 * Returns the encoder's status, or -1 for a missing/unknown extension.
 *
 * BUG FIX: the .png/.data extension checks used bitwise '|' instead of
 * logical '||'.
 */
int write_image(const char* path, const image_buffer_t* img)
{
    int ret;
    int width = img->width;
    int height = img->height;
    int channel = 3;
    void* data = img->virt_addr;
    printf("write_image path: %s width=%d height=%d channel=%d data=%p\n",
        path, width, height, channel, data);

    const char* _ext = strrchr(path, '.');
    if (!_ext) {
        // missing extension
        return -1;
    }
    if (strcmp(_ext, ".jpg") == 0 || strcmp(_ext, ".jpeg") == 0 || strcmp(_ext, ".JPG") == 0 ||
        strcmp(_ext, ".JPEG") == 0) {
        int quality = 95;
        ret = write_image_jpeg(path, quality, img);
    } else if (strcmp(_ext, ".png") == 0 || strcmp(_ext, ".PNG") == 0) {
        ret = stbi_write_png(path, width, height, channel, data, 0);
    } else if (strcmp(_ext, ".data") == 0 || strcmp(_ext, ".DATA") == 0) {
        int size = get_image_size((image_buffer_t*)img);
        ret = write_data_to_file(path, (const char*)data, size);
    } else {
        // unknown extension type
        return -1;
    }
    return ret;
}
|
||||
|
||||
/*
 * Crop a region from a (channel)-interleaved source image and bilinearly
 * scale it into a box of the destination image.
 *
 * channel        - bytes per pixel (1 = gray / Y plane, 2 = UV plane,
 *                  3 = RGB, 4 = RGBA)
 * src, src_width, src_height   - source buffer and its dimensions
 * crop_*         - source region to sample from
 * dst, dst_width, dst_height   - destination buffer and its dimensions
 * dst_box_*      - destination region to fill
 *
 * Returns 0 on success, -1 when dst is NULL. No bounds checking is done on
 * the boxes; callers must pass regions inside their buffers.
 */
static int crop_and_scale_image_c(int channel, unsigned char *src, int src_width, int src_height,
                                  int crop_x, int crop_y, int crop_width, int crop_height,
                                  unsigned char *dst, int dst_width, int dst_height,
                                  int dst_box_x, int dst_box_y, int dst_box_width, int dst_box_height) {
    if (dst == NULL) {
        printf("dst buffer is null\n");
        return -1;
    }

    // Per-axis scale factor from destination box back into the crop region.
    float x_ratio = (float)crop_width / (float)dst_box_width;
    float y_ratio = (float)crop_height / (float)dst_box_height;

    // printf("src_width=%d src_height=%d crop_x=%d crop_y=%d crop_width=%d crop_height=%d\n",
    //     src_width, src_height, crop_x, crop_y, crop_width, crop_height);
    // printf("dst_width=%d dst_height=%d dst_box_x=%d dst_box_y=%d dst_box_width=%d dst_box_height=%d\n",
    //     dst_width, dst_height, dst_box_x, dst_box_y, dst_box_width, dst_box_height);
    // printf("channel=%d x_ratio=%f y_ratio=%f\n", channel, x_ratio, y_ratio);

    // Sample the specified source region and bilinearly scale it into the
    // specified destination region.
    for (int dst_y = dst_box_y; dst_y < dst_box_y + dst_box_height; dst_y++) {
        for (int dst_x = dst_box_x; dst_x < dst_box_x + dst_box_width; dst_x++) {
            int dst_x_offset = dst_x - dst_box_x;
            int dst_y_offset = dst_y - dst_box_y;

            int src_x = (int)(dst_x_offset * x_ratio) + crop_x;
            int src_y = (int)(dst_y_offset * y_ratio) + crop_y;

            // Fractional part of the source position: bilinear weights.
            float x_diff = (dst_x_offset * x_ratio) - (src_x - crop_x);
            float y_diff = (dst_y_offset * y_ratio) - (src_y - crop_y);

            int index1 = src_y * src_width * channel + src_x * channel;
            int index2 = index1 + src_width * channel; // down
            if (src_y == src_height - 1) {
                // At the bottom edge of the image, pick the pixel above instead.
                index2 = index1 - src_width * channel;
            }
            int index3 = index1 + 1 * channel; // right
            int index4 = index2 + 1 * channel; // down right
            if (src_x == src_width - 1) {
                // At the right edge of the image, pick the pixel to the left instead.
                index3 = index1 - 1 * channel;
                index4 = index2 - 1 * channel;
            }

            // printf("dst_x=%d dst_y=%d dst_x_offset=%d dst_y_offset=%d src_x=%d src_y=%d x_diff=%f y_diff=%f src index=%d %d %d %d\n",
            //     dst_x, dst_y, dst_x_offset, dst_y_offset,
            //     src_x, src_y, x_diff, y_diff,
            //     index1, index2, index3, index4);

            // Blend the four neighbouring pixels per channel.
            for (int c = 0; c < channel; c++) {
                unsigned char A = src[index1+c];
                unsigned char B = src[index3+c];
                unsigned char C = src[index2+c];
                unsigned char D = src[index4+c];

                unsigned char pixel = (unsigned char)(
                    A * (1 - x_diff) * (1 - y_diff) +
                    B * x_diff * (1 - y_diff) +
                    C * y_diff * (1 - x_diff) +
                    D * x_diff * y_diff
                );

                dst[(dst_y * dst_width + dst_x) * channel + c] = pixel;
            }
        }
    }

    return 0;
}
|
||||
|
||||
/*
 * Crop/scale a YUV420SP (NV12/NV21) image. The Y plane is processed at full
 * resolution and the interleaved UV plane at half resolution in both
 * dimensions, so every geometry parameter must be halved for the UV pass.
 *
 * BUG FIX: the destination box was previously passed unhalved to the
 * half-resolution UV pass, writing outside the UV plane. All four dst_box
 * values are now halved, matching the crop parameters.
 */
static int crop_and_scale_image_yuv420sp(unsigned char *src, int src_width, int src_height,
                                         int crop_x, int crop_y, int crop_width, int crop_height,
                                         unsigned char *dst, int dst_width, int dst_height,
                                         int dst_box_x, int dst_box_y, int dst_box_width, int dst_box_height) {

    unsigned char* src_y = src;
    unsigned char* src_uv = src + src_width * src_height;

    unsigned char* dst_y = dst;
    unsigned char* dst_uv = dst + dst_width * dst_height;

    crop_and_scale_image_c(1, src_y, src_width, src_height, crop_x, crop_y, crop_width, crop_height,
        dst_y, dst_width, dst_height, dst_box_x, dst_box_y, dst_box_width, dst_box_height);

    crop_and_scale_image_c(2, src_uv, src_width / 2, src_height / 2, crop_x / 2, crop_y / 2, crop_width / 2, crop_height / 2,
        dst_uv, dst_width / 2, dst_height / 2, dst_box_x / 2, dst_box_y / 2, dst_box_width / 2, dst_box_height / 2);

    return 0;
}
|
||||
|
||||
/*
 * CPU fallback conversion: crop src_box from src and bilinearly scale it
 * into dst_box of dst, padding the rest of dst with 'color' when the box
 * does not cover the whole destination. Both buffers must be allocated and
 * share the same pixel format. Boxes default to the full frame when NULL
 * (box edges are inclusive).
 * Returns 0 on success, -1 on bad arguments or unsupported formats.
 *
 * BUG FIX: the YUV branch tested IMAGE_FORMAT_YUV420SP_NV12 twice, so NV21
 * buffers fell into the unsupported-format branch. Unused locals removed.
 */
static int convert_image_cpu(image_buffer_t *src, image_buffer_t *dst, image_rect_t *src_box, image_rect_t *dst_box, char color) {
    if (dst->virt_addr == NULL) {
        return -1;
    }
    if (src->virt_addr == NULL) {
        return -1;
    }
    if (src->format != dst->format) {
        return -1;
    }

    int src_box_x = 0;
    int src_box_y = 0;
    int src_box_w = src->width;
    int src_box_h = src->height;
    if (src_box != NULL) {
        src_box_x = src_box->left;
        src_box_y = src_box->top;
        src_box_w = src_box->right - src_box->left + 1;
        src_box_h = src_box->bottom - src_box->top + 1;
    }
    int dst_box_x = 0;
    int dst_box_y = 0;
    int dst_box_w = dst->width;
    int dst_box_h = dst->height;
    if (dst_box != NULL) {
        dst_box_x = dst_box->left;
        dst_box_y = dst_box->top;
        dst_box_w = dst_box->right - dst_box->left + 1;
        dst_box_h = dst_box->bottom - dst_box->top + 1;
    }

    // fill pad color
    if (dst_box_w != dst->width || dst_box_h != dst->height) {
        int dst_size = get_image_size(dst);
        memset(dst->virt_addr, color, dst_size);
    }

    int reti = 0;
    if (src->format == IMAGE_FORMAT_RGB888) {
        reti = crop_and_scale_image_c(3, src->virt_addr, src->width, src->height,
            src_box_x, src_box_y, src_box_w, src_box_h,
            dst->virt_addr, dst->width, dst->height,
            dst_box_x, dst_box_y, dst_box_w, dst_box_h);
    } else if (src->format == IMAGE_FORMAT_RGBA8888) {
        reti = crop_and_scale_image_c(4, src->virt_addr, src->width, src->height,
            src_box_x, src_box_y, src_box_w, src_box_h,
            dst->virt_addr, dst->width, dst->height,
            dst_box_x, dst_box_y, dst_box_w, dst_box_h);
    } else if (src->format == IMAGE_FORMAT_GRAY8) {
        reti = crop_and_scale_image_c(1, src->virt_addr, src->width, src->height,
            src_box_x, src_box_y, src_box_w, src_box_h,
            dst->virt_addr, dst->width, dst->height,
            dst_box_x, dst_box_y, dst_box_w, dst_box_h);
    } else if (src->format == IMAGE_FORMAT_YUV420SP_NV12 || src->format == IMAGE_FORMAT_YUV420SP_NV21) {
        reti = crop_and_scale_image_yuv420sp(src->virt_addr, src->width, src->height,
            src_box_x, src_box_y, src_box_w, src_box_h,
            dst->virt_addr, dst->width, dst->height,
            dst_box_x, dst_box_y, dst_box_w, dst_box_h);
    } else {
        printf("no support format %d\n", src->format);
    }
    if (reti != 0) {
        printf("convert_image_cpu fail %d\n", reti);
        return -1;
    }
    printf("finish\n");
    return 0;
}
|
||||
|
||||
// Map an image_format_t to the corresponding RGA pixel-format constant.
// Returns -1 for formats RGA cannot handle here (e.g. IMAGE_FORMAT_GRAY8).
static int get_rga_fmt(image_format_t fmt) {
    switch (fmt)
    {
    case IMAGE_FORMAT_RGB888:
        return RK_FORMAT_RGB_888;
    case IMAGE_FORMAT_RGBA8888:
        return RK_FORMAT_RGBA_8888;
    case IMAGE_FORMAT_YUV420SP_NV12:
        return RK_FORMAT_YCbCr_420_SP;
    case IMAGE_FORMAT_YUV420SP_NV21:
        return RK_FORMAT_YCrCb_420_SP;
    default:
        break;
    }
    return -1;
}
|
||||
|
||||
/*
 * Byte size of an image buffer, derived from its dimensions and format.
 * Returns 0 for NULL input or unknown formats.
 *
 * BUG FIX: the default case previously fell off the end of the function
 * without a return value — undefined behavior; it now returns 0.
 */
int get_image_size(image_buffer_t* image)
{
    if (image == NULL) {
        return 0;
    }
    switch (image->format)
    {
    case IMAGE_FORMAT_GRAY8:
        return image->width * image->height;
    case IMAGE_FORMAT_RGB888:
        return image->width * image->height * 3;
    case IMAGE_FORMAT_RGBA8888:
        return image->width * image->height * 4;
    case IMAGE_FORMAT_YUV420SP_NV12:
    case IMAGE_FORMAT_YUV420SP_NV21:
        return image->width * image->height * 3 / 2;
    default:
        return 0;
    }
}
|
||||
|
||||
/*
 * Convert src_img into dst_img (crop/resize/format change) using the Rockchip
 * RGA 2D hardware engine via the im2d API.
 *
 * src_box / dst_box: optional crop rectangles (inclusive corners); NULL means
 * the whole image. color: fill value used when dst_box does not cover the
 * whole destination image. Returns 0 on success, -1 on failure (callers fall
 * back to a CPU path).
 */
static int convert_image_rga(image_buffer_t* src_img, image_buffer_t* dst_img, image_rect_t* src_box, image_rect_t* dst_box, char color)
{
    int ret = 0;

    int srcWidth = src_img->width;
    int srcHeight = src_img->height;
    void *src = src_img->virt_addr;
    int src_fd = src_img->fd;
    void *src_phy = NULL;
    int srcFmt = get_rga_fmt(src_img->format);

    int dstWidth = dst_img->width;
    int dstHeight = dst_img->height;
    void *dst = dst_img->virt_addr;
    int dst_fd = dst_img->fd;
    void *dst_phy = NULL;
    int dstFmt = get_rga_fmt(dst_img->format);

    int rotate = 0;

    // Newer librga versions require explicit buffer-handle import/release.
    int use_handle = 0;
#if defined(LIBRGA_IM2D_HANDLE)
    use_handle = 1;
#endif

    // printf("src width=%d height=%d fmt=0x%x virAddr=0x%p fd=%d\n",
    //        srcWidth, srcHeight, srcFmt, src, src_fd);
    // printf("dst width=%d height=%d fmt=0x%x virAddr=0x%p fd=%d\n",
    //        dstWidth, dstHeight, dstFmt, dst, dst_fd);
    // printf("rotate=%d\n", rotate);

    int usage = 0;
    IM_STATUS ret_rga = IM_STATUS_NOERROR;

    // set rga usage (rotation flags would be OR-ed in here; currently none)
    usage |= rotate;

    // set rga rect; prect (the pattern rect) is unused and zeroed
    im_rect srect;
    im_rect drect;
    im_rect prect;
    memset(&prect, 0, sizeof(im_rect));

    // Source crop: boxes store inclusive corners, hence the +1 on extents.
    if (src_box != NULL) {
        srect.x = src_box->left;
        srect.y = src_box->top;
        srect.width = src_box->right - src_box->left + 1;
        srect.height = src_box->bottom - src_box->top + 1;
    } else {
        srect.x = 0;
        srect.y = 0;
        srect.width = srcWidth;
        srect.height = srcHeight;
    }

    // Destination placement rect.
    if (dst_box != NULL) {
        drect.x = dst_box->left;
        drect.y = dst_box->top;
        drect.width = dst_box->right - dst_box->left + 1;
        drect.height = dst_box->bottom - dst_box->top + 1;
    } else {
        drect.x = 0;
        drect.y = 0;
        drect.width = dstWidth;
        drect.height = dstHeight;
    }

    // set rga buffer wrappers; pat is required by improcess() but unused
    rga_buffer_t rga_buf_src;
    rga_buffer_t rga_buf_dst;
    rga_buffer_t pat;
    rga_buffer_handle_t rga_handle_src = 0;
    rga_buffer_handle_t rga_handle_dst = 0;
    memset(&pat, 0, sizeof(rga_buffer_t));

    im_handle_param_t in_param;
    in_param.width = srcWidth;
    in_param.height = srcHeight;
    in_param.format = srcFmt;

    im_handle_param_t dst_param;
    dst_param.width = dstWidth;
    dst_param.height = dstHeight;
    dst_param.format = dstFmt;

    // Import the source buffer, preferring physical addr > dma-buf fd > virtual
    // addr. src_phy is always NULL here, so that branch is currently dead.
    if (use_handle) {
        if (src_phy != NULL) {
            rga_handle_src = importbuffer_physicaladdr((uint64_t)src_phy, &in_param);
        } else if (src_fd > 0) {
            rga_handle_src = importbuffer_fd(src_fd, &in_param);
        } else {
            rga_handle_src = importbuffer_virtualaddr(src, &in_param);
        }
        if (rga_handle_src <= 0) {
            printf("src handle error %d\n", rga_handle_src);
            ret = -1;
            goto err;
        }
        rga_buf_src = wrapbuffer_handle(rga_handle_src, srcWidth, srcHeight, srcFmt, srcWidth, srcHeight);
    } else {
        if (src_phy != NULL) {
            rga_buf_src = wrapbuffer_physicaladdr(src_phy, srcWidth, srcHeight, srcFmt, srcWidth, srcHeight);
        } else if (src_fd > 0) {
            rga_buf_src = wrapbuffer_fd(src_fd, srcWidth, srcHeight, srcFmt, srcWidth, srcHeight);
        } else {
            rga_buf_src = wrapbuffer_virtualaddr(src, srcWidth, srcHeight, srcFmt, srcWidth, srcHeight);
        }
    }

    // Same import logic for the destination buffer.
    if (use_handle) {
        if (dst_phy != NULL) {
            rga_handle_dst = importbuffer_physicaladdr((uint64_t)dst_phy, &dst_param);
        } else if (dst_fd > 0) {
            rga_handle_dst = importbuffer_fd(dst_fd, &dst_param);
        } else {
            rga_handle_dst = importbuffer_virtualaddr(dst, &dst_param);
        }
        if (rga_handle_dst <= 0) {
            printf("dst handle error %d\n", rga_handle_dst);
            ret = -1;
            goto err;
        }
        rga_buf_dst = wrapbuffer_handle(rga_handle_dst, dstWidth, dstHeight, dstFmt, dstWidth, dstHeight);
    } else {
        if (dst_phy != NULL) {
            rga_buf_dst = wrapbuffer_physicaladdr(dst_phy, dstWidth, dstHeight, dstFmt, dstWidth, dstHeight);
        } else if (dst_fd > 0) {
            rga_buf_dst = wrapbuffer_fd(dst_fd, dstWidth, dstHeight, dstFmt, dstWidth, dstHeight);
        } else {
            rga_buf_dst = wrapbuffer_virtualaddr(dst, dstWidth, dstHeight, dstFmt, dstWidth, dstHeight);
        }
    }

    // If the placement rect does not cover the whole destination (letterbox
    // case), pre-fill the destination with the padding color.
    if (drect.width != dstWidth || drect.height != dstHeight) {
        im_rect dst_whole_rect = {0, 0, dstWidth, dstHeight};
        int imcolor;
        // NOTE(review): int* assigned to char* without a cast — compiles in C
        // only with a warning; presumably meant as (char*)&imcolor to write
        // the same byte into all four RGBA channels. Confirm and add the cast.
        char* p_imcolor = &imcolor;
        p_imcolor[0] = color;
        p_imcolor[1] = color;
        p_imcolor[2] = color;
        p_imcolor[3] = color;
        printf("fill dst image (x y w h)=(%d %d %d %d) with color=0x%x\n",
               dst_whole_rect.x, dst_whole_rect.y, dst_whole_rect.width, dst_whole_rect.height, imcolor);
        // im2d calls return a positive IM_STATUS on success; <= 0 is treated
        // as failure here — TODO confirm against the installed im2d.h.
        ret_rga = imfill(rga_buf_dst, dst_whole_rect, imcolor);
        if (ret_rga <= 0) {
            // Hardware fill failed: fall back to a CPU memset when we have a
            // CPU-visible mapping of the destination.
            if (dst != NULL) {
                size_t dst_size = get_image_size(dst_img);
                memset(dst, color, dst_size);
            } else {
                printf("Warning: Can not fill color on target image\n");
            }
        }
    }

    // rga process: crop srect from src, scale/convert into drect of dst.
    ret_rga = improcess(rga_buf_src, rga_buf_dst, pat, srect, drect, prect, usage);
    if (ret_rga <= 0) {
        printf("Error on improcess STATUS=%d\n", ret_rga);
        printf("RGA error message: %s\n", imStrError((IM_STATUS)ret_rga));
        ret = -1;
    }

err:
    // Release any imported handles (0 means "never imported").
    if (rga_handle_src > 0) {
        releasebuffer_handle(rga_handle_src);
    }

    if (rga_handle_dst > 0) {
        releasebuffer_handle(rga_handle_dst);
    }

    // printf("finish\n");
    return ret;
}
|
||||
|
||||
/*
 * Convert (crop/resize/format-change) src_img into dst_img.
 * Tries the RGA hardware path first; on failure falls back to the CPU
 * implementation. Returns 0 on success, non-zero on failure.
 */
int convert_image(image_buffer_t* src_img, image_buffer_t* dst_img, image_rect_t* src_box, image_rect_t* dst_box, char color)
{
    // Trace the conversion request (both endpoints and optional crop boxes).
    printf("src width=%d height=%d fmt=0x%x virAddr=0x%p fd=%d\n",
           src_img->width, src_img->height, src_img->format, src_img->virt_addr, src_img->fd);
    printf("dst width=%d height=%d fmt=0x%x virAddr=0x%p fd=%d\n",
           dst_img->width, dst_img->height, dst_img->format, dst_img->virt_addr, dst_img->fd);
    if (src_box != NULL) {
        printf("src_box=(%d %d %d %d)\n", src_box->left, src_box->top, src_box->right, src_box->bottom);
    }
    if (dst_box != NULL) {
        printf("dst_box=(%d %d %d %d)\n", dst_box->left, dst_box->top, dst_box->right, dst_box->bottom);
    }
    printf("color=0x%x\n", color);

    // Preferred path: 2D hardware acceleration.
    int rc = convert_image_rga(src_img, dst_img, src_box, dst_box, color);
    if (rc == 0) {
        return rc;
    }

    // Hardware path failed — do the same work on the CPU.
    printf("try convert image use cpu\n");
    return convert_image_cpu(src_img, dst_img, src_box, dst_box, color);
}
|
||||
|
||||
/*
 * Resize src_image into dst_image preserving aspect ratio ("letterbox"):
 * the source is uniformly scaled to fit, centered, and the remaining border
 * is filled with `color`. The applied scale and top/left padding are reported
 * through `letterbox` so detections can later be mapped back to source
 * coordinates. Allocates dst_image->virt_addr if the caller did not provide
 * a buffer (caller must free it). Returns 0 on success.
 */
int convert_image_with_letterbox(image_buffer_t* src_image, image_buffer_t* dst_image, letterbox_t* letterbox, char color)
{
    int ret = 0;
    int allow_slight_change = 1;
    int src_w = src_image->width;
    int src_h = src_image->height;
    int dst_w = dst_image->width;
    int dst_h = dst_image->height;
    int resize_w = dst_w;
    int resize_h = dst_h;

    int padding_w = 0;
    int padding_h = 0;

    int _left_offset = 0;
    int _top_offset = 0;
    float scale = 1.0;

    // Source box: always the full source image (inclusive corners).
    image_rect_t src_box;
    src_box.left = 0;
    src_box.top = 0;
    src_box.right = src_image->width - 1;
    src_box.bottom = src_image->height - 1;

    // Destination box: starts as the full target, shrunk below to the
    // aspect-preserving region.
    image_rect_t dst_box;
    dst_box.left = 0;
    dst_box.top = 0;
    dst_box.right = dst_image->width - 1;
    dst_box.bottom = dst_image->height - 1;

    // Pick the limiting axis: use the smaller scale so the whole source fits.
    float _scale_w = (float)dst_w / src_w;
    float _scale_h = (float)dst_h / src_h;
    if(_scale_w < _scale_h) {
        scale = _scale_w;
        resize_h = (int) src_h*scale;
    } else {
        scale = _scale_h;
        resize_w = (int) src_w*scale;
    }
    // slight change image size for align: RGA prefers widths that are a
    // multiple of 4 and even heights.
    if (allow_slight_change == 1 && (resize_w % 4 != 0)) {
        resize_w -= resize_w % 4;
    }
    if (allow_slight_change == 1 && (resize_h % 2 != 0)) {
        resize_h -= resize_h % 2;
    }
    // padding left over after the aspect-preserving resize
    padding_h = dst_h - resize_h;
    padding_w = dst_w - resize_w;
    // center the resized image; offsets are rounded down to even values
    // (alignment again), clamped at 0.
    if (_scale_w < _scale_h) {
        dst_box.top = padding_h / 2;
        if (dst_box.top % 2 != 0) {
            dst_box.top -= dst_box.top % 2;
            if (dst_box.top < 0) {
                dst_box.top = 0;
            }
        }
        dst_box.bottom = dst_box.top + resize_h - 1;
        _top_offset = dst_box.top;
    } else {
        dst_box.left = padding_w / 2;
        if (dst_box.left % 2 != 0) {
            dst_box.left -= dst_box.left % 2;
            if (dst_box.left < 0) {
                dst_box.left = 0;
            }
        }
        dst_box.right = dst_box.left + resize_w - 1;
        _left_offset = dst_box.left;
    }
    printf("scale=%f dst_box=(%d %d %d %d) allow_slight_change=%d _left_offset=%d _top_offset=%d padding_w=%d padding_h=%d\n",
           scale, dst_box.left, dst_box.top, dst_box.right, dst_box.bottom, allow_slight_change,
           _left_offset, _top_offset, padding_w, padding_h);

    // set offset and scale so callers can invert the transform
    if(letterbox != NULL){
        letterbox->scale = scale;
        letterbox->x_pad = _left_offset;
        letterbox->y_pad = _top_offset;
    }
    // alloc memory buffer for dst image,
    // remember to free (ownership passes to the caller)
    if (dst_image->virt_addr == NULL && dst_image->fd <= 0) {
        int dst_size = get_image_size(dst_image);
        dst_image->virt_addr = (uint8_t *)malloc(dst_size);
        if (dst_image->virt_addr == NULL) {
            printf("malloc size %d error\n", dst_size);
            return -1;
        }
    }
    ret = convert_image(src_image, dst_image, &src_box, &dst_box, color);
    return ret;
}
|
||||
@@ -0,0 +1,73 @@
|
||||
#ifndef _RKNN_MODEL_ZOO_IMAGE_UTILS_H_
#define _RKNN_MODEL_ZOO_IMAGE_UTILS_H_

#ifdef __cplusplus
extern "C" {
#endif

#include "common.h"

/**
 * @brief LetterBox transform parameters: records the uniform scale and the
 *        top/left padding applied by convert_image_with_letterbox(), so
 *        detection boxes can be mapped back to source-image coordinates.
 */
typedef struct {
    int x_pad;   /* horizontal (left) padding in pixels */
    int y_pad;   /* vertical (top) padding in pixels */
    float scale; /* uniform scale factor applied to the source image */
} letterbox_t;

/**
 * @brief Read image file (support png/jpeg/bmp)
 *
 * @param path [in] Image path
 * @param image [out] Read image
 * @return int 0: success; -1: error
 */
int read_image(const char* path, image_buffer_t* image);

/**
 * @brief Write image file (support jpg/png)
 *
 * @param path [in] Image path
 * @param image [in] Image for write (only support IMAGE_FORMAT_RGB888)
 * @return int 0: success; -1: error
 */
int write_image(const char* path, const image_buffer_t* image);

/**
 * @brief Convert image for resize and pixel format change
 *
 * @param src_image [in] Source Image
 * @param dst_image [out] Target Image
 * @param src_box [in] Crop rectangle on source image
 * @param dst_box [in] Crop rectangle on target image
 * @param color [in] Padding color if dst_box cannot fill target image
 * @return int 0: success; non-zero: error
 */
int convert_image(image_buffer_t* src_image, image_buffer_t* dst_image, image_rect_t* src_box, image_rect_t* dst_box, char color);

/**
 * @brief Convert image with letterbox (aspect-preserving resize + padding)
 *
 * @param src_image [in] Source Image
 * @param dst_image [out] Target Image
 * @param letterbox [out] Letterbox parameters applied (scale and padding)
 * @param color [in] Fill color on target image
 * @return int 0: success; non-zero: error
 */
int convert_image_with_letterbox(image_buffer_t* src_image, image_buffer_t* dst_image, letterbox_t* letterbox, char color);

/**
 * @brief Get the image size in bytes (derived from format and dimensions)
 *
 * @param image [in] Image
 * @return int image size
 */
int get_image_size(image_buffer_t* image);

#ifdef __cplusplus
} // extern "C"
#endif

#endif // _RKNN_MODEL_ZOO_IMAGE_UTILS_H_
|
||||
@@ -0,0 +1,500 @@
|
||||
// Copyright (c) 2021 by Rockchip Electronics Co., Ltd. All Rights Reserved.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
#include "yolov8.h"
|
||||
|
||||
#include <math.h>
|
||||
#include <stdint.h>
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
#include <sys/time.h>
|
||||
|
||||
#include <set>
|
||||
#include <vector>
|
||||
|
||||
#ifdef OPI5_BUILD
|
||||
#define LABEL_NALE_TXT_PATH "azaion_10_labels_list.txt"
|
||||
#else
|
||||
#define LABEL_NALE_TXT_PATH "coco_80_labels_list.txt"
|
||||
#endif
|
||||
|
||||
|
||||
static char *labels[OBJ_CLASS_NUM];
|
||||
|
||||
// Clamp a float into [min, max] and truncate the result to int.
// Values <= min yield min; values >= max yield max; otherwise val is truncated.
inline static int clamp(float val, int min, int max)
{
    if (!(val > min)) {
        return min;
    }
    return (val < max) ? (int)val : max;
}
|
||||
|
||||
/*
 * Read one '\n'-terminated line from fp into a freshly malloc'd,
 * NUL-terminated string (newline excluded). The incoming `buffer` value is
 * ignored and overwritten. *len receives the number of characters read.
 * Returns NULL at end-of-file with nothing read, or on error/out-of-memory;
 * otherwise the caller owns (and must free) the returned string.
 */
static char *readLine(FILE *fp, char *buffer, int *len)
{
    size_t count = 0;
    int pos = 0;
    int ch;

    // Start with room for just the terminator; grow one byte per character.
    buffer = (char *)malloc(count + 1);
    if (!buffer)
        return NULL; // Out of memory

    for (ch = fgetc(fp); ch != '\n' && ch != EOF; ch = fgetc(fp))
    {
        count++;
        void *grown = realloc(buffer, count + 1);
        if (grown == NULL)
        {
            free(buffer);
            return NULL; // Out of memory
        }
        buffer = (char *)grown;

        buffer[pos] = (char)ch;
        pos++;
    }
    buffer[pos] = '\0';

    *len = count;

    // End-of-input: report NULL when nothing was read or a read error occurred.
    if (ch == EOF && (pos == 0 || ferror(fp)))
    {
        free(buffer);
        return NULL;
    }
    return buffer;
}
|
||||
|
||||
static int readLines(const char *fileName, char *lines[], int max_line)
|
||||
{
|
||||
FILE *file = fopen(fileName, "r");
|
||||
char *s;
|
||||
int i = 0;
|
||||
int n = 0;
|
||||
|
||||
if (file == NULL)
|
||||
{
|
||||
printf("Open %s fail!\n", fileName);
|
||||
return -1;
|
||||
}
|
||||
|
||||
while ((s = readLine(file, s, &n)) != NULL)
|
||||
{
|
||||
lines[i++] = s;
|
||||
if (i >= max_line)
|
||||
break;
|
||||
}
|
||||
fclose(file);
|
||||
return i;
|
||||
}
|
||||
|
||||
static int loadLabelName(const char *locationFilename, char *label[])
|
||||
{
|
||||
printf("load lable %s\n", locationFilename);
|
||||
readLines(locationFilename, label, OBJ_CLASS_NUM);
|
||||
return 0;
|
||||
}
|
||||
|
||||
/*
 * Intersection-over-union of two boxes given as inclusive pixel corners
 * (hence the +1.0 on each extent). Returns 0 when the union is degenerate.
 */
static float CalculateOverlap(float xmin0, float ymin0, float xmax0, float ymax0, float xmin1, float ymin1, float xmax1,
                              float ymax1)
{
    float w = fmax(0.f, fmin(xmax0, xmax1) - fmax(xmin0, xmin1) + 1.0);
    float h = fmax(0.f, fmin(ymax0, ymax1) - fmax(ymin0, ymin1) + 1.0);
    float i = w * h;
    float u = (xmax0 - xmin0 + 1.0) * (ymax0 - ymin0 + 1.0) + (xmax1 - xmin1 + 1.0) * (ymax1 - ymin1 + 1.0) - i;
    return u <= 0.f ? 0.f : (i / u);
}

/*
 * Per-class non-maximum suppression. `order` holds detection indices sorted
 * by descending score; suppressed entries are overwritten with -1.
 * outputLocations stores boxes as (x, y, w, h) quadruples indexed by
 * detection index; classIds is likewise indexed by detection index.
 *
 * Fix: the original tested `classIds[i]` (a position in the sorted order, and
 * in the inner loop a condition that was always false) instead of the class of
 * the detection referenced by order[]. That let boxes of OTHER classes be
 * suppressed by this class's boxes. Index classIds by the detection index
 * (n = order[i], m = order[j]), matching the box lookup below.
 */
static int nms(int validCount, std::vector<float> &outputLocations, std::vector<int> classIds, std::vector<int> &order,
               int filterId, float threshold)
{
    for (int i = 0; i < validCount; ++i)
    {
        int n = order[i];
        if (n == -1 || classIds[n] != filterId)
        {
            continue;
        }
        for (int j = i + 1; j < validCount; ++j)
        {
            int m = order[j];
            if (m == -1 || classIds[m] != filterId)
            {
                continue;
            }
            float xmin0 = outputLocations[n * 4 + 0];
            float ymin0 = outputLocations[n * 4 + 1];
            float xmax0 = outputLocations[n * 4 + 0] + outputLocations[n * 4 + 2];
            float ymax0 = outputLocations[n * 4 + 1] + outputLocations[n * 4 + 3];

            float xmin1 = outputLocations[m * 4 + 0];
            float ymin1 = outputLocations[m * 4 + 1];
            float xmax1 = outputLocations[m * 4 + 0] + outputLocations[m * 4 + 2];
            float ymax1 = outputLocations[m * 4 + 1] + outputLocations[m * 4 + 3];

            float iou = CalculateOverlap(xmin0, ymin0, xmax0, ymax0, xmin1, ymin1, xmax1, ymax1);

            // Lower-scored box overlaps too much with a kept box: suppress it.
            if (iou > threshold)
            {
                order[j] = -1;
            }
        }
    }
    return 0;
}
|
||||
|
||||
/*
 * In-place quicksort of `input` in DESCENDING order, applying the identical
 * permutation to `indices` so indices[k] still identifies the detection that
 * produced input[k]. Hoare-style partition with the leftmost element as pivot.
 * Returns the pivot's final position.
 */
static int quick_sort_indice_inverse(std::vector<float> &input, int left, int right, std::vector<int> &indices)
{
    int low = left;
    int high = right;

    if (left < right)
    {
        // Save the pivot pair; its slot is free to receive shifted elements.
        float pivot = input[left];
        int pivot_index = indices[left];

        while (low < high)
        {
            // Scan from the right for an element larger than the pivot.
            while (low < high && input[high] <= pivot)
            {
                high--;
            }
            input[low] = input[high];
            indices[low] = indices[high];

            // Scan from the left for an element smaller than the pivot.
            while (low < high && input[low] >= pivot)
            {
                low++;
            }
            input[high] = input[low];
            indices[high] = indices[low];
        }

        // Drop the pivot into its final slot and recurse on both halves.
        input[low] = pivot;
        indices[low] = pivot_index;
        quick_sort_indice_inverse(input, left, low - 1, indices);
        quick_sort_indice_inverse(input, low + 1, right, indices);
    }
    return low;
}
|
||||
|
||||
// Logistic function: maps any real x into (0, 1).
static float sigmoid(float x)
{
    return 1.0 / (1.0 + expf(-x));
}

// Inverse logistic (logit): recovers x such that sigmoid(x) == y, y in (0, 1).
static float unsigmoid(float y)
{
    return -1.0 * logf((1.0 / y) - 1.0);
}
|
||||
|
||||
// Saturate val to [min, max], then truncate to a 32-bit integer.
// (Used by the affine quantizer to keep values inside the int8 range.)
inline static int32_t __clip(float val, float min, float max)
{
    float clipped = val;
    if (clipped <= min)
    {
        clipped = min;
    }
    else if (clipped >= max)
    {
        clipped = max;
    }
    return clipped;
}
|
||||
|
||||
static int8_t qnt_f32_to_affine(float f32, int32_t zp, float scale)
|
||||
{
|
||||
float dst_val = (f32 / scale) + zp;
|
||||
int8_t res = (int8_t)__clip(dst_val, -128, 127);
|
||||
return res;
|
||||
}
|
||||
|
||||
// Dequantize an affine-quantized int8 value: f = (q - zp) * scale.
static float deqnt_affine_to_f32(int8_t qnt, int32_t zp, float scale)
{
    return ((float)qnt - (float)zp) * scale;
}
|
||||
|
||||
|
||||
void compute_dfl(float* tensor, int dfl_len, float* box){
|
||||
for (int b=0; b<4; b++){
|
||||
float exp_t[dfl_len];
|
||||
float exp_sum=0;
|
||||
float acc_sum=0;
|
||||
for (int i=0; i< dfl_len; i++){
|
||||
exp_t[i] = exp(tensor[i+b*dfl_len]);
|
||||
exp_sum += exp_t[i];
|
||||
}
|
||||
|
||||
for (int i=0; i< dfl_len; i++){
|
||||
acc_sum += exp_t[i]/exp_sum *i;
|
||||
}
|
||||
box[b] = acc_sum;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
/*
 * Decode one quantized (int8) YOLOv8 output branch into candidate detections.
 *
 * box_tensor:   DFL logits, 4*dfl_len channel planes of grid_h*grid_w cells.
 * score_tensor: per-class score planes (OBJ_CLASS_NUM of them).
 * score_sum_tensor: optional summed-score plane used as a fast pre-filter
 *                   (may be NULL when the model has no such output).
 * stride: input pixels per grid cell for this branch.
 * Appends (x, y, w, h) to `boxes`, dequantized confidence to `objProbs`, and
 * class index to `classId`. Returns the number of detections appended.
 */
static int process_i8(int8_t *box_tensor, int32_t box_zp, float box_scale,
                      int8_t *score_tensor, int32_t score_zp, float score_scale,
                      int8_t *score_sum_tensor, int32_t score_sum_zp, float score_sum_scale,
                      int grid_h, int grid_w, int stride, int dfl_len,
                      std::vector<float> &boxes,
                      std::vector<float> &objProbs,
                      std::vector<int> &classId,
                      float threshold)
{
    int validCount = 0;
    int grid_len = grid_h * grid_w;
    // Quantize the thresholds once so the per-cell comparisons stay in int8.
    int8_t score_thres_i8 = qnt_f32_to_affine(threshold, score_zp, score_scale);
    int8_t score_sum_thres_i8 = qnt_f32_to_affine(threshold, score_sum_zp, score_sum_scale);

    for (int i = 0; i < grid_h; i++)
    {
        for (int j = 0; j < grid_w; j++)
        {
            int offset = i* grid_w + j;
            int max_class_id = -1;

            // The score-sum tensor acts as a fast pre-filter: skip cells whose
            // aggregate score is already below the threshold.
            if (score_sum_tensor != nullptr){
                if (score_sum_tensor[offset] < score_sum_thres_i8){
                    continue;
                }
            }

            // Find the best-scoring class for this cell. Planes are class-major:
            // advancing by grid_len moves to the same cell in the next class.
            // NOTE(review): -score_zp is truncated into int8 here — assumes the
            // zero-point fits; confirm for models with unusual quantization.
            int8_t max_score = -score_zp;
            for (int c= 0; c< OBJ_CLASS_NUM; c++){
                if ((score_tensor[offset] > score_thres_i8) && (score_tensor[offset] > max_score))
                {
                    max_score = score_tensor[offset];
                    max_class_id = c;
                }
                offset += grid_len;
            }

            // compute box only for cells that passed the confidence threshold
            if (max_score> score_thres_i8){
                offset = i* grid_w + j;
                float box[4];
                // Gather and dequantize the 4*dfl_len DFL logits for this cell.
                float before_dfl[dfl_len*4];
                for (int k=0; k< dfl_len*4; k++){
                    before_dfl[k] = deqnt_affine_to_f32(box_tensor[offset], box_zp, box_scale);
                    offset += grid_len;
                }
                compute_dfl(before_dfl, dfl_len, box);

                // box[] holds distances from the cell center (j+0.5, i+0.5) to
                // the left/top/right/bottom edges, in stride units.
                float x1,y1,x2,y2,w,h;
                x1 = (-box[0] + j + 0.5)*stride;
                y1 = (-box[1] + i + 0.5)*stride;
                x2 = (box[2] + j + 0.5)*stride;
                y2 = (box[3] + i + 0.5)*stride;
                w = x2 - x1;
                h = y2 - y1;
                boxes.push_back(x1);
                boxes.push_back(y1);
                boxes.push_back(w);
                boxes.push_back(h);

                objProbs.push_back(deqnt_affine_to_f32(max_score, score_zp, score_scale));
                classId.push_back(max_class_id);
                validCount ++;
            }
        }
    }
    return validCount;
}
|
||||
|
||||
/*
 * Float (non-quantized) counterpart of process_i8: decode one YOLOv8 output
 * branch into candidate detections. See process_i8 for the tensor layout;
 * here all tensors are already in float so no (de)quantization is needed.
 * Returns the number of detections appended to boxes/objProbs/classId.
 */
static int process_fp32(float *box_tensor, float *score_tensor, float *score_sum_tensor,
                        int grid_h, int grid_w, int stride, int dfl_len,
                        std::vector<float> &boxes,
                        std::vector<float> &objProbs,
                        std::vector<int> &classId,
                        float threshold)
{
    int validCount = 0;
    int grid_len = grid_h * grid_w;
    for (int i = 0; i < grid_h; i++)
    {
        for (int j = 0; j < grid_w; j++)
        {
            int offset = i* grid_w + j;
            int max_class_id = -1;

            // The score-sum tensor acts as a fast pre-filter: skip cells whose
            // aggregate score is already below the threshold.
            if (score_sum_tensor != nullptr){
                if (score_sum_tensor[offset] < threshold){
                    continue;
                }
            }

            // Best-scoring class for this cell; planes are class-major
            // (advance by grid_len for the next class's plane).
            float max_score = 0;
            for (int c= 0; c< OBJ_CLASS_NUM; c++){
                if ((score_tensor[offset] > threshold) && (score_tensor[offset] > max_score))
                {
                    max_score = score_tensor[offset];
                    max_class_id = c;
                }
                offset += grid_len;
            }

            // compute box only for cells above the confidence threshold
            if (max_score> threshold){
                offset = i* grid_w + j;
                float box[4];
                // Gather the 4*dfl_len DFL logits for this cell.
                float before_dfl[dfl_len*4];
                for (int k=0; k< dfl_len*4; k++){
                    before_dfl[k] = box_tensor[offset];
                    offset += grid_len;
                }
                compute_dfl(before_dfl, dfl_len, box);

                // box[] holds distances from the cell center (j+0.5, i+0.5) to
                // the left/top/right/bottom edges, in stride units.
                float x1,y1,x2,y2,w,h;
                x1 = (-box[0] + j + 0.5)*stride;
                y1 = (-box[1] + i + 0.5)*stride;
                x2 = (box[2] + j + 0.5)*stride;
                y2 = (box[3] + i + 0.5)*stride;
                w = x2 - x1;
                h = y2 - y1;
                boxes.push_back(x1);
                boxes.push_back(y1);
                boxes.push_back(w);
                boxes.push_back(h);

                objProbs.push_back(max_score);
                classId.push_back(max_class_id);
                validCount ++;
            }
        }
    }
    return validCount;
}
|
||||
|
||||
|
||||
/*
 * Full YOLOv8 post-processing pipeline: decode the 3 output branches
 * (quantized or float), sort candidates by confidence, apply per-class NMS,
 * and map the surviving boxes from model-input coordinates back to source
 * coordinates using the letterbox parameters.
 *
 * outputs:  raw model outputs, 2 or 3 tensors per branch (box, score,
 *           optional score-sum). od_results is fully overwritten.
 * Returns 0 (also when no object is detected; od_results->count == 0 then).
 */
int post_process(rknn_app_context_t *app_ctx, rknn_output *outputs, letterbox_t *letter_box, float conf_threshold, float nms_threshold, object_detect_result_list *od_results)
{
    std::vector<float> filterBoxes;
    std::vector<float> objProbs;
    std::vector<int> classId;
    int validCount = 0;
    int stride = 0;
    int grid_h = 0;
    int grid_w = 0;
    int model_in_w = app_ctx->model_width;
    int model_in_h = app_ctx->model_height;

    memset(od_results, 0, sizeof(object_detect_result_list));

    // default 3 branch: dims[1] of the box output is 4*dfl_len channels.
    int dfl_len = app_ctx->output_attrs[0].dims[1] /4;
    int output_per_branch = app_ctx->io_num.n_output / 3;
    for (int i = 0; i < 3; i++)
    {

        // Optional third tensor per branch: summed class scores (pre-filter).
        void *score_sum = nullptr;
        int32_t score_sum_zp = 0;
        float score_sum_scale = 1.0;
        if (output_per_branch == 3){
            score_sum = outputs[i*output_per_branch + 2].buf;
            score_sum_zp = app_ctx->output_attrs[i*output_per_branch + 2].zp;
            score_sum_scale = app_ctx->output_attrs[i*output_per_branch + 2].scale;
        }
        int box_idx = i*output_per_branch;
        int score_idx = i*output_per_branch + 1;

        // Branch geometry (NCHW: dims[2] x dims[3] grid); stride derives from
        // how much the input was downsampled for this branch.
        grid_h = app_ctx->output_attrs[box_idx].dims[2];
        grid_w = app_ctx->output_attrs[box_idx].dims[3];
        stride = model_in_h / grid_h;

        if (app_ctx->is_quant)
        {
            validCount += process_i8((int8_t *)outputs[box_idx].buf, app_ctx->output_attrs[box_idx].zp, app_ctx->output_attrs[box_idx].scale,
                                     (int8_t *)outputs[score_idx].buf, app_ctx->output_attrs[score_idx].zp, app_ctx->output_attrs[score_idx].scale,
                                     (int8_t *)score_sum, score_sum_zp, score_sum_scale,
                                     grid_h, grid_w, stride, dfl_len,
                                     filterBoxes, objProbs, classId, conf_threshold);
        }
        else
        {
            validCount += process_fp32((float *)outputs[box_idx].buf, (float *)outputs[score_idx].buf, (float *)score_sum,
                                       grid_h, grid_w, stride, dfl_len,
                                       filterBoxes, objProbs, classId, conf_threshold);
        }

    }

    // no object detect
    if (validCount <= 0)
    {
        return 0;
    }
    // Sort detection indices by descending confidence (indexArray is permuted
    // alongside objProbs).
    std::vector<int> indexArray;
    for (int i = 0; i < validCount; ++i)
    {
        indexArray.push_back(i);
    }
    quick_sort_indice_inverse(objProbs, 0, validCount - 1, indexArray);

    // Run NMS independently for every class that actually occurred.
    std::set<int> class_set(std::begin(classId), std::end(classId));

    for (auto c : class_set)
    {
        nms(validCount, filterBoxes, classId, indexArray, c, nms_threshold);
    }

    int last_count = 0;
    od_results->count = 0;

    /* box valid detect target: undo the letterbox transform (subtract padding,
     * clamp to the model input, divide by scale) and copy into od_results. */
    for (int i = 0; i < validCount; ++i)
    {
        if (indexArray[i] == -1 || last_count >= OBJ_NUMB_MAX_SIZE)
        {
            continue;
        }
        int n = indexArray[i];

        float x1 = filterBoxes[n * 4 + 0] - letter_box->x_pad;
        float y1 = filterBoxes[n * 4 + 1] - letter_box->y_pad;
        float x2 = x1 + filterBoxes[n * 4 + 2];
        float y2 = y1 + filterBoxes[n * 4 + 3];
        int id = classId[n];
        // objProbs was sorted in place, so the confidence lives at position i,
        // while boxes/classes are still indexed by the original index n.
        float obj_conf = objProbs[i];

        od_results->results[last_count].box.left = (int)(clamp(x1, 0, model_in_w) / letter_box->scale);
        od_results->results[last_count].box.top = (int)(clamp(y1, 0, model_in_h) / letter_box->scale);
        od_results->results[last_count].box.right = (int)(clamp(x2, 0, model_in_w) / letter_box->scale);
        od_results->results[last_count].box.bottom = (int)(clamp(y2, 0, model_in_h) / letter_box->scale);
        od_results->results[last_count].prop = obj_conf;
        od_results->results[last_count].cls_id = id;
        last_count++;
    }
    od_results->count = last_count;
    return 0;
}
|
||||
|
||||
int init_post_process()
|
||||
{
|
||||
int ret = 0;
|
||||
ret = loadLabelName(LABEL_NALE_TXT_PATH, labels);
|
||||
if (ret < 0)
|
||||
{
|
||||
printf("Load %s failed!\n", LABEL_NALE_TXT_PATH);
|
||||
return -1;
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
char *coco_cls_to_name(int cls_id)
|
||||
{
|
||||
|
||||
if (cls_id >= OBJ_CLASS_NUM)
|
||||
{
|
||||
return "null";
|
||||
}
|
||||
|
||||
if (labels[cls_id])
|
||||
{
|
||||
return labels[cls_id];
|
||||
}
|
||||
|
||||
return "null";
|
||||
}
|
||||
|
||||
void deinit_post_process()
|
||||
{
|
||||
for (int i = 0; i < OBJ_CLASS_NUM; i++)
|
||||
{
|
||||
if (labels[i] != nullptr)
|
||||
{
|
||||
free(labels[i]);
|
||||
labels[i] = nullptr;
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,36 @@
|
||||
#ifndef _RKNN_YOLOV8_DEMO_POSTPROCESS_H_
#define _RKNN_YOLOV8_DEMO_POSTPROCESS_H_

#include <stdint.h>
#include <vector>
#include "rknn_api.h"
#include "common.h"
#include "image_utils.h"

#define OBJ_NAME_MAX_SIZE 64   // maximum length of a class label string
#define OBJ_NUMB_MAX_SIZE 128  // maximum detections kept per frame
#define OBJ_CLASS_NUM 80       // number of classes (COCO default)
#define NMS_THRESH 0.45        // default IoU threshold for NMS
#define BOX_THRESH 0.25        // default confidence threshold

// class rknn_app_context_t;

// One detection: box in source-image pixel coordinates, confidence, class id.
typedef struct {
    image_rect_t box;
    float prop;
    int cls_id;
} object_detect_result;

// All detections for one frame (at most OBJ_NUMB_MAX_SIZE entries).
typedef struct {
    int id;
    int count;
    object_detect_result results[OBJ_NUMB_MAX_SIZE];
} object_detect_result_list;

// Load the label table (call once before coco_cls_to_name). Returns 0/-1.
int init_post_process();
// Free the label table loaded by init_post_process().
void deinit_post_process();
// Map a class id to its label string ("null" if unknown).
char *coco_cls_to_name(int cls_id);
// Decode raw model outputs into od_results (see yolov8.cc for details).
int post_process(rknn_app_context_t *app_ctx, rknn_output *outputs, letterbox_t *letter_box, float conf_threshold, float nms_threshold, object_detect_result_list *od_results);

// NOTE(review): no matching definition is visible in this translation unit —
// looks like a leftover from an older API revision; confirm before removing.
void deinitPostProcess();
#endif //_RKNN_YOLOV8_DEMO_POSTPROCESS_H_
|
||||
@@ -0,0 +1,273 @@
|
||||
// Copyright (c) 2023 by Rockchip Electronics Co., Ltd. All Rights Reserved.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
#include <math.h>
|
||||
|
||||
#include "yolov8.h"
|
||||
#include "common.h"
|
||||
#include "file_utils.h"
|
||||
#include "image_utils.h"
|
||||
|
||||
/* Pretty-print one RKNN tensor attribute (shape, layout, type, quantization)
 * to stdout; used while loading the model to log its I/O signature. */
static void dump_tensor_attr(rknn_tensor_attr *attr)
{
    printf("  index=%d, name=%s, n_dims=%d, dims=[%d, %d, %d, %d], n_elems=%d, size=%d, fmt=%s, type=%s, qnt_type=%s, "
           "zp=%d, scale=%f\n",
           attr->index, attr->name, attr->n_dims, attr->dims[0], attr->dims[1], attr->dims[2], attr->dims[3],
           attr->n_elems, attr->size, get_format_string(attr->fmt), get_type_string(attr->type),
           get_qnt_type_string(attr->qnt_type), attr->zp, attr->scale);
}
|
||||
|
||||
/*
 * Load a YOLOv8 .rknn model from disk, initialize the RKNN runtime, query the
 * model's input/output tensor attributes, and fill app_ctx with everything
 * inference needs (context handle, attrs, input geometry, quantization flag).
 * Returns 0 on success, -1 on any failure. Pair with release_yolov8_model().
 */
int init_yolov8_model(const char *model_path, rknn_app_context_t *app_ctx)
{
    int ret;
    int model_len = 0;
    char *model;
    rknn_context ctx = 0;

    // Load RKNN Model into a heap buffer (freed right after rknn_init copies it).
    model_len = read_data_from_file(model_path, &model);
    if (model == NULL)
    {
        printf("load_model fail!\n");
        return -1;
    }

    ret = rknn_init(&ctx, model, model_len, 0, NULL);
    free(model);
    if (ret < 0)
    {
        printf("rknn_init fail! ret=%d\n", ret);
        return -1;
    }

    // Get Model Input Output Number
    rknn_input_output_num io_num;
    ret = rknn_query(ctx, RKNN_QUERY_IN_OUT_NUM, &io_num, sizeof(io_num));
    if (ret != RKNN_SUCC)
    {
        printf("rknn_query fail! ret=%d\n", ret);
        return -1;
    }
    printf("model input num: %d, output num: %d\n", io_num.n_input, io_num.n_output);

    // Get Model Input Info (one attr per input tensor)
    printf("input tensors:\n");
    rknn_tensor_attr input_attrs[io_num.n_input];
    memset(input_attrs, 0, sizeof(input_attrs));
    for (int i = 0; i < io_num.n_input; i++)
    {
        input_attrs[i].index = i;
        ret = rknn_query(ctx, RKNN_QUERY_INPUT_ATTR, &(input_attrs[i]), sizeof(rknn_tensor_attr));
        if (ret != RKNN_SUCC)
        {
            printf("rknn_query fail! ret=%d\n", ret);
            return -1;
        }
        dump_tensor_attr(&(input_attrs[i]));
    }

    // Get Model Output Info (one attr per output tensor)
    printf("output tensors:\n");
    rknn_tensor_attr output_attrs[io_num.n_output];
    memset(output_attrs, 0, sizeof(output_attrs));
    for (int i = 0; i < io_num.n_output; i++)
    {
        output_attrs[i].index = i;
        ret = rknn_query(ctx, RKNN_QUERY_OUTPUT_ATTR, &(output_attrs[i]), sizeof(rknn_tensor_attr));
        if (ret != RKNN_SUCC)
        {
            printf("rknn_query fail! ret=%d\n", ret);
            return -1;
        }
        dump_tensor_attr(&(output_attrs[i]));
    }

    // Set to context
    app_ctx->rknn_ctx = ctx;

    // TODO
    // Quantized models expose int8 affine-quantized outputs; post_process()
    // uses this flag to pick the int8 vs float decode path.
    if (output_attrs[0].qnt_type == RKNN_TENSOR_QNT_AFFINE_ASYMMETRIC && output_attrs[0].type == RKNN_TENSOR_INT8)
    {
        app_ctx->is_quant = true;
    }
    else
    {
        app_ctx->is_quant = false;
    }

    // Copy the attribute arrays onto the heap; freed by release_yolov8_model().
    app_ctx->io_num = io_num;
    app_ctx->input_attrs = (rknn_tensor_attr *)malloc(io_num.n_input * sizeof(rknn_tensor_attr));
    memcpy(app_ctx->input_attrs, input_attrs, io_num.n_input * sizeof(rknn_tensor_attr));
    app_ctx->output_attrs = (rknn_tensor_attr *)malloc(io_num.n_output * sizeof(rknn_tensor_attr));
    memcpy(app_ctx->output_attrs, output_attrs, io_num.n_output * sizeof(rknn_tensor_attr));

    // Record input geometry; dim order depends on the model's tensor layout.
    if (input_attrs[0].fmt == RKNN_TENSOR_NCHW)
    {
        printf("model is NCHW input fmt\n");
        app_ctx->model_channel = input_attrs[0].dims[1];
        app_ctx->model_height = input_attrs[0].dims[2];
        app_ctx->model_width = input_attrs[0].dims[3];
    }
    else
    {
        printf("model is NHWC input fmt\n");
        app_ctx->model_height = input_attrs[0].dims[1];
        app_ctx->model_width = input_attrs[0].dims[2];
        app_ctx->model_channel = input_attrs[0].dims[3];
    }
    printf("model input height=%d, width=%d, channel=%d\n",
           app_ctx->model_height, app_ctx->model_width, app_ctx->model_channel);

    return 0;
}
|
||||
|
||||
// Tear down a model context set up by init_yolov8_model().
// Destroys the RKNN runtime context and frees the cached input/output
// tensor attribute arrays. Each field is reset after release, so calling
// this twice on the same context is harmless. Always returns 0.
int release_yolov8_model(rknn_app_context_t *app_ctx)
{
    if (app_ctx->rknn_ctx != 0)
    {
        rknn_destroy(app_ctx->rknn_ctx);
        app_ctx->rknn_ctx = 0;
    }

    // free(NULL) is defined as a no-op, so no NULL guard is needed.
    free(app_ctx->input_attrs);
    app_ctx->input_attrs = NULL;

    free(app_ctx->output_attrs);
    app_ctx->output_attrs = NULL;

    return 0;
}
|
||||
|
||||
// Run one YOLOv8 inference on `img`.
//
// Pipeline: letterbox the source image into a model-sized RGB888 buffer,
// feed it to the RKNN runtime, optionally pin the run to a single NPU core
// (core 1/2/3 -> RKNN_NPU_CORE_0/1/2; any other value leaves the default
// scheduling), then post-process the raw outputs into `od_results`.
//
// Returns 0 on success, negative on failure. `od_results` is zeroed first,
// so it is valid (empty) even on error.
int inference_yolov8_model(rknn_app_context_t *app_ctx, image_buffer_t *img, object_detect_result_list *od_results, int core)
{
    int ret;
    image_buffer_t dst_img;
    letterbox_t letter_box;
    rknn_input inputs[app_ctx->io_num.n_input];
    rknn_output outputs[app_ctx->io_num.n_output];
    const float nms_threshold = NMS_THRESH;      // default NMS IoU threshold
    const float box_conf_threshold = BOX_THRESH; // default box confidence threshold
    int bg_color = 114;                          // grey letterbox padding, as used by YOLO preprocessing

    if ((!app_ctx) || !(img) || (!od_results))
    {
        return -1;
    }

    memset(od_results, 0x00, sizeof(*od_results));
    memset(&letter_box, 0, sizeof(letterbox_t));
    memset(&dst_img, 0, sizeof(image_buffer_t));
    memset(inputs, 0, sizeof(inputs));
    memset(outputs, 0, sizeof(outputs));

    // Pre Process: allocate a model-sized RGB888 staging buffer.
    dst_img.width = app_ctx->model_width;
    dst_img.height = app_ctx->model_height;
    dst_img.format = IMAGE_FORMAT_RGB888;
    dst_img.size = get_image_size(&dst_img);
    dst_img.virt_addr = (unsigned char *)malloc(dst_img.size);
    if (dst_img.virt_addr == NULL)
    {
        printf("malloc buffer size:%d fail!\n", dst_img.size);
        return -1;
    }

    // Letterbox: scale + pad the source image into the model input size.
    // `letter_box` records the transform so boxes can be mapped back later.
    ret = convert_image_with_letterbox(img, &dst_img, &letter_box, bg_color);
    if (ret < 0)
    {
        printf("convert_image_with_letterbox fail! ret=%d\n", ret);
        // BUGFIX: was a bare `return -1` that leaked dst_img.virt_addr.
        ret = -1;
        goto out;
    }

    // Set Input Data
    inputs[0].index = 0;
    inputs[0].type = RKNN_TENSOR_UINT8;
    inputs[0].fmt = RKNN_TENSOR_NHWC;
    inputs[0].size = app_ctx->model_width * app_ctx->model_height * app_ctx->model_channel;
    inputs[0].buf = dst_img.virt_addr;

    ret = rknn_inputs_set(app_ctx->rknn_ctx, app_ctx->io_num.n_input, inputs);
    if (ret < 0)
    {
        printf("rknn_input_set fail! ret=%d\n", ret);
        ret = -1;
        goto out; // BUGFIX: release dst_img instead of leaking it
    }

    // Optionally pin this context to one NPU core (RK3588 has three).
    if (core == 1) {
        ret = rknn_set_core_mask(app_ctx->rknn_ctx, RKNN_NPU_CORE_0);
        //ret = rknn_set_core_mask(app_ctx->rknn_ctx, RKNN_NPU_CORE_0_1_2);
        if (ret < 0) {
            printf("rknn_set_core_mask(RKNN_NPU_CORE_0) fail! ret=%d\n", ret);
            ret = -1;
            goto out;
        }
    }
    else if (core == 2) {
        ret = rknn_set_core_mask(app_ctx->rknn_ctx, RKNN_NPU_CORE_1);
        if (ret < 0) {
            printf("rknn_set_core_mask(RKNN_NPU_CORE_1) fail! ret=%d\n", ret);
            ret = -1;
            goto out;
        }
    }
    else if (core == 3) {
        ret = rknn_set_core_mask(app_ctx->rknn_ctx, RKNN_NPU_CORE_2);
        if (ret < 0) {
            // BUGFIX: message previously said RKNN_NPU_CORE_1 (copy-paste error).
            printf("rknn_set_core_mask(RKNN_NPU_CORE_2) fail! ret=%d\n", ret);
            ret = -1;
            goto out;
        }
    }

    // Run
    printf("rknn_run\n");
    ret = rknn_run(app_ctx->rknn_ctx, nullptr);
    if (ret < 0)
    {
        printf("rknn_run fail! ret=%d\n", ret);
        ret = -1;
        goto out; // BUGFIX: release dst_img instead of leaking it
    }

    // Get Output (float outputs unless the model is quantized)
    memset(outputs, 0, sizeof(outputs));
    for (int i = 0; i < app_ctx->io_num.n_output; i++)
    {
        outputs[i].index = i;
        outputs[i].want_float = (!app_ctx->is_quant);
    }
    ret = rknn_outputs_get(app_ctx->rknn_ctx, app_ctx->io_num.n_output, outputs, NULL);
    if (ret < 0)
    {
        printf("rknn_outputs_get fail! ret=%d\n", ret);
        goto out;
    }

    // Post Process: decode boxes, apply confidence/NMS thresholds, and map
    // coordinates back through the letterbox transform.
    post_process(app_ctx, outputs, &letter_box, box_conf_threshold, nms_threshold, od_results);

    // Remember to release the rknn output buffers.
    rknn_outputs_release(app_ctx->rknn_ctx, app_ctx->io_num.n_output, outputs);

out:
    if (dst_img.virt_addr != NULL)
    {
        free(dst_img.virt_addr);
    }

    return ret;
}
|
||||
@@ -0,0 +1,42 @@
|
||||
// Copyright (c) 2023 by Rockchip Electronics Co., Ltd. All Rights Reserved.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
|
||||
#ifndef _RKNN_DEMO_YOLOV8_H_
|
||||
#define _RKNN_DEMO_YOLOV8_H_
|
||||
|
||||
#include "rknn_api.h"
|
||||
#include "common.h"
|
||||
|
||||
// Per-model inference context, filled in by init_yolov8_model() and
// released by release_yolov8_model().
typedef struct {
    rknn_context rknn_ctx;           // RKNN runtime handle (0 when not initialized)
    rknn_input_output_num io_num;    // number of model input/output tensors
    rknn_tensor_attr* input_attrs;   // heap-allocated copy of input tensor attrs (io_num.n_input entries)
    rknn_tensor_attr* output_attrs;  // heap-allocated copy of output tensor attrs (io_num.n_output entries)
    int model_channel;               // input channels, derived from input tensor dims
    int model_width;                 // input width in pixels
    int model_height;                // input height in pixels
    bool is_quant;                   // true if output 0 is INT8 affine-asymmetric quantized
} rknn_app_context_t;
|
||||
|
||||
#include "postprocess.h"
|
||||
|
||||
|
||||
// Load the RKNN model at model_path and populate app_ctx.
// Returns 0 on success, negative on failure.
int init_yolov8_model(const char* model_path, rknn_app_context_t* app_ctx);

// Destroy the RKNN context and free attribute arrays held by app_ctx.
// Returns 0.
int release_yolov8_model(rknn_app_context_t* app_ctx);

// Run one detection pass on img; results land in od_results.
// core selects an NPU core (1/2/3); other values use default scheduling.
// Returns 0 on success, negative on failure.
int inference_yolov8_model(rknn_app_context_t* app_ctx, image_buffer_t* img, object_detect_result_list* od_results, int core);
|
||||
|
||||
#endif //_RKNN_DEMO_YOLOV8_H_
|
||||
Reference in New Issue
Block a user