Changed directory structure and renamed applications

- autopilot -> drone_controller
- rtsp_ai_player -> ai_controller
- added top level qmake project file
- updated documentation
- moved small demo applications from tmp/ to misc/
This commit is contained in:
Tuomas Järvinen
2024-10-19 14:44:34 +02:00
parent 54b7dc41ca
commit 45c19baa45
94 changed files with 149 additions and 204 deletions
-129
View File
@@ -1,129 +0,0 @@
# rtsp_ai_player
`rtsp_ai_player` is an application that listens to an RTSP stream, analyzes images with an AI model, and shows the results visually. It also controls a gimbal camera to zoom in on the recognized objects. The application uses YOLOv8 AI models converted to the ONNX format.
### How to convert Azaion AI model file to ONNX format
```bash
yolo export model=azaion-2024-08-13.pt dynamic=False format=onnx imgsz=640,640
```
## How to use application locally on a Linux PC.
### Speed up compilation
```bash
echo "export MAKEFLAGS=\"-j8\"" >> ~/.bashrc
echo "export PATH=/usr/lib/ccache:\$PATH" >> ~/.bashrc
```
### Install OpenCV 4.10.0
```bash
sudo apt update
sudo apt install libgtk-3-dev libpng-dev cmake ffmpeg libavcodec-dev libavformat-dev libavfilter-dev
wget https://github.com/opencv/opencv/archive/refs/tags/4.10.0.zip
unzip 4.10.0.zip
cd opencv-4.10.0
mkdir build && cd build
cmake -DCMAKE_INSTALL_PREFIX=/opt/opencv-4.10.0 -DBUILD_opencv_world=ON -DOPENCV_GENERATE_PKGCONFIG=ON -DBUILD_PERF_TESTS=OFF -DBUILD_TESTS=OFF ..
make -j8 && sudo make install
```
### Install ONNX runtime 1.18.0
```bash
wget https://github.com/microsoft/onnxruntime/releases/download/v1.18.0/onnxruntime-linux-x64-1.18.0.tgz
sudo tar xf onnxruntime-linux-x64-1.18.0.tgz -C /opt
```
### Install ffmpeg and mediamtx RTSP server:
```bash
sudo apt update
sudo apt install ffmpeg
# For Amd64-like platforms:
wget https://github.com/bluenviron/mediamtx/releases/download/v1.8.4/mediamtx_v1.8.4_linux_amd64.tar.gz
mkdir mediamtx
tar xf mediamtx_v1.8.4_linux_amd64.tar.gz -C mediamtx
# For OrangePi5 use this lib instead (since it's an ARM platform):
wget https://github.com/bluenviron/mediamtx/releases/download/v1.9.1/mediamtx_v1.9.1_linux_arm64v8.tar.gz
mkdir mediamtx
tar xf mediamtx_v1.9.1_linux_arm64v8.tar.gz -C mediamtx
```
### If you use video file from the local RTSP server:
```bash
cd mediamtx
./mediamtx
```
### Play Azaion mp4 video file from RTSP server ... :
```bash
ffmpeg -re -stream_loop -1 -i $HOME/azaion/videos/for_ai_cut.mp4 -c copy -f rtsp rtsp://localhost:8554/live.stream
```
### ... or play simple video file from RTSP server:
```bash
ffmpeg -re -stream_loop -1 -i $HOME/azaion/videos/table.mp4 -c copy -f rtsp rtsp://localhost:8554/live.stream
```
### Test RTSP streaming with ffplay:
```bash
ffplay -rtsp_transport tcp rtsp://localhost:8554/live.stream
```
### Compile and run rtsp_ai_player with YOLOv8 medium AI model:
```bash
cd autopilot/misc/rtsp_ai_player
qmake6 && make
./rtsp_ai_player ~/azaion/models/onnx/yolov8m.onnx
```
### Compile and run rtsp_ai_player with Azaion AI model:
```bash
cd autopilot/misc/rtsp_ai_player
qmake6 && make
./rtsp_ai_player ~/azaion/models/azaion/azaion-2024-08-13.onnx
```
### Compile and run rtsp_ai_player with YOLOv8 medium model and gimbal camera:
```bash
cd autopilot/misc/rtsp_ai_player
qmake6 CONFIG+=gimbal && make
./rtsp_ai_player ~/azaion/models/onnx/yolov8m.onnx
```
## How to use application on Orange PI 5.
### Install ffmpeg and mediamtx to Ubuntu PC:
```bash
sudo apt update
sudo apt install ffmpeg
wget https://github.com/bluenviron/mediamtx/releases/download/v1.8.4/mediamtx_v1.8.4_linux_amd64.tar.gz
mkdir mediamtx
tar xf mediamtx_v1.8.4_linux_amd64.tar.gz -C mediamtx
```
### Launch RTSP server in Ubuntu PC
```bash
cd mediamtx
./mediamtx
```
### Play RTSP video stream in Ubuntu PC
```bash
ffmpeg -re -stream_loop -1 -i SOME_MP4_VIDEO_FILE -c copy -f rtsp rtsp://localhost:8554/live.stream
```
### Test RTSP stream with ffplay in Ubuntu PC:
```bash
ffplay -rtsp_transport tcp rtsp://localhost:8554/live.stream
```
### Compile and launch RTSP AI PLAYER in Orange PI 5:
Modify autopilot/misc/rtsp_ai_player/aiengineconfig.h and change 192.168.168.91 to the IP address of the RTSP source
```bash
cd autopilot/misc/rtsp_ai_player
mkdir build
cd build
qmake6 CONFIG+=opi5 .. && make -j8 && ./rtsp_ai_player [RKNN_MODEL_FILE]
```
-128
View File
@@ -1,128 +0,0 @@
#include <QDebug>
#include <opencv2/highgui.hpp>
#include "aiengine.h"
#include "aiengineinference.h"
#include "aiengineimagesaver.h"
#if defined(OPI5_BUILD)
#include "src-opi5/aiengineinferenceopi5.h"
#elif defined(OPENCV_BUILD)
#include "src-opencv-onnx/aiengineinferenceopencvonnx.h"
#elif defined(NCNN_BUILD)
#include "src-ncnn/aiengineinferencencnn.h"
#else
#include "src-onnx-runtime/aiengineinferenceonnxruntime.h"
#endif
// Constructs the AI engine: wires the RTSP listener to the frame slot and
// creates the compile-time-selected inference backend. Each inference engine
// runs in its own QThread and is fed through queued signal/slot connections,
// so frames and results cross thread boundaries safely.
//
// modelPath: path to the model file (ONNX or RKNN, depending on the build).
// parent:    standard Qt ownership parent.
AiEngine::AiEngine(QString modelPath, QObject *parent)
    : QObject{parent}
{
    mRtspListener = new AiEngineRtspListener(this);
    connect(mRtspListener, &AiEngineRtspListener::frameReceived, this, &AiEngine::frameReceivedSlot);
    // Backend selection. The OPI5 build creates three engines so each of the
    // three NPU cores (initialize(0..2)) can run inference in parallel.
#if defined(OPI5_BUILD)
    mInference = new AiEngineInferenceOpi5(modelPath);
    mInference->initialize(0);
    mInference2 = new AiEngineInferenceOpi5(modelPath);
    mInference2->initialize(1);
    mInference3 = new AiEngineInferenceOpi5(modelPath);
    mInference3->initialize(2);
#elif defined(OPENCV_BUILD)
    mInference = new AiEngineInferenceOpencvOnnx(modelPath);
#elif defined(NCNN_BUILD)
    // NOTE(review): class name spelling "AiEngineInferencevNcnn" (extra 'v')
    // looks accidental — confirm it matches the declaration in src-ncnn.
    mInference = new AiEngineInferencevNcnn(modelPath);
#else
    // NOTE(review): same 'v' spelling question as above for the ONNX runtime class.
    mInference = new AiEngineInferencevOnnxRuntime(modelPath);
#endif
    // Worker thread for the primary inference engine; queued connections keep
    // inference off the GUI/main thread.
    QThread *inferenceThread = new QThread(this);
    mInference->moveToThread(inferenceThread);
    connect(mInference, &AiEngineInference::resultsReady, this, &AiEngine::inferenceResultsReceivedSlot, Qt::QueuedConnection);
    connect(this, &AiEngine::inferenceFrame, mInference, &AiEngineInference::performInferenceSlot, Qt::QueuedConnection);
    inferenceThread->start();
#ifdef OPI5_BUILD
    // Two additional worker threads for the extra OPI5 NPU cores.
    QThread *inferenceThread2 = new QThread(this);
    mInference2->moveToThread(inferenceThread2);
    connect(mInference2, &AiEngineInference::resultsReady, this, &AiEngine::inferenceResultsReceivedSlot, Qt::QueuedConnection);
    connect(this, &AiEngine::inferenceFrame2, mInference2, &AiEngineInference::performInferenceSlot, Qt::QueuedConnection);
    inferenceThread2->start();
    QThread *inferenceThread3 = new QThread(this);
    mInference3->moveToThread(inferenceThread3);
    connect(mInference3, &AiEngineInference::resultsReady, this, &AiEngine::inferenceResultsReceivedSlot, Qt::QueuedConnection);
    connect(this, &AiEngine::inferenceFrame3, mInference3, &AiEngineInference::performInferenceSlot, Qt::QueuedConnection);
    inferenceThread3->start();
#endif
#ifdef GIMBAL
    mGimbalClient = new AiEngineGimbalClient(this);
#else
    // No gimbal support in this build; checked before use in the result slot.
    mGimbalClient = nullptr;
#endif
}
// Begin consuming the RTSP stream and start the clock used for FPS reporting.
void AiEngine::start(void) {
    mRtspListener->startListening();
    mElapsedTimer.start();
}
// Stop consuming frames from the RTSP source.
void AiEngine::stop(void) {
    mRtspListener->stopListening();
}
// Receives one finished inference result (from any worker thread, delivered
// queued on this object's thread), reports average FPS, forwards the result
// to the gimbal client (if built with GIMBAL) and displays the frame.
void AiEngine::inferenceResultsReceivedSlot(AiEngineInferenceResult result)
{
    mFrameCounter++;
    // Average FPS since start(); elapsed() is in milliseconds. On the very
    // first frame elapsed() may be 0, yielding inf, which qDebug prints harmlessly.
    qDebug() << "FPS = " << (mFrameCounter / (mElapsedTimer.elapsed()/1000.0f));
    if (mGimbalClient != nullptr) {
        mGimbalClient->inferenceResultSlot(result);
    }
    cv::imshow("Received Frame", result.frame);
#ifdef SAVE_IMAGES
    static int imageCounter = 0;
    AiEngineImageSaver *saver = new AiEngineImageSaver(result.frame, ++imageCounter);
    // Fix: connect BEFORE start(). Previously the connection was made after
    // start(); if the saver finished before the connect executed, the
    // finished signal was missed and the saver object leaked.
    connect(saver, &AiEngineImageSaver::finished, saver, &QObject::deleteLater);
    saver->start();
#endif
}
// Receives a decoded frame from the RTSP listener and hands it to the first
// idle inference worker. Frames arriving while every worker is busy are
// dropped, which keeps the pipeline real-time instead of queueing stale
// frames. OPI5 builds poll up to three workers (one per NPU core) in order.
void AiEngine::frameReceivedSlot(cv::Mat frame)
{
    // Counts only the frames actually dispatched to an inference worker.
    static int framecounter = 0;
    if (mInference->isActive() == false) {
        emit inferenceFrame(frame);
        framecounter++;
    }
#ifdef OPI5_BUILD
    else if (mInference2->isActive() == false) {
        emit inferenceFrame2(frame);
        framecounter++;
    }
    else if (mInference3->isActive() == false) {
        emit inferenceFrame3(frame);
        framecounter++;
    }
#endif
}
-36
View File
@@ -1,36 +0,0 @@
#pragma once
#include <QObject>
#include <QElapsedTimer>
#include <opencv2/core.hpp>
#include <opencv2/videoio.hpp>
#include "aienginertsplistener.h"
#include "aiengineinference.h"
#include "aienginegimbalclient.h"
// AiEngine ties together the RTSP listener, the inference worker thread(s)
// and the optional gimbal client. Frames flow:
//   RTSP listener -> frameReceivedSlot -> inferenceFrame{,2,3} signals
//   -> worker threads -> inferenceResultsReceivedSlot.
class AiEngine : public QObject
{
    Q_OBJECT
public:
    explicit AiEngine(QString modelPath, QObject *parent = nullptr);
    // Start/stop consuming the RTSP stream.
    void start(void);
    void stop(void);
public slots:
    // New frame from the RTSP listener (dropped if all workers are busy).
    void frameReceivedSlot(cv::Mat frame);
    // Finished inference result from any worker thread.
    void inferenceResultsReceivedSlot(AiEngineInferenceResult result);
signals:
    // Dispatch a frame to worker 1/2/3 (workers 2 and 3 exist only on OPI5 builds).
    void inferenceFrame(cv::Mat frame);
    void inferenceFrame2(cv::Mat frame);
    void inferenceFrame3(cv::Mat frame);
private:
    QElapsedTimer mElapsedTimer;
    uint32_t mFrameCounter = 0;
    // Fix: null-initialize every pointer member. mInference2/mInference3 are
    // only assigned on OPI5 builds (and mGimbalClient only with GIMBAL), so
    // without in-class initializers they were indeterminate on other builds.
    AiEngineRtspListener *mRtspListener = nullptr;
    AiEngineInference *mInference = nullptr;
    AiEngineInference *mInference2 = nullptr;
    AiEngineInference *mInference3 = nullptr;
    AiEngineGimbalClient *mGimbalClient = nullptr;
};
-10
View File
@@ -1,10 +0,0 @@
#pragma once

#include <QString>

// RTSP source URL used by the AI engine.
// Fix: declared 'inline' (C++17, required by Qt6 anyway) so the variable has
// exactly one program-wide definition. A plain non-inline global defined in a
// header violates the ODR as soon as the header is included from more than
// one translation unit.
#ifdef OPI5_BUILD
inline QString rtspVideoUrl = "rtsp://192.168.0.25:8554/main.264";
#else
inline QString rtspVideoUrl = "rtsp://localhost:8554/live.stream";
#endif
-93
View File
@@ -1,93 +0,0 @@
#pragma once
#include <QByteArray>
#include <QString>
#include "aienginegimbalserverdefines.h"
// Geographic position shared by several of the messages below.
struct AiEngineGeoPosition {
    float lat;
    float lon;
    float alt;
};

// Client -> server: sent whenever the drone's location or attitude changes.
struct AiEngineDronePosition {
    AiEngineGeoPosition position;
    float pitch;
    float yaw;
};

// Axis-aligned rectangle in image coordinates.
struct AiEngineRectangle {
    int left;
    int top;
    int right;
    int bottom;
};

// Client -> server: sent when the AI finds a target. The index is a running
// number used to keep track of individual targets.
struct AiEngineCameraTarget {
    int index;
    AiEngineRectangle rectangle;
};

// Server -> client: sent once the server has zoomed to the target described
// by an AiEngineCameraTarget. 'position' holds the calculated geoposition of
// that target.
struct AiEngineTargetPosition {
    int targetIndex;
    AiEngineGeoPosition position;
};

// Client -> server: request to point the camera elsewhere (for example when
// nothing interesting was found). Server -> client: also broadcast every few
// seconds so the AI knows where the camera is pointing.
struct AiEngineCameraPosition {
    float yaw;
    float pitch;
    float zoom;
};

// Derived rectangle metrics used by the turn/zoom calculations.
struct AiEngineRectangleProperties
{
    int width;
    int height;
    int middleX;
    int middleY;
};

// Cached gimbal/camera state.
struct AiEngineGimbalStatus {
    // Queried from the camera only once, at setup time.
    uint resolutionX;
    uint resolutionY;
    uint hardwareID;
    float maxZoom;
    float aiEngineCameraFOVHMin;
    float aiEngineCameraFOVHMax;
    float aiEngineCameraFOVVMin;
    float aiEngineCameraFOVVMax;
    float aiEngineCameraFLMin;
    float aiEngineCameraFLMax;
    // Refreshed before every command.
    float currentYaw;
    float currentPitch;
    float currentRoll;
    float currentZoom;
};

// One UDP/serial command entry: identifier, raw payload bytes and a
// human-readable description.
struct AiEngineServerSerialCommandStructure
{
    UDP_COMMAND_ID id;
    QByteArray command;
    QString description;
};
@@ -1,138 +0,0 @@
#include <QDebug>
#include <QThread>
#include <QVector>
#include "aienginegimbalclient.h"
#include "aiengineinference.h"
// Creates the gimbal server, moves it to a dedicated worker thread and wires
// every client<->server signal/slot pair with queued connections (the two
// objects live in different threads). All connections are made here so the
// AiEngineGimbalServer class does not have to repeat them.
AiEngineGimbalClient::AiEngineGimbalClient(QObject *parent)
    : QObject{parent}
{
    // Server is created without a parent (required for moveToThread).
    // NOTE(review): neither the server object nor the thread is ever torn
    // down - confirm the intended process-long lifetime.
    mGimbalServer = new AiEngineGimbalServer();
    QThread *gimbalServerThread = new QThread(this);
    mGimbalServer->moveToThread(gimbalServerThread);
    // Client -> Server communication. Only zoomToAiTarget() signal is emitted ATM.
    connect(this, &AiEngineGimbalClient::setDronePosition, mGimbalServer, &AiEngineGimbalServer::dronePositionSlot, Qt::QueuedConnection);
    connect(this, &AiEngineGimbalClient::zoomToAiTarget, mGimbalServer, &AiEngineGimbalServer::zoomToAiTargetSlot, Qt::QueuedConnection);
    connect(this, &AiEngineGimbalClient::setCameraPosition, mGimbalServer, &AiEngineGimbalServer::cameraPositionSlot, Qt::QueuedConnection);
    // Server -> Client communication
    connect(mGimbalServer, &AiEngineGimbalServer::aiTargetZoomed, this, &AiEngineGimbalClient::aiTargetZoomedSlot, Qt::QueuedConnection);
    connect(mGimbalServer, &AiEngineGimbalServer::newCameraPosition, this, &AiEngineGimbalClient::cameraPositionSlot, Qt::QueuedConnection);
    gimbalServerThread->start();
}
// Logs the geoposition the server computed after zooming to a target.
void AiEngineGimbalClient::aiTargetZoomedSlot(AiEngineTargetPosition targetPosition)
{
    const AiEngineGeoPosition &geo = targetPosition.position;
    qDebug() << "AiEngineGimbalClient::aiTargetZoomedSlot() Server zoomed to index:"
             << targetPosition.targetIndex
             << "Geopos:" << geo.lat << geo.lon << geo.alt;
}
// Logs camera orientation updates pushed periodically by the gimbal server.
void AiEngineGimbalClient::cameraPositionSlot(AiEngineCameraPosition cameraPosition)
{
    const float newPitch = cameraPosition.pitch;
    const float newYaw = cameraPosition.yaw;
    const float newZoom = cameraPosition.zoom;
    qDebug() << "AiEngineGimbalClient::cameraPositionSlot() Camera moved to:"
             << newPitch << newYaw << newZoom;
}
// Returns the smallest rectangle that encloses every detected object.
//
// Fix: the extremes were previously seeded with magic sentinels (1000000 for
// top/left, 0 for bottom/right), which silently broke for coordinates beyond
// the sentinel and returned an inverted rectangle for an empty list. Seeding
// from the first object's rectangle fixes both.
AiEngineRectangle AiEngineGimbalClient::getGroupCoordinates(QVector<AiEngineObject> &objects)
{
    if (objects.isEmpty()) {
        // Degenerate but well-formed result for an empty detection list.
        return AiEngineRectangle{0, 0, 0, 0};
    }
    AiEngineRectangle groupRectangle = objects[0].rectangle;
    for (int i = 1; i < objects.size(); i++) {
        const AiEngineRectangle &objectRectangle = objects[i].rectangle;
        if (objectRectangle.top < groupRectangle.top) {
            groupRectangle.top = objectRectangle.top;
        }
        if (objectRectangle.left < groupRectangle.left) {
            groupRectangle.left = objectRectangle.left;
        }
        if (objectRectangle.bottom > groupRectangle.bottom) {
            groupRectangle.bottom = objectRectangle.bottom;
        }
        if (objectRectangle.right > groupRectangle.right) {
            groupRectangle.right = objectRectangle.right;
        }
    }
    return groupRectangle;
}
// Receives one inference result, selects the single most probable detection
// and (when the gimbal server is idle) asks it to zoom to that target.
// Results with no detections are ignored.
void AiEngineGimbalClient::inferenceResultSlot(AiEngineInferenceResult result)
{
    if (result.objects.size() == 0) {
        return;
    }
    // TODO!! Just increasing number for testing purposes ATM.
    static int index = 0;
    // Find the detection with the highest probability ("propability" is the
    // field name used by AiEngineObject).
    int bestObjectIndex = -1;
    float bestObjectProb = -1;
    for (int i = 0; i < result.objects.size(); i++) {
        const AiEngineObject &object = result.objects[i];
        if (object.propability > bestObjectProb) {
            bestObjectIndex = i;
            bestObjectProb = object.propability;
        }
    }
    // If a best target was found, ask the camera to zoom to it. The request
    // is skipped (and the index not consumed) while the server is busy with
    // a previous zoom sequence.
    if (bestObjectIndex >= 0) {
        const AiEngineObject &object = result.objects[bestObjectIndex];
        AiEngineCameraTarget target;
        target.rectangle = object.rectangle;
        target.index = index++;
        qDebug() << "Found best target from index" << bestObjectIndex
                 << "Name:" << object.classStr
                 << "Probability:" << bestObjectProb;
        if (mGimbalServer->isAvailable()) {
            emit zoomToAiTarget(target);
        }
    }
    /*
    // Alternative (currently disabled): zoom to the bounding box of ALL
    // recognized objects at once instead of the single best one.
    AiEngineRectangle groupRect = getGroupCoordinates(result.objects);
    if (groupRect.top > 600 || groupRect.bottom > 600) {
        qDebug() << "ERROR! inferenceResultSlot() groupRect.top > 600 || groupRect.bottom > 600";
        return;
    }
    if (groupRect.left > 600 || groupRect.right > 600) {
        qDebug() << "ERROR! inferenceResultSlot() groupRect.left > 600 || groupRect.right > 600";
        return;
    }
    if ((groupRect.bottom <= groupRect.top) || (groupRect.right <= groupRect.left)) {
        qDebug() << "ERROR! (groupRect.bottom <= groupRect.top) || (groupRect.right <= groupRect.left)";
        return;
    }
    qDebug() << "inferenceResultSlot() Zooming to square top=" << groupRect.top << "x" << groupRect.left << "and bottom:" << groupRect.bottom << "x" << groupRect.right;
    */
}
@@ -1,30 +0,0 @@
#pragma once
#include <QObject>
#include <QElapsedTimer>
#include "aiengineinference.h"
#include "aienginegimbalserver.h"
#include "aienginedefinitions.h"
// Client-side facade for the gimbal camera: receives AI inference results,
// picks targets and forwards zoom/position requests to an
// AiEngineGimbalServer running in its own worker thread (all cross-thread
// communication is via the queued signals below).
class AiEngineGimbalClient : public QObject
{
    Q_OBJECT
public:
    explicit AiEngineGimbalClient(QObject *parent = nullptr);
private slots:
    // Server finished zooming; carries the target's calculated geoposition.
    void aiTargetZoomedSlot(AiEngineTargetPosition targetPosition);
    // Periodic camera orientation update from the server.
    void cameraPositionSlot(AiEngineCameraPosition cameraPosition);
public slots:
    // Entry point: called with each AI inference result.
    void inferenceResultSlot(AiEngineInferenceResult results);
signals:
    // Client -> server requests (queued; server lives in another thread).
    void setDronePosition(AiEngineDronePosition);
    void zoomToAiTarget(AiEngineCameraTarget);
    void setCameraPosition(AiEngineCameraPosition);
private:
    // Smallest rectangle enclosing all detected objects.
    AiEngineRectangle getGroupCoordinates(QVector<AiEngineObject> &objects);
    AiEngineGimbalServer *mGimbalServer;
};
@@ -1,103 +0,0 @@
#include <QDebug>
#include <QTimer>
#include "aienginegimbalserver.h"
#include "aienginegimbalserveractions.h"
// Constructs the gimbal server. The server starts out unavailable and
// becomes available after a 5 s settling delay; the actions helper is given
// pointers to the shared socket/command/response/status objects.
AiEngineGimbalServer::AiEngineGimbalServer(QObject *parent)
    : QObject{parent}
{
    // TODO!! Setup and use serial port....
    mIsAvailable = false;
    qDebug() << "Initial is available: " << mIsAvailable;
    // Give the hardware time to settle before accepting zoom requests.
    QTimer::singleShot(5000, this, [this]() {
        mIsAvailable = true;
        qDebug() << "Initial is available: " << mIsAvailable;
    });
    mActions.setup(&mUdpSocket, &mUdpCommand, &mUdpResponse, &mGimbalStatus);
    // TODO: Remove after testing — hard-coded drone position (Lviv area).
    mDronePosition.position.alt = 1000;
    mDronePosition.position.lat = 49.8397;
    mDronePosition.position.lon = 24.0319;
    mDronePosition.pitch = 0.0;
    mDronePosition.yaw = 90.0;
    // Relay the actions helper's result signal straight out of this class.
    connect(&mActions, &AiEngineGimbalServerActions::aiTargetZoomed, this, &AiEngineGimbalServer::aiTargetZoomed);
}
// TODO!! Client doesn't really send any signal yet to this slot.
// Stores the latest drone position/attitude for later target geolocation.
void AiEngineGimbalServer::dronePositionSlot(AiEngineDronePosition dronePosition)
{
    qDebug() << "AiEngineGimbalServer::dronePositionSlot() Server got new drone position:"
             << dronePosition.position.lat << dronePosition.position.lon
             << dronePosition.position.alt << dronePosition.pitch << dronePosition.yaw;
    mDronePosition = dronePosition;
}
// This is actually called from the client (queued, cross-thread).
// Runs one complete "inspect target" sequence, paced by single-shot timers:
//   t=0        turn toward the target
//   t=3.0 s    compute the target's geoposition (emits aiTargetZoomed)
//   t=3.1 s    zoom in
//   t=13.1 s   restore the previous orientation and zoom
//   t=16.1 s   mark the server available again
// mIsAvailable gates re-entry for the whole ~16 s sequence; requests
// arriving while busy are silently dropped.
void AiEngineGimbalServer::zoomToAiTargetSlot(AiEngineCameraTarget target)
{
    qDebug() << "zoomToAiTargetSlot called";
    if (mIsAvailable == true) {
        mIsAvailable = false;
        qDebug() << "Is available: " << mIsAvailable;
        qDebug() << "AiEngineGimbalServer::zoomToAiTargetSlot() Move camera to the new target:"
                 << "index:" << target.index
                 << "pos:" << target.rectangle.top << target.rectangle.left << target.rectangle.bottom << target.rectangle.right;
        // Rectangle calculation for having proper zoom on group / target
        AiEngineRectangleProperties rectangle = mActions.calculateRectangleProperties(target.rectangle.top, target.rectangle.left, target.rectangle.bottom, target.rectangle.right);
        qDebug() << "rectangle.middleX: " << rectangle.middleX;
        qDebug() << "rectangle.middleY: " << rectangle.middleY;
        qDebug() << "rectangle.width: " << rectangle.width;
        qDebug() << "rectangle.height: " << rectangle.height;
        // Turn
        mActions.turnToTarget(rectangle);
        // Calculate location. Capture copies: the members may change before
        // the timer fires.
        int delay1 = 3000; // Adjust this value as needed
        AiEngineDronePosition dronePosition = mDronePosition;
        int targetIndex = target.index;
        QTimer::singleShot(delay1, this, [this, dronePosition, targetIndex]() { mActions.getLocation(dronePosition, targetIndex); });
        // Zoom
        int delay2 = delay1 + 100; // Adjust this value as needed
        QTimer::singleShot(delay2, this, [this, rectangle]() { mActions.zoomToTarget(rectangle); });
        // Return to previous position (status captured by value, pre-zoom).
        int delay3 = delay2 + 10000; // Adjust this value as needed
        AiEngineGimbalStatus gimbalStatus = mGimbalStatus;
        QTimer::singleShot(delay3, this, [this, gimbalStatus]() { mActions.restoreOrientationAndZoom(gimbalStatus); });
        // Allow calls
        int delay4 = delay3 + 3000; // Adjust this value as needed
        QTimer::singleShot(delay4, this, [this]() {
            mIsAvailable = true;
            qDebug() << "Is available: " << mIsAvailable;
        });
    }
}
// Reports whether the server is idle and able to accept a new zoom request.
bool AiEngineGimbalServer::isAvailable(void) {
    return mIsAvailable;
}
// TODO!! Not sent from the client yet.
// Would move the camera to the requested orientation; currently only logs.
void AiEngineGimbalServer::cameraPositionSlot(AiEngineCameraPosition position)
{
    const float reqPitch = position.pitch;
    const float reqYaw = position.yaw;
    const float reqZoom = position.zoom;
    qDebug() << "AiEngineGimbalServer::cameraPositionSlot() Move camera to:" << reqPitch << reqYaw << "zoom:" << reqZoom;
}
@@ -1,37 +0,0 @@
#pragma once
#include <QObject>
#include <QMap>
#include "aienginedefinitions.h"
#include "aienginegimbalserveractions.h"
#include "aienginegimbalserverudpcommand.h"
#include "aienginegimbalserverudpresponse.h"
#include "aienginegimbalserverudp.h"
// Worker-thread side of the gimbal control: receives queued requests from
// AiEngineGimbalClient, drives the camera through AiEngineGimbalServerActions
// and reports results back via signals.
class AiEngineGimbalServer : public QObject
{
    Q_OBJECT
public:
    explicit AiEngineGimbalServer(QObject *parent = nullptr);
    // True when idle; false for the duration of a zoom sequence.
    bool isAvailable(void);
public slots:
    void dronePositionSlot(AiEngineDronePosition);
    void zoomToAiTargetSlot(AiEngineCameraTarget);
    void cameraPositionSlot(AiEngineCameraPosition);
signals:
    // Emitted when a target has been zoomed and its geoposition computed.
    void aiTargetZoomed(AiEngineTargetPosition);
    // Periodic camera orientation broadcast.
    void newCameraPosition(AiEngineCameraPosition);
private:
    AiEngineGimbalServerUDP mUdpSocket;
    AiEngineGimbalStatus mGimbalStatus;
    AiEngineDronePosition mDronePosition;
    AiEngineGimbalServerUDPCommand mUdpCommand;
    AiEngineGimbalServerUDPResponse mUdpResponse;
    AiEngineGimbalServerActions mActions;
    // Fix: in-class initializer so the flag is never indeterminate, even if a
    // future constructor path forgets to set it.
    bool mIsAvailable = false;
};
@@ -1,356 +0,0 @@
#include <QUdpSocket>
#include <QVariant>
#include "aienginegimbalserveractions.h"
#include "aienginegimbalserverudp.h"
// Trivial constructor: all real initialization happens in setup(), which
// must be called before any other method is used.
AiEngineGimbalServerActions::AiEngineGimbalServerActions(QObject *parent)
    : QObject{parent} {}
// One-time initialization: stores the shared helper pointers, queries the
// camera for static properties (hardware ID, resolution, max zoom), then
// drives the gimbal to its initial orientation and zoom and caches that
// state in *gimbalStatus. Must be called before any other method.
// NOTE: all UDP exchanges here are synchronous (send, then blocking read).
void AiEngineGimbalServerActions::setup(AiEngineGimbalServerUDP *udpSocket, AiEngineGimbalServerUDPCommand *udpCommand, AiEngineGimbalServerUDPResponse *udpResponse, AiEngineGimbalStatus *gimbalStatus)
{
    mUdpSocket = udpSocket;
    mUdpCommand = udpCommand;
    mUdpResponse = udpResponse;
    mGimbalStatus = gimbalStatus;
    // Set initial position and update status
    QByteArray tempCommand;
    QByteArray tempResponse;
    QHash<QString, QVariant> responseValues;
    // Get camera ID, then load the per-model FOV and focal-length constants.
    tempCommand = mUdpCommand->getCommand(UDP_COMMAND_ID::ACQUIRE_HW_INFO);
    mUdpSocket->sendCommand(tempCommand);
    tempResponse = mUdpSocket->readResponse();
    responseValues = mUdpResponse->getResponceValues(tempResponse);
    mGimbalStatus->hardwareID = responseValues["hardware_id"].toInt();
    switch (mGimbalStatus->hardwareID) {
    case AI_ENGINE_CAMERA_ZR10_HID:
        mGimbalStatus->aiEngineCameraFOVHMin = AI_ENGINE_CAMERA_ZR10_FIELD_OF_VIEW_HORIZONTAL_MIN;
        mGimbalStatus->aiEngineCameraFOVHMax = AI_ENGINE_CAMERA_ZR10_FIELD_OF_VIEW_HORIZONTAL_MAX;
        mGimbalStatus->aiEngineCameraFOVVMin = AI_ENGINE_CAMERA_ZR10_FIELD_OF_VIEW_VERTICAL_MIN;
        mGimbalStatus->aiEngineCameraFOVVMax = AI_ENGINE_CAMERA_ZR10_FIELD_OF_VIEW_VERTICAL_MAX;
        mGimbalStatus->aiEngineCameraFLMin = AI_ENGINE_CAMERA_ZR10_FOCAL_LENGTH_MIN;
        mGimbalStatus->aiEngineCameraFLMax = AI_ENGINE_CAMERA_ZR10_FOCAL_LENGTH_MAX;
        break;
    case AI_ENGINE_CAMERA_A8_HID:
        mGimbalStatus->aiEngineCameraFOVHMin = AI_ENGINE_CAMERA_A8_FIELD_OF_VIEW_HORIZONTAL_MIN;
        mGimbalStatus->aiEngineCameraFOVHMax = AI_ENGINE_CAMERA_A8_FIELD_OF_VIEW_HORIZONTAL_MAX;
        mGimbalStatus->aiEngineCameraFOVVMin = AI_ENGINE_CAMERA_A8_FIELD_OF_VIEW_VERTICAL_MIN;
        mGimbalStatus->aiEngineCameraFOVVMax = AI_ENGINE_CAMERA_A8_FIELD_OF_VIEW_VERTICAL_MAX;
        mGimbalStatus->aiEngineCameraFLMin = AI_ENGINE_CAMERA_A8_FOCAL_LENGTH_MIN;
        mGimbalStatus->aiEngineCameraFLMax = AI_ENGINE_CAMERA_A8_FOCAL_LENGTH_MAX;
        break;
    default:
        // Unknown camera: FOV/FL fields stay unset.
        qDebug().noquote().nospace() << "ERROR: Unknown HardwareID " << mGimbalStatus->hardwareID;
        break;
    }
    // Get resolution to reduce calls to camera
    tempCommand = mUdpCommand->getCommand(UDP_COMMAND_ID::ACQUIRE_CAMERA_CODEC_SPECS);
    mUdpSocket->sendCommand(tempCommand);
    tempResponse = mUdpSocket->readResponse();
    responseValues = mUdpResponse->getResponceValues(tempResponse);
    mGimbalStatus->resolutionX = responseValues["width"].toInt();
    mGimbalStatus->resolutionY = responseValues["height"].toInt();
    // Get max zoom value to reduce calls to camera (capped at 10x, the range
    // the FOV interpolation math assumes).
    tempCommand = mUdpCommand->getCommand(UDP_COMMAND_ID::ACQUIRE_MAX_ZOOM_VALUE);
    mUdpSocket->sendCommand(tempCommand);
    tempResponse = mUdpSocket->readResponse();
    responseValues = mUdpResponse->getResponceValues(tempResponse);
    float maxZoom = responseValues["zoom"].toFloat();
    mGimbalStatus->maxZoom = maxZoom > 10 ? 10 : maxZoom;
    // Go to initial orientation. Angles are encoded as little-endian int16
    // in tenths of a degree at payload offsets 8..11 (yaw lo/hi, pitch lo/hi).
    tempCommand = mUdpCommand->getCommand(UDP_COMMAND_ID::TURN_TO_DEGREES);
    int16_t degreesVal = AI_ENGINE_GIMBAL_INITIAL_YAW * 10;
    tempCommand[8] = degreesVal & 0xFF;
    tempCommand[9] = degreesVal >> 8;
    degreesVal = AI_ENGINE_GIMBAL_INITIAL_PITCH * 10;
    tempCommand[10] = degreesVal & 0xFF;
    tempCommand[11] = degreesVal >> 8;
    mUdpSocket->sendCommand(tempCommand);
    // Go to initial zoom. Zoom is encoded as integer part + tenths at
    // payload offsets 8 and 9.
    tempCommand = mUdpCommand->getCommand(UDP_COMMAND_ID::ZOOM_TO_X);
    uint8_t integerPart = static_cast<uint8_t>(AI_ENGINE_CAMERA_INITIAL_ZOOM);
    float fractionalPart = AI_ENGINE_CAMERA_INITIAL_ZOOM - integerPart;
    uint8_t scaledFractional = uint8_t(fractionalPart * 10);
    tempCommand[8] = integerPart;
    tempCommand[9] = scaledFractional;
    mUdpSocket->sendCommand(tempCommand);
    // Cache the commanded initial state (assumed reached; not read back).
    mGimbalStatus->currentPitch = AI_ENGINE_GIMBAL_INITIAL_PITCH;
    mGimbalStatus->currentRoll = AI_ENGINE_GIMBAL_INITIAL_ROLL;
    mGimbalStatus->currentYaw = AI_ENGINE_GIMBAL_INITIAL_YAW;
    mGimbalStatus->currentZoom = AI_ENGINE_CAMERA_INITIAL_ZOOM;
    qDebug().noquote().nospace() << "mGimbalStatus->hardwareID: " << mGimbalStatus->hardwareID;
    qDebug().noquote().nospace() << "mGimbalStatus->maxZoom: " << mGimbalStatus->maxZoom;
    qDebug().noquote().nospace() << "mGimbalStatus->currentZoom: " << mGimbalStatus->currentZoom;
    qDebug().noquote().nospace() << "mGimbalStatus->currentPitch: " << mGimbalStatus->currentPitch;
    qDebug().noquote().nospace() << "mGimbalStatus->currentYaw: " << mGimbalStatus->currentYaw;
    qDebug().noquote().nospace() << "mGimbalStatus->currentRoll: " << mGimbalStatus->currentRoll;
    qDebug().noquote().nospace() << "mGimbalStatus->resolutionX: " << mGimbalStatus->resolutionX;
    qDebug().noquote().nospace() << "mGimbalStatus->resolutionY: " << mGimbalStatus->resolutionY;
}
// Converts an AI-space rectangle into full-video-resolution metrics (width,
// height and centre point). Swapped edges are corrected with a warning, and
// a degenerate rectangle falls back to the full frame so that no turn or
// zoom is performed.
AiEngineRectangleProperties AiEngineGimbalServerActions::calculateRectangleProperties(int top, int left, int bottom, int right) {
    // AI works at 640x360, which is half of the 1280x720 video: scale by two
    // to get the position on the actual video.
    top *= 2;
    left *= 2;
    bottom *= 2;
    right *= 2;
    // Sanity: top must stay above bottom, left before right.
    if (top > bottom) {
        qWarning().noquote().nospace() << "calculateRectangleProperties(): top and bottom mixed?";
        const int swapTmp = top;
        top = bottom;
        bottom = swapTmp;
    }
    if (left > right) {
        qWarning().noquote().nospace() << "calculateRectangleProperties(): left and right mixed?";
        const int swapTmp = left;
        left = right;
        right = swapTmp;
    }
    AiEngineRectangleProperties props;
    props.width = right - left;
    props.height = bottom - top;
    props.middleX = left + props.width / 2;
    props.middleY = top + props.height / 2;
    // Sanity: any zero metric makes the rectangle unusable. Report the full
    // frame instead, which results in no turn and no zoom.
    if (props.height == 0 || props.width == 0 || props.middleX == 0 || props.middleY == 0) {
        props.height = mGimbalStatus->resolutionY;
        props.width = mGimbalStatus->resolutionX;
        props.middleX = mGimbalStatus->resolutionX / 2;
        props.middleY = mGimbalStatus->resolutionY / 2;
        qWarning().noquote().nospace() << "calculateRectangleProperties(): Something was zero -> No zoom, no turn!";
    }
    return props;
}
// Computes the absolute gimbal yaw/pitch needed to centre the camera on the
// pixel (targetX, targetY), writing the results into resultYaw/resultPitch.
// FOV and focal length are linearly interpolated over the assumed zoom range
// [1, 10] from the per-camera min/max constants loaded in setup().
void AiEngineGimbalServerActions::getAnglesToOnScreenTarget(int targetX, int targetY, float &resultYaw, float &resultPitch)
{
    // Start from the current orientation; offsets are applied at the end.
    resultYaw = mGimbalStatus->currentYaw;
    resultPitch = mGimbalStatus->currentPitch;
    // Normalize target pixel location to [-0.5, 0.5] range
    // (NOTE: the expression actually yields [-1, 1]; the offset math below
    // was tuned against this scaling.)
    float normPixelX = (targetX - mGimbalStatus->resolutionX / 2.0f) / (mGimbalStatus->resolutionX / 2.0f);
    float normPixelY = (targetY - mGimbalStatus->resolutionY / 2.0f) / (mGimbalStatus->resolutionY / 2.0f);
    // Adjust field of view and focal length for the current zoom.
    float horizontalFov = mGimbalStatus->aiEngineCameraFOVHMin + (10 - mGimbalStatus->currentZoom) * (mGimbalStatus->aiEngineCameraFOVHMax - mGimbalStatus->aiEngineCameraFOVHMin) / 9;
    float verticalFov = mGimbalStatus->aiEngineCameraFOVVMin + (10 - mGimbalStatus->currentZoom) * (mGimbalStatus->aiEngineCameraFOVVMax - mGimbalStatus->aiEngineCameraFOVVMin) / 9;
    float focalLength = mGimbalStatus->aiEngineCameraFLMin + (mGimbalStatus->currentZoom - 1) * (mGimbalStatus->aiEngineCameraFLMax - mGimbalStatus->aiEngineCameraFLMin) / 9;
    // Calculate image plane dimensions based on focal length and aspect ratio
    float imagePlaneWidth = 2.0f * focalLength * tan(degreesToRadians(horizontalFov) / 2.0f);
    float imagePlaneHeight = 2.0f * focalLength * tan(degreesToRadians(verticalFov) / 2.0f);
    // Calculate angle offsets based on normalized pixel location and image
    // plane dimensions.
    // NOTE(review): atan2 uses aiEngineCameraFLMin here rather than the
    // zoom-adjusted focalLength computed above — confirm this is intentional.
    float turnX = atan2(normPixelX * imagePlaneWidth / 2.0f, mGimbalStatus->aiEngineCameraFLMin) * 180.0f / M_PI;
    float turnY = atan2(normPixelY * imagePlaneHeight / 2.0f, mGimbalStatus->aiEngineCameraFLMin) * 180.0f / M_PI;
    // Make alterations to current angles
    resultYaw -= turnX;
    resultPitch -= turnY;
}
// Points the camera at the centre of the given rectangle: computes absolute
// yaw/pitch and packs them into a TURN_TO_PIXEL command as little-endian
// int16 values in tenths of a degree (payload offsets 8..11).
void AiEngineGimbalServerActions::turnToTarget(AiEngineRectangleProperties rectangle)
{
    qDebug().noquote().nospace() << "Turning to target";
    float targetYaw = 0.0f;
    float targetPitch = 0.0f;
    getAnglesToOnScreenTarget(rectangle.middleX, rectangle.middleY, targetYaw, targetPitch);
    QByteArray turnCommand = mUdpCommand->getCommand(UDP_COMMAND_ID::TURN_TO_PIXEL);
    const int16_t yawTenths = targetYaw * 10;
    turnCommand[8] = yawTenths & 0xFF;
    turnCommand[9] = yawTenths >> 8;
    const int16_t pitchTenths = targetPitch * 10;
    turnCommand[10] = pitchTenths & 0xFF;
    turnCommand[11] = pitchTenths >> 8;
    mUdpSocket->sendCommand(turnCommand);
}
// Zooms the camera so that the target rectangle fills roughly half of the
// frame in its tighter dimension, then sends the ZOOM_TO_X command.
void AiEngineGimbalServerActions::zoomToTarget(AiEngineRectangleProperties rectangle)
{
    qDebug().noquote().nospace() << "Zooming to target";
    // Fraction of the frame the target should occupy after zooming.
    float fillRatio = 0.5;
    float targetPixelWidth = rectangle.width;
    float targetPixelHeight = rectangle.height;
    float adjustedObjectWidth = targetPixelWidth / fillRatio;
    float adjustedObjectHeight = targetPixelHeight / fillRatio;
    float zoomWidth = static_cast<double>(mGimbalStatus->resolutionX) / adjustedObjectWidth;
    float zoomHeight = static_cast<double>(mGimbalStatus->resolutionY) / adjustedObjectHeight;
    // Fix: clamp to the camera's valid range [1.0, maxZoom]. Previously only
    // the lower bound was enforced, so oversized zoom requests could be sent
    // to the camera.
    float zoom = std::min(zoomWidth, zoomHeight);
    zoom = std::min(std::max(zoom, 1.0f), mGimbalStatus->maxZoom);
    // Encode as integer part + tenths (payload offsets 8 and 9).
    QByteArray serialCommandNewZoom = mUdpCommand->getCommand(UDP_COMMAND_ID::ZOOM_TO_X);
    uint8_t integerPart = static_cast<uint8_t>(zoom);
    float fractionalPart = zoom - integerPart;
    uint8_t scaledFractional = uint8_t(fractionalPart * 10);
    serialCommandNewZoom[8] = integerPart;
    serialCommandNewZoom[9] = scaledFractional;
    mUdpSocket->sendCommand(serialCommandNewZoom);
    // NOTE(review): mGimbalStatus->currentZoom is not updated here — confirm
    // whether subsequent angle calculations should see the new zoom level.
}
// Computes the GPS position of the current target from the drone's position
// and the camera orientation, then emits aiTargetZoomed with the result.
//
// dronePosition: drone GPS fix plus attitude captured when the zoom started.
// targetIndex:   running target identifier echoed back in the signal.
void AiEngineGimbalServerActions::getLocation(AiEngineDronePosition dronePosition, int targetIndex)
{
    // Snapshot camera state and repackage the drone data for the geolocation math.
    CameraData cameraData = getCameraData();
    GPSData gpsData = {dronePosition.position.alt, dronePosition.position.lat, dronePosition.position.lon};
    DroneData droneData = {gpsData, dronePosition.yaw, dronePosition.pitch, 0.0f /* Roll */}; // GPS (latitude, longitude, altitude) and heading
    // Calculate the GPS location of the target
    AiEngineGeoPosition targetPosition = calculateTargetLocation(droneData, cameraData);
    // TODO: Send position
    AiEngineTargetPosition targetReturn;
    targetReturn.position = targetPosition;
    targetReturn.targetIndex = targetIndex;
    qDebug() << "targetReturn.targetIndex: " << targetReturn.targetIndex;
    qDebug() << "targetReturn.position.alt: " << targetReturn.position.alt;
    qDebug() << "targetReturn.position.lat: " << targetReturn.position.lat;
    qDebug() << "targetReturn.position.lon: " << targetReturn.position.lon;
    emit aiTargetZoomed(targetReturn);
}
// Restores the orientation and zoom captured in the given status snapshot
// (taken before the zoom sequence started). Uses the same command byte
// layout as setup(): angles as little-endian int16 tenths of a degree at
// offsets 8..11, zoom as integer part + tenths at offsets 8..9.
void AiEngineGimbalServerActions::restoreOrientationAndZoom(AiEngineGimbalStatus gimbalStatus)
{
    QByteArray tempCommand;
    // Go to initial orientation
    tempCommand = mUdpCommand->getCommand(UDP_COMMAND_ID::TURN_TO_DEGREES);
    int16_t degreesVal = gimbalStatus.currentYaw * 10;
    tempCommand[8] = degreesVal & 0xFF;
    tempCommand[9] = degreesVal >> 8;
    degreesVal = gimbalStatus.currentPitch * 10;
    tempCommand[10] = degreesVal & 0xFF;
    tempCommand[11] = degreesVal >> 8;
    mUdpSocket->sendCommand(tempCommand);
    // Go to initial zoom
    tempCommand = mUdpCommand->getCommand(UDP_COMMAND_ID::ZOOM_TO_X);
    uint8_t integerPart = static_cast<uint8_t>(gimbalStatus.currentZoom);
    float fractionalPart = gimbalStatus.currentZoom - integerPart;
    uint8_t scaledFractional = uint8_t(fractionalPart * 10);
    tempCommand[8] = integerPart;
    tempCommand[9] = scaledFractional;
    mUdpSocket->sendCommand(tempCommand);
    // TODO: Maybe send signal that all done?
}
// Builds a CameraData snapshot from the live gimbal status.
// Yaw and pitch are negated because the geolocation math expects the
// opposite sign convention to the gimbal report.
CameraData AiEngineGimbalServerActions::getCameraData()
{
    CameraData camera;
    camera.height = mGimbalStatus->resolutionY;
    camera.width = mGimbalStatus->resolutionX;
    camera.yaw = 0 - mGimbalStatus->currentYaw;     // Reverse for calculation purposes
    camera.pitch = 0 - mGimbalStatus->currentPitch; // Reverse for calculation purposes
    return camera;
}
// Projects the camera line of sight from the drone position onto the Earth
// and returns the estimated target latitude/longitude/altitude.
AiEngineGeoPosition AiEngineGimbalServerActions::calculateTargetLocation(DroneData drone, CameraData camera)
{
    // Slant/ground range from the drone altitude and the camera pitch angle.
    //float targetDistance = calculateTargetDistanceFromTargetSize(targetTrueSize, targetPixelSize, cameraData.width, degreesToRadians(cameraData.fow));
    float slantRange = 0;
    float groundRange = 0;
    calculateDistancesToTarget(drone.gps.altitude, camera.pitch, slantRange, groundRange);
    // Target altitude = drone altitude minus the vertical drop along the slant.
    // Both terms are rounded to centimetres to avoid noisy altitude values.
    float pitchRad = degreesToRadians(camera.pitch);
    float sinPitch = std::sin(pitchRad);
    float verticalDrop = std::round(slantRange * sinPitch * 100.0) / 100.0;
    float targetAltitude = (std::round(drone.gps.altitude * 100.0) / 100.0) - verticalDrop;
    // Absolute bearing towards the target (drone heading + camera yaw offset).
    float targetBearing = std::fmod(drone.yaw + camera.yaw, 360.0f);
    float bearingRad = degreesToRadians(targetBearing);
    float latRad = degreesToRadians(drone.gps.latitude);
    float lonRad = degreesToRadians(drone.gps.longitude);
    // Great-circle "destination point" formulas on a spherical Earth.
    float destLatRad = std::asin(std::sin(latRad) * std::cos(groundRange / EARTH_RADIUS) + std::cos(latRad) * std::sin(groundRange / EARTH_RADIUS) * std::cos(bearingRad));
    float destLonRad = lonRad + std::atan2(std::sin(bearingRad) * std::sin(groundRange / EARTH_RADIUS) * std::cos(latRad), std::cos(groundRange / EARTH_RADIUS) - std::sin(latRad) * std::sin(destLatRad));
    // Convert the result back to degrees.
    AiEngineGeoPosition target;
    target.alt = targetAltitude;
    target.lat = destLatRad * 180.0f / static_cast<float>(M_PI);
    target.lon = destLonRad * 180.0f / static_cast<float>(M_PI);
    return target;
}
// Computes the line-of-sight (slant) distance and the ground (horizontal)
// distance from the drone to the point the camera is aimed at.
//
// altitude           - drone altitude above the target plane, in meters
// cameraPitch        - camera depression angle in degrees (0 = horizon, 90 = straight down)
// slantDistance      - output, meters
// horizontalDistance - output, meters
//
// NOTE(review): a pitch of 0 makes tan() zero and the division below yields
// +inf; the clamp deliberately allows 0, so a horizon-pointing camera
// produces infinite distances — confirm callers handle this.
void AiEngineGimbalServerActions::calculateDistancesToTarget(float altitude, float cameraPitch, float &slantDistance, float &horizontalDistance)
{
    // Convert pitch from degrees to radians and keep it within [0, 90] degrees.
    float cameraPitchRadians = degreesToRadians(cameraPitch);
    cameraPitchRadians = std::clamp((double)cameraPitchRadians, (double)0.0f, M_PI_2);
    // Flat-earth horizontal distance from the right triangle (altitude, pitch).
    horizontalDistance = altitude / std::tan(cameraPitchRadians);
    // Bug fix: the previous "Earth curvature adjustment" computed
    //     centralAngle = d / R;  d = R * centralAngle;
    // which is algebraically a no-op — it returned the same distance.
    // It has been removed. A genuine curvature correction would be
    //     d = R * atan(d / R)   (arc length under the tangent-line intercept).
    // TODO(review): decide whether a real curvature correction is required.
    // Slant distance via Pythagoras on altitude and ground distance.
    slantDistance = std::sqrt(altitude * altitude + horizontalDistance * horizontalDistance);
}
// Converts an angle from degrees to radians. The multiplication is carried
// out in double precision (M_PI is a double) and narrowed to float on return.
float AiEngineGimbalServerActions::degreesToRadians(float angleInDegrees)
{
    return angleInDegrees * M_PI / 180.0f;
}
@@ -1,68 +0,0 @@
#pragma once
#include <QDebug>
#include "aienginedefinitions.h"
#include "aienginegimbalserverudpcommand.h"
#include "aienginegimbalserverudpresponse.h"
#include "aienginegimbalserverudp.h"
// Mean Earth radius used by the spherical geodesy formulas.
const double EARTH_RADIUS = 6371000.0; // Earth's radius in meters
// Geodetic position of the drone.
struct GPSData
{
    float altitude; // Meters
    float latitude; // Decimal degrees
    float longitude; // Decimal degrees
};
// Gimbal camera state used for target geolocation.
struct CameraData
{
    uint16_t height; // Pixels
    uint16_t width; // Pixels
    float pitch; // Degrees
    float yaw; // Degrees
};
// Drone telemetry: position plus attitude angles.
struct DroneData
{
    GPSData gps;
    float yaw; // Degrees
    float pitch; // Degrees
    float roll; // Degrees
};
/**
 * High-level gimbal actions: pointing the camera at a detected target,
 * zooming onto it, geolocating it and restoring the previous camera state.
 *
 * NOTE(review): some slots return values (e.g. calculateRectangleProperties);
 * return values are only delivered over direct connections — confirm these
 * slots are never invoked through queued connections.
 */
class AiEngineGimbalServerActions : public QObject
{
    Q_OBJECT
public:
    explicit AiEngineGimbalServerActions(QObject *parent = nullptr);
public slots:
    // Stores the collaborator pointers; must be called before any other slot.
    void setup(AiEngineGimbalServerUDP *udpSocket, AiEngineGimbalServerUDPCommand *udpCommand, AiEngineGimbalServerUDPResponse *udpResponse, AiEngineGimbalStatus *gimbalStatus);
    // Derives rectangle properties from a detection bounding box.
    AiEngineRectangleProperties calculateRectangleProperties(int top, int left, int bottom, int right);
    // Points the gimbal at the given rectangle.
    void turnToTarget(AiEngineRectangleProperties rectangle);
    // Zooms in on the given rectangle (clamped to >= 1x).
    void zoomToTarget(AiEngineRectangleProperties rectangle);
    // Computes the target's GPS fix and emits aiTargetZoomed().
    void getLocation(AiEngineDronePosition dronePosition, int targetIndex);
    // Returns the gimbal to the orientation/zoom captured in gimbalStatus.
    void restoreOrientationAndZoom(AiEngineGimbalStatus gimbalStatus);
signals:
    void aiTargetZoomed(AiEngineTargetPosition);
private:
    // Collaborators are owned elsewhere; set via setup().
    // NOTE(review): pointers are not initialized to nullptr — calling any
    // slot before setup() dereferences indeterminate pointers.
    AiEngineGimbalServerUDP *mUdpSocket;
    AiEngineGimbalServerUDPCommand *mUdpCommand;
    AiEngineGimbalServerUDPResponse *mUdpResponse;
    AiEngineGimbalStatus *mGimbalStatus;
private slots:
    // Snapshot of resolution plus sign-inverted yaw/pitch.
    CameraData getCameraData(void);
    void getAnglesToOnScreenTarget(int targetX, int targetY, float &resultYaw, float &resultPitch);
    // Spherical-Earth projection of the camera line of sight.
    AiEngineGeoPosition calculateTargetLocation(DroneData drone, CameraData camera);
    void calculateDistancesToTarget(float altitude, float cameraPitch, float &slantDistance, float &horizontalDistance);
    float degreesToRadians(float degrees);
};
@@ -1,31 +0,0 @@
#include "aienginegimbalservercrc16.h"
// Precomputed lookup table for a table-driven CRC-16 (CCITT polynomial
// 0x1021), indexed by (high CRC byte XOR data byte). Generated data —
// do not edit by hand.
static const uint16_t crc16Table[256] = {0x0, 0x1021, 0x2042, 0x3063, 0x4084, 0x50a5, 0x60c6, 0x70e7, 0x8108, 0x9129, 0xa14a, 0xb16b, 0xc18c, 0xd1ad, 0xe1ce, 0xf1ef, 0x1231, 0x210, 0x3273, 0x2252, 0x52b5, 0x4294, 0x72f7, 0x62d6, 0x9339, 0x8318, 0xb37b, 0xa35a, 0xd3bd, 0xc39c, 0xf3ff, 0xe3de, 0x2462, 0x3443, 0x420, 0x1401, 0x64e6, 0x74c7, 0x44a4, 0x5485, 0xa56a, 0xb54b, 0x8528, 0x9509, 0xe5ee, 0xf5cf, 0xc5ac, 0xd58d, 0x3653, 0x2672, 0x1611, 0x630,
                                         0x76d7, 0x66f6, 0x5695, 0x46b4, 0xb75b, 0xa77a, 0x9719, 0x8738, 0xf7df, 0xe7fe, 0xd79d, 0xc7bc, 0x48c4, 0x58e5, 0x6886, 0x78a7, 0x840, 0x1861, 0x2802, 0x3823, 0xc9cc, 0xd9ed, 0xe98e, 0xf9af, 0x8948, 0x9969, 0xa90a, 0xb92b, 0x5af5, 0x4ad4, 0x7ab7, 0x6a96, 0x1a71, 0xa50, 0x3a33, 0x2a12, 0xdbfd, 0xcbdc, 0xfbbf, 0xeb9e, 0x9b79, 0x8b58, 0xbb3b, 0xab1a, 0x6ca6, 0x7c87, 0x4ce4, 0x5cc5, 0x2c22, 0x3c03, 0xc60, 0x1c41,
                                         0xedae, 0xfd8f, 0xcdec, 0xddcd, 0xad2a, 0xbd0b, 0x8d68, 0x9d49, 0x7e97, 0x6eb6, 0x5ed5, 0x4ef4, 0x3e13, 0x2e32, 0x1e51, 0xe70, 0xff9f, 0xefbe, 0xdfdd, 0xcffc, 0xbf1b, 0xaf3a, 0x9f59, 0x8f78, 0x9188, 0x81a9, 0xb1ca, 0xa1eb, 0xd10c, 0xc12d, 0xf14e, 0xe16f, 0x1080, 0xa1, 0x30c2, 0x20e3, 0x5004, 0x4025, 0x7046, 0x6067, 0x83b9, 0x9398, 0xa3fb, 0xb3da, 0xc33d, 0xd31c, 0xe37f, 0xf35e, 0x2b1, 0x1290, 0x22f3, 0x32d2,
                                         0x4235, 0x5214, 0x6277, 0x7256, 0xb5ea, 0xa5cb, 0x95a8, 0x8589, 0xf56e, 0xe54f, 0xd52c, 0xc50d, 0x34e2, 0x24c3, 0x14a0, 0x481, 0x7466, 0x6447, 0x5424, 0x4405, 0xa7db, 0xb7fa, 0x8799, 0x97b8, 0xe75f, 0xf77e, 0xc71d, 0xd73c, 0x26d3, 0x36f2, 0x691, 0x16b0, 0x6657, 0x7676, 0x4615, 0x5634, 0xd94c, 0xc96d, 0xf90e, 0xe92f, 0x99c8, 0x89e9, 0xb98a, 0xa9ab, 0x5844, 0x4865, 0x7806, 0x6827, 0x18c0, 0x8e1, 0x3882, 0x28a3,
                                         0xcb7d, 0xdb5c, 0xeb3f, 0xfb1e, 0x8bf9, 0x9bd8, 0xabbb, 0xbb9a, 0x4a75, 0x5a54, 0x6a37, 0x7a16, 0xaf1, 0x1ad0, 0x2ab3, 0x3a92, 0xfd2e, 0xed0f, 0xdd6c, 0xcd4d, 0xbdaa, 0xad8b, 0x9de8, 0x8dc9, 0x7c26, 0x6c07, 0x5c64, 0x4c45, 0x3ca2, 0x2c83, 0x1ce0, 0xcc1, 0xef1f, 0xff3e, 0xcf5d, 0xdf7c, 0xaf9b, 0xbfba, 0x8fd9, 0x9ff8, 0x6e17, 0x7e36, 0x4e55, 0x5e74, 0x2e93, 0x3eb2, 0xed1, 0x1ef0};
// Computes a table-driven CRC-16 over `len` bytes starting at `ptr`,
// seeded with `crcInit`.
uint16_t AiEngineGimbalServerCrc16::calculateCRC(const uint8_t *ptr, uint32_t len, uint16_t crcInit)
{
    uint16_t crc = crcInit;
    for (uint32_t i = 0; i < len; ++i) {
        const uint8_t tableIndex = ptr[i] ^ uint8_t((crc >> 8) & 0xFF);
        crc = (crc << 8) ^ crc16Table[tableIndex];
    }
    return crc;
}
// Calculates the CRC-16 of `data` and writes it into `bytes` in
// little-endian order (bytes[0] = LSB, bytes[1] = MSB).
void AiEngineGimbalServerCrc16::getCRCBytes(const QByteArray &data, int8_t *bytes)
{
    const uint16_t checksum = calculateCRC(reinterpret_cast<const uint8_t *>(data.constData()), data.size(), 0);
    bytes[0] = checksum & 0xFF; // LSB
    bytes[1] = checksum >> 8;   // MSB
}
@@ -1,14 +0,0 @@
#pragma once
#include <QByteArray>
#include <cstdint>
// CRC-16 helper used to frame and validate SIYI gimbal UDP messages.
class AiEngineGimbalServerCrc16
{
public:
    // Computes the CRC-16 of `data` and stores it little-endian into `bytes`
    // (bytes[0] = LSB, bytes[1] = MSB). `bytes` must point to >= 2 bytes.
    static void getCRCBytes(const QByteArray &data, int8_t *bytes);
private:
    // Table-driven CRC-16 over `len` bytes at `ptr`, seeded with `crcInit`.
    static uint16_t calculateCRC(const uint8_t *ptr, uint32_t len, uint16_t crcInit);
};
@@ -1,69 +0,0 @@
/**
* This is a defines header for Siyi Gimbal Cameras.
* Other cameras might need their own defines header.
*/
#pragma once
// --- SIYI ZR10 camera: hardware id, field-of-view and focal-length limits ---
#define AI_ENGINE_CAMERA_ZR10_HID 0x6B // 107
#define AI_ENGINE_CAMERA_ZR10_FIELD_OF_VIEW_HORIZONTAL_MIN 6.7f
#define AI_ENGINE_CAMERA_ZR10_FIELD_OF_VIEW_HORIZONTAL_MAX 47.0f
#define AI_ENGINE_CAMERA_ZR10_FIELD_OF_VIEW_VERTICAL_MIN 3.8f
#define AI_ENGINE_CAMERA_ZR10_FIELD_OF_VIEW_VERTICAL_MAX 30.0f
#define AI_ENGINE_CAMERA_ZR10_FOCAL_LENGTH_MIN 5.2f
#define AI_ENGINE_CAMERA_ZR10_FOCAL_LENGTH_MAX 47.5f
// --- SIYI A8 camera: min == max values indicate a fixed (non-zooming) lens ---
#define AI_ENGINE_CAMERA_A8_HID 0x73 // 115
#define AI_ENGINE_CAMERA_A8_FIELD_OF_VIEW_HORIZONTAL_MIN 85.0f
#define AI_ENGINE_CAMERA_A8_FIELD_OF_VIEW_HORIZONTAL_MAX 85.0f
#define AI_ENGINE_CAMERA_A8_FIELD_OF_VIEW_VERTICAL_MIN 58.0f
#define AI_ENGINE_CAMERA_A8_FIELD_OF_VIEW_VERTICAL_MAX 58.0f
#define AI_ENGINE_CAMERA_A8_FOCAL_LENGTH_MIN 21.0f
#define AI_ENGINE_CAMERA_A8_FOCAL_LENGTH_MAX 21.0f
// --- Common limits (degrees) and startup defaults ---
#define AI_ENGINE_CAMERA_ASPECT_RATIO 1.777777778f
#define AI_ENGINE_GIMBAL_YAW_MIN -135.0f
#define AI_ENGINE_GIMBAL_YAW_MAX 135.0f
#define AI_ENGINE_GIMBAL_PITCH_MIN -90.0f
#define AI_ENGINE_GIMBAL_PITCH_MAX 25.0f
#define AI_ENGINE_CAMERA_INITIAL_ZOOM 1.0f
#define AI_ENGINE_GIMBAL_INITIAL_PITCH -20.0f
#define AI_ENGINE_GIMBAL_INITIAL_ROLL 0.0f
#define AI_ENGINE_GIMBAL_INITIAL_YAW 0.0f
// Logical identifiers for the UDP commands understood by the gimbal server.
// TURN_TO_DEGREES is 1; the remaining values auto-increment from there.
// The actual wire frames are defined in AiEngineGimbalServerUDPCommand.
enum UDP_COMMAND_ID {
    TURN_TO_DEGREES = 1,
    TURN_TO_PIXEL,
    ZOOM_TO_X,
    ACQUIRE_CAMERA_CODEC_SPECS,
    ACQUIRE_CURRENT_ZOOM,
    ACQUIRE_ATTITUDE_DATA,
    AUTO_CENTER,
    ZOOM_MOST,
    ZOOM_LEAST,
    FOCUS_MOST,
    FOCUS_LEAST,
    FOCUS_AUTO,
    ROTATE_UP,
    ROTATE_DOWN,
    ROTATE_RIGHT,
    ROTATE_LEFT,
    ROTATE_STOP,
    ACQUIRE_MAX_ZOOM_VALUE,
    TAKE_PICTURES,
    TAKE_VIDEO,
    ROTATE_100_100,
    ACQUIRE_GIMBAL_STATUS,
    ACQUIRE_HW_INFO,
    ACQUIRE_FIRMWARE_VERSION,
    MODE_LOCK,
    MODE_FOLLOW,
    MODE_FPV,
    ENABLE_HDMI,
    ENABLE_CVBS,
    DISABLE_HDMI_CVBS,
    ACQUIRE_RANGE_DATA,
    RUN_TARGET_LOCATION_TEST
};
@@ -1,73 +0,0 @@
#include <QDebug>
#include <QTimer>
#include "aienginegimbalserverudp.h"
#include "aienginegimbalservercrc16.h"
// Creates the UDP socket and fixes the gimbal at SERVER_IP:SERVER_PORT as
// its default peer.
// NOTE(review): QUdpSocket::connectToHost() performs no handshake, so the
// isOpen() failure branch below may never trigger — confirm it is reachable.
// NOTE(review): exit() inside a constructor gives callers no chance to
// recover from a failure; consider reporting an error instead.
AiEngineGimbalServerUDP::AiEngineGimbalServerUDP(QObject *parent)
    : QObject{parent}
{
    // Open UDP socket
    mUdpSocket = new QUdpSocket();
    mUdpSocket->connectToHost(QHostAddress(SERVER_IP), SERVER_PORT);
    if (mUdpSocket->isOpen() == false) {
        qCritical().noquote().nospace() << "AiEngineGimbalServerUDP(): Unable to connect " << SERVER_IP << ":" << SERVER_PORT;
        delete mUdpSocket;
        exit(EXIT_FAILURE);
    }
    qDebug().noquote().nospace() << "AiEngineGimbalServerUDP(): Connected " << SERVER_IP << ":" << SERVER_PORT;
}
// Closes the socket (if still open) and releases it.
AiEngineGimbalServerUDP::~AiEngineGimbalServerUDP()
{
    if (mUdpSocket->isOpen()) {
        mUdpSocket->close();
    }
    delete mUdpSocket;
}
void AiEngineGimbalServerUDP::sendCommand(const QByteArray &command)
{
QByteArray toSend = command;
int8_t crcBytes[2];
AiEngineGimbalServerCrc16::getCRCBytes(toSend, crcBytes);
toSend.resize(toSend.size() + 2); // Increase array size to accommodate CRC bytes
toSend[toSend.size() - 2] = crcBytes[0]; // Set LSB
toSend[toSend.size() - 1] = crcBytes[1]; // Set MSB
qDebug().noquote().nospace() << "AiEngineGimbalServerUDP(): Sent HEX data: " << toSend.toHex();
if (mUdpSocket->isOpen() == false) {
qCritical().noquote().nospace() << "AiEngineGimbalServerUDP(): Connection not open (sendCommand)";
return;
}
if (mUdpSocket->write(toSend) == -1) {
qCritical().noquote().nospace() << "AiEngineGimbalServerUDP(): Failed to send data: " << mUdpSocket->errorString();
}
}
// Waits up to 3 seconds for a datagram from the gimbal and returns its
// bytes. Returns an empty QByteArray on timeout or read failure.
QByteArray AiEngineGimbalServerUDP::readResponse()
{
    QByteArray response;
    QByteArray buffer(RECV_BUF_SIZE, 0);
    if (!mUdpSocket->waitForReadyRead(3000)) {
        qCritical().noquote().nospace() << "AiEngineGimbalServerUDP(): No data received.";
        return response;
    }
    const qint64 bytesRead = mUdpSocket->read(buffer.data(), RECV_BUF_SIZE);
    if (bytesRead <= 0) {
        qCritical().noquote().nospace() << "AiEngineGimbalServerUDP(): Failed to receive data: " << mUdpSocket->errorString();
        return response;
    }
    response = buffer.left(bytesRead);
    qDebug().noquote().nospace() << "AiEngineGimbalServerUDP(): Received HEX data: " << response.toHex();
    return response;
}
@@ -1,28 +0,0 @@
#pragma once
#include <QObject>
#include <QUdpSocket>
// Gimbal endpoint and receive buffer size.
#define SERVER_IP "192.168.0.25"
#define SERVER_PORT 37260
#define RECV_BUF_SIZE 1024
// Thin UDP transport towards the SIYI gimbal: frames outgoing commands with
// a CRC-16 trailer and reads raw response datagrams.
class AiEngineGimbalServerUDP : public QObject
{
    Q_OBJECT
public:
    explicit AiEngineGimbalServerUDP(QObject *parent = nullptr);
public:
    ~AiEngineGimbalServerUDP();
public slots:
    // Appends CRC-16 and writes the datagram to the gimbal.
    void sendCommand(const QByteArray &command);
    // Blocks up to 3 s for a response datagram; empty array on failure.
    QByteArray readResponse();
private:
    QUdpSocket *mUdpSocket; // Owned; created in the constructor, deleted in the destructor.
};
@@ -1,102 +0,0 @@
/**
* This is a serial command class for Siyi Gimbal Cameras.
* Other cameras might need their own serial command class.
*/
#include <QDebug>
#include "aienginegimbalserverudpcommand.h"
// Populates the table of raw SIYI command frames (header + CMD_ID + payload).
// The CRC16 trailer is NOT included here; it is appended at send time by
// AiEngineGimbalServerUDP::sendCommand().
AiEngineGimbalServerUDPCommand::AiEngineGimbalServerUDPCommand(QObject *parent)
    : QObject{parent}
{
    /*
    Field Index Bytes Description
    STX 0 2 0x6655: starting mark. Low byte in the front
    CTRL 2 1 0: need_ack (if the current data pack need “ack”)
    1: ack_pack (if the current data pack is an “ack” package) 2-7: reserved
    Data_len 3 2 Data field byte length. Low byte in the front
    SEQ 5 2 Frame sequence (0 ~ 65535). Low byte in the front
    CMD_ID 7 1 Command ID
    DATA 8 Data_len Data
    CRC16 2 CRC16 check to the complete data package. Low
    byte in the front
    */
    // Payload bytes (index 8+) of TURN_TO_DEGREES / TURN_TO_PIXEL / ZOOM_TO_X
    // are placeholders; callers overwrite them before sending.
    mSerialCommands.push_back({UDP_COMMAND_ID::TURN_TO_DEGREES, createByteArray({0x55, 0x66, 0x01, 0x04, 0x00, 0x00, 0x00, 0x0E, 0x00, 0x00, 0x00, 0x00}), "Turn to degrees"});
    mSerialCommands.push_back({UDP_COMMAND_ID::TURN_TO_PIXEL, createByteArray({0x55, 0x66, 0x01, 0x04, 0x00, 0x00, 0x00, 0x0E, 0x00, 0x00, 0x00, 0x00}), "Turn to pixel"});
    mSerialCommands.push_back({UDP_COMMAND_ID::ZOOM_TO_X, createByteArray({0x55, 0x66, 0x01, 0x02, 0x00, 0x01, 0x00, 0x0F, 0x00, 0x00}), "Zoom to X"});
    mSerialCommands.push_back({UDP_COMMAND_ID::AUTO_CENTER, createByteArray({0x55, 0x66, 0x01, 0x01, 0x00, 0x00, 0x00, 0x08, 0x01}), "Auto Centering"});
    mSerialCommands.push_back({UDP_COMMAND_ID::ACQUIRE_ATTITUDE_DATA, createByteArray({0x55, 0x66, 0x01, 0x00, 0x00, 0x00, 0x00, 0x0D}), "Acquire Attitude Data"});
    mSerialCommands.push_back({UDP_COMMAND_ID::ACQUIRE_CURRENT_ZOOM, createByteArray({0x55, 0x66, 0x01, 0x00, 0x00, 0x00, 0x00, 0x18}), "Acquire current zoom"});
    mSerialCommands.push_back({UDP_COMMAND_ID::ACQUIRE_CAMERA_CODEC_SPECS, createByteArray({0x55, 0x66, 0x01, 0x01, 0x00, 0x00, 0x00, 0x20, 0x01}), "Acquire Camera Codec Specs"});
    mSerialCommands.push_back({UDP_COMMAND_ID::ZOOM_MOST, createByteArray({0x55, 0x66, 0x01, 0x01, 0x00, 0x00, 0x00, 0x05, 0x01}), "Zoom +1"});
    mSerialCommands.push_back({UDP_COMMAND_ID::ZOOM_LEAST, createByteArray({0x55, 0x66, 0x01, 0x01, 0x00, 0x00, 0x00, 0x05, 0xFF}), "Zoom -1"});
    mSerialCommands.push_back({UDP_COMMAND_ID::FOCUS_MOST, createByteArray({0x55, 0x66, 0x01, 0x01, 0x00, 0x00, 0x00, 0x06, 0x01}), "Manual Focus +1"});
    mSerialCommands.push_back({UDP_COMMAND_ID::FOCUS_LEAST, createByteArray({0x55, 0x66, 0x01, 0x01, 0x00, 0x00, 0x00, 0x06, 0xFF}), "Manual Focus -1"});
    mSerialCommands.push_back({UDP_COMMAND_ID::FOCUS_AUTO, createByteArray({0x55, 0x66, 0x01, 0x01, 0x00, 0x00, 0x00, 0x04, 0x01}), "Auto Focus"});
    mSerialCommands.push_back({UDP_COMMAND_ID::ROTATE_UP, createByteArray({0x55, 0x66, 0x01, 0x02, 0x00, 0x00, 0x00, 0x07, 0x00, 0x2D}), "Rotate Up"});
    mSerialCommands.push_back({UDP_COMMAND_ID::ROTATE_DOWN, createByteArray({0x55, 0x66, 0x01, 0x02, 0x00, 0x00, 0x00, 0x07, 0x00, -0x2D}), "Rotate Down"});
    mSerialCommands.push_back({UDP_COMMAND_ID::ROTATE_RIGHT, createByteArray({0x55, 0x66, 0x01, 0x02, 0x00, 0x00, 0x00, 0x07, 0x2D, 0x00}), "Rotate Right"});
    mSerialCommands.push_back({UDP_COMMAND_ID::ROTATE_LEFT, createByteArray({0x55, 0x66, 0x01, 0x02, 0x00, 0x00, 0x00, 0x07, -0x2D, 0x00}), "Rotate Left"});
    mSerialCommands.push_back({UDP_COMMAND_ID::ROTATE_STOP, createByteArray({0x55, 0x66, 0x01, 0x02, 0x00, 0x00, 0x00, 0x07, 0x00, 0x00}), "Stop rotation"});
    mSerialCommands.push_back({UDP_COMMAND_ID::ACQUIRE_MAX_ZOOM_VALUE, createByteArray({0x55, 0x66, 0x01, 0x00, 0x00, 0x00, 0x00, 0x16}), "Acquire the Max Zoom Value"});
    mSerialCommands.push_back({UDP_COMMAND_ID::TAKE_PICTURES, createByteArray({0x55, 0x66, 0x01, 0x01, 0x00, 0x00, 0x00, 0x0C, 0x00}), "Take Pictures"});
    mSerialCommands.push_back({UDP_COMMAND_ID::TAKE_VIDEO, createByteArray({0x55, 0x66, 0x01, 0x01, 0x00, 0x00, 0x00, 0x0C, 0x02}), "Record Video"});
    mSerialCommands.push_back({UDP_COMMAND_ID::ROTATE_100_100, createByteArray({0x55, 0x66, 0x01, 0x02, 0x00, 0x00, 0x00, 0x07, 0x64, 0x64}), "Rotate 100 100"});
    mSerialCommands.push_back({UDP_COMMAND_ID::ACQUIRE_GIMBAL_STATUS, createByteArray({0x55, 0x66, 0x01, 0x00, 0x00, 0x00, 0x00, 0x0A}), "Gimbal Status Information"});
    mSerialCommands.push_back({UDP_COMMAND_ID::ACQUIRE_HW_INFO, createByteArray({0x55, 0x66, 0x01, 0x00, 0x00, 0x00, 0x00, 0x02}), "Acquire Hardware ID"});
    mSerialCommands.push_back({UDP_COMMAND_ID::ACQUIRE_FIRMWARE_VERSION, createByteArray({0x55, 0x66, 0x01, 0x00, 0x00, 0x00, 0x00, 0x01}), "Acquire Firmware Version"});
    mSerialCommands.push_back({UDP_COMMAND_ID::MODE_LOCK, createByteArray({0x55, 0x66, 0x01, 0x01, 0x00, 0x00, 0x00, 0x0C, 0x03}), "Lock Mode"});
    mSerialCommands.push_back({UDP_COMMAND_ID::MODE_FOLLOW, createByteArray({0x55, 0x66, 0x01, 0x01, 0x00, 0x00, 0x00, 0x0C, 0x04}), "Follow Mode"});
    mSerialCommands.push_back({UDP_COMMAND_ID::MODE_FPV, createByteArray({0x55, 0x66, 0x01, 0x01, 0x00, 0x00, 0x00, 0x0C, 0x05}), "FPV Mode"});
    mSerialCommands.push_back({UDP_COMMAND_ID::ENABLE_HDMI,
                               createByteArray({0x55, 0x66, 0x01, 0x01, 0x00, 0x00, 0x00, 0x0C, 0x06}),
                               "Set Video Output as HDMI (Only available on A8 mini, restart to take "
                               "effect)"});
    mSerialCommands.push_back({UDP_COMMAND_ID::ENABLE_CVBS,
                               createByteArray({0x55, 0x66, 0x01, 0x01, 0x00, 0x00, 0x00, 0x0C, 0x07}),
                               "Set Video Output as CVBS (Only available on A8 mini, restart to take "
                               "effect)"});
    mSerialCommands.push_back({UDP_COMMAND_ID::DISABLE_HDMI_CVBS,
                               createByteArray({0x55, 0x66, 0x01, 0x01, 0x00, 0x00, 0x00, 0x0C, 0x08}),
                               "Turn Off both CVBS and HDMI Output (Only available on A8 mini, restart "
                               "to take effect)"});
    mSerialCommands.push_back({UDP_COMMAND_ID::ACQUIRE_RANGE_DATA,
                               createByteArray({0x55, 0x66, 0x01, 0x00, 0x00, 0x00, 0x00, 0x15}),
                               "Read Range from Laser Rangefinder(Low byte in the front, high byte in "
                               "the back, available on ZT30)"});
    mSerialCommands.push_back({UDP_COMMAND_ID::RUN_TARGET_LOCATION_TEST, createByteArray({0x00, 0x00}), "TEST target location calculations"});
    // Sort vector by SERIAL_COMMAND_ID
    std::sort(mSerialCommands.begin(), mSerialCommands.end(), [](const AiEngineServerSerialCommandStructure &a, const AiEngineServerSerialCommandStructure &b) { return a.id < b.id; });
}
// Builds a QByteArray from an initializer list of integer byte values;
// each value is truncated to a single char.
QByteArray AiEngineGimbalServerUDPCommand::createByteArray(const std::initializer_list<int> &bytes)
{
    QByteArray result;
    result.reserve(static_cast<int>(bytes.size()));
    for (const int value : bytes) {
        result.append(static_cast<char>(value));
    }
    return result;
}
// Returns the index of `commandId` inside mSerialCommands, or -1 if absent.
int AiEngineGimbalServerUDPCommand::getCommandIndex(UDP_COMMAND_ID commandId)
{
    const auto match = std::find_if(mSerialCommands.begin(), mSerialCommands.end(),
                                    [commandId](const AiEngineServerSerialCommandStructure &entry) { return entry.id == commandId; });
    if (match == mSerialCommands.end()) {
        return -1;
    }
    return static_cast<int>(match - mSerialCommands.begin());
}
// Looks up the raw command template for `commandId`.
// Returns an empty QByteArray (after logging) when the command is unknown.
// Bug fix: the previous version logged the failure but still executed
// mSerialCommands.at(-1); the index wraps to a huge size_t and at() throws
// std::out_of_range, crashing the application.
QByteArray AiEngineGimbalServerUDPCommand::getCommand(UDP_COMMAND_ID commandId)
{
    const int commandIndex = getCommandIndex(commandId);
    if (commandIndex == -1) {
        qCritical().noquote().nospace() << "Command not found for command: " << commandId;
        return QByteArray();
    }
    return mSerialCommands.at(commandIndex).command;
}
@@ -1,27 +0,0 @@
/**
* This is a serial command class for Siyi Gimbal Cameras.
* Other cameras might need their own serial command class.
*/
#pragma once
#include <QByteArray>
#include <QList>
#include <QObject>
#include <QString>
#include "aienginedefinitions.h"
// Builds and serves the raw UDP command templates for SIYI gimbal cameras.
// Other camera families would need their own command class.
class AiEngineGimbalServerUDPCommand : public QObject
{
    Q_OBJECT
public:
    explicit AiEngineGimbalServerUDPCommand(QObject *parent = nullptr);
public:
    // Returns the byte template for `commandId` (without the CRC trailer).
    QByteArray getCommand(UDP_COMMAND_ID commandId);
private:
    // Converts an initializer list of ints to a QByteArray (one char each).
    QByteArray createByteArray(const std::initializer_list<int> &bytes);
    // Index of `commandId` in mSerialCommands, or -1 when absent.
    int getCommandIndex(UDP_COMMAND_ID commandId);
    // Command table, sorted by id in the constructor.
    std::vector<AiEngineServerSerialCommandStructure> mSerialCommands;
};
@@ -1,101 +0,0 @@
/**
* This is a serial response class for Siyi Gimbal Cameras.
* Other cameras might need their own serial response class.
*/
#include <QDebug>
#include <QHash>
#include <QString>
#include <QVariant>
#include "aienginegimbalservercrc16.h"
#include "aienginegimbalserverudpresponse.h"
// Stateless parser; nothing to initialize beyond the QObject base.
AiEngineGimbalServerUDPResponse::AiEngineGimbalServerUDPResponse(QObject *parent)
    : QObject{parent}
{
}
// Parses a raw gimbal response datagram into named values keyed by
// "yaw", "pitch", "roll", "zoom", "width", "height", "hardware_id", ...
// Returns an empty hash for empty/short frames or unknown commands.
QHash<QString, QVariant> AiEngineGimbalServerUDPResponse::getResponceValues(QByteArray response)
{
    QHash<QString, QVariant> results;
    if (response.size() == 0) {
        qWarning().noquote().nospace() << "Response is empty, exiting...";
        return results;
    }
    // Bug fix: a frame shorter than header (8) + CRC (2) bytes previously
    // caused out-of-range indexing below.
    if (response.size() < MESSAGE_IDX::DATA + 2) {
        qWarning().noquote().nospace() << "Response too short, exiting...";
        return results;
    }
    // Check response data validity: recompute the CRC over everything except
    // the two trailing CRC bytes.
    int8_t crcCheck[2];
    // Bug fix: was uint8_t, which truncated the length for frames > 257 bytes.
    int desiredLength = response.size() - 2;
    QByteArray subData(response.data(), desiredLength);
    AiEngineGimbalServerCrc16::getCRCBytes(subData, crcCheck);
    int8_t crcOriginal[2];
    crcOriginal[0] = response.at(response.size() - 2);
    crcOriginal[1] = response.at(response.size() - 1);
    // Data not OK
    // NOTE(review): an invalid CRC is only logged and parsing continues —
    // confirm this is intended rather than returning early.
    if (crcCheck[0] != crcOriginal[0] || crcCheck[1] != crcOriginal[1]) {
        qWarning().noquote().nospace() << "Response data INVALID";
        QString responseCRC = QString("0x%1,0x%2").arg(crcOriginal[0], 2, 16, QLatin1Char('0')).arg(crcOriginal[1], 2, 16, QLatin1Char('0')).toUpper();
        QString recalcCRC = QString("0x%1,0x%2").arg(crcCheck[0], 2, 16, QLatin1Char('0')).arg(crcCheck[1], 2, 16, QLatin1Char('0')).toUpper();
        qWarning().noquote().nospace() << responseCRC << "!=" << recalcCRC;
    }
    uint8_t command = response.at(MESSAGE_IDX::CMD_ID);
    if (command == 0x0E) {
        // Attitude angles arrive as little-endian int16 in 0.1-degree units
        // (the sender encodes degrees * 10; see the TURN_TO_DEGREES encoding).
        int16_t yaw = ((uint8_t) response.at(9) << 8) | (uint8_t) response.at(8);
        int16_t pitch = ((uint8_t) response.at(11) << 8) | (uint8_t) response.at(10);
        int16_t roll = ((uint8_t) response.at(13) << 8) | (uint8_t) response.at(12);
        // Bug fix: use floating-point division; the previous integer `/ 10`
        // discarded the 0.1-degree fraction.
        results.insert("yaw", yaw / 10.0f);
        results.insert("pitch", pitch / 10.0f);
        results.insert("roll", roll / 10.0f);
    } else if (command == 0x0D) {
        // Attitude + angular speeds, same 0.1-unit little-endian encoding.
        int16_t yaw = ((uint8_t) response.at(9) << 8) | (uint8_t) response.at(8);
        int16_t pitch = ((uint8_t) response.at(11) << 8) | (uint8_t) response.at(10);
        int16_t roll = ((uint8_t) response.at(13) << 8) | (uint8_t) response.at(12);
        int16_t yawSpeed = ((uint8_t) response.at(15) << 8) | (uint8_t) response.at(14);
        int16_t pitchSpeed = ((uint8_t) response.at(17) << 8) | (uint8_t) response.at(16);
        int16_t rollSpeed = ((uint8_t) response.at(19) << 8) | (uint8_t) response.at(18);
        // Bug fix: floating-point division here as well (was integer `/ 10`).
        results.insert("yaw", yaw / 10.0f);
        results.insert("pitch", pitch / 10.0f);
        results.insert("roll", roll / 10.0f);
        results.insert("yaw_speed", yawSpeed / 10.0f);
        results.insert("pitch_speed", pitchSpeed / 10.0f);
        results.insert("roll_speed", rollSpeed / 10.0f);
    } else if (command == 0x0F) {
        int8_t zoom = (int8_t) response.at(8);
        results.insert("zoom", zoom);
    } else if (command == 0x16 || command == 0x18) {
        // Zoom: byte 8 integer part, byte 9 fractional part in tenths.
        float zoomInt = (float) response.at(8);
        float zoomFloat = (float) ((float) response.at(9) / 10);
        results.insert("zoom", zoomInt + zoomFloat);
    } else if (command == 0x20) {
        // Codec specs: resolution as little-endian uint16 pairs.
        uint16_t width = ((uint8_t) response.at(11) << 8) | (uint8_t) response.at(10);
        uint16_t height = ((uint8_t) response.at(13) << 8) | (uint8_t) response.at(12);
        results.insert("width", width);
        results.insert("height", height);
    } else if (command == 0x02) {
        // Hardware ID: first two payload bytes parsed as a base-16 string.
        // NOTE(review): this only works if the payload bytes are ASCII hex
        // digits — confirm against the camera SDK.
        QByteArray hidArray;
        hidArray.append((char)response.at(8));
        hidArray.append((char)response.at(9));
        uint8_t hid = static_cast<uint8_t>(hidArray.toUShort(nullptr, 16));
        results.insert("hardware_id", hid);
    } else {
        // Unknown command: dump the whole frame as hex for debugging.
        qWarning().noquote().nospace() << "Getting responce values not implemented yet for command " << QString("0x%1").arg(command, 2, 16, QLatin1Char('0'));
        QString responseStr;
        for (int i = 0; i < response.size(); i++) {
            if (i > 0) {
                responseStr += ",";
            }
            responseStr += QString("0x%1").arg(response.at(i), 2, 16, QChar('0')).toUpper();
            responseStr.replace("0X", "0x");
        }
        qWarning().noquote().nospace() << "Responce byte array: " << responseStr;
    }
    return results;
}
@@ -1,24 +0,0 @@
/**
* This is a serial response class for Siyi Gimbal Cameras.
* Other cameras might need their own serial response class.
*/
#pragma once
#include <QString>
#include <QByteArray>
#include <QObject>
// Byte offsets of the fields inside a SIYI protocol frame.
enum MESSAGE_IDX { STX = 0, CTRL = 2, Data_len = 3, SEQ = 5, CMD_ID = 7, DATA = 8 };
// Decodes raw gimbal response datagrams into named values.
class AiEngineGimbalServerUDPResponse : public QObject
{
    Q_OBJECT
public:
    explicit AiEngineGimbalServerUDPResponse(QObject *parent = nullptr);
public:
    // Validates the CRC and extracts fields keyed by name ("yaw", "zoom", ...).
    // Returns an empty hash for empty or unrecognized responses.
    QHash<QString, QVariant> getResponceValues(QByteArray response);
};
@@ -1,31 +0,0 @@
#include <QFile>
#include <QDataStream>
#include <QDebug>
#include "aiengineimagesaver.h"
// Deep-copies the frame (clone) so the caller's buffer can be reused while
// the worker thread is still writing the file.
AiEngineImageSaver::AiEngineImageSaver(const cv::Mat &image, int imageNumber, QObject *parent)
    : QThread(parent), image(image.clone()), imageNumber(imageNumber)
{
}
// Blocks until run() has finished so the thread never outlives the object.
AiEngineImageSaver::~AiEngineImageSaver()
{
    wait();
}
void AiEngineImageSaver::run()
{
if (image.empty() || image.channels() == 0) {
qWarning() << "AiEngineImageSaver. Empty image or no channels, nothing to save.";
return;
}
// Calculate the size of the upper half
int halfHeight = image.rows / 2;
cv::Mat upperHalf = image(cv::Rect(0, 0, image.cols, halfHeight));
// Use bpm to reduce encoding time.
QString filePath = QString("/tmp/image-%1.bmp").arg(imageNumber, 5, 10, QChar('0'));
cv::imwrite(filePath.toStdString(), upperHalf);
}
-21
View File
@@ -1,21 +0,0 @@
#pragma once
#include <QThread>
#include <QString>
#include <opencv2/opencv.hpp>
// One-shot worker thread that saves the upper half of a frame to /tmp as a
// numbered BMP file.
class AiEngineImageSaver : public QThread
{
    Q_OBJECT
public:
    // Clones `image`, so the caller may reuse its buffer immediately.
    AiEngineImageSaver(const cv::Mat &image, int imageNumber, QObject *parent = nullptr);
    // Waits for run() to finish before destruction.
    ~AiEngineImageSaver();
protected:
    // Thread body: crops the top half and writes /tmp/image-NNNNN.bmp.
    void run() override;
private:
    cv::Mat image; // Deep copy of the frame to save.
    int imageNumber; // Used in the output file name (zero-padded to 5 digits).
};
-148
View File
@@ -1,148 +0,0 @@
#include "aiengineinference.h"
// Stores the model path and selects the class-label set at compile time:
// YOLO_ONNX builds use the 80 COCO classes (indoor testing with stock
// YOLOv8 models); otherwise the Azaion model classes are used.
// The list index corresponds to the model's class id.
AiEngineInference::AiEngineInference(QString modelPath, QObject *parent)
    : QObject{parent},
      mModelPath(modelPath),
      mActive(false)
{
#ifdef YOLO_ONNX
    // Standard COCO class names.
    mClassNames = {
        "person",
        "bicycle",
        "car",
        "motorcycle",
        "airplane",
        "bus",
        "train",
        "truck",
        "boat",
        "traffic light",
        "fire hydrant",
        "stop sign",
        "parking meter",
        "bench",
        "bird",
        "cat",
        "dog",
        "horse",
        "sheep",
        "cow",
        "elephant",
        "bear",
        "zebra",
        "giraffe",
        "backpack",
        "umbrella",
        "handbag",
        "tie",
        "suitcase",
        "frisbee",
        "skis",
        "snowboard",
        "sports ball",
        "kite",
        "baseball bat",
        "baseball glove",
        "skateboard",
        "surfboard",
        "tennis racket",
        "bottle",
        "wine glass",
        "cup",
        "fork",
        "knife",
        "spoon",
        "bowl",
        "banana",
        "apple",
        "sandwich",
        "orange",
        "broccoli",
        "carrot",
        "hot dog",
        "pizza",
        "donut",
        "cake",
        "chair",
        "couch",
        "potted plant",
        "bed",
        "dining table",
        "toilet",
        "tv",
        "laptop",
        "mouse",
        "remote",
        "keyboard",
        "cell phone",
        "microwave",
        "oven",
        "toaster",
        "sink",
        "refrigerator",
        "book",
        "clock",
        "vase",
        "scissors",
        "teddy bear",
        "hair drier",
        "toothbrush"
    };
#else
    // Azaion model class names.
    mClassNames = {
        "Armoured vehicle",
        "Truck",
        "Vehicle",
        "Artillery",
        "Shadow artillery",
        "Trenches",
        "Military man",
        "Tyre tracks",
        "Additional protection tank",
        "Smoke"
    };
#endif
}
// Returns true once a subclass has finished initialization and inference
// can be performed.
bool AiEngineInference::isActive(void)
{
    return mActive;
}
// Letterboxes `src` into the 640x640 inference square: scales down
// (preserving aspect ratio) only when the source exceeds the square, then
// pastes the result into the top-left corner of a black canvas.
cv::Mat AiEngineInference::resizeAndPad(const cv::Mat& src)
{
    const float aspectRatio = static_cast<float>(src.cols) / src.rows;
    // Target size: unchanged unless the source is larger than the square.
    int targetWidth = src.cols;
    int targetHeight = src.rows;
    if (src.cols > INFERENCE_SQUARE_WIDTH || src.rows > INFERENCE_SQUARE_HEIGHT) {
        if (aspectRatio > 1) {
            // Landscape: pin the width, derive the height.
            targetWidth = INFERENCE_SQUARE_WIDTH;
            targetHeight = static_cast<int>(INFERENCE_SQUARE_WIDTH / aspectRatio);
        } else {
            // Portrait or square: pin the height, derive the width.
            targetHeight = INFERENCE_SQUARE_HEIGHT;
            targetWidth = static_cast<int>(INFERENCE_SQUARE_HEIGHT * aspectRatio);
        }
    }
    cv::Mat scaled;
    cv::resize(src, scaled, cv::Size(targetWidth, targetHeight));
    // Black canvas; the scaled image lands at the top-left, the rest is padding.
    cv::Mat output(INFERENCE_SQUARE_HEIGHT, INFERENCE_SQUARE_WIDTH, src.type(), cv::Scalar(0, 0, 0));
    scaled.copyTo(output(cv::Rect(0, 0, scaled.cols, scaled.rows)));
    return output;
}
-50
View File
@@ -1,50 +0,0 @@
#pragma once
#include <QObject>
#include <QString>
#include <QVector>
#include <opencv2/core.hpp>
#include <opencv2/imgproc.hpp>
#include "aienginedefinitions.h"
// Side lengths of the square input expected by the YOLOv8 ONNX models.
const int INFERENCE_SQUARE_WIDTH = 640;
const int INFERENCE_SQUARE_HEIGHT = 640;
// A single detection: bounding box, confidence and class.
class AiEngineObject {
public:
    AiEngineRectangle rectangle;
    // NOTE(review): "propability" is a typo for "probability" but the name is
    // part of the public interface and used by callers — rename separately.
    float propability;
    int classId;
    QString classStr;
};
// The analyzed frame together with all detections found in it.
class AiEngineInferenceResult {
public:
    cv::Mat frame;
    QVector<AiEngineObject> objects;
};
// Abstract base class for inference backends. Subclasses implement
// initialize() and performInferenceSlot() and emit resultsReady() when a
// frame has been processed.
class AiEngineInference : public QObject
{
    Q_OBJECT
public:
    explicit AiEngineInference(QString modelPath, QObject *parent = nullptr);
    // True once the backend is initialized and ready for frames.
    bool isActive(void);
protected:
    // Letterboxes `src` into the 640x640 inference square (top-left aligned).
    cv::Mat resizeAndPad(const cv::Mat& src);
    QString mModelPath; // Path to the model file given at construction.
    bool mActive; // Set by subclasses once initialization succeeds.
    int mNumber; // NOTE(review): purpose not evident from this file — document at the definition site.
    QVector<QString> mClassNames; // Class-id -> human-readable label.
public slots:
    virtual void performInferenceSlot(cv::Mat frame) = 0;
    virtual void initialize(int number) = 0;
signals:
    void resultsReady(AiEngineInferenceResult results);
};
@@ -1,96 +0,0 @@
#include <QDebug>
#include <QtConcurrent/QtConcurrent>
#include <opencv2/core.hpp>
#include <opencv2/imgcodecs.hpp>
#include "aienginertsplistener.h"
#include "aiengineconfig.h"
// The capture loop is not started here; call startListening() explicitly.
AiEngineRtspListener::AiEngineRtspListener(QObject *parent)
    : QObject{parent},
      mIsListening(false)
{
}
// Requests the capture loop to stop before destruction.
// NOTE(review): the worker thread itself is not joined here — see the
// synchronization note on stopListening().
AiEngineRtspListener::~AiEngineRtspListener()
{
    stopListening();
}
// Starts the capture loop on a QtConcurrent worker thread and returns
// immediately. The returned QFuture is intentionally discarded; the loop is
// ended via mIsListening in stopListening().
void AiEngineRtspListener::startListening(void)
{
    mIsListening = true;
    (void)QtConcurrent::run([this](){
        listenLoop();
    });
}
// Asks the capture loop to finish and releases the capture device.
// NOTE(review): mCap.release() runs on the caller's thread while
// listenLoop() may still be reading mCap on the worker thread, and
// mIsListening is a plain bool shared across threads — confirm whether
// synchronization (std::atomic, waiting on the future) is needed.
void AiEngineRtspListener::stopListening()
{
    mIsListening = false;
    if (mCap.isOpened()) {
        mCap.release();
    }
}
// Frame-production loop, executed on a QtConcurrent worker thread.
// AI_BENCH builds replay still images from a fixed folder (and then exit the
// process); normal builds pull frames from the RTSP stream until
// mIsListening is cleared. Each frame is emitted as a deep copy.
void AiEngineRtspListener::listenLoop(void)
{
#ifdef AI_BENCH
    QThread::msleep(2000);
    // NOTE(review): hard-coded, machine-specific test-image folder.
    QString directoryPath = "/home/pama/tmp/photos";
    QDir directory(directoryPath);
    // Ensure the directory exists
    if (!directory.exists()) {
        qDebug() << "Directory does not exist!";
        exit(1);
    }
    QStringList filters;
    filters << "*.jpg" << "*.jpeg" << "*.png" << "*.bmp";
    directory.setNameFilters(filters);
    QFileInfoList fileInfoList = directory.entryInfoList(QDir::Files, QDir::Name);
    // Replay in deterministic (file name) order.
    std::sort(fileInfoList.begin(), fileInfoList.end(), [](const QFileInfo &a, const QFileInfo &b) {
        return a.fileName() < b.fileName();
    });
    qDebug() << "Images in folder:" << fileInfoList.size();
    for (const QFileInfo &fileInfo : fileInfoList) {
        QString filePath = fileInfo.absoluteFilePath();
        cv::Mat image = cv::imread(filePath.toStdString());
        if (image.empty()) {
            qDebug() << "Failed to load image";
            exit(1);
        }
        // Clone so the receiver owns its pixel buffer.
        emit frameReceived(image.clone());
        QThread::msleep(1500);
    }
    QThread::msleep(2000);
    qDebug() << "Sleep 2000ms and exit";
    exit(0);
#else
    qDebug() << "AiEngineRtspListener loop running in thread: " << QThread::currentThreadId();
    mCap.open(rtspVideoUrl.toStdString());
    cv::Mat frame;
    // Busy read loop: empty frames (e.g. stream hiccups) are silently skipped.
    while (mIsListening) {
        mCap >> frame;
        if (frame.empty() == false) {
            emit frameReceived(frame.clone());
        }
    }
#endif
}
@@ -1,23 +0,0 @@
#pragma once
#include <QThread>
#include <opencv2/core.hpp>
#include <opencv2/videoio.hpp>
// Pulls frames from an RTSP stream (or from a folder of still images in
// AI_BENCH builds) on a background thread and emits them via frameReceived().
class AiEngineRtspListener : public QObject
{
    Q_OBJECT
public:
    explicit AiEngineRtspListener(QObject *parent = nullptr);
    ~AiEngineRtspListener();
    // Launches the capture loop on a QtConcurrent worker thread.
    void startListening(void);
    // Requests the loop to stop and releases the capture device.
    void stopListening(void);
private:
    // Worker-thread body; runs until mIsListening is cleared.
    void listenLoop(void);
    // NOTE(review): plain bool shared between threads — consider std::atomic.
    bool mIsListening;
    cv::VideoCapture mCap;
signals:
    void frameReceived(cv::Mat frame);
};
@@ -1,10 +0,0 @@
Armoured_vehicle
Truck
Vehicle
Artillery
Shadow_artillery
Trenches
Military man
Tyre_tracks
Additional_protection_tank
Smoke
@@ -1,80 +0,0 @@
person
bicycle
car
motorcycle
airplane
bus
train
truck
boat
traffic light
fire hydrant
stop sign
parking meter
bench
bird
cat
dog
horse
sheep
cow
elephant
bear
zebra
giraffe
backpack
umbrella
handbag
tie
suitcase
frisbee
skis
snowboard
sports ball
kite
baseball bat
baseball glove
skateboard
surfboard
tennis racket
bottle
wine glass
cup
fork
knife
spoon
bowl
banana
apple
sandwich
orange
broccoli
carrot
hot dog
pizza
donut
cake
chair
couch
potted plant
bed
dining table
toilet
tv
laptop
mouse
remote
keyboard
cell phone
microwave
oven
toaster
sink
refrigerator
book
clock
vase
scissors
teddy bear
hair drier
toothbrush
-1
View File
@@ -1 +0,0 @@
yolo export model=yolov8n.pt opset=12 simplify=True dynamic=False format=onnx imgsz=640,640
-27
View File
@@ -1,27 +0,0 @@
#include <QCoreApplication>
#include <QDebug>
#include <QFile>
#include <opencv2/opencv.hpp>
#include "aiengine.h"
// Entry point: loads the AI model given as the single command-line argument
// and runs the engine inside a Qt event loop.
int main(int argc, char *argv[])
{
    QCoreApplication app(argc, argv);
    // Exactly one argument is required: the path of the model file.
    if (argc != 2) {
        qDebug() << "\nUsage: " << argv[0] << " <model_file>";
        return 1;
    }
    QString modelFile = argv[1];
    if (QFile::exists(modelFile) == false) {
        // Bug fix: this branch used to repeat the usage text with the model
        // path in place of the program name; report the actual problem.
        qDebug() << "\nModel file does not exist:" << modelFile;
        return 1;
    }
    AiEngine aiEngine(modelFile);
    aiEngine.start();
    return app.exec();
}
-82
View File
@@ -1,82 +0,0 @@
# Console (no GUI) Qt application using network and serial-port modules.
QT += core network serialport
QT -= gui
CONFIG += concurrent console c++17
# Keep generated moc/object files out of the source tree.
MOC_DIR = moc
OBJECTS_DIR = obj
SOURCES = $$PWD/*.cpp
HEADERS = $$PWD/*.h
# Optional feature flags, enabled with e.g. `qmake CONFIG+=ai_bench`.
ai_bench {
    QMAKE_CXXFLAGS += -DAI_BENCH
}
gimbal {
    message("Using real gimbal camera.")
    QMAKE_CXXFLAGS += -DGIMBAL
}
else {
    message("Not using real gimbal camera.")
}
yolo_onnx {
    message("Using YOLOv8 ONNX models for indoor testing.")
    QMAKE_CXXFLAGS += -DYOLO_ONNX
}
save_images {
    message("Saving inference images to /tmp.")
    QMAKE_CXXFLAGS += -DSAVE_IMAGES
}
# Inference backend selection: opi5 (RKNN NPU) > ncnn > opencv > ONNX runtime
# (default). Exactly one backend's sources are compiled in.
opi5 {
    message("OPI5 build")
    CONFIG += link_pkgconfig
    PKGCONFIG += opencv4 librga stb libturbojpeg
    INCLUDEPATH += /usr/include/rga
    QMAKE_CXXFLAGS += -DOPI5_BUILD
    LIBS += /usr/local/lib/librknnrt.so
    QMAKE_RPATHDIR += /usr/local/lib
    SOURCES += $$PWD/src-opi5/*.c $$PWD/src-opi5/*.cpp $$PWD/src-opi5/*.cc
    HEADERS += $$PWD/src-opi5/*.h
}
else:ncnn {
    message("NCNN build")
    CONFIG += link_pkgconfig
    PKGCONFIG += opencv4
    # ncnn is linked statically and needs OpenMP at link time.
    QMAKE_CXXFLAGS += -DNCNN_BUILD -fopenmp
    QMAKE_LFLAGS += -fopenmp
    INCLUDEPATH += /opt/ncnn/include
    LIBS += /opt/ncnn/lib/libncnn.a -lgomp
    SOURCES += $$PWD/src-ncnn/*.cpp
    HEADERS += $$PWD/src-ncnn/*.h
}
else:opencv {
    message("OpenCV build")
    message("You must use YOLOv8 ONNX files. Azaion model does not work!")
    message("OpenCV must be version 4.10.0 installed to /usr/local/")
    INCLUDEPATH += /opt/opencv-4.10.0/include/opencv4/
    LIBS += /opt/opencv-4.10.0/lib/libopencv_world.so
    QMAKE_LFLAGS += -Wl,-rpath,/opt/opencv-4.10.0/lib
    QMAKE_CXXFLAGS += -DOPENCV_BUILD
    SOURCES += $$PWD/src-opencv-onnx/*.cpp
    HEADERS += $$PWD/src-opencv-onnx/*.h
}
else {
    message("ONNX build")
    message("You must use YOLOv8 ONNX files. Azaion ONNX model also works fine.")
    QMAKE_CXXFLAGS += -DONNX_BUILD
    # ONNX-runtime
    INCLUDEPATH += /opt/onnxruntime-linux-x64-1.18.0/include
    QMAKE_LFLAGS += -Wl,-rpath,/opt/onnxruntime-linux-x64-1.18.0/lib
    LIBS += /opt/onnxruntime-linux-x64-1.18.0/lib/libonnxruntime.so.1.18.0
    # OpenCV 4.10.0
    INCLUDEPATH += /opt/opencv-4.10.0/include/opencv4/
    QMAKE_LFLAGS += -Wl,-rpath,/opt/opencv-4.10.0/lib
    LIBS += /opt/opencv-4.10.0/lib/libopencv_world.so
    SOURCES += $$PWD/src-onnx-runtime/*.cpp
    HEADERS += $$PWD/src-onnx-runtime/*.h
}
@@ -1,408 +0,0 @@
#include <QDebug>
#include <QThread>
#include <iostream>
#include <vector>
#include "aiengineinferencencnn.h"
// Inference tuning constants for the NCNN YOLOv8 backend.
const int target_size = 640;            // network input edge length (pixels)
const float prob_threshold = 0.25f;     // minimum class score to keep a box
const float nms_threshold = 0.45f;      // IoU above which overlapping boxes are suppressed
const int num_labels = 10; // 80 object labels in COCO, 10 in Azaion
const int MAX_STRIDE = 32;              // pad input to a multiple of this stride
char* getCharPointerCopy(const QString& modelPath) {
QByteArray byteArray = modelPath.toUtf8();
char* cString = new char[byteArray.size() + 1]; // Allocate memory
std::strcpy(cString, byteArray.constData()); // Copy the data
return cString; // Remember to delete[] this when done!
}
// Loads the ncnn model pair (.param graph + weights) and configures CPU-only
// inference with four worker threads.
AiEngineInferencevNcnn::AiEngineInferencevNcnn(QString modelPath, QObject *parent) :
    AiEngineInference{modelPath, parent}
{
    qDebug() << "TUOMAS AiEngineInferencevNcnn() mModelPath=" << mModelPath;
    yolov8.opt.num_threads = 4;
    yolov8.opt.use_vulkan_compute = false;
    // Derive the .param path by swapping the 3-character extension.
    QString paramPath = modelPath.chopped(3).append("param");
    char *model = getCharPointerCopy(modelPath);
    char *param = getCharPointerCopy(paramPath);
    qDebug() << "model:" << model;
    qDebug() << "param:" << param;
    yolov8.load_param(param);
    yolov8.load_model(model);
    // Bug fix: getCharPointerCopy() transfers ownership; both copies were
    // previously leaked.
    delete[] param;
    delete[] model;
}
static inline float intersection_area(const Object& a, const Object& b)
{
cv::Rect_<float> inter = a.rect & b.rect;
return inter.area();
}
// In-place quicksort of objects[left..right] by descending confidence.
// The two recursive halves run as parallel OpenMP sections.
static void qsort_descent_inplace(std::vector<Object>& objects, int left, int right)
{
    int i = left;
    int j = right;
    // Hoare-style partition around the middle element's probability.
    float p = objects[(left + right) / 2].prob;
    while (i <= j)
    {
        while (objects[i].prob > p)
            i++;
        while (objects[j].prob < p)
            j--;
        if (i <= j)
        {
            // swap
            std::swap(objects[i], objects[j]);
            i++;
            j--;
        }
    }
    #pragma omp parallel sections
    {
        #pragma omp section
        {
            if (left < j) qsort_descent_inplace(objects, left, j);
        }
        #pragma omp section
        {
            if (i < right) qsort_descent_inplace(objects, i, right);
        }
    }
}
// Sorts all detections by descending confidence; no-op for an empty set.
static void qsort_descent_inplace(std::vector<Object>& objects)
{
    if (!objects.empty())
        qsort_descent_inplace(objects, 0, (int)objects.size() - 1);
}
// Greedy non-maximum suppression over detections pre-sorted by descending
// confidence. Indices of surviving boxes are appended to `picked`. When
// `agnostic` is false, boxes of different classes never suppress each other.
static void nms_sorted_bboxes(const std::vector<Object>& faceobjects, std::vector<int>& picked, float nms_threshold, bool agnostic = false)
{
    picked.clear();
    const int n = faceobjects.size();
    // Precompute areas once; reused for every IoU test below.
    std::vector<float> areas(n);
    for (int i = 0; i < n; i++)
    {
        areas[i] = faceobjects[i].rect.area();
    }
    for (int i = 0; i < n; i++)
    {
        const Object& a = faceobjects[i];
        int keep = 1;
        for (int j = 0; j < (int)picked.size(); j++)
        {
            const Object& b = faceobjects[picked[j]];
            if (!agnostic && a.label != b.label)
                continue;
            // intersection over union
            float inter_area = intersection_area(a, b);
            float union_area = areas[i] + areas[picked[j]] - inter_area;
            // float IoU = inter_area / union_area
            if (inter_area / union_area > nms_threshold)
                keep = 0;
        }
        if (keep)
            picked.push_back(i);
    }
}
// Logistic function 1 / (1 + e^-x), computed in double, returned as float.
static inline float sigmoid(float x)
{
    const double denominator = 1.0 + exp(-(double)x);
    return static_cast<float>(1.0 / denominator);
}
// Clamps d into [min, max]; the lower bound is applied before the upper one,
// so a degenerate range (min > max) resolves to max.
static inline float clampf(float d, float min, float max)
{
    float result = (d < min) ? min : d;
    if (result > max)
        result = max;
    return result;
}
// Decodes raw YOLOv8 output (channels x anchors, box as cx/cy/w/h followed by
// per-class scores) into Object boxes clipped to the inference image bounds.
// Only anchors whose best class score exceeds confidence_threshold survive.
static void parse_yolov8_detections(
    float* inputs, float confidence_threshold,
    int num_channels, int num_anchors, int num_labels,
    int infer_img_width, int infer_img_height,
    std::vector<Object>& objects)
{
    std::vector<Object> detections;
    // Transpose to anchors x channels so each row holds one candidate box.
    cv::Mat output = cv::Mat((int)num_channels, (int)num_anchors, CV_32F, inputs).t();
    for (int i = 0; i < num_anchors; i++)
    {
        const float* row_ptr = output.row(i).ptr<float>();
        const float* bboxes_ptr = row_ptr;
        const float* scores_ptr = row_ptr + 4;
        // Best class = arg-max over the num_labels scores after the 4 box values.
        const float* max_s_ptr = std::max_element(scores_ptr, scores_ptr + num_labels);
        float score = *max_s_ptr;
        if (score > confidence_threshold)
        {
            // Center/size -> corner coordinates, clipped to the image.
            float x = *bboxes_ptr++;
            float y = *bboxes_ptr++;
            float w = *bboxes_ptr++;
            float h = *bboxes_ptr;
            float x0 = clampf((x - 0.5f * w), 0.f, (float)infer_img_width);
            float y0 = clampf((y - 0.5f * h), 0.f, (float)infer_img_height);
            float x1 = clampf((x + 0.5f * w), 0.f, (float)infer_img_width);
            float y1 = clampf((y + 0.5f * h), 0.f, (float)infer_img_height);
            cv::Rect_<float> bbox;
            bbox.x = x0;
            bbox.y = y0;
            bbox.width = x1 - x0;
            bbox.height = y1 - y0;
            Object object;
            // Pointer distance from the score base gives the class index.
            object.label = max_s_ptr - scores_ptr;
            object.prob = score;
            object.rect = bbox;
            detections.push_back(object);
        }
    }
    objects = detections;
}
// Runs the ncnn YOLOv8 network on a BGR image and fills `objects` with the
// post-NMS detections (coordinates in the unpadded input space).
// NOTE(review): the letterbox scaling below is disabled; the input is assumed
// to already be 640x640 (resizeAndPad() upstream) — confirm before reuse.
int AiEngineInferencevNcnn::detect_yolov8(const cv::Mat& bgr, std::vector<Object>& objects)
{
    /*
    int img_w = bgr.cols;
    int img_h = bgr.rows;
    // letterbox pad to multiple of MAX_STRIDE
    int w = img_w;
    int h = img_h;
    float scale = 1.f;
    if (w > h)
    {
        scale = (float)target_size / w;
        w = target_size;
        h = h * scale;
    }
    else
    {
        scale = (float)target_size / h;
        h = target_size;
        w = w * scale;
    }
    */
    // Fixed-size fast path: no aspect-ratio scaling needed.
    int w = 640;
    int h = 640;
    int img_w = 640;
    int img_h = 640;
    float scale = 1.f;
    ncnn::Mat in = ncnn::Mat::from_pixels_resize(bgr.data, ncnn::Mat::PIXEL_BGR2RGB, img_w, img_h, w, h);
    // Pad up to the next multiple of MAX_STRIDE with the conventional 114 gray.
    int wpad = (target_size + MAX_STRIDE - 1) / MAX_STRIDE * MAX_STRIDE - w;
    int hpad = (target_size + MAX_STRIDE - 1) / MAX_STRIDE * MAX_STRIDE - h;
    ncnn::Mat in_pad;
    ncnn::copy_make_border(in, in_pad, hpad / 2, hpad - hpad / 2, wpad / 2, wpad - wpad / 2, ncnn::BORDER_CONSTANT, 114.f);
    // Normalize pixel values to [0, 1].
    const float norm_vals[3] = {1 / 255.f, 1 / 255.f, 1 / 255.f};
    in_pad.substract_mean_normalize(0, norm_vals);
    auto start = std::chrono::high_resolution_clock::now();
    ncnn::Extractor ex = yolov8.create_extractor();
    ex.input("in0", in_pad);
    std::vector<Object> proposals;
    // stride 32
    {
        ncnn::Mat out;
        ex.extract("out0", out);
        std::vector<Object> objects32;
        parse_yolov8_detections(
            (float*)out.data, prob_threshold,
            out.h, out.w, num_labels,
            in_pad.w, in_pad.h,
            objects32);
        proposals.insert(proposals.end(), objects32.begin(), objects32.end());
    }
    // sort all proposals by score from highest to lowest
    qsort_descent_inplace(proposals);
    // apply nms with nms_threshold
    std::vector<int> picked;
    nms_sorted_bboxes(proposals, picked, nms_threshold);
    int count = picked.size();
    objects.resize(count);
    for (int i = 0; i < count; i++)
    {
        objects[i] = proposals[picked[i]];
        // adjust offset to original unpadded
        float x0 = (objects[i].rect.x - (wpad / 2)) / scale;
        float y0 = (objects[i].rect.y - (hpad / 2)) / scale;
        float x1 = (objects[i].rect.x + objects[i].rect.width - (wpad / 2)) / scale;
        float y1 = (objects[i].rect.y + objects[i].rect.height - (hpad / 2)) / scale;
        // clip
        x0 = std::max(std::min(x0, (float)(img_w - 1)), 0.f);
        y0 = std::max(std::min(y0, (float)(img_h - 1)), 0.f);
        x1 = std::max(std::min(x1, (float)(img_w - 1)), 0.f);
        y1 = std::max(std::min(y1, (float)(img_h - 1)), 0.f);
        objects[i].rect.x = x0;
        objects[i].rect.y = y0;
        objects[i].rect.width = x1 - x0;
        objects[i].rect.height = y1 - y0;
    }
    auto end = std::chrono::high_resolution_clock::now();
    auto duration = std::chrono::duration_cast<std::chrono::milliseconds>(end - start);
    std::cout << "Time taken: " << duration.count() << " milliseconds" << std::endl;
    return 0;
}
// Returns a copy of the input with each detection drawn as a colored box and
// a "<class> NN.N%" caption; colors cycle through a fixed 19-entry palette.
static cv::Mat draw_objects(const cv::Mat& bgr, const std::vector<Object>& objects)
{
    // Display names for the 10 Azaion classes, indexed by Object::label.
    static const char* class_names[] = {
        "Armoured vehicles",
        "Truck",
        "Passenger car",
        "Artillery",
        "Shadow of the vehicle",
        "Trenches",
        "Military",
        "Ramps",
        "Tank with additional protection",
        "Smoke"
    };
    static const unsigned char colors[19][3] = {
        {54, 67, 244},
        {99, 30, 233},
        {176, 39, 156},
        {183, 58, 103},
        {181, 81, 63},
        {243, 150, 33},
        {244, 169, 3},
        {212, 188, 0},
        {136, 150, 0},
        {80, 175, 76},
        {74, 195, 139},
        {57, 220, 205},
        {59, 235, 255},
        {7, 193, 255},
        {0, 152, 255},
        {34, 87, 255},
        {72, 85, 121},
        {158, 158, 158},
        {139, 125, 96}
    };
    int color_index = 0;
    cv::Mat image = bgr.clone();
    for (size_t i = 0; i < objects.size(); i++)
    {
        const Object& obj = objects[i];
        const unsigned char* color = colors[color_index % 19];
        color_index++;
        cv::Scalar cc(color[0], color[1], color[2]);
        fprintf(stderr, "%d = %.5f at %.2f %.2f %.2f x %.2f\n", obj.label, obj.prob,
                obj.rect.x, obj.rect.y, obj.rect.width, obj.rect.height);
        cv::rectangle(image, obj.rect, cc, 2);
        char text[256];
        // Bug fix: use snprintf so a long class name can never overflow the
        // fixed-size buffer.
        snprintf(text, sizeof(text), "%s %.1f%%", class_names[obj.label], obj.prob * 100);
        int baseLine = 0;
        cv::Size label_size = cv::getTextSize(text, cv::FONT_HERSHEY_SIMPLEX, 0.5, 1, &baseLine);
        // Place the caption above the box, clamped inside the image.
        int x = obj.rect.x;
        int y = obj.rect.y - label_size.height - baseLine;
        if (y < 0)
            y = 0;
        if (x + label_size.width > image.cols)
            x = image.cols - label_size.width;
        cv::rectangle(image, cv::Rect(cv::Point(x, y), cv::Size(label_size.width, label_size.height + baseLine)),
                      cc, -1);
        cv::putText(image, text, cv::Point(x, y + label_size.height),
                    cv::FONT_HERSHEY_SIMPLEX, 0.5, cv::Scalar(255, 255, 255));
    }
    return image;
}
// Slot invoked per frame: resizes/pads, runs YOLOv8, and, when anything was
// found, emits an AiEngineInferenceResult with an annotated frame.
void AiEngineInferencevNcnn::performInferenceSlot(cv::Mat frame)
{
    // mActive marks the engine busy for the duration of the inference.
    mActive = true;
    cv::Mat scaledImage = resizeAndPad(frame);
    std::vector<Object> objects;
    detect_yolov8(scaledImage, objects);
    if (objects.empty() == false) {
        AiEngineInferenceResult result;
        result.frame = draw_objects(scaledImage, objects);
        // Convert ncnn Objects into the backend-neutral result format.
        for (uint i = 0; i < objects.size(); i++) {
            const Object &detection = objects[i];
            AiEngineObject object;
            object.classId = detection.label;
            object.classStr = mClassNames[detection.label];
            object.propability = detection.prob;
            object.rectangle.top = detection.rect.y;
            object.rectangle.left = detection.rect.x;
            object.rectangle.bottom = detection.rect.y + detection.rect.height;
            object.rectangle.right = detection.rect.x + detection.rect.width;
            result.objects.append(object);
        }
        emit resultsReady(result);
    }
    mActive = false;
}
// No per-instance setup is needed for the NCNN backend; the parameter exists
// only for interface parity with the other backends.
void AiEngineInferencevNcnn::initialize(int number)
{
    static_cast<void>(number);
}
@@ -1,31 +0,0 @@
#pragma once
#include <QObject>
#include <opencv2/core.hpp>
#include <ncnn/net.h>
#include "aiengineinference.h"
// One raw YOLOv8 detection in inference-image coordinates.
struct Object
{
    cv::Rect_<float> rect;  // bounding box (pixels)
    int label;              // class index into the label list
    float prob;             // confidence score in [0, 1]
};
// YOLOv8 inference backend built on ncnn (CPU, OpenMP).
class AiEngineInferencevNcnn : public AiEngineInference
{
    Q_OBJECT
public:
    explicit AiEngineInferencevNcnn(QString modelPath, QObject *parent = nullptr);
    // No-op for this backend; kept for interface parity.
    void initialize(int number);
public slots:
    // Runs detection on one frame and emits resultsReady() when non-empty.
    void performInferenceSlot(cv::Mat frame) override;
private:
    // Fills `objects` with post-NMS detections; always returns 0.
    int detect_yolov8(const cv::Mat& bgr, std::vector<Object>& objects);
    ncnn::Net yolov8;
};
@@ -1,131 +0,0 @@
#include <QDebug>
#include <QThread>
#include <vector>
#include "aiengineinferenceonnxruntime.h"
// Detection thresholds passed to the ONNX Runtime predictor.
static const float confThreshold = 0.25f;  // minimum class confidence to keep a box
static const float iouThreshold = 0.45f;   // NMS IoU suppression threshold
static const float maskThreshold = 0.45f;  // binarization threshold for segmentation masks
// Constructs the ONNX Runtime predictor from the given model path; class
// names are loaded by the AiEngineInference base class.
AiEngineInferencevOnnxRuntime::AiEngineInferencevOnnxRuntime(QString modelPath, QObject *parent) :
    AiEngineInference{modelPath, parent},
    mPredictor(modelPath.toStdString(), confThreshold, iouThreshold, maskThreshold)
{
    qDebug() << "TUOMAS AiEngineInferencevOnnxRuntime() mModelPath=" << mModelPath;
    qDebug() << "AiEngineInferencevOnnxRuntime() mClassNames.size() =" << mClassNames.size();
}
// Renders each detection as a green box with a "<class>: NN%" caption on a
// white background; the input image is left untouched.
cv::Mat AiEngineInferencevOnnxRuntime::drawLabels(const cv::Mat &image, const std::vector<Yolov8Result> &detections)
{
    cv::Mat annotated = image.clone();
    for (const auto &det : detections)
    {
        cv::rectangle(annotated, det.box, cv::Scalar(0, 255, 0), 2);

        const int percent = roundf(det.conf * 100);
        const std::string caption =
            mClassNames[det.classId].toStdString() + ": " + std::to_string(percent) + "%";

        int baseLine;
        const cv::Size captionSize =
            cv::getTextSize(caption, cv::FONT_HERSHEY_COMPLEX, 0.5, 1, &baseLine);
        const cv::Point captionOrigin(det.box.x, det.box.y - captionSize.height - baseLine);

        // White background behind the caption text for readability.
        cv::rectangle(
            annotated,
            captionOrigin,
            cv::Point(det.box.x + captionSize.width, det.box.y),
            cv::Scalar(255, 255, 255),
            cv::FILLED);
        cv::putText(
            annotated,
            caption,
            cv::Point(det.box.x, det.box.y - baseLine + 2),
            cv::FONT_HERSHEY_COMPLEX,
            0.5,
            cv::Scalar(0, 0, 0),
            1,
            cv::LINE_AA);
    }
    return annotated;
}
// Slot invoked per frame: resizes/pads, runs the ONNX predictor, optionally
// filters classes (YOLO_ONNX test builds), and emits resultsReady() when at
// least one valid detection remains. All exceptions are contained here so a
// bad frame cannot take down the worker thread.
void AiEngineInferencevOnnxRuntime::performInferenceSlot(cv::Mat frame)
{
    qDebug() << __PRETTY_FUNCTION__;
    try {
        mActive = true;
        cv::Mat scaledImage = resizeAndPad(frame);
        std::vector<Yolov8Result> detections = mPredictor.predict(scaledImage);
#ifdef YOLO_ONNX
        // Only keep following detected objects.
        // car = 2
        // train = 6
        // cup = 41
        // banana = 46
        auto it = std::remove_if(detections.begin(), detections.end(),
                                 [](const Yolov8Result& result) {
            return result.classId != 2 &&
                   result.classId != 6 &&
                   result.classId != 41 &&
                   result.classId != 46;
        });
        detections.erase(it, detections.end());
#endif
        AiEngineInferenceResult result;
        for (uint i = 0; i < detections.size(); i++) {
            const Yolov8Result &detection = detections[i];
            // Guard against a model whose class count disagrees with the
            // loaded class-name list.
            if (detection.classId >= mClassNames.size()) {
                qDebug() << "performInferenceSlot() invalid classId =" << detection.classId;
                continue;
            }
#ifdef SAVE_IMAGES
            // Bug fix: this debug dump used to run unconditionally, writing
            // the scaled input to disk once per detection on every frame.
            // It is now opt-in via the save_images build flag.
            cv::imwrite("scaledImage.png", scaledImage);
#endif
            // Add detected objects to the results
            AiEngineObject object;
            object.classId = detection.classId;
            object.classStr = mClassNames[detection.classId];
            object.propability = detection.conf;
            object.rectangle.top = detection.box.y;
            object.rectangle.left = detection.box.x;
            object.rectangle.bottom = detection.box.y + detection.box.height;
            object.rectangle.right = detection.box.x + detection.box.width;
            result.objects.append(object);
        }
        if (result.objects.empty() == false) {
            result.frame = drawLabels(scaledImage, detections);
            emit resultsReady(result);
        }
        mActive = false;
    }
    catch (const cv::Exception& e) {
        std::cout << "OpenCV exception caught: " << e.what() << std::endl;
    }
    catch (const std::exception& e) {
        std::cout << "Standard exception caught: " << e.what() << std::endl;
    }
    catch (...) {
        std::cout << "Unknown exception caught" << std::endl;
    }
}
// No per-instance setup is needed for the ONNX Runtime backend; the parameter
// exists only for interface parity with the other backends.
void AiEngineInferencevOnnxRuntime::initialize(int number)
{
    static_cast<void>(number);
}
@@ -1,20 +0,0 @@
#pragma once
#include <QObject>
#include "aiengineinference.h"
#include "yolov8Predictor.h"
// YOLOv8 inference backend built on ONNX Runtime (CPU).
class AiEngineInferencevOnnxRuntime : public AiEngineInference
{
    Q_OBJECT
public:
    explicit AiEngineInferencevOnnxRuntime(QString modelPath, QObject *parent = nullptr);
    // No-op for this backend; kept for interface parity.
    void initialize(int number);
public slots:
    // Runs detection on one frame and emits resultsReady() when non-empty.
    void performInferenceSlot(cv::Mat frame) override;
private:
    // Returns a copy of `image` annotated with boxes and captions.
    cv::Mat drawLabels(const cv::Mat &image, const std::vector<Yolov8Result> &detections);
    YOLOPredictor mPredictor;
};
@@ -1,167 +0,0 @@
#include "utils.h"
// Product of all elements of a tensor-shape vector; an empty shape yields 0
// (not the mathematical empty product of 1) by this project's convention.
size_t utils::vectorProduct(const std::vector<int64_t> &vector)
{
    if (vector.empty())
        return 0;

    size_t product = 1;
    for (size_t i = 0; i < vector.size(); ++i)
        product *= vector[i];
    return product;
}
// Converts a UTF-8 C string to a wide string (used for the Windows ONNX
// Runtime API, which takes wide-char model paths).
// NOTE(review): std::codecvt_utf8 / std::wstring_convert are deprecated since
// C++17; consider replacing before moving past C++17.
std::wstring utils::charToWstring(const char *str)
{
    typedef std::codecvt_utf8<wchar_t> convert_type;
    std::wstring_convert<convert_type, wchar_t> converter;
    return converter.from_bytes(str);
}
// Reads one class name per line from `path` (tolerating Windows \r\n line
// endings) and seeds the shared `colors` palette with two random colors per
// class (box color + mask overlay color).
std::vector<std::string> utils::loadNames(const std::string &path)
{
    // load class names
    std::vector<std::string> classNames;
    std::ifstream infile(path);
    if (infile.good())
    {
        std::string line;
        while (getline(infile, line))
        {
            // Bug fix: calling back() on an empty string is undefined
            // behaviour; blank lines (e.g. a trailing newline) are now safe.
            if (!line.empty() && line.back() == '\r')
                line.pop_back();
            classNames.emplace_back(line);
        }
        infile.close();
    }
    else
    {
        std::cerr << "ERROR: Failed to access class name path: " << path << std::endl;
    }
    // set color
    srand(time(0));
    for (int i = 0; i < (int)(2 * classNames.size()); i++)
    {
        int b = rand() % 256;
        int g = rand() % 256;
        int r = rand() % 256;
        colors.push_back(cv::Scalar(b, g, r));
    }
    return classNames;
}
// Draws boxes, masks, and "<class> 0.NN" captions onto `im` in place; the
// overlay is alpha-blended (40% original, 60% annotated) at the end.
void utils::visualizeDetection(cv::Mat &im, std::vector<Yolov8Result> &results,
                               const std::vector<std::string> &classNames)
{
    cv::Mat image = im.clone();
    for (const Yolov8Result &result : results)
    {
        int x = result.box.x;
        int y = result.box.y;
        int conf = (int)std::round(result.conf * 100);
        int classId = result.classId;
        // NOTE(review): "0." + conf prints e.g. "0.5" for 5% — presumably
        // intended as a decimal confidence; verify formatting for conf >= 100.
        std::string label = classNames[classId] + " 0." + std::to_string(conf);
        int baseline = 0;
        cv::Size size = cv::getTextSize(label, cv::FONT_ITALIC, 0.4, 1, &baseline);
        // The second half of the colors palette holds the mask overlay colors.
        image(result.box).setTo(colors[classId + classNames.size()], result.boxMask);
        cv::rectangle(image, result.box, colors[classId], 2);
        cv::rectangle(image,
                      cv::Point(x, y), cv::Point(x + size.width, y + 12),
                      colors[classId], -1);
        cv::putText(image, label,
                    cv::Point(x, y - 3 + 12), cv::FONT_ITALIC,
                    0.4, cv::Scalar(0, 0, 0), 1);
    }
    cv::addWeighted(im, 0.4, image, 0.6, 0, im);
}
// YOLO-style letterbox: scales `image` to fit `newShape` preserving aspect
// ratio, then pads the borders with `color`. With auto_=true the padding is
// reduced to the minimum that keeps dimensions a multiple of `stride`;
// scaleFill stretches instead of padding; scaleUp=false never enlarges.
// (Default arguments here apply within this translation unit only.)
void utils::letterbox(const cv::Mat &image, cv::Mat &outImage,
                      const cv::Size &newShape = cv::Size(640, 640),
                      const cv::Scalar &color = cv::Scalar(114, 114, 114),
                      bool auto_ = true,
                      bool scaleFill = false,
                      bool scaleUp = true,
                      int stride = 32)
{
    cv::Size shape = image.size();
    float r = std::min((float)newShape.height / (float)shape.height,
                       (float)newShape.width / (float)shape.width);
    if (!scaleUp)
        r = std::min(r, 1.0f);
    float ratio[2]{r, r};
    int newUnpad[2]{(int)std::round((float)shape.width * r),
                    (int)std::round((float)shape.height * r)};
    auto dw = (float)(newShape.width - newUnpad[0]);
    auto dh = (float)(newShape.height - newUnpad[1]);
    if (auto_)
    {
        dw = (float)((int)dw % stride);
        dh = (float)((int)dh % stride);
    }
    else if (scaleFill)
    {
        dw = 0.0f;
        dh = 0.0f;
        newUnpad[0] = newShape.width;
        newUnpad[1] = newShape.height;
        ratio[0] = (float)newShape.width / (float)shape.width;
        ratio[1] = (float)newShape.height / (float)shape.height;
    }
    dw /= 2.0f;
    dh /= 2.0f;
    // Bug fix: was `&&`, which skipped the resize whenever only ONE dimension
    // differed from the target, leaving outImage at the wrong size. Resize if
    // either dimension needs to change.
    if (shape.width != newUnpad[0] || shape.height != newUnpad[1])
    {
        cv::resize(image, outImage, cv::Size(newUnpad[0], newUnpad[1]));
    }
    // Split the padding as evenly as possible between opposite borders.
    int top = int(std::round(dh - 0.1f));
    int bottom = int(std::round(dh + 0.1f));
    int left = int(std::round(dw - 0.1f));
    int right = int(std::round(dw + 0.1f));
    cv::copyMakeBorder(outImage, outImage, top, bottom, left, right, cv::BORDER_CONSTANT, color);
}
// Maps a box (and its mask) from letterboxed inference coordinates back to
// the original image: removes the padding, divides by the scale gain, and
// clamps to the original bounds. The mask is cropped to the un-padded region,
// resized to the original image, then binarized within the box.
void utils::scaleCoords(cv::Rect &coords,
                        cv::Mat &mask,
                        const float maskThreshold,
                        const cv::Size &imageShape,
                        const cv::Size &imageOriginalShape)
{
    // Same gain the letterbox used when scaling the original into imageShape.
    float gain = std::min((float)imageShape.height / (float)imageOriginalShape.height,
                          (float)imageShape.width / (float)imageOriginalShape.width);
    // Padding added on each side (x, y).
    int pad[2] = {(int)(((float)imageShape.width - (float)imageOriginalShape.width * gain) / 2.0f),
                  (int)(((float)imageShape.height - (float)imageOriginalShape.height * gain) / 2.0f)};
    coords.x = (int)std::round(((float)(coords.x - pad[0]) / gain));
    coords.x = std::max(0, coords.x);
    coords.y = (int)std::round(((float)(coords.y - pad[1]) / gain));
    coords.y = std::max(0, coords.y);
    coords.width = (int)std::round(((float)coords.width / gain));
    coords.width = std::min(coords.width, imageOriginalShape.width - coords.x);
    coords.height = (int)std::round(((float)coords.height / gain));
    coords.height = std::min(coords.height, imageOriginalShape.height - coords.y);
    mask = mask(cv::Rect(pad[0], pad[1], imageShape.width - 2 * pad[0], imageShape.height - 2 * pad[1]));
    cv::resize(mask, mask, imageOriginalShape, cv::INTER_LINEAR);
    mask = mask(coords) > maskThreshold;
}
// Clamps n into the inclusive range [lower, upper].
template <typename T>
T utils::clip(const T &n, const T &lower, const T &upper)
{
    const T capped = std::min(n, upper);
    return std::max(lower, capped);
}
@@ -1,38 +0,0 @@
#pragma once
#include <codecvt>
#include <fstream>
#include <opencv2/opencv.hpp>
// One detection (box + optional instance-segmentation mask) produced by
// YOLOPredictor.
struct Yolov8Result
{
    cv::Rect box;
    cv::Mat boxMask; // mask in box
    float conf{};    // confidence score in [0, 1]
    int classId{};   // class index into the loaded name list
};
// Shared helpers for the ONNX Runtime YOLOv8 backend.
namespace utils
{
    // Palette filled by loadNames(): one box color per class followed by one
    // mask-overlay color per class.
    // Fix: `static` at namespace scope in a header gives every translation
    // unit its own private copy, so colors filled in one .cpp were invisible
    // in another. C++17 `inline` yields a single program-wide instance.
    inline std::vector<cv::Scalar> colors;
    // Product of all elements (tensor element count); empty shape -> 0.
    size_t vectorProduct(const std::vector<int64_t> &vector);
    // UTF-8 C string -> wide string (Windows ONNX Runtime paths).
    std::wstring charToWstring(const char *str);
    // Reads one class name per line and seeds `colors`.
    std::vector<std::string> loadNames(const std::string &path);
    // Draws boxes/masks/captions onto `image` in place (alpha-blended).
    void visualizeDetection(cv::Mat &image, std::vector<Yolov8Result> &results,
                            const std::vector<std::string> &classNames);
    // Aspect-preserving resize + padding to `newShape`.
    void letterbox(const cv::Mat &image, cv::Mat &outImage,
                   const cv::Size &newShape,
                   const cv::Scalar &color,
                   bool auto_,
                   bool scaleFill,
                   bool scaleUp,
                   int stride);
    // Maps a box and mask from letterboxed space back to the original image.
    void scaleCoords(cv::Rect &coords, cv::Mat &mask,
                     const float maskThreshold,
                     const cv::Size &imageShape, const cv::Size &imageOriginalShape);
    // Clamps n into [lower, upper].
    template <typename T>
    T clip(const T &n, const T &lower, const T &upper);
}
@@ -1,243 +0,0 @@
#include "yolov8Predictor.h"
// Creates an ONNX Runtime session for the model and inspects its inputs and
// outputs: two outputs mean instance segmentation (boxes + mask protos), one
// means plain object detection. The class count is derived from the first
// output's channel dimension.
YOLOPredictor::YOLOPredictor(const std::string &modelPath,
                             float confThreshold,
                             float iouThreshold,
                             float maskThreshold)
{
    this->confThreshold = confThreshold;
    this->iouThreshold = iouThreshold;
    this->maskThreshold = maskThreshold;
    env = Ort::Env(OrtLoggingLevel::ORT_LOGGING_LEVEL_WARNING, "YOLOV8");
    sessionOptions = Ort::SessionOptions();
    std::vector<std::string> availableProviders = Ort::GetAvailableProviders();
    std::cout << "Inference device: CPU" << std::endl;
    session = Ort::Session(env, modelPath.c_str(), sessionOptions);
    const size_t num_input_nodes = session.GetInputCount();   //==1
    const size_t num_output_nodes = session.GetOutputCount(); //==1,2
    if (num_output_nodes > 1)
    {
        this->hasMask = true;
        std::cout << "Instance Segmentation" << std::endl;
    }
    else
        std::cout << "Object Detection" << std::endl;
    Ort::AllocatorWithDefaultOptions allocator;
    for (int i = 0; i < (int)num_input_nodes; i++)
    {
        auto input_name = session.GetInputNameAllocated(i, allocator);
        this->inputNames.push_back(input_name.get());
        input_names_ptr.push_back(std::move(input_name));
        Ort::TypeInfo inputTypeInfo = session.GetInputTypeInfo(i);
        std::vector<int64_t> inputTensorShape = inputTypeInfo.GetTensorTypeAndShapeInfo().GetShape();
        this->inputShapes.push_back(inputTensorShape);
        // Bug fix: this was unconditionally set to `true`, which made the
        // dynamic-shape check below dead code and forced dynamic-shape
        // letterboxing for every model. Start from `false` and let the check
        // decide.
        this->isDynamicInputShape = false;
        // checking if width and height are dynamic
        if (inputTensorShape[2] == -1 && inputTensorShape[3] == -1)
        {
            std::cout << "Dynamic input shape" << std::endl;
            this->isDynamicInputShape = true;
        }
    }
    for (int i = 0; i < (int)num_output_nodes; i++)
    {
        auto output_name = session.GetOutputNameAllocated(i, allocator);
        this->outputNames.push_back(output_name.get());
        output_names_ptr.push_back(std::move(output_name));
        Ort::TypeInfo outputTypeInfo = session.GetOutputTypeInfo(i);
        std::vector<int64_t> outputTensorShape = outputTypeInfo.GetTensorTypeAndShapeInfo().GetShape();
        this->outputShapes.push_back(outputTensorShape);
        if (i == 0)
        {
            // Output 0 channels = 4 box values + classes (+ 32 mask coeffs).
            if (!this->hasMask)
                classNums = outputTensorShape[1] - 4;
            else
                classNums = outputTensorShape[1] - 4 - 32;
        }
    }
    // for (const char *x : this->inputNames)
    // {
    //     std::cout << x << std::endl;
    // }
    // for (const char *x : this->outputNames)
    // {
    //     std::cout << x << std::endl;
    // }
    // std::cout << classNums << std::endl;
}
// Scans one anchor row (layout: [cx, cy, w, h, score_0 .. score_{n-1}]) and
// reports the arg-max class and its confidence. The sentinel classId of 4 is
// retained from the original implementation for the all-zero-scores case.
void YOLOPredictor::getBestClassInfo(std::vector<float>::iterator it,
                                     float &bestConf,
                                     int &bestClassId,
                                     const int _classNums)
{
    // first 4 element are box
    bestConf = 0;
    bestClassId = 4;
    int idx = 4;
    while (idx < _classNums + 4)
    {
        if (it[idx] > bestConf)
        {
            bestConf = it[idx];
            bestClassId = idx - 4;
        }
        ++idx;
    }
}
// Combines a detection's 32 mask coefficients with the prototype masks
// (output 1) via matrix multiplication, applies a sigmoid, and resizes the
// result to the network input size. Returned values are probabilities in
// [0, 1]; thresholding happens later in scaleCoords().
cv::Mat YOLOPredictor::getMask(const cv::Mat &maskProposals,
                               const cv::Mat &maskProtos)
{
    // Flatten protos from [C, H, W] to [C, H*W] for the matmul.
    cv::Mat protos = maskProtos.reshape(0, {(int)this->outputShapes[1][1], (int)this->outputShapes[1][2] * (int)this->outputShapes[1][3]});
    cv::Mat matmul_res = (maskProposals * protos).t();
    cv::Mat masks = matmul_res.reshape(1, {(int)this->outputShapes[1][2], (int)this->outputShapes[1][3]});
    cv::Mat dest;
    // sigmoid
    cv::exp(-masks, dest);
    dest = 1.0 / (1.0 + dest);
    cv::resize(dest, dest, cv::Size((int)this->inputShapes[0][2], (int)this->inputShapes[0][3]), cv::INTER_LINEAR);
    return dest;
}
// Converts a BGR image into a CHW float32 blob normalized to [0, 1]:
// BGR->RGB, letterbox to the network input size, then channel split directly
// into the blob buffer. `blob` is allocated here with new[]; the caller
// (predict()) owns it and frees it with delete[]. The actual H/W after
// letterboxing are written back into inputTensorShape[2..3].
void YOLOPredictor::preprocessing(cv::Mat &image, float *&blob, std::vector<int64_t> &inputTensorShape)
{
    cv::Mat resizedImage, floatImage;
    cv::cvtColor(image, resizedImage, cv::COLOR_BGR2RGB);
    utils::letterbox(resizedImage, resizedImage, cv::Size((int)this->inputShapes[0][2], (int)this->inputShapes[0][3]),
                     cv::Scalar(114, 114, 114), this->isDynamicInputShape,
                     false, true, 32);
    inputTensorShape[2] = resizedImage.rows;
    inputTensorShape[3] = resizedImage.cols;
    resizedImage.convertTo(floatImage, CV_32FC3, 1 / 255.0);
    blob = new float[floatImage.cols * floatImage.rows * floatImage.channels()];
    cv::Size floatImageSize{floatImage.cols, floatImage.rows};
    // hwc -> chw
    // Each plane aliases a channel-sized slice of the blob, so cv::split
    // writes the CHW layout directly without an extra copy.
    std::vector<cv::Mat> chw(floatImage.channels());
    for (int i = 0; i < floatImage.channels(); ++i)
    {
        chw[i] = cv::Mat(floatImageSize, CV_32FC1, blob + i * floatImageSize.width * floatImageSize.height);
    }
    cv::split(floatImage, chw);
}
// Decodes the raw output tensors into final detections: per-anchor arg-max
// class filtering, NMS, optional mask reconstruction, and mapping of every
// box/mask back to the original image coordinates.
std::vector<Yolov8Result> YOLOPredictor::postprocessing(const cv::Size &resizedImageShape,
                                                        const cv::Size &originalImageShape,
                                                        std::vector<Ort::Value> &outputTensors)
{
    // for box
    std::vector<cv::Rect> boxes;
    std::vector<float> confs;
    std::vector<int> classIds;
    float *boxOutput = outputTensors[0].GetTensorMutableData<float>();
    //[1,4+n,8400]=>[1,8400,4+n] or [1,4+n+32,8400]=>[1,8400,4+n+32]
    cv::Mat output0 = cv::Mat(cv::Size((int)this->outputShapes[0][2], (int)this->outputShapes[0][1]), CV_32F, boxOutput).t();
    float *output0ptr = (float *)output0.data;
    int rows = (int)this->outputShapes[0][2];
    int cols = (int)this->outputShapes[0][1];
    // std::cout << rows << cols << std::endl;
    // if hasMask
    std::vector<std::vector<float>> picked_proposals;
    cv::Mat mask_protos;
    for (int i = 0; i < rows; i++)
    {
        // One anchor row: [cx, cy, w, h, class scores..., (mask coeffs...)].
        std::vector<float> it(output0ptr + i * cols, output0ptr + (i + 1) * cols);
        float confidence;
        int classId;
        this->getBestClassInfo(it.begin(), confidence, classId, classNums);
        if (confidence > this->confThreshold)
        {
            if (this->hasMask)
            {
                // Keep the 32 mask coefficients for getMask() after NMS.
                std::vector<float> temp(it.begin() + 4 + classNums, it.end());
                picked_proposals.push_back(temp);
            }
            // Center/size -> top-left/size rectangle.
            int centerX = (int)(it[0]);
            int centerY = (int)(it[1]);
            int width = (int)(it[2]);
            int height = (int)(it[3]);
            int left = centerX - width / 2;
            int top = centerY - height / 2;
            boxes.emplace_back(left, top, width, height);
            confs.emplace_back(confidence);
            classIds.emplace_back(classId);
        }
    }
    std::vector<int> indices;
    cv::dnn::NMSBoxes(boxes, confs, this->confThreshold, this->iouThreshold, indices);
    if (this->hasMask)
    {
        // Prototype masks live in output tensor 1 as [1, C, H, W].
        float *maskOutput = outputTensors[1].GetTensorMutableData<float>();
        std::vector<int> mask_protos_shape = {1, (int)this->outputShapes[1][1], (int)this->outputShapes[1][2], (int)this->outputShapes[1][3]};
        mask_protos = cv::Mat(mask_protos_shape, CV_32F, maskOutput);
    }
    std::vector<Yolov8Result> results;
    for (int idx : indices)
    {
        Yolov8Result res;
        res.box = cv::Rect(boxes[idx]);
        if (this->hasMask)
            res.boxMask = this->getMask(cv::Mat(picked_proposals[idx]).t(), mask_protos);
        else
            // Detection-only models get an all-zero placeholder mask.
            res.boxMask = cv::Mat::zeros((int)this->inputShapes[0][2], (int)this->inputShapes[0][3], CV_8U);
        // Undo the letterbox: back to original image coordinates.
        utils::scaleCoords(res.box, res.boxMask, this->maskThreshold, resizedImageShape, originalImageShape);
        res.conf = confs[idx];
        res.classId = classIds[idx];
        results.emplace_back(res);
    }
    return results;
}
// Full inference pipeline for one image: preprocess into a CHW blob, run the
// ONNX session, and decode the outputs into detections in the original image
// coordinates. The blob allocated by preprocessing() is freed here.
std::vector<Yolov8Result> YOLOPredictor::predict(cv::Mat &image)
{
    float *blob = nullptr;
    // -1 H/W are placeholders; preprocessing() fills in the real size.
    std::vector<int64_t> inputTensorShape{1, 3, -1, -1};
    this->preprocessing(image, blob, inputTensorShape);
    size_t inputTensorSize = utils::vectorProduct(inputTensorShape);
    std::vector<float> inputTensorValues(blob, blob + inputTensorSize);
    std::vector<Ort::Value> inputTensors;
    Ort::MemoryInfo memoryInfo = Ort::MemoryInfo::CreateCpu(
        OrtAllocatorType::OrtArenaAllocator, OrtMemType::OrtMemTypeDefault);
    inputTensors.push_back(Ort::Value::CreateTensor<float>(
        memoryInfo, inputTensorValues.data(), inputTensorSize,
        inputTensorShape.data(), inputTensorShape.size()));
    std::vector<Ort::Value> outputTensors = this->session.Run(Ort::RunOptions{nullptr},
                                                              this->inputNames.data(),
                                                              inputTensors.data(),
                                                              1,
                                                              this->outputNames.data(),
                                                              this->outputNames.size());
    // Shape is NCHW, cv::Size wants (width, height).
    cv::Size resizedShape = cv::Size((int)inputTensorShape[3], (int)inputTensorShape[2]);
    std::vector<Yolov8Result> result = this->postprocessing(resizedShape,
                                                            image.size(),
                                                            outputTensors);
    delete[] blob;
    return result;
}
@@ -1,50 +0,0 @@
#pragma once
#include <opencv2/opencv.hpp>
#include <onnxruntime_cxx_api.h>
#include <utility>
#include "utils.h"
// Thin ONNX Runtime wrapper for YOLOv8 detection/segmentation models.
// Construct with a model path and thresholds, then call predict() per image.
class YOLOPredictor
{
public:
    explicit YOLOPredictor(std::nullptr_t){};
    YOLOPredictor(const std::string &modelPath,
                  float confThreshold,
                  float iouThreshold,
                  float maskThreshold);
    // ~YOLOPredictor();
    // Runs the full pipeline; results are in original-image coordinates.
    std::vector<Yolov8Result> predict(cv::Mat &image);
    // Derived from the model's output shape in the constructor.
    int classNums = 10;

private:
    Ort::Env env{nullptr};
    Ort::SessionOptions sessionOptions{nullptr};
    Ort::Session session{nullptr};
    // Image -> normalized CHW float blob (caller frees blob with delete[]).
    void preprocessing(cv::Mat &image, float *&blob, std::vector<int64_t> &inputTensorShape);
    // Raw output tensors -> filtered, NMS'd detections.
    std::vector<Yolov8Result> postprocessing(const cv::Size &resizedImageShape,
                                             const cv::Size &originalImageShape,
                                             std::vector<Ort::Value> &outputTensors);
    // Arg-max over the class scores of one anchor row.
    static void getBestClassInfo(std::vector<float>::iterator it,
                                 float &bestConf,
                                 int &bestClassId,
                                 const int _classNums);
    // Mask coefficients x prototypes -> per-detection probability mask.
    cv::Mat getMask(const cv::Mat &maskProposals, const cv::Mat &maskProtos);
    bool isDynamicInputShape{};
    std::vector<const char *> inputNames;
    std::vector<Ort::AllocatedStringPtr> input_names_ptr;
    std::vector<const char *> outputNames;
    std::vector<Ort::AllocatedStringPtr> output_names_ptr;
    std::vector<std::vector<int64_t>> inputShapes;
    std::vector<std::vector<int64_t>> outputShapes;
    float confThreshold = 0.3f;
    float iouThreshold = 0.4f;
    bool hasMask = false;
    float maskThreshold = 0.5f;
};
@@ -1,67 +0,0 @@
#include <QDebug>
#include <QThread>
#include "aiengineinferenceopencvonnx.h"
// Constructs the OpenCV-DNN inference wrapper for a 640x640 ONNX model; class
// names are read from "classes.txt" in the working directory.
AiEngineInferenceOpencvOnnx::AiEngineInferenceOpencvOnnx(QString modelPath, QObject *parent)
    : AiEngineInference{modelPath, parent},
      mInference(modelPath.toStdString(), cv::Size(640, 640), "classes.txt")
{
    //qDebug() << "TUOMAS test mModelPath=" << mModelPath;
    //mEngine = new InferenceEngine(modelPath.toStdString());
    //mInference = new Inference(modelPath.toStdString(), cv::Size(INFERENCE_SQUARE_WIDTH, INFERENCE_SQUARE_HEIGHT), "classes.txt");
}
// Runs one detection pass on `frame` in this worker's thread and, when at
// least one object was found, emits resultsReady() with the annotated frame.
// `mActive` is toggled around the pass so callers can skip frames while a
// previous inference is still running.
void AiEngineInferenceOpencvOnnx::performInferenceSlot(cv::Mat frame)
{
    try {
        qDebug() << "performInferenceSlot() in thread: " << QThread::currentThreadId();
        mActive = true;
        cv::Mat scaledImage = resizeAndPad(frame);
        //cv::imwrite("/tmp/frame.png", scaledImage);
        std::vector<Detection> detections = mInference.runInference(scaledImage);
        AiEngineInferenceResult result;
        qDebug() << "performInferenceSlot() found " << detections.size() << " objects";
        for (uint i = 0; i < detections.size(); ++i) {
            const Detection &detection = detections[i];
            // Add detected objects to the results
            AiEngineObject object;
            object.classId = detection.class_id;
            object.propability = detection.confidence;
            object.rectangle.top = detection.box.y;
            object.rectangle.left = detection.box.x;
            object.rectangle.bottom = detection.box.y + detection.box.height;
            object.rectangle.right = detection.box.x + detection.box.width;
            result.objects.append(object);
            //qDebug() << "performInferenceSlot()" << object.rectangle.top << object.rectangle.left << "and" << object.rectangle.bottom << object.rectangle.right;
        }
        // NOTE(review): the class filter below runs AFTER result.objects was
        // filled, so the emitted result contains ALL classes while only
        // class-0 detections are drawn onto the frame — confirm intended.
        auto end = std::remove_if(detections.begin(), detections.end(), [](const Detection& detection) {
            return detection.class_id != 0;
        });
        detections.erase(end, detections.end());
        if (result.objects.empty() == false) {
            result.frame = mInference.drawLabels(scaledImage, detections);
            emit resultsReady(result);
        }
        mActive = false;
    }
    catch (const cv::Exception& e) {
        // mActive intentionally stays true here only if the throw happened
        // mid-pass; OpenCV errors are logged and the slot returns.
        std::cerr << "performInferenceSlot() Error: " << e.what() << std::endl;
    }
}
// No per-instance initialization is needed for this backend — the model is
// already loaded in the constructor. `number` exists only to satisfy the
// AiEngineInference interface.
void AiEngineInferenceOpencvOnnx::initialize(int number)
{
    (void)number;
}
@@ -1,19 +0,0 @@
#pragma once
#include <QObject>
#include "aiengineinference.h"
#include "src-opencv-onnx/inference.h"

// OpenCV-DNN (ONNX) implementation of the AiEngineInference interface.
// Runs YOLO inference on the CPU/GPU through cv::dnn instead of a dedicated
// NPU runtime (compare AiEngineInferenceOpi5).
class AiEngineInferenceOpencvOnnx : public AiEngineInference
{
    Q_OBJECT
public:
    // Loads the ONNX model immediately via the Inference member.
    explicit AiEngineInferenceOpencvOnnx(QString modelPath, QObject *parent = nullptr);
    // No-op for this backend; marked `override` for consistency with the
    // sibling backend header (the base declares it virtual).
    void initialize(int number) override;
public slots:
    // Runs one detection pass and emits resultsReady() on success.
    void performInferenceSlot(cv::Mat frame) override;
private:
    Inference mInference;   // OpenCV-DNN wrapper that owns the loaded network
};
@@ -1,227 +0,0 @@
#include "inference.h"

// Stores the model configuration and loads the ONNX network right away.
// Class names come from the hard-coded COCO list in the header;
// `classesTxtFile` is kept only for optional use by loadClassesFromFile().
Inference::Inference(const std::string &onnxModelPath, const cv::Size &modelInputShape, const std::string &classesTxtFile, const bool &runWithCuda)
    : modelPath(onnxModelPath),
      classesPath(classesTxtFile),
      cudaEnabled(runWithCuda),
      modelShape(modelInputShape)
{
    std::cout << "SIZE = " << modelInputShape.width << "x" << modelInputShape.height << std::endl;
    loadOnnxNetwork();
    // loadClassesFromFile(); The classes are hard-coded for this example
}
// Runs one forward pass of the network on `input` and returns the
// NMS-filtered detections; box coordinates are scaled back to `input` pixels.
std::vector<Detection> Inference::runInference(const cv::Mat &input)
{
    cv::Mat modelInput = input;
    // Optional square letterboxing (disabled by default, see header).
    if (letterBoxForSquare && modelShape.width == modelShape.height)
        modelInput = formatToSquare(modelInput);

    // HWC uint8 BGR -> NCHW float blob in [0,1], with BGR->RGB swap.
    cv::Mat blob;
    cv::dnn::blobFromImage(modelInput, blob, 1.0/255.0, modelShape, cv::Scalar(), true, false);
    net.setInput(blob);
    std::vector<cv::Mat> outputs;
    net.forward(outputs, net.getUnconnectedOutLayersNames());

    int rows = outputs[0].size[1];
    int dimensions = outputs[0].size[2];

    bool yolov8 = true;
    // yolov5 has an output of shape (batchSize, 25200, 85) (Num classes + box[x,y,w,h] + confidence[c])
    // yolov8 has an output of shape (batchSize, 84, 8400) (Num classes + box[x,y,w,h])
    // NOTE(review): the transpose below is disabled, so this code assumes the
    // exported model already emits (batch, numBoxes, numFields) — confirm
    // against the deployed ONNX export.
    /*
    if (dimensions > rows) // Check if the shape[2] is more than shape[1] (yolov8)
    {
        std::cout << "yolov8 = " << yolov8 << std::endl;
        yolov8 = true;
        rows = outputs[0].size[2];
        dimensions = outputs[0].size[1];
        outputs[0] = outputs[0].reshape(1, dimensions);
        cv::transpose(outputs[0], outputs[0]);
    }
    */
    float *data = (float *)outputs[0].data;

    // modelShape is cv::Size2f, so both divisions below are float divisions.
    float x_factor = modelInput.cols / modelShape.width;
    float y_factor = modelInput.rows / modelShape.height;

    std::vector<int> class_ids;
    std::vector<float> confidences;
    std::vector<cv::Rect> boxes;

    // One candidate per row: [cx, cy, w, h, (objconf,) class scores...].
    for (int i = 0; i < rows; ++i)
    {
        if (yolov8)
        {
            float *classes_scores = data+4;
            cv::Mat scores(1, classes.size(), CV_32FC1, classes_scores);
            cv::Point class_id;
            double maxClassScore;
            minMaxLoc(scores, 0, &maxClassScore, 0, &class_id);
            if (maxClassScore > modelScoreThreshold)
            {
                confidences.push_back(maxClassScore);
                class_ids.push_back(class_id.x);
                // Center-size box -> top-left/width/height in input pixels.
                float x = data[0];
                float y = data[1];
                float w = data[2];
                float h = data[3];
                int left = int((x - 0.5 * w) * x_factor);
                int top = int((y - 0.5 * h) * y_factor);
                int width = int(w * x_factor);
                int height = int(h * y_factor);
                boxes.push_back(cv::Rect(left, top, width, height));
            }
        }
        else // yolov5
        {
            // yolov5 carries a separate objectness confidence at index 4.
            float confidence = data[4];
            if (confidence >= modelConfidenceThreshold)
            {
                float *classes_scores = data+5;
                cv::Mat scores(1, classes.size(), CV_32FC1, classes_scores);
                cv::Point class_id;
                double max_class_score;
                minMaxLoc(scores, 0, &max_class_score, 0, &class_id);
                if (max_class_score > modelScoreThreshold)
                {
                    confidences.push_back(confidence);
                    class_ids.push_back(class_id.x);
                    float x = data[0];
                    float y = data[1];
                    float w = data[2];
                    float h = data[3];
                    int left = int((x - 0.5 * w) * x_factor);
                    int top = int((y - 0.5 * h) * y_factor);
                    int width = int(w * x_factor);
                    int height = int(h * y_factor);
                    boxes.push_back(cv::Rect(left, top, width, height));
                }
            }
        }
        data += dimensions;   // advance to the next candidate row
    }

    // Non-maximum suppression over all surviving candidates.
    std::vector<int> nms_result;
    cv::dnn::NMSBoxes(boxes, confidences, modelScoreThreshold, modelNMSThreshold, nms_result);

    std::vector<Detection> detections{};
    for (unsigned long i = 0; i < nms_result.size(); ++i)
    {
        int idx = nms_result[i];
        Detection result;
        result.class_id = class_ids[idx];
        result.confidence = confidences[idx];
        // Assign a random bright color per detection (used by some renderers).
        std::random_device rd;
        std::mt19937 gen(rd());
        std::uniform_int_distribution<int> dis(100, 255);
        result.color = cv::Scalar(dis(gen),
                                  dis(gen),
                                  dis(gen));
        result.className = classes[result.class_id];
        result.box = boxes[idx];
        detections.push_back(result);
    }
    return detections;
}
// Appends one class name per line of `classesPath` to the class list.
// Note: names are appended after the hard-coded defaults from the header,
// not replacing them. Silently does nothing when the file cannot be opened.
void Inference::loadClassesFromFile()
{
    std::ifstream inputFile(classesPath);
    if (!inputFile.is_open())
        return;
    for (std::string classLine; std::getline(inputFile, classLine); )
        classes.push_back(classLine);
    inputFile.close();
}
// Reads the ONNX model from `modelPath` and selects the execution backend
// according to the `cudaEnabled` flag set in the constructor.
void Inference::loadOnnxNetwork()
{
    printf("loadOnnxNetwork() starts\n");
    net = cv::dnn::readNetFromONNX(modelPath);
    if (!cudaEnabled)
    {
        std::cout << "\nRunning on CPU" << std::endl;
        net.setPreferableBackend(cv::dnn::DNN_BACKEND_OPENCV);
        net.setPreferableTarget(cv::dnn::DNN_TARGET_CPU);
        return;
    }
    std::cout << "\nRunning on CUDA" << std::endl;
    net.setPreferableBackend(cv::dnn::DNN_BACKEND_CUDA);
    net.setPreferableTarget(cv::dnn::DNN_TARGET_CUDA);
}
// Pads `source` on the right/bottom with black so it becomes a square of
// side max(width, height); the original pixels stay at the top-left corner.
cv::Mat Inference::formatToSquare(const cv::Mat &source)
{
    const int side = (source.cols > source.rows) ? source.cols : source.rows;
    cv::Mat squared = cv::Mat::zeros(side, side, CV_8UC3);
    source.copyTo(squared(cv::Rect(0, 0, source.cols, source.rows)));
    return squared;
}
// Returns a copy of `image` with a green box and a black-on-white
// "<class>: <confidence>" caption drawn for every detection.
cv::Mat Inference::drawLabels(const cv::Mat &image, const std::vector<Detection> &detections)
{
    cv::Mat canvas = image.clone();
    for (const auto &det : detections)
    {
        cv::rectangle(canvas, det.box, cv::Scalar(0, 255, 0), 2);

        const std::string caption = det.className + ": " + std::to_string(det.confidence);
        int baseLine = 0;
        const cv::Size captionSize = cv::getTextSize(caption, cv::FONT_HERSHEY_SIMPLEX, 0.5, 1, &baseLine);

        // White backdrop behind the caption keeps the black text readable.
        const cv::Point backdropTopLeft(det.box.x, det.box.y - captionSize.height);
        const cv::Point backdropBottomRight(det.box.x + captionSize.width, det.box.y + baseLine);
        cv::rectangle(canvas, backdropTopLeft, backdropBottomRight,
                      cv::Scalar(255, 255, 255), cv::FILLED);

        cv::putText(canvas, caption, cv::Point(det.box.x, det.box.y),
                    cv::FONT_HERSHEY_SIMPLEX, 0.5, cv::Scalar(0, 0, 0), 1);
    }
    return canvas;
}
@@ -1,51 +0,0 @@
#pragma once
// Cpp native
#include <fstream>
#include <vector>
#include <string>
#include <random>
// OpenCV / DNN / Inference
#include <opencv2/imgproc.hpp>
#include <opencv2/opencv.hpp>
#include <opencv2/dnn.hpp>

// One detected object as returned by Inference::runInference().
struct Detection
{
    int class_id{0};          // index into Inference::classes
    std::string className{};  // human-readable class name
    float confidence{0.0};    // detection confidence in [0,1]
    cv::Scalar color{};       // randomly assigned display color
    cv::Rect box{};           // bounding box in input-image pixels
};
// Thin wrapper around cv::dnn for running a YOLO (v5/v8) ONNX model.
// Construct with the model path, then call runInference() per frame and
// optionally drawLabels() to annotate the output.
class Inference
{
public:
    Inference(const std::string &onnxModelPath, const cv::Size &modelInputShape = {640, 640}, const std::string &classesTxtFile = "", const bool &runWithCuda = false);
    // Runs detection on one frame; boxes are in input pixel coordinates.
    std::vector<Detection> runInference(const cv::Mat &input);
    // Returns a copy of `image` annotated with the given detections.
    cv::Mat drawLabels(const cv::Mat &image, const std::vector<Detection> &detections);

private:
    // Appends class names read from classesPath (unused by default — the
    // class list below is hard-coded).
    void loadClassesFromFile();
    // Loads the ONNX network and picks CUDA or CPU backend.
    void loadOnnxNetwork();
    // Pads a frame to a black square, original pixels at the top-left.
    cv::Mat formatToSquare(const cv::Mat &source);
    std::string modelPath{};    // path to the .onnx file
    std::string classesPath{};  // optional classes.txt path
    bool cudaEnabled{};         // run through the CUDA DNN backend when true
    // Default COCO-80 class names; indexed by Detection::class_id.
    std::vector<std::string> classes{"person", "bicycle", "car", "motorcycle", "airplane", "bus", "train", "truck", "boat", "traffic light", "fire hydrant", "stop sign", "parking meter", "bench", "bird", "cat", "dog", "horse", "sheep", "cow", "elephant", "bear", "zebra", "giraffe", "backpack", "umbrella", "handbag", "tie", "suitcase", "frisbee", "skis", "snowboard", "sports ball", "kite", "baseball bat", "baseball glove", "skateboard", "surfboard", "tennis racket", "bottle", "wine glass", "cup", "fork", "knife", "spoon", "bowl", "banana", "apple", "sandwich", "orange", "broccoli", "carrot", "hot dog", "pizza", "donut", "cake", "chair", "couch", "potted plant", "bed", "dining table", "toilet", "tv", "laptop", "mouse", "remote", "keyboard", "cell phone", "microwave", "oven", "toaster", "sink", "refrigerator", "book", "clock", "vase", "scissors", "teddy bear", "hair drier", "toothbrush"};
    cv::Size2f modelShape{};                 // model input size (float for scaling math)
    float modelConfidenceThreshold {0.25};   // yolov5 objectness threshold
    float modelScoreThreshold {0.45};        // per-class score threshold
    float modelNMSThreshold {0.50};          // NMS IoU threshold
    bool letterBoxForSquare = false;         // enable formatToSquare() before inference
    cv::dnn::Net net;
};
@@ -1,175 +0,0 @@
#include <QDebug>
#include <QThread>
#include <opencv2/imgproc.hpp>
#include <opencv2/highgui.hpp>
#include <iomanip>
#include "aiengineinferenceopi5.h"
#include "file_utils.h"
#include "image_drawing.h"

// RKNN (Rockchip NPU) backend constructor, used on OrangePi 5 class
// hardware. The model itself is loaded later in initialize(), not here.
AiEngineInferenceOpi5::AiEngineInferenceOpi5(QString modelPath, QObject *parent)
    : AiEngineInference{modelPath, parent}
{
    qDebug() << "AiEngineInferenceOpi5() test mModelPath=" << mModelPath;
}
// Loads the RKNN YOLOv8 model and the post-processing tables.
// `number` is stored and later forwarded to inference_yolov8_model()
// (presumably selects the NPU core/instance — TODO confirm).
// On failure the error is only logged: the context stays zeroed and a later
// performInferenceSlot() call will fail at inference time.
void AiEngineInferenceOpi5::initialize(int number)
{
    mNumber = number;
    memset(&mRrknnAppCtx0, 0, sizeof(rknn_app_context_t));
    init_post_process();
    int ret = init_yolov8_model(mModelPath.toLocal8Bit(), &mRrknnAppCtx0);
    if (ret != 0) {
        qDebug() << "init_yolov8_model() failure! ret: " << ret << "modelPath = " << mModelPath << "number:" << number;
        return;
    }
}
// Releases the post-processing tables and the RKNN model context created
// by initialize().
AiEngineInferenceOpi5::~AiEngineInferenceOpi5()
{
    deinit_post_process();
    release_yolov8_model(&mRrknnAppCtx0);
}
// Deep-copies a BGR frame into a newly heap-allocated RGB888 image_buffer_t
// for the RKNN API. The caller must release it with freeImageBuffer().
image_buffer_t AiEngineInferenceOpi5::convertCV2FrameToImageBuffer(const cv::Mat& bgrFrame)
{
    // Convert BGR to RGB
    cv::Mat rgbFrame;
    cv::cvtColor(bgrFrame, rgbFrame, cv::COLOR_BGR2RGB);
    image_buffer_t imgBuffer;
    memset(&imgBuffer, 0, sizeof(image_buffer_t));
    imgBuffer.width = rgbFrame.cols;
    imgBuffer.height = rgbFrame.rows;
    imgBuffer.width_stride = rgbFrame.step;   // bytes per row (cv::Mat::step)
    // NOTE(review): height_stride is set to the row count, i.e. no vertical
    // padding — confirm this matches what the RKNN API expects.
    imgBuffer.height_stride = rgbFrame.rows;
    imgBuffer.format = IMAGE_FORMAT_RGB888;
    imgBuffer.size = rgbFrame.total() * rgbFrame.elemSize();
    imgBuffer.virt_addr = new unsigned char[imgBuffer.size];
    std::memcpy(imgBuffer.virt_addr, rgbFrame.data, imgBuffer.size);
    return imgBuffer;
}
// Releases the pixel buffer allocated by convertCV2FrameToImageBuffer() and
// nulls the pointer so a second call on the same buffer is harmless.
void AiEngineInferenceOpi5::freeImageBuffer(image_buffer_t& imgBuffer)
{
    if (imgBuffer.virt_addr == nullptr)
        return;
    delete[] imgBuffer.virt_addr;
    imgBuffer.virt_addr = nullptr;
}
// Letterboxes `inputFrame` into a black 640x640 canvas: the frame is scaled
// to the largest size that fits while preserving its aspect ratio, then
// copied to the top-left corner (right/bottom padding stays black).
cv::Mat AiEngineInferenceOpi5::resizeToHalfAndAssigntoTopLeft640x640(const cv::Mat& inputFrame)
{
    const int targetWidth = 640;
    const int targetHeight = 640;
    const float aspectRatio = static_cast<float>(inputFrame.cols) / static_cast<float>(inputFrame.rows);

    // Fit to the target width first; fall back to fitting the height when
    // the input is taller than the target aspect.
    int scaledWidth = targetWidth;
    int scaledHeight = static_cast<int>(targetWidth / aspectRatio);
    if (scaledHeight > targetHeight) {
        scaledHeight = targetHeight;
        scaledWidth = static_cast<int>(targetHeight * aspectRatio);
    }

    cv::Mat scaledFrame;
    cv::resize(inputFrame, scaledFrame, cv::Size(scaledWidth, scaledHeight));

    cv::Mat canvas = cv::Mat::zeros(targetHeight, targetWidth, inputFrame.type());
    scaledFrame.copyTo(canvas(cv::Rect(cv::Point(0, 0), scaledFrame.size())));
    return canvas;
}
// Draws a blue box and a red "<class> <prob>%" caption for every detection
// in `result_list` directly onto `image`. Out-of-range class ids are wrapped
// modulo the class-name table so the lookup never reads past its end.
void AiEngineInferenceOpi5::drawObjects(cv::Mat& image, const object_detect_result_list& result_list)
{
    for (int i = 0; i < result_list.count; i++) {
        const object_detect_result& result = result_list.results[i];
        if (result.cls_id >= mClassNames.size()) {
            qDebug() << "Class id >= mClassNames.size() Reducing it.";
        }
        fprintf(stderr, "Inference[%d] probability = %f\n", i, result.prop * 100);
        const int left = result.box.left;
        const int top = result.box.top;
        const int right = result.box.right;
        const int bottom = result.box.bottom;
        cv::rectangle(image, cv::Point(left, top), cv::Point(right, bottom), cv::Scalar(255, 0, 0), 2);
        // Caption text: use snprintf (was sprintf) so an unusually long class
        // name cannot overflow the stack buffer.
        char c_text[256];
        snprintf(c_text, sizeof(c_text), "%s %d%%", mClassNames[result.cls_id % mClassNames.size()].toStdString().c_str(), (int)(round(result.prop * 100)));
        cv::Point textOrg(left, top - 5);
        cv::putText(image, std::string(c_text), textOrg, cv::FONT_HERSHEY_COMPLEX, 0.70, cv::Scalar(0, 0, 255), 1, cv::LINE_AA);
    }
}
// Runs one RKNN detection pass on `frame`: letterbox to 640x640, convert to
// an RGB image buffer, infer, copy every detection into the result, annotate
// the frame and emit resultsReady(). `mActive` guards against overlapping
// inference requests. Unlike the OpenCV backend, this emits a result even
// when no objects were found.
void AiEngineInferenceOpi5::performInferenceSlot(cv::Mat frame)
{
    //qDebug() << "performInferenceSlot() in thread: " << QThread::currentThreadId();
    mActive = true;
    cv::Mat scaledFrame = resizeToHalfAndAssigntoTopLeft640x640(frame);
    image_buffer_t imgBuffer = convertCV2FrameToImageBuffer(scaledFrame);
    object_detect_result_list od_results;
    int ret = inference_yolov8_model(&mRrknnAppCtx0, &imgBuffer, &od_results, mNumber);
    freeImageBuffer(imgBuffer);   // the model has consumed the copy
    if (ret != 0) {
        qDebug() << "AiEngineInferenceOpi5::performInferenceSlot() failure! ret: " << ret;
        mActive = false;
        return;
    }
    // Copy detections into the engine-agnostic result structure.
    AiEngineInferenceResult result;
    for (int i = 0; i < od_results.count; i++) {
        object_detect_result *det_result = &(od_results.results[i]);
        qDebug() << "____box:" << det_result->box.top << det_result->box.left << det_result->box.bottom << det_result->box.right << "\n";
        AiEngineObject object;
        object.classId = det_result->cls_id;
        object.propability = det_result->prop;
        object.rectangle.top = det_result->box.top;
        object.rectangle.left = det_result->box.left;
        object.rectangle.bottom = det_result->box.bottom;
        object.rectangle.right = det_result->box.right;
        result.objects.append(object);
    }
    // Annotate the (letterboxed) frame and hand out a deep copy of it.
    drawObjects(scaledFrame, od_results);
    result.frame = scaledFrame.clone();
    emit resultsReady(result);
    mActive = false;
}
@@ -1,26 +0,0 @@
#pragma once
#include <QObject>
#include "aiengineinference.h"
#include "yolov8.h"
#include "image_utils.h"

// RKNN/NPU-backed YOLOv8 inference backend (OrangePi 5 / RK3588 class HW).
class AiEngineInferenceOpi5 : public AiEngineInference
{
    Q_OBJECT
public:
    explicit AiEngineInferenceOpi5(QString modelPath, QObject *parent = nullptr);
    // Releases the post-process tables and the RKNN model context.
    ~AiEngineInferenceOpi5();
    // Loads the RKNN model; `number` is stored and forwarded to inference
    // (appears to select the NPU core/instance — TODO confirm).
    void initialize(int number) override;
public slots:
    // Letterboxes the frame to 640x640, runs detection, emits resultsReady().
    void performInferenceSlot(cv::Mat frame) override;
private:
    // Deep-copies a BGR cv::Mat into a newly allocated RGB image_buffer_t.
    image_buffer_t convertCV2FrameToImageBuffer(const cv::Mat& bgrFrame);
    // Frees a buffer created by convertCV2FrameToImageBuffer().
    void freeImageBuffer(image_buffer_t& imgBuffer);
    // Aspect-preserving letterbox into a black 640x640 canvas, top-left anchored.
    cv::Mat resizeToHalfAndAssigntoTopLeft640x640(const cv::Mat& inputFrame);
    // Draws boxes and class captions onto `image`.
    void drawObjects(cv::Mat& image, const object_detect_result_list& result_list);
    rknn_app_context_t mRrknnAppCtx0;   // RKNN runtime context; zeroed until initialize()
};
Binary file not shown.

Before

Width:  |  Height:  |  Size: 602 KiB

-42
View File
@@ -1,42 +0,0 @@
#ifndef _RKNN_MODEL_ZOO_COMMON_H_
#define _RKNN_MODEL_ZOO_COMMON_H_
/**
* @brief Image pixel format
*
*/
typedef enum {
IMAGE_FORMAT_GRAY8,
IMAGE_FORMAT_RGB888,
IMAGE_FORMAT_RGBA8888,
IMAGE_FORMAT_YUV420SP_NV21,
IMAGE_FORMAT_YUV420SP_NV12,
} image_format_t;
/**
* @brief Image buffer
*
*/
typedef struct {
int width;
int height;
int width_stride;
int height_stride;
image_format_t format;
unsigned char* virt_addr;
int size;
int fd;
} image_buffer_t;
/**
* @brief Image rectangle
*
*/
typedef struct {
int left;
int top;
int right;
int bottom;
} image_rect_t;
#endif //_RKNN_MODEL_ZOO_COMMON_H_
-129
View File
@@ -1,129 +0,0 @@
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#define MAX_TEXT_LINE_LENGTH 1024

/**
 * @brief Read a whole binary file (e.g. a model blob) into heap memory.
 *
 * @param filename   [in]  Path of the file to load
 * @param model_size [out] Number of bytes read; untouched on failure
 * @return unsigned char* malloc'ed buffer with the file contents (caller
 *         must free()), or NULL on open/alloc/read failure
 */
unsigned char* load_model(const char* filename, int* model_size)
{
    FILE* fp = fopen(filename, "rb");
    if (fp == NULL) {
        printf("fopen %s fail!\n", filename);
        return NULL;
    }
    fseek(fp, 0, SEEK_END);
    int model_len = ftell(fp);
    unsigned char* model = (unsigned char*)malloc(model_len);
    if (model == NULL) {
        // Previously unchecked: fread() into a NULL buffer on OOM.
        printf("malloc %d bytes fail!\n", model_len);
        fclose(fp);
        return NULL;
    }
    fseek(fp, 0, SEEK_SET);
    if (model_len != fread(model, 1, model_len, fp)) {
        printf("fread %s fail!\n", filename);
        free(model);
        fclose(fp);
        return NULL;
    }
    *model_size = model_len;
    fclose(fp);
    return model;
}
/**
 * @brief Read a whole file into a NUL-terminated heap buffer.
 *
 * @param path     [in]  File path
 * @param out_data [out] malloc'ed buffer of size+1 bytes, NUL-terminated
 *                       (caller must free()); untouched on error
 * @return int file size in bytes, or -1 on error
 */
int read_data_from_file(const char *path, char **out_data)
{
    FILE *fp = fopen(path, "rb");
    if (fp == NULL) {
        printf("fopen %s fail!\n", path);
        return -1;
    }
    fseek(fp, 0, SEEK_END);
    int file_size = ftell(fp);
    char *data = (char *)malloc(file_size + 1);
    if (data == NULL) {
        // Previously unchecked: fread() into a NULL buffer on OOM.
        printf("malloc %d bytes fail!\n", file_size + 1);
        fclose(fp);
        return -1;
    }
    data[file_size] = 0;   // NUL terminator so text callers can treat it as a string
    fseek(fp, 0, SEEK_SET);
    if (file_size != fread(data, 1, file_size, fp)) {
        printf("fread %s fail!\n", path);
        free(data);
        fclose(fp);
        return -1;
    }
    fclose(fp);
    *out_data = data;
    return file_size;
}
/**
 * @brief Write a byte buffer to a file, replacing any existing content.
 *
 * Opens in "wb" (was "w"): callers pass binary payloads such as encoded
 * JPEG data, so binary mode avoids newline translation on non-POSIX libcs
 * (no behavior change on Linux).
 *
 * @param path [in] Destination path
 * @param data [in] Bytes to write
 * @param size [in] Number of bytes
 * @return int 0: success; -1: error
 */
int write_data_to_file(const char *path, const char *data, unsigned int size)
{
    FILE *fp;
    fp = fopen(path, "wb");
    if (fp == NULL) {
        printf("open error: %s\n", path);
        return -1;
    }
    fwrite(data, 1, size, fp);
    fflush(fp);
    fclose(fp);
    return 0;
}
/**
 * @brief Count the lines in an open text stream from its current position.
 *
 * Counts '\n' characters plus one, so a final line without a trailing
 * newline is still counted; the stream is rewound to the start afterwards.
 *
 * @param file [in] Open, readable stream
 * @return int line count (>= 1 even for an empty file)
 */
int count_lines(FILE* file)
{
    int count = 0;
    // int, not char: fgetc() returns EOF (-1), which is outside char range —
    // the old `char ch; while(!feof(...))` pattern also read one bogus byte.
    int ch;
    while ((ch = fgetc(file)) != EOF)
    {
        if (ch == '\n')
        {
            count++;
        }
    }
    count += 1;
    rewind(file);
    return count;
}
/**
 * @brief Read all lines of a text file into a malloc'ed string array.
 *
 * Each entry is a NUL-terminated copy of one line with its trailing newline
 * stripped. Release the array with free_lines(). Lines longer than
 * MAX_TEXT_LINE_LENGTH-1 characters are split by fgets().
 *
 * @param filename   [in]  File path
 * @param line_count [out] Number of entries allocated (count_lines() result)
 * @return char** line array, or NULL on open/alloc failure
 */
char** read_lines_from_file(const char* filename, int* line_count)
{
    FILE* file = fopen(filename, "r");
    if (file == NULL) {
        printf("Failed to open the file.\n");
        return NULL;
    }
    int num_lines = count_lines(file);
    printf("num_lines=%d\n", num_lines);
    char** lines = (char**)malloc(num_lines * sizeof(char*));
    if (lines == NULL) {   // previously unchecked
        fclose(file);
        return NULL;
    }
    memset(lines, 0, num_lines * sizeof(char*));
    char buffer[MAX_TEXT_LINE_LENGTH];
    int line_index = 0;
    while (fgets(buffer, sizeof(buffer), file) != NULL) {
        buffer[strcspn(buffer, "\n")] = '\0'; // strip the trailing newline
        lines[line_index] = (char*)malloc(strlen(buffer) + 1);
        if (lines[line_index] == NULL) {      // previously unchecked
            for (int i = 0; i < line_index; i++) {
                free(lines[i]);
            }
            free(lines);
            fclose(file);
            return NULL;
        }
        strcpy(lines[line_index], buffer);
        line_index++;
    }
    fclose(file);
    *line_count = num_lines;
    return lines;
}
/**
 * @brief Free a string array returned by read_lines_from_file().
 *
 * @param lines      [in] String array; may be NULL (then this is a no-op,
 *                        matching read_lines_from_file()'s NULL error return)
 * @param line_count [in] Number of entries in the array
 */
void free_lines(char** lines, int line_count)
{
    if (lines == NULL) {
        return;
    }
    for (int i = 0; i < line_count; i++) {
        if (lines[i] != NULL) {
            free(lines[i]);
        }
    }
    free(lines);
}
-48
View File
@@ -1,48 +0,0 @@
#ifndef _RKNN_MODEL_ZOO_FILE_UTILS_H_
#define _RKNN_MODEL_ZOO_FILE_UTILS_H_

#ifdef __cplusplus
extern "C" {
#endif

/**
 * @brief Read data from file
 *
 * @param path [in] File path
 * @param out_data [out] Read data (malloc'ed, NUL-terminated; caller frees)
 * @return int -1: error; > 0: Read data size
 */
int read_data_from_file(const char *path, char **out_data);

/**
 * @brief Write data to file
 *
 * @param path [in] File path
 * @param data [in] Write data
 * @param size [in] Write data size
 * @return int 0: success; -1: error
 */
int write_data_to_file(const char *path, const char *data, unsigned int size);

/**
 * @brief Read all lines from text file
 *
 * @param path [in] File path
 * @param line_count [out] File line count
 * @return char** String array of all lines; remember to call free_lines() to release it after use
 */
char** read_lines_from_file(const char* path, int* line_count);

/**
 * @brief Free lines string array
 *
 * @param lines [in] String array
 * @param line_count [in] Line count
 */
void free_lines(char** lines, int line_count);

#ifdef __cplusplus
} // extern "C"
#endif

#endif //_RKNN_MODEL_ZOO_FILE_UTILS_H_
File diff suppressed because it is too large Load Diff
File diff suppressed because it is too large Load Diff
@@ -1,89 +0,0 @@
#ifndef _RKNN_MODEL_ZOO_IMAGE_DRAWING_H_
#define _RKNN_MODEL_ZOO_IMAGE_DRAWING_H_

#ifdef __cplusplus
extern "C" {
#endif

#include "common.h"

// Software drawing primitives operating directly on an image_buffer_t.
// All coordinates are in pixels relative to the buffer's top-left corner.

// Color Format ARGB8888
#define COLOR_GREEN 0xFF00FF00
#define COLOR_BLUE 0xFF0000FF
#define COLOR_RED 0xFFFF0000
#define COLOR_YELLOW 0xFFFFFF00
#define COLOR_ORANGE 0xFFFF4500
#define COLOR_BLACK 0xFF000000
#define COLOR_WHITE 0xFFFFFFFF

/**
 * @brief Draw rectangle (outline only)
 *
 * @param image [in] Image buffer
 * @param rx [in] Rectangle top left x
 * @param ry [in] Rectangle top left y
 * @param rw [in] Rectangle width
 * @param rh [in] Rectangle height
 * @param color [in] Rectangle line color (ARGB8888)
 * @param thickness [in] Rectangle line thickness
 */
void draw_rectangle(image_buffer_t* image, int rx, int ry, int rw, int rh, unsigned int color,
                    int thickness);

/**
 * @brief Draw line
 *
 * @param image [in] Image buffer
 * @param x0 [in] Line begin point x
 * @param y0 [in] Line begin point y
 * @param x1 [in] Line end point x
 * @param y1 [in] Line end point y
 * @param color [in] Line color (ARGB8888)
 * @param thickness [in] Line thickness
 */
void draw_line(image_buffer_t* image, int x0, int y0, int x1, int y1, unsigned int color,
               int thickness);

/**
 * @brief Draw text (only support ASCII char)
 *
 * @param image [in] Image buffer
 * @param text [in] Text
 * @param x [in] Text position x
 * @param y [in] Text position y
 * @param color [in] Text color (ARGB8888)
 * @param fontsize [in] Text fontsize
 */
void draw_text(image_buffer_t* image, const char* text, int x, int y, unsigned int color,
               int fontsize);

/**
 * @brief Draw circle (outline only)
 *
 * @param image [in] Image buffer
 * @param cx [in] Circle center x
 * @param cy [in] Circle center y
 * @param radius [in] Circle radius
 * @param color [in] Circle color (ARGB8888)
 * @param thickness [in] Circle thickness
 */
void draw_circle(image_buffer_t* image, int cx, int cy, int radius, unsigned int color,
                 int thickness);

/**
 * @brief Draw image (blit a raw pixel block into the target image)
 *
 * @param image [in] Target Image buffer
 * @param draw_img [in] Image for drawing
 * @param x [in] Target Image draw position x
 * @param y [in] Target Image draw position y
 * @param rw [in] Width of image for drawing
 * @param rh [in] Height of image for drawing
 */
void draw_image(image_buffer_t* image, unsigned char* draw_img, int x, int y, int rw, int rh);

#ifdef __cplusplus
} // extern "C"
#endif

#endif // _RKNN_MODEL_ZOO_IMAGE_DRAWING_H_
-782
View File
@@ -1,782 +0,0 @@
#include <stdio.h>
#include <stdlib.h>
#include <dirent.h>
#include <math.h>
#include <sys/time.h>
#include "im2d.h"
#include "drmrga.h"
// Compile the single-header stb image codecs into this translation unit
// (JPEG/PNG decode only, no thread-local statics).
#define STB_IMAGE_IMPLEMENTATION
#define STBI_NO_THREAD_LOCALS
#define STBI_ONLY_JPEG
#define STBI_ONLY_PNG
#include "stb_image.h"
#define STB_IMAGE_WRITE_IMPLEMENTATION
#include "stb_image_write.h"
#include "turbojpeg.h"
#include "image_utils.h"
#include "file_utils.h"

// Substrings accepted as image file names by image_file_filter().
static const char* filter_image_names[] = {
    "jpg",
    "jpeg",
    "JPG",
    "JPEG",
    "png",
    "PNG",
    "data",
    NULL
};

// Human-readable names for TurboJPEG subsampling / colorspace enum values.
static const char* subsampName[TJ_NUMSAMP] = {"4:4:4", "4:2:2", "4:2:0", "Grayscale", "4:4:0", "4:1:1"};
static const char* colorspaceName[TJ_NUMCS] = {"RGB", "YCbCr", "GRAY", "CMYK", "YCCK"};

// scandir() filter: accept entries whose name contains one of the substrings
// above. NOTE(review): strstr() matching means "photo.jpg.bak" also passes.
static int image_file_filter(const struct dirent *entry)
{
    const char ** filter;
    for (filter = filter_image_names; *filter; ++filter) {
        if(strstr(entry->d_name, *filter) != NULL) {
            return 1;
        }
    }
    return 0;
}
// Decodes a JPEG file into `image` as packed RGB888 using TurboJPEG.
// If image->virt_addr is non-NULL the caller's buffer is reused, otherwise a
// new buffer is malloc'ed and ownership passes to the caller.
//
// NOTE(review): several error paths here are best-effort only:
//  - a failed fopen() is just logged; the following fseek(NULL, ...) would crash,
//  - the two early `return -1` paths leak jpegBuf and the tjhandle,
//  - after `goto out` the function still returns 0 (success) even when
//    decompression failed,
//  - `orientation` is never read from the file, and crop_width/crop_height
//    are computed but unused.
static int read_image_jpeg(const char* path, image_buffer_t* image)
{
    FILE* jpegFile = NULL;
    unsigned long jpegSize;
    int flags = 0;
    int width, height;
    int origin_width, origin_height;
    unsigned char* imgBuf = NULL;
    unsigned char* jpegBuf = NULL;
    unsigned long size;
    unsigned short orientation = 1;
    struct timeval tv1, tv2;
    if ((jpegFile = fopen(path, "rb")) == NULL) {
        printf("open input file failure\n");
    }
    if (fseek(jpegFile, 0, SEEK_END) < 0 || (size = ftell(jpegFile)) < 0 || fseek(jpegFile, 0, SEEK_SET) < 0) {
        printf("determining input file size failure\n");
    }
    if (size == 0) {
        printf("determining input file size, Input file contains no data\n");
    }
    jpegSize = (unsigned long)size;
    if ((jpegBuf = (unsigned char*)malloc(jpegSize * sizeof(unsigned char))) == NULL) {
        printf("allocating JPEG buffer\n");
    }
    if (fread(jpegBuf, jpegSize, 1, jpegFile) < 1) {
        printf("reading input file");
    }
    fclose(jpegFile);
    jpegFile = NULL;
    tjhandle handle = NULL;
    int subsample, colorspace;
    int padding = 1;
    int ret = 0;
    handle = tjInitDecompress();
    ret = tjDecompressHeader3(handle, jpegBuf, size, &origin_width, &origin_height, &subsample, &colorspace);
    if (ret < 0) {
        printf("header file error, errorStr:%s, errorCode:%d\n", tjGetErrorStr(), tjGetErrorCode(handle));
        return -1;
    }
    // Crop dimensions aligned down to a multiple of 16 to suit later RGA operations.
    int crop_width = origin_width / 16 * 16;
    int crop_height = origin_height / 16 * 16;
    printf("origin size=%dx%d crop size=%dx%d\n", origin_width, origin_height, crop_width, crop_height);
    // gettimeofday(&tv1, NULL);
    ret = tjDecompressHeader3(handle, jpegBuf, size, &width, &height, &subsample, &colorspace);
    if (ret < 0) {
        printf("header file error, errorStr:%s, errorCode:%d\n", tjGetErrorStr(), tjGetErrorCode(handle));
        return -1;
    }
    printf("input image: %d x %d, subsampling: %s, colorspace: %s, orientation: %d\n",
           width, height, subsampName[subsample], colorspaceName[colorspace], orientation);
    int sw_out_size = width * height * 3;   // RGB888 output size
    unsigned char* sw_out_buf = image->virt_addr;
    if (sw_out_buf == NULL) {
        sw_out_buf = (unsigned char*)malloc(sw_out_size * sizeof(unsigned char));
    }
    if (sw_out_buf == NULL) {
        printf("sw_out_buf is NULL\n");
        goto out;
    }
    flags |= 0;
    // TurboJPEG: error code 0 with ret < 0 means a warning; -1 means a real error.
    int pixelFormat = TJPF_RGB;
    ret = tjDecompress2(handle, jpegBuf, size, sw_out_buf, width, 0, height, pixelFormat, flags);
    // ret = tjDecompressToYUV2(handle, jpeg_buf, size, dst_buf, *width, padding, *height, flags);
    if ((0 != tjGetErrorCode(handle)) && (ret < 0)) {
        printf("error : decompress to yuv failed, errorStr:%s, errorCode:%d\n", tjGetErrorStr(),
               tjGetErrorCode(handle));
        goto out;
    }
    if ((0 == tjGetErrorCode(handle)) && (ret < 0)) {
        printf("warning : errorStr:%s, errorCode:%d\n", tjGetErrorStr(), tjGetErrorCode(handle));
    }
    tjDestroy(handle);
    // gettimeofday(&tv2, NULL);
    // printf("decode time %ld ms\n", (tv2.tv_sec-tv1.tv_sec)*1000 + (tv2.tv_usec-tv1.tv_usec)/1000);
    image->width = width;
    image->height = height;
    image->format = IMAGE_FORMAT_RGB888;
    image->virt_addr = sw_out_buf;
    image->size = sw_out_size;
out:
    if (jpegBuf) {
        free(jpegBuf);
    }
    return 0;
}
// Reads a raw ".data" dump into `image`. If image->virt_addr is non-NULL the
// caller's buffer is reused (and stays owned by the caller); otherwise a new
// buffer is malloc'ed and image->virt_addr/size are filled in.
//
// Fixes vs. the previous version: the FILE* is closed on the read-failure
// path, a caller-provided buffer is never free()d here, and malloc failure
// is detected instead of dereferencing NULL.
static int read_image_raw(const char* path, image_buffer_t* image)
{
    FILE *fp = fopen(path, "rb");
    if (fp == NULL) {
        printf("fopen %s fail!\n", path);
        return -1;
    }
    fseek(fp, 0, SEEK_END);
    int file_size = ftell(fp);
    int local_alloc = (image->virt_addr == NULL);   // do we own `data`?
    unsigned char *data = image->virt_addr;
    if (local_alloc) {
        data = (unsigned char *)malloc(file_size + 1);
    }
    if (data == NULL) {
        printf("malloc %d bytes fail!\n", file_size + 1);
        fclose(fp);
        return -1;
    }
    // NOTE(review): writes one byte past file_size; a caller-provided buffer
    // must therefore be at least file_size + 1 bytes — confirm at call sites.
    data[file_size] = 0;
    fseek(fp, 0, SEEK_SET);
    if (file_size != fread(data, 1, file_size, fp)) {
        printf("fread %s fail!\n", path);
        if (local_alloc) {
            free(data);   // only free what we allocated, never the caller's buffer
        }
        fclose(fp);
        return -1;
    }
    fclose(fp);
    if (local_alloc) {
        image->virt_addr = data;
        image->size = file_size;
    }
    return 0;
}
// Encodes an RGB888 image buffer as a JPEG file (4:2:2 subsampling) using
// TurboJPEG and writes it via write_data_to_file(). Only IMAGE_FORMAT_RGB888
// input is supported; other formats return -1.
// NOTE(review): on the unsupported-format path the tjhandle created above is
// leaked — confirm/fix when touching this code.
static int write_image_jpeg(const char* path, int quality, const image_buffer_t* image)
{
    int ret;
    int jpegSubsamp = TJSAMP_422;
    unsigned char* jpegBuf = NULL;   // allocated by tjCompress2, freed below
    unsigned long jpegSize = 0;
    int flags = 0;
    const unsigned char* data = image->virt_addr;
    int width = image->width;
    int height = image->height;
    int pixelFormat = TJPF_RGB;
    tjhandle handle = tjInitCompress();
    if (image->format == IMAGE_FORMAT_RGB888) {
        ret = tjCompress2(handle, data, width, 0, height, pixelFormat, &jpegBuf, &jpegSize, jpegSubsamp, quality, flags);
    } else {
        printf("write_image_jpeg: pixel format %d not support\n", image->format);
        return -1;
    }
    // printf("ret=%d jpegBuf=%p jpegSize=%d\n", ret, jpegBuf, jpegSize);
    if (jpegBuf != NULL && jpegSize > 0) {
        write_data_to_file(path, (const char*)jpegBuf, jpegSize);
        tjFree(jpegBuf);
    }
    tjDestroy(handle);
    return 0;
}
// Decodes an image file (PNG/JPEG, per the STBI_ONLY_* defines above) with
// stb_image into `image`. Pixel format is derived from the channel count:
// 1 -> GRAY8, 4 -> RGBA8888, otherwise RGB888. If image->virt_addr is
// non-NULL the decoded pixels are copied into the caller's buffer; otherwise
// ownership of the stb allocation is handed to the caller.
static int read_image_stb(const char* path, image_buffer_t* image)
{
    // Channel count comes from the file itself (0 = keep native channels).
    int w, h, c;
    unsigned char* pixeldata = stbi_load(path, &w, &h, &c, 0);
    if (!pixeldata) {
        printf("error: read image %s fail\n", path);
        return -1;
    }
    // printf("load image wxhxc=%dx%dx%d path=%s\n", w, h, c, path);
    int size = w * h * c;
    // Store the image data: copy into the caller's buffer when provided,
    // otherwise transfer ownership of the stb buffer.
    if (image->virt_addr != NULL) {
        memcpy(image->virt_addr, pixeldata, size);
        stbi_image_free(pixeldata);
    } else {
        image->virt_addr = pixeldata;
    }
    image->width = w;
    image->height = h;
    if (c == 4) {
        image->format = IMAGE_FORMAT_RGBA8888;
    } else if (c == 1) {
        image->format = IMAGE_FORMAT_GRAY8;
    } else {
        image->format = IMAGE_FORMAT_RGB888;
    }
    return 0;
}
// Dispatches to the appropriate reader based on the file extension:
// ".data" -> raw dump, JPEG variants -> TurboJPEG, anything else -> stb_image.
int read_image(const char* path, image_buffer_t* image)
{
    const char* ext = strrchr(path, '.');
    if (ext == NULL) {
        return -666;   // no extension: cannot pick a decoder
    }
    if (strcmp(ext, ".data") == 0) {
        return read_image_raw(path, image);
    }
    static const char* jpeg_exts[] = {".jpg", ".jpeg", ".JPG", ".JPEG"};
    for (size_t i = 0; i < sizeof(jpeg_exts) / sizeof(jpeg_exts[0]); ++i) {
        if (strcmp(ext, jpeg_exts[i]) == 0) {
            return read_image_jpeg(path, image);
        }
    }
    return read_image_stb(path, image);
}
/**
 * @brief Write an image buffer to disk; the encoder is chosen by extension.
 *
 * Supported: .jpg/.jpeg/.JPG/.JPEG (TurboJPEG, quality 95), .png/.PNG
 * (stb_image_write), .data/.DATA (raw dump via write_data_to_file()).
 * NOTE(review): `channel` is hard-coded to 3, so PNG output assumes RGB888
 * data — confirm callers never pass RGBA here.
 *
 * @param path [in] Destination path
 * @param img  [in] Image to write
 * @return int encoder result (0 on success for JPEG/raw; stb returns nonzero
 *         on success for PNG), or -1 for a missing/unknown extension
 */
int write_image(const char* path, const image_buffer_t* img)
{
    int ret;
    int width = img->width;
    int height = img->height;
    int channel = 3;
    void* data = img->virt_addr;
    printf("write_image path: %s width=%d height=%d channel=%d data=%p\n",
           path, width, height, channel, data);
    const char* _ext = strrchr(path, '.');
    if (!_ext) {
        // missing extension
        return -1;
    }
    if (strcmp(_ext, ".jpg") == 0 || strcmp(_ext, ".jpeg") == 0 || strcmp(_ext, ".JPG") == 0 ||
        strcmp(_ext, ".JPEG") == 0) {
        int quality = 95;
        ret = write_image_jpeg(path, quality, img);
    } else if (strcmp(_ext, ".png") == 0 || strcmp(_ext, ".PNG") == 0) {
        // was bitwise '|': it happened to work because strcmp()==0 yields 0/1,
        // but '||' is the intended logical-or (and short-circuits)
        ret = stbi_write_png(path, width, height, channel, data, 0);
    } else if (strcmp(_ext, ".data") == 0 || strcmp(_ext, ".DATA") == 0) {
        int size = get_image_size(img);
        ret = write_data_to_file(path, data, size);
    } else {
        // unknown extension type
        return -1;
    }
    return ret;
}
// Bilinearly scales the crop region (crop_x/y/width/height) of `src` into the
// destination box (dst_box_*) of `dst`, treating both images as packed
// `channel`-byte-per-pixel rasters. Pixels of `dst` outside the box are left
// untouched. Returns 0 on success, -1 when dst is NULL.
// NOTE(review): no bounds checking — the crop region must lie inside `src`
// and the box inside `dst`; callers are responsible.
static int crop_and_scale_image_c(int channel, unsigned char *src, int src_width, int src_height,
                                  int crop_x, int crop_y, int crop_width, int crop_height,
                                  unsigned char *dst, int dst_width, int dst_height,
                                  int dst_box_x, int dst_box_y, int dst_box_width, int dst_box_height) {
    if (dst == NULL) {
        printf("dst buffer is null\n");
        return -1;
    }
    float x_ratio = (float)crop_width / (float)dst_box_width;
    float y_ratio = (float)crop_height / (float)dst_box_height;
    // printf("src_width=%d src_height=%d crop_x=%d crop_y=%d crop_width=%d crop_height=%d\n",
    //     src_width, src_height, crop_x, crop_y, crop_width, crop_height);
    // printf("dst_width=%d dst_height=%d dst_box_x=%d dst_box_y=%d dst_box_width=%d dst_box_height=%d\n",
    //     dst_width, dst_height, dst_box_x, dst_box_y, dst_box_width, dst_box_height);
    // printf("channel=%d x_ratio=%f y_ratio=%f\n", channel, x_ratio, y_ratio);
    // Take data from the source crop region and bilinearly scale it into the
    // destination box.
    for (int dst_y = dst_box_y; dst_y < dst_box_y + dst_box_height; dst_y++) {
        for (int dst_x = dst_box_x; dst_x < dst_box_x + dst_box_width; dst_x++) {
            int dst_x_offset = dst_x - dst_box_x;
            int dst_y_offset = dst_y - dst_box_y;
            int src_x = (int)(dst_x_offset * x_ratio) + crop_x;
            int src_y = (int)(dst_y_offset * y_ratio) + crop_y;
            // Fractional distance to the sampled pixel — the bilinear weights.
            float x_diff = (dst_x_offset * x_ratio) - (src_x - crop_x);
            float y_diff = (dst_y_offset * y_ratio) - (src_y - crop_y);
            int index1 = src_y * src_width * channel + src_x * channel;
            int index2 = index1 + src_width * channel; // down
            if (src_y == src_height - 1) {
                // At the bottom edge, pick the pixel above instead.
                index2 = index1 - src_width * channel;
            }
            int index3 = index1 + 1 * channel; // right
            int index4 = index2 + 1 * channel; // down right
            if (src_x == src_width - 1) {
                // At the right edge, pick the pixel to the left instead.
                index3 = index1 - 1 * channel;
                index4 = index2 - 1 * channel;
            }
            // printf("dst_x=%d dst_y=%d dst_x_offset=%d dst_y_offset=%d src_x=%d src_y=%d x_diff=%f y_diff=%f src index=%d %d %d %d\n",
            //     dst_x, dst_y, dst_x_offset, dst_y_offset,
            //     src_x, src_y, x_diff, y_diff,
            //     index1, index2, index3, index4);
            // Weighted average of the 2x2 neighborhood, per channel.
            for (int c = 0; c < channel; c++) {
                unsigned char A = src[index1+c];
                unsigned char B = src[index3+c];
                unsigned char C = src[index2+c];
                unsigned char D = src[index4+c];
                unsigned char pixel = (unsigned char)(
                    A * (1 - x_diff) * (1 - y_diff) +
                    B * x_diff * (1 - y_diff) +
                    C * y_diff * (1 - x_diff) +
                    D * x_diff * y_diff
                );
                dst[(dst_y * dst_width + dst_x) * channel + c] = pixel;
            }
        }
    }
    return 0;
}
// Crops and scales a semi-planar YUV420 (NV12/NV21) image by processing the
// Y plane (1 byte/pixel) and the interleaved UV plane (2 bytes per 2x2
// block, half resolution) separately with crop_and_scale_image_c().
// NOTE(review): for the UV plane the source crop and plane dimensions are
// halved, but the destination box (dst_box_x/y/width/height) is passed
// unhalved — this looks inconsistent and likely misplaces/oversizes the UV
// output box. Confirm against the upstream rknn_model_zoo implementation.
static int crop_and_scale_image_yuv420sp(unsigned char *src, int src_width, int src_height,
                                         int crop_x, int crop_y, int crop_width, int crop_height,
                                         unsigned char *dst, int dst_width, int dst_height,
                                         int dst_box_x, int dst_box_y, int dst_box_width, int dst_box_height) {
    unsigned char* src_y = src;
    unsigned char* src_uv = src + src_width * src_height;   // UV plane follows Y plane
    unsigned char* dst_y = dst;
    unsigned char* dst_uv = dst + dst_width * dst_height;
    crop_and_scale_image_c(1, src_y, src_width, src_height, crop_x, crop_y, crop_width, crop_height,
                           dst_y, dst_width, dst_height, dst_box_x, dst_box_y, dst_box_width, dst_box_height);
    crop_and_scale_image_c(2, src_uv, src_width / 2, src_height / 2, crop_x / 2, crop_y / 2, crop_width / 2, crop_height / 2,
                           dst_uv, dst_width / 2, dst_height / 2, dst_box_x, dst_box_y, dst_box_width, dst_box_height);
    return 0;
}
/**
 * CPU fallback for convert_image(): crop `src_box` out of src and
 * bilinearly scale it into `dst_box` of dst, pre-filling dst with the pad
 * color when dst_box does not cover the whole target image.
 * Source and destination must use the same pixel format.
 *
 * @param src     source image (virt_addr must be set)
 * @param dst     destination image (virt_addr must be set)
 * @param src_box crop rectangle on src, or NULL for the full frame
 * @param dst_box target rectangle on dst, or NULL for the full frame
 * @param color   padding byte used outside dst_box
 * @return 0 on success, -1 on bad arguments or unsupported format
 */
static int convert_image_cpu(image_buffer_t *src, image_buffer_t *dst, image_rect_t *src_box, image_rect_t *dst_box, char color) {
    if (dst->virt_addr == NULL) {
        return -1;
    }
    if (src->virt_addr == NULL) {
        return -1;
    }
    if (src->format != dst->format) {
        return -1;
    }

    // Default to the full source frame when no crop rectangle is given.
    int src_box_x = 0;
    int src_box_y = 0;
    int src_box_w = src->width;
    int src_box_h = src->height;
    if (src_box != NULL) {
        src_box_x = src_box->left;
        src_box_y = src_box->top;
        src_box_w = src_box->right - src_box->left + 1;  // rect corners are inclusive
        src_box_h = src_box->bottom - src_box->top + 1;
    }
    int dst_box_x = 0;
    int dst_box_y = 0;
    int dst_box_w = dst->width;
    int dst_box_h = dst->height;
    if (dst_box != NULL) {
        dst_box_x = dst_box->left;
        dst_box_y = dst_box->top;
        dst_box_w = dst_box->right - dst_box->left + 1;
        dst_box_h = dst_box->bottom - dst_box->top + 1;
    }

    // Fill the whole destination with the pad color when the target box
    // leaves a border uncovered.
    if (dst_box_w != dst->width || dst_box_h != dst->height) {
        int dst_size = get_image_size(dst);
        memset(dst->virt_addr, color, dst_size);
    }

    int reti = 0;
    if (src->format == IMAGE_FORMAT_RGB888) {
        reti = crop_and_scale_image_c(3, src->virt_addr, src->width, src->height,
                                      src_box_x, src_box_y, src_box_w, src_box_h,
                                      dst->virt_addr, dst->width, dst->height,
                                      dst_box_x, dst_box_y, dst_box_w, dst_box_h);
    } else if (src->format == IMAGE_FORMAT_RGBA8888) {
        reti = crop_and_scale_image_c(4, src->virt_addr, src->width, src->height,
                                      src_box_x, src_box_y, src_box_w, src_box_h,
                                      dst->virt_addr, dst->width, dst->height,
                                      dst_box_x, dst_box_y, dst_box_w, dst_box_h);
    } else if (src->format == IMAGE_FORMAT_GRAY8) {
        reti = crop_and_scale_image_c(1, src->virt_addr, src->width, src->height,
                                      src_box_x, src_box_y, src_box_w, src_box_h,
                                      dst->virt_addr, dst->width, dst->height,
                                      dst_box_x, dst_box_y, dst_box_w, dst_box_h);
    } else if (src->format == IMAGE_FORMAT_YUV420SP_NV12 || src->format == IMAGE_FORMAT_YUV420SP_NV21) {
        // The original tested NV12 twice here, so NV21 frames fell through
        // to the "no support" branch even though this path handles them.
        reti = crop_and_scale_image_yuv420sp(src->virt_addr, src->width, src->height,
                                             src_box_x, src_box_y, src_box_w, src_box_h,
                                             dst->virt_addr, dst->width, dst->height,
                                             dst_box_x, dst_box_y, dst_box_w, dst_box_h);
    } else {
        // Unsupported format: report failure instead of silently returning
        // success as the original did.
        printf("no support format %d\n", src->format);
        reti = -1;
    }
    if (reti != 0) {
        printf("convert_image_cpu fail %d\n", reti);
        return -1;
    }
    printf("finish\n");
    return 0;
}
/**
 * Translate the application's image_format_t into the matching librga
 * RK_FORMAT_* constant.
 *
 * @return RGA format code, or -1 for formats RGA cannot handle
 */
static int get_rga_fmt(image_format_t fmt) {
    if (fmt == IMAGE_FORMAT_RGB888) {
        return RK_FORMAT_RGB_888;
    }
    if (fmt == IMAGE_FORMAT_RGBA8888) {
        return RK_FORMAT_RGBA_8888;
    }
    if (fmt == IMAGE_FORMAT_YUV420SP_NV12) {
        return RK_FORMAT_YCbCr_420_SP;
    }
    if (fmt == IMAGE_FORMAT_YUV420SP_NV21) {
        return RK_FORMAT_YCrCb_420_SP;
    }
    return -1;
}
/**
 * Number of bytes needed to store `image` given its width/height/format.
 *
 * @param image image descriptor (may be NULL)
 * @return byte size, or 0 for NULL input / unknown formats
 */
int get_image_size(image_buffer_t* image)
{
    if (image == NULL) {
        return 0;
    }
    switch (image->format)
    {
    case IMAGE_FORMAT_GRAY8:
        return image->width * image->height;
    case IMAGE_FORMAT_RGB888:
        return image->width * image->height * 3;
    case IMAGE_FORMAT_RGBA8888:
        return image->width * image->height * 4;
    case IMAGE_FORMAT_YUV420SP_NV12:
    case IMAGE_FORMAT_YUV420SP_NV21:
        // Full-resolution Y plane + half-resolution interleaved UV plane.
        return image->width * image->height * 3 / 2;
    default:
        // The original fell off the end of this non-void function here
        // (undefined behaviour); report 0 for unknown formats instead.
        return 0;
    }
}
/**
 * Hardware (librga) implementation of convert_image(): crop src_box from
 * the source image, scale it into dst_box of the destination, and fill the
 * rest of the destination with `color` when dst_box does not cover it.
 *
 * Buffers are handed to RGA by physical address, dma-buf fd or virtual
 * address, preferring handles when LIBRGA_IM2D_HANDLE is available.
 *
 * @return 0 on success, -1 on any RGA failure (callers may fall back to
 *         the CPU path)
 */
static int convert_image_rga(image_buffer_t* src_img, image_buffer_t* dst_img, image_rect_t* src_box, image_rect_t* dst_box, char color)
{
    int ret = 0;
    int srcWidth = src_img->width;
    int srcHeight = src_img->height;
    void *src = src_img->virt_addr;
    int src_fd = src_img->fd;
    void *src_phy = NULL;
    int srcFmt = get_rga_fmt(src_img->format);
    int dstWidth = dst_img->width;
    int dstHeight = dst_img->height;
    void *dst = dst_img->virt_addr;
    int dst_fd = dst_img->fd;
    void *dst_phy = NULL;
    int dstFmt = get_rga_fmt(dst_img->format);
    int rotate = 0;
    int use_handle = 0;
#if defined(LIBRGA_IM2D_HANDLE)
    use_handle = 1;
#endif
    // printf("src width=%d height=%d fmt=0x%x virAddr=0x%p fd=%d\n",
    //        srcWidth, srcHeight, srcFmt, src, src_fd);
    // printf("dst width=%d height=%d fmt=0x%x virAddr=0x%p fd=%d\n",
    //        dstWidth, dstHeight, dstFmt, dst, dst_fd);
    // printf("rotate=%d\n", rotate);
    int usage = 0;
    IM_STATUS ret_rga = IM_STATUS_NOERROR;
    // set rga usage
    usage |= rotate;
    // Source/destination rectangles; image_rect_t corners are inclusive.
    im_rect srect;
    im_rect drect;
    im_rect prect;
    memset(&prect, 0, sizeof(im_rect));
    if (src_box != NULL) {
        srect.x = src_box->left;
        srect.y = src_box->top;
        srect.width = src_box->right - src_box->left + 1;
        srect.height = src_box->bottom - src_box->top + 1;
    } else {
        srect.x = 0;
        srect.y = 0;
        srect.width = srcWidth;
        srect.height = srcHeight;
    }
    if (dst_box != NULL) {
        drect.x = dst_box->left;
        drect.y = dst_box->top;
        drect.width = dst_box->right - dst_box->left + 1;
        drect.height = dst_box->bottom - dst_box->top + 1;
    } else {
        drect.x = 0;
        drect.y = 0;
        drect.width = dstWidth;
        drect.height = dstHeight;
    }
    // Wrap the raw buffers for RGA.
    rga_buffer_t rga_buf_src;
    rga_buffer_t rga_buf_dst;
    rga_buffer_t pat;
    rga_buffer_handle_t rga_handle_src = 0;
    rga_buffer_handle_t rga_handle_dst = 0;
    memset(&pat, 0, sizeof(rga_buffer_t));
    im_handle_param_t in_param;
    in_param.width = srcWidth;
    in_param.height = srcHeight;
    in_param.format = srcFmt;
    im_handle_param_t dst_param;
    dst_param.width = dstWidth;
    dst_param.height = dstHeight;
    dst_param.format = dstFmt;
    if (use_handle) {
        if (src_phy != NULL) {
            rga_handle_src = importbuffer_physicaladdr((uint64_t)src_phy, &in_param);
        } else if (src_fd > 0) {
            rga_handle_src = importbuffer_fd(src_fd, &in_param);
        } else {
            rga_handle_src = importbuffer_virtualaddr(src, &in_param);
        }
        if (rga_handle_src <= 0) {
            printf("src handle error %d\n", rga_handle_src);
            ret = -1;
            goto err;
        }
        rga_buf_src = wrapbuffer_handle(rga_handle_src, srcWidth, srcHeight, srcFmt, srcWidth, srcHeight);
    } else {
        if (src_phy != NULL) {
            rga_buf_src = wrapbuffer_physicaladdr(src_phy, srcWidth, srcHeight, srcFmt, srcWidth, srcHeight);
        } else if (src_fd > 0) {
            rga_buf_src = wrapbuffer_fd(src_fd, srcWidth, srcHeight, srcFmt, srcWidth, srcHeight);
        } else {
            rga_buf_src = wrapbuffer_virtualaddr(src, srcWidth, srcHeight, srcFmt, srcWidth, srcHeight);
        }
    }
    if (use_handle) {
        if (dst_phy != NULL) {
            rga_handle_dst = importbuffer_physicaladdr((uint64_t)dst_phy, &dst_param);
        } else if (dst_fd > 0) {
            rga_handle_dst = importbuffer_fd(dst_fd, &dst_param);
        } else {
            rga_handle_dst = importbuffer_virtualaddr(dst, &dst_param);
        }
        if (rga_handle_dst <= 0) {
            printf("dst handle error %d\n", rga_handle_dst);
            ret = -1;
            goto err;
        }
        rga_buf_dst = wrapbuffer_handle(rga_handle_dst, dstWidth, dstHeight, dstFmt, dstWidth, dstHeight);
    } else {
        if (dst_phy != NULL) {
            rga_buf_dst = wrapbuffer_physicaladdr(dst_phy, dstWidth, dstHeight, dstFmt, dstWidth, dstHeight);
        } else if (dst_fd > 0) {
            rga_buf_dst = wrapbuffer_fd(dst_fd, dstWidth, dstHeight, dstFmt, dstWidth, dstHeight);
        } else {
            rga_buf_dst = wrapbuffer_virtualaddr(dst, dstWidth, dstHeight, dstFmt, dstWidth, dstHeight);
        }
    }
    if (drect.width != dstWidth || drect.height != dstHeight) {
        im_rect dst_whole_rect = {0, 0, dstWidth, dstHeight};
        int imcolor;
        // Replicate the pad byte into all four (RGBA) bytes of the fill
        // value. The original assigned int* to char* without a cast, which
        // is an invalid pointer conversion.
        char* p_imcolor = (char*)&imcolor;
        p_imcolor[0] = color;
        p_imcolor[1] = color;
        p_imcolor[2] = color;
        p_imcolor[3] = color;
        printf("fill dst image (x y w h)=(%d %d %d %d) with color=0x%x\n",
               dst_whole_rect.x, dst_whole_rect.y, dst_whole_rect.width, dst_whole_rect.height, imcolor);
        ret_rga = imfill(rga_buf_dst, dst_whole_rect, imcolor);
        if (ret_rga <= 0) {
            // Hardware fill failed: fall back to a CPU memset when we have
            // a virtual address for the destination.
            if (dst != NULL) {
                size_t dst_size = get_image_size(dst_img);
                memset(dst, color, dst_size);
            } else {
                printf("Warning: Can not fill color on target image\n");
            }
        }
    }
    // rga process (IM_STATUS success codes are > 0)
    ret_rga = improcess(rga_buf_src, rga_buf_dst, pat, srect, drect, prect, usage);
    if (ret_rga <= 0) {
        printf("Error on improcess STATUS=%d\n", ret_rga);
        printf("RGA error message: %s\n", imStrError((IM_STATUS)ret_rga));
        ret = -1;
    }
err:
    if (rga_handle_src > 0) {
        releasebuffer_handle(rga_handle_src);
    }
    if (rga_handle_dst > 0) {
        releasebuffer_handle(rga_handle_dst);
    }
    // printf("finish\n");
    return ret;
}
/**
 * Convert/crop/scale an image, preferring the RGA hardware path and
 * falling back to the CPU implementation when RGA fails.
 *
 * @param src_img source image
 * @param dst_img destination image
 * @param src_box crop rectangle on the source, or NULL for the full frame
 * @param dst_box target rectangle on the destination, or NULL for the full frame
 * @param color   padding byte used outside dst_box
 * @return 0 on success, -1 on error
 */
int convert_image(image_buffer_t* src_img, image_buffer_t* dst_img, image_rect_t* src_box, image_rect_t* dst_box, char color)
{
    printf("src width=%d height=%d fmt=0x%x virAddr=0x%p fd=%d\n",
           src_img->width, src_img->height, src_img->format, src_img->virt_addr, src_img->fd);
    printf("dst width=%d height=%d fmt=0x%x virAddr=0x%p fd=%d\n",
           dst_img->width, dst_img->height, dst_img->format, dst_img->virt_addr, dst_img->fd);
    if (src_box != NULL) {
        printf("src_box=(%d %d %d %d)\n", src_box->left, src_box->top, src_box->right, src_box->bottom);
    }
    if (dst_box != NULL) {
        printf("dst_box=(%d %d %d %d)\n", dst_box->left, dst_box->top, dst_box->right, dst_box->bottom);
    }
    printf("color=0x%x\n", color);
    // Hardware path first; it can fail on unsupported geometry/formats.
    int ret = convert_image_rga(src_img, dst_img, src_box, dst_box, color);
    if (ret == 0) {
        return 0;
    }
    printf("try convert image use cpu\n");
    return convert_image_cpu(src_img, dst_img, src_box, dst_box, color);
}
/**
 * Letterbox src_image into dst_image: scale preserving aspect ratio, then
 * center the scaled image and pad the remaining border with `color`.
 * Fills `letterbox` with the scale and padding offsets so detections can
 * later be mapped back to source coordinates.
 *
 * Allocates dst_image->virt_addr when the caller provided neither a buffer
 * nor an fd; ownership of that buffer passes to the caller (must free()).
 *
 * @return 0 on success, -1 on error
 */
int convert_image_with_letterbox(image_buffer_t* src_image, image_buffer_t* dst_image, letterbox_t* letterbox, char color)
{
    const int allow_slight_change = 1;
    const int src_w = src_image->width;
    const int src_h = src_image->height;
    const int dst_w = dst_image->width;
    const int dst_h = dst_image->height;
    int resize_w = dst_w;
    int resize_h = dst_h;
    int _left_offset = 0;
    int _top_offset = 0;
    float scale = 1.0;

    image_rect_t src_box;
    src_box.left = 0;
    src_box.top = 0;
    src_box.right = src_w - 1;
    src_box.bottom = src_h - 1;

    image_rect_t dst_box;
    dst_box.left = 0;
    dst_box.top = 0;
    dst_box.right = dst_w - 1;
    dst_box.bottom = dst_h - 1;

    // Scale by the tighter of the two axes so the whole source fits.
    const float _scale_w = (float)dst_w / src_w;
    const float _scale_h = (float)dst_h / src_h;
    if (_scale_w < _scale_h) {
        scale = _scale_w;
        resize_h = (int)(src_h * scale);
    } else {
        scale = _scale_h;
        resize_w = (int)(src_w * scale);
    }

    // Nudge the resized dimensions so width is 4-aligned and height is
    // 2-aligned (friendlier for YUV layouts and hardware blitters).
    if (allow_slight_change == 1 && (resize_w % 4 != 0)) {
        resize_w -= resize_w % 4;
    }
    if (allow_slight_change == 1 && (resize_h % 2 != 0)) {
        resize_h -= resize_h % 2;
    }

    const int padding_h = dst_h - resize_h;
    const int padding_w = dst_w - resize_w;

    // Center the scaled image along the padded axis, keeping the offset even.
    if (_scale_w < _scale_h) {
        dst_box.top = padding_h / 2;
        if (dst_box.top % 2 != 0) {
            dst_box.top -= dst_box.top % 2;
            if (dst_box.top < 0) {
                dst_box.top = 0;
            }
        }
        dst_box.bottom = dst_box.top + resize_h - 1;
        _top_offset = dst_box.top;
    } else {
        dst_box.left = padding_w / 2;
        if (dst_box.left % 2 != 0) {
            dst_box.left -= dst_box.left % 2;
            if (dst_box.left < 0) {
                dst_box.left = 0;
            }
        }
        dst_box.right = dst_box.left + resize_w - 1;
        _left_offset = dst_box.left;
    }
    printf("scale=%f dst_box=(%d %d %d %d) allow_slight_change=%d _left_offset=%d _top_offset=%d padding_w=%d padding_h=%d\n",
           scale, dst_box.left, dst_box.top, dst_box.right, dst_box.bottom, allow_slight_change,
           _left_offset, _top_offset, padding_w, padding_h);

    // Report the transform so callers can map detections back.
    if (letterbox != NULL) {
        letterbox->scale = scale;
        letterbox->x_pad = _left_offset;
        letterbox->y_pad = _top_offset;
    }

    // Allocate the destination pixel buffer when the caller provided none;
    // the caller owns it and must free() it.
    if (dst_image->virt_addr == NULL && dst_image->fd <= 0) {
        const int dst_size = get_image_size(dst_image);
        dst_image->virt_addr = (uint8_t *)malloc(dst_size);
        if (dst_image->virt_addr == NULL) {
            printf("malloc size %d error\n", dst_size);
            return -1;
        }
    }
    return convert_image(src_image, dst_image, &src_box, &dst_box, color);
}
@@ -1,73 +0,0 @@
#ifndef _RKNN_MODEL_ZOO_IMAGE_UTILS_H_
#define _RKNN_MODEL_ZOO_IMAGE_UTILS_H_
#ifdef __cplusplus
extern "C" {
#endif
#include "common.h"
/**
 * @brief LetterBox transform parameters: how a source image was scaled and
 *        padded to fit a target size. Used to map detection boxes back to
 *        source-image coordinates.
 */
typedef struct {
    int x_pad;    // horizontal padding (pixels) added on the left
    int y_pad;    // vertical padding (pixels) added on the top
    float scale;  // uniform scale factor applied to the source image
} letterbox_t;
/**
 * @brief Read image file (support png/jpeg/bmp)
 *
 * @param path [in] Image path
 * @param image [out] Read image
 * @return int 0: success; -1: error
 */
int read_image(const char* path, image_buffer_t* image);
/**
 * @brief Write image file (support jpg/png)
 *
 * @param path [in] Image path
 * @param image [in] Image for write (only support IMAGE_FORMAT_RGB888)
 * @return int 0: success; -1: error
 */
int write_image(const char* path, const image_buffer_t* image);
/**
 * @brief Convert image for resize and pixel format change
 *
 * @param src_image [in] Source Image
 * @param dst_image [out] Target Image
 * @param src_box [in] Crop rectangle on source image
 * @param dst_box [in] Crop rectangle on target image
 * @param color [in] Padding color if dst_box can not fill target image
 * @return int 0: success; -1: error
 */
int convert_image(image_buffer_t* src_image, image_buffer_t* dst_image, image_rect_t* src_box, image_rect_t* dst_box, char color);
/**
 * @brief Convert image with letterbox (aspect-preserving scale + centered padding)
 *
 * @param src_image [in] Source Image
 * @param dst_image [out] Target Image
 * @param letterbox [out] Letterbox scale/offset actually applied
 * @param color [in] Fill color on target image
 * @return int 0: success; -1: error
 */
int convert_image_with_letterbox(image_buffer_t* src_image, image_buffer_t* dst_image, letterbox_t* letterbox, char color);
/**
 * @brief Get the image size in bytes for the image's format
 *
 * @param image [in] Image
 * @return int image size
 */
int get_image_size(image_buffer_t* image);
#ifdef __cplusplus
} // extern "C"
#endif
#endif // _RKNN_MODEL_ZOO_IMAGE_UTILS_H_
-500
View File
@@ -1,500 +0,0 @@
// Copyright (c) 2021 by Rockchip Electronics Co., Ltd. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "yolov8.h"
#include <math.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/time.h>
#include <set>
#include <vector>
#ifdef OPI5_BUILD
#define LABEL_NALE_TXT_PATH "azaion_10_labels_list.txt"
#else
#define LABEL_NALE_TXT_PATH "coco_80_labels_list.txt"
#endif
static char *labels[OBJ_CLASS_NUM];
inline static int clamp(float val, int min, int max) { return val > min ? (val < max ? val : max) : min; }
/**
 * Read one '\n'-terminated line from fp into a freshly malloc()ed,
 * NUL-terminated buffer (the newline is not stored).
 *
 * The `buffer` argument is ignored and only kept for signature
 * compatibility with existing callers; the returned pointer must be
 * free()d by the caller.
 *
 * @param fp     [in]  open stream to read from
 * @param buffer unused (historical)
 * @param len    [out] number of characters in the returned line
 * @return malloc()ed line, or NULL on EOF with no data, read error, or OOM
 */
static char *readLine(FILE *fp, char *buffer, int *len)
{
    (void)buffer; // unused; kept for signature compatibility
    size_t cap = 64;
    size_t n = 0;
    char *line = (char *)malloc(cap);
    if (!line)
        return NULL; // Out of memory
    int ch;
    while ((ch = fgetc(fp)) != '\n' && ch != EOF)
    {
        if (n + 1 >= cap)
        {
            // Grow geometrically; the previous version realloc()ed once
            // per character, i.e. O(n^2) for a line of n characters.
            size_t new_cap = cap * 2;
            char *tmp = (char *)realloc(line, new_cap);
            if (tmp == NULL)
            {
                free(line);
                return NULL; // Out of memory
            }
            line = tmp;
            cap = new_cap;
        }
        line[n++] = (char)ch;
    }
    line[n] = '\0';
    *len = (int)n;
    // EOF with nothing read (or a stream error) means "no more lines".
    if (ch == EOF && (n == 0 || ferror(fp)))
    {
        free(line);
        return NULL;
    }
    return line;
}
/**
 * Read up to max_line lines from fileName into lines[]; each stored entry
 * is a malloc()ed string owned by the caller.
 *
 * @param fileName [in]  path of the text file to read
 * @param lines    [out] array receiving up to max_line line pointers
 * @param max_line [in]  capacity of lines[]
 * @return number of lines read, or -1 if the file could not be opened
 */
static int readLines(const char *fileName, char *lines[], int max_line)
{
    FILE *file = fopen(fileName, "r");
    if (file == NULL)
    {
        printf("Open %s fail!\n", fileName);
        return -1;
    }
    // The original passed `s` to readLine() while it was still
    // uninitialized; readLine ignores the argument, but passing an
    // indeterminate value is undefined behaviour.
    char *s = NULL;
    int n = 0;
    int i = 0;
    while ((s = readLine(file, s, &n)) != NULL)
    {
        lines[i++] = s;
        if (i >= max_line)
            break;
    }
    fclose(file);
    return i;
}
/**
 * Load class label strings (one per line) from locationFilename into
 * label[], up to OBJ_CLASS_NUM entries.
 *
 * @return 0 on success, -1 if the file could not be read
 */
static int loadLabelName(const char *locationFilename, char *label[])
{
    printf("load lable %s\n", locationFilename);
    // Propagate the error: the original ignored the return value, so a
    // missing label file still made init_post_process() report success.
    if (readLines(locationFilename, label, OBJ_CLASS_NUM) < 0)
    {
        return -1;
    }
    return 0;
}
/**
 * Intersection-over-union of two axis-aligned boxes given as inclusive
 * pixel corners (hence the +1.0 on each extent).
 *
 * @return IoU in [0, 1]; 0 when the union is degenerate
 */
static float CalculateOverlap(float xmin0, float ymin0, float xmax0, float ymax0, float xmin1, float ymin1, float xmax1,
                              float ymax1)
{
    float w = fmax(0.f, fmin(xmax0, xmax1) - fmax(xmin0, xmin1) + 1.0);
    float h = fmax(0.f, fmin(ymax0, ymax1) - fmax(ymin0, ymin1) + 1.0);
    float i = w * h;
    float u = (xmax0 - xmin0 + 1.0) * (ymax0 - ymin0 + 1.0) + (xmax1 - xmin1 + 1.0) * (ymax1 - ymin1 + 1.0) - i;
    return u <= 0.f ? 0.f : (i / u);
}

/**
 * Non-maximum suppression for one class.
 *
 * `order` holds detection indices sorted by descending confidence; entries
 * suppressed by a higher-scoring overlapping box of the same class are set
 * to -1 in place.
 *
 * Fix: classIds is indexed by the ORIGINAL detection index (order[i]), not
 * by the sorted position. The previous code used classIds[i]/classIds[i],
 * which compared the wrong detections' classes — the inner check was a
 * tautology, so boxes of *other* classes could be suppressed too.
 *
 * @param validCount      number of detections
 * @param outputLocations packed boxes [x, y, w, h] per original index
 * @param classIds        class id per detection (original order)
 * @param order           [in,out] score-sorted indices; -1 marks suppressed
 * @param filterId        class processed in this pass
 * @param threshold       IoU above which the lower-scoring box is dropped
 * @return always 0
 */
static int nms(int validCount, std::vector<float> &outputLocations, std::vector<int> classIds, std::vector<int> &order,
               int filterId, float threshold)
{
    for (int i = 0; i < validCount; ++i)
    {
        int n = order[i];
        if (n == -1 || classIds[n] != filterId)
        {
            continue;
        }
        for (int j = i + 1; j < validCount; ++j)
        {
            int m = order[j];
            if (m == -1 || classIds[m] != filterId)
            {
                continue;
            }
            float xmin0 = outputLocations[n * 4 + 0];
            float ymin0 = outputLocations[n * 4 + 1];
            float xmax0 = outputLocations[n * 4 + 0] + outputLocations[n * 4 + 2];
            float ymax0 = outputLocations[n * 4 + 1] + outputLocations[n * 4 + 3];
            float xmin1 = outputLocations[m * 4 + 0];
            float ymin1 = outputLocations[m * 4 + 1];
            float xmax1 = outputLocations[m * 4 + 0] + outputLocations[m * 4 + 2];
            float ymax1 = outputLocations[m * 4 + 1] + outputLocations[m * 4 + 3];
            float iou = CalculateOverlap(xmin0, ymin0, xmax0, ymax0, xmin1, ymin1, xmax1, ymax1);
            if (iou > threshold)
            {
                order[j] = -1;
            }
        }
    }
    return 0;
}
/**
 * Sort `input` in DESCENDING order in place, applying the same permutation
 * to `indices` so callers can map sorted positions back to original slots.
 * Classic recursive quicksort using the leftmost element as the pivot.
 *
 * @return final pivot position of this partition step
 */
static int quick_sort_indice_inverse(std::vector<float> &input, int left, int right, std::vector<int> &indices)
{
    int lo = left;
    int hi = right;
    if (left >= right)
    {
        return lo;
    }
    // Lift the pivot out of the leftmost slot; that slot is now a "hole"
    // the partition loop moves toward the middle.
    float pivot = input[left];
    int pivot_idx = indices[left];
    while (lo < hi)
    {
        // From the right, find an element larger than the pivot
        // (descending order) and drop it into the hole on the left.
        while (lo < hi && input[hi] <= pivot)
        {
            hi--;
        }
        input[lo] = input[hi];
        indices[lo] = indices[hi];
        // From the left, find an element smaller than the pivot and drop
        // it into the hole on the right.
        while (lo < hi && input[lo] >= pivot)
        {
            lo++;
        }
        input[hi] = input[lo];
        indices[hi] = indices[lo];
    }
    input[lo] = pivot;
    indices[lo] = pivot_idx;
    quick_sort_indice_inverse(input, left, lo - 1, indices);
    quick_sort_indice_inverse(input, lo + 1, right, indices);
    return lo;
}
// Logistic function: maps any real x into (0, 1).
static float sigmoid(float x)
{
    return 1.0 / (1.0 + expf(-x));
}

// Inverse of sigmoid (the logit): recovers x from y = sigmoid(x).
static float unsigmoid(float y)
{
    return -1.0 * logf((1.0 / y) - 1.0);
}
// Saturate a float into [min, max] and truncate to a 32-bit integer.
inline static int32_t __clip(float val, float min, float max)
{
    if (val <= min)
    {
        return min;
    }
    if (val >= max)
    {
        return max;
    }
    return val;
}

// Quantize: map a float32 value to its int8 affine-quantized
// representation (zero point `zp`, step `scale`), saturating to [-128, 127].
static int8_t qnt_f32_to_affine(float f32, int32_t zp, float scale)
{
    float dst_val = (f32 / scale) + zp;
    return (int8_t)__clip(dst_val, -128, 127);
}

// Dequantize: recover the float32 value of an int8 affine-quantized number.
static float deqnt_affine_to_f32(int8_t qnt, int32_t zp, float scale)
{
    return ((float)qnt - (float)zp) * scale;
}
/**
 * Distribution Focal Loss (DFL) decoding: each of the 4 box sides is
 * predicted as a discrete distribution over `dfl_len` bins; the decoded
 * side distance is the softmax-weighted expectation of the bin indices.
 *
 * @param tensor  [in]  4 * dfl_len logits, side-major ([side][bin])
 * @param dfl_len [in]  number of bins per side
 * @param box     [out] 4 decoded side distances
 */
void compute_dfl(float* tensor, int dfl_len, float* box){
    // std::vector replaces the variable-length array used before, which
    // is a compiler extension rather than standard C++.
    std::vector<float> exp_t(dfl_len);
    for (int b = 0; b < 4; b++){
        float exp_sum = 0;
        float acc_sum = 0;
        for (int i = 0; i < dfl_len; i++){
            exp_t[i] = exp(tensor[i + b * dfl_len]);
            exp_sum += exp_t[i];
        }
        // Expectation of the bin index under the softmax distribution.
        for (int i = 0; i < dfl_len; i++){
            acc_sum += exp_t[i] / exp_sum * i;
        }
        box[b] = acc_sum;
    }
}
/**
 * Decode one quantized (int8, affine) YOLOv8 detection branch into
 * candidate boxes.
 *
 * Tensors are channel-major over the grid: element (c, i, j) lives at
 * c * grid_len + i * grid_w + j, which is why the loops below advance
 * `offset` by grid_len per class / DFL bin.
 *
 * @param box_tensor       quantized DFL box logits (4 * dfl_len channels)
 * @param box_zp/box_scale affine quantization parameters of box_tensor
 * @param score_tensor     quantized per-class scores (OBJ_CLASS_NUM channels)
 * @param score_sum_tensor optional summed-score tensor used as a fast
 *                         pre-filter; may be nullptr
 * @param grid_h, grid_w   grid size of this branch
 * @param stride           input pixels per grid cell
 * @param dfl_len          DFL bins per box side
 * @param boxes            [out] appended [x, y, w, h] per kept candidate
 * @param objProbs         [out] appended dequantized confidence per candidate
 * @param classId          [out] appended class id per candidate
 * @param threshold        confidence threshold (compared in quantized space)
 * @return number of candidates appended by this call
 */
static int process_i8(int8_t *box_tensor, int32_t box_zp, float box_scale,
                      int8_t *score_tensor, int32_t score_zp, float score_scale,
                      int8_t *score_sum_tensor, int32_t score_sum_zp, float score_sum_scale,
                      int grid_h, int grid_w, int stride, int dfl_len,
                      std::vector<float> &boxes,
                      std::vector<float> &objProbs,
                      std::vector<int> &classId,
                      float threshold)
{
    int validCount = 0;
    int grid_len = grid_h * grid_w;
    // Quantize the threshold once so all comparisons stay in int8 space.
    int8_t score_thres_i8 = qnt_f32_to_affine(threshold, score_zp, score_scale);
    int8_t score_sum_thres_i8 = qnt_f32_to_affine(threshold, score_sum_zp, score_sum_scale);
    for (int i = 0; i < grid_h; i++)
    {
        for (int j = 0; j < grid_w; j++)
        {
            int offset = i* grid_w + j;
            int max_class_id = -1;
            // The score-sum tensor acts as a cheap pre-filter: skip cells
            // whose total class score cannot pass the threshold.
            if (score_sum_tensor != nullptr){
                if (score_sum_tensor[offset] < score_sum_thres_i8){
                    continue;
                }
            }
            // Scan all classes for this cell; acceptance below is gated by
            // score_thres_i8, so the initial value only has to be low.
            int8_t max_score = -score_zp;
            for (int c= 0; c< OBJ_CLASS_NUM; c++){
                if ((score_tensor[offset] > score_thres_i8) && (score_tensor[offset] > max_score))
                {
                    max_score = score_tensor[offset];
                    max_class_id = c;
                }
                offset += grid_len;
            }
            // compute box
            if (max_score> score_thres_i8){
                offset = i* grid_w + j;
                float box[4];
                float before_dfl[dfl_len*4];
                // Dequantize the 4 * dfl_len DFL logits for this cell.
                for (int k=0; k< dfl_len*4; k++){
                    before_dfl[k] = deqnt_affine_to_f32(box_tensor[offset], box_zp, box_scale);
                    offset += grid_len;
                }
                compute_dfl(before_dfl, dfl_len, box);
                // DFL sides are distances from the cell center (+0.5) to
                // the left/top (negated) and right/bottom edges, in cells.
                float x1,y1,x2,y2,w,h;
                x1 = (-box[0] + j + 0.5)*stride;
                y1 = (-box[1] + i + 0.5)*stride;
                x2 = (box[2] + j + 0.5)*stride;
                y2 = (box[3] + i + 0.5)*stride;
                w = x2 - x1;
                h = y2 - y1;
                boxes.push_back(x1);
                boxes.push_back(y1);
                boxes.push_back(w);
                boxes.push_back(h);
                objProbs.push_back(deqnt_affine_to_f32(max_score, score_zp, score_scale));
                classId.push_back(max_class_id);
                validCount ++;
            }
        }
    }
    return validCount;
}
/**
 * Decode one float32 YOLOv8 detection branch into candidate boxes.
 * Same layout and logic as process_i8(), minus the quantization: tensors
 * are channel-major over the grid (element (c, i, j) at
 * c * grid_len + i * grid_w + j).
 *
 * @param box_tensor       DFL box logits (4 * dfl_len channels)
 * @param score_tensor     per-class scores (OBJ_CLASS_NUM channels)
 * @param score_sum_tensor optional summed-score pre-filter; may be nullptr
 * @param grid_h, grid_w   grid size of this branch
 * @param stride           input pixels per grid cell
 * @param dfl_len          DFL bins per box side
 * @param boxes            [out] appended [x, y, w, h] per kept candidate
 * @param objProbs         [out] appended confidence per candidate
 * @param classId          [out] appended class id per candidate
 * @param threshold        confidence threshold
 * @return number of candidates appended by this call
 */
static int process_fp32(float *box_tensor, float *score_tensor, float *score_sum_tensor,
                        int grid_h, int grid_w, int stride, int dfl_len,
                        std::vector<float> &boxes,
                        std::vector<float> &objProbs,
                        std::vector<int> &classId,
                        float threshold)
{
    int validCount = 0;
    int grid_len = grid_h * grid_w;
    for (int i = 0; i < grid_h; i++)
    {
        for (int j = 0; j < grid_w; j++)
        {
            int offset = i* grid_w + j;
            int max_class_id = -1;
            // The score-sum tensor acts as a cheap pre-filter: skip cells
            // whose total class score cannot pass the threshold.
            if (score_sum_tensor != nullptr){
                if (score_sum_tensor[offset] < threshold){
                    continue;
                }
            }
            // Pick the best-scoring class for this cell, stepping by
            // grid_len per class (channel-major layout).
            float max_score = 0;
            for (int c= 0; c< OBJ_CLASS_NUM; c++){
                if ((score_tensor[offset] > threshold) && (score_tensor[offset] > max_score))
                {
                    max_score = score_tensor[offset];
                    max_class_id = c;
                }
                offset += grid_len;
            }
            // compute box
            if (max_score> threshold){
                offset = i* grid_w + j;
                float box[4];
                float before_dfl[dfl_len*4];
                for (int k=0; k< dfl_len*4; k++){
                    before_dfl[k] = box_tensor[offset];
                    offset += grid_len;
                }
                compute_dfl(before_dfl, dfl_len, box);
                // DFL sides are distances from the cell center (+0.5) to
                // the left/top (negated) and right/bottom edges, in cells.
                float x1,y1,x2,y2,w,h;
                x1 = (-box[0] + j + 0.5)*stride;
                y1 = (-box[1] + i + 0.5)*stride;
                x2 = (box[2] + j + 0.5)*stride;
                y2 = (box[3] + i + 0.5)*stride;
                w = x2 - x1;
                h = y2 - y1;
                boxes.push_back(x1);
                boxes.push_back(y1);
                boxes.push_back(w);
                boxes.push_back(h);
                objProbs.push_back(max_score);
                classId.push_back(max_class_id);
                validCount ++;
            }
        }
    }
    return validCount;
}
/**
 * Full YOLOv8 post-processing: decode all three output branches, sort
 * candidates by confidence, run per-class NMS, and map surviving boxes
 * back through the letterbox transform into source-image coordinates.
 *
 * @param app_ctx        model context (tensor attrs, quantization flag, input size)
 * @param outputs        raw rknn outputs, grouped per branch
 * @param letter_box     scale/padding applied during pre-processing
 * @param conf_threshold minimum confidence to keep a candidate
 * @param nms_threshold  IoU threshold for non-maximum suppression
 * @param od_results     [out] detection list (count + boxes/scores/classes)
 * @return 0 (also when no objects were detected)
 */
int post_process(rknn_app_context_t *app_ctx, rknn_output *outputs, letterbox_t *letter_box, float conf_threshold, float nms_threshold, object_detect_result_list *od_results)
{
    std::vector<float> filterBoxes;
    std::vector<float> objProbs;
    std::vector<int> classId;
    int validCount = 0;
    int stride = 0;
    int grid_h = 0;
    int grid_w = 0;
    int model_in_w = app_ctx->model_width;
    int model_in_h = app_ctx->model_height;
    memset(od_results, 0, sizeof(object_detect_result_list));
    // default 3 branch
    // dims[1] of the first output is 4 * dfl_len (DFL bins per box side).
    int dfl_len = app_ctx->output_attrs[0].dims[1] /4;
    // output_per_branch == 3 means each branch also exports a score-sum
    // tensor (used as a fast pre-filter); == 2 means box + score only.
    int output_per_branch = app_ctx->io_num.n_output / 3;
    for (int i = 0; i < 3; i++)
    {
        void *score_sum = nullptr;
        int32_t score_sum_zp = 0;
        float score_sum_scale = 1.0;
        if (output_per_branch == 3){
            score_sum = outputs[i*output_per_branch + 2].buf;
            score_sum_zp = app_ctx->output_attrs[i*output_per_branch + 2].zp;
            score_sum_scale = app_ctx->output_attrs[i*output_per_branch + 2].scale;
        }
        int box_idx = i*output_per_branch;
        int score_idx = i*output_per_branch + 1;
        grid_h = app_ctx->output_attrs[box_idx].dims[2];
        grid_w = app_ctx->output_attrs[box_idx].dims[3];
        // Grid cell size in input pixels for this branch.
        stride = model_in_h / grid_h;
        if (app_ctx->is_quant)
        {
            validCount += process_i8((int8_t *)outputs[box_idx].buf, app_ctx->output_attrs[box_idx].zp, app_ctx->output_attrs[box_idx].scale,
                                     (int8_t *)outputs[score_idx].buf, app_ctx->output_attrs[score_idx].zp, app_ctx->output_attrs[score_idx].scale,
                                     (int8_t *)score_sum, score_sum_zp, score_sum_scale,
                                     grid_h, grid_w, stride, dfl_len,
                                     filterBoxes, objProbs, classId, conf_threshold);
        }
        else
        {
            validCount += process_fp32((float *)outputs[box_idx].buf, (float *)outputs[score_idx].buf, (float *)score_sum,
                                       grid_h, grid_w, stride, dfl_len,
                                       filterBoxes, objProbs, classId, conf_threshold);
        }
    }
    // no object detect
    if (validCount <= 0)
    {
        return 0;
    }
    // Sort candidate indices by descending confidence; indexArray maps
    // sorted position -> original candidate index.
    std::vector<int> indexArray;
    for (int i = 0; i < validCount; ++i)
    {
        indexArray.push_back(i);
    }
    quick_sort_indice_inverse(objProbs, 0, validCount - 1, indexArray);
    // Run NMS once per class that actually occurs.
    std::set<int> class_set(std::begin(classId), std::end(classId));
    for (auto c : class_set)
    {
        nms(validCount, filterBoxes, classId, indexArray, c, nms_threshold);
    }
    int last_count = 0;
    od_results->count = 0;
    /* box valid detect target */
    for (int i = 0; i < validCount; ++i)
    {
        if (indexArray[i] == -1 || last_count >= OBJ_NUMB_MAX_SIZE)
        {
            continue;
        }
        int n = indexArray[i];
        // Undo the letterbox: remove padding, clamp to the model input,
        // then divide by the letterbox scale to get source coordinates.
        float x1 = filterBoxes[n * 4 + 0] - letter_box->x_pad;
        float y1 = filterBoxes[n * 4 + 1] - letter_box->y_pad;
        float x2 = x1 + filterBoxes[n * 4 + 2];
        float y2 = y1 + filterBoxes[n * 4 + 3];
        int id = classId[n];
        float obj_conf = objProbs[i];
        od_results->results[last_count].box.left = (int)(clamp(x1, 0, model_in_w) / letter_box->scale);
        od_results->results[last_count].box.top = (int)(clamp(y1, 0, model_in_h) / letter_box->scale);
        od_results->results[last_count].box.right = (int)(clamp(x2, 0, model_in_w) / letter_box->scale);
        od_results->results[last_count].box.bottom = (int)(clamp(y2, 0, model_in_h) / letter_box->scale);
        od_results->results[last_count].prop = obj_conf;
        od_results->results[last_count].cls_id = id;
        last_count++;
    }
    od_results->count = last_count;
    return 0;
}
/**
 * Load the class-name table used by coco_cls_to_name().
 *
 * @return 0 on success, -1 if the label file could not be loaded
 */
int init_post_process()
{
    const int ret = loadLabelName(LABEL_NALE_TXT_PATH, labels);
    if (ret >= 0)
    {
        return 0;
    }
    printf("Load %s failed!\n", LABEL_NALE_TXT_PATH);
    return -1;
}
/**
 * Map a class id to its human-readable label.
 *
 * @param cls_id class index into the label table loaded by init_post_process()
 * @return label string, or "null" for out-of-range ids / unloaded labels
 */
char *coco_cls_to_name(int cls_id)
{
    // Reject negative ids too: the original only checked the upper bound,
    // so a negative id (e.g. a -1 "no class" sentinel) indexed out of
    // bounds into the labels array.
    if (cls_id < 0 || cls_id >= OBJ_CLASS_NUM)
    {
        return "null";
    }
    if (labels[cls_id])
    {
        return labels[cls_id];
    }
    return "null";
}
/**
 * Free every label string loaded by init_post_process() and reset the
 * table so a later re-initialization starts from a clean state.
 */
void deinit_post_process()
{
    for (int idx = 0; idx < OBJ_CLASS_NUM; ++idx)
    {
        char *entry = labels[idx];
        if (entry == nullptr)
        {
            continue;
        }
        free(entry);
        labels[idx] = nullptr;
    }
}
@@ -1,36 +0,0 @@
#ifndef _RKNN_YOLOV8_DEMO_POSTPROCESS_H_
#define _RKNN_YOLOV8_DEMO_POSTPROCESS_H_
#include <stdint.h>
#include <vector>
#include "rknn_api.h"
#include "common.h"
#include "image_utils.h"
// Maximum length of one class-name label.
#define OBJ_NAME_MAX_SIZE 64
// Maximum number of detections returned per frame.
#define OBJ_NUMB_MAX_SIZE 128
// Number of classes the model predicts. NOTE(review): this must match the
// label file loaded by init_post_process(); the OPI5 build loads a
// 10-label file — confirm this constant is adjusted for that build.
#define OBJ_CLASS_NUM 80
// Default IoU threshold for non-maximum suppression.
#define NMS_THRESH 0.45
// Default confidence threshold for keeping a detection.
#define BOX_THRESH 0.25
// class rknn_app_context_t;
// One detection: box in source-image pixel coordinates, confidence, class id.
typedef struct {
    image_rect_t box;
    float prop;   // confidence score
    int cls_id;   // index into the loaded label table
} object_detect_result;
// All detections for one frame; `count` is the number of valid entries.
typedef struct {
    int id;
    int count;
    object_detect_result results[OBJ_NUMB_MAX_SIZE];
} object_detect_result_list;
// Load the class-label table. Returns 0 on success.
int init_post_process();
// Free the label table loaded by init_post_process().
void deinit_post_process();
// Map a class id to its label string ("null" when unknown).
char *coco_cls_to_name(int cls_id);
// Decode raw model outputs into od_results: confidence filter, per-class
// NMS, and mapping back through the letterbox transform.
int post_process(rknn_app_context_t *app_ctx, rknn_output *outputs, letterbox_t *letter_box, float conf_threshold, float nms_threshold, object_detect_result_list *od_results);
// NOTE(review): this declaration appears to duplicate deinit_post_process()
// under an older camelCase name — confirm whether any definition exists
// before calling it.
void deinitPostProcess();
#endif //_RKNN_YOLOV8_DEMO_POSTPROCESS_H_
-273
View File
@@ -1,273 +0,0 @@
// Copyright (c) 2023 by Rockchip Electronics Co., Ltd. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <math.h>
#include "yolov8.h"
#include "common.h"
#include "file_utils.h"
#include "image_utils.h"
// Pretty-print one rknn_tensor_attr for debugging: index, name, shape,
// element count/size, layout, element type and quantization parameters.
static void dump_tensor_attr(rknn_tensor_attr *attr)
{
    printf(" index=%d, name=%s, n_dims=%d, dims=[%d, %d, %d, %d], n_elems=%d, size=%d, fmt=%s, type=%s, qnt_type=%s, "
           "zp=%d, scale=%f\n",
           attr->index, attr->name, attr->n_dims, attr->dims[0], attr->dims[1], attr->dims[2], attr->dims[3],
           attr->n_elems, attr->size, get_format_string(attr->fmt), get_type_string(attr->type),
           get_qnt_type_string(attr->qnt_type), attr->zp, attr->scale);
}
int init_yolov8_model(const char *model_path, rknn_app_context_t *app_ctx)
{
int ret;
int model_len = 0;
char *model;
rknn_context ctx = 0;
// Load RKNN Model
model_len = read_data_from_file(model_path, &model);
if (model == NULL)
{
printf("load_model fail!\n");
return -1;
}
ret = rknn_init(&ctx, model, model_len, 0, NULL);
free(model);
if (ret < 0)
{
printf("rknn_init fail! ret=%d\n", ret);
return -1;
}
// Get Model Input Output Number
rknn_input_output_num io_num;
ret = rknn_query(ctx, RKNN_QUERY_IN_OUT_NUM, &io_num, sizeof(io_num));
if (ret != RKNN_SUCC)
{
printf("rknn_query fail! ret=%d\n", ret);
return -1;
}
printf("model input num: %d, output num: %d\n", io_num.n_input, io_num.n_output);
// Get Model Input Info
printf("input tensors:\n");
rknn_tensor_attr input_attrs[io_num.n_input];
memset(input_attrs, 0, sizeof(input_attrs));
for (int i = 0; i < io_num.n_input; i++)
{
input_attrs[i].index = i;
ret = rknn_query(ctx, RKNN_QUERY_INPUT_ATTR, &(input_attrs[i]), sizeof(rknn_tensor_attr));
if (ret != RKNN_SUCC)
{
printf("rknn_query fail! ret=%d\n", ret);
return -1;
}
dump_tensor_attr(&(input_attrs[i]));
}
// Get Model Output Info
printf("output tensors:\n");
rknn_tensor_attr output_attrs[io_num.n_output];
memset(output_attrs, 0, sizeof(output_attrs));
for (int i = 0; i < io_num.n_output; i++)
{
output_attrs[i].index = i;
ret = rknn_query(ctx, RKNN_QUERY_OUTPUT_ATTR, &(output_attrs[i]), sizeof(rknn_tensor_attr));
if (ret != RKNN_SUCC)
{
printf("rknn_query fail! ret=%d\n", ret);
return -1;
}
dump_tensor_attr(&(output_attrs[i]));
}
// Set to context
app_ctx->rknn_ctx = ctx;
// TODO
if (output_attrs[0].qnt_type == RKNN_TENSOR_QNT_AFFINE_ASYMMETRIC && output_attrs[0].type == RKNN_TENSOR_INT8)
{
app_ctx->is_quant = true;
}
else
{
app_ctx->is_quant = false;
}
app_ctx->io_num = io_num;
app_ctx->input_attrs = (rknn_tensor_attr *)malloc(io_num.n_input * sizeof(rknn_tensor_attr));
memcpy(app_ctx->input_attrs, input_attrs, io_num.n_input * sizeof(rknn_tensor_attr));
app_ctx->output_attrs = (rknn_tensor_attr *)malloc(io_num.n_output * sizeof(rknn_tensor_attr));
memcpy(app_ctx->output_attrs, output_attrs, io_num.n_output * sizeof(rknn_tensor_attr));
if (input_attrs[0].fmt == RKNN_TENSOR_NCHW)
{
printf("model is NCHW input fmt\n");
app_ctx->model_channel = input_attrs[0].dims[1];
app_ctx->model_height = input_attrs[0].dims[2];
app_ctx->model_width = input_attrs[0].dims[3];
}
else
{
printf("model is NHWC input fmt\n");
app_ctx->model_height = input_attrs[0].dims[1];
app_ctx->model_width = input_attrs[0].dims[2];
app_ctx->model_channel = input_attrs[0].dims[3];
}
printf("model input height=%d, width=%d, channel=%d\n",
app_ctx->model_height, app_ctx->model_width, app_ctx->model_channel);
return 0;
}
/**
 * Release everything init_yolov8_model() acquired: the rknn context and
 * the copied input/output tensor attribute arrays. Safe to call more than
 * once — released members are reset so repeated calls are no-ops.
 *
 * @return always 0
 */
int release_yolov8_model(rknn_app_context_t *app_ctx)
{
    if (app_ctx->rknn_ctx != 0)
    {
        rknn_destroy(app_ctx->rknn_ctx);
        app_ctx->rknn_ctx = 0;
    }
    free(app_ctx->input_attrs);
    app_ctx->input_attrs = NULL;
    free(app_ctx->output_attrs);
    app_ctx->output_attrs = NULL;
    return 0;
}
/**
 * Run one YOLOv8 inference: letterbox `img` into the model's input size,
 * execute the network (optionally pinned to a single NPU core) and decode
 * the outputs into od_results.
 *
 * @param app_ctx    initialized model context
 * @param img        input frame
 * @param od_results [out] detections in source-image coordinates
 * @param core       0 = default scheduling, 1..3 = pin to NPU core 0..2
 * @return 0 on success, negative on failure
 */
int inference_yolov8_model(rknn_app_context_t *app_ctx, image_buffer_t *img, object_detect_result_list *od_results, int core)
{
    // Validate arguments before touching them — the original sized the
    // input/output arrays from app_ctx *before* this NULL check.
    if ((!app_ctx) || !(img) || (!od_results))
    {
        return -1;
    }

    int ret;
    image_buffer_t dst_img;
    letterbox_t letter_box;
    rknn_input inputs[app_ctx->io_num.n_input];
    rknn_output outputs[app_ctx->io_num.n_output];
    const float nms_threshold = NMS_THRESH;      // default NMS IoU threshold
    const float box_conf_threshold = BOX_THRESH; // default confidence threshold
    int bg_color = 114;                          // letterbox padding grey (YOLO convention)

    memset(od_results, 0x00, sizeof(*od_results));
    memset(&letter_box, 0, sizeof(letterbox_t));
    memset(&dst_img, 0, sizeof(image_buffer_t));
    memset(inputs, 0, sizeof(inputs));
    memset(outputs, 0, sizeof(outputs));

    // Pre-process: letterbox the frame into the model's input geometry.
    dst_img.width = app_ctx->model_width;
    dst_img.height = app_ctx->model_height;
    dst_img.format = IMAGE_FORMAT_RGB888;
    dst_img.size = get_image_size(&dst_img);
    dst_img.virt_addr = (unsigned char *)malloc(dst_img.size);
    if (dst_img.virt_addr == NULL)
    {
        printf("malloc buffer size:%d fail!\n", dst_img.size);
        return -1;
    }
    // From here on, every failure path must free dst_img.virt_addr; the
    // original returned early and leaked it on each of these errors.
    ret = convert_image_with_letterbox(img, &dst_img, &letter_box, bg_color);
    if (ret < 0)
    {
        printf("convert_image_with_letterbox fail! ret=%d\n", ret);
        ret = -1;
        goto out;
    }

    // Set Input Data
    inputs[0].index = 0;
    inputs[0].type = RKNN_TENSOR_UINT8;
    inputs[0].fmt = RKNN_TENSOR_NHWC;
    inputs[0].size = app_ctx->model_width * app_ctx->model_height * app_ctx->model_channel;
    inputs[0].buf = dst_img.virt_addr;
    ret = rknn_inputs_set(app_ctx->rknn_ctx, app_ctx->io_num.n_input, inputs);
    if (ret < 0)
    {
        printf("rknn_input_set fail! ret=%d\n", ret);
        ret = -1;
        goto out;
    }

    // Optionally pin inference to a single NPU core (1..3 -> core 0..2).
    if (core == 1) {
        ret = rknn_set_core_mask(app_ctx->rknn_ctx, RKNN_NPU_CORE_0);
        //ret = rknn_set_core_mask(app_ctx->rknn_ctx, RKNN_NPU_CORE_0_1_2);
        if (ret < 0) {
            printf("rknn_set_core_mask(RKNN_NPU_CORE_0) fail! ret=%d\n", ret);
            ret = -1;
            goto out;
        }
    }
    else if (core == 2) {
        ret = rknn_set_core_mask(app_ctx->rknn_ctx, RKNN_NPU_CORE_1);
        if (ret < 0) {
            printf("rknn_set_core_mask(RKNN_NPU_CORE_1) fail! ret=%d\n", ret);
            ret = -1;
            goto out;
        }
    }
    else if (core == 3) {
        ret = rknn_set_core_mask(app_ctx->rknn_ctx, RKNN_NPU_CORE_2);
        if (ret < 0) {
            // (the original log message wrongly said CORE_1 here)
            printf("rknn_set_core_mask(RKNN_NPU_CORE_2) fail! ret=%d\n", ret);
            ret = -1;
            goto out;
        }
    }

    // Run
    printf("rknn_run\n");
    ret = rknn_run(app_ctx->rknn_ctx, nullptr);
    if (ret < 0)
    {
        printf("rknn_run fail! ret=%d\n", ret);
        ret = -1;
        goto out;
    }

    // Fetch outputs; dequantize to float only for non-quantized models.
    // (outputs was already zeroed above — the original memset it twice.)
    for (int i = 0; i < app_ctx->io_num.n_output; i++)
    {
        outputs[i].index = i;
        outputs[i].want_float = (!app_ctx->is_quant);
    }
    ret = rknn_outputs_get(app_ctx->rknn_ctx, app_ctx->io_num.n_output, outputs, NULL);
    if (ret < 0)
    {
        printf("rknn_outputs_get fail! ret=%d\n", ret);
        goto out;
    }

    // Post Process
    post_process(app_ctx, outputs, &letter_box, box_conf_threshold, nms_threshold, od_results);
    // Remember to release rknn output
    rknn_outputs_release(app_ctx->rknn_ctx, app_ctx->io_num.n_output, outputs);

out:
    if (dst_img.virt_addr != NULL)
    {
        free(dst_img.virt_addr);
    }
    return ret;
}
-42
View File
@@ -1,42 +0,0 @@
// Copyright (c) 2023 by Rockchip Electronics Co., Ltd. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#ifndef _RKNN_DEMO_YOLOV8_H_
#define _RKNN_DEMO_YOLOV8_H_
#include "rknn_api.h"
#include "common.h"
// Aggregates everything needed to run one RKNN YOLOv8 model instance.
typedef struct {
    rknn_context rknn_ctx;          // handle to the loaded RKNN model (0 when released)
    rknn_input_output_num io_num;   // number of input/output tensors of the model
    rknn_tensor_attr* input_attrs;  // per-input tensor attributes; freed by release_yolov8_model()
    rknn_tensor_attr* output_attrs; // per-output tensor attributes; freed by release_yolov8_model()
    int model_channel;              // model input channel count
    int model_width;                // model input width in pixels
    int model_height;               // model input height in pixels
    bool is_quant;                  // true if the model is quantized (outputs fetched as-is, not float)
} rknn_app_context_t;
#include "postprocess.h"
int init_yolov8_model(const char* model_path, rknn_app_context_t* app_ctx);
int release_yolov8_model(rknn_app_context_t* app_ctx);
int inference_yolov8_model(rknn_app_context_t* app_ctx, image_buffer_t* img, object_detect_result_list* od_results, int core);
#endif //_RKNN_DEMO_YOLOV8_H_
+39
View File
@@ -0,0 +1,39 @@
#include <QCoreApplication>
#include <QUdpSocket>
#include <QHostAddress>
#include <QThread>
#include <QJsonObject>
#include <QJsonDocument>
int main(int argc, char *argv[]) {
QCoreApplication a(argc, argv);
if (argc != 2) {
qDebug() << "Give port number of server as only parameter";
return 1;
}
QUdpSocket udpSocket;
quint16 receiverPort = atoi(argv[1]);
for (int i = 0; i < 10000000 ; i++) {
QJsonObject json;
json["value"] = i;
QJsonDocument jsonDoc(json);
QByteArray datagram = jsonDoc.toJson();
qint64 bytesSent = udpSocket.writeDatagram(datagram, QHostAddress("127.0.0.1"), receiverPort);
if (bytesSent == -1) {
qWarning("Failed to send the datagram: %s", qPrintable(udpSocket.errorString()));
return 1;
}
else {
qDebug("Datagram sent successfully");
}
QThread::msleep(100);
}
return 0;
}
+6
View File
@@ -0,0 +1,6 @@
QT = core network
CONFIG += c++17 cmdline
SOURCES += \
main.cpp
+9
View File
@@ -0,0 +1,9 @@
#include <QCoreApplication>
#include "udpserver.h"
int main(int argc, char *argv[])
{
QCoreApplication a(argc, argv);
UdpServer server(5678);
return a.exec();
}
+19
View File
@@ -0,0 +1,19 @@
QT = core network
CONFIG += c++17 cmdline
# You can make your code fail to compile if it uses deprecated APIs.
# In order to do so, uncomment the following line.
#DEFINES += QT_DISABLE_DEPRECATED_BEFORE=0x060000 # disables all the APIs deprecated before Qt 6.0.0
SOURCES += \
main.cpp \
udpserver.cpp
# Default rules for deployment.
qnx: target.path = /tmp/$${TARGET}/bin
else: unix:!android: target.path = /opt/$${TARGET}/bin
!isEmpty(target.path): INSTALLS += target
HEADERS += \
udpserver.h
+42
View File
@@ -0,0 +1,42 @@
#include <QJsonObject>
#include <QJsonDocument>
#include "udpserver.h"
/*
 * Construct a UDP server listening on the given port on all interfaces.
 * Incoming datagrams are handled by readPendingDatagrams().
 */
UdpServer::UdpServer(int portNumber, QObject *parent)
    : QObject{parent}
{
    mUdpSocket = new QUdpSocket(this); // owned via Qt parent-child
    connect(mUdpSocket, &QUdpSocket::readyRead, this, &UdpServer::readPendingDatagrams);

    // bind() can fail (e.g. port already in use); report it instead of
    // silently never receiving anything.
    if (!mUdpSocket->bind(QHostAddress::Any, portNumber)) {
        qWarning() << "UdpServer: failed to bind port" << portNumber
                   << ":" << mUdpSocket->errorString();
    }
}
void UdpServer::readPendingDatagrams()
{
while (mUdpSocket->hasPendingDatagrams()) {
QByteArray datagram;
datagram.resize(mUdpSocket->pendingDatagramSize());
mUdpSocket->readDatagram(datagram.data(), datagram.size());
QJsonParseError jsonError;
QJsonDocument jsonDoc = QJsonDocument::fromJson(datagram, &jsonError);
if (jsonError.error != QJsonParseError::NoError) {
qWarning() << "Error parsing JSON:" << jsonError.errorString();
continue;
}
if (jsonDoc.isObject()) {
emit newJsonDocument(jsonDoc);
}
// Test code to see that valid value was in the doc.
QJsonObject jsonObj = jsonDoc.object();
if (jsonObj.contains("value") && jsonObj["value"].isDouble()) {
int value = jsonObj["value"].toInt();
qDebug() << "Received value:" << value;
}
else {
qWarning() << "Invalid or missing 'value' field in JSON";
}
}
}
+21
View File
@@ -0,0 +1,21 @@
#pragma once
#include <QObject>
#include <QUdpSocket>
#include <QJsonDocument>
// UDP listener that parses incoming datagrams as JSON and re-emits every
// payload that is a top-level JSON object via the newJsonDocument() signal.
class UdpServer : public QObject
{
    Q_OBJECT
public:
    // portNumber: UDP port to bind on all interfaces.
    explicit UdpServer(int portNumber, QObject *parent = nullptr);
private slots:
    // Invoked on QUdpSocket::readyRead; drains and parses all pending datagrams.
    void readPendingDatagrams();
private:
    QUdpSocket *mUdpSocket; // owned via Qt parent-child (parented to this)
signals:
    // Emitted once per datagram whose payload parses to a JSON object.
    void newJsonDocument(QJsonDocument);
};
+61
View File
@@ -0,0 +1,61 @@
cmake_minimum_required(VERSION 3.10)
# Set the project name in a variable
set(project_name yolov10_cpp)
project(${project_name})
set(CMAKE_CXX_STANDARD 17)
find_package(OpenCV REQUIRED)
# Find ONNX Runtime package
find_path(ONNXRUNTIME_INCLUDE_DIR onnxruntime_c_api.h
HINTS /opt/onnxruntime-linux-x64-rocm-1.18.0/include
)
find_library(ONNXRUNTIME_LIBRARY onnxruntime
HINTS /opt/onnxruntime-linux-x64-rocm-1.18.0/lib
)
if(NOT ONNXRUNTIME_INCLUDE_DIR)
message(FATAL_ERROR "ONNX Runtime include directory not found")
endif()
if(NOT ONNXRUNTIME_LIBRARY)
message(FATAL_ERROR "ONNX Runtime library not found")
endif()
add_library(${project_name}-lib
src/placeholder.cpp
src/ia/inference.cpp
src/ia/inference.h
)
target_include_directories(${project_name}-lib PUBLIC src)
target_include_directories(${project_name}-lib PUBLIC ${ONNXRUNTIME_INCLUDE_DIR})
target_link_libraries(${project_name}-lib
PUBLIC ${OpenCV_LIBS}
PUBLIC ${ONNXRUNTIME_LIBRARY}
)
# Add the main executable
add_executable(${project_name}
./src/main.cpp
)
target_include_directories(${project_name} PUBLIC ${ONNXRUNTIME_INCLUDE_DIR})
target_link_libraries(${project_name} ${project_name}-lib)
# Add the video executable
add_executable(${project_name}_video
./src/video.cpp
)
# Add the RTSP video executable
add_executable(${project_name}_video_rtsp
./src/video_rtsp.cpp
)
target_include_directories(${project_name}_video PUBLIC ${ONNXRUNTIME_INCLUDE_DIR})
target_link_libraries(${project_name}_video ${project_name}-lib)
target_include_directories(${project_name}_video_rtsp PUBLIC ${ONNXRUNTIME_INCLUDE_DIR})
target_link_libraries(${project_name}_video_rtsp ${project_name}-lib)
@@ -0,0 +1,200 @@
#include "inference.h"
#include <algorithm>
#include <iostream>
// COCO class labels (80 classes), indexed by the class id the model emits.
const std::vector<std::string> InferenceEngine::CLASS_NAMES = {
    "person", "bicycle", "car", "motorcycle", "airplane", "bus", "train", "truck", "boat", "traffic light",
    "fire hydrant", "stop sign", "parking meter", "bench", "bird", "cat", "dog", "horse", "sheep", "cow",
    "elephant", "bear", "zebra", "giraffe", "backpack", "umbrella", "handbag", "tie", "suitcase", "frisbee",
    "skis", "snowboard", "sports ball", "kite", "baseball bat", "baseball glove", "skateboard", "surfboard",
    "tennis racket", "bottle", "wine glass", "cup", "fork", "knife", "spoon", "bowl", "banana", "apple",
    "sandwich", "orange", "broccoli", "carrot", "hot dog", "pizza", "donut", "cake", "chair", "couch",
    "potted plant", "bed", "dining table", "toilet", "tv", "laptop", "mouse", "remote", "keyboard",
    "cell phone", "microwave", "oven", "toaster", "sink", "refrigerator", "book", "clock", "vase",
    "scissors", "teddy bear", "hair drier", "toothbrush"};
/*
 * Construct the engine: create the ONNX Runtime environment, configure the
 * session options, and load the model from model_path.
 *
 * Bug fix: the options were previously applied in the constructor BODY,
 * i.e. after the session had already been created from them, so they never
 * took effect. SetIntraOpNumThreads() and SetGraphOptimizationLevel()
 * return the SessionOptions& they modify, so chaining them inside the
 * session initializer applies them before the session is built
 * (session_options is declared before session, so it is already constructed).
 */
InferenceEngine::InferenceEngine(const std::string &model_path)
    : env(ORT_LOGGING_LEVEL_WARNING, "ONNXRuntime"),
      session_options(),
      session(env, model_path.c_str(),
              session_options.SetIntraOpNumThreads(1)
                  .SetGraphOptimizationLevel(GraphOptimizationLevel::ORT_ENABLE_BASIC)),
      input_shape{1, 3, 640, 640}
{
}
// Members (env, session, ...) clean themselves up; the default is enough.
InferenceEngine::~InferenceEngine() = default;
/*
 * Preprocess an image for inference: resize to the model input size, scale
 * pixel values to [0, 1], and repack from interleaved HWC into planar CHW
 * order (one contiguous float plane per channel).
 *
 * @param image: input image as loaded/captured by OpenCV
 *
 * @return: vector of floats representing the preprocessed image, laid out
 *          channel-major (C x H x W)
 *
 * @throws std::runtime_error if the image is empty
 *
 * NOTE(review): OpenCV loads images as BGR and no BGR->RGB swap happens
 * here, so the planes are fed to the model in BGR order - confirm this
 * matches the channel order the model was trained with.
 */
std::vector<float> InferenceEngine::preprocessImage(const cv::Mat &image)
{
    if (image.empty())
    {
        throw std::runtime_error("Could not read the image");
    }
    cv::Mat resized_image;
    // input_shape is {1, 3, 640, 640}; both spatial dims are equal, so the
    // (width, height) argument order of cv::Size is not an issue here.
    cv::resize(image, resized_image, cv::Size(input_shape[2], input_shape[3]));
    resized_image.convertTo(resized_image, CV_32F, 1.0 / 255);
    std::vector<cv::Mat> channels(3);
    cv::split(resized_image, channels);
    std::vector<float> input_tensor_values;
    // Append each channel plane back-to-back -> planar CHW layout.
    for (int c = 0; c < 3; ++c)
    {
        input_tensor_values.insert(input_tensor_values.end(), (float *)channels[c].data, (float *)channels[c].data + input_shape[2] * input_shape[3]);
    }
    return input_tensor_values;
}
/*
* Function to filter the detections based on the confidence threshold
*
* @param results: vector of floats representing the output tensor
* @param confidence_threshold: minimum confidence threshold
* @param img_width: width of the input image
* @param img_height: height of the input image
* @param orig_width: original width of the image
* @param orig_height: original height of the image
*
* @return: vector of Detection objects
*/
std::vector<Detection> InferenceEngine::filterDetections(const std::vector<float> &results, float confidence_threshold, int img_width, int img_height, int orig_width, int orig_height)
{
std::vector<Detection> detections;
const int num_detections = results.size() / 6;
for (int i = 0; i < num_detections; ++i)
{
float left = results[i * 6 + 0];
float top = results[i * 6 + 1];
float right = results[i * 6 + 2];
float bottom = results[i * 6 + 3];
float confidence = results[i * 6 + 4];
int class_id = results[i * 6 + 5];
if (confidence >= confidence_threshold)
{
int x = static_cast<int>(left * orig_width / img_width);
int y = static_cast<int>(top * orig_height / img_height);
int width = static_cast<int>((right - left) * orig_width / img_width);
int height = static_cast<int>((bottom - top) * orig_height / img_height);
detections.push_back(
{confidence,
cv::Rect(x, y, width, height),
class_id,
CLASS_NAMES[class_id]});
}
}
return detections;
}
/*
 * Run inference on a preprocessed input tensor.
 *
 * @param input_tensor_values: flattened CHW float tensor matching input_shape;
 *        must stay alive until this call returns (CreateTensor wraps the
 *        buffer, it does not copy it)
 *
 * @return: the first output tensor flattened into a vector of floats
 */
std::vector<float> InferenceEngine::runInference(const std::vector<float> &input_tensor_values)
{
    // (removed an unused Ort::AllocatorWithDefaultOptions local; the name
    // getters create their own allocator)
    std::string input_name = getInputName();
    std::string output_name = getOutputName();
    const char *input_name_ptr = input_name.c_str();
    const char *output_name_ptr = output_name.c_str();

    Ort::MemoryInfo memory_info = Ort::MemoryInfo::CreateCpu(OrtArenaAllocator, OrtMemTypeDefault);
    Ort::Value input_tensor = Ort::Value::CreateTensor<float>(memory_info, const_cast<float *>(input_tensor_values.data()), input_tensor_values.size(), input_shape.data(), input_shape.size());

    auto output_tensors = session.Run(Ort::RunOptions{nullptr}, &input_name_ptr, &input_tensor, 1, &output_name_ptr, 1);

    // Copy the output into a plain vector so the Ort::Value can be released.
    float *floatarr = output_tensors[0].GetTensorMutableData<float>();
    size_t output_tensor_size = output_tensors[0].GetTensorTypeAndShapeInfo().GetElementCount();
    return std::vector<float>(floatarr, floatarr + output_tensor_size);
}
/*
 * Draw detection boxes and class labels onto a copy of the image.
 *
 * @param image: input image (left untouched)
 * @param detections: vector of Detection objects
 *
 * @return: annotated copy of the image
 */
cv::Mat InferenceEngine::draw_labels(const cv::Mat &image, const std::vector<Detection> &detections)
{
    cv::Mat annotated = image.clone();
    for (const auto &det : detections)
    {
        // Green bounding box.
        cv::rectangle(annotated, det.bbox, cv::Scalar(0, 255, 0), 2);

        // White strip behind the label text, then the black text on top.
        const std::string label = det.class_name + ": " + std::to_string(det.confidence);
        int baseLine;
        const cv::Size labelSize = cv::getTextSize(label, cv::FONT_HERSHEY_SIMPLEX, 0.5, 1, &baseLine);
        const cv::Point stripTopLeft(det.bbox.x, det.bbox.y - labelSize.height);
        const cv::Point stripBottomRight(det.bbox.x + labelSize.width, det.bbox.y + baseLine);
        cv::rectangle(annotated, stripTopLeft, stripBottomRight, cv::Scalar(255, 255, 255), cv::FILLED);
        cv::putText(annotated, label, cv::Point(det.bbox.x, det.bbox.y), cv::FONT_HERSHEY_SIMPLEX, 0.5, cv::Scalar(0, 0, 0), 1);
    }
    return annotated;
}
/*
* Function to get the input name
*
* @return: name of the input tensor
*/
std::string InferenceEngine::getInputName()
{
Ort::AllocatorWithDefaultOptions allocator;
Ort::AllocatedStringPtr name_allocator = session.GetInputNameAllocated(0, allocator);
return std::string(name_allocator.get());
}
/*
* Function to get the output name
*
* @return: name of the output tensor
*/
std::string InferenceEngine::getOutputName()
{
Ort::AllocatorWithDefaultOptions allocator;
Ort::AllocatedStringPtr name_allocator = session.GetOutputNameAllocated(0, allocator);
return std::string(name_allocator.get());
}
@@ -0,0 +1,44 @@
#ifndef INFERENCE_H
#define INFERENCE_H
#include <onnxruntime_cxx_api.h>
#include <opencv2/opencv.hpp>
#include <vector>
#include <string>
// One detected object: score, pixel-space bounding box, and class identity.
struct Detection
{
    float confidence;       // model confidence score for this detection
    cv::Rect bbox;          // bounding box in original-image pixel coordinates
    int class_id;           // class index emitted by the model
    std::string class_name; // human-readable label for class_id
};
// Wrapper around an ONNX Runtime session for YOLO-style object detection:
// preprocess an OpenCV image, run the model, and turn the raw output
// tensor into Detection objects.
class InferenceEngine
{
public:
    // Loads the ONNX model at model_path and prepares a session for it.
    InferenceEngine(const std::string &model_path);
    ~InferenceEngine();
    // Resize + normalize + CHW-repack an image into a flat float tensor.
    std::vector<float> preprocessImage(const cv::Mat &image);
    // Convert a raw output tensor into detections above confidence_threshold,
    // scaling boxes from (img_width, img_height) to (orig_width, orig_height).
    std::vector<Detection> filterDetections(const std::vector<float> &results, float confidence_threshold, int img_width, int img_height, int orig_width, int orig_height);
    // Run the model on a preprocessed input tensor; returns the flattened output.
    std::vector<float> runInference(const std::vector<float> &input_tensor_values);
    // Return a copy of the image with boxes and labels drawn on it.
    cv::Mat draw_labels(const cv::Mat &image, const std::vector<Detection> &detections);
    std::vector<int64_t> input_shape; // model input dims: {N, C, H, W}
private:
    Ort::Env env;
    Ort::SessionOptions session_options;
    Ort::Session session; // constructed after env/session_options (declaration order)
    std::string getInputName();
    std::string getOutputName();
    static const std::vector<std::string> CLASS_NAMES; // class labels indexed by class id
};
#endif // INFERENCE_H
+44
View File
@@ -0,0 +1,44 @@
#include "./ia/inference.h"
#include <iostream>
#include <opencv2/opencv.hpp>
/*
 * Run object detection on a single image and write the annotated result
 * to result.jpg.
 *
 * Usage: <program> <model_path> <image_path>
 * Returns 1 on usage error, unreadable image, or inference failure.
 */
int main(int argc, char *argv[])
{
    if (argc != 3)
    {
        std::cerr << "Usage: " << argv[0] << " <model_path> <image_path>" << std::endl;
        return 1;
    }
    std::string model_path = argv[1];
    std::string image_path = argv[2];
    try
    {
        InferenceEngine engine(model_path);

        cv::Mat image = cv::imread(image_path);
        // imread() does not throw on failure - it returns an empty Mat.
        // Fail early with a message that names the bad path.
        if (image.empty())
        {
            std::cerr << "Error: could not read image: " << image_path << std::endl;
            return 1;
        }
        int orig_width = image.cols;
        int orig_height = image.rows;

        std::vector<float> input_tensor_values = engine.preprocessImage(image);
        std::vector<float> results = engine.runInference(input_tensor_values);

        float confidence_threshold = 0.5;
        std::vector<Detection> detections = engine.filterDetections(results, confidence_threshold, engine.input_shape[2], engine.input_shape[3], orig_width, orig_height);

        cv::Mat output = engine.draw_labels(image, detections);
        // imwrite() also reports failure via its return value, not exceptions.
        if (!cv::imwrite("result.jpg", output))
        {
            std::cerr << "Error: could not write result.jpg" << std::endl;
            return 1;
        }
    }
    catch (const std::exception &e)
    {
        std::cerr << "Error: " << e.what() << std::endl;
        return 1;
    }
    return 0;
}
@@ -0,0 +1,2 @@
// src/placeholder.cpp
// Empty symbol that keeps the library target non-empty (it is listed in
// the CMake add_library sources).
void placeholder_function() {}
+71
View File
@@ -0,0 +1,71 @@
#include "./ia/inference.h"
#include <iostream>
#include <opencv2/opencv.hpp>
/*
 * Run object detection on a video source and display annotated frames
 * with an FPS overlay.
 *
 * Usage: <program> <model_path> <source>
 *   source: a video file path or stream URL understood by cv::VideoCapture.
 */
int main(int argc, char const *argv[])
{
    if (argc != 3)
    {
        std::cerr << "Usage: " << argv[0] << " <model_path> <source>" << std::endl;
        return 1;
    }
    std::string model_path = argv[1];
    std::string source = argv[2];

    const std::string window_name = "yolov10";
    cv::namedWindow(window_name, cv::WINDOW_AUTOSIZE);

    InferenceEngine engine(model_path);

    cv::VideoCapture cap(source);
    if (!cap.isOpened())
    {
        std::cerr << "ERROR! Unable to open video\n";
        return -1;
    }

    cv::Mat frame;
    std::cout << "Start grabbing" << std::endl
              << "Press any key to terminate" << std::endl;
    for (;;)
    {
        cap.read(frame);
        if (frame.empty())
        {
            // End of stream (or a fatal capture error): stop instead of
            // busy-looping forever on empty frames, as the old 'continue' did.
            std::cerr << "ERROR! blank frame grabbed\n";
            break;
        }
        int orig_width = frame.cols;
        int orig_height = frame.rows;

        auto timer = cv::getTickCount();
        std::vector<float> input_tensor_values = engine.preprocessImage(frame);
        std::vector<float> results = engine.runInference(input_tensor_values);
        float confidence_threshold = 0.3;
        std::vector<Detection> detections = engine.filterDetections(results, confidence_threshold, engine.input_shape[2], engine.input_shape[3], orig_width, orig_height);
        double fps = cv::getTickFrequency() / ((double)cv::getTickCount() - timer);

        cv::putText(frame, "FPS: " + std::to_string(fps), cv::Point(10, 30), cv::FONT_HERSHEY_SIMPLEX, 1, cv::Scalar(0, 255, 0), 2, 8);
        cv::Mat output = engine.draw_labels(frame, detections);
        // Show the frames in the window created above; the old code opened a
        // second, never-created "test" window instead.
        cv::imshow(window_name, output);
        if (cv::waitKey(5) >= 0)
            break;
    }
    return 0;
}
@@ -0,0 +1,73 @@
#include "./ia/inference.h"
#include <iostream>
#include <opencv2/opencv.hpp>
/*
 * Run object detection on an RTSP stream and display annotated frames
 * with an FPS overlay.
 *
 * Usage: <program> <model_path> [rtsp_url]
 *   rtsp_url defaults to rtsp://localhost:8554/live.stream
 */
int main(int argc, char const *argv[])
{
    if (argc != 2 && argc != 3)
    {
        std::cerr << "Usage: " << argv[0] << " <model_path> [rtsp_url]" << std::endl;
        return 1;
    }
    std::string model_path = argv[1];
    // Optional second argument overrides the default stream URL. The old
    // code required argc == 2 yet still read argv[2] (always NULL then).
    std::string rtsp_url = (argc == 3) ? argv[2] : "rtsp://localhost:8554/live.stream";

    const std::string window_name = "yolov10";
    cv::namedWindow(window_name, cv::WINDOW_AUTOSIZE);

    InferenceEngine engine(model_path);

    cv::VideoCapture cap(rtsp_url);
    if (!cap.isOpened())
    {
        std::cerr << "ERROR! Unable to open video\n";
        return -1;
    }

    cv::Mat frame;
    std::cout << "Start grabbing" << std::endl
              << "Press any key to terminate" << std::endl;
    for (;;)
    {
        cap.read(frame);
        if (frame.empty())
        {
            // RTSP streams can drop frames transiently; keep trying rather
            // than exit.
            std::cerr << "ERROR! blank frame grabbed\n";
            continue;
        }
        int orig_width = frame.cols;
        int orig_height = frame.rows;

        auto timer = cv::getTickCount();
        std::vector<float> input_tensor_values = engine.preprocessImage(frame);
        std::vector<float> results = engine.runInference(input_tensor_values);
        float confidence_threshold = 0.3;
        std::vector<Detection> detections = engine.filterDetections(results, confidence_threshold, engine.input_shape[2], engine.input_shape[3], orig_width, orig_height);
        double fps = cv::getTickFrequency() / ((double)cv::getTickCount() - timer);

        cv::putText(frame, "FPS: " + std::to_string(fps), cv::Point(10, 30), cv::FONT_HERSHEY_SIMPLEX, 1, cv::Scalar(0, 255, 0), 2, 8);
        cv::Mat output = engine.draw_labels(frame, detections);
        // Show the frames in the window created above; the old code opened a
        // second, never-created "test" window instead.
        cv::imshow(window_name, output);
        if (cv::waitKey(5) >= 0)
            break;
    }
    return 0;
}