New threaded RTSP and AI image recognition.

This commit is contained in:
Your Name
2024-07-01 08:32:38 +03:00
parent 0c37aa6116
commit c03d477c45
27 changed files with 8140 additions and 0 deletions
+52
View File
@@ -0,0 +1,52 @@
#include <QDebug>
#include <opencv2/highgui.hpp>
#include "aiengine.h"
#include "aiengineinference.h"
#include "aiengineinferenceonnx.h"
AiEngine::AiEngine(QString modelPath, QObject *parent)
: QObject{parent}
{
mRtspListener = new AiEngineRtspListener(this);
connect(mRtspListener, &AiEngineRtspListener::frameReceived, this, &AiEngine::frameReceivedSlot);
QThread *inferenceThread = new QThread(this);
mInference = new AiEngineInferenceOnnx(modelPath);
mInference->moveToThread(inferenceThread);
connect(mInference, &AiEngineInference::resultsReady, this, &AiEngine::inferenceResultsReceivedSlot, Qt::QueuedConnection);
connect(this, &AiEngine::inferenceFrame, mInference, &AiEngineInference::performInferenceSlot, Qt::QueuedConnection);
inferenceThread->start();
}
void AiEngine::start(void)
{
mRtspListener->startListening();
}
void AiEngine::stop(void)
{
mRtspListener->stopListening();
}
void AiEngine::inferenceResultsReceivedSlot(AiEngineInferenceResult results)
{
(void)results;
qDebug() << "AiEngine got inference results in thread: " << QThread::currentThreadId();
cv::imshow("Received Frame", results.frame);
}
void AiEngine::frameReceivedSlot(cv::Mat frame)
{
//qDebug() << "AiEngine got frame from RTSP listener in thread: " << QThread::currentThreadId();
//cv::imshow("Received Frame", frame);
if (mInference->isActive() == false) {
qDebug() << "AiEngine. Inference thread is free. Sending frame to it.";
emit inferenceFrame(frame.clone());
}
}
+29
View File
@@ -0,0 +1,29 @@
#pragma once
#include <QObject>
#include <opencv2/core.hpp>
#include <opencv2/videoio.hpp>
#include "aienginertsplistener.h"
#include "aiengineinference.h"
class AiEngine : public QObject
{
Q_OBJECT
public:
explicit AiEngine(QString modelPath, QObject *parent = nullptr);
void start(void);
void stop(void);
public slots:
void frameReceivedSlot(cv::Mat frame);
void inferenceResultsReceivedSlot(AiEngineInferenceResult results);
signals:
void inferenceFrame(cv::Mat frame);
private:
AiEngineRtspListener *mRtspListener;
AiEngineInference *mInference;
signals:
};
+13
View File
@@ -0,0 +1,13 @@
#include "aiengineinference.h"
AiEngineInference::AiEngineInference(QString modelPath, QObject *parent)
: QObject{parent},
mModelPath(modelPath),
mActive(false)
{}
bool AiEngineInference::isActive(void)
{
return mActive;
}
+31
View File
@@ -0,0 +1,31 @@
#pragma once
#include <QObject>
#include <QString>
#include <opencv2/core.hpp>
class AiEngineInferenceResult {
public:
cv::Mat frame;
int objects;
};
class AiEngineInference : public QObject
{
Q_OBJECT
public:
explicit AiEngineInference(QString modelPath, QObject *parent = nullptr);
bool isActive(void);
protected:
QString mModelPath;
bool mActive;
public slots:
virtual void performInferenceSlot(cv::Mat frame) = 0;
signals:
void resultsReady(AiEngineInferenceResult results);
};
+34
View File
@@ -0,0 +1,34 @@
#include <QThread>
#include <QDebug>
#include <thread>
#include <chrono>
#include "aiengineinferenceonnx.h"
AiEngineInferenceOnnx::AiEngineInferenceOnnx(QString modelPath, QObject *parent)
: AiEngineInference{modelPath, parent}
{
qDebug() << "TUOMAS test mModelPath=" << mModelPath;
mEngine = new InferenceEngine(modelPath.toStdString());
}
void AiEngineInferenceOnnx::performInferenceSlot(cv::Mat frame)
{
qDebug() << "performInferenceSlot() in thread: " << QThread::currentThreadId();
mActive = true;
int orig_width = frame.cols;
int orig_height = frame.rows;
std::vector<float> input_tensor_values = mEngine->preprocessImage(frame);
std::vector<float> results = mEngine->runInference(input_tensor_values);
float confidence_threshold = 0.5;
std::vector<Detection> detections = mEngine->filterDetections(results, confidence_threshold, mEngine->input_shape[2], mEngine->input_shape[3], orig_width, orig_height);
AiEngineInferenceResult result;
result.frame = mEngine->draw_labels(frame.clone(), detections);
result.objects = 1;
emit resultsReady(result);
mActive = false;
}
+18
View File
@@ -0,0 +1,18 @@
#pragma once
#include <QObject>
#include "aiengineinference.h"
#include "pc/inference.h"
class AiEngineInferenceOnnx : public AiEngineInference
{
Q_OBJECT
public:
explicit AiEngineInferenceOnnx(QString modelPath, QObject *parent = nullptr);
public slots:
void performInferenceSlot(cv::Mat frame) override;
private:
InferenceEngine *mEngine;
};
+54
View File
@@ -0,0 +1,54 @@
#include <QDebug>
#include <QtConcurrent/QtConcurrent>
#include "aienginertsplistener.h"
#include "config.h"
AiEngineRtspListener::AiEngineRtspListener(QObject *parent)
: QObject{parent},
mIsListening(false)
{
}
AiEngineRtspListener::~AiEngineRtspListener()
{
stopListening();
}
void AiEngineRtspListener::startListening(void)
{
mIsListening = true;
(void)QtConcurrent::run([this](){
listenLoop();
});
}
void AiEngineRtspListener::stopListening()
{
mIsListening = false;
if (mCap.isOpened()) {
mCap.release();
}
}
void AiEngineRtspListener::listenLoop(void)
{
qDebug() << "AiEngineRtspListener loop running in thread: " << QThread::currentThreadId();
mCap.open(rtspVideoUrl.toStdString());
cv::Mat frame;
while (mIsListening) {
mCap >> frame;
if (frame.empty() == false) {
emit frameReceived(frame.clone());
}
}
}
+23
View File
@@ -0,0 +1,23 @@
#pragma once
#include <QThread>
#include <opencv2/core.hpp>
#include <opencv2/videoio.hpp>
class AiEngineRtspListener : public QObject
{
Q_OBJECT
public:
explicit AiEngineRtspListener(QObject *parent = nullptr);
~AiEngineRtspListener();
void startListening(void);
void stopListening(void);
private:
void listenLoop(void);
bool mIsListening;
cv::VideoCapture mCap;
signals:
void frameReceived(cv::Mat frame);
};
+80
View File
@@ -0,0 +1,80 @@
person
bicycle
car
motorcycle
airplane
bus
train
truck
boat
traffic light
fire hydrant
stop sign
parking meter
bench
bird
cat
dog
horse
sheep
cow
elephant
bear
zebra
giraffe
backpack
umbrella
handbag
tie
suitcase
frisbee
skis
snowboard
sports ball
kite
baseball bat
baseball glove
skateboard
surfboard
tennis racket
bottle
wine glass
cup
fork
knife
spoon
bowl
banana
apple
sandwich
orange
broccoli
carrot
hot dog
pizza
donut
cake
chair
couch
potted plant
bed
dining table
toilet
tv
laptop
mouse
remote
keyboard
cell phone
microwave
oven
toaster
sink
refrigerator
book
clock
vase
scissors
teddy bear
hair drier
toothbrush
+5
View File
@@ -0,0 +1,5 @@
#pragma once
#include <QString>
QString rtspVideoUrl = "rtsp://localhost:8554/live.stream";
+29
View File
@@ -0,0 +1,29 @@
#include <QCoreApplication>
#include <QDebug>
#include <QFile>
#include <opencv2/opencv.hpp>
#include "aiengine.h"
int main(int argc, char *argv[])
{
QCoreApplication app(argc, argv);
if (argc != 2) {
qDebug() << "\nUsage: " << argv[0] << " <model_file>";
return 1;
}
QString modelFile = argv[1];
if (QFile::exists(modelFile) == false) {
qDebug() << "\nUsage: " << modelFile << " <model_file>";
return 1;
}
cv::namedWindow("AiEngine", cv::WINDOW_AUTOSIZE);
AiEngine aiEngine(modelFile);
aiEngine.start();
return app.exec();
}
Binary file not shown.

After

Width:  |  Height:  |  Size: 602 KiB

+42
View File
@@ -0,0 +1,42 @@
#ifndef _RKNN_MODEL_ZOO_COMMON_H_
#define _RKNN_MODEL_ZOO_COMMON_H_
/**
* @brief Image pixel format
*
*/
typedef enum {
IMAGE_FORMAT_GRAY8,
IMAGE_FORMAT_RGB888,
IMAGE_FORMAT_RGBA8888,
IMAGE_FORMAT_YUV420SP_NV21,
IMAGE_FORMAT_YUV420SP_NV12,
} image_format_t;
/**
* @brief Image buffer
*
*/
typedef struct {
int width;
int height;
int width_stride;
int height_stride;
image_format_t format;
unsigned char* virt_addr;
int size;
int fd;
} image_buffer_t;
/**
* @brief Image rectangle
*
*/
typedef struct {
int left;
int top;
int right;
int bottom;
} image_rect_t;
#endif //_RKNN_MODEL_ZOO_COMMON_H_
+129
View File
@@ -0,0 +1,129 @@
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#define MAX_TEXT_LINE_LENGTH 1024
unsigned char* load_model(const char* filename, int* model_size)
{
FILE* fp = fopen(filename, "rb");
if (fp == NULL) {
printf("fopen %s fail!\n", filename);
return NULL;
}
fseek(fp, 0, SEEK_END);
int model_len = ftell(fp);
unsigned char* model = (unsigned char*)malloc(model_len);
fseek(fp, 0, SEEK_SET);
if (model_len != fread(model, 1, model_len, fp)) {
printf("fread %s fail!\n", filename);
free(model);
fclose(fp);
return NULL;
}
*model_size = model_len;
fclose(fp);
return model;
}
int read_data_from_file(const char *path, char **out_data)
{
FILE *fp = fopen(path, "rb");
if(fp == NULL) {
printf("fopen %s fail!\n", path);
return -1;
}
fseek(fp, 0, SEEK_END);
int file_size = ftell(fp);
char *data = (char *)malloc(file_size+1);
data[file_size] = 0;
fseek(fp, 0, SEEK_SET);
if(file_size != fread(data, 1, file_size, fp)) {
printf("fread %s fail!\n", path);
free(data);
fclose(fp);
return -1;
}
if(fp) {
fclose(fp);
}
*out_data = data;
return file_size;
}
int write_data_to_file(const char *path, const char *data, unsigned int size)
{
FILE *fp;
fp = fopen(path, "w");
if(fp == NULL) {
printf("open error: %s\n", path);
return -1;
}
fwrite(data, 1, size, fp);
fflush(fp);
fclose(fp);
return 0;
}
int count_lines(FILE* file)
{
int count = 0;
char ch;
while(!feof(file))
{
ch = fgetc(file);
if(ch == '\n')
{
count++;
}
}
count += 1;
rewind(file);
return count;
}
char** read_lines_from_file(const char* filename, int* line_count)
{
FILE* file = fopen(filename, "r");
if (file == NULL) {
printf("Failed to open the file.\n");
return NULL;
}
int num_lines = count_lines(file);
printf("num_lines=%d\n", num_lines);
char** lines = (char**)malloc(num_lines * sizeof(char*));
memset(lines, 0, num_lines * sizeof(char*));
char buffer[MAX_TEXT_LINE_LENGTH];
int line_index = 0;
while (fgets(buffer, sizeof(buffer), file) != NULL) {
buffer[strcspn(buffer, "\n")] = '\0'; // 移除换行符
lines[line_index] = (char*)malloc(strlen(buffer) + 1);
strcpy(lines[line_index], buffer);
line_index++;
}
fclose(file);
*line_count = num_lines;
return lines;
}
void free_lines(char** lines, int line_count)
{
for (int i = 0; i < line_count; i++) {
if (lines[i] != NULL) {
free(lines[i]);
}
}
free(lines);
}
+48
View File
@@ -0,0 +1,48 @@
#ifndef _RKNN_MODEL_ZOO_FILE_UTILS_H_
#define _RKNN_MODEL_ZOO_FILE_UTILS_H_
#ifdef __cplusplus
extern "C" {
#endif
/**
* @brief Read data from file
*
* @param path [in] File path
* @param out_data [out] Read data
* @return int -1: error; > 0: Read data size
*/
int read_data_from_file(const char *path, char **out_data);
/**
* @brief Write data to file
*
* @param path [in] File path
* @param data [in] Write data
* @param size [in] Write data size
* @return int 0: success; -1: error
*/
int write_data_to_file(const char *path, const char *data, unsigned int size);
/**
* @brief Read all lines from text file
*
* @param path [in] File path
* @param line_count [out] File line count
* @return char** String array of all lines, remeber call free_lines() to release after used
*/
char** read_lines_from_file(const char* path, int* line_count);
/**
* @brief Free lines string array
*
* @param lines [in] String array
* @param line_count [in] Line count
*/
void free_lines(char** lines, int line_count);
#ifdef __cplusplus
} // extern "C"
#endif
#endif //_RKNN_MODEL_ZOO_FILE_UTILS_H_
File diff suppressed because it is too large Load Diff
File diff suppressed because it is too large Load Diff
+89
View File
@@ -0,0 +1,89 @@
#ifndef _RKNN_MODEL_ZOO_IMAGE_DRAWING_H_
#define _RKNN_MODEL_ZOO_IMAGE_DRAWING_H_
#ifdef __cplusplus
extern "C" {
#endif
#include "common.h"
// Color Format ARGB8888
#define COLOR_GREEN 0xFF00FF00
#define COLOR_BLUE 0xFF0000FF
#define COLOR_RED 0xFFFF0000
#define COLOR_YELLOW 0xFFFFFF00
#define COLOR_ORANGE 0xFFFF4500
#define COLOR_BLACK 0xFF000000
#define COLOR_WHITE 0xFFFFFFFF
/**
* @brief Draw rectangle
*
* @param image [in] Image buffer
* @param rx [in] Rectangle top left x
* @param ry [in] Rectangle top left y
* @param rw [in] Rectangle width
* @param rh [in] Rectangle height
* @param color [in] Rectangle line color
* @param thickness [in] Rectangle line thickness
*/
void draw_rectangle(image_buffer_t* image, int rx, int ry, int rw, int rh, unsigned int color,
int thickness);
/**
* @brief Draw line
*
* @param image [in] Image buffer
* @param x0 [in] Line begin point x
* @param y0 [in] Line begin point y
* @param x1 [in] Line end point x
* @param y1 [in] Line end point y
* @param color [in] Line color
* @param thickness [in] Line thickness
*/
void draw_line(image_buffer_t* image, int x0, int y0, int x1, int y1, unsigned int color,
int thickness);
/**
* @brief Draw text (only support ASCII char)
*
* @param image [in] Image buffer
* @param text [in] Text
* @param x [in] Text position x
* @param y [in] Text position y
* @param color [in] Text color
* @param fontsize [in] Text fontsize
*/
void draw_text(image_buffer_t* image, const char* text, int x, int y, unsigned int color,
int fontsize);
/**
* @brief Draw circle
*
* @param image [in] Image buffer
* @param cx [in] Circle center x
* @param cy [in] Circle center y
* @param radius [in] Circle radius
* @param color [in] Circle color
* @param thickness [in] Circle thickness
*/
void draw_circle(image_buffer_t* image, int cx, int cy, int radius, unsigned int color,
int thickness);
/**
* @brief Draw image
*
* @param image [in] Target Image buffer
* @param draw_img [in] Image for drawing
* @param x [in] Target Image draw position x
* @param y [in] Target Image draw position y
* @param rw [in] Width of image for drawing
* @param rh [in] Height of image for drawing
*/
void draw_image(image_buffer_t* image, unsigned char* draw_img, int x, int y, int rw, int rh);
#ifdef __cplusplus
} // extern "C"
#endif
#endif // _RKNN_MODEL_ZOO_IMAGE_DRAWING_H_
+782
View File
@@ -0,0 +1,782 @@
#include <stdio.h>
#include <stdlib.h>
#include <dirent.h>
#include <math.h>
#include <sys/time.h>
#include "im2d.h"
#include "drmrga.h"
#define STB_IMAGE_IMPLEMENTATION
#define STBI_NO_THREAD_LOCALS
#define STBI_ONLY_JPEG
#define STBI_ONLY_PNG
#include "stb_image.h"
#define STB_IMAGE_WRITE_IMPLEMENTATION
#include "stb_image_write.h"
#include "turbojpeg.h"
#include "image_utils.h"
#include "file_utils.h"
static const char* filter_image_names[] = {
"jpg",
"jpeg",
"JPG",
"JPEG",
"png",
"PNG",
"data",
NULL
};
static const char* subsampName[TJ_NUMSAMP] = {"4:4:4", "4:2:2", "4:2:0", "Grayscale", "4:4:0", "4:1:1"};
static const char* colorspaceName[TJ_NUMCS] = {"RGB", "YCbCr", "GRAY", "CMYK", "YCCK"};
static int image_file_filter(const struct dirent *entry)
{
const char ** filter;
for (filter = filter_image_names; *filter; ++filter) {
if(strstr(entry->d_name, *filter) != NULL) {
return 1;
}
}
return 0;
}
static int read_image_jpeg(const char* path, image_buffer_t* image)
{
FILE* jpegFile = NULL;
unsigned long jpegSize;
int flags = 0;
int width, height;
int origin_width, origin_height;
unsigned char* imgBuf = NULL;
unsigned char* jpegBuf = NULL;
unsigned long size;
unsigned short orientation = 1;
struct timeval tv1, tv2;
if ((jpegFile = fopen(path, "rb")) == NULL) {
printf("open input file failure\n");
}
if (fseek(jpegFile, 0, SEEK_END) < 0 || (size = ftell(jpegFile)) < 0 || fseek(jpegFile, 0, SEEK_SET) < 0) {
printf("determining input file size failure\n");
}
if (size == 0) {
printf("determining input file size, Input file contains no data\n");
}
jpegSize = (unsigned long)size;
if ((jpegBuf = (unsigned char*)malloc(jpegSize * sizeof(unsigned char))) == NULL) {
printf("allocating JPEG buffer\n");
}
if (fread(jpegBuf, jpegSize, 1, jpegFile) < 1) {
printf("reading input file");
}
fclose(jpegFile);
jpegFile = NULL;
tjhandle handle = NULL;
int subsample, colorspace;
int padding = 1;
int ret = 0;
handle = tjInitDecompress();
ret = tjDecompressHeader3(handle, jpegBuf, size, &origin_width, &origin_height, &subsample, &colorspace);
if (ret < 0) {
printf("header file error, errorStr:%s, errorCode:%d\n", tjGetErrorStr(), tjGetErrorCode(handle));
return -1;
}
// 对图像做裁剪16对齐,利于后续rga操作
int crop_width = origin_width / 16 * 16;
int crop_height = origin_height / 16 * 16;
printf("origin size=%dx%d crop size=%dx%d\n", origin_width, origin_height, crop_width, crop_height);
// gettimeofday(&tv1, NULL);
ret = tjDecompressHeader3(handle, jpegBuf, size, &width, &height, &subsample, &colorspace);
if (ret < 0) {
printf("header file error, errorStr:%s, errorCode:%d\n", tjGetErrorStr(), tjGetErrorCode(handle));
return -1;
}
printf("input image: %d x %d, subsampling: %s, colorspace: %s, orientation: %d\n",
width, height, subsampName[subsample], colorspaceName[colorspace], orientation);
int sw_out_size = width * height * 3;
unsigned char* sw_out_buf = image->virt_addr;
if (sw_out_buf == NULL) {
sw_out_buf = (unsigned char*)malloc(sw_out_size * sizeof(unsigned char));
}
if (sw_out_buf == NULL) {
printf("sw_out_buf is NULL\n");
goto out;
}
flags |= 0;
// 错误码为0时,表示警告,错误码为-1时表示错误
int pixelFormat = TJPF_RGB;
ret = tjDecompress2(handle, jpegBuf, size, sw_out_buf, width, 0, height, pixelFormat, flags);
// ret = tjDecompressToYUV2(handle, jpeg_buf, size, dst_buf, *width, padding, *height, flags);
if ((0 != tjGetErrorCode(handle)) && (ret < 0)) {
printf("error : decompress to yuv failed, errorStr:%s, errorCode:%d\n", tjGetErrorStr(),
tjGetErrorCode(handle));
goto out;
}
if ((0 == tjGetErrorCode(handle)) && (ret < 0)) {
printf("warning : errorStr:%s, errorCode:%d\n", tjGetErrorStr(), tjGetErrorCode(handle));
}
tjDestroy(handle);
// gettimeofday(&tv2, NULL);
// printf("decode time %ld ms\n", (tv2.tv_sec-tv1.tv_sec)*1000 + (tv2.tv_usec-tv1.tv_usec)/1000);
image->width = width;
image->height = height;
image->format = IMAGE_FORMAT_RGB888;
image->virt_addr = sw_out_buf;
image->size = sw_out_size;
out:
if (jpegBuf) {
free(jpegBuf);
}
return 0;
}
static int read_image_raw(const char* path, image_buffer_t* image)
{
FILE *fp = fopen(path, "rb");
if(fp == NULL) {
printf("fopen %s fail!\n", path);
return -1;
}
fseek(fp, 0, SEEK_END);
int file_size = ftell(fp);
unsigned char *data = image->virt_addr;
if (image->virt_addr == NULL) {
data = (unsigned char *)malloc(file_size+1);
}
data[file_size] = 0;
fseek(fp, 0, SEEK_SET);
if(file_size != fread(data, 1, file_size, fp)) {
printf("fread %s fail!\n", path);
free(data);
return -1;
}
if(fp) {
fclose(fp);
}
if (image->virt_addr == NULL) {
image->virt_addr = data;
image->size = file_size;
}
return 0;
}
static int write_image_jpeg(const char* path, int quality, const image_buffer_t* image)
{
int ret;
int jpegSubsamp = TJSAMP_422;
unsigned char* jpegBuf = NULL;
unsigned long jpegSize = 0;
int flags = 0;
const unsigned char* data = image->virt_addr;
int width = image->width;
int height = image->height;
int pixelFormat = TJPF_RGB;
tjhandle handle = tjInitCompress();
if (image->format == IMAGE_FORMAT_RGB888) {
ret = tjCompress2(handle, data, width, 0, height, pixelFormat, &jpegBuf, &jpegSize, jpegSubsamp, quality, flags);
} else {
printf("write_image_jpeg: pixel format %d not support\n", image->format);
return -1;
}
// printf("ret=%d jpegBuf=%p jpegSize=%d\n", ret, jpegBuf, jpegSize);
if (jpegBuf != NULL && jpegSize > 0) {
write_data_to_file(path, (const char*)jpegBuf, jpegSize);
tjFree(jpegBuf);
}
tjDestroy(handle);
return 0;
}
static int read_image_stb(const char* path, image_buffer_t* image)
{
// 默认图像为3通道
int w, h, c;
unsigned char* pixeldata = stbi_load(path, &w, &h, &c, 0);
if (!pixeldata) {
printf("error: read image %s fail\n", path);
return -1;
}
// printf("load image wxhxc=%dx%dx%d path=%s\n", w, h, c, path);
int size = w * h * c;
// 设置图像数据
if (image->virt_addr != NULL) {
memcpy(image->virt_addr, pixeldata, size);
stbi_image_free(pixeldata);
} else {
image->virt_addr = pixeldata;
}
image->width = w;
image->height = h;
if (c == 4) {
image->format = IMAGE_FORMAT_RGBA8888;
} else if (c == 1) {
image->format = IMAGE_FORMAT_GRAY8;
} else {
image->format = IMAGE_FORMAT_RGB888;
}
return 0;
}
int read_image(const char* path, image_buffer_t* image)
{
const char* _ext = strrchr(path, '.');
if (!_ext) {
// missing extension
return -666;
}
if (strcmp(_ext, ".data") == 0) {
return read_image_raw(path, image);
} else if (strcmp(_ext, ".jpg") == 0 || strcmp(_ext, ".jpeg") == 0 || strcmp(_ext, ".JPG") == 0 ||
strcmp(_ext, ".JPEG") == 0) {
return read_image_jpeg(path, image);
} else {
return read_image_stb(path, image);
}
}
int write_image(const char* path, const image_buffer_t* img)
{
int ret;
int width = img->width;
int height = img->height;
int channel = 3;
void* data = img->virt_addr;
printf("write_image path: %s width=%d height=%d channel=%d data=%p\n",
path, width, height, channel, data);
const char* _ext = strrchr(path, '.');
if (!_ext) {
// missing extension
return -1;
}
if (strcmp(_ext, ".jpg") == 0 || strcmp(_ext, ".jpeg") == 0 || strcmp(_ext, ".JPG") == 0 ||
strcmp(_ext, ".JPEG") == 0) {
int quality = 95;
ret = write_image_jpeg(path, quality, img);
} else if (strcmp(_ext, ".png") == 0 | strcmp(_ext, ".PNG") == 0) {
ret = stbi_write_png(path, width, height, channel, data, 0);
} else if (strcmp(_ext, ".data") == 0 | strcmp(_ext, ".DATA") == 0) {
int size = get_image_size(img);
ret = write_data_to_file(path, data, size);
} else {
// unknown extension type
return -1;
}
return ret;
}
static int crop_and_scale_image_c(int channel, unsigned char *src, int src_width, int src_height,
int crop_x, int crop_y, int crop_width, int crop_height,
unsigned char *dst, int dst_width, int dst_height,
int dst_box_x, int dst_box_y, int dst_box_width, int dst_box_height) {
if (dst == NULL) {
printf("dst buffer is null\n");
return -1;
}
float x_ratio = (float)crop_width / (float)dst_box_width;
float y_ratio = (float)crop_height / (float)dst_box_height;
// printf("src_width=%d src_height=%d crop_x=%d crop_y=%d crop_width=%d crop_height=%d\n",
// src_width, src_height, crop_x, crop_y, crop_width, crop_height);
// printf("dst_width=%d dst_height=%d dst_box_x=%d dst_box_y=%d dst_box_width=%d dst_box_height=%d\n",
// dst_width, dst_height, dst_box_x, dst_box_y, dst_box_width, dst_box_height);
// printf("channel=%d x_ratio=%f y_ratio=%f\n", channel, x_ratio, y_ratio);
// 从原图指定区域取数据,双线性缩放到目标指定区域
for (int dst_y = dst_box_y; dst_y < dst_box_y + dst_box_height; dst_y++) {
for (int dst_x = dst_box_x; dst_x < dst_box_x + dst_box_width; dst_x++) {
int dst_x_offset = dst_x - dst_box_x;
int dst_y_offset = dst_y - dst_box_y;
int src_x = (int)(dst_x_offset * x_ratio) + crop_x;
int src_y = (int)(dst_y_offset * y_ratio) + crop_y;
float x_diff = (dst_x_offset * x_ratio) - (src_x - crop_x);
float y_diff = (dst_y_offset * y_ratio) - (src_y - crop_y);
int index1 = src_y * src_width * channel + src_x * channel;
int index2 = index1 + src_width * channel; // down
if (src_y == src_height - 1) {
// 如果到图像最下边缘,变成选择上面的像素
index2 = index1 - src_width * channel;
}
int index3 = index1 + 1 * channel; // right
int index4 = index2 + 1 * channel; // down right
if (src_x == src_width - 1) {
// 如果到图像最右边缘,变成选择左边的像素
index3 = index1 - 1 * channel;
index4 = index2 - 1 * channel;
}
// printf("dst_x=%d dst_y=%d dst_x_offset=%d dst_y_offset=%d src_x=%d src_y=%d x_diff=%f y_diff=%f src index=%d %d %d %d\n",
// dst_x, dst_y, dst_x_offset, dst_y_offset,
// src_x, src_y, x_diff, y_diff,
// index1, index2, index3, index4);
for (int c = 0; c < channel; c++) {
unsigned char A = src[index1+c];
unsigned char B = src[index3+c];
unsigned char C = src[index2+c];
unsigned char D = src[index4+c];
unsigned char pixel = (unsigned char)(
A * (1 - x_diff) * (1 - y_diff) +
B * x_diff * (1 - y_diff) +
C * y_diff * (1 - x_diff) +
D * x_diff * y_diff
);
dst[(dst_y * dst_width + dst_x) * channel + c] = pixel;
}
}
}
return 0;
}
static int crop_and_scale_image_yuv420sp(unsigned char *src, int src_width, int src_height,
int crop_x, int crop_y, int crop_width, int crop_height,
unsigned char *dst, int dst_width, int dst_height,
int dst_box_x, int dst_box_y, int dst_box_width, int dst_box_height) {
unsigned char* src_y = src;
unsigned char* src_uv = src + src_width * src_height;
unsigned char* dst_y = dst;
unsigned char* dst_uv = dst + dst_width * dst_height;
crop_and_scale_image_c(1, src_y, src_width, src_height, crop_x, crop_y, crop_width, crop_height,
dst_y, dst_width, dst_height, dst_box_x, dst_box_y, dst_box_width, dst_box_height);
crop_and_scale_image_c(2, src_uv, src_width / 2, src_height / 2, crop_x / 2, crop_y / 2, crop_width / 2, crop_height / 2,
dst_uv, dst_width / 2, dst_height / 2, dst_box_x, dst_box_y, dst_box_width, dst_box_height);
return 0;
}
static int convert_image_cpu(image_buffer_t *src, image_buffer_t *dst, image_rect_t *src_box, image_rect_t *dst_box, char color) {
int ret;
if (dst->virt_addr == NULL) {
return -1;
}
if (src->virt_addr == NULL) {
return -1;
}
if (src->format != dst->format) {
return -1;
}
int src_box_x = 0;
int src_box_y = 0;
int src_box_w = src->width;
int src_box_h = src->height;
if (src_box != NULL) {
src_box_x = src_box->left;
src_box_y = src_box->top;
src_box_w = src_box->right - src_box->left + 1;
src_box_h = src_box->bottom - src_box->top + 1;
}
int dst_box_x = 0;
int dst_box_y = 0;
int dst_box_w = dst->width;
int dst_box_h = dst->height;
if (dst_box != NULL) {
dst_box_x = dst_box->left;
dst_box_y = dst_box->top;
dst_box_w = dst_box->right - dst_box->left + 1;
dst_box_h = dst_box->bottom - dst_box->top + 1;
}
// fill pad color
if (dst_box_w != dst->width || dst_box_h != dst->height) {
int dst_size = get_image_size(dst);
memset(dst->virt_addr, color, dst_size);
}
int need_release_dst_buffer = 0;
int reti = 0;
if (src->format == IMAGE_FORMAT_RGB888) {
reti = crop_and_scale_image_c(3, src->virt_addr, src->width, src->height,
src_box_x, src_box_y, src_box_w, src_box_h,
dst->virt_addr, dst->width, dst->height,
dst_box_x, dst_box_y, dst_box_w, dst_box_h);
} else if (src->format == IMAGE_FORMAT_RGBA8888) {
reti = crop_and_scale_image_c(4, src->virt_addr, src->width, src->height,
src_box_x, src_box_y, src_box_w, src_box_h,
dst->virt_addr, dst->width, dst->height,
dst_box_x, dst_box_y, dst_box_w, dst_box_h);
} else if (src->format == IMAGE_FORMAT_GRAY8) {
reti = crop_and_scale_image_c(1, src->virt_addr, src->width, src->height,
src_box_x, src_box_y, src_box_w, src_box_h,
dst->virt_addr, dst->width, dst->height,
dst_box_x, dst_box_y, dst_box_w, dst_box_h);
} else if (src->format == IMAGE_FORMAT_YUV420SP_NV12 || src->format == IMAGE_FORMAT_YUV420SP_NV12) {
reti = crop_and_scale_image_yuv420sp(src->virt_addr, src->width, src->height,
src_box_x, src_box_y, src_box_w, src_box_h,
dst->virt_addr, dst->width, dst->height,
dst_box_x, dst_box_y, dst_box_w, dst_box_h);
} else {
printf("no support format %d\n", src->format);
}
if (reti != 0) {
printf("convert_image_cpu fail %d\n", reti);
return -1;
}
printf("finish\n");
return 0;
}
static int get_rga_fmt(image_format_t fmt) {
switch (fmt)
{
case IMAGE_FORMAT_RGB888:
return RK_FORMAT_RGB_888;
case IMAGE_FORMAT_RGBA8888:
return RK_FORMAT_RGBA_8888;
case IMAGE_FORMAT_YUV420SP_NV12:
return RK_FORMAT_YCbCr_420_SP;
case IMAGE_FORMAT_YUV420SP_NV21:
return RK_FORMAT_YCrCb_420_SP;
default:
return -1;
}
}
int get_image_size(image_buffer_t* image)
{
if (image == NULL) {
return 0;
}
switch (image->format)
{
case IMAGE_FORMAT_GRAY8:
return image->width * image->height;
case IMAGE_FORMAT_RGB888:
return image->width * image->height * 3;
case IMAGE_FORMAT_RGBA8888:
return image->width * image->height * 4;
case IMAGE_FORMAT_YUV420SP_NV12:
case IMAGE_FORMAT_YUV420SP_NV21:
return image->width * image->height * 3 / 2;
default:
break;
}
}
static int convert_image_rga(image_buffer_t* src_img, image_buffer_t* dst_img, image_rect_t* src_box, image_rect_t* dst_box, char color)
{
int ret = 0;
int srcWidth = src_img->width;
int srcHeight = src_img->height;
void *src = src_img->virt_addr;
int src_fd = src_img->fd;
void *src_phy = NULL;
int srcFmt = get_rga_fmt(src_img->format);
int dstWidth = dst_img->width;
int dstHeight = dst_img->height;
void *dst = dst_img->virt_addr;
int dst_fd = dst_img->fd;
void *dst_phy = NULL;
int dstFmt = get_rga_fmt(dst_img->format);
int rotate = 0;
int use_handle = 0;
#if defined(LIBRGA_IM2D_HANDLE)
use_handle = 1;
#endif
// printf("src width=%d height=%d fmt=0x%x virAddr=0x%p fd=%d\n",
// srcWidth, srcHeight, srcFmt, src, src_fd);
// printf("dst width=%d height=%d fmt=0x%x virAddr=0x%p fd=%d\n",
// dstWidth, dstHeight, dstFmt, dst, dst_fd);
// printf("rotate=%d\n", rotate);
int usage = 0;
IM_STATUS ret_rga = IM_STATUS_NOERROR;
// set rga usage
usage |= rotate;
// set rga rect
im_rect srect;
im_rect drect;
im_rect prect;
memset(&prect, 0, sizeof(im_rect));
if (src_box != NULL) {
srect.x = src_box->left;
srect.y = src_box->top;
srect.width = src_box->right - src_box->left + 1;
srect.height = src_box->bottom - src_box->top + 1;
} else {
srect.x = 0;
srect.y = 0;
srect.width = srcWidth;
srect.height = srcHeight;
}
if (dst_box != NULL) {
drect.x = dst_box->left;
drect.y = dst_box->top;
drect.width = dst_box->right - dst_box->left + 1;
drect.height = dst_box->bottom - dst_box->top + 1;
} else {
drect.x = 0;
drect.y = 0;
drect.width = dstWidth;
drect.height = dstHeight;
}
// set rga buffer
rga_buffer_t rga_buf_src;
rga_buffer_t rga_buf_dst;
rga_buffer_t pat;
rga_buffer_handle_t rga_handle_src = 0;
rga_buffer_handle_t rga_handle_dst = 0;
memset(&pat, 0, sizeof(rga_buffer_t));
im_handle_param_t in_param;
in_param.width = srcWidth;
in_param.height = srcHeight;
in_param.format = srcFmt;
im_handle_param_t dst_param;
dst_param.width = dstWidth;
dst_param.height = dstHeight;
dst_param.format = dstFmt;
if (use_handle) {
if (src_phy != NULL) {
rga_handle_src = importbuffer_physicaladdr((uint64_t)src_phy, &in_param);
} else if (src_fd > 0) {
rga_handle_src = importbuffer_fd(src_fd, &in_param);
} else {
rga_handle_src = importbuffer_virtualaddr(src, &in_param);
}
if (rga_handle_src <= 0) {
printf("src handle error %d\n", rga_handle_src);
ret = -1;
goto err;
}
rga_buf_src = wrapbuffer_handle(rga_handle_src, srcWidth, srcHeight, srcFmt, srcWidth, srcHeight);
} else {
if (src_phy != NULL) {
rga_buf_src = wrapbuffer_physicaladdr(src_phy, srcWidth, srcHeight, srcFmt, srcWidth, srcHeight);
} else if (src_fd > 0) {
rga_buf_src = wrapbuffer_fd(src_fd, srcWidth, srcHeight, srcFmt, srcWidth, srcHeight);
} else {
rga_buf_src = wrapbuffer_virtualaddr(src, srcWidth, srcHeight, srcFmt, srcWidth, srcHeight);
}
}
if (use_handle) {
if (dst_phy != NULL) {
rga_handle_dst = importbuffer_physicaladdr((uint64_t)dst_phy, &dst_param);
} else if (dst_fd > 0) {
rga_handle_dst = importbuffer_fd(dst_fd, &dst_param);
} else {
rga_handle_dst = importbuffer_virtualaddr(dst, &dst_param);
}
if (rga_handle_dst <= 0) {
printf("dst handle error %d\n", rga_handle_dst);
ret = -1;
goto err;
}
rga_buf_dst = wrapbuffer_handle(rga_handle_dst, dstWidth, dstHeight, dstFmt, dstWidth, dstHeight);
} else {
if (dst_phy != NULL) {
rga_buf_dst = wrapbuffer_physicaladdr(dst_phy, dstWidth, dstHeight, dstFmt, dstWidth, dstHeight);
} else if (dst_fd > 0) {
rga_buf_dst = wrapbuffer_fd(dst_fd, dstWidth, dstHeight, dstFmt, dstWidth, dstHeight);
} else {
rga_buf_dst = wrapbuffer_virtualaddr(dst, dstWidth, dstHeight, dstFmt, dstWidth, dstHeight);
}
}
if (drect.width != dstWidth || drect.height != dstHeight) {
im_rect dst_whole_rect = {0, 0, dstWidth, dstHeight};
int imcolor;
char* p_imcolor = &imcolor;
p_imcolor[0] = color;
p_imcolor[1] = color;
p_imcolor[2] = color;
p_imcolor[3] = color;
printf("fill dst image (x y w h)=(%d %d %d %d) with color=0x%x\n",
dst_whole_rect.x, dst_whole_rect.y, dst_whole_rect.width, dst_whole_rect.height, imcolor);
ret_rga = imfill(rga_buf_dst, dst_whole_rect, imcolor);
if (ret_rga <= 0) {
if (dst != NULL) {
size_t dst_size = get_image_size(dst_img);
memset(dst, color, dst_size);
} else {
printf("Warning: Can not fill color on target image\n");
}
}
}
// rga process
ret_rga = improcess(rga_buf_src, rga_buf_dst, pat, srect, drect, prect, usage);
if (ret_rga <= 0) {
printf("Error on improcess STATUS=%d\n", ret_rga);
printf("RGA error message: %s\n", imStrError((IM_STATUS)ret_rga));
ret = -1;
}
err:
if (rga_handle_src > 0) {
releasebuffer_handle(rga_handle_src);
}
if (rga_handle_dst > 0) {
releasebuffer_handle(rga_handle_dst);
}
// printf("finish\n");
return ret;
}
int convert_image(image_buffer_t* src_img, image_buffer_t* dst_img, image_rect_t* src_box, image_rect_t* dst_box, char color)
{
int ret;
printf("src width=%d height=%d fmt=0x%x virAddr=0x%p fd=%d\n",
src_img->width, src_img->height, src_img->format, src_img->virt_addr, src_img->fd);
printf("dst width=%d height=%d fmt=0x%x virAddr=0x%p fd=%d\n",
dst_img->width, dst_img->height, dst_img->format, dst_img->virt_addr, dst_img->fd);
if (src_box != NULL) {
printf("src_box=(%d %d %d %d)\n", src_box->left, src_box->top, src_box->right, src_box->bottom);
}
if (dst_box != NULL) {
printf("dst_box=(%d %d %d %d)\n", dst_box->left, dst_box->top, dst_box->right, dst_box->bottom);
}
printf("color=0x%x\n", color);
ret = convert_image_rga(src_img, dst_img, src_box, dst_box, color);
if (ret != 0) {
printf("try convert image use cpu\n");
ret = convert_image_cpu(src_img, dst_img, src_box, dst_box, color);
}
return ret;
}
int convert_image_with_letterbox(image_buffer_t* src_image, image_buffer_t* dst_image, letterbox_t* letterbox, char color)
{
int ret = 0;
int allow_slight_change = 1;
int src_w = src_image->width;
int src_h = src_image->height;
int dst_w = dst_image->width;
int dst_h = dst_image->height;
int resize_w = dst_w;
int resize_h = dst_h;
int padding_w = 0;
int padding_h = 0;
int _left_offset = 0;
int _top_offset = 0;
float scale = 1.0;
image_rect_t src_box;
src_box.left = 0;
src_box.top = 0;
src_box.right = src_image->width - 1;
src_box.bottom = src_image->height - 1;
image_rect_t dst_box;
dst_box.left = 0;
dst_box.top = 0;
dst_box.right = dst_image->width - 1;
dst_box.bottom = dst_image->height - 1;
float _scale_w = (float)dst_w / src_w;
float _scale_h = (float)dst_h / src_h;
if(_scale_w < _scale_h) {
scale = _scale_w;
resize_h = (int) src_h*scale;
} else {
scale = _scale_h;
resize_w = (int) src_w*scale;
}
// slight change image size for align
if (allow_slight_change == 1 && (resize_w % 4 != 0)) {
resize_w -= resize_w % 4;
}
if (allow_slight_change == 1 && (resize_h % 2 != 0)) {
resize_h -= resize_h % 2;
}
// padding
padding_h = dst_h - resize_h;
padding_w = dst_w - resize_w;
// center
if (_scale_w < _scale_h) {
dst_box.top = padding_h / 2;
if (dst_box.top % 2 != 0) {
dst_box.top -= dst_box.top % 2;
if (dst_box.top < 0) {
dst_box.top = 0;
}
}
dst_box.bottom = dst_box.top + resize_h - 1;
_top_offset = dst_box.top;
} else {
dst_box.left = padding_w / 2;
if (dst_box.left % 2 != 0) {
dst_box.left -= dst_box.left % 2;
if (dst_box.left < 0) {
dst_box.left = 0;
}
}
dst_box.right = dst_box.left + resize_w - 1;
_left_offset = dst_box.left;
}
printf("scale=%f dst_box=(%d %d %d %d) allow_slight_change=%d _left_offset=%d _top_offset=%d padding_w=%d padding_h=%d\n",
scale, dst_box.left, dst_box.top, dst_box.right, dst_box.bottom, allow_slight_change,
_left_offset, _top_offset, padding_w, padding_h);
//set offset and scale
if(letterbox != NULL){
letterbox->scale = scale;
letterbox->x_pad = _left_offset;
letterbox->y_pad = _top_offset;
}
// alloc memory buffer for dst image,
// remember to free
if (dst_image->virt_addr == NULL && dst_image->fd <= 0) {
int dst_size = get_image_size(dst_image);
dst_image->virt_addr = (uint8_t *)malloc(dst_size);
if (dst_image->virt_addr == NULL) {
printf("malloc size %d error\n", dst_size);
return -1;
}
}
ret = convert_image(src_image, dst_image, &src_box, &dst_box, color);
return ret;
}
+73
View File
@@ -0,0 +1,73 @@
#ifndef _RKNN_MODEL_ZOO_IMAGE_UTILS_H_
#define _RKNN_MODEL_ZOO_IMAGE_UTILS_H_
#ifdef __cplusplus
extern "C" {
#endif
#include "common.h"
/**
* @brief LetterBox
*
*/
typedef struct {
int x_pad;
int y_pad;
float scale;
} letterbox_t;
/**
* @brief Read image file (support png/jpeg/bmp)
*
* @param path [in] Image path
* @param image [out] Read image
* @return int 0: success; -1: error
*/
int read_image(const char* path, image_buffer_t* image);
/**
* @brief Write image file (support jpg/png)
*
* @param path [in] Image path
* @param image [in] Image for write (only support IMAGE_FORMAT_RGB888)
* @return int 0: success; -1: error
*/
int write_image(const char* path, const image_buffer_t* image);
/**
* @brief Convert image for resize and pixel format change
*
* @param src_image [in] Source Image
* @param dst_image [out] Target Image
* @param src_box [in] Crop rectangle on source image
* @param dst_box [in] Crop rectangle on target image
* @param color [in] Pading color if dst_box can not fill target image
* @return int
*/
int convert_image(image_buffer_t* src_image, image_buffer_t* dst_image, image_rect_t* src_box, image_rect_t* dst_box, char color);
/**
* @brief Convert image with letterbox
*
* @param src_image [in] Source Image
* @param dst_image [out] Target Image
* @param letterbox [out] Letterbox
* @param color [in] Fill color on target image
* @return int
*/
int convert_image_with_letterbox(image_buffer_t* src_image, image_buffer_t* dst_image, letterbox_t* letterbox, char color);
/**
* @brief Get the image size
*
* @param image [in] Image
* @return int image size
*/
int get_image_size(image_buffer_t* image);
#ifdef __cplusplus
} // extern "C"
#endif
#endif // _RKNN_MODEL_ZOO_IMAGE_UTILS_H_
+494
View File
@@ -0,0 +1,494 @@
// Copyright (c) 2021 by Rockchip Electronics Co., Ltd. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "yolov8.h"
#include <math.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/time.h>
#include <set>
#include <vector>
#define LABEL_NALE_TXT_PATH "./model/coco_80_labels_list.txt"
static char *labels[OBJ_CLASS_NUM];
inline static int clamp(float val, int min, int max) { return val > min ? (val < max ? val : max) : min; }
static char *readLine(FILE *fp, char *buffer, int *len)
{
int ch;
int i = 0;
size_t buff_len = 0;
buffer = (char *)malloc(buff_len + 1);
if (!buffer)
return NULL; // Out of memory
while ((ch = fgetc(fp)) != '\n' && ch != EOF)
{
buff_len++;
void *tmp = realloc(buffer, buff_len + 1);
if (tmp == NULL)
{
free(buffer);
return NULL; // Out of memory
}
buffer = (char *)tmp;
buffer[i] = (char)ch;
i++;
}
buffer[i] = '\0';
*len = buff_len;
// Detect end
if (ch == EOF && (i == 0 || ferror(fp)))
{
free(buffer);
return NULL;
}
return buffer;
}
static int readLines(const char *fileName, char *lines[], int max_line)
{
FILE *file = fopen(fileName, "r");
char *s;
int i = 0;
int n = 0;
if (file == NULL)
{
printf("Open %s fail!\n", fileName);
return -1;
}
while ((s = readLine(file, s, &n)) != NULL)
{
lines[i++] = s;
if (i >= max_line)
break;
}
fclose(file);
return i;
}
static int loadLabelName(const char *locationFilename, char *label[])
{
printf("load lable %s\n", locationFilename);
readLines(locationFilename, label, OBJ_CLASS_NUM);
return 0;
}
static float CalculateOverlap(float xmin0, float ymin0, float xmax0, float ymax0, float xmin1, float ymin1, float xmax1,
float ymax1)
{
float w = fmax(0.f, fmin(xmax0, xmax1) - fmax(xmin0, xmin1) + 1.0);
float h = fmax(0.f, fmin(ymax0, ymax1) - fmax(ymin0, ymin1) + 1.0);
float i = w * h;
float u = (xmax0 - xmin0 + 1.0) * (ymax0 - ymin0 + 1.0) + (xmax1 - xmin1 + 1.0) * (ymax1 - ymin1 + 1.0) - i;
return u <= 0.f ? 0.f : (i / u);
}
static int nms(int validCount, std::vector<float> &outputLocations, std::vector<int> classIds, std::vector<int> &order,
int filterId, float threshold)
{
for (int i = 0; i < validCount; ++i)
{
if (order[i] == -1 || classIds[i] != filterId)
{
continue;
}
int n = order[i];
for (int j = i + 1; j < validCount; ++j)
{
int m = order[j];
if (m == -1 || classIds[i] != filterId)
{
continue;
}
float xmin0 = outputLocations[n * 4 + 0];
float ymin0 = outputLocations[n * 4 + 1];
float xmax0 = outputLocations[n * 4 + 0] + outputLocations[n * 4 + 2];
float ymax0 = outputLocations[n * 4 + 1] + outputLocations[n * 4 + 3];
float xmin1 = outputLocations[m * 4 + 0];
float ymin1 = outputLocations[m * 4 + 1];
float xmax1 = outputLocations[m * 4 + 0] + outputLocations[m * 4 + 2];
float ymax1 = outputLocations[m * 4 + 1] + outputLocations[m * 4 + 3];
float iou = CalculateOverlap(xmin0, ymin0, xmax0, ymax0, xmin1, ymin1, xmax1, ymax1);
if (iou > threshold)
{
order[j] = -1;
}
}
}
return 0;
}
static int quick_sort_indice_inverse(std::vector<float> &input, int left, int right, std::vector<int> &indices)
{
float key;
int key_index;
int low = left;
int high = right;
if (left < right)
{
key_index = indices[left];
key = input[left];
while (low < high)
{
while (low < high && input[high] <= key)
{
high--;
}
input[low] = input[high];
indices[low] = indices[high];
while (low < high && input[low] >= key)
{
low++;
}
input[high] = input[low];
indices[high] = indices[low];
}
input[low] = key;
indices[low] = key_index;
quick_sort_indice_inverse(input, left, low - 1, indices);
quick_sort_indice_inverse(input, low + 1, right, indices);
}
return low;
}
static float sigmoid(float x) { return 1.0 / (1.0 + expf(-x)); }
static float unsigmoid(float y) { return -1.0 * logf((1.0 / y) - 1.0); }
inline static int32_t __clip(float val, float min, float max)
{
float f = val <= min ? min : (val >= max ? max : val);
return f;
}
static int8_t qnt_f32_to_affine(float f32, int32_t zp, float scale)
{
float dst_val = (f32 / scale) + zp;
int8_t res = (int8_t)__clip(dst_val, -128, 127);
return res;
}
static float deqnt_affine_to_f32(int8_t qnt, int32_t zp, float scale) { return ((float)qnt - (float)zp) * scale; }
void compute_dfl(float* tensor, int dfl_len, float* box){
for (int b=0; b<4; b++){
float exp_t[dfl_len];
float exp_sum=0;
float acc_sum=0;
for (int i=0; i< dfl_len; i++){
exp_t[i] = exp(tensor[i+b*dfl_len]);
exp_sum += exp_t[i];
}
for (int i=0; i< dfl_len; i++){
acc_sum += exp_t[i]/exp_sum *i;
}
box[b] = acc_sum;
}
}
static int process_i8(int8_t *box_tensor, int32_t box_zp, float box_scale,
int8_t *score_tensor, int32_t score_zp, float score_scale,
int8_t *score_sum_tensor, int32_t score_sum_zp, float score_sum_scale,
int grid_h, int grid_w, int stride, int dfl_len,
std::vector<float> &boxes,
std::vector<float> &objProbs,
std::vector<int> &classId,
float threshold)
{
int validCount = 0;
int grid_len = grid_h * grid_w;
int8_t score_thres_i8 = qnt_f32_to_affine(threshold, score_zp, score_scale);
int8_t score_sum_thres_i8 = qnt_f32_to_affine(threshold, score_sum_zp, score_sum_scale);
for (int i = 0; i < grid_h; i++)
{
for (int j = 0; j < grid_w; j++)
{
int offset = i* grid_w + j;
int max_class_id = -1;
// 通过 score sum 起到快速过滤的作用
if (score_sum_tensor != nullptr){
if (score_sum_tensor[offset] < score_sum_thres_i8){
continue;
}
}
int8_t max_score = -score_zp;
for (int c= 0; c< OBJ_CLASS_NUM; c++){
if ((score_tensor[offset] > score_thres_i8) && (score_tensor[offset] > max_score))
{
max_score = score_tensor[offset];
max_class_id = c;
}
offset += grid_len;
}
// compute box
if (max_score> score_thres_i8){
offset = i* grid_w + j;
float box[4];
float before_dfl[dfl_len*4];
for (int k=0; k< dfl_len*4; k++){
before_dfl[k] = deqnt_affine_to_f32(box_tensor[offset], box_zp, box_scale);
offset += grid_len;
}
compute_dfl(before_dfl, dfl_len, box);
float x1,y1,x2,y2,w,h;
x1 = (-box[0] + j + 0.5)*stride;
y1 = (-box[1] + i + 0.5)*stride;
x2 = (box[2] + j + 0.5)*stride;
y2 = (box[3] + i + 0.5)*stride;
w = x2 - x1;
h = y2 - y1;
boxes.push_back(x1);
boxes.push_back(y1);
boxes.push_back(w);
boxes.push_back(h);
objProbs.push_back(deqnt_affine_to_f32(max_score, score_zp, score_scale));
classId.push_back(max_class_id);
validCount ++;
}
}
}
return validCount;
}
static int process_fp32(float *box_tensor, float *score_tensor, float *score_sum_tensor,
int grid_h, int grid_w, int stride, int dfl_len,
std::vector<float> &boxes,
std::vector<float> &objProbs,
std::vector<int> &classId,
float threshold)
{
int validCount = 0;
int grid_len = grid_h * grid_w;
for (int i = 0; i < grid_h; i++)
{
for (int j = 0; j < grid_w; j++)
{
int offset = i* grid_w + j;
int max_class_id = -1;
// 通过 score sum 起到快速过滤的作用
if (score_sum_tensor != nullptr){
if (score_sum_tensor[offset] < threshold){
continue;
}
}
float max_score = 0;
for (int c= 0; c< OBJ_CLASS_NUM; c++){
if ((score_tensor[offset] > threshold) && (score_tensor[offset] > max_score))
{
max_score = score_tensor[offset];
max_class_id = c;
}
offset += grid_len;
}
// compute box
if (max_score> threshold){
offset = i* grid_w + j;
float box[4];
float before_dfl[dfl_len*4];
for (int k=0; k< dfl_len*4; k++){
before_dfl[k] = box_tensor[offset];
offset += grid_len;
}
compute_dfl(before_dfl, dfl_len, box);
float x1,y1,x2,y2,w,h;
x1 = (-box[0] + j + 0.5)*stride;
y1 = (-box[1] + i + 0.5)*stride;
x2 = (box[2] + j + 0.5)*stride;
y2 = (box[3] + i + 0.5)*stride;
w = x2 - x1;
h = y2 - y1;
boxes.push_back(x1);
boxes.push_back(y1);
boxes.push_back(w);
boxes.push_back(h);
objProbs.push_back(max_score);
classId.push_back(max_class_id);
validCount ++;
}
}
}
return validCount;
}
int post_process(rknn_app_context_t *app_ctx, rknn_output *outputs, letterbox_t *letter_box, float conf_threshold, float nms_threshold, object_detect_result_list *od_results)
{
std::vector<float> filterBoxes;
std::vector<float> objProbs;
std::vector<int> classId;
int validCount = 0;
int stride = 0;
int grid_h = 0;
int grid_w = 0;
int model_in_w = app_ctx->model_width;
int model_in_h = app_ctx->model_height;
memset(od_results, 0, sizeof(object_detect_result_list));
// default 3 branch
int dfl_len = app_ctx->output_attrs[0].dims[1] /4;
int output_per_branch = app_ctx->io_num.n_output / 3;
for (int i = 0; i < 3; i++)
{
void *score_sum = nullptr;
int32_t score_sum_zp = 0;
float score_sum_scale = 1.0;
if (output_per_branch == 3){
score_sum = outputs[i*output_per_branch + 2].buf;
score_sum_zp = app_ctx->output_attrs[i*output_per_branch + 2].zp;
score_sum_scale = app_ctx->output_attrs[i*output_per_branch + 2].scale;
}
int box_idx = i*output_per_branch;
int score_idx = i*output_per_branch + 1;
grid_h = app_ctx->output_attrs[box_idx].dims[2];
grid_w = app_ctx->output_attrs[box_idx].dims[3];
stride = model_in_h / grid_h;
if (app_ctx->is_quant)
{
validCount += process_i8((int8_t *)outputs[box_idx].buf, app_ctx->output_attrs[box_idx].zp, app_ctx->output_attrs[box_idx].scale,
(int8_t *)outputs[score_idx].buf, app_ctx->output_attrs[score_idx].zp, app_ctx->output_attrs[score_idx].scale,
(int8_t *)score_sum, score_sum_zp, score_sum_scale,
grid_h, grid_w, stride, dfl_len,
filterBoxes, objProbs, classId, conf_threshold);
}
else
{
validCount += process_fp32((float *)outputs[box_idx].buf, (float *)outputs[score_idx].buf, (float *)score_sum,
grid_h, grid_w, stride, dfl_len,
filterBoxes, objProbs, classId, conf_threshold);
}
}
// no object detect
if (validCount <= 0)
{
return 0;
}
std::vector<int> indexArray;
for (int i = 0; i < validCount; ++i)
{
indexArray.push_back(i);
}
quick_sort_indice_inverse(objProbs, 0, validCount - 1, indexArray);
std::set<int> class_set(std::begin(classId), std::end(classId));
for (auto c : class_set)
{
nms(validCount, filterBoxes, classId, indexArray, c, nms_threshold);
}
int last_count = 0;
od_results->count = 0;
/* box valid detect target */
for (int i = 0; i < validCount; ++i)
{
if (indexArray[i] == -1 || last_count >= OBJ_NUMB_MAX_SIZE)
{
continue;
}
int n = indexArray[i];
float x1 = filterBoxes[n * 4 + 0] - letter_box->x_pad;
float y1 = filterBoxes[n * 4 + 1] - letter_box->y_pad;
float x2 = x1 + filterBoxes[n * 4 + 2];
float y2 = y1 + filterBoxes[n * 4 + 3];
int id = classId[n];
float obj_conf = objProbs[i];
od_results->results[last_count].box.left = (int)(clamp(x1, 0, model_in_w) / letter_box->scale);
od_results->results[last_count].box.top = (int)(clamp(y1, 0, model_in_h) / letter_box->scale);
od_results->results[last_count].box.right = (int)(clamp(x2, 0, model_in_w) / letter_box->scale);
od_results->results[last_count].box.bottom = (int)(clamp(y2, 0, model_in_h) / letter_box->scale);
od_results->results[last_count].prop = obj_conf;
od_results->results[last_count].cls_id = id;
last_count++;
}
od_results->count = last_count;
return 0;
}
int init_post_process()
{
int ret = 0;
ret = loadLabelName(LABEL_NALE_TXT_PATH, labels);
if (ret < 0)
{
printf("Load %s failed!\n", LABEL_NALE_TXT_PATH);
return -1;
}
return 0;
}
char *coco_cls_to_name(int cls_id)
{
if (cls_id >= OBJ_CLASS_NUM)
{
return "null";
}
if (labels[cls_id])
{
return labels[cls_id];
}
return "null";
}
void deinit_post_process()
{
for (int i = 0; i < OBJ_CLASS_NUM; i++)
{
if (labels[i] != nullptr)
{
free(labels[i]);
labels[i] = nullptr;
}
}
}
+36
View File
@@ -0,0 +1,36 @@
#ifndef _RKNN_YOLOV8_DEMO_POSTPROCESS_H_
#define _RKNN_YOLOV8_DEMO_POSTPROCESS_H_
#include <stdint.h>
#include <vector>
#include "rknn_api.h"
#include "common.h"
#include "image_utils.h"
#define OBJ_NAME_MAX_SIZE 64
#define OBJ_NUMB_MAX_SIZE 128
#define OBJ_CLASS_NUM 80
#define NMS_THRESH 0.45
#define BOX_THRESH 0.25
// class rknn_app_context_t;
typedef struct {
image_rect_t box;
float prop;
int cls_id;
} object_detect_result;
typedef struct {
int id;
int count;
object_detect_result results[OBJ_NUMB_MAX_SIZE];
} object_detect_result_list;
int init_post_process();
void deinit_post_process();
char *coco_cls_to_name(int cls_id);
int post_process(rknn_app_context_t *app_ctx, rknn_output *outputs, letterbox_t *letter_box, float conf_threshold, float nms_threshold, object_detect_result_list *od_results);
void deinitPostProcess();
#endif //_RKNN_YOLOV8_DEMO_POSTPROCESS_H_
+250
View File
@@ -0,0 +1,250 @@
// Copyright (c) 2023 by Rockchip Electronics Co., Ltd. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <math.h>
#include "yolov8.h"
#include "common.h"
#include "file_utils.h"
#include "image_utils.h"
static void dump_tensor_attr(rknn_tensor_attr *attr)
{
printf(" index=%d, name=%s, n_dims=%d, dims=[%d, %d, %d, %d], n_elems=%d, size=%d, fmt=%s, type=%s, qnt_type=%s, "
"zp=%d, scale=%f\n",
attr->index, attr->name, attr->n_dims, attr->dims[0], attr->dims[1], attr->dims[2], attr->dims[3],
attr->n_elems, attr->size, get_format_string(attr->fmt), get_type_string(attr->type),
get_qnt_type_string(attr->qnt_type), attr->zp, attr->scale);
}
int init_yolov8_model(const char *model_path, rknn_app_context_t *app_ctx)
{
int ret;
int model_len = 0;
char *model;
rknn_context ctx = 0;
// Load RKNN Model
model_len = read_data_from_file(model_path, &model);
if (model == NULL)
{
printf("load_model fail!\n");
return -1;
}
ret = rknn_init(&ctx, model, model_len, 0, NULL);
free(model);
if (ret < 0)
{
printf("rknn_init fail! ret=%d\n", ret);
return -1;
}
// Get Model Input Output Number
rknn_input_output_num io_num;
ret = rknn_query(ctx, RKNN_QUERY_IN_OUT_NUM, &io_num, sizeof(io_num));
if (ret != RKNN_SUCC)
{
printf("rknn_query fail! ret=%d\n", ret);
return -1;
}
printf("model input num: %d, output num: %d\n", io_num.n_input, io_num.n_output);
// Get Model Input Info
printf("input tensors:\n");
rknn_tensor_attr input_attrs[io_num.n_input];
memset(input_attrs, 0, sizeof(input_attrs));
for (int i = 0; i < io_num.n_input; i++)
{
input_attrs[i].index = i;
ret = rknn_query(ctx, RKNN_QUERY_INPUT_ATTR, &(input_attrs[i]), sizeof(rknn_tensor_attr));
if (ret != RKNN_SUCC)
{
printf("rknn_query fail! ret=%d\n", ret);
return -1;
}
dump_tensor_attr(&(input_attrs[i]));
}
// Get Model Output Info
printf("output tensors:\n");
rknn_tensor_attr output_attrs[io_num.n_output];
memset(output_attrs, 0, sizeof(output_attrs));
for (int i = 0; i < io_num.n_output; i++)
{
output_attrs[i].index = i;
ret = rknn_query(ctx, RKNN_QUERY_OUTPUT_ATTR, &(output_attrs[i]), sizeof(rknn_tensor_attr));
if (ret != RKNN_SUCC)
{
printf("rknn_query fail! ret=%d\n", ret);
return -1;
}
dump_tensor_attr(&(output_attrs[i]));
}
// Set to context
app_ctx->rknn_ctx = ctx;
// TODO
if (output_attrs[0].qnt_type == RKNN_TENSOR_QNT_AFFINE_ASYMMETRIC && output_attrs[0].type == RKNN_TENSOR_INT8)
{
app_ctx->is_quant = true;
}
else
{
app_ctx->is_quant = false;
}
app_ctx->io_num = io_num;
app_ctx->input_attrs = (rknn_tensor_attr *)malloc(io_num.n_input * sizeof(rknn_tensor_attr));
memcpy(app_ctx->input_attrs, input_attrs, io_num.n_input * sizeof(rknn_tensor_attr));
app_ctx->output_attrs = (rknn_tensor_attr *)malloc(io_num.n_output * sizeof(rknn_tensor_attr));
memcpy(app_ctx->output_attrs, output_attrs, io_num.n_output * sizeof(rknn_tensor_attr));
if (input_attrs[0].fmt == RKNN_TENSOR_NCHW)
{
printf("model is NCHW input fmt\n");
app_ctx->model_channel = input_attrs[0].dims[1];
app_ctx->model_height = input_attrs[0].dims[2];
app_ctx->model_width = input_attrs[0].dims[3];
}
else
{
printf("model is NHWC input fmt\n");
app_ctx->model_height = input_attrs[0].dims[1];
app_ctx->model_width = input_attrs[0].dims[2];
app_ctx->model_channel = input_attrs[0].dims[3];
}
printf("model input height=%d, width=%d, channel=%d\n",
app_ctx->model_height, app_ctx->model_width, app_ctx->model_channel);
return 0;
}
int release_yolov8_model(rknn_app_context_t *app_ctx)
{
if (app_ctx->rknn_ctx != 0)
{
rknn_destroy(app_ctx->rknn_ctx);
app_ctx->rknn_ctx = 0;
}
if (app_ctx->input_attrs != NULL)
{
free(app_ctx->input_attrs);
app_ctx->input_attrs = NULL;
}
if (app_ctx->output_attrs != NULL)
{
free(app_ctx->output_attrs);
app_ctx->output_attrs = NULL;
}
return 0;
}
int inference_yolov8_model(rknn_app_context_t *app_ctx, image_buffer_t *img, object_detect_result_list *od_results)
{
int ret;
image_buffer_t dst_img;
letterbox_t letter_box;
rknn_input inputs[app_ctx->io_num.n_input];
rknn_output outputs[app_ctx->io_num.n_output];
const float nms_threshold = NMS_THRESH; // 默认的NMS阈值
const float box_conf_threshold = BOX_THRESH; // 默认的置信度阈值
int bg_color = 114;
if ((!app_ctx) || !(img) || (!od_results))
{
return -1;
}
memset(od_results, 0x00, sizeof(*od_results));
memset(&letter_box, 0, sizeof(letterbox_t));
memset(&dst_img, 0, sizeof(image_buffer_t));
memset(inputs, 0, sizeof(inputs));
memset(outputs, 0, sizeof(outputs));
// Pre Process
dst_img.width = app_ctx->model_width;
dst_img.height = app_ctx->model_height;
dst_img.format = IMAGE_FORMAT_RGB888;
dst_img.size = get_image_size(&dst_img);
dst_img.virt_addr = (unsigned char *)malloc(dst_img.size);
if (dst_img.virt_addr == NULL)
{
printf("malloc buffer size:%d fail!\n", dst_img.size);
return -1;
}
// letterbox
ret = convert_image_with_letterbox(img, &dst_img, &letter_box, bg_color);
if (ret < 0)
{
printf("convert_image_with_letterbox fail! ret=%d\n", ret);
return -1;
}
// Set Input Data
inputs[0].index = 0;
inputs[0].type = RKNN_TENSOR_UINT8;
inputs[0].fmt = RKNN_TENSOR_NHWC;
inputs[0].size = app_ctx->model_width * app_ctx->model_height * app_ctx->model_channel;
inputs[0].buf = dst_img.virt_addr;
ret = rknn_inputs_set(app_ctx->rknn_ctx, app_ctx->io_num.n_input, inputs);
if (ret < 0)
{
printf("rknn_input_set fail! ret=%d\n", ret);
return -1;
}
// Run
printf("rknn_run\n");
ret = rknn_run(app_ctx->rknn_ctx, nullptr);
if (ret < 0)
{
printf("rknn_run fail! ret=%d\n", ret);
return -1;
}
// Get Output
memset(outputs, 0, sizeof(outputs));
for (int i = 0; i < app_ctx->io_num.n_output; i++)
{
outputs[i].index = i;
outputs[i].want_float = (!app_ctx->is_quant);
}
ret = rknn_outputs_get(app_ctx->rknn_ctx, app_ctx->io_num.n_output, outputs, NULL);
if (ret < 0)
{
printf("rknn_outputs_get fail! ret=%d\n", ret);
goto out;
}
// Post Process
post_process(app_ctx, outputs, &letter_box, box_conf_threshold, nms_threshold, od_results);
// Remeber to release rknn output
rknn_outputs_release(app_ctx->rknn_ctx, app_ctx->io_num.n_output, outputs);
out:
if (dst_img.virt_addr != NULL)
{
free(dst_img.virt_addr);
}
return ret;
}
+42
View File
@@ -0,0 +1,42 @@
// Copyright (c) 2023 by Rockchip Electronics Co., Ltd. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#ifndef _RKNN_DEMO_YOLOV8_H_
#define _RKNN_DEMO_YOLOV8_H_
#include "rknn_api.h"
#include "common.h"
typedef struct {
rknn_context rknn_ctx;
rknn_input_output_num io_num;
rknn_tensor_attr* input_attrs;
rknn_tensor_attr* output_attrs;
int model_channel;
int model_width;
int model_height;
bool is_quant;
} rknn_app_context_t;
#include "postprocess.h"
int init_yolov8_model(const char* model_path, rknn_app_context_t* app_ctx);
int release_yolov8_model(rknn_app_context_t* app_ctx);
int inference_yolov8_model(rknn_app_context_t* app_ctx, image_buffer_t* img, object_detect_result_list* od_results);
#endif //_RKNN_DEMO_YOLOV8_H_
+41
View File
@@ -0,0 +1,41 @@
QT += core
CONFIG += c++11 link_pkgconfig concurrent
PKGCONFIG += opencv4
SOURCES += main.cpp \
aiengine.cpp \
aiengineinference.cpp \
aiengineinferenceonnx.cpp \
aienginertsplistener.cpp
opi5 {
message("OPI5 build")
PKGCONFIG += opencv4 librga stb libturbojpeg
INCLUDEPATH += /usr/include/rga # not correct in pkg-config file
QMAKE_CXXFLAGS += -DOPI5_BUILD
LIBS += /usr/local/lib/librknnrt.so
OBJECTS_DIR = objs-opi5
SOURCES += \
./opi5/image_drawing.c \
./opi5/file_utils.c \
./opi5/postprocess.cc \
./opi5/image_utils.c \
./opi5/yolov8.cc
}
else {
message("PC build")
message("You must use YOLOv10 ONNX files")
SOURCES += pc/inference.cpp
QMAKE_CXXFLAGS += -DPC_BUILD
INCLUDEPATH += /opt/onnxruntime-linux-x64-1.18.0/include
LIBS += /opt/onnxruntime-linux-x64-1.18.0/lib/libonnxruntime.so.1.18.0
OBJECTS_DIR = objs-pc
QMAKE_LFLAGS += -Wl,-rpath,/opt/onnxruntime-linux-x64-1.18.0/lib
}
HEADERS += \
aiengine.h \
aiengineinference.h \
aiengineinferenceonnx.h \
aienginertsplistener.h \
config.h
+200
View File
@@ -0,0 +1,200 @@
#include "inference.h"
#include <algorithm>
#include <iostream>
const std::vector<std::string> InferenceEngine::CLASS_NAMES = {
"person", "bicycle", "car", "motorcycle", "airplane", "bus", "train", "truck", "boat", "traffic light",
"fire hydrant", "stop sign", "parking meter", "bench", "bird", "cat", "dog", "horse", "sheep", "cow",
"elephant", "bear", "zebra", "giraffe", "backpack", "umbrella", "handbag", "tie", "suitcase", "frisbee",
"skis", "snowboard", "sports ball", "kite", "baseball bat", "baseball glove", "skateboard", "surfboard",
"tennis racket", "bottle", "wine glass", "cup", "fork", "knife", "spoon", "bowl", "banana", "apple",
"sandwich", "orange", "broccoli", "carrot", "hot dog", "pizza", "donut", "cake", "chair", "couch",
"potted plant", "bed", "dining table", "toilet", "tv", "laptop", "mouse", "remote", "keyboard",
"cell phone", "microwave", "oven", "toaster", "sink", "refrigerator", "book", "clock", "vase",
"scissors", "teddy bear", "hair drier", "toothbrush"};
InferenceEngine::InferenceEngine(const std::string &model_path)
: env(ORT_LOGGING_LEVEL_WARNING, "ONNXRuntime"),
session_options(),
session(env, model_path.c_str(), session_options),
input_shape{1, 3, 640, 640}
{
session_options.SetIntraOpNumThreads(1);
session_options.SetGraphOptimizationLevel(GraphOptimizationLevel::ORT_ENABLE_BASIC);
}
InferenceEngine::~InferenceEngine() {}
/*
* Function to preprocess the image
*
* @param image_path: path to the image
* @param orig_width: original width of the image
* @param orig_height: original height of the image
*
* @return: vector of floats representing the preprocessed image
*/
std::vector<float> InferenceEngine::preprocessImage(const cv::Mat &image)
{
if (image.empty())
{
throw std::runtime_error("Could not read the image");
}
cv::Mat resized_image;
cv::resize(image, resized_image, cv::Size(input_shape[2], input_shape[3]));
resized_image.convertTo(resized_image, CV_32F, 1.0 / 255);
std::vector<cv::Mat> channels(3);
cv::split(resized_image, channels);
std::vector<float> input_tensor_values;
for (int c = 0; c < 3; ++c)
{
input_tensor_values.insert(input_tensor_values.end(), (float *)channels[c].data, (float *)channels[c].data + input_shape[2] * input_shape[3]);
}
return input_tensor_values;
}
/*
* Function to filter the detections based on the confidence threshold
*
* @param results: vector of floats representing the output tensor
* @param confidence_threshold: minimum confidence threshold
* @param img_width: width of the input image
* @param img_height: height of the input image
* @param orig_width: original width of the image
* @param orig_height: original height of the image
*
* @return: vector of Detection objects
*/
std::vector<Detection> InferenceEngine::filterDetections(const std::vector<float> &results, float confidence_threshold, int img_width, int img_height, int orig_width, int orig_height)
{
std::vector<Detection> detections;
const int num_detections = results.size() / 6;
for (int i = 0; i < num_detections; ++i)
{
float left = results[i * 6 + 0];
float top = results[i * 6 + 1];
float right = results[i * 6 + 2];
float bottom = results[i * 6 + 3];
float confidence = results[i * 6 + 4];
int class_id = results[i * 6 + 5];
if (confidence >= confidence_threshold)
{
int x = static_cast<int>(left * orig_width / img_width);
int y = static_cast<int>(top * orig_height / img_height);
int width = static_cast<int>((right - left) * orig_width / img_width);
int height = static_cast<int>((bottom - top) * orig_height / img_height);
detections.push_back(
{confidence,
cv::Rect(x, y, width, height),
class_id,
CLASS_NAMES[class_id]});
}
}
return detections;
}
/*
* Function to run inference
*
* @param input_tensor_values: vector of floats representing the input tensor
*
* @return: vector of floats representing the output tensor
*/
std::vector<float> InferenceEngine::runInference(const std::vector<float> &input_tensor_values)
{
Ort::AllocatorWithDefaultOptions allocator;
std::string input_name = getInputName();
std::string output_name = getOutputName();
const char *input_name_ptr = input_name.c_str();
const char *output_name_ptr = output_name.c_str();
Ort::MemoryInfo memory_info = Ort::MemoryInfo::CreateCpu(OrtArenaAllocator, OrtMemTypeDefault);
Ort::Value input_tensor = Ort::Value::CreateTensor<float>(memory_info, const_cast<float *>(input_tensor_values.data()), input_tensor_values.size(), input_shape.data(), input_shape.size());
auto output_tensors = session.Run(Ort::RunOptions{nullptr}, &input_name_ptr, &input_tensor, 1, &output_name_ptr, 1);
float *floatarr = output_tensors[0].GetTensorMutableData<float>();
size_t output_tensor_size = output_tensors[0].GetTensorTypeAndShapeInfo().GetElementCount();
return std::vector<float>(floatarr, floatarr + output_tensor_size);
}
/*
* Function to draw the labels on the image
*
* @param image: input image
* @param detections: vector of Detection objects
*
* @return: image with labels drawn
*/
cv::Mat InferenceEngine::draw_labels(const cv::Mat &image, const std::vector<Detection> &detections)
{
cv::Mat result = image.clone();
for (const auto &detection : detections)
{
cv::rectangle(result, detection.bbox, cv::Scalar(0, 255, 0), 2);
std::string label = detection.class_name + ": " + std::to_string(detection.confidence);
int baseLine;
cv::Size labelSize = cv::getTextSize(label, cv::FONT_HERSHEY_SIMPLEX, 0.5, 1, &baseLine);
cv::rectangle(
result,
cv::Point(detection.bbox.x, detection.bbox.y - labelSize.height),
cv::Point(detection.bbox.x + labelSize.width, detection.bbox.y + baseLine),
cv::Scalar(255, 255, 255),
cv::FILLED);
cv::putText(
result,
label,
cv::Point(
detection.bbox.x,
detection.bbox.y),
cv::FONT_HERSHEY_SIMPLEX,
0.5,
cv::Scalar(0, 0, 0),
1);
}
return result;
}
/*
* Function to get the input name
*
* @return: name of the input tensor
*/
std::string InferenceEngine::getInputName()
{
Ort::AllocatorWithDefaultOptions allocator;
Ort::AllocatedStringPtr name_allocator = session.GetInputNameAllocated(0, allocator);
return std::string(name_allocator.get());
}
/*
* Function to get the output name
*
* @return: name of the output tensor
*/
std::string InferenceEngine::getOutputName()
{
Ort::AllocatorWithDefaultOptions allocator;
Ort::AllocatedStringPtr name_allocator = session.GetOutputNameAllocated(0, allocator);
return std::string(name_allocator.get());
}
+44
View File
@@ -0,0 +1,44 @@
#ifndef INFERENCE_H
#define INFERENCE_H
#include <onnxruntime_cxx_api.h>
#include <opencv2/opencv.hpp>
#include <vector>
#include <string>
struct Detection
{
float confidence;
cv::Rect bbox;
int class_id;
std::string class_name;
};
class InferenceEngine
{
public:
InferenceEngine(const std::string &model_path);
~InferenceEngine();
std::vector<float> preprocessImage(const cv::Mat &image);
std::vector<Detection> filterDetections(const std::vector<float> &results, float confidence_threshold, int img_width, int img_height, int orig_width, int orig_height);
std::vector<float> runInference(const std::vector<float> &input_tensor_values);
cv::Mat draw_labels(const cv::Mat &image, const std::vector<Detection> &detections);
std::vector<int64_t> input_shape;
private:
Ort::Env env;
Ort::SessionOptions session_options;
Ort::Session session;
std::string getInputName();
std::string getOutputName();
static const std::vector<std::string> CLASS_NAMES;
};
#endif // INFERENCE_H