mirror of
https://github.com/azaion/autopilot.git
synced 2026-04-22 06:46:34 +00:00
Renamed opi_player as rtsp_ai_player
This commit is contained in:
@@ -0,0 +1,43 @@
|
||||
# rtsp_ai_player
|
||||
|
||||
`rtsp_ai_player` is an application that listens to an RTSP stream, analyzes images with an AI model, and shows the results visually. It also controls a gimbal camera to zoom in on the recognized objects. Application uses YOLOv8 AI models converted to ONNX format.
|
||||
|
||||
## How to convert PT file to ONNX format
|
||||
```bash
|
||||
yolo export model=azaion-2024-06-28.pt dynamic=False format=onnx imgsz=640,640
|
||||
```
|
||||
|
||||
## How to use application locally on a Linux PC.
|
||||
|
||||
### Install ffmpeg and mediamtx RTSP server:
|
||||
```bash
|
||||
sudo apt update
|
||||
sudo apt install ffmpeg
|
||||
wget https://github.com/bluenviron/mediamtx/releases/download/v1.8.4/mediamtx_v1.8.4_linux_amd64.tar.gz
|
||||
mkdir -p mediamtx && tar xf mediamtx_v1.8.4_linux_amd64.tar.gz -C mediamtx
|
||||
```
|
||||
|
||||
### Launch mediamtx RTSP server:
|
||||
```bash
|
||||
# Start mediamtx RTSP server:
|
||||
cd mediamtx
|
||||
./mediamtx
|
||||
```
|
||||
|
||||
### Play mp4 video file from RTSP server:
|
||||
```bash
|
||||
ffmpeg -re -stream_loop -1 -i $HOME/azaion/models/videos/for_ai_short.mp4 -c copy -f rtsp rtsp://localhost:8554/live.stream
|
||||
```
|
||||
|
||||
### Test RTSP streaming with ffplay:
|
||||
```bash
|
||||
ffplay -rtsp_transport tcp rtsp://localhost:8554/live.stream
|
||||
```
|
||||
|
||||
### Compile and run rtsp_ai_player:
|
||||
```bash
|
||||
cd autopilot/misc/rtsp_ai_player
|
||||
qmake6 && make
|
||||
./rtsp_ai_player ~/azaion/models/azaion/azaion-2024-06-28.onnx
|
||||
```
|
||||
|
||||
@@ -0,0 +1,71 @@
|
||||
#include <QDebug>
|
||||
#include <opencv2/highgui.hpp>
|
||||
#include "aiengine.h"
|
||||
#include "aiengineinference.h"
|
||||
|
||||
#if defined(OPI5_BUILD)
|
||||
#include "src-opi5/aiengineinferenceopi5.h"
|
||||
#elif defined(OPENCV_BUILD)
|
||||
#include "src-opencv-onnx/aiengineinferenceopencvonnx.h"
|
||||
#else
|
||||
#include "src-onnx-runtime/aiengineinferenceonnxruntime.h"
|
||||
#endif
|
||||
|
||||
// Constructs the AI engine: wires the RTSP listener, selects the
// build-specific inference backend, and moves inference to its own thread.
//
// modelPath: path to the ONNX (or RKNN) model file given on the command line.
AiEngine::AiEngine(QString modelPath, QObject *parent)
    : QObject{parent}
{
    // Frames arrive from the RTSP listener's background capture loop.
    mRtspListener = new AiEngineRtspListener(this);
    connect(mRtspListener, &AiEngineRtspListener::frameReceived, this, &AiEngine::frameReceivedSlot);

    // Pick the inference backend at compile time (matches the #include
    // selection at the top of this file).
#if defined(OPI5_BUILD)
    mInference = new AiEngineInferenceOpi5(modelPath);
#elif defined(OPENCV_BUILD)
    mInference = new AiEngineInferenceOpencvOnnx(modelPath);
#else
    mInference = new AiEngineInferencevOnnxRuntime(modelPath);
#endif

    // Host the inference backend in its own thread; both connections are
    // queued, so frames and results cross thread boundaries safely.
    // NOTE(review): the thread is never quit()/wait()ed and mInference has
    // no QObject parent, so neither is cleaned up on shutdown -- confirm
    // this is acceptable for a process-lifetime object.
    QThread *inferenceThread = new QThread(this);
    mInference->moveToThread(inferenceThread);
    connect(mInference, &AiEngineInference::resultsReady, this, &AiEngine::inferenceResultsReceivedSlot, Qt::QueuedConnection);
    connect(this, &AiEngine::inferenceFrame, mInference, &AiEngineInference::performInferenceSlot, Qt::QueuedConnection);
    inferenceThread->start();

    mGimbalControl = new AiEngineGimbalControl(this);
}
|
||||
|
||||
|
||||
void AiEngine::start(void)
|
||||
{
|
||||
mRtspListener->startListening();
|
||||
}
|
||||
|
||||
|
||||
void AiEngine::stop(void)
|
||||
{
|
||||
mRtspListener->stopListening();
|
||||
}
|
||||
|
||||
|
||||
// Receives a finished inference result (queued from the inference thread),
// forwards it to the gimbal controller (OPi5 builds only) and displays the
// annotated frame.
void AiEngine::inferenceResultsReceivedSlot(AiEngineInferenceResult result)
{
    //qDebug() << "AiEngine got inference results in thread: " << QThread::currentThreadId();

#ifdef OPI5_BUILD
    mGimbalControl->inferenceResultSlot(result);
#endif

    cv::imshow("Received Frame", result.frame);
    // HighGUI only processes window events inside waitKey(); without this
    // call the window may never repaint.
    cv::waitKey(1);
}
|
||||
|
||||
|
||||
// Receives a raw frame from the RTSP listener. Frames are dropped while the
// inference backend is still busy, so inference always works on the newest
// frame instead of building up a queue.
void AiEngine::frameReceivedSlot(cv::Mat frame)
{
    //qDebug() << "AiEngine got frame from RTSP listener in thread: " << QThread::currentThreadId();
    //cv::imshow("Received Frame", frame);

    // NOTE(review): isActive() returns a plain bool written by the inference
    // thread -- presumably benign here, but consider std::atomic<bool>.
    if (mInference->isActive() == false) {
        //qDebug() << "AiEngine. Inference thread is free. Sending frame to it.";
        // clone() so the emitted copy does not share pixel data with the
        // capture loop's reusable buffer.
        emit inferenceFrame(frame.clone());
    }
}
|
||||
@@ -0,0 +1,29 @@
|
||||
#pragma once
|
||||
|
||||
#include <QObject>
|
||||
#include <opencv2/core.hpp>
|
||||
#include <opencv2/videoio.hpp>
|
||||
#include "aienginertsplistener.h"
|
||||
#include "aiengineinference.h"
|
||||
#include "aienginegimbalcontrol.h"
|
||||
|
||||
// Top-level coordinator: receives frames from the RTSP listener, hands them
// to the thread-hosted inference backend, and feeds detection results to the
// gimbal controller and the on-screen display.
class AiEngine : public QObject
{
    Q_OBJECT
public:
    // modelPath: model file passed on the command line.
    explicit AiEngine(QString modelPath, QObject *parent = nullptr);
    void start(void);  // start listening to the RTSP stream
    void stop(void);   // stop the RTSP capture loop

public slots:
    // Frame arriving from the RTSP capture thread.
    void frameReceivedSlot(cv::Mat frame);
    // Finished detection result arriving from the inference thread.
    void inferenceResultsReceivedSlot(AiEngineInferenceResult result);

signals:
    // Queued to the inference thread; carries a deep-copied frame.
    void inferenceFrame(cv::Mat frame);

private:
    AiEngineRtspListener *mRtspListener;    // owned via QObject parent
    AiEngineInference *mInference;          // backend selected at build time
    AiEngineGimbalControl *mGimbalControl;  // owned via QObject parent
};
|
||||
@@ -0,0 +1,73 @@
|
||||
#include <QDebug>
|
||||
#include <QVector>
|
||||
#include "aienginegimbalcontrol.h"
|
||||
|
||||
AiEngineGimbalControl::AiEngineGimbalControl(QObject *parent)
|
||||
: QObject{parent}
|
||||
{}
|
||||
|
||||
|
||||
// Computes the smallest rectangle that encloses every detected object.
// objects: detections in inference-image coordinates. The caller guarantees
// a non-empty list; with an empty list the sentinel extremes are returned.
AiEngineRectangle AiEngineGimbalControl::getGroupCoordinates(QVector<AiEngineObject> &objects)
{
    AiEngineRectangle groupRectangle;
    // Sentinels: any real coordinate replaces them on the first iteration.
    groupRectangle.top = 1000000;
    groupRectangle.left = 1000000;
    groupRectangle.bottom = 0;
    groupRectangle.right = 0;

    for (const AiEngineObject &object : objects) {
        const AiEngineRectangle &rect = object.rectangle;
        groupRectangle.top = qMin(groupRectangle.top, rect.top);
        groupRectangle.left = qMin(groupRectangle.left, rect.left);
        groupRectangle.bottom = qMax(groupRectangle.bottom, rect.bottom);
        groupRectangle.right = qMax(groupRectangle.right, rect.right);
    }

    return groupRectangle;
}
|
||||
|
||||
|
||||
// Reacts to a detection result: computes the bounding box of all detected
// objects, scales it from the 640x360 inference resolution back to the A8
// camera's 1280x720 frame, validates it, and commands the gimbal to zoom.
void AiEngineGimbalControl::inferenceResultSlot(AiEngineInferenceResult result)
{
    if (result.objects.size() == 0) {
        return;
    }

    // We got a list of all recognized objects, but at least for now we zoom
    // to all objects at once rather than to each individually: take the
    // minimal rectangle that contains them all.
    AiEngineRectangle groupRect = getGroupCoordinates(result.objects);

    // AI did inference with 640x360 resolution. Scale back to A8's 1280x720.
    groupRect.top *= 2;
    groupRect.left *= 2;
    groupRect.bottom *= 2;
    groupRect.right *= 2;

    // Sanity checks: discard rectangles outside the camera frame or with
    // degenerate extents.
    if (groupRect.top > 720 || groupRect.bottom > 720) {
        qDebug() << "ERROR! inferenceResultSlot() groupRect.top > 720 || groupRect.bottom > 720";
        return;
    }

    if (groupRect.left > 1280 || groupRect.right > 1280) {
        qDebug() << "ERROR! inferenceResultSlot() groupRect.left > 1280 || groupRect.right > 1280";
        return;
    }

    if ((groupRect.bottom <= groupRect.top) || (groupRect.right <= groupRect.left)) {
        qDebug() << "ERROR! (groupRect.bottom <= groupRect.top) || (groupRect.right <= groupRect.left)";
        return;
    }

    // Leftover personal debug tag ("TUOMAS!!") removed; keep a neutral trace.
    qDebug() << "Zooming to rectangle top=" << groupRect.top << "left=" << groupRect.left
             << "bottom=" << groupRect.bottom << "right=" << groupRect.right;

    mRemoteControl.sendData(groupRect.top, groupRect.left, groupRect.bottom, groupRect.right);
}
|
||||
@@ -0,0 +1,20 @@
|
||||
#pragma once
|
||||
|
||||
#include <QObject>
|
||||
#include <QElapsedTimer>
|
||||
#include "aiengineinference.h"
|
||||
#include "remoteControl.hpp"
|
||||
|
||||
// Drives the gimbal camera: converts detection results into a zoom command
// sent over the A8 remote-control link.
class AiEngineGimbalControl : public QObject
{
    Q_OBJECT
public:
    explicit AiEngineGimbalControl(QObject *parent = nullptr);

private:
    // Smallest rectangle enclosing all detections.
    AiEngineRectangle getGroupCoordinates(QVector<AiEngineObject> &objects);
    RemoteControl mRemoteControl;  // command link to the A8 gimbal camera

public slots:
    // Consumes one inference result and (if valid) commands a zoom.
    void inferenceResultSlot(AiEngineInferenceResult results);
};
|
||||
@@ -0,0 +1,50 @@
|
||||
#include "aiengineinference.h"
|
||||
|
||||
|
||||
AiEngineInference::AiEngineInference(QString modelPath, QObject *parent)
|
||||
: QObject{parent},
|
||||
mModelPath(modelPath),
|
||||
mActive(false)
|
||||
{}
|
||||
|
||||
|
||||
bool AiEngineInference::isActive(void)
|
||||
{
|
||||
return mActive;
|
||||
}
|
||||
|
||||
|
||||
// Letterboxes `src` into an INFERENCE_SQUARE_WIDTH x INFERENCE_SQUARE_HEIGHT
// canvas: downscales (preserving aspect ratio) only when the source exceeds
// the target in either dimension, then pads the rest with black. The image
// is anchored at the top-left corner, so detection coordinates map back to
// the source with a pure scale factor (no offset).
cv::Mat AiEngineInference::resizeAndPad(const cv::Mat& src)
{
    // Aspect ratio of the source image (width / height).
    float aspectRatio = static_cast<float>(src.cols) / src.rows;

    // Determine the new size while maintaining the aspect ratio.
    int newWidth = src.cols;
    int newHeight = src.rows;
    if (src.cols > INFERENCE_SQUARE_WIDTH || src.rows > INFERENCE_SQUARE_HEIGHT) {
        if (aspectRatio > 1) {
            // Width is greater than height: clamp the width.
            newWidth = INFERENCE_SQUARE_WIDTH;
            newHeight = static_cast<int>(INFERENCE_SQUARE_WIDTH / aspectRatio);
        } else {
            // Height is greater than or equal to width: clamp the height.
            newHeight = INFERENCE_SQUARE_HEIGHT;
            newWidth = static_cast<int>(INFERENCE_SQUARE_HEIGHT * aspectRatio);
        }
    }

    // Resize only when the size actually changes; the original resized
    // unconditionally, which copies the whole frame for nothing when the
    // input already fits.
    cv::Mat resized;
    if (newWidth != src.cols || newHeight != src.rows) {
        cv::resize(src, resized, cv::Size(newWidth, newHeight));
    } else {
        resized = src;
    }

    // Black square canvas of the inference input size.
    cv::Mat output(INFERENCE_SQUARE_HEIGHT, INFERENCE_SQUARE_WIDTH, src.type(), cv::Scalar(0, 0, 0));

    // Copy the (possibly resized) image into the top-left corner.
    resized.copyTo(output(cv::Rect(0, 0, resized.cols, resized.rows)));

    return output;
}
|
||||
@@ -0,0 +1,55 @@
|
||||
#pragma once
|
||||
|
||||
#include <QObject>
|
||||
#include <QString>
|
||||
#include <QVector>
|
||||
#include <opencv2/core.hpp>
|
||||
#include <opencv2/imgproc.hpp>
|
||||
|
||||
|
||||
// Model input size; the YOLOv8 models here are exported with imgsz=640,640.
const int INFERENCE_SQUARE_WIDTH = 640;
const int INFERENCE_SQUARE_HEIGHT = 640;


// Axis-aligned box in inference-image pixel coordinates.
class AiEngineRectangle {
public:
    int left;
    int top;
    int right;
    int bottom;
};


// One detected object.
class AiEngineObject {
public:
    AiEngineRectangle rectangle;  // bounding box in inference coordinates
    float propability;            // confidence score (field name kept as-is for callers)
    int classId;                  // index into the model's class-name list
};


// Everything produced by one inference pass.
class AiEngineInferenceResult {
public:
    cv::Mat frame;                    // annotated frame for display
    QVector<AiEngineObject> objects;  // detections found in the frame
};
|
||||
|
||||
|
||||
// Abstract base for the pluggable inference backends (OPi5 RKNN, OpenCV DNN,
// ONNX Runtime). Instances are moved to a dedicated QThread; frames arrive
// through the queued performInferenceSlot() connection.
class AiEngineInference : public QObject
{
    Q_OBJECT
public:
    explicit AiEngineInference(QString modelPath, QObject *parent = nullptr);
    // True while a frame is being processed. NOTE(review): mActive is a
    // plain bool read from another thread -- consider std::atomic<bool>.
    bool isActive(void);

protected:
    // Top-left anchored letterbox helper shared by all backends.
    cv::Mat resizeAndPad(const cv::Mat& src);
    QString mModelPath;  // model file given at construction
    bool mActive;        // busy flag, set/cleared by the backend slot

public slots:
    // Runs inference on one frame; implemented per backend.
    virtual void performInferenceSlot(cv::Mat frame) = 0;

signals:
    // Emitted with the detections produced for a processed frame.
    void resultsReady(AiEngineInferenceResult results);
};
|
||||
@@ -0,0 +1,54 @@
|
||||
#include <QDebug>
|
||||
#include <QtConcurrent/QtConcurrent>
|
||||
#include "aienginertsplistener.h"
|
||||
#include "config.h"
|
||||
|
||||
|
||||
AiEngineRtspListener::AiEngineRtspListener(QObject *parent)
|
||||
: QObject{parent},
|
||||
mIsListening(false)
|
||||
{
|
||||
}
|
||||
|
||||
|
||||
// Stops the capture loop on destruction.
// NOTE(review): the QtConcurrent task is not joined here, so the loop may
// still be running briefly while this object is destroyed -- confirm the
// shutdown ordering.
AiEngineRtspListener::~AiEngineRtspListener()
{
    stopListening();
}
|
||||
|
||||
|
||||
// Spawns the blocking capture loop on a Qt global thread-pool thread.
// NOTE(review): calling this twice starts two concurrent loops; the
// returned QFuture is intentionally discarded.
void AiEngineRtspListener::startListening(void)
{
    mIsListening = true;

    (void)QtConcurrent::run([this](){
        listenLoop();
    });
}
|
||||
|
||||
|
||||
// Asks the capture loop to exit and releases the capture device.
// NOTE(review): mCap.release() runs on the caller's thread while
// listenLoop() may still be reading mCap on the pool thread -- potential
// race; verify against OpenCV's VideoCapture thread-safety guarantees.
void AiEngineRtspListener::stopListening()
{
    mIsListening = false;

    if (mCap.isOpened()) {
        mCap.release();
    }
}
|
||||
|
||||
|
||||
void AiEngineRtspListener::listenLoop(void)
|
||||
{
|
||||
qDebug() << "AiEngineRtspListener loop running in thread: " << QThread::currentThreadId();
|
||||
|
||||
mCap.open(rtspVideoUrl.toStdString());
|
||||
cv::Mat frame;
|
||||
|
||||
while (mIsListening) {
|
||||
mCap >> frame;
|
||||
|
||||
if (frame.empty() == false) {
|
||||
emit frameReceived(frame.clone());
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,23 @@
|
||||
#pragma once
|
||||
|
||||
#include <QThread>
|
||||
#include <opencv2/core.hpp>
|
||||
#include <opencv2/videoio.hpp>
|
||||
|
||||
// Pulls frames from an RTSP stream (URL from config.h) on a background
// thread and publishes them via the frameReceived() signal.
class AiEngineRtspListener : public QObject
{
    Q_OBJECT
public:
    explicit AiEngineRtspListener(QObject *parent = nullptr);
    ~AiEngineRtspListener();
    void startListening(void);  // spawn the capture loop (QtConcurrent)
    void stopListening(void);   // request loop exit and release the device

private:
    void listenLoop(void);      // blocking capture loop body
    // NOTE(review): written and read from different threads -- consider
    // std::atomic<bool>.
    bool mIsListening;
    cv::VideoCapture mCap;      // OpenCV capture handle for the RTSP URL

signals:
    // One decoded frame (deep copy) per capture iteration.
    void frameReceived(cv::Mat frame);
};
|
||||
@@ -0,0 +1,10 @@
|
||||
Armoured_vehicle
|
||||
Truck
|
||||
Vehicle
|
||||
Artillery
|
||||
Shadow_artillery
|
||||
Tranches
|
||||
Military man
|
||||
Tyre_tracks
|
||||
Additional_protection_tank
|
||||
Smoke
|
||||
@@ -0,0 +1,80 @@
|
||||
person
|
||||
bicycle
|
||||
car
|
||||
motorcycle
|
||||
airplane
|
||||
bus
|
||||
train
|
||||
truck
|
||||
boat
|
||||
traffic light
|
||||
fire hydrant
|
||||
stop sign
|
||||
parking meter
|
||||
bench
|
||||
bird
|
||||
cat
|
||||
dog
|
||||
horse
|
||||
sheep
|
||||
cow
|
||||
elephant
|
||||
bear
|
||||
zebra
|
||||
giraffe
|
||||
backpack
|
||||
umbrella
|
||||
handbag
|
||||
tie
|
||||
suitcase
|
||||
frisbee
|
||||
skis
|
||||
snowboard
|
||||
sports ball
|
||||
kite
|
||||
baseball bat
|
||||
baseball glove
|
||||
skateboard
|
||||
surfboard
|
||||
tennis racket
|
||||
bottle
|
||||
wine glass
|
||||
cup
|
||||
fork
|
||||
knife
|
||||
spoon
|
||||
bowl
|
||||
banana
|
||||
apple
|
||||
sandwich
|
||||
orange
|
||||
broccoli
|
||||
carrot
|
||||
hot dog
|
||||
pizza
|
||||
donut
|
||||
cake
|
||||
chair
|
||||
couch
|
||||
potted plant
|
||||
bed
|
||||
dining table
|
||||
toilet
|
||||
tv
|
||||
laptop
|
||||
mouse
|
||||
remote
|
||||
keyboard
|
||||
cell phone
|
||||
microwave
|
||||
oven
|
||||
toaster
|
||||
sink
|
||||
refrigerator
|
||||
book
|
||||
clock
|
||||
vase
|
||||
scissors
|
||||
teddy bear
|
||||
hair drier
|
||||
toothbrush
|
||||
@@ -0,0 +1,10 @@
|
||||
#pragma once
|
||||
|
||||
#include <QString>
|
||||
|
||||
#ifdef OPI5_BUILD
|
||||
//QString rtspVideoUrl = "rtsp://192.168.168.11:8554/live.stream";
|
||||
QString rtspVideoUrl = "rtsp://192.168.0.25:8554/main.264";
|
||||
#else
|
||||
QString rtspVideoUrl = "rtsp://localhost:8554/live.stream";
|
||||
#endif
|
||||
@@ -0,0 +1 @@
|
||||
yolo export model=yolov8n.pt opset=12 simplify=True dynamic=False format=onnx imgsz=640,640
|
||||
@@ -0,0 +1,27 @@
|
||||
#include <QCoreApplication>
|
||||
#include <QDebug>
|
||||
#include <QFile>
|
||||
#include <opencv2/opencv.hpp>
|
||||
#include "aiengine.h"
|
||||
|
||||
|
||||
int main(int argc, char *argv[])
|
||||
{
|
||||
QCoreApplication app(argc, argv);
|
||||
|
||||
if (argc != 2) {
|
||||
qDebug() << "\nUsage: " << argv[0] << " <model_file>";
|
||||
return 1;
|
||||
}
|
||||
|
||||
QString modelFile = argv[1];
|
||||
if (QFile::exists(modelFile) == false) {
|
||||
qDebug() << "\nUsage: " << modelFile << " <model_file>";
|
||||
return 1;
|
||||
}
|
||||
|
||||
AiEngine aiEngine(modelFile);
|
||||
aiEngine.start();
|
||||
|
||||
return app.exec();
|
||||
}
|
||||
@@ -0,0 +1,39 @@
|
||||
# rtsp_ai_player build configuration.
# Backends: default = ONNX Runtime; qmake CONFIG+=opi5 = Rockchip RKNN;
# qmake CONFIG+=opencv = OpenCV DNN.
QT += core network
# QtConcurrent (used by AiEngineRtspListener via QtConcurrent::run) is a Qt
# module and must be requested through QT, not only through CONFIG.
QT += concurrent
QT -= gui
CONFIG += c++11 link_pkgconfig concurrent console
PKGCONFIG += opencv4
MOC_DIR = moc
OBJECTS_DIR = obj

SOURCES = $$PWD/*.cpp $$PWD/../../misc/camera/a8_remote/remoteControl.cpp
HEADERS = $$PWD/*.h
INCLUDEPATH += $$PWD/../../misc/camera/a8_remote

opi5 {
    message("OPI5 build")
    PKGCONFIG += opencv4 librga stb libturbojpeg
    INCLUDEPATH += /usr/include/rga
    QMAKE_CXXFLAGS += -DOPI5_BUILD
    LIBS += /usr/local/lib/librknnrt.so
    SOURCES += $$PWD/src-opi5/*.c $$PWD/src-opi5/*.cpp $$PWD/src-opi5/*.cc
    HEADERS += $$PWD/src-opi5/*.h
} else:opencv {
    message("OpenCV build")
    message("You must use YOLOv8 ONNX files. Azaion model does not work!")
    message("OpenCV must be version 4.10.0 installed to /usr/local/")
    QMAKE_CXXFLAGS += -DOPENCV_BUILD
    QMAKE_LFLAGS += -Wl,-rpath,/usr/local/lib
    SOURCES += $$PWD/src-opencv-onnx/*.cpp
    HEADERS += $$PWD/src-opencv-onnx/*.h
} else {
    message("ONNX build")
    message("You must use YOLOv8 ONNX files. Azaion ONNX model also works fine.")
    QMAKE_CXXFLAGS += -DONNX_BUILD
    INCLUDEPATH += /opt/onnxruntime-linux-x64-1.18.0/include
    LIBS += /opt/onnxruntime-linux-x64-1.18.0/lib/libonnxruntime.so.1.18.0
    QMAKE_LFLAGS += -Wl,-rpath,/opt/onnxruntime-linux-x64-1.18.0/lib
    QMAKE_LFLAGS += -Wl,-rpath,/usr/local/lib
    SOURCES += $$PWD/src-onnx-runtime/*.cpp
    HEADERS += $$PWD/src-onnx-runtime/*.h
}
|
||||
@@ -0,0 +1,97 @@
|
||||
#include <QDebug>
|
||||
#include <QThread>
|
||||
#include <vector>
|
||||
#include "aiengineinferenceonnxruntime.h"
|
||||
|
||||
|
||||
static const float confThreshold = 0.4f;
|
||||
static const float iouThreshold = 0.4f;
|
||||
static const float maskThreshold = 0.5f;
|
||||
|
||||
|
||||
AiEngineInferencevOnnxRuntime::AiEngineInferencevOnnxRuntime(QString modelPath, QObject *parent) :
|
||||
AiEngineInference{modelPath, parent},
|
||||
mPredictor(modelPath.toStdString(), confThreshold, iouThreshold, maskThreshold)
|
||||
{
|
||||
qDebug() << "TUOMAS AiEngineInferencevOnnxRuntime() mModelPath=" << mModelPath;
|
||||
|
||||
mClassNames = {
|
||||
"Armoured vehicle",
|
||||
"Truck",
|
||||
"Vehicle",
|
||||
"Artillery",
|
||||
"Shadow artillery",
|
||||
"Trenches",
|
||||
"Military man",
|
||||
"Tyre tracks",
|
||||
"Additional protection tank",
|
||||
"Smoke"
|
||||
};
|
||||
}
|
||||
|
||||
|
||||
// Draws each detection onto a copy of `image`: a green bounding box plus a
// "<class>: <confidence>" label on a white background.
// Returns the annotated copy; `image` itself is not modified.
cv::Mat AiEngineInferencevOnnxRuntime::drawLabels(const cv::Mat &image, const std::vector<Yolov8Result> &detections)
{
    cv::Mat result = image.clone();

    for (const auto &detection : detections)
    {
        cv::rectangle(result, detection.box, cv::Scalar(0, 255, 0), 2);

        // Guard against a model with more classes than mClassNames entries
        // (e.g. an 80-class COCO model against the 10-entry Azaion list):
        // unchecked indexing would be undefined behavior.
        std::string className = (detection.classId >= 0 && detection.classId < (int)mClassNames.size())
            ? mClassNames[detection.classId]
            : std::to_string(detection.classId);
        std::string label = className + ": " + std::to_string(detection.conf);

        int baseLine;
        cv::Size labelSize = cv::getTextSize(label, cv::FONT_HERSHEY_SIMPLEX, 0.5, 1, &baseLine);

        // White background behind the text so it stays readable.
        cv::rectangle(
            result,
            cv::Point(detection.box.x, detection.box.y - labelSize.height),
            cv::Point(detection.box.x + labelSize.width, detection.box.y + baseLine),
            cv::Scalar(255, 255, 255),
            cv::FILLED);

        cv::putText(
            result,
            label,
            cv::Point(
                detection.box.x,
                detection.box.y),
            cv::FONT_HERSHEY_SIMPLEX,
            0.5,
            cv::Scalar(0, 0, 0),
            1);
    }

    return result;
}
||||
|
||||
|
||||
// Runs one YOLOv8 inference pass (invoked via a queued connection, so this
// executes on the inference thread). Emits resultsReady() only when at
// least one object was detected.
void AiEngineInferencevOnnxRuntime::performInferenceSlot(cv::Mat frame)
{
    // RAII guard: mActive must be cleared even if predict() throws,
    // otherwise the frame source would consider this backend busy forever.
    struct BusyGuard {
        bool &flag;
        ~BusyGuard() { flag = false; }
    } busyGuard{mActive};
    mActive = true;

    cv::Mat scaledImage = resizeAndPad(frame);
    std::vector<Yolov8Result> detections = mPredictor.predict(scaledImage);
    AiEngineInferenceResult result;

    for (uint i = 0; i < detections.size(); i++) {
        const Yolov8Result &detection = detections[i];

        // Convert the predictor's cv::Rect into the engine's rectangle type.
        AiEngineObject object;
        object.classId = detection.classId;
        object.propability = detection.conf;
        object.rectangle.top = detection.box.y;
        object.rectangle.left = detection.box.x;
        object.rectangle.bottom = detection.box.y + detection.box.height;
        object.rectangle.right = detection.box.x + detection.box.width;
        result.objects.append(object);
    }

    if (result.objects.empty() == false) {
        qDebug() << __PRETTY_FUNCTION__ << "detections:" << detections.size();
        result.frame = drawLabels(scaledImage, detections);
        emit resultsReady(result);
    }
}
|
||||
@@ -0,0 +1,20 @@
|
||||
#pragma once
|
||||
|
||||
#include <QObject>
|
||||
#include "aiengineinference.h"
|
||||
#include "yolov8Predictor.h"
|
||||
|
||||
// ONNX Runtime based inference backend (the default desktop build).
class AiEngineInferencevOnnxRuntime : public AiEngineInference
{
    Q_OBJECT
public:
    explicit AiEngineInferencevOnnxRuntime(QString modelPath, QObject *parent = nullptr);

public slots:
    // One full inference pass; runs on the inference thread.
    void performInferenceSlot(cv::Mat frame) override;

private:
    // Annotates a copy of `image` with boxes and class labels.
    cv::Mat drawLabels(const cv::Mat &image, const std::vector<Yolov8Result> &detections);
    YOLOPredictor mPredictor;              // ONNX Runtime session wrapper
    std::vector<std::string> mClassNames;  // label per classId
};
|
||||
@@ -0,0 +1,167 @@
|
||||
#include "utils.h"
|
||||
|
||||
// Product of all dimensions in `vector` (e.g. a tensor's element count).
// Returns 0 for an empty vector.
size_t utils::vectorProduct(const std::vector<int64_t> &vector)
{
    if (vector.empty())
        return 0;

    size_t product = 1;
    for (size_t i = 0; i < vector.size(); i++) {
        product *= vector[i];
    }

    return product;
}
|
||||
|
||||
// Converts a UTF-8 C string to a std::wstring -- presumably for APIs that
// take wide-character paths (e.g. ONNX Runtime on Windows); verify callers.
// NOTE(review): std::codecvt_utf8 is deprecated since C++17.
std::wstring utils::charToWstring(const char *str)
{
    typedef std::codecvt_utf8<wchar_t> convert_type;
    std::wstring_convert<convert_type, wchar_t> converter;

    return converter.from_bytes(str);
}
|
||||
|
||||
std::vector<std::string> utils::loadNames(const std::string &path)
|
||||
{
|
||||
// load class names
|
||||
std::vector<std::string> classNames;
|
||||
std::ifstream infile(path);
|
||||
if (infile.good())
|
||||
{
|
||||
std::string line;
|
||||
while (getline(infile, line))
|
||||
{
|
||||
if (line.back() == '\r')
|
||||
line.pop_back();
|
||||
classNames.emplace_back(line);
|
||||
}
|
||||
infile.close();
|
||||
}
|
||||
else
|
||||
{
|
||||
std::cerr << "ERROR: Failed to access class name path: " << path << std::endl;
|
||||
}
|
||||
// set color
|
||||
srand(time(0));
|
||||
|
||||
for (int i = 0; i < (int)(2 * classNames.size()); i++)
|
||||
{
|
||||
int b = rand() % 256;
|
||||
int g = rand() % 256;
|
||||
int r = rand() % 256;
|
||||
colors.push_back(cv::Scalar(b, g, r));
|
||||
}
|
||||
return classNames;
|
||||
}
|
||||
|
||||
// Blends detection overlays (mask tint, boxes, labels) into `im` in place.
// Uses the module-level `colors` table: entry [classId] for the box and
// [classId + classNames.size()] for the mask tint, matching loadNames()
// which generates two colors per class.
void utils::visualizeDetection(cv::Mat &im, std::vector<Yolov8Result> &results,
                               const std::vector<std::string> &classNames)
{
    cv::Mat image = im.clone();
    for (const Yolov8Result &result : results)
    {

        int x = result.box.x;
        int y = result.box.y;

        // Confidence is rendered as "0.<percent>".
        int conf = (int)std::round(result.conf * 100);
        int classId = result.classId;
        std::string label = classNames[classId] + " 0." + std::to_string(conf);

        int baseline = 0;
        cv::Size size = cv::getTextSize(label, cv::FONT_ITALIC, 0.4, 1, &baseline);
        // Tint the mask area inside the box, then draw the box and label.
        image(result.box).setTo(colors[classId + classNames.size()], result.boxMask);
        cv::rectangle(image, result.box, colors[classId], 2);
        cv::rectangle(image,
                      cv::Point(x, y), cv::Point(x + size.width, y + 12),
                      colors[classId], -1);
        cv::putText(image, label,
                    cv::Point(x, y - 3 + 12), cv::FONT_ITALIC,
                    0.4, cv::Scalar(0, 0, 0), 1);
    }
    // 40/60 blend of the original and the annotated copy, written back to im.
    cv::addWeighted(im, 0.4, image, 0.6, 0, im);
}
||||
|
||||
// YOLO-style letterbox: scales `image` to fit `newShape` preserving aspect
// ratio and pads the remainder with `color`, splitting the padding evenly
// between the two sides.
//  auto_    : pad only to the next multiple of `stride` instead of the full
//             target size (minimum-rectangle mode).
//  scaleFill: stretch to `newShape` exactly, ignoring aspect ratio.
//  scaleUp  : when false, images smaller than `newShape` are only padded.
// NOTE(review): default arguments on a definition are only visible inside
// this translation unit; callers going through utils.h (which declares no
// defaults) must pass all arguments -- confirm this is intended.
void utils::letterbox(const cv::Mat &image, cv::Mat &outImage,
                      const cv::Size &newShape = cv::Size(640, 640),
                      const cv::Scalar &color = cv::Scalar(114, 114, 114),
                      bool auto_ = true,
                      bool scaleFill = false,
                      bool scaleUp = true,
                      int stride = 32)
{
    cv::Size shape = image.size();
    // Scale factor that fits the whole image inside newShape.
    float r = std::min((float)newShape.height / (float)shape.height,
                       (float)newShape.width / (float)shape.width);
    if (!scaleUp)
        r = std::min(r, 1.0f);

    float ratio[2]{r, r};

    // Size after scaling, before any padding.
    int newUnpad[2]{(int)std::round((float)shape.width * r),
                    (int)std::round((float)shape.height * r)};

    // Total horizontal / vertical padding required.
    auto dw = (float)(newShape.width - newUnpad[0]);
    auto dh = (float)(newShape.height - newUnpad[1]);

    if (auto_)
    {
        // Minimum-rectangle mode: pad only up to the next stride multiple.
        dw = (float)((int)dw % stride);
        dh = (float)((int)dh % stride);
    }
    else if (scaleFill)
    {
        // Stretch mode: no padding, independent x/y scale factors.
        dw = 0.0f;
        dh = 0.0f;
        newUnpad[0] = newShape.width;
        newUnpad[1] = newShape.height;
        ratio[0] = (float)newShape.width / (float)shape.width;
        ratio[1] = (float)newShape.height / (float)shape.height;
    }

    // Split the padding between the two sides.
    dw /= 2.0f;
    dh /= 2.0f;

    if (shape.width != newUnpad[0] && shape.height != newUnpad[1])
    {
        cv::resize(image, outImage, cv::Size(newUnpad[0], newUnpad[1]));
    }

    // +/-0.1 before rounding -- presumably mirrors the Python YOLO
    // letterbox reference implementation's rounding behavior.
    int top = int(std::round(dh - 0.1f));
    int bottom = int(std::round(dh + 0.1f));
    int left = int(std::round(dw - 0.1f));
    int right = int(std::round(dw + 0.1f));
    cv::copyMakeBorder(outImage, outImage, top, bottom, left, right, cv::BORDER_CONSTANT, color);
}
|
||||
|
||||
// Maps a detection box (and its mask) from the letterboxed inference image
// back to the original image's coordinate system: removes the padding,
// divides by the scale factor, clamps to the original bounds, then crops,
// resizes and thresholds the mask to the box region.
void utils::scaleCoords(cv::Rect &coords,
                        cv::Mat &mask,
                        const float maskThreshold,
                        const cv::Size &imageShape,
                        const cv::Size &imageOriginalShape)
{
    // Same scale factor letterbox() used to fit the original image.
    float gain = std::min((float)imageShape.height / (float)imageOriginalShape.height,
                          (float)imageShape.width / (float)imageOriginalShape.width);

    // Padding added on each side by letterbox().
    int pad[2] = {(int)(((float)imageShape.width - (float)imageOriginalShape.width * gain) / 2.0f),
                  (int)(((float)imageShape.height - (float)imageOriginalShape.height * gain) / 2.0f)};

    // Undo padding and scaling; clamp the origin into the image.
    coords.x = (int)std::round(((float)(coords.x - pad[0]) / gain));
    coords.x = std::max(0, coords.x);
    coords.y = (int)std::round(((float)(coords.y - pad[1]) / gain));
    coords.y = std::max(0, coords.y);

    // Rescale the extents and clamp them against the image borders.
    coords.width = (int)std::round(((float)coords.width / gain));
    coords.width = std::min(coords.width, imageOriginalShape.width - coords.x);
    coords.height = (int)std::round(((float)coords.height / gain));
    coords.height = std::min(coords.height, imageOriginalShape.height - coords.y);
    // Crop the padding off the mask, rescale to the original image size,
    // then binarize the box region with maskThreshold.
    mask = mask(cv::Rect(pad[0], pad[1], imageShape.width - 2 * pad[0], imageShape.height - 2 * pad[1]));

    cv::resize(mask, mask, imageOriginalShape, cv::INTER_LINEAR);

    mask = mask(coords) > maskThreshold;
}
|
||||
// Clamps n into [lower, upper] (equivalent to C++17's std::clamp).
// NOTE(review): this template is defined in a .cpp file, so despite the
// declaration in utils.h it can only be instantiated from this translation
// unit -- other TUs calling it would fail to link.
template <typename T>
T utils::clip(const T &n, const T &lower, const T &upper)
{
    return std::max(lower, std::min(n, upper));
}
|
||||
@@ -0,0 +1,38 @@
|
||||
#pragma once
|
||||
#include <codecvt>
|
||||
#include <fstream>
|
||||
#include <opencv2/opencv.hpp>
|
||||
|
||||
// One YOLOv8 detection (optionally with an instance-segmentation mask).
struct Yolov8Result
{
    cv::Rect box;
    cv::Mat boxMask; // mask in box
    float conf{};    // confidence score
    int classId{};   // class index
};


// Free helper functions shared by the ONNX-runtime predictor.
namespace utils
{
    // Color table filled by loadNames() (two colors per class).
    // NOTE(review): `static` at namespace scope in a header gives every
    // translation unit its own private copy -- only the TU that calls
    // loadNames() sees the filled table. Verify all users share one TU.
    static std::vector<cv::Scalar> colors;

    size_t vectorProduct(const std::vector<int64_t> &vector);
    std::wstring charToWstring(const char *str);
    std::vector<std::string> loadNames(const std::string &path);
    void visualizeDetection(cv::Mat &image, std::vector<Yolov8Result> &results,
                            const std::vector<std::string> &classNames);

    // YOLO-style aspect-preserving resize + pad (see utils.cpp for the
    // parameter semantics).
    void letterbox(const cv::Mat &image, cv::Mat &outImage,
                   const cv::Size &newShape,
                   const cv::Scalar &color,
                   bool auto_,
                   bool scaleFill,
                   bool scaleUp,
                   int stride);

    // Maps a box + mask from letterboxed to original image coordinates.
    void scaleCoords(cv::Rect &coords, cv::Mat &mask,
                     const float maskThreshold,
                     const cv::Size &imageShape, const cv::Size &imageOriginalShape);

    // Clamp n into [lower, upper]. NOTE(review): defined in utils.cpp, so
    // only that translation unit can instantiate it.
    template <typename T>
    T clip(const T &n, const T &lower, const T &upper);
}
|
||||
@@ -0,0 +1,243 @@
|
||||
#include "yolov8Predictor.h"
|
||||
|
||||
// Creates an ONNX Runtime session for the given YOLOv8 model and caches the
// input/output tensor metadata needed at inference time.
// confThreshold / iouThreshold / maskThreshold: detection filtering limits
// used later by the prediction pipeline.
YOLOPredictor::YOLOPredictor(const std::string &modelPath,
                             float confThreshold,
                             float iouThreshold,
                             float maskThreshold)
{
    this->confThreshold = confThreshold;
    this->iouThreshold = iouThreshold;
    this->maskThreshold = maskThreshold;
    env = Ort::Env(OrtLoggingLevel::ORT_LOGGING_LEVEL_WARNING, "YOLOV8");
    sessionOptions = Ort::SessionOptions();

    // Providers are queried but none is installed -- CPU execution only.
    std::vector<std::string> availableProviders = Ort::GetAvailableProviders();
    std::cout << "Inference device: CPU" << std::endl;

    session = Ort::Session(env, modelPath.c_str(), sessionOptions);

    const size_t num_input_nodes = session.GetInputCount(); //==1
    const size_t num_output_nodes = session.GetOutputCount(); //==1,2
    // A second output tensor means the model also produces mask prototypes.
    if (num_output_nodes > 1)
    {
        this->hasMask = true;
        std::cout << "Instance Segmentation" << std::endl;
    }
    else
        std::cout << "Object Detection" << std::endl;

    Ort::AllocatorWithDefaultOptions allocator;
    for (int i = 0; i < (int)num_input_nodes; i++)
    {
        // Keep both the raw char* (for Run()) and the owning allocation alive.
        auto input_name = session.GetInputNameAllocated(i, allocator);
        this->inputNames.push_back(input_name.get());
        input_names_ptr.push_back(std::move(input_name));

        Ort::TypeInfo inputTypeInfo = session.GetInputTypeInfo(i);
        std::vector<int64_t> inputTensorShape = inputTypeInfo.GetTensorTypeAndShapeInfo().GetShape();
        this->inputShapes.push_back(inputTensorShape);
        // NOTE(review): set to true unconditionally here, which makes the
        // -1/-1 check just below a no-op -- this line looks like it was
        // meant to be `false`. Confirm before changing: the flag controls
        // letterbox's auto_ mode and thus the effective input padding.
        this->isDynamicInputShape = true;
        // checking if width and height are dynamic
        if (inputTensorShape[2] == -1 && inputTensorShape[3] == -1)
        {
            std::cout << "Dynamic input shape" << std::endl;
            this->isDynamicInputShape = true;
        }
    }
    for (int i = 0; i < (int)num_output_nodes; i++)
    {
        auto output_name = session.GetOutputNameAllocated(i, allocator);
        this->outputNames.push_back(output_name.get());
        output_names_ptr.push_back(std::move(output_name));

        Ort::TypeInfo outputTypeInfo = session.GetOutputTypeInfo(i);
        std::vector<int64_t> outputTensorShape = outputTypeInfo.GetTensorTypeAndShapeInfo().GetShape();
        this->outputShapes.push_back(outputTensorShape);
        if (i == 0)
        {
            // Output 0 rows are [x, y, w, h, class scores..., plus 32 mask
            // coefficients when the model segments].
            if (!this->hasMask)
                classNums = outputTensorShape[1] - 4;
            else
                classNums = outputTensorShape[1] - 4 - 32;
        }
    }
    // for (const char *x : this->inputNames)
    // {
    //     std::cout << x << std::endl;
    // }
    // for (const char *x : this->outputNames)
    // {
    //     std::cout << x << std::endl;
    // }
    // std::cout << classNums << std::endl;
}
|
||||
|
||||
// Scans the class-score section of one prediction row (elements 4..4+N-1;
// the first 4 elements are the box) and reports the highest score and its
// class index. If every score is 0 the outputs keep their initial values.
void YOLOPredictor::getBestClassInfo(std::vector<float>::iterator it,
                                     float &bestConf,
                                     int &bestClassId,
                                     const int _classNums)
{
    // first 4 element are box
    bestClassId = 4;
    bestConf = 0;

    int idx = 4;
    while (idx < _classNums + 4)
    {
        if (it[idx] > bestConf)
        {
            bestConf = it[idx];
            bestClassId = idx - 4;
        }
        ++idx;
    }
}
|
||||
// Combines a detection's mask coefficients with the model's mask prototypes
// into a probability mask at the network input resolution.
// maskProposals: one row of mask coefficients. maskProtos: prototype tensor
// (model output 1), reshaped below to [protoChannels, protoH*protoW].
cv::Mat YOLOPredictor::getMask(const cv::Mat &maskProposals,
                               const cv::Mat &maskProtos)
{
    cv::Mat protos = maskProtos.reshape(0, {(int)this->outputShapes[1][1], (int)this->outputShapes[1][2] * (int)this->outputShapes[1][3]});

    // Linear combination of prototypes, shaped back to [protoH, protoW].
    cv::Mat matmul_res = (maskProposals * protos).t();
    cv::Mat masks = matmul_res.reshape(1, {(int)this->outputShapes[1][2], (int)this->outputShapes[1][3]});
    cv::Mat dest;

    // sigmoid
    cv::exp(-masks, dest);
    dest = 1.0 / (1.0 + dest);
    // Upscale from prototype resolution to the network input resolution.
    cv::resize(dest, dest, cv::Size((int)this->inputShapes[0][2], (int)this->inputShapes[0][3]), cv::INTER_LINEAR);
    return dest;
}
|
||||
|
||||
// Converts a BGR frame into a CHW float32 blob the network can consume.
// image: input BGR frame (not modified, despite the non-const reference).
// blob:  output; allocated here with new[] -- the CALLER owns it and must
//        delete[] it.
// inputTensorShape: in/out; entries [2] and [3] are overwritten with the
//        letterboxed height and width actually produced.
void YOLOPredictor::preprocessing(cv::Mat &image, float *&blob, std::vector<int64_t> &inputTensorShape)
{
    cv::Mat resizedImage, floatImage;
    // The model expects RGB channel ordering.
    cv::cvtColor(image, resizedImage, cv::COLOR_BGR2RGB);
    // Letterbox to the model input size, padding with grey (114,114,114).
    utils::letterbox(resizedImage, resizedImage, cv::Size((int)this->inputShapes[0][2], (int)this->inputShapes[0][3]),
                     cv::Scalar(114, 114, 114), this->isDynamicInputShape,
                     false, true, 32);

    // Record the size actually produced (may differ for dynamic input shapes).
    inputTensorShape[2] = resizedImage.rows;
    inputTensorShape[3] = resizedImage.cols;

    // Normalize pixel values to [0, 1].
    resizedImage.convertTo(floatImage, CV_32FC3, 1 / 255.0);
    blob = new float[floatImage.cols * floatImage.rows * floatImage.channels()];
    cv::Size floatImageSize{floatImage.cols, floatImage.rows};

    // hwc -> chw: each cv::Mat header below aliases one channel plane inside
    // the blob, so cv::split writes directly into the output buffer.
    std::vector<cv::Mat> chw(floatImage.channels());
    for (int i = 0; i < floatImage.channels(); ++i)
    {
        chw[i] = cv::Mat(floatImageSize, CV_32FC1, blob + i * floatImageSize.width * floatImageSize.height);
    }
    cv::split(floatImage, chw);
}
|
||||
|
||||
// Converts raw network outputs into final detections: per-candidate class
// scoring, confidence thresholding, NMS, and (for segmentation models)
// per-detection mask assembly.
// resizedImageShape:  letterboxed size the network actually saw.
// originalImageShape: size of the caller's frame, used to rescale coordinates.
std::vector<Yolov8Result> YOLOPredictor::postprocessing(const cv::Size &resizedImageShape,
                                                        const cv::Size &originalImageShape,
                                                        std::vector<Ort::Value> &outputTensors)
{

    // for box
    std::vector<cv::Rect> boxes;
    std::vector<float> confs;
    std::vector<int> classIds;

    float *boxOutput = outputTensors[0].GetTensorMutableData<float>();
    //[1,4+n,8400]=>[1,8400,4+n] or [1,4+n+32,8400]=>[1,8400,4+n+32]
    cv::Mat output0 = cv::Mat(cv::Size((int)this->outputShapes[0][2], (int)this->outputShapes[0][1]), CV_32F, boxOutput).t();
    float *output0ptr = (float *)output0.data;
    int rows = (int)this->outputShapes[0][2];   // candidate count (e.g. 8400)
    int cols = (int)this->outputShapes[0][1];   // 4 box values + class scores (+32 mask coeffs)
    // std::cout << rows << cols << std::endl;
    // if hasMask
    std::vector<std::vector<float>> picked_proposals;  // mask coefficients of accepted candidates
    cv::Mat mask_protos;

    for (int i = 0; i < rows; i++)
    {
        // One candidate row: [cx, cy, w, h, score_0..score_n, (mask coeffs)].
        std::vector<float> it(output0ptr + i * cols, output0ptr + (i + 1) * cols);
        float confidence;
        int classId;
        this->getBestClassInfo(it.begin(), confidence, classId, classNums);

        if (confidence > this->confThreshold)
        {
            if (this->hasMask)
            {
                // Everything after box + class scores is the mask coefficients.
                std::vector<float> temp(it.begin() + 4 + classNums, it.end());
                picked_proposals.push_back(temp);
            }
            // Convert center/size box to top-left/size for cv::Rect.
            int centerX = (int)(it[0]);
            int centerY = (int)(it[1]);
            int width = (int)(it[2]);
            int height = (int)(it[3]);
            int left = centerX - width / 2;
            int top = centerY - height / 2;
            boxes.emplace_back(left, top, width, height);
            confs.emplace_back(confidence);
            classIds.emplace_back(classId);
        }
    }

    // Drop overlapping candidates; `indices` holds the survivors.
    std::vector<int> indices;
    cv::dnn::NMSBoxes(boxes, confs, this->confThreshold, this->iouThreshold, indices);

    if (this->hasMask)
    {
        // Wrap the second output (mask prototypes) without copying.
        float *maskOutput = outputTensors[1].GetTensorMutableData<float>();
        std::vector<int> mask_protos_shape = {1, (int)this->outputShapes[1][1], (int)this->outputShapes[1][2], (int)this->outputShapes[1][3]};
        mask_protos = cv::Mat(mask_protos_shape, CV_32F, maskOutput);
    }

    std::vector<Yolov8Result> results;
    for (int idx : indices)
    {
        Yolov8Result res;
        res.box = cv::Rect(boxes[idx]);
        if (this->hasMask)
            res.boxMask = this->getMask(cv::Mat(picked_proposals[idx]).t(), mask_protos);
        else
            res.boxMask = cv::Mat::zeros((int)this->inputShapes[0][2], (int)this->inputShapes[0][3], CV_8U);

        // Map box (and mask) from letterboxed coordinates back to the original frame.
        utils::scaleCoords(res.box, res.boxMask, this->maskThreshold, resizedImageShape, originalImageShape);
        res.conf = confs[idx];
        res.classId = classIds[idx];
        results.emplace_back(res);
    }

    return results;
}
|
||||
|
||||
// Runs one full inference pass: preprocess -> ONNX Runtime session -> postprocess.
// image: BGR frame (not modified here).
// Returns all detections (with masks for segmentation models) mapped back to
// original-image coordinates.
std::vector<Yolov8Result> YOLOPredictor::predict(cv::Mat &image)
{
    float *blob = nullptr;
    std::vector<int64_t> inputTensorShape{1, 3, -1, -1};
    this->preprocessing(image, blob, inputTensorShape);

    size_t inputTensorSize = utils::vectorProduct(inputTensorShape);

    std::vector<float> inputTensorValues(blob, blob + inputTensorSize);
    // Release the staging buffer as soon as its contents are copied; the
    // original freed it at the very end, leaking it whenever session.Run()
    // (or anything else below) threw.
    delete[] blob;
    blob = nullptr;

    std::vector<Ort::Value> inputTensors;

    Ort::MemoryInfo memoryInfo = Ort::MemoryInfo::CreateCpu(
        OrtAllocatorType::OrtArenaAllocator, OrtMemType::OrtMemTypeDefault);

    // The tensor aliases inputTensorValues, which outlives the Run() call.
    inputTensors.push_back(Ort::Value::CreateTensor<float>(
        memoryInfo, inputTensorValues.data(), inputTensorSize,
        inputTensorShape.data(), inputTensorShape.size()));

    std::vector<Ort::Value> outputTensors = this->session.Run(Ort::RunOptions{nullptr},
                                                              this->inputNames.data(),
                                                              inputTensors.data(),
                                                              1,
                                                              this->outputNames.data(),
                                                              this->outputNames.size());

    // Letterboxed size actually fed to the network, as (width, height).
    cv::Size resizedShape = cv::Size((int)inputTensorShape[3], (int)inputTensorShape[2]);
    std::vector<Yolov8Result> result = this->postprocessing(resizedShape,
                                                            image.size(),
                                                            outputTensors);

    return result;
}
|
||||
@@ -0,0 +1,50 @@
|
||||
#pragma once
|
||||
#include <opencv2/opencv.hpp>
|
||||
#include <onnxruntime_cxx_api.h>
|
||||
#include <utility>
|
||||
|
||||
#include "utils.h"
|
||||
|
||||
// ONNX Runtime based YOLOv8 detector/segmenter. Construct with a model path
// and thresholds, then call predict() once per frame.
class YOLOPredictor
{
public:
    // Allows declaring an uninitialized predictor (mirrors the nullptr-
    // constructed Ort members below).
    explicit YOLOPredictor(std::nullptr_t){};
    YOLOPredictor(const std::string &modelPath,
                  float confThreshold,
                  float iouThreshold,
                  float maskThreshold);
    // ~YOLOPredictor();
    // Runs detection on one BGR frame; results are in original-image coordinates.
    std::vector<Yolov8Result> predict(cv::Mat &image);
    // Class count; default 10, overwritten from the model's output shape at load.
    int classNums = 10;

private:
    Ort::Env env{nullptr};
    Ort::SessionOptions sessionOptions{nullptr};
    Ort::Session session{nullptr};

    // Converts a BGR frame to a normalized CHW float blob (caller frees blob).
    void preprocessing(cv::Mat &image, float *&blob, std::vector<int64_t> &inputTensorShape);
    // Turns raw output tensors into thresholded, NMS-filtered results.
    std::vector<Yolov8Result> postprocessing(const cv::Size &resizedImageShape,
                                             const cv::Size &originalImageShape,
                                             std::vector<Ort::Value> &outputTensors);

    // Finds the best-scoring class in one prediction row (first 4 floats are the box).
    static void getBestClassInfo(std::vector<float>::iterator it,
                                 float &bestConf,
                                 int &bestClassId,
                                 const int _classNums);
    // Combines one detection's mask coefficients with the prototype masks.
    cv::Mat getMask(const cv::Mat &maskProposals, const cv::Mat &maskProtos);
    bool isDynamicInputShape{};

    // Tensor names; the AllocatedStringPtr vectors keep the raw char* alive.
    std::vector<const char *> inputNames;
    std::vector<Ort::AllocatedStringPtr> input_names_ptr;

    std::vector<const char *> outputNames;
    std::vector<Ort::AllocatedStringPtr> output_names_ptr;

    std::vector<std::vector<int64_t>> inputShapes;   // [N, C, H, W] per input
    std::vector<std::vector<int64_t>> outputShapes;  // raw shape per output
    float confThreshold = 0.3f;  // minimum class score to keep a detection
    float iouThreshold = 0.4f;   // NMS overlap threshold

    bool hasMask = false;        // true for segmentation models (two outputs)
    float maskThreshold = 0.5f;  // binarization threshold for masks
};
|
||||
@@ -0,0 +1,53 @@
|
||||
#include <QDebug>
|
||||
#include <QThread>
|
||||
#include "aiengineinferenceopencvonnx.h"
|
||||
|
||||
|
||||
// Loads the ONNX model through the OpenCV-DNN Inference wrapper with a fixed
// 640x640 input size. "classes.txt" is passed along but the wrapper keeps its
// built-in COCO label list (its loadClassesFromFile call is commented out).
AiEngineInferenceOpencvOnnx::AiEngineInferenceOpencvOnnx(QString modelPath, QObject *parent)
    : AiEngineInference{modelPath, parent},
      mInference(modelPath.toStdString(), cv::Size(640, 640), "classes.txt")
{
    //qDebug() << "TUOMAS test mModelPath=" << mModelPath;
    //mEngine = new InferenceEngine(modelPath.toStdString());
    //mInference = new Inference(modelPath.toStdString(), cv::Size(INFERENCE_SQUARE_WIDTH, INFERENCE_SQUARE_HEIGHT), "classes.txt");
}
|
||||
|
||||
|
||||
// Runs one detection pass over the frame and, when at least one object is
// found, emits resultsReady() with an annotated copy of the scaled frame.
// mActive flags the engine as busy for the duration of the call; it is now
// cleared on every exit path (the original left it stuck at true when a
// cv::Exception was thrown, wedging the engine).
void AiEngineInferenceOpencvOnnx::performInferenceSlot(cv::Mat frame)
{
    mActive = true;
    try {
        //qDebug() << "performInferenceSlot() in thread: " << QThread::currentThreadId();

        cv::Mat scaledImage = resizeAndPad(frame);
        std::vector<Detection> detections = mInference.runInference(scaledImage);
        AiEngineInferenceResult result;

        //qDebug() << "performInferenceSlot() found " << detections.size() << " objects";

        // Copy detections into the backend-agnostic result structure.
        for (uint i = 0; i < detections.size(); ++i) {
            const Detection &detection = detections[i];

            AiEngineObject object;
            object.classId = detection.class_id;
            object.propability = detection.confidence;
            object.rectangle.top = detection.box.y;
            object.rectangle.left = detection.box.x;
            object.rectangle.bottom = detection.box.y + detection.box.height;
            object.rectangle.right = detection.box.x + detection.box.width;
            result.objects.append(object);
        }

        if (result.objects.empty() == false) {
            result.frame = mInference.drawLabels(scaledImage, detections);
            emit resultsReady(result);
        }
    }
    catch (const cv::Exception& e) {
        std::cerr << "performInferenceSlot() Error: " << e.what() << std::endl;
    }
    mActive = false;
}
|
||||
@@ -0,0 +1,18 @@
|
||||
#pragma once
|
||||
|
||||
#include <QObject>
|
||||
#include "aiengineinference.h"
|
||||
#include "src-opencv-onnx/inference.h"
|
||||
|
||||
// OpenCV-DNN (ONNX) backed inference engine implementing the common
// AiEngineInference interface.
class AiEngineInferenceOpencvOnnx : public AiEngineInference
{
    Q_OBJECT
public:
    explicit AiEngineInferenceOpencvOnnx(QString modelPath, QObject *parent = nullptr);

public slots:
    // Runs one detection pass and emits resultsReady() when objects are found.
    void performInferenceSlot(cv::Mat frame) override;

private:
    Inference mInference;   // OpenCV DNN wrapper; loads the ONNX model in the ctor
};
|
||||
@@ -0,0 +1,223 @@
|
||||
#include "inference.h"
|
||||
|
||||
// Stores the configuration and loads the ONNX network immediately.
// classesTxtFile is remembered in classesPath but not read here -- the
// built-in COCO label list is used (see the commented-out call below).
Inference::Inference(const std::string &onnxModelPath, const cv::Size &modelInputShape, const std::string &classesTxtFile, const bool &runWithCuda)
{
    modelPath = onnxModelPath;
    modelShape = modelInputShape;
    classesPath = classesTxtFile;
    cudaEnabled = runWithCuda;

    std::cout << "SIZE = " << modelInputShape.width << "x" << modelInputShape.height << std::endl;
    loadOnnxNetwork();
    // loadClassesFromFile(); The classes are hard-coded for this example
}
|
||||
|
||||
// Runs the DNN on one BGR frame and returns NMS-filtered detections.
// Handles both YOLOv5 ([1, rows, dims]) and YOLOv8 ([1, dims, rows]) output
// layouts; the layout is detected from the output tensor's shape.
std::vector<Detection> Inference::runInference(const cv::Mat &input)
{
    cv::Mat modelInput = input;
    if (letterBoxForSquare && modelShape.width == modelShape.height)
        modelInput = formatToSquare(modelInput);

    cv::Mat blob;
    // Scales pixels to [0,1] and swaps BGR->RGB while resizing to modelShape.
    cv::dnn::blobFromImage(modelInput, blob, 1.0/255.0, modelShape, cv::Scalar(), true, false);
    net.setInput(blob);

    std::vector<cv::Mat> outputs;
    net.forward(outputs, net.getUnconnectedOutLayersNames());

    int rows = outputs[0].size[1];
    int dimensions = outputs[0].size[2];

    bool yolov8 = false;
    // yolov5 has an output of shape (batchSize, 25200, 85) (Num classes + box[x,y,w,h] + confidence[c])
    // yolov8 has an output of shape (batchSize, 84, 8400) (Num classes + box[x,y,w,h])
    if (dimensions > rows) // Check if the shape[2] is more than shape[1] (yolov8)
    {
        yolov8 = true;
        rows = outputs[0].size[2];
        dimensions = outputs[0].size[1];

        // Transpose to one row per candidate, matching the yolov5 layout.
        outputs[0] = outputs[0].reshape(1, dimensions);
        cv::transpose(outputs[0], outputs[0]);
    }
    float *data = (float *)outputs[0].data;

    // Factors mapping boxes from model-input coordinates back to modelInput.
    float x_factor = modelInput.cols / modelShape.width;
    float y_factor = modelInput.rows / modelShape.height;

    std::vector<int> class_ids;
    std::vector<float> confidences;
    std::vector<cv::Rect> boxes;

    for (int i = 0; i < rows; ++i)
    {
        if (yolov8)
        {
            float *classes_scores = data+4;

            cv::Mat scores(1, classes.size(), CV_32FC1, classes_scores);
            cv::Point class_id;
            double maxClassScore;

            minMaxLoc(scores, 0, &maxClassScore, 0, &class_id);

            if (maxClassScore > modelScoreThreshold)
            {
                confidences.push_back(static_cast<float>(maxClassScore));
                class_ids.push_back(class_id.x);

                // Box arrives as center/size; convert to top-left/size in pixels.
                float x = data[0];
                float y = data[1];
                float w = data[2];
                float h = data[3];

                int left = int((x - 0.5 * w) * x_factor);
                int top = int((y - 0.5 * h) * y_factor);

                int width = int(w * x_factor);
                int height = int(h * y_factor);

                boxes.push_back(cv::Rect(left, top, width, height));
            }
        }
        else // yolov5
        {
            float confidence = data[4];  // objectness score

            if (confidence >= modelConfidenceThreshold)
            {
                float *classes_scores = data+5;

                cv::Mat scores(1, classes.size(), CV_32FC1, classes_scores);
                cv::Point class_id;
                double max_class_score;

                minMaxLoc(scores, 0, &max_class_score, 0, &class_id);

                if (max_class_score > modelScoreThreshold)
                {
                    confidences.push_back(confidence);
                    class_ids.push_back(class_id.x);

                    float x = data[0];
                    float y = data[1];
                    float w = data[2];
                    float h = data[3];

                    int left = int((x - 0.5 * w) * x_factor);
                    int top = int((y - 0.5 * h) * y_factor);

                    int width = int(w * x_factor);
                    int height = int(h * y_factor);

                    boxes.push_back(cv::Rect(left, top, width, height));
                }
            }
        }

        data += dimensions;
    }

    std::vector<int> nms_result;
    cv::dnn::NMSBoxes(boxes, confidences, modelScoreThreshold, modelNMSThreshold, nms_result);

    // Seed the label-color generator once per call; the original constructed
    // a fresh std::random_device + std::mt19937 for every surviving detection,
    // which is needlessly expensive (random_device may open an OS entropy
    // source each time). The colors remain random either way.
    std::random_device rd;
    std::mt19937 gen(rd());
    std::uniform_int_distribution<int> dis(100, 255);

    std::vector<Detection> detections{};
    for (unsigned long i = 0; i < nms_result.size(); ++i)
    {
        int idx = nms_result[i];

        Detection result;
        result.class_id = class_ids[idx];
        result.confidence = confidences[idx];
        result.color = cv::Scalar(dis(gen), dis(gen), dis(gen));
        result.className = classes[result.class_id];
        result.box = boxes[idx];

        detections.push_back(result);
    }

    return detections;
}
|
||||
|
||||
void Inference::loadClassesFromFile()
|
||||
{
|
||||
std::ifstream inputFile(classesPath);
|
||||
if (inputFile.is_open())
|
||||
{
|
||||
std::string classLine;
|
||||
while (std::getline(inputFile, classLine))
|
||||
classes.push_back(classLine);
|
||||
inputFile.close();
|
||||
}
|
||||
}
|
||||
|
||||
void Inference::loadOnnxNetwork()
|
||||
{
|
||||
printf("loadOnnxNetwork() starts\n");
|
||||
|
||||
net = cv::dnn::readNetFromONNX(modelPath);
|
||||
|
||||
if (cudaEnabled)
|
||||
{
|
||||
std::cout << "\nRunning on CUDA" << std::endl;
|
||||
net.setPreferableBackend(cv::dnn::DNN_BACKEND_CUDA);
|
||||
net.setPreferableTarget(cv::dnn::DNN_TARGET_CUDA);
|
||||
}
|
||||
else
|
||||
{
|
||||
std::cout << "\nRunning on CPU" << std::endl;
|
||||
net.setPreferableBackend(cv::dnn::DNN_BACKEND_OPENCV);
|
||||
net.setPreferableTarget(cv::dnn::DNN_TARGET_CPU);
|
||||
}
|
||||
}
|
||||
|
||||
// Returns `source` embedded in the top-left corner of a black square canvas
// whose side equals the larger of the two source dimensions. No scaling.
cv::Mat Inference::formatToSquare(const cv::Mat &source)
{
    const int width = source.cols;
    const int height = source.rows;
    const int side = MAX(width, height);

    cv::Mat squared = cv::Mat::zeros(side, side, CV_8UC3);
    source.copyTo(squared(cv::Rect(0, 0, width, height)));
    return squared;
}
|
||||
|
||||
// Returns a copy of `image` with each detection's bounding box and a
// "className: confidence" label rendered on it. The input is left untouched.
cv::Mat Inference::drawLabels(const cv::Mat &image, const std::vector<Detection> &detections)
{
    cv::Mat canvas = image.clone();

    for (const auto &det : detections)
    {
        const cv::Rect &box = det.box;

        // Green bounding box.
        cv::rectangle(canvas, box, cv::Scalar(0, 255, 0), 2);

        const std::string label = det.className + ": " + std::to_string(det.confidence);

        int baseLine = 0;
        const cv::Size labelSize = cv::getTextSize(label, cv::FONT_HERSHEY_SIMPLEX, 0.5, 1, &baseLine);

        // White background strip behind the text for readability.
        const cv::Point bgTopLeft(box.x, box.y - labelSize.height);
        const cv::Point bgBottomRight(box.x + labelSize.width, box.y + baseLine);
        cv::rectangle(canvas, bgTopLeft, bgBottomRight, cv::Scalar(255, 255, 255), cv::FILLED);

        // Black text anchored at the box's top-left corner.
        cv::putText(canvas, label, cv::Point(box.x, box.y),
                    cv::FONT_HERSHEY_SIMPLEX, 0.5, cv::Scalar(0, 0, 0), 1);
    }

    return canvas;
}
|
||||
@@ -0,0 +1,51 @@
|
||||
#pragma once
|
||||
|
||||
// Cpp native
|
||||
#include <fstream>
|
||||
#include <vector>
|
||||
#include <string>
|
||||
#include <random>
|
||||
|
||||
// OpenCV / DNN / Inference
|
||||
#include <opencv2/imgproc.hpp>
|
||||
#include <opencv2/opencv.hpp>
|
||||
#include <opencv2/dnn.hpp>
|
||||
|
||||
// One detected object, as produced by Inference::runInference().
struct Detection
{
    int class_id{0};           // index into Inference::classes
    std::string className{};   // human-readable label for class_id
    float confidence{0.0};     // class score
    cv::Scalar color{};        // random color assigned for drawing
    cv::Rect box{};            // bounding box in input-image pixels
};
|
||||
|
||||
// Thin wrapper around OpenCV's DNN module for running YOLOv5/YOLOv8 ONNX
// models. Construct once (loads the network), then call runInference() per
// frame.
class Inference
{
public:
    // Loads the network immediately. classesTxtFile is stored but only read
    // if loadClassesFromFile() is called; by default the built-in COCO list
    // below is used.
    Inference(const std::string &onnxModelPath, const cv::Size &modelInputShape = {640, 640}, const std::string &classesTxtFile = "", const bool &runWithCuda = false);
    // Runs the network on one BGR frame and returns NMS-filtered detections.
    std::vector<Detection> runInference(const cv::Mat &input);
    // Returns a copy of `image` annotated with the detections' boxes/labels.
    cv::Mat drawLabels(const cv::Mat &image, const std::vector<Detection> &detections);

private:
    // Appends the lines of classesPath to `classes` (no-op if unreadable).
    void loadClassesFromFile();
    // Reads the ONNX file and selects the CUDA or CPU backend.
    void loadOnnxNetwork();

    // Embeds the image in the top-left of a black square canvas (no scaling).
    cv::Mat formatToSquare(const cv::Mat &source);

    std::string modelPath{};    // path to the .onnx file
    std::string classesPath{};  // optional labels file (one label per line)
    bool cudaEnabled{};         // prefer the CUDA DNN backend when true

    // Default labels: the 80 COCO classes, indexed by Detection::class_id.
    std::vector<std::string> classes{"person", "bicycle", "car", "motorcycle", "airplane", "bus", "train", "truck", "boat", "traffic light", "fire hydrant", "stop sign", "parking meter", "bench", "bird", "cat", "dog", "horse", "sheep", "cow", "elephant", "bear", "zebra", "giraffe", "backpack", "umbrella", "handbag", "tie", "suitcase", "frisbee", "skis", "snowboard", "sports ball", "kite", "baseball bat", "baseball glove", "skateboard", "surfboard", "tennis racket", "bottle", "wine glass", "cup", "fork", "knife", "spoon", "bowl", "banana", "apple", "sandwich", "orange", "broccoli", "carrot", "hot dog", "pizza", "donut", "cake", "chair", "couch", "potted plant", "bed", "dining table", "toilet", "tv", "laptop", "mouse", "remote", "keyboard", "cell phone", "microwave", "oven", "toaster", "sink", "refrigerator", "book", "clock", "vase", "scissors", "teddy bear", "hair drier", "toothbrush"};

    cv::Size2f modelShape{};    // network input size (width, height)

    float modelConfidenceThreshold {0.25};  // yolov5 objectness threshold
    float modelScoreThreshold {0.45};       // class-score threshold (also used as NMS score)
    float modelNMSThreshold {0.50};         // NMS IoU threshold

    bool letterBoxForSquare = false;  // square-pad input before blobFromImage

    cv::dnn::Net net;           // the loaded DNN
};
|
||||
@@ -0,0 +1,168 @@
|
||||
#include <QDebug>
|
||||
#include <QThread>
|
||||
#include <opencv2/imgproc.hpp>
|
||||
#include <opencv2/highgui.hpp>
|
||||
#include <iomanip>
|
||||
#include "aiengineinferenceopi5.h"
|
||||
#include "file_utils.h"
|
||||
#include "image_drawing.h"
|
||||
|
||||
|
||||
// Initializes the RKNN post-processing tables and loads the yolov8 model
// from modelPath. On model-load failure the error is logged and the object
// is left constructed but unable to run inference.
AiEngineInferenceOpi5::AiEngineInferenceOpi5(QString modelPath, QObject *parent)
    : AiEngineInference{modelPath, parent}
{
    qDebug() << "AiEngineInferenceOpi5() test mModelPath=" << mModelPath;

    memset(&mRrknnAppCtx, 0, sizeof(rknn_app_context_t));
    init_post_process();

    int ret = init_yolov8_model(modelPath.toLocal8Bit(), &mRrknnAppCtx);
    if (ret != 0) {
        qDebug() << "init_yolov8_model() failure! ret: " << ret << "modelPath = " << modelPath;
        return;
    }
}
|
||||
|
||||
|
||||
// Releases the post-processing tables and the RKNN model context acquired
// in the constructor.
AiEngineInferenceOpi5::~AiEngineInferenceOpi5()
{
    deinit_post_process();
    release_yolov8_model(&mRrknnAppCtx);
}
|
||||
|
||||
|
||||
// Deep-copies a BGR frame into a newly allocated RGB888 image_buffer_t for
// the RKNN C API. The returned buffer owns its pixel data; release it with
// freeImageBuffer().
image_buffer_t AiEngineInferenceOpi5::convertCV2FrameToImageBuffer(const cv::Mat& bgrFrame)
{
    // Convert BGR to RGB
    cv::Mat rgbFrame;
    cv::cvtColor(bgrFrame, rgbFrame, cv::COLOR_BGR2RGB);

    image_buffer_t imgBuffer;
    memset(&imgBuffer, 0, sizeof(image_buffer_t));
    imgBuffer.width = rgbFrame.cols;
    imgBuffer.height = rgbFrame.rows;
    // NOTE(review): width_stride is set to the row stride in BYTES
    // (cv::Mat::step) while height_stride is a row COUNT -- confirm these
    // units match what the RKNN API expects for the two fields.
    imgBuffer.width_stride = rgbFrame.step;
    imgBuffer.height_stride = rgbFrame.rows;
    imgBuffer.format = IMAGE_FORMAT_RGB888;
    imgBuffer.size = rgbFrame.total() * rgbFrame.elemSize();

    // Heap copy so the buffer outlives rgbFrame.
    imgBuffer.virt_addr = new unsigned char[imgBuffer.size];
    std::memcpy(imgBuffer.virt_addr, rgbFrame.data, imgBuffer.size);

    return imgBuffer;
}
|
||||
|
||||
|
||||
// Releases pixel memory allocated by convertCV2FrameToImageBuffer() and
// nulls the pointer so a repeated call is harmless.
void AiEngineInferenceOpi5::freeImageBuffer(image_buffer_t& imgBuffer)
{
    if (imgBuffer.virt_addr == nullptr)
        return;

    delete[] imgBuffer.virt_addr;
    imgBuffer.virt_addr = nullptr;
}
|
||||
|
||||
|
||||
// Halves the input frame and pastes it into the top-left corner of a black
// 640x640 canvas (the RKNN model's input size).
// If the half-size frame is still larger than 640 in either dimension, only
// the top-left 640x640 portion is copied -- the original asserted inside
// copyTo() in that case (any input wider/taller than 1280 px crashed).
cv::Mat AiEngineInferenceOpi5::resizeToHalfAndAssigntoTopLeft640x640(const cv::Mat& inputFrame)
{
    // Resize input frame to half size
    cv::Mat resizedFrame;
    cv::resize(inputFrame, resizedFrame, cv::Size(), 0.5, 0.5);

    // Create a 640x640 frame to place the resized frame
    cv::Mat outputFrame = cv::Mat::zeros(640, 640, inputFrame.type());

    // Clamp the copy region to the canvas before pasting.
    const int copyWidth = MIN(resizedFrame.cols, 640);
    const int copyHeight = MIN(resizedFrame.rows, 640);
    cv::Rect roi(0, 0, copyWidth, copyHeight);
    resizedFrame(roi).copyTo(outputFrame(roi));

    return outputFrame;
}
|
||||
|
||||
|
||||
// Draws a blue bounding box and a red "class prob%" label for every
// detection, directly on `image`.
void AiEngineInferenceOpi5::drawObjects(cv::Mat& image, const object_detect_result_list& result_list)
{
    for (int i = 0; i < result_list.count; i++) {
        const object_detect_result& result = result_list.results[i];

        int left = result.box.left;
        int top = result.box.top;
        int right = result.box.right;
        int bottom = result.box.bottom;

        cv::rectangle(image, cv::Point(left, top), cv::Point(right, bottom), cv::Scalar(255, 0, 0), 2);

        // Label text: snprintf (was sprintf) so an unexpectedly long class
        // name cannot overflow the fixed-size stack buffer.
        char c_text[256];
        snprintf(c_text, sizeof(c_text), "%s %.1f%%", coco_cls_to_name(result.cls_id), result.prop * 100);
        cv::Point textOrg(left, top - 5);
        cv::putText(image, std::string(c_text), textOrg, cv::FONT_HERSHEY_SIMPLEX, 0.5, cv::Scalar(0, 0, 255), 1);
    }
}
|
||||
|
||||
|
||||
// Runs one RKNN detection pass on the frame: scale onto the 640x640 model
// canvas, run yolov8, collect the results, draw overlays, and emit
// resultsReady(). mActive marks the engine as busy for the duration.
void AiEngineInferenceOpi5::performInferenceSlot(cv::Mat frame)
{
    //qDebug() << "performInferenceSlot() in thread: " << QThread::currentThreadId();

    mActive = true;

    cv::Mat scaledFrame = resizeToHalfAndAssigntoTopLeft640x640(frame);
    image_buffer_t imgBuffer = convertCV2FrameToImageBuffer(scaledFrame);

    object_detect_result_list od_results;
    int ret = inference_yolov8_model(&mRrknnAppCtx, &imgBuffer, &od_results);
    if (ret != 0) {
        qDebug() << "inference_yolov8_model() failure! ret: " << ret;
        // The original returned here without releasing imgBuffer and with
        // mActive stuck at true, leaking memory and wedging the engine
        // after the first failed inference.
        freeImageBuffer(imgBuffer);
        mActive = false;
        return;
    }

    // Copy detections into the backend-agnostic result structure.
    AiEngineInferenceResult result;
    for (int i = 0; i < od_results.count; i++) {
        object_detect_result *det_result = &(od_results.results[i]);

        AiEngineObject object;
        object.classId = det_result->cls_id;
        object.propability = det_result->prop;
        object.rectangle.top = det_result->box.top;
        object.rectangle.left = det_result->box.left;
        object.rectangle.bottom = det_result->box.bottom;
        object.rectangle.right = det_result->box.right;
        result.objects.append(object);
    }

    // Annotate the scaled frame in-place, then hand a copy to the listeners.
    drawObjects(scaledFrame, od_results);
    freeImageBuffer(imgBuffer);

    result.frame = scaledFrame.clone();
    emit resultsReady(result);

    mActive = false;
}
|
||||
@@ -0,0 +1,25 @@
|
||||
#pragma once
|
||||
|
||||
#include <QObject>
|
||||
#include "aiengineinference.h"
|
||||
#include "yolov8.h"
|
||||
#include "image_utils.h"
|
||||
|
||||
// RKNN (Rockchip NPU / Orange Pi 5) backed inference engine. Wraps the
// rknn_model_zoo yolov8 C API behind the common AiEngineInference interface.
class AiEngineInferenceOpi5 : public AiEngineInference
{
    Q_OBJECT
public:
    explicit AiEngineInferenceOpi5(QString modelPath, QObject *parent = nullptr);
    ~AiEngineInferenceOpi5();

public slots:
    // Runs one detection pass on the frame and emits resultsReady().
    void performInferenceSlot(cv::Mat frame) override;

private:
    // Deep-copies a BGR cv::Mat into an RGB888 image_buffer_t
    // (release with freeImageBuffer()).
    image_buffer_t convertCV2FrameToImageBuffer(const cv::Mat& bgrFrame);
    // Frees the pixel memory of a buffer made by the converter above.
    void freeImageBuffer(image_buffer_t& imgBuffer);
    // Halves the frame and pastes it into the top-left of a 640x640 canvas.
    cv::Mat resizeToHalfAndAssigntoTopLeft640x640(const cv::Mat& inputFrame);
    // Draws boxes and "class prob%" labels for every detection in-place.
    void drawObjects(cv::Mat& image, const object_detect_result_list& result_list);

    rknn_app_context_t mRrknnAppCtx;  // RKNN model/runtime context
};
|
||||
Binary file not shown.
|
After Width: | Height: | Size: 602 KiB |
@@ -0,0 +1,42 @@
|
||||
#ifndef _RKNN_MODEL_ZOO_COMMON_H_
#define _RKNN_MODEL_ZOO_COMMON_H_

/**
 * @brief Image pixel format
 *
 */
typedef enum {
    IMAGE_FORMAT_GRAY8,
    IMAGE_FORMAT_RGB888,
    IMAGE_FORMAT_RGBA8888,
    IMAGE_FORMAT_YUV420SP_NV21,
    IMAGE_FORMAT_YUV420SP_NV12,
} image_format_t;

/**
 * @brief Image buffer
 *
 */
typedef struct {
    int width;                 /* visible width in pixels */
    int height;                /* visible height in pixels */
    int width_stride;          /* row stride (units set by the producer -- verify) */
    int height_stride;         /* allocated row count (units set by the producer -- verify) */
    image_format_t format;     /* pixel layout of virt_addr */
    unsigned char* virt_addr;  /* CPU-visible pixel data */
    int size;                  /* total size of virt_addr in bytes */
    int fd;                    /* optional backing file descriptor */
} image_buffer_t;

/**
 * @brief Image rectangle (pixel coordinates, left/top inclusive)
 *
 */
typedef struct {
    int left;
    int top;
    int right;
    int bottom;
} image_rect_t;

#endif //_RKNN_MODEL_ZOO_COMMON_H_
|
||||
@@ -0,0 +1,129 @@
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
|
||||
#define MAX_TEXT_LINE_LENGTH 1024
|
||||
|
||||
/**
 * @brief Read a whole binary file into a freshly malloc'd buffer.
 *
 * @param filename   [in]  File path
 * @param model_size [out] Size of the returned buffer in bytes
 * @return Buffer with the file contents (caller frees), or NULL on any
 *         failure (open, allocation, or short read).
 */
unsigned char* load_model(const char* filename, int* model_size)
{
    FILE* fp = fopen(filename, "rb");
    if (fp == NULL) {
        printf("fopen %s fail!\n", filename);
        return NULL;
    }
    fseek(fp, 0, SEEK_END);
    int model_len = ftell(fp);
    unsigned char* model = (unsigned char*)malloc(model_len);
    /* The original dereferenced the malloc result unchecked. */
    if (model == NULL) {
        printf("malloc %d bytes fail!\n", model_len);
        fclose(fp);
        return NULL;
    }
    fseek(fp, 0, SEEK_SET);
    if (model_len != (int)fread(model, 1, model_len, fp)) {
        printf("fread %s fail!\n", filename);
        free(model);
        fclose(fp);
        return NULL;
    }
    *model_size = model_len;
    fclose(fp);
    return model;
}
|
||||
|
||||
/**
 * @brief Read a whole file into a malloc'd, NUL-terminated buffer.
 *
 * @param path     [in]  File path
 * @param out_data [out] Receives the buffer (caller frees); untouched on error
 * @return File size in bytes (not counting the added NUL), or -1 on error.
 */
int read_data_from_file(const char *path, char **out_data)
{
    FILE *fp = fopen(path, "rb");
    if(fp == NULL) {
        printf("fopen %s fail!\n", path);
        return -1;
    }
    fseek(fp, 0, SEEK_END);
    int file_size = ftell(fp);
    /* +1 for the NUL terminator so text callers can treat it as a string. */
    char *data = (char *)malloc(file_size+1);
    /* The original dereferenced the malloc result unchecked. */
    if (data == NULL) {
        printf("malloc %d bytes fail!\n", file_size+1);
        fclose(fp);
        return -1;
    }
    data[file_size] = 0;
    fseek(fp, 0, SEEK_SET);
    if(file_size != (int)fread(data, 1, file_size, fp)) {
        printf("fread %s fail!\n", path);
        free(data);
        fclose(fp);
        return -1;
    }
    fclose(fp);  /* fp is known non-NULL here; the old `if(fp)` was dead */
    *out_data = data;
    return file_size;
}
|
||||
|
||||
/**
 * @brief Write a byte buffer to a file, replacing any existing content.
 *
 * @param path [in] File path
 * @param data [in] Bytes to write
 * @param size [in] Number of bytes
 * @return 0 on success; -1 on open failure or short write.
 */
int write_data_to_file(const char *path, const char *data, unsigned int size)
{
    FILE *fp;

    /* "wb" (was "w"): binary mode keeps arbitrary payload bytes exact on
     * platforms that translate newlines in text mode. */
    fp = fopen(path, "wb");
    if(fp == NULL) {
        printf("open error: %s\n", path);
        return -1;
    }

    /* The original ignored the fwrite result; a short write was silent. */
    if (fwrite(data, 1, size, fp) != size) {
        printf("write error: %s\n", path);
        fclose(fp);
        return -1;
    }
    fflush(fp);

    fclose(fp);
    return 0;
}
|
||||
|
||||
/**
 * @brief Count text lines in an open stream, then rewind it.
 *
 * Counts '\n' characters up to EOF and adds one for the final (possibly
 * unterminated) line, so an empty file reports 1 line.
 * Fix: fgetc() result is kept in an int -- the original stored it in a
 * char, which never compares equal to EOF on platforms where plain char is
 * unsigned (infinite loop); it also used the `while(!feof)` anti-pattern.
 */
int count_lines(FILE* file)
{
    int count = 0;
    int ch;

    while ((ch = fgetc(file)) != EOF)
    {
        if (ch == '\n')
        {
            count++;
        }
    }
    count += 1;

    rewind(file);
    return count;
}
|
||||
|
||||
/**
 * @brief Read every line of a text file into a malloc'd string array.
 *
 * @param filename   [in]  File path
 * @param line_count [out] Number of entries in the returned array
 * @return Array of malloc'd strings (release with free_lines()), or NULL on
 *         failure. Unused trailing entries are NULL.
 */
char** read_lines_from_file(const char* filename, int* line_count)
{
    FILE* file = fopen(filename, "r");
    if (file == NULL) {
        printf("Failed to open the file.\n");
        return NULL;
    }

    int num_lines = count_lines(file);
    printf("num_lines=%d\n", num_lines);
    char** lines = (char**)malloc(num_lines * sizeof(char*));
    if (lines == NULL) {
        fclose(file);
        return NULL;
    }
    memset(lines, 0, num_lines * sizeof(char*));

    char buffer[MAX_TEXT_LINE_LENGTH];
    int line_index = 0;

    /* Guard line_index: a line longer than the buffer makes fgets() return
     * more chunks than count_lines() counted lines; the original then wrote
     * past the end of the `lines` array. */
    while (line_index < num_lines && fgets(buffer, sizeof(buffer), file) != NULL) {
        buffer[strcspn(buffer, "\n")] = '\0'; /* strip the trailing newline */

        lines[line_index] = (char*)malloc(strlen(buffer) + 1);
        if (lines[line_index] == NULL) {
            break;  /* keep what we have; remaining entries stay NULL */
        }
        strcpy(lines[line_index], buffer);

        line_index++;
    }

    fclose(file);

    *line_count = num_lines;
    return lines;
}
|
||||
|
||||
/**
 * @brief Release a string array produced by read_lines_from_file().
 *
 * NULL entries are skipped; the array itself is freed last.
 */
void free_lines(char** lines, int line_count)
{
    int i;
    for (i = 0; i < line_count; i++) {
        char* line = lines[i];
        if (line != NULL) {
            free(line);
        }
    }
    free(lines);
}
|
||||
@@ -0,0 +1,48 @@
|
||||
#ifndef _RKNN_MODEL_ZOO_FILE_UTILS_H_
#define _RKNN_MODEL_ZOO_FILE_UTILS_H_

#ifdef __cplusplus
extern "C" {
#endif

/**
 * @brief Read data from file
 *
 * @param path [in] File path
 * @param out_data [out] Read data (malloc'd; caller must free)
 * @return int -1: error; > 0: Read data size
 */
int read_data_from_file(const char *path, char **out_data);

/**
 * @brief Write data to file
 *
 * @param path [in] File path
 * @param data [in] Write data
 * @param size [in] Write data size
 * @return int 0: success; -1: error
 */
int write_data_to_file(const char *path, const char *data, unsigned int size);

/**
 * @brief Read all lines from text file
 *
 * @param path [in] File path
 * @param line_count [out] File line count
 * @return char** String array of all lines; remember to call free_lines() to release it after use
 */
char** read_lines_from_file(const char* path, int* line_count);

/**
 * @brief Free lines string array
 *
 * @param lines [in] String array
 * @param line_count [in] Line count
 */
void free_lines(char** lines, int line_count);

#ifdef __cplusplus
} // extern "C"
#endif

#endif //_RKNN_MODEL_ZOO_FILE_UTILS_H_
|
||||
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
@@ -0,0 +1,89 @@
|
||||
#ifndef _RKNN_MODEL_ZOO_IMAGE_DRAWING_H_
|
||||
#define _RKNN_MODEL_ZOO_IMAGE_DRAWING_H_
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
#include "common.h"
|
||||
|
||||
// Color Format ARGB8888
|
||||
#define COLOR_GREEN 0xFF00FF00
|
||||
#define COLOR_BLUE 0xFF0000FF
|
||||
#define COLOR_RED 0xFFFF0000
|
||||
#define COLOR_YELLOW 0xFFFFFF00
|
||||
#define COLOR_ORANGE 0xFFFF4500
|
||||
#define COLOR_BLACK 0xFF000000
|
||||
#define COLOR_WHITE 0xFFFFFFFF
|
||||
|
||||
/**
|
||||
* @brief Draw rectangle
|
||||
*
|
||||
* @param image [in] Image buffer
|
||||
* @param rx [in] Rectangle top left x
|
||||
* @param ry [in] Rectangle top left y
|
||||
* @param rw [in] Rectangle width
|
||||
* @param rh [in] Rectangle height
|
||||
* @param color [in] Rectangle line color
|
||||
* @param thickness [in] Rectangle line thickness
|
||||
*/
|
||||
void draw_rectangle(image_buffer_t* image, int rx, int ry, int rw, int rh, unsigned int color,
|
||||
int thickness);
|
||||
|
||||
/**
|
||||
* @brief Draw line
|
||||
*
|
||||
* @param image [in] Image buffer
|
||||
* @param x0 [in] Line begin point x
|
||||
* @param y0 [in] Line begin point y
|
||||
* @param x1 [in] Line end point x
|
||||
* @param y1 [in] Line end point y
|
||||
* @param color [in] Line color
|
||||
* @param thickness [in] Line thickness
|
||||
*/
|
||||
void draw_line(image_buffer_t* image, int x0, int y0, int x1, int y1, unsigned int color,
|
||||
int thickness);
|
||||
|
||||
/**
|
||||
* @brief Draw text (only support ASCII char)
|
||||
*
|
||||
* @param image [in] Image buffer
|
||||
* @param text [in] Text
|
||||
* @param x [in] Text position x
|
||||
* @param y [in] Text position y
|
||||
* @param color [in] Text color
|
||||
* @param fontsize [in] Text fontsize
|
||||
*/
|
||||
void draw_text(image_buffer_t* image, const char* text, int x, int y, unsigned int color,
|
||||
int fontsize);
|
||||
|
||||
/**
|
||||
* @brief Draw circle
|
||||
*
|
||||
* @param image [in] Image buffer
|
||||
* @param cx [in] Circle center x
|
||||
* @param cy [in] Circle center y
|
||||
* @param radius [in] Circle radius
|
||||
* @param color [in] Circle color
|
||||
* @param thickness [in] Circle thickness
|
||||
*/
|
||||
void draw_circle(image_buffer_t* image, int cx, int cy, int radius, unsigned int color,
|
||||
int thickness);
|
||||
|
||||
/**
|
||||
* @brief Draw image
|
||||
*
|
||||
* @param image [in] Target Image buffer
|
||||
* @param draw_img [in] Image for drawing
|
||||
* @param x [in] Target Image draw position x
|
||||
* @param y [in] Target Image draw position y
|
||||
* @param rw [in] Width of image for drawing
|
||||
* @param rh [in] Height of image for drawing
|
||||
*/
|
||||
void draw_image(image_buffer_t* image, unsigned char* draw_img, int x, int y, int rw, int rh);
|
||||
|
||||
#ifdef __cplusplus
|
||||
} // extern "C"
|
||||
#endif
|
||||
|
||||
#endif // _RKNN_MODEL_ZOO_IMAGE_DRAWING_H_
|
||||
@@ -0,0 +1,782 @@
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <dirent.h>
|
||||
#include <math.h>
|
||||
#include <sys/time.h>
|
||||
|
||||
#include "im2d.h"
|
||||
#include "drmrga.h"
|
||||
|
||||
#define STB_IMAGE_IMPLEMENTATION
|
||||
#define STBI_NO_THREAD_LOCALS
|
||||
#define STBI_ONLY_JPEG
|
||||
#define STBI_ONLY_PNG
|
||||
#include "stb_image.h"
|
||||
|
||||
#define STB_IMAGE_WRITE_IMPLEMENTATION
|
||||
#include "stb_image_write.h"
|
||||
|
||||
#include "turbojpeg.h"
|
||||
|
||||
#include "image_utils.h"
|
||||
#include "file_utils.h"
|
||||
|
||||
// Substrings used by image_file_filter() to select image-like files from a
// directory listing. NOTE(review): matched anywhere in the entry name via
// strstr(), not only as an extension — "my.jpg.txt" also matches.
// NULL-terminated.
static const char* filter_image_names[] = {
    "jpg",
    "jpeg",
    "JPG",
    "JPEG",
    "png",
    "PNG",
    "data",
    NULL
};

// Human-readable names for turbojpeg chroma-subsampling codes (indexed by TJSAMP_*).
static const char* subsampName[TJ_NUMSAMP] = {"4:4:4", "4:2:2", "4:2:0", "Grayscale", "4:4:0", "4:1:1"};

// Human-readable names for turbojpeg colorspace codes (indexed by TJCS_*).
static const char* colorspaceName[TJ_NUMCS] = {"RGB", "YCbCr", "GRAY", "CMYK", "YCCK"};
|
||||
|
||||
static int image_file_filter(const struct dirent *entry)
|
||||
{
|
||||
const char ** filter;
|
||||
|
||||
for (filter = filter_image_names; *filter; ++filter) {
|
||||
if(strstr(entry->d_name, *filter) != NULL) {
|
||||
return 1;
|
||||
}
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
/**
 * @brief Decode a JPEG file into an RGB888 image buffer using libjpeg-turbo.
 *
 * If image->virt_addr is already set, pixels are decoded into it (the caller
 * must guarantee it is large enough — TODO confirm at call sites); otherwise
 * a buffer is allocated here and ownership is transferred via
 * image->virt_addr on success.
 *
 * Fixes over the previous version: fopen/fread/malloc/header failures now
 * abort instead of falling through (previously a failed fopen led to a NULL
 * dereference); the ftell result is checked through a signed long (the old
 * `(unsigned long) < 0` test was always false); the tjhandle, the JPEG file
 * buffer, and a locally allocated output buffer are released on every error
 * path; and errors actually return -1 instead of 0.
 *
 * @param path  [in]  JPEG file path
 * @param image [out] Decoded RGB888 image
 * @return int 0: success; -1: error
 */
static int read_image_jpeg(const char* path, image_buffer_t* image)
{
    FILE* jpegFile = NULL;
    unsigned char* jpegBuf = NULL;
    unsigned char* sw_out_buf = NULL;
    tjhandle handle = NULL;
    long fsize = 0;
    unsigned long jpegSize = 0;
    int width = 0, height = 0;
    int subsample = 0, colorspace = 0;
    int sw_out_size = 0;
    int own_out_buf = 0;       /* 1 while this function owns sw_out_buf */
    int flags = 0;
    int pixelFormat = TJPF_RGB;
    unsigned short orientation = 1;
    int ret = 0;
    int result = -1;

    if ((jpegFile = fopen(path, "rb")) == NULL) {
        printf("open input file failure\n");
        return -1;
    }
    if (fseek(jpegFile, 0, SEEK_END) < 0 || (fsize = ftell(jpegFile)) < 0 ||
        fseek(jpegFile, 0, SEEK_SET) < 0) {
        printf("determining input file size failure\n");
        goto out;
    }
    if (fsize == 0) {
        printf("determining input file size, Input file contains no data\n");
        goto out;
    }
    jpegSize = (unsigned long)fsize;
    if ((jpegBuf = (unsigned char*)malloc(jpegSize * sizeof(unsigned char))) == NULL) {
        printf("allocating JPEG buffer\n");
        goto out;
    }
    if (fread(jpegBuf, jpegSize, 1, jpegFile) < 1) {
        printf("reading input file\n");
        goto out;
    }
    fclose(jpegFile);
    jpegFile = NULL;

    handle = tjInitDecompress();
    if (handle == NULL) {
        printf("tjInitDecompress failure, errorStr:%s\n", tjGetErrorStr());
        goto out;
    }
    ret = tjDecompressHeader3(handle, jpegBuf, jpegSize, &width, &height, &subsample, &colorspace);
    if (ret < 0) {
        printf("header file error, errorStr:%s, errorCode:%d\n", tjGetErrorStr(), tjGetErrorCode(handle));
        goto out;
    }

    /* 16-aligned crop size is informational only; later RGA stages prefer
     * 16-pixel alignment. */
    printf("origin size=%dx%d crop size=%dx%d\n", width, height, width / 16 * 16, height / 16 * 16);
    printf("input image: %d x %d, subsampling: %s, colorspace: %s, orientation: %d\n",
           width, height, subsampName[subsample], colorspaceName[colorspace], orientation);

    sw_out_size = width * height * 3;
    sw_out_buf = image->virt_addr;
    if (sw_out_buf == NULL) {
        sw_out_buf = (unsigned char*)malloc(sw_out_size * sizeof(unsigned char));
        own_out_buf = 1;
    }
    if (sw_out_buf == NULL) {
        printf("sw_out_buf is NULL\n");
        goto out;
    }

    /* turbojpeg convention: ret < 0 with a non-zero error code is a hard
     * failure; ret < 0 with error code 0 is only a warning. */
    ret = tjDecompress2(handle, jpegBuf, jpegSize, sw_out_buf, width, 0, height, pixelFormat, flags);
    if ((0 != tjGetErrorCode(handle)) && (ret < 0)) {
        printf("error : decompress to yuv failed, errorStr:%s, errorCode:%d\n", tjGetErrorStr(),
               tjGetErrorCode(handle));
        goto out;
    }
    if ((0 == tjGetErrorCode(handle)) && (ret < 0)) {
        printf("warning : errorStr:%s, errorCode:%d\n", tjGetErrorStr(), tjGetErrorCode(handle));
    }

    image->width = width;
    image->height = height;
    image->format = IMAGE_FORMAT_RGB888;
    image->virt_addr = sw_out_buf;
    image->size = sw_out_size;
    own_out_buf = 0;   /* ownership transferred to the caller */
    result = 0;

out:
    if (jpegFile != NULL) {
        fclose(jpegFile);
    }
    if (handle != NULL) {
        tjDestroy(handle);
    }
    if (jpegBuf != NULL) {
        free(jpegBuf);
    }
    if (own_out_buf && sw_out_buf != NULL) {
        free(sw_out_buf);
    }
    return result;
}
|
||||
|
||||
/**
 * @brief Read a raw ".data" file into an image buffer.
 *
 * If image->virt_addr is already set, file bytes are read straight into it
 * (the caller must guarantee capacity >= file size — TODO confirm at call
 * sites); otherwise a buffer of file size + 1 bytes is allocated,
 * NUL-terminated, and handed to the caller via image->virt_addr / size.
 *
 * Fixes over the previous version: a caller-owned buffer is no longer
 * free()d on read failure, the extra terminator byte is only written into
 * buffers allocated here (it could overflow a caller buffer before), the
 * FILE* is closed on every path, and ftell/malloc failures are checked.
 *
 * @param path  [in]  File path
 * @param image [in,out] Target buffer (virt_addr optionally preallocated)
 * @return int 0: success; -1: error
 */
static int read_image_raw(const char* path, image_buffer_t* image)
{
    FILE *fp = fopen(path, "rb");
    if (fp == NULL) {
        printf("fopen %s fail!\n", path);
        return -1;
    }
    long file_size;
    if (fseek(fp, 0, SEEK_END) < 0 || (file_size = ftell(fp)) < 0 ||
        fseek(fp, 0, SEEK_SET) < 0) {
        printf("fseek %s fail!\n", path);
        fclose(fp);
        return -1;
    }

    unsigned char *data = image->virt_addr;
    int own_buffer = 0;
    if (data == NULL) {
        data = (unsigned char *)malloc(file_size + 1);
        if (data == NULL) {
            printf("malloc %ld bytes fail!\n", file_size + 1);
            fclose(fp);
            return -1;
        }
        own_buffer = 1;
        data[file_size] = 0;   /* NUL-terminate only buffers we sized ourselves */
    }

    if ((long)fread(data, 1, file_size, fp) != file_size) {
        printf("fread %s fail!\n", path);
        if (own_buffer) {
            free(data);        /* never free a caller-owned buffer */
        }
        fclose(fp);
        return -1;
    }
    fclose(fp);

    if (own_buffer) {
        image->virt_addr = data;
        image->size = file_size;
    }
    return 0;
}
|
||||
|
||||
static int write_image_jpeg(const char* path, int quality, const image_buffer_t* image)
|
||||
{
|
||||
int ret;
|
||||
int jpegSubsamp = TJSAMP_422;
|
||||
unsigned char* jpegBuf = NULL;
|
||||
unsigned long jpegSize = 0;
|
||||
int flags = 0;
|
||||
|
||||
const unsigned char* data = image->virt_addr;
|
||||
int width = image->width;
|
||||
int height = image->height;
|
||||
int pixelFormat = TJPF_RGB;
|
||||
|
||||
tjhandle handle = tjInitCompress();
|
||||
|
||||
if (image->format == IMAGE_FORMAT_RGB888) {
|
||||
ret = tjCompress2(handle, data, width, 0, height, pixelFormat, &jpegBuf, &jpegSize, jpegSubsamp, quality, flags);
|
||||
} else {
|
||||
printf("write_image_jpeg: pixel format %d not support\n", image->format);
|
||||
return -1;
|
||||
}
|
||||
|
||||
// printf("ret=%d jpegBuf=%p jpegSize=%d\n", ret, jpegBuf, jpegSize);
|
||||
if (jpegBuf != NULL && jpegSize > 0) {
|
||||
write_data_to_file(path, (const char*)jpegBuf, jpegSize);
|
||||
tjFree(jpegBuf);
|
||||
}
|
||||
tjDestroy(handle);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
/**
 * @brief Decode an image file with stb_image (fallback for non-JPEG,
 *        non-raw extensions, e.g. PNG).
 *
 * If image->virt_addr is already set, pixels are copied into it
 * (NOTE(review): assumes the caller's buffer holds at least w*h*c bytes —
 * confirm at call sites); otherwise ownership of the stb-allocated buffer
 * is transferred to the caller via image->virt_addr.
 * NOTE(review): unlike read_image_jpeg, image->size is NOT updated here —
 * confirm callers do not rely on it after this path.
 *
 * @param path  [in]  Image path
 * @param image [out] Decoded image; format derived from the channel count
 * @return int 0: success; -1: error
 */
static int read_image_stb(const char* path, image_buffer_t* image)
{
    // Last argument 0 keeps the file's own channel count; c reports it.
    int w, h, c;
    unsigned char* pixeldata = stbi_load(path, &w, &h, &c, 0);
    if (!pixeldata) {
        printf("error: read image %s fail\n", path);
        return -1;
    }
    // printf("load image wxhxc=%dx%dx%d path=%s\n", w, h, c, path);
    int size = w * h * c;

    // Hand the pixels over: copy into the caller's buffer when provided,
    // otherwise keep stb's allocation.
    if (image->virt_addr != NULL) {
        memcpy(image->virt_addr, pixeldata, size);
        stbi_image_free(pixeldata);
    } else {
        image->virt_addr = pixeldata;
    }
    image->width = w;
    image->height = h;
    // Map the channel count onto the buffer format (default: 3-channel RGB).
    if (c == 4) {
        image->format = IMAGE_FORMAT_RGBA8888;
    } else if (c == 1) {
        image->format = IMAGE_FORMAT_GRAY8;
    } else {
        image->format = IMAGE_FORMAT_RGB888;
    }
    return 0;
}
|
||||
|
||||
/**
 * @brief Load an image from disk, dispatching on the file extension:
 *        ".data" -> raw dump, JPEG extensions -> turbojpeg decoder,
 *        everything else -> stb_image (png/bmp/...).
 *
 * @param path  [in]  Image path
 * @param image [out] Decoded image
 * @return int 0: success; negative on error (-666: path has no extension)
 */
int read_image(const char* path, image_buffer_t* image)
{
    static const char* jpeg_exts[] = { ".jpg", ".jpeg", ".JPG", ".JPEG", NULL };
    const char* ext = strrchr(path, '.');
    const char** e;

    if (ext == NULL) {
        // no extension -> no way to pick a decoder
        return -666;
    }
    if (strcmp(ext, ".data") == 0) {
        return read_image_raw(path, image);
    }
    for (e = jpeg_exts; *e != NULL; e++) {
        if (strcmp(ext, *e) == 0) {
            return read_image_jpeg(path, image);
        }
    }
    return read_image_stb(path, image);
}
|
||||
|
||||
int write_image(const char* path, const image_buffer_t* img)
|
||||
{
|
||||
int ret;
|
||||
int width = img->width;
|
||||
int height = img->height;
|
||||
int channel = 3;
|
||||
void* data = img->virt_addr;
|
||||
printf("write_image path: %s width=%d height=%d channel=%d data=%p\n",
|
||||
path, width, height, channel, data);
|
||||
|
||||
const char* _ext = strrchr(path, '.');
|
||||
if (!_ext) {
|
||||
// missing extension
|
||||
return -1;
|
||||
}
|
||||
if (strcmp(_ext, ".jpg") == 0 || strcmp(_ext, ".jpeg") == 0 || strcmp(_ext, ".JPG") == 0 ||
|
||||
strcmp(_ext, ".JPEG") == 0) {
|
||||
int quality = 95;
|
||||
ret = write_image_jpeg(path, quality, img);
|
||||
} else if (strcmp(_ext, ".png") == 0 | strcmp(_ext, ".PNG") == 0) {
|
||||
ret = stbi_write_png(path, width, height, channel, data, 0);
|
||||
} else if (strcmp(_ext, ".data") == 0 | strcmp(_ext, ".DATA") == 0) {
|
||||
int size = get_image_size(img);
|
||||
ret = write_data_to_file(path, data, size);
|
||||
} else {
|
||||
// unknown extension type
|
||||
return -1;
|
||||
}
|
||||
return ret;
|
||||
}
|
||||
|
||||
/**
 * @brief Crop a region out of src and bilinearly scale it into a region of
 *        dst (plain-C reference path; pixels are interleaved, `channel`
 *        bytes each).
 *
 * @param channel     [in]  Bytes per pixel (1 = gray/Y, 2 = UV pairs, 3 = RGB, 4 = RGBA)
 * @param src         [in]  Source pixel buffer (src_width x src_height)
 * @param crop_x      [in]  Top-left x of the source crop region
 * @param crop_y      [in]  Top-left y of the source crop region
 * @param crop_width  [in]  Source crop region width
 * @param crop_height [in]  Source crop region height
 * @param dst         [out] Destination pixel buffer (dst_width x dst_height)
 * @param dst_box_x   [in]  Top-left x of the destination region
 * @param dst_box_y   [in]  Top-left y of the destination region
 * @param dst_box_width  [in] Destination region width
 * @param dst_box_height [in] Destination region height
 * @return int 0: success; -1: dst is NULL
 *
 * NOTE(review): no bounds validation — assumes the crop box lies inside src
 * and the dst box inside dst; confirm at call sites.
 */
static int crop_and_scale_image_c(int channel, unsigned char *src, int src_width, int src_height,
                                  int crop_x, int crop_y, int crop_width, int crop_height,
                                  unsigned char *dst, int dst_width, int dst_height,
                                  int dst_box_x, int dst_box_y, int dst_box_width, int dst_box_height) {
    if (dst == NULL) {
        printf("dst buffer is null\n");
        return -1;
    }

    // Source pixels stepped per destination pixel (>1 shrinks, <1 grows).
    float x_ratio = (float)crop_width / (float)dst_box_width;
    float y_ratio = (float)crop_height / (float)dst_box_height;

    // printf("src_width=%d src_height=%d crop_x=%d crop_y=%d crop_width=%d crop_height=%d\n",
    //     src_width, src_height, crop_x, crop_y, crop_width, crop_height);
    // printf("dst_width=%d dst_height=%d dst_box_x=%d dst_box_y=%d dst_box_width=%d dst_box_height=%d\n",
    //     dst_width, dst_height, dst_box_x, dst_box_y, dst_box_width, dst_box_height);
    // printf("channel=%d x_ratio=%f y_ratio=%f\n", channel, x_ratio, y_ratio);

    // Read from the source crop region and bilinearly scale into the
    // destination region: for every destination pixel, blend the four
    // neighbouring source pixels weighted by the fractional offsets.
    for (int dst_y = dst_box_y; dst_y < dst_box_y + dst_box_height; dst_y++) {
        for (int dst_x = dst_box_x; dst_x < dst_box_x + dst_box_width; dst_x++) {
            int dst_x_offset = dst_x - dst_box_x;
            int dst_y_offset = dst_y - dst_box_y;

            // Integer source coordinate (top-left of the 2x2 neighbourhood).
            int src_x = (int)(dst_x_offset * x_ratio) + crop_x;
            int src_y = (int)(dst_y_offset * y_ratio) + crop_y;

            // Fractional part -> interpolation weights.
            float x_diff = (dst_x_offset * x_ratio) - (src_x - crop_x);
            float y_diff = (dst_y_offset * y_ratio) - (src_y - crop_y);

            int index1 = src_y * src_width * channel + src_x * channel;
            int index2 = index1 + src_width * channel; // down
            if (src_y == src_height - 1) {
                // At the bottom edge of the image, sample the row above instead.
                index2 = index1 - src_width * channel;
            }
            int index3 = index1 + 1 * channel; // right
            int index4 = index2 + 1 * channel; // down right
            if (src_x == src_width - 1) {
                // At the right edge of the image, sample the column to the left instead.
                index3 = index1 - 1 * channel;
                index4 = index2 - 1 * channel;
            }

            // printf("dst_x=%d dst_y=%d dst_x_offset=%d dst_y_offset=%d src_x=%d src_y=%d x_diff=%f y_diff=%f src index=%d %d %d %d\n",
            //     dst_x, dst_y, dst_x_offset, dst_y_offset,
            //     src_x, src_y, x_diff, y_diff,
            //     index1, index2, index3, index4);

            // Blend each channel independently: A=top-left, B=top-right,
            // C=bottom-left, D=bottom-right.
            for (int c = 0; c < channel; c++) {
                unsigned char A = src[index1+c];
                unsigned char B = src[index3+c];
                unsigned char C = src[index2+c];
                unsigned char D = src[index4+c];

                unsigned char pixel = (unsigned char)(
                    A * (1 - x_diff) * (1 - y_diff) +
                    B * x_diff * (1 - y_diff) +
                    C * y_diff * (1 - x_diff) +
                    D * x_diff * y_diff
                );

                dst[(dst_y * dst_width + dst_x) * channel + c] = pixel;
            }
        }
    }

    return 0;
}
|
||||
|
||||
/**
 * @brief Crop/scale a YUV420 semi-planar (NV12/NV21) image: the full-res Y
 *        plane and the half-res interleaved UV plane are scaled separately
 *        via crop_and_scale_image_c().
 *
 * Fix over the previous version: the destination box coordinates passed for
 * the UV plane were not halved even though the destination plane dimensions
 * were, so the chroma landed at twice the intended offset/size whenever the
 * destination box was not the full image.
 *
 * @param src/dst   [in/out] YUV420SP buffers (Y plane then UV plane)
 * @param crop_*    [in] Source crop box (full-resolution coordinates)
 * @param dst_box_* [in] Destination box (full-resolution coordinates)
 * @return int 0: success
 */
static int crop_and_scale_image_yuv420sp(unsigned char *src, int src_width, int src_height,
                                         int crop_x, int crop_y, int crop_width, int crop_height,
                                         unsigned char *dst, int dst_width, int dst_height,
                                         int dst_box_x, int dst_box_y, int dst_box_width, int dst_box_height) {

    unsigned char* src_y = src;
    unsigned char* src_uv = src + src_width * src_height;

    unsigned char* dst_y = dst;
    unsigned char* dst_uv = dst + dst_width * dst_height;

    // Y plane: full resolution.
    crop_and_scale_image_c(1, src_y, src_width, src_height, crop_x, crop_y, crop_width, crop_height,
                           dst_y, dst_width, dst_height, dst_box_x, dst_box_y, dst_box_width, dst_box_height);

    // UV plane: half resolution in each dimension, 2 bytes (U+V) per sample,
    // so ALL coordinates — including the destination box — are halved.
    crop_and_scale_image_c(2, src_uv, src_width / 2, src_height / 2, crop_x / 2, crop_y / 2, crop_width / 2, crop_height / 2,
                           dst_uv, dst_width / 2, dst_height / 2, dst_box_x / 2, dst_box_y / 2, dst_box_width / 2, dst_box_height / 2);

    return 0;
}
|
||||
|
||||
/**
 * @brief CPU fallback for convert_image(): crop src_box out of src and
 *        bilinearly scale it into dst_box of dst. Source and destination
 *        must share the same pixel format.
 *
 * Fixes over the previous version: the YUV branch tested
 * `NV12 || NV12` (copy-paste), rejecting NV21 input; unsupported formats
 * fell through with reti == 0 and were reported as success — they now
 * return -1; dead locals (`ret`, `need_release_dst_buffer`) removed.
 *
 * @param src     [in]  Source image (virt_addr must be set)
 * @param dst     [out] Target image (virt_addr must be set, same format)
 * @param src_box [in]  Source crop rectangle, or NULL for the full image
 * @param dst_box [in]  Target rectangle, or NULL for the full image
 * @param color   [in]  Padding byte used when dst_box does not cover dst
 * @return int 0: success; -1: error
 */
static int convert_image_cpu(image_buffer_t *src, image_buffer_t *dst, image_rect_t *src_box, image_rect_t *dst_box, char color) {
    if (dst->virt_addr == NULL) {
        return -1;
    }
    if (src->virt_addr == NULL) {
        return -1;
    }
    if (src->format != dst->format) {
        return -1;
    }

    // Default both boxes to the whole image when none is given.
    int src_box_x = 0;
    int src_box_y = 0;
    int src_box_w = src->width;
    int src_box_h = src->height;
    if (src_box != NULL) {
        src_box_x = src_box->left;
        src_box_y = src_box->top;
        src_box_w = src_box->right - src_box->left + 1;
        src_box_h = src_box->bottom - src_box->top + 1;
    }
    int dst_box_x = 0;
    int dst_box_y = 0;
    int dst_box_w = dst->width;
    int dst_box_h = dst->height;
    if (dst_box != NULL) {
        dst_box_x = dst_box->left;
        dst_box_y = dst_box->top;
        dst_box_w = dst_box->right - dst_box->left + 1;
        dst_box_h = dst_box->bottom - dst_box->top + 1;
    }

    // Pre-fill the destination with the pad color when the target box does
    // not cover it completely (letterbox padding).
    if (dst_box_w != dst->width || dst_box_h != dst->height) {
        int dst_size = get_image_size(dst);
        memset(dst->virt_addr, color, dst_size);
    }

    int reti = 0;
    if (src->format == IMAGE_FORMAT_RGB888) {
        reti = crop_and_scale_image_c(3, src->virt_addr, src->width, src->height,
                                      src_box_x, src_box_y, src_box_w, src_box_h,
                                      dst->virt_addr, dst->width, dst->height,
                                      dst_box_x, dst_box_y, dst_box_w, dst_box_h);
    } else if (src->format == IMAGE_FORMAT_RGBA8888) {
        reti = crop_and_scale_image_c(4, src->virt_addr, src->width, src->height,
                                      src_box_x, src_box_y, src_box_w, src_box_h,
                                      dst->virt_addr, dst->width, dst->height,
                                      dst_box_x, dst_box_y, dst_box_w, dst_box_h);
    } else if (src->format == IMAGE_FORMAT_GRAY8) {
        reti = crop_and_scale_image_c(1, src->virt_addr, src->width, src->height,
                                      src_box_x, src_box_y, src_box_w, src_box_h,
                                      dst->virt_addr, dst->width, dst->height,
                                      dst_box_x, dst_box_y, dst_box_w, dst_box_h);
    } else if (src->format == IMAGE_FORMAT_YUV420SP_NV12 || src->format == IMAGE_FORMAT_YUV420SP_NV21) {
        reti = crop_and_scale_image_yuv420sp(src->virt_addr, src->width, src->height,
                                             src_box_x, src_box_y, src_box_w, src_box_h,
                                             dst->virt_addr, dst->width, dst->height,
                                             dst_box_x, dst_box_y, dst_box_w, dst_box_h);
    } else {
        printf("no support format %d\n", src->format);
        return -1;
    }
    if (reti != 0) {
        printf("convert_image_cpu fail %d\n", reti);
        return -1;
    }
    printf("finish\n");
    return 0;
}
|
||||
|
||||
/**
 * @brief Map an image_format_t value onto the matching librga
 *        RK_FORMAT_* constant.
 *
 * @param fmt [in] Image format
 * @return int RK_FORMAT_* value, or -1 for unsupported formats
 */
static int get_rga_fmt(image_format_t fmt) {
    if (fmt == IMAGE_FORMAT_RGB888) {
        return RK_FORMAT_RGB_888;
    }
    if (fmt == IMAGE_FORMAT_RGBA8888) {
        return RK_FORMAT_RGBA_8888;
    }
    if (fmt == IMAGE_FORMAT_YUV420SP_NV12) {
        return RK_FORMAT_YCbCr_420_SP;
    }
    if (fmt == IMAGE_FORMAT_YUV420SP_NV21) {
        return RK_FORMAT_YCrCb_420_SP;
    }
    return -1;
}
|
||||
|
||||
/**
 * @brief Compute the byte size of an image buffer from its dimensions and
 *        pixel format.
 *
 * Fix over the previous version: the unknown-format path fell off the end
 * of the function without a return statement (undefined behavior); it now
 * returns 0, matching the NULL-image case.
 *
 * @param image [in] Image (may be NULL)
 * @return int buffer size in bytes; 0 for NULL image or unknown format
 */
int get_image_size(image_buffer_t* image)
{
    if (image == NULL) {
        return 0;
    }
    switch (image->format)
    {
    case IMAGE_FORMAT_GRAY8:
        return image->width * image->height;
    case IMAGE_FORMAT_RGB888:
        return image->width * image->height * 3;
    case IMAGE_FORMAT_RGBA8888:
        return image->width * image->height * 4;
    case IMAGE_FORMAT_YUV420SP_NV12:
    case IMAGE_FORMAT_YUV420SP_NV21:
        // semi-planar 4:2:0 -> 1 byte Y + 0.5 byte UV per pixel
        return image->width * image->height * 3 / 2;
    default:
        return 0;
    }
}
|
||||
|
||||
/**
 * @brief Hardware path for convert_image(): crop/scale/convert src_img into
 *        dst_img on the Rockchip RGA 2D engine via the im2d API.
 *
 * Buffers are wrapped preferring physical address > dma-buf fd > virtual
 * address; with LIBRGA_IM2D_HANDLE the newer import/release handle API is
 * used. When the destination box does not cover the whole destination, the
 * destination is first filled with the pad color (imfill, with a CPU memset
 * fallback).
 *
 * @param src_img [in]  Source image
 * @param dst_img [out] Target image
 * @param src_box [in]  Source crop rectangle, or NULL for the full image
 * @param dst_box [in]  Target rectangle, or NULL for the full image
 * @param color   [in]  Padding byte for uncovered destination area
 * @return int 0: success; -1: error (caller falls back to the CPU path)
 */
static int convert_image_rga(image_buffer_t* src_img, image_buffer_t* dst_img, image_rect_t* src_box, image_rect_t* dst_box, char color)
{
    int ret = 0;

    // Source buffer description.
    int srcWidth = src_img->width;
    int srcHeight = src_img->height;
    void *src = src_img->virt_addr;
    int src_fd = src_img->fd;
    void *src_phy = NULL;   // physical-address path is not used by this caller
    int srcFmt = get_rga_fmt(src_img->format);

    // Destination buffer description.
    int dstWidth = dst_img->width;
    int dstHeight = dst_img->height;
    void *dst = dst_img->virt_addr;
    int dst_fd = dst_img->fd;
    void *dst_phy = NULL;
    int dstFmt = get_rga_fmt(dst_img->format);

    int rotate = 0;   // no rotation requested

    // Newer librga builds require explicit buffer handles.
    int use_handle = 0;
#if defined(LIBRGA_IM2D_HANDLE)
    use_handle = 1;
#endif

    // printf("src width=%d height=%d fmt=0x%x virAddr=0x%p fd=%d\n",
    //     srcWidth, srcHeight, srcFmt, src, src_fd);
    // printf("dst width=%d height=%d fmt=0x%x virAddr=0x%p fd=%d\n",
    //     dstWidth, dstHeight, dstFmt, dst, dst_fd);
    // printf("rotate=%d\n", rotate);

    int usage = 0;
    IM_STATUS ret_rga = IM_STATUS_NOERROR;

    // set rga usage
    usage |= rotate;

    // set rga rect — srect/drect default to the full image when no box given
    im_rect srect;
    im_rect drect;
    im_rect prect;   // pattern rect, unused (zeroed)
    memset(&prect, 0, sizeof(im_rect));

    if (src_box != NULL) {
        srect.x = src_box->left;
        srect.y = src_box->top;
        srect.width = src_box->right - src_box->left + 1;
        srect.height = src_box->bottom - src_box->top + 1;
    } else {
        srect.x = 0;
        srect.y = 0;
        srect.width = srcWidth;
        srect.height = srcHeight;
    }

    if (dst_box != NULL) {
        drect.x = dst_box->left;
        drect.y = dst_box->top;
        drect.width = dst_box->right - dst_box->left + 1;
        drect.height = dst_box->bottom - dst_box->top + 1;
    } else {
        drect.x = 0;
        drect.y = 0;
        drect.width = dstWidth;
        drect.height = dstHeight;
    }

    // set rga buffer
    rga_buffer_t rga_buf_src;
    rga_buffer_t rga_buf_dst;
    rga_buffer_t pat;   // pattern buffer, unused (zeroed)
    rga_buffer_handle_t rga_handle_src = 0;
    rga_buffer_handle_t rga_handle_dst = 0;
    memset(&pat, 0, sizeof(rga_buffer_t));

    im_handle_param_t in_param;
    in_param.width = srcWidth;
    in_param.height = srcHeight;
    in_param.format = srcFmt;

    im_handle_param_t dst_param;
    dst_param.width = dstWidth;
    dst_param.height = dstHeight;
    dst_param.format = dstFmt;

    // Import/wrap the source buffer.
    if (use_handle) {
        if (src_phy != NULL) {
            rga_handle_src = importbuffer_physicaladdr((uint64_t)src_phy, &in_param);
        } else if (src_fd > 0) {
            rga_handle_src = importbuffer_fd(src_fd, &in_param);
        } else {
            rga_handle_src = importbuffer_virtualaddr(src, &in_param);
        }
        if (rga_handle_src <= 0) {
            printf("src handle error %d\n", rga_handle_src);
            ret = -1;
            goto err;
        }
        rga_buf_src = wrapbuffer_handle(rga_handle_src, srcWidth, srcHeight, srcFmt, srcWidth, srcHeight);
    } else {
        if (src_phy != NULL) {
            rga_buf_src = wrapbuffer_physicaladdr(src_phy, srcWidth, srcHeight, srcFmt, srcWidth, srcHeight);
        } else if (src_fd > 0) {
            rga_buf_src = wrapbuffer_fd(src_fd, srcWidth, srcHeight, srcFmt, srcWidth, srcHeight);
        } else {
            rga_buf_src = wrapbuffer_virtualaddr(src, srcWidth, srcHeight, srcFmt, srcWidth, srcHeight);
        }
    }

    // Import/wrap the destination buffer.
    if (use_handle) {
        if (dst_phy != NULL) {
            rga_handle_dst = importbuffer_physicaladdr((uint64_t)dst_phy, &dst_param);
        } else if (dst_fd > 0) {
            rga_handle_dst = importbuffer_fd(dst_fd, &dst_param);
        } else {
            rga_handle_dst = importbuffer_virtualaddr(dst, &dst_param);
        }
        if (rga_handle_dst <= 0) {
            printf("dst handle error %d\n", rga_handle_dst);
            ret = -1;
            goto err;
        }
        rga_buf_dst = wrapbuffer_handle(rga_handle_dst, dstWidth, dstHeight, dstFmt, dstWidth, dstHeight);
    } else {
        if (dst_phy != NULL) {
            rga_buf_dst = wrapbuffer_physicaladdr(dst_phy, dstWidth, dstHeight, dstFmt, dstWidth, dstHeight);
        } else if (dst_fd > 0) {
            rga_buf_dst = wrapbuffer_fd(dst_fd, dstWidth, dstHeight, dstFmt, dstWidth, dstHeight);
        } else {
            rga_buf_dst = wrapbuffer_virtualaddr(dst, dstWidth, dstHeight, dstFmt, dstWidth, dstHeight);
        }
    }

    // Pre-fill the destination with the pad color when the target box does
    // not cover it completely; fall back to memset if imfill fails.
    if (drect.width != dstWidth || drect.height != dstHeight) {
        im_rect dst_whole_rect = {0, 0, dstWidth, dstHeight};
        int imcolor;
        // NOTE(review): int* assigned to char* to write the 4 color bytes —
        // works here but relies on an implicit pointer conversion (C only);
        // would need an explicit cast in C++.
        char* p_imcolor = &imcolor;
        p_imcolor[0] = color;
        p_imcolor[1] = color;
        p_imcolor[2] = color;
        p_imcolor[3] = color;
        printf("fill dst image (x y w h)=(%d %d %d %d) with color=0x%x\n",
               dst_whole_rect.x, dst_whole_rect.y, dst_whole_rect.width, dst_whole_rect.height, imcolor);
        ret_rga = imfill(rga_buf_dst, dst_whole_rect, imcolor);
        if (ret_rga <= 0) {
            if (dst != NULL) {
                size_t dst_size = get_image_size(dst_img);
                memset(dst, color, dst_size);
            } else {
                printf("Warning: Can not fill color on target image\n");
            }
        }
    }

    // rga process — IM_STATUS values <= 0 indicate failure.
    ret_rga = improcess(rga_buf_src, rga_buf_dst, pat, srect, drect, prect, usage);
    if (ret_rga <= 0) {
        printf("Error on improcess STATUS=%d\n", ret_rga);
        printf("RGA error message: %s\n", imStrError((IM_STATUS)ret_rga));
        ret = -1;
    }

err:
    // Imported handles must always be released (handles are 0 if unused).
    if (rga_handle_src > 0) {
        releasebuffer_handle(rga_handle_src);
    }

    if (rga_handle_dst > 0) {
        releasebuffer_handle(rga_handle_dst);
    }

    // printf("finish\n");
    return ret;
}
|
||||
|
||||
/**
 * @brief Crop/scale/convert src_img into dst_img. Tries the RGA hardware
 *        path first and falls back to the CPU implementation when RGA
 *        fails. Log output is unchanged from the previous version.
 *
 * @param src_img [in]  Source image
 * @param dst_img [out] Target image
 * @param src_box [in]  Source crop rectangle, or NULL for the full image
 * @param dst_box [in]  Target rectangle, or NULL for the full image
 * @param color   [in]  Padding byte for uncovered destination area
 * @return int 0: success; -1: error
 */
int convert_image(image_buffer_t* src_img, image_buffer_t* dst_img, image_rect_t* src_box, image_rect_t* dst_box, char color)
{
    printf("src width=%d height=%d fmt=0x%x virAddr=0x%p fd=%d\n",
           src_img->width, src_img->height, src_img->format, src_img->virt_addr, src_img->fd);
    printf("dst width=%d height=%d fmt=0x%x virAddr=0x%p fd=%d\n",
           dst_img->width, dst_img->height, dst_img->format, dst_img->virt_addr, dst_img->fd);
    if (src_box != NULL) {
        printf("src_box=(%d %d %d %d)\n", src_box->left, src_box->top, src_box->right, src_box->bottom);
    }
    if (dst_box != NULL) {
        printf("dst_box=(%d %d %d %d)\n", dst_box->left, dst_box->top, dst_box->right, dst_box->bottom);
    }
    printf("color=0x%x\n", color);

    int status = convert_image_rga(src_img, dst_img, src_box, dst_box, color);
    if (status == 0) {
        return 0;
    }
    printf("try convert image use cpu\n");
    return convert_image_cpu(src_img, dst_img, src_box, dst_box, color);
}
|
||||
|
||||
/**
 * @brief Scale src_image into dst_image preserving aspect ratio
 *        (letterbox): the image is resized by the limiting scale factor,
 *        centered, and the remaining border is filled with `color`.
 *
 * The resize is snapped down to a width multiple of 4 and a height multiple
 * of 2 (hardware alignment), and the centering offset is snapped to an even
 * value.
 *
 * @param src_image [in]  Source image
 * @param dst_image [out] Target image; if virt_addr is NULL and fd unset, a
 *                        buffer is malloc'd here — the CALLER must free it
 * @param letterbox [out] Optional: receives scale and x/y padding offsets
 *                        (needed to map detections back to source coords)
 * @param color     [in]  Fill color for the padded border
 * @return int 0: success; -1: error
 */
int convert_image_with_letterbox(image_buffer_t* src_image, image_buffer_t* dst_image, letterbox_t* letterbox, char color)
{
    int ret = 0;
    int allow_slight_change = 1;   // permit the alignment snapping below
    int src_w = src_image->width;
    int src_h = src_image->height;
    int dst_w = dst_image->width;
    int dst_h = dst_image->height;
    int resize_w = dst_w;
    int resize_h = dst_h;

    int padding_w = 0;
    int padding_h = 0;

    int _left_offset = 0;
    int _top_offset = 0;
    float scale = 1.0;

    // Source box: always the whole source image.
    image_rect_t src_box;
    src_box.left = 0;
    src_box.top = 0;
    src_box.right = src_image->width - 1;
    src_box.bottom = src_image->height - 1;

    // Destination box: starts as the whole target, shrunk/centered below.
    image_rect_t dst_box;
    dst_box.left = 0;
    dst_box.top = 0;
    dst_box.right = dst_image->width - 1;
    dst_box.bottom = dst_image->height - 1;

    // Use the smaller of the two scale factors so the image fits entirely;
    // the other dimension shrinks and gets padded.
    float _scale_w = (float)dst_w / src_w;
    float _scale_h = (float)dst_h / src_h;
    if(_scale_w < _scale_h) {
        scale = _scale_w;
        // (int) binds to src_h; the float product is truncated on assignment
        resize_h = (int) src_h*scale;
    } else {
        scale = _scale_h;
        resize_w = (int) src_w*scale;
    }
    // slight change image size for align
    if (allow_slight_change == 1 && (resize_w % 4 != 0)) {
        resize_w -= resize_w % 4;
    }
    if (allow_slight_change == 1 && (resize_h % 2 != 0)) {
        resize_h -= resize_h % 2;
    }
    // padding
    padding_h = dst_h - resize_h;
    padding_w = dst_w - resize_w;
    // center the resized image, snapping the offset to an even value
    if (_scale_w < _scale_h) {
        dst_box.top = padding_h / 2;
        if (dst_box.top % 2 != 0) {
            dst_box.top -= dst_box.top % 2;
            if (dst_box.top < 0) {
                dst_box.top = 0;
            }
        }
        dst_box.bottom = dst_box.top + resize_h - 1;
        _top_offset = dst_box.top;
    } else {
        dst_box.left = padding_w / 2;
        if (dst_box.left % 2 != 0) {
            dst_box.left -= dst_box.left % 2;
            if (dst_box.left < 0) {
                dst_box.left = 0;
            }
        }
        dst_box.right = dst_box.left + resize_w - 1;
        _left_offset = dst_box.left;
    }
    printf("scale=%f dst_box=(%d %d %d %d) allow_slight_change=%d _left_offset=%d _top_offset=%d padding_w=%d padding_h=%d\n",
           scale, dst_box.left, dst_box.top, dst_box.right, dst_box.bottom, allow_slight_change,
           _left_offset, _top_offset, padding_w, padding_h);

    //set offset and scale
    if(letterbox != NULL){
        letterbox->scale = scale;
        letterbox->x_pad = _left_offset;
        letterbox->y_pad = _top_offset;
    }
    // alloc memory buffer for dst image,
    // remember to free
    if (dst_image->virt_addr == NULL && dst_image->fd <= 0) {
        int dst_size = get_image_size(dst_image);
        dst_image->virt_addr = (uint8_t *)malloc(dst_size);
        if (dst_image->virt_addr == NULL) {
            printf("malloc size %d error\n", dst_size);
            return -1;
        }
    }
    ret = convert_image(src_image, dst_image, &src_box, &dst_box, color);
    return ret;
}
|
||||
@@ -0,0 +1,73 @@
|
||||
#ifndef _RKNN_MODEL_ZOO_IMAGE_UTILS_H_
|
||||
#define _RKNN_MODEL_ZOO_IMAGE_UTILS_H_
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
#include "common.h"
|
||||
|
||||
/**
|
||||
* @brief LetterBox
|
||||
*
|
||||
*/
|
||||
typedef struct {
|
||||
int x_pad;
|
||||
int y_pad;
|
||||
float scale;
|
||||
} letterbox_t;
|
||||
|
||||
/**
|
||||
* @brief Read image file (support png/jpeg/bmp)
|
||||
*
|
||||
* @param path [in] Image path
|
||||
* @param image [out] Read image
|
||||
* @return int 0: success; -1: error
|
||||
*/
|
||||
int read_image(const char* path, image_buffer_t* image);
|
||||
|
||||
/**
|
||||
* @brief Write image file (support jpg/png)
|
||||
*
|
||||
* @param path [in] Image path
|
||||
* @param image [in] Image for write (only support IMAGE_FORMAT_RGB888)
|
||||
* @return int 0: success; -1: error
|
||||
*/
|
||||
int write_image(const char* path, const image_buffer_t* image);
|
||||
|
||||
/**
|
||||
* @brief Convert image for resize and pixel format change
|
||||
*
|
||||
* @param src_image [in] Source Image
|
||||
* @param dst_image [out] Target Image
|
||||
* @param src_box [in] Crop rectangle on source image
|
||||
* @param dst_box [in] Crop rectangle on target image
|
||||
 * @param color [in] Padding color used when dst_box does not fill the target image
|
||||
* @return int
|
||||
*/
|
||||
int convert_image(image_buffer_t* src_image, image_buffer_t* dst_image, image_rect_t* src_box, image_rect_t* dst_box, char color);
|
||||
|
||||
/**
|
||||
* @brief Convert image with letterbox
|
||||
*
|
||||
* @param src_image [in] Source Image
|
||||
* @param dst_image [out] Target Image
|
||||
* @param letterbox [out] Letterbox
|
||||
* @param color [in] Fill color on target image
|
||||
* @return int
|
||||
*/
|
||||
int convert_image_with_letterbox(image_buffer_t* src_image, image_buffer_t* dst_image, letterbox_t* letterbox, char color);
|
||||
|
||||
/**
|
||||
* @brief Get the image size
|
||||
*
|
||||
* @param image [in] Image
|
||||
* @return int image size
|
||||
*/
|
||||
int get_image_size(image_buffer_t* image);
|
||||
|
||||
#ifdef __cplusplus
|
||||
} // extern "C"
|
||||
#endif
|
||||
|
||||
#endif // _RKNN_MODEL_ZOO_IMAGE_UTILS_H_
|
||||
@@ -0,0 +1,500 @@
|
||||
// Copyright (c) 2021 by Rockchip Electronics Co., Ltd. All Rights Reserved.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
#include "yolov8.h"
|
||||
|
||||
#include <math.h>
|
||||
#include <stdint.h>
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
#include <sys/time.h>
|
||||
|
||||
#include <set>
|
||||
#include <vector>
|
||||
|
||||
#ifdef OPI5_BUILD
|
||||
#define LABEL_NALE_TXT_PATH "azaion_10_labels_list.txt"
|
||||
#else
|
||||
#define LABEL_NALE_TXT_PATH "coco_80_labels_list.txt"
|
||||
#endif
|
||||
|
||||
|
||||
static char *labels[OBJ_CLASS_NUM];
|
||||
|
||||
// Saturate val into [min, max] and truncate to int on return.
inline static int clamp(float val, int min, int max)
{
    if (val > min)
        return (val < max) ? val : max;
    return min;
}
|
||||
|
||||
// Read a single '\n'- or EOF-terminated line from fp into a freshly
// heap-allocated, NUL-terminated string that the caller must free().
// The incoming `buffer` argument is ignored and replaced. `*len` receives
// the number of characters read (terminator excluded).
// Returns NULL on EOF with no pending data, on a read error, or on OOM.
static char *readLine(FILE *fp, char *buffer, int *len)
{
    size_t count = 0;
    int ch;

    buffer = (char *)malloc(1);
    if (!buffer)
        return NULL; // out of memory

    // Grow the buffer one byte at a time while collecting characters.
    for (ch = fgetc(fp); ch != '\n' && ch != EOF; ch = fgetc(fp))
    {
        void *grown = realloc(buffer, count + 2);
        if (grown == NULL)
        {
            free(buffer);
            return NULL; // out of memory
        }
        buffer = (char *)grown;
        buffer[count++] = (char)ch;
    }
    buffer[count] = '\0';
    *len = (int)count;

    // Stream exhausted with nothing read, or a hard read error: no line.
    if (ch == EOF && (count == 0 || ferror(fp)))
    {
        free(buffer);
        return NULL;
    }
    return buffer;
}
|
||||
|
||||
static int readLines(const char *fileName, char *lines[], int max_line)
|
||||
{
|
||||
FILE *file = fopen(fileName, "r");
|
||||
char *s;
|
||||
int i = 0;
|
||||
int n = 0;
|
||||
|
||||
if (file == NULL)
|
||||
{
|
||||
printf("Open %s fail!\n", fileName);
|
||||
return -1;
|
||||
}
|
||||
|
||||
while ((s = readLine(file, s, &n)) != NULL)
|
||||
{
|
||||
lines[i++] = s;
|
||||
if (i >= max_line)
|
||||
break;
|
||||
}
|
||||
fclose(file);
|
||||
return i;
|
||||
}
|
||||
|
||||
static int loadLabelName(const char *locationFilename, char *label[])
|
||||
{
|
||||
printf("load lable %s\n", locationFilename);
|
||||
readLines(locationFilename, label, OBJ_CLASS_NUM);
|
||||
return 0;
|
||||
}
|
||||
|
||||
// Pixel-inclusive IoU of two axis-aligned boxes given by corner coordinates.
// Returns 0 when the boxes do not overlap (or the union is degenerate).
static float CalculateOverlap(float xmin0, float ymin0, float xmax0, float ymax0, float xmin1, float ymin1, float xmax1,
                              float ymax1)
{
    const float inter_w = fmax(0.f, fmin(xmax0, xmax1) - fmax(xmin0, xmin1) + 1.0);
    const float inter_h = fmax(0.f, fmin(ymax0, ymax1) - fmax(ymin0, ymin1) + 1.0);
    const float inter = inter_w * inter_h;
    const float uni = (xmax0 - xmin0 + 1.0) * (ymax0 - ymin0 + 1.0) + (xmax1 - xmin1 + 1.0) * (ymax1 - ymin1 + 1.0) - inter;
    if (uni <= 0.f)
        return 0.f;
    return inter / uni;
}
|
||||
|
||||
static int nms(int validCount, std::vector<float> &outputLocations, std::vector<int> classIds, std::vector<int> &order,
|
||||
int filterId, float threshold)
|
||||
{
|
||||
for (int i = 0; i < validCount; ++i)
|
||||
{
|
||||
if (order[i] == -1 || classIds[i] != filterId)
|
||||
{
|
||||
continue;
|
||||
}
|
||||
int n = order[i];
|
||||
for (int j = i + 1; j < validCount; ++j)
|
||||
{
|
||||
int m = order[j];
|
||||
if (m == -1 || classIds[i] != filterId)
|
||||
{
|
||||
continue;
|
||||
}
|
||||
float xmin0 = outputLocations[n * 4 + 0];
|
||||
float ymin0 = outputLocations[n * 4 + 1];
|
||||
float xmax0 = outputLocations[n * 4 + 0] + outputLocations[n * 4 + 2];
|
||||
float ymax0 = outputLocations[n * 4 + 1] + outputLocations[n * 4 + 3];
|
||||
|
||||
float xmin1 = outputLocations[m * 4 + 0];
|
||||
float ymin1 = outputLocations[m * 4 + 1];
|
||||
float xmax1 = outputLocations[m * 4 + 0] + outputLocations[m * 4 + 2];
|
||||
float ymax1 = outputLocations[m * 4 + 1] + outputLocations[m * 4 + 3];
|
||||
|
||||
float iou = CalculateOverlap(xmin0, ymin0, xmax0, ymax0, xmin1, ymin1, xmax1, ymax1);
|
||||
|
||||
if (iou > threshold)
|
||||
{
|
||||
order[j] = -1;
|
||||
}
|
||||
}
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
// In-place quicksort of `input` in DESCENDING order, permuting `indices`
// identically so indices[k] still names the original position of input[k].
// Returns the final pivot position (ignored by callers).
static int quick_sort_indice_inverse(std::vector<float> &input, int left, int right, std::vector<int> &indices)
{
    int lo = left;
    int hi = right;
    if (left < right)
    {
        // Hoare-style partition around the leftmost element.
        const float pivot = input[left];
        const int pivot_idx = indices[left];
        while (lo < hi)
        {
            // Scan from the right for an element larger than the pivot.
            while (lo < hi && input[hi] <= pivot)
            {
                hi--;
            }
            input[lo] = input[hi];
            indices[lo] = indices[hi];
            // Scan from the left for an element smaller than the pivot.
            while (lo < hi && input[lo] >= pivot)
            {
                lo++;
            }
            input[hi] = input[lo];
            indices[hi] = indices[lo];
        }
        input[lo] = pivot;
        indices[lo] = pivot_idx;
        quick_sort_indice_inverse(input, left, lo - 1, indices);
        quick_sort_indice_inverse(input, lo + 1, right, indices);
    }
    return lo;
}
|
||||
|
||||
static float sigmoid(float x)
{
    // Standard logistic function: maps any real x into (0, 1).
    const float e = expf(-x);
    return 1.0 / (1.0 + e);
}
|
||||
|
||||
static float unsigmoid(float y)
{
    // Inverse of sigmoid(): the logit of y, valid for y in (0, 1).
    return -1.0 * logf((1.0 / y) - 1.0);
}
|
||||
|
||||
// Saturate val into [min, max], then truncate toward zero on return.
inline static int32_t __clip(float val, float min, float max)
{
    float bounded = val;
    if (val <= min)
        bounded = min;
    else if (val >= max)
        bounded = max;
    return bounded;
}
|
||||
|
||||
// Affine-quantize a float into int8 space: scale, shift by the zero-point,
// and saturate to [-128, 127].
static int8_t qnt_f32_to_affine(float f32, int32_t zp, float scale)
{
    const float dst_val = (f32 / scale) + zp;
    return (int8_t)__clip(dst_val, -128, 127);
}
|
||||
|
||||
static float deqnt_affine_to_f32(int8_t qnt, int32_t zp, float scale)
{
    // Dequantize: remove the zero-point, then apply the scale.
    return ((float)qnt - (float)zp) * scale;
}
|
||||
|
||||
|
||||
void compute_dfl(float* tensor, int dfl_len, float* box){
|
||||
for (int b=0; b<4; b++){
|
||||
float exp_t[dfl_len];
|
||||
float exp_sum=0;
|
||||
float acc_sum=0;
|
||||
for (int i=0; i< dfl_len; i++){
|
||||
exp_t[i] = exp(tensor[i+b*dfl_len]);
|
||||
exp_sum += exp_t[i];
|
||||
}
|
||||
|
||||
for (int i=0; i< dfl_len; i++){
|
||||
acc_sum += exp_t[i]/exp_sum *i;
|
||||
}
|
||||
box[b] = acc_sum;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
// Decode one quantized (INT8) output branch of the YOLOv8 head.
// Scans every grid cell, keeps the best class whose score beats `threshold`,
// decodes the DFL box, and appends (x, y, w, h) in model-input pixels to
// `boxes`, the dequantized score to `objProbs`, and the class to `classId`.
// Returns the number of candidates appended.
static int process_i8(int8_t *box_tensor, int32_t box_zp, float box_scale,
                      int8_t *score_tensor, int32_t score_zp, float score_scale,
                      int8_t *score_sum_tensor, int32_t score_sum_zp, float score_sum_scale,
                      int grid_h, int grid_w, int stride, int dfl_len,
                      std::vector<float> &boxes,
                      std::vector<float> &objProbs,
                      std::vector<int> &classId,
                      float threshold)
{
    int validCount = 0;
    int grid_len = grid_h * grid_w;
    // Pre-quantize the threshold once so comparisons stay in int8 space.
    int8_t score_thres_i8 = qnt_f32_to_affine(threshold, score_zp, score_scale);
    int8_t score_sum_thres_i8 = qnt_f32_to_affine(threshold, score_sum_zp, score_sum_scale);

    for (int i = 0; i < grid_h; i++)
    {
        for (int j = 0; j < grid_w; j++)
        {
            int offset = i* grid_w + j;
            int max_class_id = -1;

            // Use the score-sum tensor (when present) as a fast early reject:
            // if the summed class scores are below threshold, no class can pass.
            if (score_sum_tensor != nullptr){
                if (score_sum_tensor[offset] < score_sum_thres_i8){
                    continue;
                }
            }

            // Find the best-scoring class for this cell. Tensors are laid out
            // channel-major, so stepping by grid_len walks the class axis.
            // NOTE(review): initializing max_score to -score_zp narrows int32
            // to int8 — presumably zp always fits; confirm.
            int8_t max_score = -score_zp;
            for (int c= 0; c< OBJ_CLASS_NUM; c++){
                if ((score_tensor[offset] > score_thres_i8) && (score_tensor[offset] > max_score))
                {
                    max_score = score_tensor[offset];
                    max_class_id = c;
                }
                offset += grid_len;
            }

            // compute box — only when some class beat the threshold.
            if (max_score> score_thres_i8){
                offset = i* grid_w + j;
                float box[4];
                // NOTE(review): C VLA — not standard C++, works on gcc/clang.
                float before_dfl[dfl_len*4];
                // Dequantize the 4*dfl_len DFL logits for this cell.
                for (int k=0; k< dfl_len*4; k++){
                    before_dfl[k] = deqnt_affine_to_f32(box_tensor[offset], box_zp, box_scale);
                    offset += grid_len;
                }
                compute_dfl(before_dfl, dfl_len, box);

                // DFL values are distances from the cell center to the four
                // sides; convert to an (x1, y1, w, h) box in input pixels.
                float x1,y1,x2,y2,w,h;
                x1 = (-box[0] + j + 0.5)*stride;
                y1 = (-box[1] + i + 0.5)*stride;
                x2 = (box[2] + j + 0.5)*stride;
                y2 = (box[3] + i + 0.5)*stride;
                w = x2 - x1;
                h = y2 - y1;
                boxes.push_back(x1);
                boxes.push_back(y1);
                boxes.push_back(w);
                boxes.push_back(h);

                objProbs.push_back(deqnt_affine_to_f32(max_score, score_zp, score_scale));
                classId.push_back(max_class_id);
                validCount ++;
            }
        }
    }
    return validCount;
}
|
||||
|
||||
// Decode one float32 output branch of the YOLOv8 head (non-quantized path).
// Mirrors process_i8() but compares raw float scores directly. Appends
// decoded (x, y, w, h) boxes, scores and class ids; returns the number of
// candidates appended.
static int process_fp32(float *box_tensor, float *score_tensor, float *score_sum_tensor,
                        int grid_h, int grid_w, int stride, int dfl_len,
                        std::vector<float> &boxes,
                        std::vector<float> &objProbs,
                        std::vector<int> &classId,
                        float threshold)
{
    int validCount = 0;
    int grid_len = grid_h * grid_w;
    for (int i = 0; i < grid_h; i++)
    {
        for (int j = 0; j < grid_w; j++)
        {
            int offset = i* grid_w + j;
            int max_class_id = -1;

            // Use the score-sum tensor (when present) as a fast early reject.
            if (score_sum_tensor != nullptr){
                if (score_sum_tensor[offset] < threshold){
                    continue;
                }
            }

            // Find the best-scoring class; tensors are channel-major, so
            // stepping by grid_len walks the class axis for this cell.
            float max_score = 0;
            for (int c= 0; c< OBJ_CLASS_NUM; c++){
                if ((score_tensor[offset] > threshold) && (score_tensor[offset] > max_score))
                {
                    max_score = score_tensor[offset];
                    max_class_id = c;
                }
                offset += grid_len;
            }

            // compute box — only when some class beat the threshold.
            if (max_score> threshold){
                offset = i* grid_w + j;
                float box[4];
                // NOTE(review): C VLA — not standard C++, works on gcc/clang.
                float before_dfl[dfl_len*4];
                for (int k=0; k< dfl_len*4; k++){
                    before_dfl[k] = box_tensor[offset];
                    offset += grid_len;
                }
                compute_dfl(before_dfl, dfl_len, box);

                // DFL distances from the cell center -> (x1, y1, w, h) in
                // model-input pixels.
                float x1,y1,x2,y2,w,h;
                x1 = (-box[0] + j + 0.5)*stride;
                y1 = (-box[1] + i + 0.5)*stride;
                x2 = (box[2] + j + 0.5)*stride;
                y2 = (box[3] + i + 0.5)*stride;
                w = x2 - x1;
                h = y2 - y1;
                boxes.push_back(x1);
                boxes.push_back(y1);
                boxes.push_back(w);
                boxes.push_back(h);

                objProbs.push_back(max_score);
                classId.push_back(max_class_id);
                validCount ++;
            }
        }
    }
    return validCount;
}
|
||||
|
||||
|
||||
// Decode all raw model outputs into a final, NMS-filtered detection list.
//
// @param app_ctx        [in]  model context (tensor attrs, geometry, quant flag)
// @param outputs        [in]  raw rknn output buffers for one inference
// @param letter_box     [in]  letterbox transform used during preprocessing;
//                             needed to map boxes back to source coordinates
// @param conf_threshold [in]  minimum class confidence to keep a candidate
// @param nms_threshold  [in]  IoU threshold for per-class NMS
// @param od_results     [out] final detections (capped at OBJ_NUMB_MAX_SIZE)
// @return int 0 (always; an empty result list means nothing was detected)
int post_process(rknn_app_context_t *app_ctx, rknn_output *outputs, letterbox_t *letter_box, float conf_threshold, float nms_threshold, object_detect_result_list *od_results)
{
    std::vector<float> filterBoxes; // candidate boxes as flat (x, y, w, h) quads
    std::vector<float> objProbs;    // confidence per candidate
    std::vector<int> classId;       // class id per candidate
    int validCount = 0;
    int stride = 0;
    int grid_h = 0;
    int grid_w = 0;
    int model_in_w = app_ctx->model_width;
    int model_in_h = app_ctx->model_height;

    memset(od_results, 0, sizeof(object_detect_result_list));

    // default 3 branch: the YOLOv8 head has 3 detection branches; each carries
    // either 2 outputs (box, score) or 3 (box, score, score-sum).
    // NOTE(review): dfl_len assumes output 0's dims[1] holds 4*dfl_len box
    // channels — confirm against the exported model layout.
    int dfl_len = app_ctx->output_attrs[0].dims[1] /4;
    int output_per_branch = app_ctx->io_num.n_output / 3;
    for (int i = 0; i < 3; i++)
    {
        // Optional score-sum tensor enables fast per-cell early rejection.
        void *score_sum = nullptr;
        int32_t score_sum_zp = 0;
        float score_sum_scale = 1.0;
        if (output_per_branch == 3){
            score_sum = outputs[i*output_per_branch + 2].buf;
            score_sum_zp = app_ctx->output_attrs[i*output_per_branch + 2].zp;
            score_sum_scale = app_ctx->output_attrs[i*output_per_branch + 2].scale;
        }
        int box_idx = i*output_per_branch;
        int score_idx = i*output_per_branch + 1;

        // Branch geometry: stride maps grid cells back to input pixels.
        grid_h = app_ctx->output_attrs[box_idx].dims[2];
        grid_w = app_ctx->output_attrs[box_idx].dims[3];
        stride = model_in_h / grid_h;

        // Pick the INT8 or float decode path to match the model.
        if (app_ctx->is_quant)
        {
            validCount += process_i8((int8_t *)outputs[box_idx].buf, app_ctx->output_attrs[box_idx].zp, app_ctx->output_attrs[box_idx].scale,
                                     (int8_t *)outputs[score_idx].buf, app_ctx->output_attrs[score_idx].zp, app_ctx->output_attrs[score_idx].scale,
                                     (int8_t *)score_sum, score_sum_zp, score_sum_scale,
                                     grid_h, grid_w, stride, dfl_len,
                                     filterBoxes, objProbs, classId, conf_threshold);
        }
        else
        {
            validCount += process_fp32((float *)outputs[box_idx].buf, (float *)outputs[score_idx].buf, (float *)score_sum,
                                       grid_h, grid_w, stride, dfl_len,
                                       filterBoxes, objProbs, classId, conf_threshold);
        }

    }

    // no object detect
    if (validCount <= 0)
    {
        return 0;
    }
    // Sort candidates by descending confidence; indexArray tracks the
    // original candidate indices through the sort.
    std::vector<int> indexArray;
    for (int i = 0; i < validCount; ++i)
    {
        indexArray.push_back(i);
    }
    quick_sort_indice_inverse(objProbs, 0, validCount - 1, indexArray);

    // Run NMS independently for every class that produced a candidate.
    std::set<int> class_set(std::begin(classId), std::end(classId));

    for (auto c : class_set)
    {
        nms(validCount, filterBoxes, classId, indexArray, c, nms_threshold);
    }

    int last_count = 0;
    od_results->count = 0;

    /* box valid detect target: undo the letterbox padding, clamp into the
       model input, and scale back to source-image coordinates. */
    for (int i = 0; i < validCount; ++i)
    {
        if (indexArray[i] == -1 || last_count >= OBJ_NUMB_MAX_SIZE)
        {
            continue;
        }
        int n = indexArray[i]; // original candidate index

        float x1 = filterBoxes[n * 4 + 0] - letter_box->x_pad;
        float y1 = filterBoxes[n * 4 + 1] - letter_box->y_pad;
        float x2 = x1 + filterBoxes[n * 4 + 2];
        float y2 = y1 + filterBoxes[n * 4 + 3];
        int id = classId[n];
        float obj_conf = objProbs[i]; // objProbs was sorted, so index by i

        od_results->results[last_count].box.left = (int)(clamp(x1, 0, model_in_w) / letter_box->scale);
        od_results->results[last_count].box.top = (int)(clamp(y1, 0, model_in_h) / letter_box->scale);
        od_results->results[last_count].box.right = (int)(clamp(x2, 0, model_in_w) / letter_box->scale);
        od_results->results[last_count].box.bottom = (int)(clamp(y2, 0, model_in_h) / letter_box->scale);
        od_results->results[last_count].prop = obj_conf;
        od_results->results[last_count].cls_id = id;
        last_count++;
    }
    od_results->count = last_count;
    return 0;
}
|
||||
|
||||
int init_post_process()
|
||||
{
|
||||
int ret = 0;
|
||||
ret = loadLabelName(LABEL_NALE_TXT_PATH, labels);
|
||||
if (ret < 0)
|
||||
{
|
||||
printf("Load %s failed!\n", LABEL_NALE_TXT_PATH);
|
||||
return -1;
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
char *coco_cls_to_name(int cls_id)
|
||||
{
|
||||
|
||||
if (cls_id >= OBJ_CLASS_NUM)
|
||||
{
|
||||
return "null";
|
||||
}
|
||||
|
||||
if (labels[cls_id])
|
||||
{
|
||||
return labels[cls_id];
|
||||
}
|
||||
|
||||
return "null";
|
||||
}
|
||||
|
||||
void deinit_post_process()
|
||||
{
|
||||
for (int i = 0; i < OBJ_CLASS_NUM; i++)
|
||||
{
|
||||
if (labels[i] != nullptr)
|
||||
{
|
||||
free(labels[i]);
|
||||
labels[i] = nullptr;
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,36 @@
|
||||
#ifndef _RKNN_YOLOV8_DEMO_POSTPROCESS_H_
#define _RKNN_YOLOV8_DEMO_POSTPROCESS_H_

#include <stdint.h>
#include <vector>
#include "rknn_api.h"
#include "common.h"
#include "image_utils.h"

#define OBJ_NAME_MAX_SIZE 64   // max length of a class-label string
#define OBJ_NUMB_MAX_SIZE 128  // max detections kept per frame
// NOTE(review): postprocess.cc loads a 10-label file under OPI5_BUILD while
// OBJ_CLASS_NUM stays 80 unconditionally — confirm this mismatch is intended.
#define OBJ_CLASS_NUM 80       // number of classes the model predicts
#define NMS_THRESH 0.45        // default NMS IoU threshold
#define BOX_THRESH 0.25        // default confidence threshold

// class rknn_app_context_t;

// One detected object: its bounding box (source-image coordinates after the
// letterbox transform is undone), confidence and class id.
typedef struct {
    image_rect_t box;
    float prop;  // confidence score
    int cls_id;  // index into the label table (see coco_cls_to_name)
} object_detect_result;

// Fixed-capacity list of detections for a single frame.
typedef struct {
    int id;
    int count;  // number of valid entries in results[]
    object_detect_result results[OBJ_NUMB_MAX_SIZE];
} object_detect_result_list;

// Load the class-label table; call once before post_process().
int init_post_process();
// Free the label table loaded by init_post_process().
void deinit_post_process();
// Map a class id to its label ("null" when out of range or unloaded).
char *coco_cls_to_name(int cls_id);
// Decode raw model outputs into an NMS-filtered detection list.
int post_process(rknn_app_context_t *app_ctx, rknn_output *outputs, letterbox_t *letter_box, float conf_threshold, float nms_threshold, object_detect_result_list *od_results);

// NOTE(review): declared but apparently never defined in this patch — looks
// like a leftover of deinit_post_process()'s old camelCase name; confirm and
// remove.
void deinitPostProcess();
#endif //_RKNN_YOLOV8_DEMO_POSTPROCESS_H_
|
||||
@@ -0,0 +1,250 @@
|
||||
// Copyright (c) 2023 by Rockchip Electronics Co., Ltd. All Rights Reserved.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
#include <math.h>
|
||||
|
||||
#include "yolov8.h"
|
||||
#include "common.h"
|
||||
#include "file_utils.h"
|
||||
#include "image_utils.h"
|
||||
|
||||
// Debug helper: print every field of one rknn tensor attribute on one line
// (used while enumerating model inputs/outputs during initialization).
static void dump_tensor_attr(rknn_tensor_attr *attr)
{
    printf(" index=%d, name=%s, n_dims=%d, dims=[%d, %d, %d, %d], n_elems=%d, size=%d, fmt=%s, type=%s, qnt_type=%s, "
           "zp=%d, scale=%f\n",
           attr->index, attr->name, attr->n_dims, attr->dims[0], attr->dims[1], attr->dims[2], attr->dims[3],
           attr->n_elems, attr->size, get_format_string(attr->fmt), get_type_string(attr->type),
           get_qnt_type_string(attr->qnt_type), attr->zp, attr->scale);
}
|
||||
|
||||
// Load an RKNN model file and populate app_ctx with the runtime context,
// cached tensor attributes, the quantization flag, and the input geometry.
//
// @param model_path [in]  path to the .rknn model file
// @param app_ctx    [out] context filled on success
// @return int 0 on success, -1 on any failure
//
// NOTE(review): on the error paths after rknn_init() succeeds, ctx is never
// released with rknn_destroy() — confirm whether that leak matters here.
int init_yolov8_model(const char *model_path, rknn_app_context_t *app_ctx)
{
    int ret;
    int model_len = 0;
    char *model;
    rknn_context ctx = 0;

    // Load RKNN Model
    model_len = read_data_from_file(model_path, &model);
    if (model == NULL)
    {
        printf("load_model fail!\n");
        return -1;
    }

    // The runtime keeps its own copy of the model blob, so the file buffer
    // can be freed immediately after rknn_init().
    ret = rknn_init(&ctx, model, model_len, 0, NULL);
    free(model);
    if (ret < 0)
    {
        printf("rknn_init fail! ret=%d\n", ret);
        return -1;
    }

    // Get Model Input Output Number
    rknn_input_output_num io_num;
    ret = rknn_query(ctx, RKNN_QUERY_IN_OUT_NUM, &io_num, sizeof(io_num));
    if (ret != RKNN_SUCC)
    {
        printf("rknn_query fail! ret=%d\n", ret);
        return -1;
    }
    printf("model input num: %d, output num: %d\n", io_num.n_input, io_num.n_output);

    // Get Model Input Info
    printf("input tensors:\n");
    rknn_tensor_attr input_attrs[io_num.n_input];
    memset(input_attrs, 0, sizeof(input_attrs));
    for (int i = 0; i < io_num.n_input; i++)
    {
        input_attrs[i].index = i;
        ret = rknn_query(ctx, RKNN_QUERY_INPUT_ATTR, &(input_attrs[i]), sizeof(rknn_tensor_attr));
        if (ret != RKNN_SUCC)
        {
            printf("rknn_query fail! ret=%d\n", ret);
            return -1;
        }
        dump_tensor_attr(&(input_attrs[i]));
    }

    // Get Model Output Info
    printf("output tensors:\n");
    rknn_tensor_attr output_attrs[io_num.n_output];
    memset(output_attrs, 0, sizeof(output_attrs));
    for (int i = 0; i < io_num.n_output; i++)
    {
        output_attrs[i].index = i;
        ret = rknn_query(ctx, RKNN_QUERY_OUTPUT_ATTR, &(output_attrs[i]), sizeof(rknn_tensor_attr));
        if (ret != RKNN_SUCC)
        {
            printf("rknn_query fail! ret=%d\n", ret);
            return -1;
        }
        dump_tensor_attr(&(output_attrs[i]));
    }

    // Set to context
    app_ctx->rknn_ctx = ctx;

    // Treat the model as quantized when output 0 is affine-quantized INT8;
    // this selects the decode path in post_process().
    if (output_attrs[0].qnt_type == RKNN_TENSOR_QNT_AFFINE_ASYMMETRIC && output_attrs[0].type == RKNN_TENSOR_INT8)
    {
        app_ctx->is_quant = true;
    }
    else
    {
        app_ctx->is_quant = false;
    }

    // Keep heap copies of the attributes; release_yolov8_model() frees them.
    app_ctx->io_num = io_num;
    app_ctx->input_attrs = (rknn_tensor_attr *)malloc(io_num.n_input * sizeof(rknn_tensor_attr));
    memcpy(app_ctx->input_attrs, input_attrs, io_num.n_input * sizeof(rknn_tensor_attr));
    app_ctx->output_attrs = (rknn_tensor_attr *)malloc(io_num.n_output * sizeof(rknn_tensor_attr));
    memcpy(app_ctx->output_attrs, output_attrs, io_num.n_output * sizeof(rknn_tensor_attr));

    // Input geometry depends on the tensor layout reported by the runtime.
    if (input_attrs[0].fmt == RKNN_TENSOR_NCHW)
    {
        printf("model is NCHW input fmt\n");
        app_ctx->model_channel = input_attrs[0].dims[1];
        app_ctx->model_height = input_attrs[0].dims[2];
        app_ctx->model_width = input_attrs[0].dims[3];
    }
    else
    {
        printf("model is NHWC input fmt\n");
        app_ctx->model_height = input_attrs[0].dims[1];
        app_ctx->model_width = input_attrs[0].dims[2];
        app_ctx->model_channel = input_attrs[0].dims[3];
    }
    printf("model input height=%d, width=%d, channel=%d\n",
           app_ctx->model_height, app_ctx->model_width, app_ctx->model_channel);

    return 0;
}
|
||||
|
||||
// Tear down everything init_yolov8_model() created. Idempotent: each handle
// is nulled after release, and free(NULL) is a no-op.
int release_yolov8_model(rknn_app_context_t *app_ctx)
{
    if (app_ctx->rknn_ctx != 0)
    {
        rknn_destroy(app_ctx->rknn_ctx);
        app_ctx->rknn_ctx = 0;
    }
    free(app_ctx->input_attrs);
    app_ctx->input_attrs = NULL;
    free(app_ctx->output_attrs);
    app_ctx->output_attrs = NULL;
    return 0;
}
|
||||
|
||||
// Run one frame through the YOLOv8 model: letterbox-resize into the model's
// input geometry, execute inference, and decode detections into od_results.
//
// @param app_ctx    [in]  initialized model context
// @param img        [in]  source frame (any supported format)
// @param od_results [out] decoded, NMS-filtered detections
// @return int 0 on success, <0 on failure
//
// Fix: the early error returns after the letterbox buffer was allocated
// (convert/inputs_set/run failures) leaked dst_img.virt_addr; all failure
// paths now route through `out:` so the buffer is always freed. A redundant
// second memset of `outputs` was also dropped.
int inference_yolov8_model(rknn_app_context_t *app_ctx, image_buffer_t *img, object_detect_result_list *od_results)
{
    int ret;
    image_buffer_t dst_img;
    letterbox_t letter_box;
    rknn_input inputs[app_ctx->io_num.n_input];
    rknn_output outputs[app_ctx->io_num.n_output];
    const float nms_threshold = NMS_THRESH;      // default NMS IoU threshold
    const float box_conf_threshold = BOX_THRESH; // default confidence threshold
    int bg_color = 114;                          // letterbox padding gray

    if ((!app_ctx) || !(img) || (!od_results))
    {
        return -1;
    }

    memset(od_results, 0x00, sizeof(*od_results));
    memset(&letter_box, 0, sizeof(letterbox_t));
    memset(&dst_img, 0, sizeof(image_buffer_t));
    memset(inputs, 0, sizeof(inputs));
    memset(outputs, 0, sizeof(outputs));

    // Pre Process: allocate an RGB888 buffer with the model's input geometry.
    dst_img.width = app_ctx->model_width;
    dst_img.height = app_ctx->model_height;
    dst_img.format = IMAGE_FORMAT_RGB888;
    dst_img.size = get_image_size(&dst_img);
    dst_img.virt_addr = (unsigned char *)malloc(dst_img.size);
    if (dst_img.virt_addr == NULL)
    {
        printf("malloc buffer size:%d fail!\n", dst_img.size);
        return -1;
    }

    // letterbox: aspect-preserving resize with padding.
    ret = convert_image_with_letterbox(img, &dst_img, &letter_box, bg_color);
    if (ret < 0)
    {
        printf("convert_image_with_letterbox fail! ret=%d\n", ret);
        goto out; // was: return -1, leaking dst_img.virt_addr
    }

    // Set Input Data
    inputs[0].index = 0;
    inputs[0].type = RKNN_TENSOR_UINT8;
    inputs[0].fmt = RKNN_TENSOR_NHWC;
    inputs[0].size = app_ctx->model_width * app_ctx->model_height * app_ctx->model_channel;
    inputs[0].buf = dst_img.virt_addr;

    ret = rknn_inputs_set(app_ctx->rknn_ctx, app_ctx->io_num.n_input, inputs);
    if (ret < 0)
    {
        printf("rknn_input_set fail! ret=%d\n", ret);
        goto out; // was: return -1, leaking dst_img.virt_addr
    }

    // Run
    printf("rknn_run\n");
    ret = rknn_run(app_ctx->rknn_ctx, nullptr);
    if (ret < 0)
    {
        printf("rknn_run fail! ret=%d\n", ret);
        goto out; // was: return -1, leaking dst_img.virt_addr
    }

    // Get Output
    for (int i = 0; i < app_ctx->io_num.n_output; i++)
    {
        outputs[i].index = i;
        outputs[i].want_float = (!app_ctx->is_quant);
    }
    ret = rknn_outputs_get(app_ctx->rknn_ctx, app_ctx->io_num.n_output, outputs, NULL);
    if (ret < 0)
    {
        printf("rknn_outputs_get fail! ret=%d\n", ret);
        goto out;
    }

    // Post Process
    post_process(app_ctx, outputs, &letter_box, box_conf_threshold, nms_threshold, od_results);

    // Remember to release the rknn outputs.
    rknn_outputs_release(app_ctx->rknn_ctx, app_ctx->io_num.n_output, outputs);

out:
    if (dst_img.virt_addr != NULL)
    {
        free(dst_img.virt_addr);
    }

    return ret;
}
|
||||
@@ -0,0 +1,42 @@
|
||||
// Copyright (c) 2023 by Rockchip Electronics Co., Ltd. All Rights Reserved.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
|
||||
#ifndef _RKNN_DEMO_YOLOV8_H_
#define _RKNN_DEMO_YOLOV8_H_

#include "rknn_api.h"
#include "common.h"

// Everything the YOLOv8 RKNN pipeline keeps between calls: the runtime
// context, cached tensor metadata, and the model's input geometry.
typedef struct {
    rknn_context rknn_ctx;         // RKNN runtime handle (0 when released)
    rknn_input_output_num io_num;  // number of model inputs/outputs
    rknn_tensor_attr* input_attrs;   // malloc'd copies of the input tensor attrs
    rknn_tensor_attr* output_attrs;  // malloc'd copies of the output tensor attrs
    int model_channel;  // input channels
    int model_width;    // input width in pixels
    int model_height;   // input height in pixels
    bool is_quant;      // true when outputs are affine-quantized INT8
} rknn_app_context_t;

// Included after the struct because postprocess.h uses rknn_app_context_t.
#include "postprocess.h"


// Load the .rknn model at model_path and fill app_ctx. Returns 0 on success.
int init_yolov8_model(const char* model_path, rknn_app_context_t* app_ctx);

// Release the context and attribute buffers created by init_yolov8_model().
int release_yolov8_model(rknn_app_context_t* app_ctx);

// Run one frame through the model and write decoded detections to od_results.
int inference_yolov8_model(rknn_app_context_t* app_ctx, image_buffer_t* img, object_detect_result_list* od_results);

#endif //_RKNN_DEMO_YOLOV8_H_
|
||||
Reference in New Issue
Block a user