diff --git a/tmp/opi_rtsp/aiengine.cpp b/tmp/opi_rtsp/aiengine.cpp index d184dd6..82a135c 100644 --- a/tmp/opi_rtsp/aiengine.cpp +++ b/tmp/opi_rtsp/aiengine.cpp @@ -3,10 +3,12 @@ #include "aiengine.h" #include "aiengineinference.h" -#ifdef OPI5_BUILD +#if defined(OPI5_BUILD) #include "src-opi5/aiengineinferenceopi5.h" -#else +#elif defined(OPENCV_BUILD) #include "src-opencv-onnx/aiengineinferenceopencvonnx.h" +#else +#include "src-onnx-runtime/aiengineinferenceonnxruntime.h" #endif AiEngine::AiEngine(QString modelPath, QObject *parent) @@ -15,10 +17,12 @@ AiEngine::AiEngine(QString modelPath, QObject *parent) mRtspListener = new AiEngineRtspListener(this); connect(mRtspListener, &AiEngineRtspListener::frameReceived, this, &AiEngine::frameReceivedSlot); -#ifdef OPI5_BUILD +#if defined(OPI5_BUILD) mInference = new AiEngineInferenceOpi5(modelPath); -#else +#elif defined(OPENCV_BUILD) mInference = new AiEngineInferenceOpencvOnnx(modelPath); +#else + mInference = new AiEngineInferencevOnnxRuntime(modelPath); #endif QThread *inferenceThread = new QThread(this); diff --git a/tmp/opi_rtsp/aiengineinference.cpp b/tmp/opi_rtsp/aiengineinference.cpp index cbb101f..9a8f968 100644 --- a/tmp/opi_rtsp/aiengineinference.cpp +++ b/tmp/opi_rtsp/aiengineinference.cpp @@ -12,3 +12,39 @@ bool AiEngineInference::isActive(void) { return mActive; } + + +cv::Mat AiEngineInference::resizeAndPad(const cv::Mat& src) +{ + // Calculate the aspect ratio + float aspectRatio = static_cast(src.cols) / src.rows; + + // Determine new size while maintaining aspect ratio + int newWidth = src.cols; + int newHeight = src.rows; + if (src.cols > INFERENCE_SQUARE_WIDTH || src.rows > INFERENCE_SQUARE_HEIGHT) { + if (aspectRatio > 1) + { + // Width is greater than height + newWidth = INFERENCE_SQUARE_WIDTH; + newHeight = static_cast(INFERENCE_SQUARE_WIDTH / aspectRatio); + } + else { + // Height is greater than or equal to width + newHeight = INFERENCE_SQUARE_HEIGHT; + newWidth = 
static_cast<int>(INFERENCE_SQUARE_HEIGHT * aspectRatio); + } + } + + // Resize the original image if needed + cv::Mat resized; + cv::resize(src, resized, cv::Size(newWidth, newHeight)); + + // Create a new 640x640 image with a black background + cv::Mat output(INFERENCE_SQUARE_HEIGHT, INFERENCE_SQUARE_WIDTH, src.type(), cv::Scalar(0, 0, 0)); + + // Copy the resized image to the top-left corner of the new image + resized.copyTo(output(cv::Rect(0, 0, resized.cols, resized.rows))); + + return output; +} diff --git a/tmp/opi_rtsp/aiengineinference.h b/tmp/opi_rtsp/aiengineinference.h index 49f96e0..8bf708d 100644 --- a/tmp/opi_rtsp/aiengineinference.h +++ b/tmp/opi_rtsp/aiengineinference.h @@ -4,6 +4,11 @@ #include #include #include +#include <opencv2/opencv.hpp> + + +const int INFERENCE_SQUARE_WIDTH = 640; +const int INFERENCE_SQUARE_HEIGHT = 640; class AiEngineRectangle { @@ -38,6 +43,7 @@ public: bool isActive(void); protected: + cv::Mat resizeAndPad(const cv::Mat& src); QString mModelPath; bool mActive; diff --git a/tmp/opi_rtsp/get_yolo_mode.sh b/tmp/opi_rtsp/get_yolo_model.sh similarity index 100% rename from tmp/opi_rtsp/get_yolo_mode.sh rename to tmp/opi_rtsp/get_yolo_model.sh diff --git a/tmp/opi_rtsp/opi_rtsp.pro b/tmp/opi_rtsp/opi_rtsp.pro index 4bc6ce8..c899769 100644 --- a/tmp/opi_rtsp/opi_rtsp.pro +++ b/tmp/opi_rtsp/opi_rtsp.pro @@ -17,14 +17,23 @@ opi5 { LIBS += /usr/local/lib/librknnrt.so SOURCES += $$PWD/src-opi5/*.c $$PWD/src-opi5/*.cpp $$PWD/src-opi5/*.cc HEADERS += $$PWD/src-opi5/*.h -} else { +} else:opencv { + message("OpenCV build") + message("You must use YOLOv8 ONNX files. Azaion model does not work!") + message("OpenCV must be version 4.10.0 installed to /usr/local/") + QMAKE_CXXFLAGS += -DOPENCV_BUILD + QMAKE_LFLAGS += -Wl,-rpath,/usr/local/lib + SOURCES += $$PWD/src-opencv-onnx/*.cpp + HEADERS += $$PWD/src-opencv-onnx/*.h +} +else { message("ONNX build") - message("You must use YOLOv8 ONNX files") + message("You must use YOLOv8 ONNX files. 
Azaion ONNX model also works fine.") QMAKE_CXXFLAGS += -DONNX_BUILD INCLUDEPATH += /opt/onnxruntime-linux-x64-1.18.0/include LIBS += /opt/onnxruntime-linux-x64-1.18.0/lib/libonnxruntime.so.1.18.0 QMAKE_LFLAGS += -Wl,-rpath,/opt/onnxruntime-linux-x64-1.18.0/lib QMAKE_LFLAGS += -Wl,-rpath,/usr/local/lib - SOURCES += $$PWD/src-opencv-onnx/*.cpp - HEADERS += $$PWD/src-opencv-onnx/*.h + SOURCES += $$PWD/src-onnx-runtime/*.cpp + HEADERS += $$PWD/src-onnx-runtime/*.h } diff --git a/tmp/opi_rtsp/src-onnx-runtime/aiengineinferenceonnxruntime.cpp b/tmp/opi_rtsp/src-onnx-runtime/aiengineinferenceonnxruntime.cpp new file mode 100644 index 0000000..f28518b --- /dev/null +++ b/tmp/opi_rtsp/src-onnx-runtime/aiengineinferenceonnxruntime.cpp @@ -0,0 +1,97 @@ +#include +#include +#include +#include "aiengineinferenceonnxruntime.h" + + +static const float confThreshold = 0.4f; +static const float iouThreshold = 0.4f; +static const float maskThreshold = 0.5f; + + +AiEngineInferencevOnnxRuntime::AiEngineInferencevOnnxRuntime(QString modelPath, QObject *parent) : + AiEngineInference{modelPath, parent}, + mPredictor(modelPath.toStdString(), confThreshold, iouThreshold, maskThreshold) +{ + qDebug() << "TUOMAS AiEngineInferencevOnnxRuntime() mModelPath=" << mModelPath; + + mClassNames = { + "Armoured vehicle", + "Truck", + "Vehicle", + "Artillery", + "Shadow artillery", + "Trenches", + "Military man", + "Tyre tracks", + "Additional protection tank", + "Smoke" + }; +} + + +cv::Mat AiEngineInferencevOnnxRuntime::drawLabels(const cv::Mat &image, const std::vector &detections) +{ + cv::Mat result = image.clone(); + + for (const auto &detection : detections) + { + cv::rectangle(result, detection.box, cv::Scalar(0, 255, 0), 2); + std::string label = mClassNames[detection.classId] + ": " + std::to_string(detection.conf); + + int baseLine; + cv::Size labelSize = cv::getTextSize(label, cv::FONT_HERSHEY_SIMPLEX, 0.5, 1, &baseLine); + + cv::rectangle( + result, + cv::Point(detection.box.x, 
detection.box.y - labelSize.height), + cv::Point(detection.box.x + labelSize.width, detection.box.y + baseLine), + cv::Scalar(255, 255, 255), + cv::FILLED); + + cv::putText( + result, + label, + cv::Point( + detection.box.x, + detection.box.y), + cv::FONT_HERSHEY_SIMPLEX, + 0.5, + cv::Scalar(0, 0, 0), + 1); + } + + return result; +} + + +void AiEngineInferencevOnnxRuntime::performInferenceSlot(cv::Mat frame) +{ + mActive = true; + + cv::Mat scaledImage = resizeAndPad(frame); + std::vector detections = mPredictor.predict(scaledImage); + AiEngineInferenceResult result; + + for (uint i = 0; i < detections.size(); i++) { + const Yolov8Result &detection = detections[i]; + + // Add detected objects to the results + AiEngineObject object; + object.classId = detection.classId; + object.propability = detection.conf; + object.rectangle.top = detection.box.y; + object.rectangle.left = detection.box.x; + object.rectangle.bottom = detection.box.y + detection.box.height; + object.rectangle.right = detection.box.x + detection.box.width; + result.objects.append(object); + } + + if (result.objects.empty() == false) { + qDebug() << __PRETTY_FUNCTION__ << "detections:" << detections.size(); + result.frame = drawLabels(scaledImage, detections); + emit resultsReady(result); + } + + mActive = false; +} diff --git a/tmp/opi_rtsp/src-onnx-runtime/aiengineinferenceonnxruntime.h b/tmp/opi_rtsp/src-onnx-runtime/aiengineinferenceonnxruntime.h new file mode 100644 index 0000000..a1fa887 --- /dev/null +++ b/tmp/opi_rtsp/src-onnx-runtime/aiengineinferenceonnxruntime.h @@ -0,0 +1,20 @@ +#pragma once + +#include +#include "aiengineinference.h" +#include "yolov8Predictor.h" + +class AiEngineInferencevOnnxRuntime : public AiEngineInference +{ + Q_OBJECT +public: + explicit AiEngineInferencevOnnxRuntime(QString modelPath, QObject *parent = nullptr); + +public slots: + void performInferenceSlot(cv::Mat frame) override; + +private: + cv::Mat drawLabels(const cv::Mat &image, const std::vector 
&detections); + YOLOPredictor mPredictor; + std::vector mClassNames; +}; diff --git a/tmp/opi_rtsp/src-onnx-runtime/utils.cpp b/tmp/opi_rtsp/src-onnx-runtime/utils.cpp new file mode 100644 index 0000000..0accacb --- /dev/null +++ b/tmp/opi_rtsp/src-onnx-runtime/utils.cpp @@ -0,0 +1,167 @@ +#include "utils.h" + +size_t utils::vectorProduct(const std::vector &vector) +{ + if (vector.empty()) + return 0; + + size_t product = 1; + for (const auto &element : vector) + product *= element; + + return product; +} + +std::wstring utils::charToWstring(const char *str) +{ + typedef std::codecvt_utf8 convert_type; + std::wstring_convert converter; + + return converter.from_bytes(str); +} + +std::vector utils::loadNames(const std::string &path) +{ + // load class names + std::vector classNames; + std::ifstream infile(path); + if (infile.good()) + { + std::string line; + while (getline(infile, line)) + { + if (line.back() == '\r') + line.pop_back(); + classNames.emplace_back(line); + } + infile.close(); + } + else + { + std::cerr << "ERROR: Failed to access class name path: " << path << std::endl; + } + // set color + srand(time(0)); + + for (int i = 0; i < (int)(2 * classNames.size()); i++) + { + int b = rand() % 256; + int g = rand() % 256; + int r = rand() % 256; + colors.push_back(cv::Scalar(b, g, r)); + } + return classNames; +} + +void utils::visualizeDetection(cv::Mat &im, std::vector &results, + const std::vector &classNames) +{ + cv::Mat image = im.clone(); + for (const Yolov8Result &result : results) + { + + int x = result.box.x; + int y = result.box.y; + + int conf = (int)std::round(result.conf * 100); + int classId = result.classId; + std::string label = classNames[classId] + " 0." 
+ std::to_string(conf); + + int baseline = 0; + cv::Size size = cv::getTextSize(label, cv::FONT_ITALIC, 0.4, 1, &baseline); + image(result.box).setTo(colors[classId + classNames.size()], result.boxMask); + cv::rectangle(image, result.box, colors[classId], 2); + cv::rectangle(image, + cv::Point(x, y), cv::Point(x + size.width, y + 12), + colors[classId], -1); + cv::putText(image, label, + cv::Point(x, y - 3 + 12), cv::FONT_ITALIC, + 0.4, cv::Scalar(0, 0, 0), 1); + } + cv::addWeighted(im, 0.4, image, 0.6, 0, im); +} + +void utils::letterbox(const cv::Mat &image, cv::Mat &outImage, + const cv::Size &newShape = cv::Size(640, 640), + const cv::Scalar &color = cv::Scalar(114, 114, 114), + bool auto_ = true, + bool scaleFill = false, + bool scaleUp = true, + int stride = 32) +{ + cv::Size shape = image.size(); + float r = std::min((float)newShape.height / (float)shape.height, + (float)newShape.width / (float)shape.width); + if (!scaleUp) + r = std::min(r, 1.0f); + + float ratio[2]{r, r}; + + int newUnpad[2]{(int)std::round((float)shape.width * r), + (int)std::round((float)shape.height * r)}; + + auto dw = (float)(newShape.width - newUnpad[0]); + auto dh = (float)(newShape.height - newUnpad[1]); + + if (auto_) + { + dw = (float)((int)dw % stride); + dh = (float)((int)dh % stride); + } + else if (scaleFill) + { + dw = 0.0f; + dh = 0.0f; + newUnpad[0] = newShape.width; + newUnpad[1] = newShape.height; + ratio[0] = (float)newShape.width / (float)shape.width; + ratio[1] = (float)newShape.height / (float)shape.height; + } + + dw /= 2.0f; + dh /= 2.0f; + + if (shape.width != newUnpad[0] || shape.height != newUnpad[1]) + { + cv::resize(image, outImage, cv::Size(newUnpad[0], newUnpad[1])); + } + + int top = int(std::round(dh - 0.1f)); + int bottom = int(std::round(dh + 0.1f)); + int left = int(std::round(dw - 0.1f)); + int right = int(std::round(dw + 0.1f)); + cv::copyMakeBorder(outImage, outImage, top, bottom, left, right, cv::BORDER_CONSTANT, color); +} + +void 
utils::scaleCoords(cv::Rect &coords, + cv::Mat &mask, + const float maskThreshold, + const cv::Size &imageShape, + const cv::Size &imageOriginalShape) +{ + float gain = std::min((float)imageShape.height / (float)imageOriginalShape.height, + (float)imageShape.width / (float)imageOriginalShape.width); + + int pad[2] = {(int)(((float)imageShape.width - (float)imageOriginalShape.width * gain) / 2.0f), + (int)(((float)imageShape.height - (float)imageOriginalShape.height * gain) / 2.0f)}; + + coords.x = (int)std::round(((float)(coords.x - pad[0]) / gain)); + coords.x = std::max(0, coords.x); + coords.y = (int)std::round(((float)(coords.y - pad[1]) / gain)); + coords.y = std::max(0, coords.y); + + coords.width = (int)std::round(((float)coords.width / gain)); + coords.width = std::min(coords.width, imageOriginalShape.width - coords.x); + coords.height = (int)std::round(((float)coords.height / gain)); + coords.height = std::min(coords.height, imageOriginalShape.height - coords.y); + mask = mask(cv::Rect(pad[0], pad[1], imageShape.width - 2 * pad[0], imageShape.height - 2 * pad[1])); + + cv::resize(mask, mask, imageOriginalShape, cv::INTER_LINEAR); + + mask = mask(coords) > maskThreshold; +} +template +T utils::clip(const T &n, const T &lower, const T &upper) +{ + return std::max(lower, std::min(n, upper)); +} diff --git a/tmp/opi_rtsp/src-onnx-runtime/utils.h b/tmp/opi_rtsp/src-onnx-runtime/utils.h new file mode 100644 index 0000000..802a84d --- /dev/null +++ b/tmp/opi_rtsp/src-onnx-runtime/utils.h @@ -0,0 +1,38 @@ +#pragma once +#include +#include +#include + +struct Yolov8Result +{ + cv::Rect box; + cv::Mat boxMask; // mask in box + float conf{}; + int classId{}; +}; + +namespace utils +{ + static std::vector colors; + + size_t vectorProduct(const std::vector &vector); + std::wstring charToWstring(const char *str); + std::vector loadNames(const std::string &path); + void visualizeDetection(cv::Mat &image, std::vector &results, + const std::vector &classNames); + + void 
letterbox(const cv::Mat &image, cv::Mat &outImage, + const cv::Size &newShape, + const cv::Scalar &color, + bool auto_, + bool scaleFill, + bool scaleUp, + int stride); + + void scaleCoords(cv::Rect &coords, cv::Mat &mask, + const float maskThreshold, + const cv::Size &imageShape, const cv::Size &imageOriginalShape); + + template + T clip(const T &n, const T &lower, const T &upper); +} diff --git a/tmp/opi_rtsp/src-onnx-runtime/yolov8Predictor.cpp b/tmp/opi_rtsp/src-onnx-runtime/yolov8Predictor.cpp new file mode 100644 index 0000000..0e53df6 --- /dev/null +++ b/tmp/opi_rtsp/src-onnx-runtime/yolov8Predictor.cpp @@ -0,0 +1,243 @@ +#include "yolov8Predictor.h" + +YOLOPredictor::YOLOPredictor(const std::string &modelPath, + float confThreshold, + float iouThreshold, + float maskThreshold) +{ + this->confThreshold = confThreshold; + this->iouThreshold = iouThreshold; + this->maskThreshold = maskThreshold; + env = Ort::Env(OrtLoggingLevel::ORT_LOGGING_LEVEL_WARNING, "YOLOV8"); + sessionOptions = Ort::SessionOptions(); + + std::vector availableProviders = Ort::GetAvailableProviders(); + std::cout << "Inference device: CPU" << std::endl; + + session = Ort::Session(env, modelPath.c_str(), sessionOptions); + + const size_t num_input_nodes = session.GetInputCount(); //==1 + const size_t num_output_nodes = session.GetOutputCount(); //==1,2 + if (num_output_nodes > 1) + { + this->hasMask = true; + std::cout << "Instance Segmentation" << std::endl; + } + else + std::cout << "Object Detection" << std::endl; + + Ort::AllocatorWithDefaultOptions allocator; + for (int i = 0; i < (int)num_input_nodes; i++) + { + auto input_name = session.GetInputNameAllocated(i, allocator); + this->inputNames.push_back(input_name.get()); + input_names_ptr.push_back(std::move(input_name)); + + Ort::TypeInfo inputTypeInfo = session.GetInputTypeInfo(i); + std::vector inputTensorShape = inputTypeInfo.GetTensorTypeAndShapeInfo().GetShape(); + this->inputShapes.push_back(inputTensorShape); + 
this->isDynamicInputShape = false; + // checking if width and height are dynamic + if (inputTensorShape[2] == -1 && inputTensorShape[3] == -1) + { + std::cout << "Dynamic input shape" << std::endl; + this->isDynamicInputShape = true; + } + } + for (int i = 0; i < (int)num_output_nodes; i++) + { + auto output_name = session.GetOutputNameAllocated(i, allocator); + this->outputNames.push_back(output_name.get()); + output_names_ptr.push_back(std::move(output_name)); + + Ort::TypeInfo outputTypeInfo = session.GetOutputTypeInfo(i); + std::vector<int64_t> outputTensorShape = outputTypeInfo.GetTensorTypeAndShapeInfo().GetShape(); + this->outputShapes.push_back(outputTensorShape); + if (i == 0) + { + if (!this->hasMask) + classNums = outputTensorShape[1] - 4; + else + classNums = outputTensorShape[1] - 4 - 32; + } + } + // for (const char *x : this->inputNames) + // { + // std::cout << x << std::endl; + // } + // for (const char *x : this->outputNames) + // { + // std::cout << x << std::endl; + // } + // std::cout << classNums << std::endl; +} + +void YOLOPredictor::getBestClassInfo(std::vector<float>::iterator it, + float &bestConf, + int &bestClassId, + const int _classNums) +{ + // first 4 element are box + bestClassId = 4; + bestConf = 0; + + for (int i = 4; i < _classNums + 4; i++) + { + if (it[i] > bestConf) + { + bestConf = it[i]; + bestClassId = i - 4; + } + } +} +cv::Mat YOLOPredictor::getMask(const cv::Mat &maskProposals, + const cv::Mat &maskProtos) +{ + cv::Mat protos = maskProtos.reshape(0, {(int)this->outputShapes[1][1], (int)this->outputShapes[1][2] * (int)this->outputShapes[1][3]}); + + cv::Mat matmul_res = (maskProposals * protos).t(); + cv::Mat masks = matmul_res.reshape(1, {(int)this->outputShapes[1][2], (int)this->outputShapes[1][3]}); + cv::Mat dest; + + // sigmoid + cv::exp(-masks, dest); + dest = 1.0 / (1.0 + dest); + cv::resize(dest, dest, cv::Size((int)this->inputShapes[0][2], (int)this->inputShapes[0][3]), 0, 0, cv::INTER_LINEAR); + return dest; +} + +void 
YOLOPredictor::preprocessing(cv::Mat &image, float *&blob, std::vector &inputTensorShape) +{ + cv::Mat resizedImage, floatImage; + cv::cvtColor(image, resizedImage, cv::COLOR_BGR2RGB); + utils::letterbox(resizedImage, resizedImage, cv::Size((int)this->inputShapes[0][2], (int)this->inputShapes[0][3]), + cv::Scalar(114, 114, 114), this->isDynamicInputShape, + false, true, 32); + + inputTensorShape[2] = resizedImage.rows; + inputTensorShape[3] = resizedImage.cols; + + resizedImage.convertTo(floatImage, CV_32FC3, 1 / 255.0); + blob = new float[floatImage.cols * floatImage.rows * floatImage.channels()]; + cv::Size floatImageSize{floatImage.cols, floatImage.rows}; + + // hwc -> chw + std::vector chw(floatImage.channels()); + for (int i = 0; i < floatImage.channels(); ++i) + { + chw[i] = cv::Mat(floatImageSize, CV_32FC1, blob + i * floatImageSize.width * floatImageSize.height); + } + cv::split(floatImage, chw); +} + +std::vector YOLOPredictor::postprocessing(const cv::Size &resizedImageShape, + const cv::Size &originalImageShape, + std::vector &outputTensors) +{ + + // for box + std::vector boxes; + std::vector confs; + std::vector classIds; + + float *boxOutput = outputTensors[0].GetTensorMutableData(); + //[1,4+n,8400]=>[1,8400,4+n] or [1,4+n+32,8400]=>[1,8400,4+n+32] + cv::Mat output0 = cv::Mat(cv::Size((int)this->outputShapes[0][2], (int)this->outputShapes[0][1]), CV_32F, boxOutput).t(); + float *output0ptr = (float *)output0.data; + int rows = (int)this->outputShapes[0][2]; + int cols = (int)this->outputShapes[0][1]; + // std::cout << rows << cols << std::endl; + // if hasMask + std::vector> picked_proposals; + cv::Mat mask_protos; + + for (int i = 0; i < rows; i++) + { + std::vector it(output0ptr + i * cols, output0ptr + (i + 1) * cols); + float confidence; + int classId; + this->getBestClassInfo(it.begin(), confidence, classId, classNums); + + if (confidence > this->confThreshold) + { + if (this->hasMask) + { + std::vector temp(it.begin() + 4 + classNums, 
it.end()); + picked_proposals.push_back(temp); + } + int centerX = (int)(it[0]); + int centerY = (int)(it[1]); + int width = (int)(it[2]); + int height = (int)(it[3]); + int left = centerX - width / 2; + int top = centerY - height / 2; + boxes.emplace_back(left, top, width, height); + confs.emplace_back(confidence); + classIds.emplace_back(classId); + } + } + + std::vector indices; + cv::dnn::NMSBoxes(boxes, confs, this->confThreshold, this->iouThreshold, indices); + + if (this->hasMask) + { + float *maskOutput = outputTensors[1].GetTensorMutableData(); + std::vector mask_protos_shape = {1, (int)this->outputShapes[1][1], (int)this->outputShapes[1][2], (int)this->outputShapes[1][3]}; + mask_protos = cv::Mat(mask_protos_shape, CV_32F, maskOutput); + } + + std::vector results; + for (int idx : indices) + { + Yolov8Result res; + res.box = cv::Rect(boxes[idx]); + if (this->hasMask) + res.boxMask = this->getMask(cv::Mat(picked_proposals[idx]).t(), mask_protos); + else + res.boxMask = cv::Mat::zeros((int)this->inputShapes[0][2], (int)this->inputShapes[0][3], CV_8U); + + utils::scaleCoords(res.box, res.boxMask, this->maskThreshold, resizedImageShape, originalImageShape); + res.conf = confs[idx]; + res.classId = classIds[idx]; + results.emplace_back(res); + } + + return results; +} + +std::vector YOLOPredictor::predict(cv::Mat &image) +{ + float *blob = nullptr; + std::vector inputTensorShape{1, 3, -1, -1}; + this->preprocessing(image, blob, inputTensorShape); + + size_t inputTensorSize = utils::vectorProduct(inputTensorShape); + + std::vector inputTensorValues(blob, blob + inputTensorSize); + + std::vector inputTensors; + + Ort::MemoryInfo memoryInfo = Ort::MemoryInfo::CreateCpu( + OrtAllocatorType::OrtArenaAllocator, OrtMemType::OrtMemTypeDefault); + + inputTensors.push_back(Ort::Value::CreateTensor( + memoryInfo, inputTensorValues.data(), inputTensorSize, + inputTensorShape.data(), inputTensorShape.size())); + + std::vector outputTensors = 
this->session.Run(Ort::RunOptions{nullptr}, + this->inputNames.data(), + inputTensors.data(), + 1, + this->outputNames.data(), + this->outputNames.size()); + + cv::Size resizedShape = cv::Size((int)inputTensorShape[3], (int)inputTensorShape[2]); + std::vector result = this->postprocessing(resizedShape, + image.size(), + outputTensors); + + delete[] blob; + + return result; +} diff --git a/tmp/opi_rtsp/src-onnx-runtime/yolov8Predictor.h b/tmp/opi_rtsp/src-onnx-runtime/yolov8Predictor.h new file mode 100644 index 0000000..b9e1180 --- /dev/null +++ b/tmp/opi_rtsp/src-onnx-runtime/yolov8Predictor.h @@ -0,0 +1,50 @@ +#pragma once +#include +#include +#include + +#include "utils.h" + +class YOLOPredictor +{ +public: + explicit YOLOPredictor(std::nullptr_t){}; + YOLOPredictor(const std::string &modelPath, + float confThreshold, + float iouThreshold, + float maskThreshold); + // ~YOLOPredictor(); + std::vector predict(cv::Mat &image); + int classNums = 10; + +private: + Ort::Env env{nullptr}; + Ort::SessionOptions sessionOptions{nullptr}; + Ort::Session session{nullptr}; + + void preprocessing(cv::Mat &image, float *&blob, std::vector &inputTensorShape); + std::vector postprocessing(const cv::Size &resizedImageShape, + const cv::Size &originalImageShape, + std::vector &outputTensors); + + static void getBestClassInfo(std::vector::iterator it, + float &bestConf, + int &bestClassId, + const int _classNums); + cv::Mat getMask(const cv::Mat &maskProposals, const cv::Mat &maskProtos); + bool isDynamicInputShape{}; + + std::vector inputNames; + std::vector input_names_ptr; + + std::vector outputNames; + std::vector output_names_ptr; + + std::vector> inputShapes; + std::vector> outputShapes; + float confThreshold = 0.3f; + float iouThreshold = 0.4f; + + bool hasMask = false; + float maskThreshold = 0.5f; +}; diff --git a/tmp/opi_rtsp/src-opencv-onnx/aiengineinferenceopencvonnx.cpp b/tmp/opi_rtsp/src-opencv-onnx/aiengineinferenceopencvonnx.cpp index b5bf0c2..9c88b3d 100644 --- 
a/tmp/opi_rtsp/src-opencv-onnx/aiengineinferenceopencvonnx.cpp +++ b/tmp/opi_rtsp/src-opencv-onnx/aiengineinferenceopencvonnx.cpp @@ -3,10 +3,6 @@ #include "aiengineinferenceopencvonnx.h" -const int INFERENCE_SQUARE_WIDTH = 640; -const int INFERENCE_SQUARE_HEIGHT = 640; - - AiEngineInferenceOpencvOnnx::AiEngineInferenceOpencvOnnx(QString modelPath, QObject *parent) : AiEngineInference{modelPath, parent}, mInference(modelPath.toStdString(), cv::Size(640, 640), "classes.txt") @@ -17,42 +13,6 @@ AiEngineInferenceOpencvOnnx::AiEngineInferenceOpencvOnnx(QString modelPath, QObj } -cv::Mat resizeAndPad(const cv::Mat& src) -{ - // Calculate the aspect ratio - float aspectRatio = static_cast(src.cols) / src.rows; - - // Determine new size while maintaining aspect ratio - int newWidth = src.cols; - int newHeight = src.rows; - if (src.cols > INFERENCE_SQUARE_WIDTH || src.rows > INFERENCE_SQUARE_HEIGHT) { - if (aspectRatio > 1) - { - // Width is greater than height - newWidth = INFERENCE_SQUARE_WIDTH; - newHeight = static_cast(INFERENCE_SQUARE_WIDTH / aspectRatio); - } - else { - // Height is greater than or equal to width - newHeight = INFERENCE_SQUARE_HEIGHT; - newWidth = static_cast(INFERENCE_SQUARE_HEIGHT * aspectRatio); - } - } - - // Resize the original image if needed - cv::Mat resized; - cv::resize(src, resized, cv::Size(newWidth, newHeight)); - - // Create a new 640x640 image with a black background - cv::Mat output(INFERENCE_SQUARE_HEIGHT, INFERENCE_SQUARE_WIDTH, src.type(), cv::Scalar(0, 0, 0)); - - // Copy the resized image to the top-left corner of the new image - resized.copyTo(output(cv::Rect(0, 0, resized.cols, resized.rows))); - - return output; -} - - void AiEngineInferenceOpencvOnnx::performInferenceSlot(cv::Mat frame) { try { diff --git a/tmp/opi_rtsp/src-opencv-onnx/aiengineinferenceopencvonnx.h b/tmp/opi_rtsp/src-opencv-onnx/aiengineinferenceopencvonnx.h index 7c46666..c2cbdbd 100644 --- a/tmp/opi_rtsp/src-opencv-onnx/aiengineinferenceopencvonnx.h +++ 
b/tmp/opi_rtsp/src-opencv-onnx/aiengineinferenceopencvonnx.h @@ -14,6 +14,5 @@ public slots: void performInferenceSlot(cv::Mat frame) override; private: - //InferenceEngine *mEngine; Inference mInference; };