#include #include #include #include #include #define CONFIDENCE_THRESHOLD 0.2 using namespace std; using namespace cv; // function to create vector of class names std::vector createClaseNames() { std::vector classNames; classNames.push_back("background"); classNames.push_back("aeroplane"); classNames.push_back("bicycle"); classNames.push_back("bird"); classNames.push_back("boat"); classNames.push_back("bottle"); classNames.push_back("bus"); classNames.push_back("car"); classNames.push_back("cat"); classNames.push_back("chair"); classNames.push_back("cow"); classNames.push_back("diningtable"); classNames.push_back("dog"); classNames.push_back("horse"); classNames.push_back("motorbike"); classNames.push_back("person"); classNames.push_back("pottedplant"); classNames.push_back("sheep"); return classNames; }; int main(int argc, char *argv[]) { if (argc < 3) { qDebug() << "Give ONNX model as first argument and image as second one."; return 1; } std::vector classNames = createClaseNames(); std::string model_filename = argv[1]; std::string image_filename = argv[2]; // Load the ONNX model cv::dnn::Net net = cv::dnn::readNetFromONNX(model_filename); // Set backend to OpenCL net.setPreferableBackend(cv::dnn::DNN_BACKEND_DEFAULT); //net.setPreferableTarget(cv::dnn::DNN_TARGET_OPENCL); // Load an image and preprocess it cv::Mat image = cv::imread(image_filename); cv::Mat blob = cv::dnn::blobFromImage(image, 1/255.0, cv::Size(640, 640), cv::Scalar(), true, false); cv::imwrite("tmpInput.png", image); // Set the input for the network net.setInput(blob); std::vector outputs; net.forward(outputs); cv::Mat output = outputs[0]; std::vector layerNames = net.getLayerNames(); std::vector outLayerIndices = net.getUnconnectedOutLayers(); for (int index : outLayerIndices) { std::cout << layerNames[index - 1] << std::endl; } cv::Mat detections = net.forward("output0"); // print some information about detections std::cout << "dims: " << outputs[0].dims << std::endl; std::cout << "size: " << outputs[0].size << std::endl; int num_detections0 = outputs.size(); int num_classes0 = outputs[0].size[1]; int rows = outputs[0].size[1]; qDebug() << "num_detections0:" << num_detections0 << "num_classes0:" << num_classes0 << "rows:" << rows; int num_detections = detections.size[2]; // 8400 int num_attributes = detections.size[1]; // 14 qDebug() << "num_detections:" << num_detections << "num_attributes:" << num_attributes; detections = detections.reshape(1, num_detections); for (int i = 0; i < num_detections; i++) { Mat row = detections.row(i); float box_confidence = row.at(4); float cls_confidence = row.at(5); if (box_confidence > 0.5 && cls_confidence > 0.5) { std::cout << "box_confidence " << box_confidence << " cls_confidence: " << cls_confidence << std::endl; } } /* // Output tensor processing // Assuming output tensor has shape [1, 14, 8400] int num_detections = detections.size[2]; // 8400 int num_attributes = detections.size[1]; // 14 qDebug() << "num_detections:" << num_detections << "num_attributes:" << num_attributes; // Extract and print confidence for each detection const float* data = (float*)output.data; for (int i = 0; i < num_detections; i++) { // Confidence value is at index 4 (based on YOLO-like model output format) float confidence = data[i * num_attributes + 3]; // Print confidence value if (confidence > 0.5f) { std::cout << "Detection " << i << " confidence: " << confidence << std::endl; } } */ /* // Assuming outputs is a vector of cv::Mat containing the model's output for (int i = 0; i < outputs[0].size[0]; ++i) { for (int j = 0; j < outputs[0].size[1]; ++j) { float confidence = outputs[0].at(i, j); // Print or use the confidence value std::cout << "Confidence: " << confidence << std::endl; } } */ /* // Output processing variables std::vector class_ids; std::vector confidences; std::vector boxes; // Analyze each output for (const auto& output : outputs) { // Each row is a detection: [center_x, center_y, width, height, conf, class1_score, class2_score, ...] const auto* data = (float*)output.data; for (int i = 0; i < output.rows; i++, data += output.cols) { float confidence = data[4]; // Objectness confidence if (confidence >= 0.5) { // Apply a confidence threshold cv::Mat scores = output.row(i).colRange(5, output.cols); cv::Point class_id_point; double max_class_score; cv::minMaxLoc(scores, 0, &max_class_score, 0, &class_id_point); int class_id = class_id_point.x; if (max_class_score >= 0.5) { // Apply a class score threshold // Get bounding box int center_x = (int)(data[0] * image.cols); int center_y = (int)(data[1] * image.rows); int width = (int)(data[2] * image.cols); int height = (int)(data[3] * image.rows); int left = center_x - width / 2; int top = center_y - height / 2; // Store the results class_ids.push_back(class_id); confidences.push_back(confidence); boxes.push_back(cv::Rect(left, top, width, height)); } } } } */ /* for (int i = 0; i < num_detections; ++i) { for (int a = 0; a < 10; a++) { qDebug() << "confidence:" << confidence << "class_id" << class_id; } //float confidence = outputs[0].at(i, 4); //int class_id = outputs[0].at(i, 5); qDebug() << "confidence:" << confidence << "class_id" << class_id; } */ /* std::vector class_ids; std::vector confidences; std::vector boxes; // Output tensor processing for (size_t i = 0; i < outputs.size(); i++) { float* data = (float*)outputs[i].data; for (int j = 0; j < outputs[i].rows; ++j) { float confidence = data[4]; // Objectness confidence qDebug() <<" Confidence: " << confidence; if (confidence >= CONFIDENCE_THRESHOLD) { // Get class with the highest score cv::Mat scores = outputs[i].row(j).colRange(5, num_classes + 5); cv::Point class_id_point; double max_class_score; cv::minMaxLoc(scores, 0, &max_class_score, 0, &class_id_point); qDebug() << "Class ID: " << class_id_point.x << " Confidence: " << confidence; if (max_class_score > CONFIDENCE_THRESHOLD) { int center_x = (int)(data[0] * image.cols); int center_y = (int)(data[1] * image.rows); int width = (int)(data[2] * image.cols); int height = (int)(data[3] * image.rows); int left = center_x - width / 2; int top = center_y - height / 2; // Store the results class_ids.push_back(class_id_point.x); confidences.push_back((float)max_class_score); boxes.push_back(cv::Rect(left, top, width, height)); } } data += num_classes + 5; // Move to the next detection } } */ /* // Process the output for (int i = 0; i < num_detections; ++i) { float confidence = outputs[0].at(i, 4); int class_id = outputs[0].at(i, 5); // ... extract bounding box coordinates float x_center = outputs[0].at(i, 0); float y_center = outputs[0].at(i, 1); float width = outputs[0].at(i, 2); float height = outputs[0].at(i, 3); // Calculate bounding box corners int x = (int)(x_center - width / 2.0); int y = (int)(y_center - height / 2.0); int width_int = (int)width; int height_int = (int)height; // Print or use the detected information qDebug() << "Class ID: " << class_id << " Confidence: " << confidence << " Bounding Box: (" << x << ", " << y << ", " << width_int << ", " << height_int << ")"; } */ /* // Perform forward pass for (int i = 0; i < 10; i++) { std::vector outputs; QElapsedTimer timer; timer.start(); net.forward(outputs); qDebug() << "Inference completed in" << timer.elapsed() << "ms."; } */ // Process the output (YOLO-specific post-processing like NMS is needed) // Outputs contain class scores, bounding boxes, etc. return 0; }