// File: ANSCORE/modules/ANSODEngine/ANSYOLOV10OVOD.cpp
// OpenVINO-based YOLOv10 object detector implementation.
#include "ANSYOLOV10OVOD.h"
#include "Utility.h"
namespace ANSCENTER
{
bool ANSOYOLOV10OVOD::OptimizeModel(bool fp16, std::string& optimizedModelFolder) {
    // Verify that the OpenVINO model is already in its optimized on-disk form
    // (an .xml model file accompanied by a .bin weights file). Reports the
    // folder that holds the model via optimizedModelFolder.
    std::lock_guard<std::recursive_mutex> lock(_mutex);
    // Let the base class run its own optimization pass first.
    if (!ANSODBase::OptimizeModel(fp16, optimizedModelFolder)) {
        return false;
    }
    // Without a model file on disk there is nothing to optimize.
    if (!FileExist(_modelFilePath)) {
        this->_logger.LogFatal("ANSOYOLOV10OVOD::OptimizeModel", "This model is not exist. Please check the model path again.", __FILE__, __LINE__);
        optimizedModelFolder = "";
        return false;
    }
    // The model counts as optimized when the companion .bin weights file
    // sits next to the model file.
    const std::string parentDir = GetParentFolder(_modelFilePath);
    const std::string weightsPath = CreateFilePath(parentDir, GetFileNameWithoutExtension(_modelFilePath) + ".bin");
    optimizedModelFolder = parentDir;
    if (FileExist(weightsPath)) {
        this->_logger.LogDebug("ANSOYOLOV10OVOD::OptimizeModel", "This model is optimized. No need other optimization.", __FILE__, __LINE__);
        return true;
    }
    this->_logger.LogFatal("ANSOYOLOV10OVOD::OptimizeModel", "This model can not be optimized.", __FILE__, __LINE__);
    return false;
}
// Load a detector from a password-protected model zip. Delegates extraction to
// ANSODBase::LoadModel, then applies YOLOv10/OpenVINO-specific configuration,
// resolves the model and class files, and compiles the network.
// Returns true on success; failures are logged and return false.
bool ANSOYOLOV10OVOD::LoadModel(const std::string& modelZipFilePath, const std::string& modelZipPassword) {
std::lock_guard<std::recursive_mutex> lock(_mutex);
try {
// Base class extracts the archive and presumably populates _modelFolder,
// _modelFilePath, _modelConfigFile and _classFilePath — confirm in ANSODBase.
bool result = ANSODBase::LoadModel(modelZipFilePath, modelZipPassword);
if (!result) return false;
// Force detector-type defaults for this engine (640x640 OpenVINO detection).
_modelConfig.detectionType = ANSCENTER::DetectionType::DETECTION;
_modelConfig.modelType = ModelType::OPENVINO;
_modelConfig.inpHeight = 640;
_modelConfig.inpWidth = 640;
// Guard against implausibly low thresholds coming from the archive config.
if (_modelConfig.modelMNSThreshold < 0.2)
_modelConfig.modelMNSThreshold = 0.5;
if (_modelConfig.modelConfThreshold < 0.2)
_modelConfig.modelConfThreshold = 0.5;
// 0. Check if the configuration file exist
if (FileExist(_modelConfigFile)) {
ModelType modelType;
std::vector<int> inputShape;
// Class list comes from the config file; modelType is read but unused here.
_classes = ANSUtilityHelper::GetConfigFileContent(_modelConfigFile, modelType, inputShape);
// Override the 640x640 default only when the config supplies a valid H/W pair.
if (inputShape.size() == 2) {
if (inputShape[0] > 0)_modelConfig.inpHeight = inputShape[0];
if (inputShape[1] > 0)_modelConfig.inpWidth = inputShape[1];
}
}
else {// This is old version of model zip file
// Legacy layout: fixed file names inside the extracted folder.
std::string xmlModelFile = CreateFilePath(_modelFolder, "train_last.xml");//yolov8n.xml
if (std::filesystem::exists(xmlModelFile)) {
_modelFilePath = xmlModelFile;
_classFilePath = CreateFilePath(_modelFolder, "classes.names");
this->_logger.LogDebug("ANSOYOLOV10OVOD::Initialize. Loading OpenVINO weight", _modelFilePath, __FILE__, __LINE__);
}
else {
this->_logger.LogError("ANSOYOLOV10OVOD::Initialize. Model file is not exist", _modelFilePath, __FILE__, __LINE__);
return false;
}
// Class names: prefer the names file; fall back to an embedded string list.
std::ifstream isValidFileName(_classFilePath);
if (!isValidFileName)
{
this->_logger.LogDebug("ANSOYOLOV10OVOD::Initialize. Load classes from string", _classFilePath, __FILE__, __LINE__);
LoadClassesFromString();
}
else {
this->_logger.LogDebug("ANSOYOLOV10OVOD::Initialize. Load classes from file", _classFilePath, __FILE__, __LINE__);
LoadClassesFromFile();
}
}
// Load Model from Here
InitialModel(_modelFilePath);
// NOTE(review): InitialModel logs-and-swallows its own failures, so
// _isInitialized can end up true even if compilation failed — verify intended.
_isInitialized = true;
return true;
}
catch (std::exception& e) {
this->_logger.LogFatal("ANSOYOLOV10OVOD::LoadModel", e.what(), __FILE__, __LINE__);
return false;
}
}
bool ANSOYOLOV10OVOD::LoadModelFromFolder(std::string licenseKey, ModelConfig modelConfig, std::string modelName,std::string className, const std::string& modelFolder, std::string& labelMap) {
    // Load a detector directly from an already-extracted folder. Mirrors
    // LoadModel() but lets the caller pick the model/class file names.
    // On success, labelMap receives the comma-separated class list.
    std::lock_guard<std::recursive_mutex> lock(_mutex);
    try {
        bool result = ANSODBase::LoadModelFromFolder(licenseKey, modelConfig,modelName, className,modelFolder, labelMap);
        if (!result) return false;
        // Default model file name when none was supplied.
        std::string _modelName = modelName;
        if (_modelName.empty()) {
            _modelName = "train_last";
        }
        std::string modelFullName = _modelName + ".xml";
        // Parsing for YOLO only here: force detector-type defaults.
        _modelConfig = modelConfig;
        _modelConfig.detectionType = ANSCENTER::DetectionType::DETECTION;
        _modelConfig.modelType = ModelType::OPENVINO;
        _modelConfig.inpHeight = 640;
        _modelConfig.inpWidth = 640;
        // Guard against implausibly low thresholds from the caller's config.
        if (_modelConfig.modelMNSThreshold < 0.2)
            _modelConfig.modelMNSThreshold = 0.5;
        if (_modelConfig.modelConfThreshold < 0.2)
            _modelConfig.modelConfThreshold = 0.5;
        // 0. Check if the configuration file exist
        if (FileExist(_modelConfigFile)) {
            ModelType modelType;
            std::vector<int> inputShape;
            // Class list comes from the config file; modelType is read but unused here.
            _classes = ANSUtilityHelper::GetConfigFileContent(_modelConfigFile, modelType, inputShape);
            if (inputShape.size() == 2) {
                if (inputShape[0] > 0)_modelConfig.inpHeight = inputShape[0];
                if (inputShape[1] > 0)_modelConfig.inpWidth = inputShape[1];
            }
        }
        else {// This is old version of model zip file
            std::string xmlModelFile = CreateFilePath(_modelFolder, modelFullName);//yolov8n.xml
            if (std::filesystem::exists(xmlModelFile)) {
                _modelFilePath = xmlModelFile;
                _classFilePath = CreateFilePath(_modelFolder, className);
                this->_logger.LogDebug("ANSOYOLOV10OVOD::Initialize. Loading OpenVINO weight", _modelFilePath, __FILE__, __LINE__);
            }
            else {
                this->_logger.LogError("ANSOYOLOV10OVOD::Initialize. Model file is not exist", _modelFilePath, __FILE__, __LINE__);
                return false;
            }
            // Class names: prefer the file; fall back to an embedded string list.
            std::ifstream isValidFileName(_classFilePath);
            if (!isValidFileName)
            {
                this->_logger.LogDebug("ANSOYOLOV10OVOD::Initialize. Load classes from string", _classFilePath, __FILE__, __LINE__);
                LoadClassesFromString();
            }
            else {
                this->_logger.LogDebug("ANSOYOLOV10OVOD::Initialize. Load classes from file", _classFilePath, __FILE__, __LINE__);
                LoadClassesFromFile();
            }
        }
        // 1. Load labelMap and engine
        labelMap.clear();
        if (!_classes.empty())
            labelMap = VectorToCommaSeparatedString(_classes);
        // Load Model from Here
        InitialModel(_modelFilePath);
        _isInitialized = true;
        return true;
    }
    catch (const std::exception& e) {
        // Fixed: previous tag said "LoadModel", which misattributed failures
        // of this function in the logs. Also catch by const reference.
        this->_logger.LogFatal("ANSOYOLOV10OVOD::LoadModelFromFolder", e.what(), __FILE__, __LINE__);
        return false;
    }
}
// Initialize the detector from a model zip: validates the license via the base
// class, applies YOLOv10/OpenVINO defaults, resolves model/class files, fills
// labelMap with the comma-separated class list and compiles the network.
// Returns true on success; failures are logged and return false.
bool ANSOYOLOV10OVOD::Initialize(std::string licenseKey, ModelConfig modelConfig, const std::string& modelZipFilePath, const std::string& modelZipPassword, std::string& labelMap) {
std::lock_guard<std::recursive_mutex> lock(_mutex);
try {
// Report the runtime version for diagnostics.
std::string openVINOVersion = ov::get_openvino_version().buildNumber;
std::cout << "OpenVINO version: " << openVINOVersion << std::endl;
//this->_logger.LogDebug("ANSOYOLOV10OVOD::Initialize. OpenVINO version", openVINOVersion, __FILE__, __LINE__);
bool result = ANSODBase::Initialize(licenseKey, modelConfig, modelZipFilePath, modelZipPassword, labelMap);
if (!result) return false;
// Parsing for YOLO only here
_modelConfig = modelConfig;
_modelConfig.detectionType = ANSCENTER::DetectionType::DETECTION;
_modelConfig.modelType = ModelType::OPENVINO;
_modelConfig.inpHeight = 640;
_modelConfig.inpWidth = 640;
// NOTE(review): unlike LoadModel/LoadModelFromFolder this path does not clamp
// modelMNSThreshold/modelConfThreshold — confirm whether that is intentional.
// 0. Check if the configuration file exist
if (FileExist(_modelConfigFile)) {
ModelType modelType;
std::vector<int> inputShape;
// Class list comes from the config file; modelType is read but unused here.
_classes = ANSUtilityHelper::GetConfigFileContent(_modelConfigFile, modelType, inputShape);
// Override the 640x640 default only when the config supplies a valid H/W pair.
if (inputShape.size() == 2) {
if (inputShape[0] > 0)_modelConfig.inpHeight = inputShape[0];
if (inputShape[1] > 0)_modelConfig.inpWidth = inputShape[1];
}
}
else {// This is old version of model zip file
// Legacy layout: fixed file names inside the extracted folder.
std::string xmlModelFile = CreateFilePath(_modelFolder, "train_last.xml");//yolov8n.xml
if (std::filesystem::exists(xmlModelFile)) {
_modelFilePath = xmlModelFile;
_classFilePath = CreateFilePath(_modelFolder, "classes.names");
this->_logger.LogDebug("ANSOYOLOV10OVOD::Initialize. Loading OpenVINO weight", _modelFilePath, __FILE__, __LINE__);
}
else {
this->_logger.LogError("ANSOYOLOV10OVOD::Initialize. Model file is not exist", _modelFilePath, __FILE__, __LINE__);
return false;
}
// Class names: prefer the names file; fall back to an embedded string list.
std::ifstream isValidFileName(_classFilePath);
if (!isValidFileName)
{
this->_logger.LogDebug("ANSOYOLOV10OVOD::Initialize. Load classes from string", _classFilePath, __FILE__, __LINE__);
LoadClassesFromString();
}
else {
this->_logger.LogDebug("ANSOYOLOV10OVOD::Initialize. Load classes from file", _classFilePath, __FILE__, __LINE__);
LoadClassesFromFile();
}
}
// 1. Load labelMap and engine
labelMap.clear();
if (!_classes.empty())
labelMap = VectorToCommaSeparatedString(_classes);
// Load Model from Here
InitialModel(_modelFilePath);
_isInitialized = true;
return true;
}
catch (std::exception& e) {
this->_logger.LogFatal("ANSOYOLOV10OVOD::Initialize", e.what(), __FILE__, __LINE__);
return false;
}
}
// Convenience overload: run inference with a default camera identifier.
std::vector<Object> ANSOYOLOV10OVOD::RunInference(const cv::Mat& input) {
return RunInference(input, "OpenVINO10Cam");
}
std::vector<Object> ANSOYOLOV10OVOD::RunInference(const cv::Mat& input, const std::string& camera_id) {
    // Single-image detection: letterbox to 640x640, run the OpenVINO request,
    // decode detections, then optionally apply tracking/stabilization.
    // Returns an empty vector on any validation failure or runtime error.
    std::lock_guard<std::recursive_mutex> lock(_mutex);
    // Validate license and initialization status
    if (!_licenseValid) {
        _logger.LogError("ANSOYOLOV10OVOD::RunInference", "Invalid License", __FILE__, __LINE__);
        return {};
    }
    if (!_isInitialized) {
        _logger.LogError("ANSOYOLOV10OVOD::RunInference", "Model is not initialized", __FILE__, __LINE__);
        return {};
    }
    // Validate input
    if (input.empty()) {
        _logger.LogError("ANSOYOLOV10OVOD::RunInference", "Input frame is empty", __FILE__, __LINE__);
        return {};
    }
    // Frames smaller than 10x10 are silently ignored.
    if (input.cols < 10 || input.rows < 10) {
        return {};
    }
    try {
        m_imgWidth = static_cast<float>(input.cols);
        m_imgHeight = static_cast<float>(input.rows);
        constexpr int imageSize = 640;
        float factor = 0.0f;
        // Shallow copy - Preprocessing may modify headers but not original data
        this->_frameBuffer = input;
        // Reuse member buffer for input data
        this->_inputData.resize(imageSize * imageSize * 3);
        // Preprocess the input frame (letterbox + NCHW float planes).
        Preprocessing(&this->_frameBuffer, imageSize, &factor, this->_inputData);
        // Lazily create the request if InitialModel has not done so.
        if (!request) {
            request = compiled_model.create_infer_request();
            // ov::Shape elements are size_t; the previous static_cast to
            // unsigned long truncates on LLP64 platforms (e.g. 64-bit Windows).
            request.get_input_tensor().set_shape({ 1, 3,
                static_cast<size_t>(imageSize),
                static_cast<size_t>(imageSize) });
        }
        // Get and validate input tensor
        auto inputTensor = request.get_input_tensor();
        float* tensorData = inputTensor.data<float>();
        if (!tensorData) {
            _logger.LogError("ANSOYOLOV10OVOD::RunInference", "Input tensor data pointer is null", __FILE__, __LINE__);
            return {};
        }
        // Copy input data into the tensor
        std::memcpy(tensorData, this->_inputData.data(), this->_inputData.size() * sizeof(float));
        // Run inference
        request.infer();
        // Retrieve and validate output tensor
        auto outputTensor = request.get_output_tensor();
        float* output_data = outputTensor.data<float>();
        if (!output_data) {
            _logger.LogError("ANSOYOLOV10OVOD::RunInference", "Output tensor data pointer is null", __FILE__, __LINE__);
            return {};
        }
        // Post-process the fixed 300-candidate YOLOv10 output.
        auto ret = PostProcessing(output_data, factor, 300, camera_id);
        if (_trackerEnabled) {
            ret = ApplyTracking(ret, camera_id);
            if (_stabilizationEnabled) ret = StabilizeDetections(ret, camera_id);
        }
        return ret;
    }
    catch (const std::exception& e) {
        _logger.LogFatal("ANSOYOLOV10OVOD::RunInference", e.what(), __FILE__, __LINE__);
    }
    catch (...) {
        _logger.LogFatal("ANSOYOLOV10OVOD::RunInference", "An unknown error occurred", __FILE__, __LINE__);
    }
    return {};
}
cv::Mat ANSOYOLOV10OVOD::resizeKeepAspectRatioPadRightBottom(const cv::Mat& input,size_t height, size_t width,const cv::Scalar& bgcolor) {
    // Resize 'input' to fit inside width x height while keeping its aspect
    // ratio; the content is anchored top-left and the right/bottom are padded
    // with 'bgcolor'. Returns an empty Mat on degenerate requests.
    if (input.empty() || height == 0 || width == 0) {
        return cv::Mat();
    }
    // cv::Mat/cv::Size take int; make the narrowing explicit.
    const int outW = static_cast<int>(width);
    const int outH = static_cast<int>(height);
    // Scale so the whole image fits inside the target rectangle.
    float r = std::min(static_cast<float>(outW) / input.cols, static_cast<float>(outH) / input.rows);
    // Clamp to >= 1 px so cv::resize never receives an empty size for
    // extreme aspect ratios (the previous code could truncate to 0).
    int unpad_w = std::max(1, static_cast<int>(r * input.cols));
    int unpad_h = std::max(1, static_cast<int>(r * input.rows));
    // Resize the input image
    cv::Mat resized;
    cv::resize(input, resized, cv::Size(unpad_w, unpad_h), 0, 0, cv::INTER_CUBIC);
    // Create the output canvas pre-filled with the background color.
    cv::Mat output(outH, outW, input.type(), bgcolor);
    // Copy the resized content into the top-left corner of the output image
    resized.copyTo(output(cv::Rect(0, 0, resized.cols, resized.rows)));
    return output;
}
ANSOYOLOV10OVOD::~ANSOYOLOV10OVOD() {
    // Best-effort cleanup of the extracted model folder. A destructor must
    // never let an exception escape, so every failure is logged and swallowed.
    try {
        if (FolderExist(_modelFolder)) {
            if (!DeleteFolder(_modelFolder)) {
                this->_logger.LogDebug("ANSOYOLOV10OVOD::~ANSOYOLOV10OVOD", "Failed to delete OpenVINO Models", __FILE__, __LINE__);
            }
        }
    }
    catch (const std::exception& e) {
        // Log the actual reason (the previous code discarded e entirely).
        this->_logger.LogError("ANSOYOLOV10OVOD::~ANSOYOLOV10OVOD()", e.what(), __FILE__, __LINE__);
    }
    catch (...) {
        // Safety net: non-std exceptions must not propagate from a destructor.
        this->_logger.LogError("ANSOYOLOV10OVOD::~ANSOYOLOV10OVOD()", "Failed to release OPENVINO Models", __FILE__, __LINE__);
    }
}
bool ANSOYOLOV10OVOD::Destroy() {
    // Delete the extracted model folder. Returns true even when the folder
    // could not be deleted (only logged), false when an exception was thrown.
    try {
        if (FolderExist(_modelFolder)) {
            if (!DeleteFolder(_modelFolder)) {
                this->_logger.LogDebug("ANSOYOLOV10OVOD::Destroy", "Failed to delete OpenVINO Models", __FILE__, __LINE__);
            }
        }
        return true;
    }
    catch (const std::exception& e) {
        // Log the actual reason (the previous code discarded e entirely).
        this->_logger.LogError("ANSOYOLOV10OVOD::Destroy()", e.what(), __FILE__, __LINE__);
        return false;
    }
    catch (...) {
        // Safety net for non-std exceptions from the filesystem helpers.
        this->_logger.LogError("ANSOYOLOV10OVOD::Destroy()", "Failed to release OPENVINO Models", __FILE__, __LINE__);
        return false;
    }
}
//private
// Compile the OpenVINO model for inference. Chooses the static-batch path when
// the configured opt/max batch sizes are both 1; otherwise reshapes the model
// for a dynamic batch dimension [1, gpuMaxBatchSize]. Failures are logged and
// swallowed — callers should verify 'request'/'compiled_model' afterwards.
void ANSOYOLOV10OVOD::InitialModel(const std::string& model_path)
{
if((_modelConfig.gpuOptBatchSize==1)&&(_modelConfig.gpuMaxBatchSize==1)) {
InitModelStaticBatchSize(model_path);
}
else {// Dynamic batch size is not supported in OpenVINO for Yolov10
try {
// Step 1: Initialize OpenVINO Runtime Core
int maxBatchSize = _modelConfig.gpuMaxBatchSize;
ov::Core core;
auto model = core.read_model(model_path);
// Step 2: Configure model for dynamic batch size
_logger.LogDebug("ANSOYOLOV10OVOD::InitialModel",
"Configuring model for dynamic batch (max: " + std::to_string(maxBatchSize) + ")",
__FILE__, __LINE__);
// Get input info
// NOTE(review): assumes the model has at least one parameter and that
// index 0 is the image input — confirm for all deployed models.
auto input = model->get_parameters()[0];
auto input_shape = input->get_partial_shape();
// Set batch dimension to dynamic range [1, maxBatchSize]
input_shape[0] = ov::Dimension(1, maxBatchSize);
// Reshape model for dynamic batch processing
model->reshape({ {input->get_friendly_name(), input_shape} });
// Step 3: Get Available Devices and Log
std::vector<std::string> available_devices = core.get_available_devices();
_logger.LogDebug("ANSOYOLOV10OVOD::InitialModel",
"Available devices: " + std::to_string(available_devices.size()),
__FILE__, __LINE__);
for (const auto& device : available_devices) {
_logger.LogDebug("ANSOYOLOV10OVOD::InitialModel", " - " + device, __FILE__, __LINE__);
}
// Define device priority: GPU > CPU
std::vector<std::string> priority_devices = { "GPU", "CPU" };
bool device_found = false;
std::string selected_device;
// Iterate over prioritized devices
for (const auto& device : priority_devices) {
if (std::find(available_devices.begin(), available_devices.end(), device) != available_devices.end()) {
selected_device = device;
// Configure device-specific properties
ov::AnyMap config;
if (device == "GPU") {
// GPU-specific optimizations for batch processing
config[ov::hint::performance_mode.name()] = ov::hint::PerformanceMode::THROUGHPUT;
config[ov::hint::inference_precision.name()] = ov::element::f16;
config["NUM_STREAMS"] = "1"; // Single stream for batching
}
else if (device == "CPU") {
// CPU-specific optimizations for batch processing
config[ov::hint::performance_mode.name()] = ov::hint::PerformanceMode::THROUGHPUT;
config["NUM_STREAMS"] = "1"; // Single stream for batching
}
compiled_model = core.compile_model(model, device, config);
device_found = true;
_logger.LogDebug("ANSOYOLOV10OVOD::InitialModel",
"Model compiled on device: " + device, __FILE__, __LINE__);
break;
}
}
// Fallback: Default to CPU if no devices found
if (!device_found) {
_logger.LogError("ANSOYOLOV10OVOD::InitialModel",
"No supported devices found. Falling back to CPU.", __FILE__, __LINE__);
ov::AnyMap cpu_config;
cpu_config[ov::hint::performance_mode.name()] = ov::hint::PerformanceMode::THROUGHPUT;
compiled_model = core.compile_model(model, "CPU", cpu_config);
selected_device = "CPU";
}
// Step 4: Create Inference Request
request = compiled_model.create_infer_request();
// Set initial shape to batch size 1
request.get_input_tensor().set_shape({ 1, 3, 640, 640 });
_logger.LogDebug("ANSOYOLOV10OVOD::InitialModel",
"Model initialization complete on " + selected_device +
" with dynamic batch support (1-" + std::to_string(maxBatchSize) + ")",
__FILE__, __LINE__);
}
catch (const std::exception& e) {
_logger.LogFatal("ANSOYOLOV10OVOD::InitialModel", e.what(), __FILE__, __LINE__);
}
}
}
void ANSOYOLOV10OVOD::InitModelStaticBatchSize(const std::string& model_path) {
    // Compile the model at model_path for a fixed 1x3x640x640 input, preferring
    // GPU over CPU. Failures are logged and swallowed — callers should verify
    // 'request'/'compiled_model' afterwards.
    try {
        // Step 1: read the model once; reuse it for every compile attempt.
        ov::Core core;
        auto model = core.read_model(model_path);
        // Step 2: probe available devices, preferring GPU over CPU.
        // Yolov10 does not support NPU, so it is intentionally not listed.
        std::vector<std::string> available_devices = core.get_available_devices();
        std::vector<std::string> priority_devices = { "GPU","CPU" };
        bool device_found = false;
        for (const auto& device : priority_devices) {
            if (std::find(available_devices.begin(), available_devices.end(), device) != available_devices.end()) {
                // Fixed: compile the model read from model_path. The previous
                // code compiled _modelFilePath here, silently ignoring the
                // model_path parameter (and re-reading the file from disk).
                compiled_model = core.compile_model(model, device);
                device_found = true;
                break;
            }
        }
        // Fallback: Default to CPU if no devices found
        if (!device_found) {
            _logger.LogError("OPENVINOOD::InitialModel", "No supported devices found. Falling back to CPU.", __FILE__, __LINE__);
            compiled_model = core.compile_model(model, "CPU");
        }
        // Step 3: create the request and pin the static input shape.
        request = compiled_model.create_infer_request();
        request.get_input_tensor().set_shape({ 1, 3, 640, 640 });
    }
    catch (const std::exception& e) {
        // Log any errors
        this->_logger.LogFatal("OPENVINOOD::InitialModel", e.what(), __FILE__, __LINE__);
    }
}
void ANSOYOLOV10OVOD::Preprocessing(cv::Mat* img, int length, float* factor, std::vector<float>& data) {
    // Letterbox '*img' onto a white square, resize to length x length, and pack
    // normalized RGB planes into 'data' in CHW order. '*factor' receives the
    // scale that maps detections back to original image coordinates.
    try {
        if (!img || img->empty()) {
            this->_logger.LogFatal("ANSOYOLOV10OVOD::Preprocessing", "Invalid or empty image", __FILE__, __LINE__);
            return;
        }
        // Convert grayscale images to 3-channel BGR
        cv::Mat processedImage;
        if (img->channels() == 1) {
            cv::cvtColor(*img, processedImage, cv::COLOR_GRAY2BGR);
        }
        else {
            processedImage = *img;
        }
        // Convert image to RGB
        cv::Mat rgbImage;
        cv::cvtColor(processedImage, rgbImage, cv::COLOR_BGR2RGB);
        // Determine the maximum image length
        int max_image_length = std::max(rgbImage.cols, rgbImage.rows);
        // White square canvas. Fixed: cv::Mat::ones(...) * 255 only fills the
        // FIRST channel of a CV_8UC3 image (Scalar(255,0,0) — blue padding);
        // an explicit all-channel Scalar is required for a truly white pad.
        cv::Mat max_image(max_image_length, max_image_length, CV_8UC3, cv::Scalar::all(255));
        // Copy the original image to the top-left corner of the square image
        cv::Rect roi(0, 0, rgbImage.cols, rgbImage.rows);
        rgbImage.copyTo(max_image(roi));
        // Resize the image to the desired length
        cv::Mat resizedImage;
        cv::resize(max_image, resizedImage, cv::Size(length, length), 0.0f, 0.0f, cv::INTER_CUBIC);
        // Calculate the scaling factor
        *factor = static_cast<float>(max_image_length) / static_cast<float>(length);
        // Convert the image to float and normalize to [0, 1]
        resizedImage.convertTo(resizedImage, CV_32FC3, 1.0 / 255.0);
        // Pre-allocate the data vector
        data.resize(length * length * 3);
        // Split channels and copy each plane into its CHW slot
        std::vector<cv::Mat> channels(3);
        cv::split(resizedImage, channels);
        for (int i = 0; i < 3; ++i) {
            std::memcpy(data.data() + i * length * length, channels[i].data, length * length * sizeof(float));
        }
    }
    catch (const std::exception& e) {
        this->_logger.LogFatal("ANSOYOLOV10OVOD::Preprocessing", e.what(), __FILE__, __LINE__);
    }
}
std::vector<Object> ANSOYOLOV10OVOD::PostProcessing(float* result, float factor, int outputLength, const std::string& camera_id) {
    // Decode YOLOv10 output: 'outputLength' rows of [x1, y1, x2, y2, conf,
    // class] in letterboxed coordinates, mapped back via 'factor'. Returns one
    // Object per detection above the configured score threshold.
    std::vector<Object> objects;
    try {
        std::vector<cv::Rect> position_boxes;
        std::vector<int> class_ids;
        std::vector<float> confidences;
        // Reserve memory to avoid multiple reallocations
        position_boxes.reserve(outputLength);
        class_ids.reserve(outputLength);
        confidences.reserve(outputLength);
        for (int i = 0; i < outputLength; ++i) {
            int s = 6 * i;
            float confidence = result[s + 4];
            if (confidence > _modelConfig.detectionScoreThreshold) {
                float cx = result[s + 0];
                float cy = result[s + 1];
                float dx = result[s + 2];
                float dy = result[s + 3];
                int x = static_cast<int>(cx * factor);
                int y = static_cast<int>(cy * factor);
                int width = static_cast<int>((dx - cx) * factor);
                int height = static_cast<int>((dy - cy) * factor);
                // Clamp to image bounds; additionally floor at 0 so a box
                // falling outside the frame cannot yield a negative-sized
                // cv::Rect (previously possible when x/y exceeded the frame).
                x = std::max(0, x);
                y = std::max(0, y);
                width = std::max(0, std::min(static_cast<int>(m_imgWidth) - x, width));
                height = std::max(0, std::min(static_cast<int>(m_imgHeight) - y, height));
                cv::Rect box(x, y, width, height);
                position_boxes.emplace_back(box);
                class_ids.emplace_back(static_cast<int>(result[s + 5]));
                confidences.emplace_back(confidence);
            }
        }
        // Explicit narrowing, consistent with PostProcessingBatch.
        int classNameSize = static_cast<int>(_classes.size());
        objects.reserve(position_boxes.size()); // Reserve memory for objects
        for (size_t i = 0; i < position_boxes.size(); ++i) {
            Object obj;
            obj.classId = class_ids[i];
            if (!_classes.empty()) {
                // Guard against negative ids as well as overflow.
                if (obj.classId >= 0 && obj.classId < classNameSize) {
                    obj.className = _classes[obj.classId];
                }
                else {
                    obj.className = _classes[classNameSize - 1]; // Use last valid class name if out of range
                }
            }
            else {
                obj.className = "Unknown"; // Fallback if _classes is empty
            }
            obj.confidence = confidences[i];
            obj.box = position_boxes[i];
            obj.polygon = ANSUtilityHelper::RectToNormalizedPolygon(obj.box, m_imgWidth, m_imgHeight);
            obj.cameraId = camera_id;
            objects.emplace_back(std::move(obj));
        }
    }
    catch (const std::exception& e) {
        this->_logger.LogFatal("ANSOYOLOV10OVOD::PostProcessing", e.what(), __FILE__, __LINE__);
    }
    //EnqueueDetection(objects,camera_id);
    return objects;
}
void ANSOYOLOV10OVOD::PreprocessingBatch(const std::vector<cv::Mat>& images,
    int length,
    std::vector<float>& factors,
    std::vector<float>& data) {
    // Letterbox every image of the batch to length x length and pack the
    // normalized RGB planes into 'data' as [batch, channel, H, W].
    // factors[b] maps detections back to image b's original coordinates.
    // Also records per-image dimensions in m_batchImgWidths/Heights.
    try {
        if (images.empty()) {
            _logger.LogFatal("ANSOYOLOV10OVOD::PreprocessingBatch", "Empty images vector", __FILE__, __LINE__);
            return;
        }
        int batchSize = static_cast<int>(images.size());
        // Store original image dimensions for each image in batch
        m_batchImgHeights.resize(batchSize);
        m_batchImgWidths.resize(batchSize);
        factors.resize(batchSize);
        // Pre-allocate data vector for entire batch: [batch, channels, height, width]
        data.resize(batchSize * 3 * length * length);
        // Process each image in the batch
        for (int b = 0; b < batchSize; ++b) {
            const cv::Mat& img = images[b];
            if (img.empty()) {
                _logger.LogFatal("ANSOYOLOV10OVOD::PreprocessingBatch",
                    "Empty image at index " + std::to_string(b), __FILE__, __LINE__);
                return;
            }
            // Convert grayscale images to 3-channel BGR
            cv::Mat processedImage;
            if (img.channels() == 1) {
                cv::cvtColor(img, processedImage, cv::COLOR_GRAY2BGR);
            }
            else {
                processedImage = img;
            }
            // Store original dimensions
            m_batchImgWidths[b] = processedImage.cols;
            m_batchImgHeights[b] = processedImage.rows;
            // Convert image to RGB
            cv::Mat rgbImage;
            cv::cvtColor(processedImage, rgbImage, cv::COLOR_BGR2RGB);
            // Determine the maximum image length
            int max_image_length = std::max(rgbImage.cols, rgbImage.rows);
            // White square canvas. Fixed: cv::Mat::ones(...) * 255 only fills
            // the FIRST channel of a CV_8UC3 image (Scalar(255,0,0) — blue
            // padding); an all-channel Scalar gives the intended white pad.
            cv::Mat max_image(max_image_length, max_image_length, CV_8UC3, cv::Scalar::all(255));
            // Copy the original image to the top-left corner of the square image
            cv::Rect roi(0, 0, rgbImage.cols, rgbImage.rows);
            rgbImage.copyTo(max_image(roi));
            // Resize the image to the desired length
            cv::Mat resizedImage;
            cv::resize(max_image, resizedImage, cv::Size(length, length), 0.0f, 0.0f, cv::INTER_CUBIC);
            // Calculate the scaling factor for this image
            factors[b] = static_cast<float>(max_image_length) / static_cast<float>(length);
            // Convert the image to float and normalize to [0, 1]
            resizedImage.convertTo(resizedImage, CV_32FC3, 1.0 / 255.0);
            // Split channels and copy data to correct batch position
            std::vector<cv::Mat> channels(3);
            cv::split(resizedImage, channels);
            // Calculate offset for this batch element
            int batch_offset = b * 3 * length * length;
            for (int c = 0; c < 3; ++c) {
                std::memcpy(data.data() + batch_offset + c * length * length,
                    channels[c].data,
                    length * length * sizeof(float));
            }
        }
        _logger.LogDebug("ANSOYOLOV10OVOD::PreprocessingBatch",
            "Preprocessed batch of " + std::to_string(batchSize) + " images",
            __FILE__, __LINE__);
    }
    catch (const std::exception& e) {
        _logger.LogFatal("ANSOYOLOV10OVOD::PreprocessingBatch", e.what(), __FILE__, __LINE__);
    }
}
std::vector<Object> ANSOYOLOV10OVOD::PostProcessingBatch(float* result,
    const std::vector<float>& factors,
    int outputLength,
    int batchSize,
    const std::string& camera_id) {
    // Decode a batched YOLOv10 output of shape [batch, outputLength, 6] where
    // each row is [x1, y1, x2, y2, conf, class]. Uses per-image factors and
    // the dimensions recorded by PreprocessingBatch to map boxes back to each
    // image's original coordinates. Returns all detections concatenated.
    std::vector<Object> allObjects;
    try {
        // Process each image's results in the batch
        for (int b = 0; b < batchSize; ++b) {
            std::vector<cv::Rect> position_boxes;
            std::vector<int> class_ids;
            std::vector<float> confidences;
            // Reserve memory to avoid multiple reallocations
            position_boxes.reserve(outputLength);
            class_ids.reserve(outputLength);
            confidences.reserve(outputLength);
            // Get factor and dimensions for this specific image
            float factor = factors[b];
            int imgWidth = m_batchImgWidths[b];
            int imgHeight = m_batchImgHeights[b];
            // Calculate offset for this batch element's output
            int batch_offset = b * outputLength * 6;
            for (int i = 0; i < outputLength; ++i) {
                int s = batch_offset + 6 * i;
                float confidence = result[s + 4];
                if (confidence > _modelConfig.detectionScoreThreshold) {
                    float cx = result[s + 0];
                    float cy = result[s + 1];
                    float dx = result[s + 2];
                    float dy = result[s + 3];
                    int x = static_cast<int>(cx * factor);
                    int y = static_cast<int>(cy * factor);
                    int width = static_cast<int>((dx - cx) * factor);
                    int height = static_cast<int>((dy - cy) * factor);
                    // Clamp to image boundaries; additionally floor at 0 so a
                    // box outside the frame cannot yield a negative cv::Rect.
                    x = std::max(0, x);
                    y = std::max(0, y);
                    width = std::max(0, std::min(imgWidth - x, width));
                    height = std::max(0, std::min(imgHeight - y, height));
                    cv::Rect box(x, y, width, height);
                    position_boxes.emplace_back(box);
                    class_ids.emplace_back(static_cast<int>(result[s + 5]));
                    confidences.emplace_back(confidence);
                }
            }
            // Convert to Object format
            int classNameSize = static_cast<int>(_classes.size());
            for (size_t i = 0; i < position_boxes.size(); ++i) {
                Object obj;
                obj.classId = class_ids[i];
                if (!_classes.empty()) {
                    // Guard against negative ids as well as overflow.
                    if (obj.classId >= 0 && obj.classId < classNameSize) {
                        obj.className = _classes[obj.classId];
                    }
                    else {
                        obj.className = _classes[classNameSize - 1];
                    }
                }
                else {
                    obj.className = "Unknown";
                }
                obj.confidence = confidences[i];
                obj.box = position_boxes[i];
                obj.polygon = ANSUtilityHelper::RectToNormalizedPolygon(obj.box, imgWidth, imgHeight);
                obj.cameraId = camera_id;
                allObjects.emplace_back(std::move(obj));
            }
        }
        _logger.LogDebug("ANSOYOLOV10OVOD::PostProcessingBatch",
            "Batch post-processing complete. Total detections: " + std::to_string(allObjects.size()),
            __FILE__, __LINE__);
    }
    catch (const std::exception& e) {
        _logger.LogFatal("ANSOYOLOV10OVOD::PostProcessingBatch", e.what(), __FILE__, __LINE__);
    }
    return allObjects;
}
// Run inference on a batch of images in a single OpenVINO request. The model
// must have been initialized with dynamic batch support (InitialModel with
// gpuMaxBatchSize > 1). Returns all detections from all images concatenated.
// NOTE(review): unlike RunInference, this path performs no _licenseValid /
// _isInitialized / request checks — confirm callers guarantee them.
std::vector<Object> ANSOYOLOV10OVOD::DetectObjectsBatch(const std::vector<cv::Mat>& inputImages,
const std::string& camera_id) {
std::lock_guard<std::recursive_mutex> lock(_mutex);
std::vector<Object> allObjects;
try {
if (inputImages.empty()) {
_logger.LogFatal("ANSOYOLOV10OVOD::DetectObjectsBatch", "Empty input images vector", __FILE__, __LINE__);
return allObjects;
}
int batchSize = static_cast<int>(inputImages.size());
// Validate batch size against the configured dynamic-batch maximum.
if (batchSize > _modelConfig.gpuMaxBatchSize) {
_logger.LogError("ANSOYOLOV10OVOD::DetectObjectsBatch",
"Batch size " + std::to_string(batchSize) +
" exceeds configured maximum " + std::to_string(_modelConfig.gpuMaxBatchSize),
__FILE__, __LINE__);
return allObjects;
}
_logger.LogDebug("ANSOYOLOV10OVOD::DetectObjectsBatch",
"Processing batch of " + std::to_string(batchSize) + " images",
__FILE__, __LINE__);
// Preprocess all images into one contiguous NCHW buffer.
std::vector<float> factors;
std::vector<float> inputData;
PreprocessingBatch(inputImages, 640, factors, inputData);
if (inputData.empty()) {
_logger.LogFatal("ANSOYOLOV10OVOD::DetectObjectsBatch", "Preprocessing failed", __FILE__, __LINE__);
return allObjects;
}
// Update input tensor shape only when the batch size actually changed.
auto current_shape = request.get_input_tensor().get_shape();
if (current_shape[0] != static_cast<size_t>(batchSize)) {
request.get_input_tensor().set_shape({
static_cast<size_t>(batchSize),
3,
640,
640
});
_logger.LogDebug("ANSOYOLOV10OVOD::DetectObjectsBatch",
"Input tensor reshaped to [" + std::to_string(batchSize) + ", 3, 640, 640]",
__FILE__, __LINE__);
}
// Copy input data to tensor
auto input_tensor = request.get_input_tensor();
std::memcpy(input_tensor.data<float>(), inputData.data(), inputData.size() * sizeof(float));
// Run inference
request.infer();
// Get output tensor
auto output_tensor = request.get_output_tensor();
float* output_data = output_tensor.data<float>();
// Number of detection rows per image is taken from the output shape.
// NOTE(review): assumes output layout [batch, num_detections, 6] — confirm.
auto output_shape = output_tensor.get_shape();
int outputLength = static_cast<int>(output_shape[1]); // Number of detections per image
// Post-process results
allObjects = PostProcessingBatch(output_data, factors, outputLength, batchSize, camera_id);
_logger.LogDebug("ANSOYOLOV10OVOD::DetectObjectsBatch",
"Batch processing complete. Total detections: " + std::to_string(allObjects.size()),
__FILE__, __LINE__);
return allObjects;
}
catch (const std::exception& e) {
_logger.LogFatal("ANSOYOLOV10OVOD::DetectObjectsBatch", e.what(), __FILE__, __LINE__);
return allObjects;
}
}
}
//std::vector<Object> ANSOYOLOV10OVOD::RunInference(const cv::Mat& input, const std::string& camera_id) {
// std::lock_guard<std::recursive_mutex> lock(_mutex);
// std::vector<Object> output;
// // Validate license and initialization status
// if (!_licenseValid) {
// _logger.LogError("ANSOYOLOV10OVOD::RunInference", "Invalid License", __FILE__, __LINE__);
// return output;
// }
// if (!_isInitialized) {
// _logger.LogError("ANSOYOLOV10OVOD::RunInference", "Model is not initialized", __FILE__, __LINE__);
// return output;
// }
// try {
// // Validate input
// if (input.empty()) {
// _logger.LogError("ANSOYOLOV10OVOD::RunInference", "Input frame is empty", __FILE__, __LINE__);
// return output;
// }
// if ((input.cols < 10) || (input.rows < 10)) return output;
// m_imgWidth = static_cast<float>(input.cols);
// m_imgHeight = static_cast<float>(input.rows);
// float factor = 0.0f;
// int imageSize = 640;
// cv::Mat frame = input;// input.clone(); // Clone the input to avoid modifying the original
// //int maxImageSize = std::max(frame.cols, frame.rows);
// //if (maxImageSize < imageSize)imageSize = maxImageSize;
// std::vector<float> inputData(imageSize * imageSize * 3);
// // Preprocess the input frame
// Preprocessing(&frame, imageSize, &factor, inputData);
// // Ensure input tensor is valid
// auto inputTensor = request.get_input_tensor();
// if (!inputTensor) {
// _logger.LogError("ANSOYOLOV10OVOD::RunInference", "Failed to retrieve input tensor", __FILE__, __LINE__);
// frame.release();
// return output;
// }
// if (!inputTensor.data<float>()) {
// _logger.LogError("ANSOYOLOV10OVOD::RunInference", "Input tensor data pointer is null", __FILE__, __LINE__);
// frame.release();
// return output;
// }
// // Ensure inputData is not empty before performing memcpy
// if (inputData.empty()) {
// _logger.LogError("ANSOYOLOV10OVOD::RunInference", "Input data is empty", __FILE__, __LINE__);
// frame.release();
// return output;
// }
// // Copy input data into the tensor
// std::memcpy(inputTensor.data<float>(), inputData.data(), inputData.size() * sizeof(float));
// // Run inference
// if (!request) {
// request = compiled_model.create_infer_request();
// request.get_input_tensor().set_shape({ 1, 3, unsigned long(imageSize), unsigned long(imageSize) });
// }
// request.infer();
// // Retrieve output tensor
// auto outputTensor = request.get_output_tensor();
// if (!outputTensor) {
// _logger.LogError("ANSOYOLOV10OVOD::RunInference", "Failed to retrieve output tensor", __FILE__, __LINE__);
// frame.release();
// return output;
// }
// // Get output data
// float* output_data = outputTensor.data<float>();
// if (!output_data) {
// _logger.LogError("ANSOYOLOV10OVOD::RunInference", "Output tensor data pointer is null", __FILE__, __LINE__);
// frame.release();
// return output;
// }
// // Post-process the output data
// output = PostProcessing(output_data, factor, 300, camera_id);
// frame.release();
// }
// catch (const std::exception& e) {
// _logger.LogFatal("ANSOYOLOV10OVOD::RunInference", e.what(), __FILE__, __LINE__);
// }
// catch (...) {
// _logger.LogFatal("ANSOYOLOV10OVOD::RunInference", "An unknown error occurred", __FILE__, __LINE__);
// }
// return output;
//}