// -----------------------------------------------------------------------------
// ANSCORE/modules/ANSODEngine/ANSODEngine.cpp
// (source-viewer metadata from the original listing: 2229 lines, 82 KiB, C++)
// -----------------------------------------------------------------------------
#pragma once
#include <set>
#include <cmath>
#include <json.hpp>
#include "ANSODEngine.h"
#include "ANSLicense.h" // ANS_DBG macro
#include "ANSYOLOOD.h"
#include "ANSTENSORRTOD.h"
#include "ANSTENSORRTCL.h"
#include "ANSOPENVINOCL.h"
#include "ANSOPENVINOOD.h"
#include "ANSYOLOV10RTOD.h"
#include "ANSYOLOV10OVOD.h"
#include "ANSCUSTOMDetector.h"
#include "ANSFD.h"
#include "ANSANOMALIB.h"
#include "ANSPOSE.h"
#include "ANSSAM.h"
#include <pipelines/metadata.h>
#include <models/input_data.h>
#include "utils/visualizer.hpp"
#include <turbojpeg.h>
#include <vector>
#include <map>
#include <string>
// Process-wide license state shared by every ANSODBase instance.
// Written only by VerifyGlobalANSODLicense (guarded by the once_flag below).
static bool ansodLicenceValid = false;
// Global once_flag to protect license checking — guarantees the license
// verification runs at most once per process (see ANSODBase::CheckLicense).
static std::once_flag ansodLicenseOnceFlag;
namespace ANSCENTER
{
//--------------------------------------------------------------------------------------------------------------------------------
// Base class
//--------------------------------------------------------------------------------------------------------------------------------
/// Verifies the global ANSOD license against the supported product IDs.
/// Tries ODHUB (1002) first, then ANSVIS (1003), then ANSTS (1008); the first
/// successful verification wins. The outcome is stored in the file-global
/// ansodLicenceValid flag (callers serialize access via ansodLicenseOnceFlag).
/// Any exception during verification is treated as "license invalid".
static void VerifyGlobalANSODLicense(const std::string& licenseKey) {
    try {
        // Default productId=1002 (ODHUB)
        ansodLicenceValid = ANSCENTER::ANSLicenseHelper::LicenseVerification(licenseKey, 1002, "ODHUB-LV");
        if (!ansodLicenceValid) { // also accept an ANSVIS license (productId=1003)
            ansodLicenceValid = ANSCENTER::ANSLicenseHelper::LicenseVerification(licenseKey, 1003, "ANSVIS");
        }
        if (!ansodLicenceValid) { // also accept an ANSTS license (productId=1008)
            ansodLicenceValid = ANSCENTER::ANSLicenseHelper::LicenseVerification(licenseKey, 1008, "ANSTS");
        }
    }
    catch (const std::exception&) { // fix: catch by const ref; unused variable removed
        ansodLicenceValid = false;
    }
}
/// Validates the license and caches the result in this instance's _licenseValid.
/// The real verification runs at most once per process (std::call_once on a
/// file-global flag), so the FIRST instance to reach this point supplies the
/// _licenseKey that decides the global outcome; later instances merely copy
/// the cached global flag ansodLicenceValid.
void ANSODBase::CheckLicense() {
try {
// Check once globally
std::call_once(ansodLicenseOnceFlag, [this]() {
VerifyGlobalANSODLicense(_licenseKey);
});
// Update this instance's local license flag
_licenseValid = ansodLicenceValid;
}
catch (const std::exception& e) {
this->_logger.LogFatal("ANSODBase::CheckLicense. Error:", e.what(), __FILE__, __LINE__);
}
}
/// Prepares the engine from a password-protected model zip archive.
/// Extracts the archive (caller-supplied password first, then built-in
/// defaults), records the model_config.json path, and enables the default
/// BYTETRACK tracker. An empty modelZipFilePath is accepted to support the
/// motion detector, which needs no model file.
/// Returns true on success, false when the archive is missing or cannot be
/// extracted with any known password.
bool ANSODBase::LoadModel(const std::string& modelZipFilePath, const std::string& modelZipPassword)
{
    try {
        ANSCENTER::ANSLibsLoader::Initialize();
        // Reset prior model state (previously done twice via "" + clear()).
        _modelFolder.clear();
        _modelConfigFile.clear();
        // 0. Check if the modelZipFilePath exists
        if (!FileExist(modelZipFilePath)) {
            if (modelZipFilePath.empty()) { // Support motion detector that does not need a model file
                _isInitialized = true;
                // Auto-enable tracker + stabilization by default (BYTETRACK)
                SetTracker(TrackerType::BYTETRACK, true);
                return true;
            }
            this->_logger.LogFatal("ANSODBase::LoadModel", "Model zip file is not exist", __FILE__, __LINE__);
            return false;
        }
        // 1. Unzip the archive to a per-model folder; try the user password
        // first, then the built-in default passwords.
        std::vector<std::string> passwordArray;
        if (!modelZipPassword.empty()) passwordArray.push_back(modelZipPassword);
        passwordArray.push_back("Sh7O7nUe7vJ/417W0gWX+dSdfcP9hUqtf/fEqJGqxYL3PedvHubJag==");
        passwordArray.push_back("3LHxGrjQ7kKDJBD9MX86H96mtKLJaZcTYXrYRdQgW8BKGt7enZHYMg==");
        passwordArray.push_back("AnsCustomModels20@$");
        passwordArray.push_back("AnsDemoModels20@!");
        std::string modelName = GetFileNameWithoutExtension(modelZipFilePath);
        for (const auto& password : passwordArray) {
            if (ExtractPasswordProtectedZip(modelZipFilePath, password, modelName, _modelFolder, false))
                break; // First password that extracts successfully wins.
        }
        // 2. Extraction must have produced the output folder
        // (wrong password or corrupt archive otherwise).
        if (!std::filesystem::exists(_modelFolder)) {
            this->_logger.LogError("ANSODBase::LoadModel. Output model folder is not exist", _modelFolder, __FILE__, __LINE__);
            return false;
        }
        // 3. Record the model configuration file path
        _modelConfigFile = CreateFilePath(_modelFolder, "model_config.json");
        // Fix: mark initialized on the success path too — Initialize() does, and
        // this function already did so on the empty-model branch above.
        _isInitialized = true;
        // Auto-enable tracker + stabilization by default (BYTETRACK)
        SetTracker(TrackerType::BYTETRACK, true);
        return true;
    }
    catch (const std::exception& e) {
        this->_logger.LogFatal("ANSODBase::LoadModel", e.what(), __FILE__, __LINE__);
        return false;
    }
}
/// Full engine initialization: verifies the license, extracts the model zip
/// (caller-supplied password first, then built-in defaults), records the
/// model_config.json path, and enables the default BYTETRACK tracker.
/// An empty modelZipFilePath is accepted to support the motion detector.
/// Returns false on invalid license, missing archive, or extraction failure.
/// NOTE(review): labelMap is not populated here — confirm whether derived
/// classes fill it.
bool ANSODBase::Initialize(std::string licenseKey, ModelConfig modelConfig, const std::string& modelZipFilePath, const std::string& modelZipPassword, std::string& labelMap) {
    try {
        ANSCENTER::ANSLibsLoader::Initialize();
        _licenseKey = licenseKey;
        _licenseValid = false;
        // Reset prior model state (previously done twice via "" + clear()).
        _modelFolder.clear();
        _modelConfigFile.clear();
        _modelConfig = modelConfig;
        CheckLicense();
        if (!_licenseValid) {
            this->_logger.LogError("ANSODBase::Initialize", "Invalid License", __FILE__, __LINE__);
            return false;
        }
        // 0. Check if the modelZipFilePath exists
        if (!FileExist(modelZipFilePath)) {
            if (modelZipFilePath.empty()) { // Support motion detector that does not need a model file
                _isInitialized = true;
                // Auto-enable tracker + stabilization by default (BYTETRACK)
                SetTracker(TrackerType::BYTETRACK, true);
                return true;
            }
            this->_logger.LogFatal("ANSODBase::Initialize", "Model zip file is not exist", __FILE__, __LINE__);
            return false;
        }
        // 1. Unzip the archive; try the user password first, then the defaults.
        std::vector<std::string> passwordArray;
        if (!modelZipPassword.empty()) passwordArray.push_back(modelZipPassword);
        passwordArray.push_back("Sh7O7nUe7vJ/417W0gWX+dSdfcP9hUqtf/fEqJGqxYL3PedvHubJag==");
        passwordArray.push_back("3LHxGrjQ7kKDJBD9MX86H96mtKLJaZcTYXrYRdQgW8BKGt7enZHYMg==");
        passwordArray.push_back("AnsCustomModels20@$");
        passwordArray.push_back("AnsDemoModels20@!");
        std::string modelName = GetFileNameWithoutExtension(modelZipFilePath);
        for (const auto& password : passwordArray) {
            if (ExtractPasswordProtectedZip(modelZipFilePath, password, modelName, _modelFolder, false))
                break; // First password that extracts successfully wins.
        }
        // 2. Extraction must have produced the output folder
        // (wrong password or corrupt archive otherwise).
        if (!std::filesystem::exists(_modelFolder)) {
            this->_logger.LogError("ANSODBase::Initialize. Output model folder is not exist", _modelFolder, __FILE__, __LINE__);
            return false;
        }
        // 3. Record the model configuration file path
        std::string modelConfigName = "model_config.json";
        _modelConfigFile = CreateFilePath(_modelFolder, modelConfigName);
        _isInitialized = true;
        // Auto-enable tracker + stabilization by default (BYTETRACK)
        SetTracker(TrackerType::BYTETRACK, true);
        return true;
    }
    catch (const std::exception& e) {
        this->_logger.LogFatal("ANSODBase::Initialize", e.what(), __FILE__, __LINE__);
        return false;
    }
}
/// Placeholder for model optimization (e.g. FP16 conversion).
/// NOTE(review): fp16 and optimizedModelFolder are currently unused — the body
/// only (re)initializes the library loader and reports success. Confirm
/// whether the actual optimization step is implemented elsewhere or pending.
bool ANSODBase::OptimizeModel(bool fp16, std::string& optimizedModelFolder) {
try {
// Call the library loader's optimization function
ANSCENTER::ANSLibsLoader::Initialize();
return true;
}
catch (std::exception& e) {
this->_logger.LogFatal("ANSODBase::OptimizeModel", e.what(), __FILE__, __LINE__);
return false;
}
}
/// Parses _classFilePath as a semicolon-separated class list and replaces
/// the contents of _classes with the parsed names.
void ANSODBase::LoadClassesFromString() {
    try {
        const std::string delimiter = ";";
        const auto parsedNames = Split(_classFilePath, delimiter);
        // Replace the current class list in one step (clear + copy).
        _classes.assign(parsedNames.begin(), parsedNames.end());
    }
    catch (std::exception& e) {
        this->_logger.LogFatal("ANSODBase::LoadClassesFromString()", e.what(), __FILE__, __LINE__);
    }
}
/// Loads _classes from _classFilePath, one class name per line.
/// If the file cannot be opened, _classes is left untouched.
void ANSODBase::LoadClassesFromFile() {
    try {
        std::ifstream classStream(_classFilePath);
        if (!classStream.is_open()) return; // keep existing list on failure
        _classes.clear();
        for (std::string line; std::getline(classStream, line); ) {
            _classes.push_back(line);
        }
        // Stream closes automatically on scope exit (RAII).
    }
    catch (std::exception& e) {
        this->_logger.LogFatal("ANSODBase::LoadClassesFromFile()", e.what(), __FILE__, __LINE__);
    }
}
/// Initializes the engine directly from an already-extracted model folder
/// (no zip extraction). Verifies the license, records the folder and its
/// model_config.json path, and enables the default BYTETRACK tracker.
/// An empty modelFolder is accepted to support the motion detector.
/// NOTE(review): modelName, className and labelMap are currently unused here
/// — confirm whether derived classes consume them.
bool ANSODBase::LoadModelFromFolder(std::string licenseKey, ModelConfig modelConfig, std::string modelName, std::string className, const std::string& modelFolder, std::string& labelMap) {
    try { // consistency fix: every sibling loader guards its body with try/catch
        ANSCENTER::ANSLibsLoader::Initialize();
        _licenseKey = licenseKey;
        _licenseValid = false;
        _modelFolder = modelFolder;
        _modelConfigFile.clear(); // previously cleared twice ("" + clear())
        _modelConfig = modelConfig;
        // 0. Check license
        CheckLicense();
        if (!_licenseValid) {
            this->_logger.LogError("LoadModelFromFolder::Initialize", "Invalid License", __FILE__, __LINE__);
            return false;
        }
        // 1. Check if the modelFolder exists
        if (!FolderExist(modelFolder)) {
            if (modelFolder.empty()) { // Support motion detector that does not need a model file
                _isInitialized = true;
                // Auto-enable tracker + stabilization by default (BYTETRACK)
                SetTracker(TrackerType::BYTETRACK, true);
                return true;
            }
            // Fixed message: this path checks a folder, not a zip file.
            this->_logger.LogFatal("LoadModelFromFolder::Initialize", "Model folder does not exist", __FILE__, __LINE__);
            return false;
        }
        // 2. Record the model configuration file path
        std::string modelConfigName = "model_config.json";
        _modelConfigFile = CreateFilePath(_modelFolder, modelConfigName);
        _isInitialized = true;
        // Auto-enable tracker + stabilization by default (BYTETRACK)
        SetTracker(TrackerType::BYTETRACK, true);
        return true;
    }
    catch (const std::exception& e) {
        this->_logger.LogFatal("ANSODBase::LoadModelFromFolder", e.what(), __FILE__, __LINE__);
        return false;
    }
}
bool ANSODBase::isSimilarObject(const Object& obj1, const Object& obj2) {
try {
// Compare based on classId and className
if (obj1.classId != obj2.classId ||
obj1.className != obj2.className ||
obj1.cameraId != obj2.cameraId) {
return false;
}
// Define thresholds for similarity
const double positionThreshold = 50.0;
const double confidenceThreshold = 0.2;
const double sizeThreshold = 0.2; // Allows a 20% size difference
// Check if centers are within positionThreshold
cv::Point2f center1 = (obj1.box.tl() + obj1.box.br()) * 0.5;
cv::Point2f center2 = (obj2.box.tl() + obj2.box.br()) * 0.5;
if (cv::norm(center1 - center2) >= positionThreshold) {
return false;
}
// Check if bounding box sizes are similar within the size threshold
if (obj2.box.width == 0 || obj2.box.height == 0) return false;
double widthRatio = static_cast<double>(obj1.box.width) / obj2.box.width;
double heightRatio = static_cast<double>(obj1.box.height) / obj2.box.height;
if (std::abs(1.0 - widthRatio) > sizeThreshold || std::abs(1.0 - heightRatio) > sizeThreshold) {
return false;
}
// Check if confidence scores are within an acceptable range
if (std::abs(obj1.confidence - obj2.confidence) >= confidenceThreshold) {
return false;
}
// If all checks pass, the objects are considered similar
return true;
}
catch (std::exception& e) {
this->_logger.LogFatal("ANSODBase::isSimilarObject()", e.what(), __FILE__, __LINE__);
return false;
}
}
bool ANSODBase::isOverlayObject(const Object& obj1, const Object& obj2) {
try {
// Compare based on classId and className
if (obj1.classId != obj2.classId ||
obj1.className != obj2.className ||
obj1.cameraId != obj2.cameraId) {
return false;
}
//// Define thresholds for similarity
//const double positionThreshold = 50.0;
//// Check if centers are within positionThreshold
//cv::Point2f center1 = (obj1.box.tl() + obj1.box.br()) * 0.5;
//cv::Point2f center2 = (obj2.box.tl() + obj2.box.br()) * 0.5;
//if (cv::norm(center1 - center2) >= positionThreshold) {
// return false;
//}
// Check if bounding boxes overlap
if ((obj1.box & obj2.box).area() == 0) {
return false; // No overlap between the bounding boxes
}
// If all checks pass, the objects are considered similar
return true;
}
catch (std::exception& e) {
this->_logger.LogFatal("ANSODBase::isOverlayObject()", e.what(), __FILE__, __LINE__);
return false;
}
}
/// Appends one frame's detections (possibly empty) to the camera's bounded
/// history queue, evicting the oldest entries as needed. Thread-safe (_mutex).
void ANSODBase::EnqueueDetection(const std::vector<Object>& detectedObjects, const std::string& cameraId) {
    std::lock_guard<std::recursive_mutex> lock(_mutex);
    try {
        CameraData& camera = GetCameraData(cameraId);// Retrieve camera data
        // Fix: drain with >= instead of the original `== QUEUE_SIZE` check.
        // The equality test never fires once QUEUE_SIZE has been lowered
        // (SetStabilization reassigns it), letting the queue grow without
        // bound. StabilizeDetections already uses >= for the same queue.
        while (camera._detectionQueue.size() >= QUEUE_SIZE) {
            camera._detectionQueue.pop_front(); // Drop oldest entries
        }
        camera._detectionQueue.push_back(detectedObjects); // Add the new detection, even if empty
    }
    catch (std::exception& e) {
        this->_logger.LogFatal("ANSODBase::EnqueueDetection()", e.what(), __FILE__, __LINE__);
    }
}
/// Returns a snapshot (by-value copy) of the camera's detection history queue.
/// The copy is taken while holding _mutex, so callers may iterate the result
/// without further locking.
std::deque<std::vector<Object>>ANSODBase::DequeueDetection(const std::string& cameraId) {
std::lock_guard<std::recursive_mutex> lock(_mutex);
CameraData& camera = GetCameraData(cameraId);// Retrieve camera data
return camera._detectionQueue;
}
/// Picks the best available OpenVINO device and applies LATENCY performance
/// hints to it. Preference order: NPU (reported as "AUTO:NPU,GPU") > GPU or
/// GPU.0 (both reported as "GPU") > CPU. Falls back to CPU on any error.
/// Fix: removed the dead `device_found` flag — every branch already returned,
/// so the `if (!device_found)` re-checks were unreachable; the duplicated
/// GPU / GPU.0 branches (identical bodies) are merged.
std::string ANSODBase::GetOpenVINODevice(ov::Core& core) {
    try {
        const std::vector<std::string> availableDevices = core.get_available_devices();
        auto hasDevice = [&availableDevices](const std::string& name) {
            return std::find(availableDevices.begin(), availableDevices.end(), name)
                != availableDevices.end();
        };
        // Prefer NPU: run AUTO over NPU+GPU, both hinted for latency
        // (matches original behavior, which set the GPU hint here too).
        if (hasDevice("NPU")) {
            core.set_property("NPU", ov::hint::performance_mode(ov::hint::PerformanceMode::LATENCY));
            core.set_property("GPU", ov::hint::performance_mode(ov::hint::PerformanceMode::LATENCY));
            return "AUTO:NPU,GPU";
        }
        // Next best: a GPU — either the generic "GPU" alias or explicit "GPU.0".
        if (hasDevice("GPU") || hasDevice("GPU.0")) {
            core.set_property("GPU", ov::hint::performance_mode(ov::hint::PerformanceMode::LATENCY));
            return "GPU";
        }
        // Fallback: CPU.
        core.set_property("CPU", ov::hint::performance_mode(ov::hint::PerformanceMode::LATENCY));
        return "CPU";
    }
    catch (std::exception& e) {
        this->_logger.LogFatal("ANSODBase::GetOpenVINODevice()", e.what(), __FILE__, __LINE__);
        core.set_property("CPU", ov::hint::performance_mode(ov::hint::PerformanceMode::LATENCY));
        return "CPU";
    }
}
// ── Multi-object tracker (MOT) ──────────────────────────────────────
/// Enables/disables tracking and selects the tracker backend.
/// Changing the tracker type releases all per-camera tracker handles so they
/// are lazily recreated with the new type on the next ApplyTracking call.
/// Enabling the tracker also auto-enables stabilization.
bool ANSODBase::SetTracker(TrackerType trackerType, bool enabled) {
    try {
        std::lock_guard<std::recursive_mutex> lock(_mutex);
        if (_trackerType != trackerType) {
            // Drop existing per-camera trackers; ApplyTracking recreates them
            // lazily with the new type.
            for (auto& [cameraKey, cameraData] : _cameras) {
                if (cameraData._tracker) {
                    ReleaseANSMOTHandle(&cameraData._tracker);
                    cameraData._tracker = nullptr;
                }
            }
        }
        _trackerEnabled = enabled;
        _trackerType = trackerType;
        if (!enabled) return true;
        // Defaults (BYTETRACK); per-type overrides below.
        _trackerMotType = 1;
        _trackerParams = R"({"parameters":{"frame_rate":"15","track_buffer":"300","track_threshold":"0.500000","high_threshold":"0.600000","match_thresold":"0.980000"}})";
        if (trackerType == TrackerType::UCMC) {
            _trackerMotType = 3;
        }
        else if (trackerType == TrackerType::OCSORT) {
            _trackerMotType = 4;
            _trackerParams = R"({"parameters":{"det_thresh":"0.000000","max_age" : "30","min_hits" : "3","iou_threshold" : "0.300000","delta_t" : "3","asso_func" : "0","inertia":"3.000000","use_byte":"0"}})";
        }
        // BYTETRACK and any unrecognized type keep the defaults above.
        // Auto-enable stabilization with defaults when tracker is enabled
        if (!_stabilizationEnabled) {
            _stabilizationEnabled = true;
        }
        return true;
    }
    catch (std::exception& e) {
        _logger.LogFatal("ANSODBase::SetTracker", e.what(), __FILE__, __LINE__);
        return false;
    }
}
/// Turns detection stabilization on/off and configures its history window.
/// Enabling stabilization auto-enables the tracker (stabilization needs
/// track IDs). historySize is clamped to >= 5, maxMisses to >= 1.
bool ANSODBase::SetStabilization(bool enabled, int historySize, int maxMisses) {
    std::lock_guard<std::recursive_mutex> lock(_mutex);
    try {
        if (!enabled) {
            _stabilizationEnabled = false;
            return true;
        }
        // Stabilization relies on tracker IDs — make sure the tracker runs.
        if (!_trackerEnabled && !SetTracker(_trackerType, true)) {
            _logger.LogError("ANSODBase::SetStabilization",
                "Failed to auto-enable tracker for stabilization", __FILE__, __LINE__);
            return false;
        }
        _stabilizationEnabled = true;
        _stabilizationQueueSize = static_cast<size_t>(std::max(historySize, 5));
        _stabilizationMaxConsecutiveMisses = std::max(maxMisses, 1);
        QUEUE_SIZE = _stabilizationQueueSize; // keep detection queue bound in sync
        return true;
    }
    catch (std::exception& e) {
        _logger.LogFatal("ANSODBase::SetStabilization", e.what(), __FILE__, __LINE__);
        return false;
    }
}
/// Applies stabilization tuning parameters from a JSON string. Only keys
/// present in the JSON are updated; each value is clamped to a safe range.
/// Returns false (and logs) when the JSON cannot be parsed.
bool ANSODBase::SetStabilizationParameters(const std::string& jsonParams) {
    std::lock_guard<std::recursive_mutex> lock(_mutex);
    try {
        const auto params = nlohmann::json::parse(jsonParams);
        if (params.contains("hysteresis_enter")) {
            _hysteresisEnterThreshold = params.at("hysteresis_enter").get<float>();
        }
        if (params.contains("hysteresis_keep")) {
            _hysteresisKeepThreshold = params.at("hysteresis_keep").get<float>();
        }
        if (params.contains("ema_alpha")) {
            _emaAlpha = std::clamp(params.at("ema_alpha").get<float>(), 0.01f, 1.0f);
        }
        if (params.contains("track_boost_min_frames")) {
            _trackBoostMinFrames = std::max(params.at("track_boost_min_frames").get<int>(), 1);
        }
        if (params.contains("track_boost_amount")) {
            _trackBoostAmount = std::clamp(params.at("track_boost_amount").get<float>(), 0.0f, 0.5f);
        }
        if (params.contains("class_consistency_frames")) {
            _classConsistencyMinFrames = std::max(params.at("class_consistency_frames").get<int>(), 1);
        }
        if (params.contains("confidence_decay")) {
            _stabilizationConfidenceDecay = std::clamp(params.at("confidence_decay").get<float>(), 0.1f, 1.0f);
        }
        if (params.contains("min_confidence")) {
            _stabilizationMinConfidence = std::clamp(params.at("min_confidence").get<float>(), 0.0f, 1.0f);
        }
        return true;
    }
    catch (std::exception& e) {
        _logger.LogFatal("ANSODBase::SetStabilizationParameters", e.what(), __FILE__, __LINE__);
        return false;
    }
}
/// Stores tracker parameters (JSON) and pushes them to every existing
/// per-camera tracker. Returns false when the tracker is disabled.
bool ANSODBase::SetTrackerParameters(const std::string& jsonParams) {
    try {
        std::lock_guard<std::recursive_mutex> lock(_mutex);
        if (!_trackerEnabled) return false;
        // Remember for trackers created lazily later...
        _trackerParams = jsonParams;
        // ...and update the trackers that already exist.
        for (auto& [cameraKey, cameraData] : _cameras) {
            if (cameraData._tracker != nullptr) {
                cameraData._tracker->UpdateParameters(jsonParams);
            }
        }
        return true;
    }
    catch (std::exception& e) {
        _logger.LogFatal("ANSODBase::SetTrackerParameters", e.what(), __FILE__, __LINE__);
        return false;
    }
}
/// Runs the lazily-created per-camera MOT tracker over `detections` and
/// assigns track IDs in place. Tracker outputs are matched back to detections
/// by IoU (> 0.3 required). With stabilization enabled, tracker predictions
/// that matched no detection are appended as "ghost" objects tagged via
/// TagStabilized for later processing by StabilizeDetections.
/// Returns the (possibly extended) detections vector.
std::vector<Object> ANSODBase::ApplyTracking(std::vector<Object>& detections, const std::string& camera_id) {
if (!_trackerEnabled) return detections;
// With stabilization off an empty frame has nothing to track; with it on we
// still run so coasting tracks can emit ghosts below.
if (detections.empty() && !_stabilizationEnabled) return detections;
// Lock: protects lazy-creation and same-camera concurrent calls.
// Different cameras use separate tracker instances, so contention is
// limited to same-camera threads (which must be serialized anyway).
std::lock_guard<std::recursive_mutex> trackLock(_mutex);
try {
// Get or lazily create per-camera tracker
CameraData& camera = GetCameraData(camera_id);
if (!camera._tracker) {
// CreateANSMOTHandle: 0 is treated as failure (per the check below).
int result = CreateANSMOTHandle(&camera._tracker, _trackerMotType);
if (result == 0 || !camera._tracker) {
_logger.LogError("ANSODBase::ApplyTracking",
"Failed to create per-camera tracker for " + camera_id, __FILE__, __LINE__);
return detections;
}
if (!_trackerParams.empty()) {
camera._tracker->UpdateParameters(_trackerParams);
}
}
// Step 1: Convert Object[] → TrackerObject[]
std::vector<TrackerObject> trackerInput;
trackerInput.reserve(detections.size());
for (const auto& obj : detections) {
TrackerObject tobj{};
// track_id 0 = unassigned; the tracker fills real IDs in.
tobj.track_id = 0;
tobj.class_id = obj.classId;
tobj.prob = obj.confidence;
tobj.x = static_cast<float>(obj.box.x);
tobj.y = static_cast<float>(obj.box.y);
tobj.width = static_cast<float>(obj.box.width);
tobj.height = static_cast<float>(obj.box.height);
// Redundant box representation (left/top/right/bottom) kept in sync.
tobj.left = tobj.x;
tobj.top = tobj.y;
tobj.right = tobj.x + tobj.width;
tobj.bottom = tobj.y + tobj.height;
tobj.object_id = camera_id;
trackerInput.push_back(tobj);
}
// Step 2: Run per-camera tracker
auto trackerOutput = camera._tracker->UpdateTracker(0, trackerInput);
// Step 3: IoU-match tracked objects back to detections, assign trackId
// NOTE(review): each tracker output selects its best detection independently;
// if two outputs pick the same detection the later one overwrites trackId.
for (const auto& tobj : trackerOutput) {
cv::Rect trkBox(static_cast<int>(tobj.x), static_cast<int>(tobj.y),
static_cast<int>(tobj.width), static_cast<int>(tobj.height));
float bestIoU = 0.f;
int bestIdx = -1;
for (int i = 0; i < static_cast<int>(detections.size()); ++i) {
cv::Rect inter = trkBox & detections[i].box;
float interArea = static_cast<float>(inter.area());
float unionArea = static_cast<float>(trkBox.area() + detections[i].box.area()) - interArea;
float iou = (unionArea > 0.f) ? interArea / unionArea : 0.f;
if (iou > bestIoU) {
bestIoU = iou;
bestIdx = i;
}
}
// Require meaningful overlap (IoU > 0.3) before trusting the match.
if (bestIdx >= 0 && bestIoU > 0.3f) {
detections[bestIdx].trackId = tobj.track_id;
}
}
// Step 4: If stabilization enabled, inject ghost objects for unmatched tracker predictions
if (_stabilizationEnabled) {
// Collect trackIds already assigned to real detections
std::set<int> matchedTrackIds;
for (const auto& det : detections) {
if (det.trackId > 0) matchedTrackIds.insert(det.trackId);
}
// Tracker-predicted objects not matched to any detection are "ghosts"
for (const auto& tobj : trackerOutput) {
if (tobj.track_id <= 0) continue;
if (matchedTrackIds.count(tobj.track_id) > 0) continue;
Object ghost;
ghost.trackId = tobj.track_id;
ghost.classId = tobj.class_id;
ghost.confidence = tobj.prob;
ghost.box = cv::Rect(
static_cast<int>(tobj.x), static_cast<int>(tobj.y),
static_cast<int>(tobj.width), static_cast<int>(tobj.height));
ghost.cameraId = camera_id;
// className is left default here; StabilizeDetections restores it
// from the track history when it processes the ghost.
TagStabilized(ghost.extraInfo);
detections.push_back(ghost);
}
}
return detections;
}
catch (std::exception& e) {
_logger.LogFatal("ANSODBase::ApplyTracking", e.what(), __FILE__, __LINE__);
return detections;
}
}
// ── extraInfo stabilization tag helpers ─────────────────────────────────
/// Appends the "stabilized" marker to extraInfo (semicolon-separated) unless
/// the marker is already present.
void ANSODBase::TagStabilized(std::string& extraInfo) {
    if (IsTaggedStabilized(extraInfo)) return;
    extraInfo += extraInfo.empty() ? "stabilized" : ";stabilized";
}
/// Removes the "stabilized" marker from a semicolon-separated extraInfo
/// string. Handles the marker with a leading separator, with a trailing
/// separator, or as the entire string (substring-based, mirroring
/// IsTaggedStabilized).
void ANSODBase::UntagStabilized(std::string& extraInfo) {
    static const std::string kLeadingTag = ";stabilized";  // tag after other info
    static const std::string kTrailingTag = "stabilized;"; // tag before other info
    auto pos = extraInfo.find(kLeadingTag);
    if (pos != std::string::npos) {
        extraInfo.erase(pos, kLeadingTag.size());
        return;
    }
    pos = extraInfo.find(kTrailingTag);
    if (pos != std::string::npos) {
        extraInfo.erase(pos, kTrailingTag.size());
        return;
    }
    if (extraInfo == "stabilized") {
        extraInfo.clear();
    }
}
/// Returns true when extraInfo contains the "stabilized" marker anywhere.
/// Note: plain substring match — a token that merely embeds "stabilized"
/// (e.g. "destabilized") also matches.
bool ANSODBase::IsTaggedStabilized(const std::string& extraInfo) {
return extraInfo.find("stabilized") != std::string::npos;
}
// ── Detection stabilization ─────────────────────────────────────────────
// Implements:
// #1 Confidence hysteresis — enter vs keep thresholds
// #2 EMA confidence smoothing — temporal averaging per track
// #5 Track-aware boost — established tracks get confidence bonus
// Ghost interpolation — keeps objects alive through brief misses
//
// Input `detections` is expected to come from ApplyTracking (trackIds set,
// ghosts tagged via TagStabilized). Objects with trackId <= 0 pass through
// untouched. Returns the filtered/smoothed detections.
std::vector<Object> ANSODBase::StabilizeDetections(
std::vector<Object>& detections, const std::string& camera_id)
{
// _cameras map and per-camera state are not thread-safe.
std::lock_guard<std::recursive_mutex> stabLock(_mutex);
try {
CameraData& camera = GetCameraData(camera_id);
camera._stabilizationFrameCounter++;
auto& tracks = camera._trackHistories;
std::set<int> seenTrackIds;
// ── Resolve hysteresis thresholds (auto from model config if 0) ──
const float enterThreshold = (_hysteresisEnterThreshold > 0.f)
? _hysteresisEnterThreshold
: _modelConfig.detectionScoreThreshold;
// Keep threshold defaults to 65% of the enter threshold when unset.
const float keepThreshold = (_hysteresisKeepThreshold > 0.f)
? _hysteresisKeepThreshold
: enterThreshold * 0.65f;
// Phase 1: Update histories, apply EMA and boost for real detections
for (auto& obj : detections) {
if (obj.trackId <= 0) continue;
// Ghosts present this frame count as "seen" here, so Phase 3 does not
// advance their miss counter a second time.
seenTrackIds.insert(obj.trackId);
bool isGhost = IsTaggedStabilized(obj.extraInfo);
auto it = tracks.find(obj.trackId);
if (it == tracks.end()) {
// ── New track ──
CameraData::TrackedObjectHistory hist;
hist.classId = obj.classId;
hist.className = obj.className;
hist.extraInfo = isGhost ? "" : obj.extraInfo;
hist.lastBox = obj.box;
hist.lastConfidence = obj.confidence;
hist.smoothedConfidence = obj.confidence; // #2 EMA init
hist.consecutiveMisses = isGhost ? 1 : 0;
hist.totalDetections = isGhost ? 0 : 1;
hist.frameFirstSeen = camera._stabilizationFrameCounter;
hist.isEstablished = false;
tracks[obj.trackId] = hist;
}
else {
auto& hist = it->second;
if (isGhost) {
// Ghost: increment misses, apply confidence decay from smoothed
hist.consecutiveMisses++;
obj.confidence = hist.smoothedConfidence *
std::pow(_stabilizationConfidenceDecay,
static_cast<float>(hist.consecutiveMisses));
// Restore className/extraInfo from history (ApplyTracking leaves
// them default on ghost objects).
obj.className = hist.className;
obj.extraInfo = hist.extraInfo;
TagStabilized(obj.extraInfo);
hist.lastBox = obj.box;
}
else {
// ── Real detection ──
hist.consecutiveMisses = 0;
hist.totalDetections++;
hist.lastBox = obj.box;
hist.extraInfo = obj.extraInfo;
UntagStabilized(obj.extraInfo);
// #7 Class consistency: resist sudden class switches
// on established tracks. Require N consecutive frames
// of the new class before accepting the change.
if (hist.isEstablished && obj.classId != hist.classId) {
// Class differs from established track
if (obj.classId == hist.pendingClassId) {
hist.pendingClassStreak++;
}
else {
// Different candidate — reset streak
hist.pendingClassId = obj.classId;
hist.pendingClassName = obj.className;
hist.pendingClassStreak = 1;
}
if (hist.pendingClassStreak >= _classConsistencyMinFrames) {
// Confirmed class change: accept it
hist.classId = obj.classId;
hist.className = obj.className;
hist.pendingClassId = -1;
hist.pendingClassStreak = 0;
}
else {
// Not confirmed yet: override output with established class
obj.classId = hist.classId;
obj.className = hist.className;
}
}
else {
// Same class as history or track not yet established: accept directly
hist.classId = obj.classId;
hist.className = obj.className;
hist.pendingClassId = -1;
hist.pendingClassStreak = 0;
}
// #2 EMA confidence smoothing
hist.smoothedConfidence = _emaAlpha * obj.confidence
+ (1.0f - _emaAlpha) * hist.smoothedConfidence;
hist.lastConfidence = obj.confidence;
// #5 Track-aware boost for established tracks
if (hist.totalDetections >= _trackBoostMinFrames) {
hist.isEstablished = true;
}
// Publish the EMA-smoothed confidence as the output
// (smoothed is more stable than raw single-frame value)
obj.confidence = hist.smoothedConfidence;
// Apply boost on top of smoothed confidence for established tracks
if (hist.isEstablished) {
obj.confidence = std::min(1.0f,
obj.confidence + _trackBoostAmount);
}
}
}
}
// Phase 2: Hysteresis + ghost pruning
// - New objects must exceed enterThreshold to appear
// - Established tracked objects use the lower keepThreshold
// - Ghosts are pruned by max misses, min confidence, and detection count
detections.erase(
std::remove_if(detections.begin(), detections.end(),
[&](const Object& obj) {
// Untracked objects are never filtered here.
if (obj.trackId <= 0) return false;
bool isGhostObj = IsTaggedStabilized(obj.extraInfo);
auto it = tracks.find(obj.trackId);
if (isGhostObj) {
// Ghost pruning
if (it == tracks.end()) return true;
if (it->second.consecutiveMisses > _stabilizationMaxConsecutiveMisses) return true;
if (obj.confidence < _stabilizationMinConfidence) return true;
// Require at least 3 real detections before a track may coast.
if (it->second.totalDetections < 3) return true;
return false;
}
else {
// #1 Hysteresis: apply different thresholds
if (it != tracks.end() && it->second.isEstablished) {
// Established track: use lower keep threshold
return obj.confidence < keepThreshold;
}
else {
// New/young track: must meet enter threshold
return obj.confidence < enterThreshold;
}
}
}),
detections.end());
// Phase 3: Prune stale track histories (not seen for 2x maxMisses)
for (auto it = tracks.begin(); it != tracks.end(); ) {
if (seenTrackIds.count(it->first) == 0) {
it->second.consecutiveMisses++;
}
if (it->second.consecutiveMisses > _stabilizationMaxConsecutiveMisses * 2) {
it = tracks.erase(it);
}
else {
++it;
}
}
// Phase 4: Enqueue stabilized results for history
if (camera._detectionQueue.size() >= _stabilizationQueueSize) {
camera._detectionQueue.pop_front();
}
camera._detectionQueue.push_back(detections);
return detections;
}
catch (std::exception& e) {
_logger.LogFatal("ANSODBase::StabilizeDetections", e.what(), __FILE__, __LINE__);
return detections;
}
}
/// Releases all per-camera state. CameraData::clear() frees each camera's
/// tracker handle before the map itself is emptied.
ANSODBase::~ANSODBase() {
    try {
        // Iterating an empty map is a no-op, so no emptiness pre-check needed.
        for (auto& [cameraKey, cameraData] : _cameras) {
            cameraData.clear(); // Releases per-camera tracker + clears state
        }
        _cameras.clear();
    }
    catch (std::exception& e) {
        this->_logger.LogFatal("ANSODBase::~ANSODBase()", e.what(), __FILE__, __LINE__);
    }
}
// Very slow and not recommended for real-time applications
/// SAHI-style tiled inference: runs RunInference on overlapping tiles plus
/// the full frame, maps tile detections back to global coordinates, and
/// merges everything with NMS (IoU 0.1). Holds _mutex for the entire run.
/// Returns the merged detections, or an empty vector on bad input/error.
std::vector<Object> ANSODBase::RunInferences(const cv::Mat& input, int tiledWidth, int tiledHeight, double overLap, const std::string& camera_id) {
    std::lock_guard<std::recursive_mutex> lock(_mutex);
    std::vector<Object> allResults;
    try {
        ANS_DBG("ODEngine", "SAHI START: %dx%d tile=%dx%d overlap=%.1f cam=%s",
            input.cols, input.rows, tiledWidth, tiledHeight, overLap, camera_id.c_str());
        auto _sahiStart = std::chrono::steady_clock::now();
        cv::Mat image = input.clone();
        if (image.empty() || !image.data || !image.u) {
            return allResults;
        }
        if ((image.cols < 5) || (image.rows < 5)) return allResults;
        //1. Get patches (slices) from the image
        std::vector<cv::Rect> patches = ANSUtilityHelper::GetPatches(image, tiledWidth, tiledHeight, overLap);
        //2. Extract each patch and run inference on it
        for (const auto& region : patches) {
            // Clamp the region to the image boundaries
            const int clampedX = std::max(region.x, 0);
            const int clampedY = std::max(region.y, 0);
            cv::Rect boundedRegion(clampedX, clampedY,
                std::min(region.width, image.cols - clampedX),
                std::min(region.height, image.rows - clampedY));
            // Fix: skip degenerate regions. cv::Mat(roi) throws on non-positive
            // dimensions, which would abort the ENTIRE tiled run via the catch
            // below and return an empty result set.
            if (boundedRegion.width <= 0 || boundedRegion.height <= 0) continue;
            // Extract and clone the patch from the image
            cv::Mat patch = image(boundedRegion).clone();
            // Perform inference on the extracted patch
            std::vector<Object> patchResults = RunInference(patch);
            // Shift patch-local boxes into global image coordinates
            for (auto& result : patchResults) {
                result.box.x += boundedRegion.x;
                result.box.y += boundedRegion.y;
            }
            // Append results to the overall results container
            allResults.insert(allResults.end(), patchResults.begin(), patchResults.end());
            patch.release();
        }
        //3. Full-image inference as well (catches objects larger than a tile)
        std::vector<Object> fullImageResults = RunInference(image, camera_id);
        allResults.insert(allResults.end(), fullImageResults.begin(), fullImageResults.end());
        //4. NMS merges duplicates across tiles and the full frame
        const float iouThreshold = 0.1f; // was a double literal narrowed to float
        std::vector<Object> finalResults = ANSUtilityHelper::ApplyNMS(allResults, iouThreshold);
        {
            double _sahiMs = std::chrono::duration<double, std::milli>(
                std::chrono::steady_clock::now() - _sahiStart).count();
            ANS_DBG("ODEngine", "SAHI DONE: %.1fms patches=%zu results=%zu cam=%s",
                _sahiMs, patches.size() + 1, finalResults.size(), camera_id.c_str());
            if (_sahiMs > 2000.0) {
                ANS_DBG("ODEngine", "SAHI SLOW: %.1fms — %zu patches held _mutex entire time!",
                    _sahiMs, patches.size() + 1);
            }
        }
        image.release();
        return finalResults;
    }
    catch (std::exception& e) {
        this->_logger.LogFatal("ANSODBase::RunInferences", e.what(), __FILE__, __LINE__);
        return allResults;
    }
}
std::vector<Object> ANSODBase::RunInferenceFromJpegString(const char* jpegData, unsigned long jpegSize, const std::string& camera_id) {
std::lock_guard<std::recursive_mutex> lock(_mutex);
int width, height, jpegSubsamp, jpegColorspace;
std::vector<Object> detectionObjects;
detectionObjects.clear();
tjhandle _jpegDecompressor = nullptr;
try {
_jpegDecompressor = tjInitDecompress();
if (_jpegDecompressor) {
int result = tjDecompressHeader3(_jpegDecompressor, reinterpret_cast<const unsigned char*>(jpegData), jpegSize, &width, &height, &jpegSubsamp, &jpegColorspace);
if (result >= 0) {
cv::Mat img(height, width, CV_8UC3);
int pixelFormat = TJPF_BGR; // OpenCV uses BGR format
result = tjDecompress2(_jpegDecompressor, reinterpret_cast<const unsigned char*>(jpegData), jpegSize, img.data, width, 0, height, pixelFormat, TJFLAG_FASTDCT);
if (result >= 0) {
detectionObjects = RunInference(img, camera_id);
}
else {
this->_logger.LogError("ANSODBase::RunInferenceFromJpegString", "Failed to decompress JPEG string", __FILE__, __LINE__);
}
}
tjDestroy(_jpegDecompressor);
_jpegDecompressor = nullptr;
}
return detectionObjects;
}
catch (std::exception& e) {
if (_jpegDecompressor) tjDestroy(_jpegDecompressor);
this->_logger.LogFatal("ANSODBase::RunInferenceFromJpegString", e.what(), __FILE__, __LINE__);
return detectionObjects;
}
}
ModelConfig ANSODBase::GetModelConfig()
{
return _modelConfig;
}
std::vector<Object> ANSODBase::DetectMovement(const cv::Mat& input, const std::string& camera_id) {
std::lock_guard<std::recursive_mutex> lock(_mutex);
std::vector<Object> objects;
objects.clear();
try {
if (!_licenseValid) return objects;
if (input.empty() || !input.data || !input.u) {
return objects;
}
if ((input.cols < 5) || (input.rows < 5)) return objects;
cv::Mat frame = input.clone();
objects = this->_handler.MovementDetect(camera_id, frame);
frame.release();
return objects;
}
catch (std::exception& e) {
this->_logger.LogFatal("ANSODBase::DetectMovement", e.what(), __FILE__, __LINE__);
return objects;
}
}
std::vector<Object> ANSODBase::RunTiledInferenceFromJpegString(const char* jpegData, unsigned long jpegSize, int tiledWith, int tiledHeight, double overLap, const std::string& camera_id) {
std::lock_guard<std::recursive_mutex> lock(_mutex);
int width, height, jpegSubsamp, jpegColorspace;
std::vector<Object> detectionObjects;
detectionObjects.clear();
tjhandle _jpegDecompressor = nullptr;
try {
_jpegDecompressor = tjInitDecompress();
if (_jpegDecompressor) {
int result = tjDecompressHeader3(_jpegDecompressor, reinterpret_cast<const unsigned char*>(jpegData), jpegSize, &width, &height, &jpegSubsamp, &jpegColorspace);
if (result >= 0) {
cv::Mat img(height, width, CV_8UC3);
int pixelFormat = TJPF_BGR; // OpenCV uses BGR format
result = tjDecompress2(_jpegDecompressor, reinterpret_cast<const unsigned char*>(jpegData), jpegSize, img.data, width, 0, height, pixelFormat, TJFLAG_FASTDCT);
if (result >= 0) {
detectionObjects = RunInferences(img, tiledWith, tiledHeight, overLap, camera_id);
}
else {
this->_logger.LogError("ANSODBase::RunTiledInferenceFromJpegString", "Failed to decompress JPEG string", __FILE__, __LINE__);
}
}
tjDestroy(_jpegDecompressor);
_jpegDecompressor = nullptr;
}
return detectionObjects;
}
catch (std::exception& e) {
if (_jpegDecompressor) tjDestroy(_jpegDecompressor);
this->_logger.LogFatal("ANSODBase::RunTiledInferenceFromJpegString", e.what(), __FILE__, __LINE__);
return detectionObjects;
}
}
std::vector<Object> ANSODBase::RunInference(const cv::Mat& input, std::vector<cv::Rect> Bbox, const std::string& camera_id) {
std::lock_guard<std::recursive_mutex> lock(_mutex);
std::vector<ANSCENTER::Object> detectionObjects;
detectionObjects.clear();
if (!_licenseValid) {
this->_logger.LogError("ANSODBase::RunInference", "Invalid License", __FILE__, __LINE__);
return detectionObjects;
}
if (!_isInitialized) {
this->_logger.LogError("ANSODBase::RunInference", "Model is not initialized", __FILE__, __LINE__);
return detectionObjects;
}
try {
if (input.empty()) return detectionObjects;
if ((input.cols <= 5) || (input.rows <= 5)) return detectionObjects;
if (Bbox.size() > 0) {
cv::Mat frame = input;
int fWidth = frame.cols;
int fHeight = frame.rows;
for (const auto& bbox : Bbox) {
cv::Rect objectPos(
std::max(bbox.x, 0),
std::max(bbox.y, 0),
std::min(bbox.width, frame.cols - std::max(bbox.x, 0)),
std::min(bbox.height, frame.rows - std::max(bbox.y, 0))
);
// Crop the object from the frame
cv::Mat croppedObject = frame(objectPos).clone();
// Run inference on the cropped object
auto tempObjects = RunInference(croppedObject, camera_id);
// If any objects are detected, adjust their coordinates and add to results
for (const auto& tempObject : tempObjects) {
ANSCENTER::Object detectionObject = tempObject;
// Adjust bounding box position to global coordinates
detectionObject.box.x += objectPos.x;
detectionObject.box.y += objectPos.y;
// Add the adjusted object to the results
detectionObjects.push_back(detectionObject);
}
croppedObject.release();
}
frame.release();
}
else {
detectionObjects = RunInference(input, camera_id);
}
return detectionObjects;
}
catch (std::exception& e) {
this->_logger.LogFatal("ANSODBase::RunInference", e.what(), __FILE__, __LINE__);
return detectionObjects;
}
}
std::vector<Object> ANSODBase::RunInference(const cv::Mat& input, std::vector<cv::Point> Polygon, const std::string& camera_id) {
std::lock_guard<std::recursive_mutex> lock(_mutex);
std::vector<ANSCENTER::Object> detectionObjects;
detectionObjects.clear();
if (!_licenseValid) {
this->_logger.LogError("ANSODBase::RunInference", "Invalid License", __FILE__, __LINE__);
return detectionObjects;
}
if (!_isInitialized) {
this->_logger.LogError("ANSODBase::RunInference", "Model is not initialized", __FILE__, __LINE__);
return detectionObjects;
}
try {
if (input.empty()) return detectionObjects;
if ((input.cols <= 5) || (input.rows <= 5)) return detectionObjects;
if (Polygon.size() > 0) {
cv::Mat frame = input.clone();
cv::Rect rect = ANSCENTER::ANSUtilityHelper::GetBoundingBoxFromPolygon(Polygon);
rect.x = std::max(rect.x, 0);
rect.y = std::max(rect.y, 0);
rect.width = std::min(rect.width, frame.cols - std::max(rect.x, 0));
rect.height = std::min(rect.height, frame.rows - std::max(rect.y, 0));
cv::Mat croppedImage = frame(rect).clone();
// Run inference on the cropped object
auto tempObjects = RunInference(croppedImage, camera_id);
// If any objects are detected, adjust their coordinates and add to results
for (const auto& tempObject : tempObjects) {
ANSCENTER::Object detectionObject = tempObject;
// Adjust bounding box position to global coordinates
detectionObject.box.x += rect.x;
detectionObject.box.y += rect.y;
// Add the adjusted object to the results
detectionObjects.push_back(detectionObject);
}
croppedImage.release();
frame.release();
}
else {
detectionObjects = RunInference(input, camera_id);
}
return detectionObjects;
}
catch (std::exception& e) {
this->_logger.LogFatal("ANSODBase::RunInference", e.what(), __FILE__, __LINE__);
return detectionObjects;
}
}
cv::Rect ANSODBase::computeCandidateROI(const cv::Rect& unionBox, int fixedWidth, int fixedHeight, int imageWidth, int imageHeight) {
// Compute center of the union.
float centerX = unionBox.x + unionBox.width / 2.0f;
float centerY = unionBox.y + unionBox.height / 2.0f;
// Compute the required half-size in each dimension to cover the unionBox.
float reqHalfWidth = std::max(centerX - static_cast<float>(unionBox.x), static_cast<float>(unionBox.x + unionBox.width) - centerX);
float reqHalfHeight = std::max(centerY - static_cast<float>(unionBox.y), static_cast<float>(unionBox.y + unionBox.height) - centerY);
// Desired full dimensions: at least fixed size, but as much as needed to cover unionBox.
int desiredWidth = static_cast<int>(std::ceil(2 * reqHalfWidth));
int desiredHeight = static_cast<int>(std::ceil(2 * reqHalfHeight));
// Allow expansion up to 200% of fixed dimensions.
int minWidth = fixedWidth;
int minHeight = fixedHeight;
int maxWidth = fixedWidth * 2;
int maxHeight = fixedHeight * 2;
int candidateWidth = std::min(std::max(desiredWidth, minWidth), maxWidth);
int candidateHeight = std::min(std::max(desiredHeight, minHeight), maxHeight);
// Compute top-left so that ROI is centered at (centerX, centerY).
int roiX = static_cast<int>(std::round(centerX - candidateWidth / 2.0f));
int roiY = static_cast<int>(std::round(centerY - candidateHeight / 2.0f));
// Clamp the ROI to image boundaries.
if (roiX < 0) roiX = 0;
if (roiY < 0) roiY = 0;
if (roiX + candidateWidth > imageWidth) roiX = imageWidth - candidateWidth;
if (roiY + candidateHeight > imageHeight) roiY = imageHeight - candidateHeight;
return cv::Rect(roiX, roiY, candidateWidth, candidateHeight);
}
std::vector<cv::Rect> ANSODBase::GenerateFixedROIs(const std::vector<Object>& movementObjects, int fixedWidth, int fixedHeight, int imageWidth, int imageHeight) {
try {
// Early check: if the image is smaller than the fixed ROI size, return one ROI covering the full image.
if (imageWidth < fixedWidth || imageHeight < fixedHeight) {
return { cv::Rect(0, 0, imageWidth, imageHeight) };
}
int maxImageSize = std::max(imageWidth, imageHeight);
if (maxImageSize <= 1920) {
int maxFixedSize = std::max(fixedWidth, fixedHeight);
std::vector<cv::Rect> fixedROIs;
cv::Rect singleFixedROI= GenerateMinimumSquareBoundingBox(movementObjects, maxFixedSize);
singleFixedROI.x = std::max(singleFixedROI.x, 0);
singleFixedROI.y = std::max(singleFixedROI.y, 0);
singleFixedROI.width = std::min(singleFixedROI.width, imageWidth - singleFixedROI.x);
singleFixedROI.height = std::min(singleFixedROI.height, imageHeight - singleFixedROI.y);
fixedROIs.push_back(singleFixedROI);
return fixedROIs;
}
// --- Step 1: Group objects by proximity.
// Use a threshold (fixedWidth/2) so that objects whose centers are within that distance belong to the same group.
float groupingThreshold = fixedWidth / 2.0f;
std::vector<Group> groups;
for (const auto& obj : movementObjects) {
cv::Point2f center(obj.box.x + obj.box.width / 2.0f, obj.box.y + obj.box.height / 2.0f);
bool added = false;
for (auto& grp : groups) {
// Use the current group's center (from its unionBox).
cv::Point2f groupCenter(grp.unionBox.x + grp.unionBox.width / 2.0f,
grp.unionBox.y + grp.unionBox.height / 2.0f);
if (distance(center, groupCenter) < groupingThreshold) {
grp.objects.push_back(obj);
grp.unionBox = unionRect(grp.unionBox, obj.box);
added = true;
break;
}
}
if (!added) {
Group newGroup;
newGroup.objects.push_back(obj);
newGroup.unionBox = obj.box;
groups.push_back(newGroup);
}
}
// --- Step 2: For each group, compute a candidate ROI that may expand up to 50% larger than fixed size.
// Then merge groups whose candidate ROIs overlap.
bool merged = true;
while (merged) {
merged = false;
for (size_t i = 0; i < groups.size(); ++i) {
cv::Rect roi_i = computeCandidateROI(groups[i].unionBox, fixedWidth, fixedHeight, imageWidth, imageHeight);
for (size_t j = i + 1; j < groups.size(); ++j) {
cv::Rect roi_j = computeCandidateROI(groups[j].unionBox, fixedWidth, fixedHeight, imageWidth, imageHeight);
if (isOverlap(roi_i, roi_j)) {
// Merge group j into group i.
for (const auto& obj : groups[j].objects) {
groups[i].objects.push_back(obj);
}
groups[i].unionBox = unionRect(groups[i].unionBox, groups[j].unionBox);
groups.erase(groups.begin() + j);
merged = true;
break; // Break inner loop to restart merging.
}
}
if (merged) break;
}
}
// --- Step 3: Compute final candidate ROIs for each (merged) group.
std::vector<cv::Rect> fixedROIs;
for (const auto& grp : groups) {
cv::Rect candidate = computeCandidateROI(grp.unionBox, fixedWidth, fixedHeight, imageWidth, imageHeight);
fixedROIs.push_back(candidate);
}
// (Optional) sort the final ROIs.
std::sort(fixedROIs.begin(), fixedROIs.end(), [](const cv::Rect& a, const cv::Rect& b) {
return (a.y == b.y) ? (a.x < b.x) : (a.y < b.y);
});
return fixedROIs;
}
catch (std::exception& e) {
this->_logger.LogFatal("ANSFDBase::GenerateFixedROIs", e.what(), __FILE__, __LINE__);
return { cv::Rect(0, 0, imageWidth, imageHeight) };
}
}
cv::Rect ANSODBase::GenerateMinimumSquareBoundingBox(const std::vector<ANSCENTER::Object>& detectedObjects, int minSize) {
std::lock_guard<std::recursive_mutex> lock(_mutex);
try {
// Ensure there are rectangles to process
int adMinSize = minSize - 20;
if (adMinSize < 0) adMinSize = 0;
if (detectedObjects.empty()) return cv::Rect(0, 0, minSize, minSize);
// Initialize min and max coordinates
int minX = detectedObjects[0].box.x;
int minY = detectedObjects[0].box.y;
int maxX = detectedObjects[0].box.x + detectedObjects[0].box.width;
int maxY = detectedObjects[0].box.y + detectedObjects[0].box.height;
// Calculate bounding box that includes all rectangles
for (const auto& rect : detectedObjects) {
minX = std::min(minX, rect.box.x);
minY = std::min(minY, rect.box.y);
maxX = std::max(maxX, rect.box.x + rect.box.width);
maxY = std::max(maxY, rect.box.y + rect.box.height);
}
// Calculate width and height of the bounding box
int width = maxX - minX;
int height = maxY - minY;
// Determine the size of the square
int squareSize = std::max({ width, height, adMinSize });
// Center the square around the bounding box
int centerX = minX + width / 2;
int centerY = minY + height / 2;
int squareX = centerX - squareSize / 2;
int squareY = centerY - squareSize / 2;
return cv::Rect(squareX - 10, squareY - 10, squareSize + 20, squareSize + 20); // add 10 pixels to the square
}
catch (std::exception& e) {
this->_logger.LogFatal("ANSFDBase::GenerateMinimumSquareBoundingBox", e.what(), __FILE__, __LINE__);
return cv::Rect(0, 0, minSize, minSize);
}
}
void ANSODBase::UpdateAndFilterDetectionObjects(std::vector<Object>& detectionObjects, int threshold) {
detectionObjects.erase(
std::remove_if(detectionObjects.begin(), detectionObjects.end(),
[&](Object& obj) {
if (!obj.extraInfo.empty()) {
try {
int value = std::stoi(obj.extraInfo); // Convert extraInfo to an integer
if (value >= threshold) {
return true; // Remove object if value exceeds threshold
}
obj.extraInfo = std::to_string(value + 1); // Increment extraInfo
}
catch (const std::exception&) {
obj.extraInfo = "1"; // Reset to "1" if conversion fails
}
}
return false; // Keep object
}),
detectionObjects.end()); // Remove flagged objects
}
bool ANSODBase::ContainsIntersectingObject(const std::vector<Object>& movementObjects, const Object& result) {
for (const auto& obj : movementObjects) {
// Compute the intersection of the two bounding boxes.
cv::Rect intersection = obj.box & result.box;
if (intersection.area() > 0) {
return true; // Found an intersecting object.
}
}
return false; // No intersections found.
}
std::vector<Object> ANSODBase::RunDynamicInference(const cv::Mat& input, cv::Rect Bbox, const std::string& camera_id) {
std::lock_guard<std::recursive_mutex> lock(_mutex);
std::vector<Object> output;
output.clear();
try {
// 1. Check if the input image is valid
if (input.empty() || !input.data || !input.u) {
return output;
}
if ((input.cols < 5) || (input.rows < 5)) return output;
// Check ROI
int orginalHeight = input.rows;
int orginalWidth = input.cols;
cv::Rect boundingBox;
// Ensure the bounding box is within the image boundaries
boundingBox.x = std::max(0, Bbox.x);
boundingBox.y = std::max(0, Bbox.y);
boundingBox.width = MIN(orginalWidth, Bbox.width);
boundingBox.height = MIN(orginalHeight, Bbox.height);
if (boundingBox.width == 0)boundingBox.width = orginalWidth;
if (boundingBox.height == 0)boundingBox.height = orginalHeight;
cv::Mat im = input(boundingBox).clone(); // We can get crop from bounding box in here
if ((im.cols <= 5) || (im.rows <= 5))return output;
// 2. Define active ROI
std::vector<cv::Rect> activeROIs;
std::vector<Object> movementObjects;
activeROIs.clear();
movementObjects.clear();
int imageSize = std::max(im.cols, im.rows);
if (imageSize <= 640) {
activeROIs.push_back(cv::Rect(0, 0, im.cols, im.rows));
}
else {
std::vector<Object> movementResults = DetectMovement(im, camera_id);
if ((!movementResults.empty()) && ((movementResults.size() < 12)))
{
movementObjects.insert(movementObjects.end(), movementResults.begin(), movementResults.end());
if (!_detectedObjects.empty())movementObjects.insert(movementObjects.end(), _detectedObjects.begin(), _detectedObjects.end());
}
else {
if (!_detectedObjects.empty())movementObjects.insert(movementObjects.end(), _detectedObjects.begin(), _detectedObjects.end());
}
if (!movementObjects.empty()) {
cv::Rect singleFixedROI = GenerateMinimumSquareBoundingBox(movementObjects, 640);
singleFixedROI.x = std::max(singleFixedROI.x, 0);
singleFixedROI.y = std::max(singleFixedROI.y, 0);
singleFixedROI.width = MIN(singleFixedROI.width, im.cols - singleFixedROI.x);
singleFixedROI.height =MIN(singleFixedROI.height, im.rows - singleFixedROI.y);
activeROIs.push_back(singleFixedROI);
}
else {
activeROIs.push_back(cv::Rect(0, 0, im.cols, im.rows));// Use the orginal image
}
if ((activeROIs.size() <= 0) ||
(activeROIs.empty()))
{
return output;
}
UpdateAndFilterDetectionObjects(_detectedObjects, 80);
}
// 3. Run inference on each active ROI
#ifdef DEBUGENGINE
cv::Mat draw = im.clone();
if (movementObjects.size() > 0) {
for (int i = 0; i < movementObjects.size(); i++) {
cv::rectangle(draw, movementObjects[i].box, cv::Scalar(0, 255, 255), 2); // RED for detectedArea
}
}
for (int i = 0; i < activeROIs.size(); i++) {
cv::rectangle(draw, activeROIs[i], cv::Scalar(0, 0, 255), 2); // RED for detectedArea
}
#endif
for (int j = 0; j < activeROIs.size(); j++) {
cv::Rect activeROI;
activeROI.x = std::max(0, activeROIs[j].x);
activeROI.y = std::max(0, activeROIs[j].y);
activeROI.width = MIN(im.cols, activeROIs[j].width);
activeROI.height = MIN(im.rows, activeROIs[j].height);
cv::Mat frame = im(activeROI).clone();
std::vector<Object> detectedObjects = RunInference(frame, camera_id);
// 5. Return the detected objects
for (int i = 0; i < detectedObjects.size(); i++)
{
if (detectedObjects[i].confidence > _modelConfig.detectionScoreThreshold) {
Object result;
// 0. Get the face bounding box
int x_min = detectedObjects[i].box.x + activeROI.x;
int y_min = detectedObjects[i].box.y + activeROI.y ;
int width = detectedObjects[i].box.width ;
int height = detectedObjects[i].box.height;
// 1. Update the bounding box coordinates
result.classId = detectedObjects[i].classId;
result.className = detectedObjects[i].className;
result.confidence = detectedObjects[i].confidence;
result.box.x = x_min+ boundingBox.x;
result.box.y = y_min+ boundingBox.y;
result.box.width = width;
result.box.height = height;
result.kps = detectedObjects[i].kps; // landmarks as array of x,y,x,y...
result.cameraId = camera_id;
output.push_back(result);
// Update result
result.extraInfo = "0";
result.box.x = x_min;
result.box.y = y_min;
#ifdef DEBUGENGINE
cv::rectangle(draw, result.box, cv::Scalar(225, 255, 0), 2); //
#endif
// Find if obj already exists in detectionObjects using ContainsIntersectingObject
auto it = std::find_if(_detectedObjects.begin(), _detectedObjects.end(),
[&](Object& existingObj) {
return ContainsIntersectingObject(_detectedObjects, result);
});
if (it != _detectedObjects.end()) {
*it = result; // Replace existing object with the new one
}
else {
// If not found, add the new object to the list
_detectedObjects.push_back(result);
}
}
}
frame.release();
}
#ifdef DEBUGENGINE
cv::imshow("Combined Detected Areas", draw);// Debugging: Diplsay the frame with the combined detected areas
cv::waitKey(1);// Debugging: Diplsay the frame with the combined detected areas
draw.release();// Debugging: Diplsay the frame with the combined detected areas
#endif
im.release();
return output;
}
catch (std::exception& e) {
this->_logger.LogFatal("ANSODBase::RunDynamicInference", e.what(), __FILE__, __LINE__);
return {};
}
}
std::vector<Object> ANSODBase::RunStaticInference(const cv::Mat& input, cv::Rect Bbox, const std::string& camera_id) {
// No coarse _mutex — only uses local variables and virtual RunInference() which has its own engine lock
std::vector<Object> output;
output.clear();
try {
// 1. Check if the input image is valid
if (input.empty() || !input.data || !input.u) {
return output;
}
if ((input.cols < 5) || (input.rows < 5)) return output;
int orginalHeight = input.rows;
int orginalWidth = input.cols;
cv::Rect boundingBox;
boundingBox.x = std::max(0, Bbox.x);
boundingBox.y = std::max(0, Bbox.y);
boundingBox.width = MIN(orginalWidth, Bbox.width);
boundingBox.height = MIN(orginalHeight, Bbox.height);
if (boundingBox.width == 0)boundingBox.width = orginalWidth;
if (boundingBox.height == 0)boundingBox.height = orginalHeight;
cv::Mat im = input(boundingBox).clone(); // We can get crop from bounding box in here
if ((im.cols <= 5) || (im.rows <= 5))return output;
// 2. Define active ROI
std::vector<cv::Rect> activeROIs;
std::vector<Object> movementObjects;
activeROIs.clear();
movementObjects.clear();
int imageSize = std::max(im.cols, im.rows);
activeROIs.push_back(cv::Rect(0, 0, im.cols, im.rows));
// 3. Run inference on each active ROI
for (int j = 0; j < activeROIs.size(); j++) {
cv::Rect activeROI;
activeROI.x = std::max(0, activeROIs[j].x);
activeROI.y = std::max(0, activeROIs[j].y);
activeROI.width = MIN(im.cols, activeROIs[j].width);
activeROI.height = MIN(im.rows, activeROIs[j].height);
cv::Mat frame = im(activeROI).clone();
std::vector<Object> detectedObjects = RunInference(frame, camera_id);
for (int i = 0; i < detectedObjects.size(); i++)
{
if (detectedObjects[i].confidence > _modelConfig.detectionScoreThreshold) {
Object result;
int x_min = detectedObjects[i].box.x + activeROI.x;
int y_min = detectedObjects[i].box.y + activeROI.y;
int width = detectedObjects[i].box.width;
int height = detectedObjects[i].box.height;
result.classId = detectedObjects[i].classId;
result.className = detectedObjects[i].className;
result.confidence = detectedObjects[i].confidence;
result.box.x = x_min + boundingBox.x;
result.box.y = y_min + boundingBox.y;
result.box.width = width;
result.box.height = height;
result.kps = detectedObjects[i].kps; // landmarks as array of x,y,x,y...
result.cameraId = camera_id;
output.push_back(result);
}
}
frame.release();
}
im.release();
return output;
}
catch (std::exception& e) {
this->_logger.LogFatal("ANSODBase::RunDynamicInference", e.what(), __FILE__, __LINE__);
return {};
}
}
bool ANSODBase::RunInference(const cv::Mat& input, const std::string& camera_id, std::string& detectionResult) {
std::lock_guard<std::recursive_mutex> lock(_mutex);
if (!_licenseValid) {
this->_logger.LogError("ANSODBase::RunInference", "Invalid License", __FILE__, __LINE__);
return false;
}
if (!_isInitialized) {
this->_logger.LogError("ANSODBase::RunInference", "Model is not initialized", __FILE__, __LINE__);
return false;
}
try {
std::vector<Object> detectionObjects;
detectionObjects.clear();
if (input.empty() || !input.data || !input.u) {
return false;
}
if ((input.cols < 5) || (input.rows < 5)) return false;
if (input.cols > 0 && input.rows > 0) {
cv::Mat frame = input.clone();
detectionObjects = RunInference(frame, camera_id);
// Convert detectionObjects to JSON string
detectionResult=ANSCENTER::ANSUtilityHelper::VectorDetectionToJsonString(detectionObjects);
frame.release();
return true;
}
else {
this->_logger.LogError("ANSODBase::RunInference", "Invalid image size", __FILE__, __LINE__);
return false;
}
}
catch (std::exception& e) {
this->_logger.LogFatal("ANSODBase::RunInference", e.what(), __FILE__, __LINE__);
return false;
}
}
std::vector<std::vector<Object>> ANSODBase::RunInferencesBatch(const std::vector<cv::Mat>& inputs,const std::string& camera_id)
{
std::lock_guard<std::recursive_mutex> lock(_mutex);
std::vector<std::vector<Object>> batchDetections;
// Early validation
if (!_licenseValid) {
_logger.LogError("ANSODBase::RunInferencesBatch",
"Invalid License", __FILE__, __LINE__);
return batchDetections;
}
if (!_isInitialized) {
_logger.LogError("ANSODBase::RunInferencesBatch",
"Model is not initialized", __FILE__, __LINE__);
return batchDetections;
}
if (inputs.empty()) {
_logger.LogWarn("ANSODBase::RunInferencesBatch",
"Empty input batch", __FILE__, __LINE__);
return batchDetections;
}
try {
batchDetections.reserve(inputs.size()); // Pre-allocate
for (size_t i = 0; i < inputs.size(); ++i) {
auto detections = RunInference(inputs[i], camera_id);
//if (detections.empty()) {
// _logger.LogWarn("ANSODBase::RunInferencesBatch",
// "No detections for frame " + std::to_string(i),
// __FILE__, __LINE__);
//}
batchDetections.push_back(std::move(detections));
}
return batchDetections;
}
catch (const std::exception& e) {
_logger.LogFatal("ANSODBase::RunInferencesBatch",
e.what(), __FILE__, __LINE__);
return std::vector<std::vector<Object>>(); // Return fresh empty vector
}
}
// Functions for screen size division
double ANSODBase::calculateDistanceToCenter(const cv::Point& center, const cv::Rect& rect) {
cv::Point rectCenter(rect.x + rect.width / 2, rect.y + rect.height / 2);
return std::sqrt(std::pow(rectCenter.x - center.x, 2) + std::pow(rectCenter.y - center.y, 2));
}
        /// Splits the frame into a resolution-dependent grid of sections and
        /// assigns each a scan priority (1 = highest), ordered by distance from
        /// the image centre (ties broken top-to-bottom, then left-to-right).
        /// The layout is cached per image size and recomputed only when the
        /// resolution changes.
        /// NOTE(review): the cache (cachedSections / previousImageSize) is shared
        /// with createSlideScreens(); alternating the two methods on same-sized
        /// frames returns whichever layout was computed first — confirm intended.
        std::vector<ANSODBase::ImageSection> ANSODBase::divideImage(const cv::Mat& image) {
            if (image.empty()) {
                std::cerr << "Error: Empty image!" << std::endl;
                return cachedSections;
            }
            cv::Size currentSize(image.cols, image.rows);
            // Check if the image size has changed
            if (currentSize == previousImageSize) {
                return cachedSections; // Return cached sections if size is the same
            }
            // Update previous size
            previousImageSize = currentSize;
            cachedSections.clear();
            int width = image.cols;
            int height = image.rows;
            int maxDimension = std::max(width, height);
            // Target section count scales down with resolution (>2560 px -> 10).
            int numSections = 10;// std::max(1, numSections); // Ensure at least 1 section
            if (maxDimension <= 2560)numSections = 8;
            if (maxDimension <= 1280)numSections = 6;
            if (maxDimension <= 960)numSections = 4;
            if (maxDimension <= 640)numSections = 2;
            if (maxDimension <= 320)numSections = 1;
            // Grid layout: rows = floor(sqrt(n)); cols rounded up so rows*cols >= n
            // (the actual number of sections generated may exceed numSections).
            int gridRows = std::sqrt(numSections);
            int gridCols = (numSections + gridRows - 1) / gridRows; // Ensure all sections are covered
            int sectionWidth = width / gridCols;
            int sectionHeight = height / gridRows;
            cv::Point imageCenter(width / 2, height / 2);
            std::vector<std::pair<double, ImageSection>> distancePriorityList;
            // Create sections and store their distance from the center
            for (int r = 0; r < gridRows; ++r) {
                for (int c = 0; c < gridCols; ++c) {
                    int x = c * sectionWidth;
                    int y = r * sectionHeight;
                    // Last row/column absorbs the division remainder so the grid covers the frame.
                    int w = (c == gridCols - 1) ? width - x : sectionWidth;
                    int h = (r == gridRows - 1) ? height - y : sectionHeight;
                    ImageSection section(cv::Rect(x, y, w, h));
                    double distance = calculateDistanceToCenter(imageCenter, section.region);
                    distancePriorityList.emplace_back(distance, section);
                }
            }
            // Sort sections based on distance from center, then top-to-bottom, then left-to-right
            std::sort(distancePriorityList.begin(), distancePriorityList.end(),
                [](const std::pair<double, ImageSection>& a, const std::pair<double, ImageSection>& b) {
                    if (std::abs(a.first - b.first) > 1e-5) {
                        return a.first < b.first; // Sort by closest distance to center
                    }
                    // If distance is the same, prioritize top to bottom, then left to right
                    return a.second.region.y == b.second.region.y
                        ? a.second.region.x < b.second.region.x
                        : a.second.region.y < b.second.region.y;
                });
            // Assign priority (1 = closest to centre) in sorted order.
            int priority = 1;
            for (auto& entry : distancePriorityList) {
                entry.second.priority = priority++;
                cachedSections.push_back(entry.second);
            }
            return cachedSections;
        }
        /// Tiles the frame into near-square cells (aspect ratio 0.8–1.2, each side
        /// >= 320 px) choosing the grid with the smallest maximal tile dimension
        /// under a resolution-dependent tile budget. Priorities are assigned
        /// bottom row first, left-to-right within a row.
        /// NOTE(review): reuses the same cache (cachedSections / previousImageSize)
        /// as divideImage(); mixing the two methods on same-sized frames returns a
        /// stale layout — confirm intended.
        std::vector<ANSODBase::ImageSection> ANSODBase::createSlideScreens(const cv::Mat& image) {
            if (image.empty()) {
                std::cerr << "Error: Empty image!" << std::endl;
                return cachedSections;
            }
            cv::Size currentSize(image.cols, image.rows);
            // Reuse the cached layout while the resolution stays the same.
            if (currentSize == previousImageSize) {
                return cachedSections;
            }
            previousImageSize = currentSize;
            cachedSections.clear();
            int maxSize = std::max(image.cols, image.rows);
            const int minCellSize = 320; // Minimum size for each cell
            int maxSections = 10;
            const float minAspectRatio = 0.8f;
            const float maxAspectRatio = 1.2f;
            // Tile budget grows with resolution.
            if (maxSize <= 640) maxSections = 1;
            else if (maxSize <= 960) maxSections = 2;
            else if (maxSize <= 1280) maxSections = 4;
            else if (maxSize <= 2560) maxSections = 6;
            else if (maxSize <= 3840) maxSections = 8;
            else maxSections = 10;
            int width = image.cols;
            int height = image.rows;
            // Exhaustively search (rows, cols) combinations within the budget for
            // the layout whose largest tile side is smallest, subject to the
            // min-cell-size and aspect-ratio constraints.
            int bestRows = 1, bestCols = 1;
            int bestTileSize = std::numeric_limits<int>::max();
            for (int rows = 1; rows <= maxSections; ++rows) {
                for (int cols = 1; cols <= maxSections; ++cols) {
                    if (rows * cols > maxSections) continue;
                    int tileWidth = (width + cols - 1) / cols;
                    int tileHeight = (height + rows - 1) / rows;
                    if (tileWidth < minCellSize || tileHeight < minCellSize)
                        continue;
                    float aspectRatio = static_cast<float>(tileWidth) / static_cast<float>(tileHeight);
                    if (aspectRatio < minAspectRatio || aspectRatio > maxAspectRatio)
                        continue;
                    int maxTileDim = std::max(tileWidth, tileHeight);
                    if (maxTileDim < bestTileSize) {
                        bestTileSize = maxTileDim;
                        bestRows = rows;
                        bestCols = cols;
                    }
                }
            }
            // Generate tiles using bestRows, bestCols
            int tileWidth = (width + bestCols - 1) / bestCols;
            int tileHeight = (height + bestRows - 1) / bestRows;
            int priority = 1;
            // Rows iterated bottom-up: the bottom row gets the highest priority.
            for (int r = bestRows - 1; r >= 0; --r) {
                for (int c = 0; c < bestCols; ++c) {
                    int x = c * tileWidth;
                    int y = r * tileHeight;
                    // Edge tiles are clipped to the frame; degenerate tiles are dropped.
                    int w = std::min(tileWidth, width - x);
                    int h = std::min(tileHeight, height - y);
                    if (w <= 0 || h <= 0) continue;
                    cv::Rect region(x, y, w, h);
                    ImageSection section(region);
                    section.priority = priority++;
                    cachedSections.push_back(section);
                }
            }
            return cachedSections;
        }
        /// Returns the priority value of the highest-priority cached section
        /// (the first element, as cachedSections is stored in priority order),
        /// or 0 when no sections have been generated yet.
        int ANSODBase::getHighestPriorityRegion() {
            if (!cachedSections.empty()) {
                return cachedSections.front().priority; // First element has the highest priority
            }
            return 0; // No sections cached yet (note: returns 0, not an empty rect)
        }
        /// Returns the priority value of the lowest-priority cached section
        /// (the last element of the priority-ordered cache), or 0 when no
        /// sections have been generated yet.
        int ANSODBase::getLowestPriorityRegion() {
            if (!cachedSections.empty()) {
                return cachedSections.back().priority; // Last element has the lowest priority
            }
            return 0; // No sections cached yet (note: returns 0, not an empty rect)
        }
cv::Rect ANSODBase::getRegionByPriority(int priority) {
for (const auto& section : cachedSections) {
if (section.priority == priority) {
return section.region;
}
}
return cv::Rect(); // Return empty rect if priority not found
}
std::vector<Object> ANSODBase::AdjustDetectedBoundingBoxes(
const std::vector<Object>& detectionsInROI,
const cv::Rect& roi,
const cv::Size& fullImageSize,
float aspectRatio,
int padding
) {
std::vector<Object> adjustedDetections;
try {
// Basic input validation
if (detectionsInROI.empty()) {
return adjustedDetections;
}
if (roi.width <= 0 || roi.height <= 0 ||
fullImageSize.width <= 0 || fullImageSize.height <= 0) {
return adjustedDetections;
}
for (const auto& detInROI : detectionsInROI) {
try {
if (detInROI.box.width <= 0 || detInROI.box.height <= 0)
continue; // Skip invalid box
// Convert ROI-relative box to full-image coordinates
cv::Rect detInFullImg;
try {
detInFullImg = detInROI.box + roi.tl();
detInFullImg &= cv::Rect(0, 0, fullImageSize.width, fullImageSize.height);
}
catch (const std::exception& e) {
std::cerr << "[AdjustBBox] Failed to calculate full image box: " << e.what() << std::endl;
continue;
}
// Check if it touches ROI border
bool touchesLeft = detInROI.box.x <= 0;
bool touchesRight = detInROI.box.x + detInROI.box.width >= roi.width - 1;
bool touchesTop = detInROI.box.y <= 0;
bool touchesBottom = detInROI.box.y + detInROI.box.height >= roi.height - 1;
bool touchesBorder = touchesLeft || touchesRight || touchesTop || touchesBottom;
// Compute target width based on aspect ratio
int targetWidth = 0, expandWidth = 0;
try {
targetWidth = std::max(detInFullImg.width, static_cast<int>(detInFullImg.height * aspectRatio));
expandWidth = std::max(0, targetWidth - detInFullImg.width);
}
catch (const std::exception& e) {
std::cerr << "[AdjustBBox] Aspect ratio adjustment failed: " << e.what() << std::endl;
continue;
}
int expandLeft = expandWidth / 2;
int expandRight = expandWidth - expandLeft;
int padX = touchesBorder ? padding : 0;
int padY = touchesBorder ? (padding / 2) : 0;
// Apply padded and expanded box
int newX = std::max(0, detInFullImg.x - expandLeft - padX);
int newY = std::max(0, detInFullImg.y - padY);
int newWidth = detInFullImg.width + expandWidth + 2 * padX;
int newHeight = detInFullImg.height + 2 * padY;
// Clamp to image boundaries
if (newX + newWidth > fullImageSize.width) {
newWidth = fullImageSize.width - newX;
}
if (newY + newHeight > fullImageSize.height) {
newHeight = fullImageSize.height - newY;
}
if (newWidth <= 0 || newHeight <= 0)
continue;
// Construct adjusted object
Object adjustedDet = detInROI;
adjustedDet.box = cv::Rect(newX, newY, newWidth, newHeight);
adjustedDetections.push_back(adjustedDet);
}
catch (const std::exception& e) {
std::cerr << "[AdjustBBox] Exception per detection: " << e.what() << std::endl;
continue;
}
catch (...) {
std::cerr << "[AdjustBBox] Unknown exception per detection." << std::endl;
continue;
}
}
}
catch (const std::exception& e) {
std::cerr << "[AdjustBBox] Fatal error: " << e.what() << std::endl;
}
catch (...) {
std::cerr << "[AdjustBBox] Unknown fatal error occurred." << std::endl;
}
return adjustedDetections;
}
cv::Rect ANSODBase::GetActiveWindow(const cv::Mat& input) {
    // Returns the next scanning region for this frame, advancing a
    // round-robin cursor over the prioritized regions. Falls back to the
    // whole frame on any error so callers always receive a usable ROI.
    std::lock_guard<std::recursive_mutex> lock(_mutex);
    try {
        // divideImage() is called for its side effects: it refreshes the
        // prioritized region list consumed by the helpers below. Its
        // returned section list is not needed here (unused local removed).
        divideImage(input);
        // Wrap the cursor back to the highest-priority region once the
        // lowest one has been served, or when the cursor is still unset (0).
        const int lowestPriority = getLowestPriorityRegion();
        if ((_currentPriority > lowestPriority) || (_currentPriority == 0)) {
            _currentPriority = getHighestPriorityRegion();
        }
        else {
            _currentPriority++;
        }
        return getRegionByPriority(_currentPriority);
    }
    catch (const std::exception& e) {
        this->_logger.LogFatal("ANSODBase::GetActiveWindow", e.what(), __FILE__, __LINE__);
        return cv::Rect(0, 0, input.cols, input.rows);
    }
    catch (...) {
        // Match the catch-all safety net used by the inference entry points.
        this->_logger.LogFatal("ANSODBase::GetActiveWindow", "Unknown error", __FILE__, __LINE__);
        return cv::Rect(0, 0, input.cols, input.rows);
    }
}
void ANSODBase::UpdateNoDetectionCondition() {
    // Ages the cached detection area when nothing was found this frame.
    std::lock_guard<std::recursive_mutex> lock(_mutex);
    // No object currently tracked: make sure the cached area is cleared.
    if (!_isObjectDetected) {
        _detectedArea.width = 0;
        _detectedArea.height = 0;
        _retainDetectedArea = 0;
        return;
    }
    // An object was seen recently; keep its area alive until it has missed
    // RETAINDETECTEDFRAMES frames in a row, then expire it.
    ++_retainDetectedArea;
    if (_retainDetectedArea >= RETAINDETECTEDFRAMES) {
        _detectedArea.width = 0;
        _detectedArea.height = 0;
        _retainDetectedArea = 0;
        _isObjectDetected = false;
    }
}
void ANSODBase::UpdateActiveROI(const cv::Mat& frame, ANSCENTER::Object detectedObj) {
    // Centres a square tracking window on the detected object and stores it
    // in _detectedArea. The window size scales with the frame resolution.
    // _detectedArea is shared with the inference paths, which mutate it under
    // _mutex — take the same (recursive) lock here.
    std::lock_guard<std::recursive_mutex> lock(_mutex);
    const int imageSize = std::max(frame.cols, frame.rows);
    int cropSize;
    if (imageSize > 1920)      cropSize = 640;
    else if (imageSize > 1280) cropSize = 480;
    // Original had duplicate branches (>640 and else both 320) — merged.
    // NOTE(review): the rediscovery path in RunInferenceInTrackingMode uses
    // 224 for frames <= 640 — confirm which value is intended and unify.
    else                       cropSize = 320;
    // Object centre in full-frame coordinates.
    const int xc = detectedObj.box.x + detectedObj.box.width / 2;
    const int yc = detectedObj.box.y + detectedObj.box.height / 2;
    // Top-left corner of the crop, clamped so the window stays inside the
    // frame (the trailing max(...,0) also covers frames smaller than cropSize).
    int x1_new = std::max(xc - cropSize / 2, 0);
    int y1_new = std::max(yc - cropSize / 2, 0);
    x1_new = std::min(x1_new, frame.cols - cropSize);
    y1_new = std::min(y1_new, frame.rows - cropSize);
    _detectedArea.x = std::max(x1_new, 0);
    _detectedArea.y = std::max(y1_new, 0);
    _detectedArea.width = std::min(cropSize, frame.cols - _detectedArea.x);
    _detectedArea.height = std::min(cropSize, frame.rows - _detectedArea.y);
}
bool ANSODBase::IsValidObject(const Object& obj, std::vector<int> objectIds) {
    // An object is "valid" when its class id appears in the allow-list.
    for (const int candidateId : objectIds) {
        if (obj.classId == candidateId) {
            return true;
        }
    }
    return false;
}
std::vector<Object> ANSODBase::RunInferenceInScanningMode(const cv::Mat& input, const std::string& camera_id) {
    // Scanning mode: each call processes the next prioritized region of the
    // frame, maps its detections back to full-frame coordinates and merges
    // overlaps with NMS. Returns an empty list on error or tiny regions.
    std::vector<Object> results;
    std::lock_guard<std::recursive_mutex> lock(_mutex);
    try {
        // 1. Split the frame into prioritized scanning sections.
        std::vector<ImageSection> sections = createSlideScreens(input);
        // Advance the scanning cursor, wrapping after the lowest priority
        // (or when the cursor is still unset).
        const int lowestPriority = getLowestPriorityRegion();
        if ((_currentPriority > lowestPriority) || (_currentPriority == 0)) {
            _currentPriority = getHighestPriorityRegion();
        }
        else {
            ++_currentPriority;
        }
        _detectedArea = getRegionByPriority(_currentPriority);
        // 2. Only run inference when the active region is meaningfully sized.
        if ((_detectedArea.width > 50) && (_detectedArea.height > 50)) {
            cv::Mat activeFrame = input(_detectedArea).clone();
            std::vector<Object> regionDetections = RunInference(activeFrame, camera_id);
            // 3. Map region-relative boxes back to full-frame coordinates.
            results = AdjustDetectedBoundingBoxes(regionDetections, _detectedArea, input.size(), 1.0f, 0);
            // 4. Merge overlapping results.
            const float iouThreshold = 0.1f;
            return ANSUtilityHelper::ApplyNMS(results, iouThreshold);
        }
        return results;
    }
    catch (const std::exception& e) {
        this->_logger.LogFatal("ANSODBase::RunInferenceInScanningMode", e.what(), __FILE__, __LINE__);
        return results;
    }
    catch (...) {
        this->_logger.LogFatal("ANSODBase::RunInferenceInScanningMode", "Unknown error", __FILE__, __LINE__);
        return results;
    }
}
std::vector<Object> ANSODBase::RunInferenceInTrackingMode(const cv::Mat& input, const std::string& camera_id, std::vector<int> trackingObjectIds) {
    // Tracking mode:
    //  - Branch 1: a tracked area already exists -> infer only inside it.
    //  - Branch 2: no tracked area -> scan prioritized regions to (re)acquire one.
    // trackingObjectIds restricts which class ids are accepted as tracked objects.
    // Fixes vs. original: removed the dead `cv::Mat crop = input(box).clone()`
    // (a full per-detection clone that was never used and could throw on an
    // out-of-range box), the duplicated maxScore assignment, and the redundant
    // maxScore bookkeeping itself (the loop breaks after the first accepted
    // detection, so the "best score" comparison could never run twice).
    std::vector<Object> detectionOutputs;
    std::lock_guard<std::recursive_mutex> lock(_mutex);
    try {
        const float iouThreshold = 0.1f;
        //---------- Branch 1: follow the existing tracked area ----------
        if ((_detectedArea.width > 50) && (_detectedArea.height > 50)) {
            cv::Mat activeROI = input(_detectedArea).clone();
            std::vector<ANSCENTER::Object> detectedObjects = RunInference(activeROI, camera_id);
            if (!detectedObjects.empty()) {
                for (auto& detectedObj : detectedObjects) {
                    if (IsValidObject(detectedObj, trackingObjectIds)) {
                        // Map the ROI-relative box back to full-frame coordinates.
                        detectedObj.box.x += _detectedArea.x;
                        detectedObj.box.y += _detectedArea.y;
                        detectedObj.cameraId = camera_id;
                        detectionOutputs.push_back(detectedObj);
                        // Re-centre the tracking window on this object and
                        // reset the frame-retention counter.
                        UpdateActiveROI(input, detectedObj);
                        _isObjectDetected = true;
                        _retainDetectedArea = 0;
                    }
                }
            }
            else {
                // Nothing seen this frame: age (and eventually expire) the area.
                UpdateNoDetectionCondition();
            }
            activeROI.release();
            return ANSUtilityHelper::ApplyNMS(detectionOutputs, iouThreshold);
        }
        //---------- Branch 2: re-acquire a tracked area by scanning ----------
        // 2.1 Advance the priority cursor over the scanning regions
        //     (divideImage() also refreshes the region list as a side effect).
        std::vector<ImageSection> sections = divideImage(input);
        const int lowestPriority = getLowestPriorityRegion();
        if ((_currentPriority > lowestPriority) || (_currentPriority == 0)) {
            _currentPriority = getHighestPriorityRegion();
        }
        else {
            _currentPriority++;
        }
        _detectedArea = getRegionByPriority(_currentPriority);
        // 2.2 Run inference on the selected region and lock onto the first
        //     sufficiently confident object of an allowed class.
        if ((_detectedArea.width > 50) && (_detectedArea.height > 50)) {
            cv::Mat detectedROI = input(_detectedArea).clone();
            std::vector<ANSCENTER::Object> detectedObjects = RunInference(detectedROI, camera_id);
            if (!detectedObjects.empty()) {
                for (auto& detectedObj : detectedObjects) {
                    // Convert to full-frame coordinates before filtering.
                    detectedObj.box.x += _detectedArea.x;
                    detectedObj.box.y += _detectedArea.y;
                    detectedObj.cameraId = camera_id;
                    if ((detectedObj.confidence > 0.35) && (IsValidObject(detectedObj, trackingObjectIds))) {
                        // Centre the new tracking window on the object.
                        // NOTE(review): mirrors UpdateActiveROI() except the
                        // smallest window here is 224 instead of 320 — confirm
                        // which value is intended and unify.
                        int cropSize = 640;
                        const int imageSize = std::max(input.cols, input.rows);
                        if (imageSize > 1920) cropSize = 640;
                        else if (imageSize > 1280) cropSize = 480;
                        else if (imageSize > 640) cropSize = 320;
                        else cropSize = 224;
                        const int xc = detectedObj.box.x + detectedObj.box.width / 2;
                        const int yc = detectedObj.box.y + detectedObj.box.height / 2;
                        int x1_new = std::max(xc - cropSize / 2, 0);
                        int y1_new = std::max(yc - cropSize / 2, 0);
                        x1_new = std::min(x1_new, input.cols - cropSize);
                        y1_new = std::min(y1_new, input.rows - cropSize);
                        _detectedArea.x = std::max(x1_new, 0);
                        _detectedArea.y = std::max(y1_new, 0);
                        _detectedArea.width = std::min(cropSize, input.cols - _detectedArea.x);
                        _detectedArea.height = std::min(cropSize, input.rows - _detectedArea.y);
                        detectionOutputs.push_back(detectedObj);
                        _isObjectDetected = true;   // retain this area over the next frames
                        _retainDetectedArea = 0;    // reset the retention counter
                        break;                      // track only the first accepted object
                    }
                }
            }
            else {
                // Region produced nothing: clear the candidate area.
                _detectedArea.width = 0;
                _detectedArea.height = 0;
            }
            detectedROI.release();
        }
        if (detectionOutputs.empty()) {
            _detectedArea.width = 0;
            _detectedArea.height = 0;
        }
        // 2.3 Merge overlapping detections.
        return ANSUtilityHelper::ApplyNMS(detectionOutputs, iouThreshold);
    }
    catch (const std::exception& e) {
        this->_logger.LogFatal("ANSODBase::RunInferenceInTrackingMode", e.what(), __FILE__, __LINE__);
        return detectionOutputs;
    }
    catch (...) {
        this->_logger.LogFatal("ANSODBase::RunInferenceInTrackingMode", "Unknown error", __FILE__, __LINE__);
        return detectionOutputs;
    }
}
std::vector<Object> ANSODBase::RunInferenceWithOption(const cv::Mat& input, const std::string& camera_id, const std::string activeROIMode) {
    // Dispatches a single inference call to the mode encoded in
    // activeROIMode (0 normal, 1 scanning, 2 tracking, 3 SAHI/tiled).
    // No coarse _mutex here — sub-components own their locks and the
    // LabVIEW semaphore serializes callers.
    try {
        ANS_DBG("ODEngine", "RunInferenceWithOption: cam=%s %dx%d mode=%s",
            camera_id.c_str(), input.cols, input.rows, activeROIMode.c_str());
        // Defaults used when parsing leaves the values untouched.
        int mode = 0;
        double confidenceThreshold = 0.35;
        std::vector<int> trackingObjectIds;
        // SAHI tile size scales with the frame's longer edge.
        const int longestEdge = std::max(input.cols, input.rows);
        int cropSize;
        if (longestEdge > 1920)      cropSize = 640;
        else if (longestEdge > 1280) cropSize = 480;
        else if (longestEdge > 640)  cropSize = 320;
        else                         cropSize = 224;
        // Decode the mode string into mode / threshold / tracked ids.
        ANSCENTER::ANSUtilityHelper::ParseActiveROIMode(activeROIMode, mode, confidenceThreshold, trackingObjectIds);
        // Clamp the threshold to [0, 1].
        if (confidenceThreshold <= 0) confidenceThreshold = 0;
        if (confidenceThreshold > 1) confidenceThreshold = 1;
        if (confidenceThreshold > 0) {
            // Brief lock: only the config write is protected.
            std::lock_guard<std::recursive_mutex> cfgLock(_mutex);
            _modelConfig.detectionScoreThreshold = confidenceThreshold;
        }
        switch (mode) {
        case 1: // Scanning mode
            return RunInferenceInScanningMode(input, camera_id);
        case 2: // Tracking mode
            return RunInferenceInTrackingMode(input, camera_id, trackingObjectIds);
        case 3: // SAHI mode
            return RunInferences(input, cropSize, cropSize, 0.2, camera_id);
        case 0: // Normal mode
        default:
            return RunInference(input, camera_id);
        }
    }
    catch (const std::exception& e) {
        this->_logger.LogFatal("ANSODBase::RunInferenceWithOption", e.what(), __FILE__, __LINE__);
        return {};
    }
    catch (...) {
        this->_logger.LogFatal("ANSODBase::RunInferenceWithOption", "Unknown error", __FILE__, __LINE__);
        return {};
    }
}
bool ANSODBase::UpdateDetectionThreshold(float detectionScore) {
try {
if (detectionScore < 0 || detectionScore > 1) {
this->_logger.LogFatal("ANSODBase::UpdateDetectionThreshold", "Invalid detection score", __FILE__, __LINE__);
return false;
}
_modelConfig.detectionScoreThreshold = detectionScore;
return true;
}
catch (std::exception& e) {
this->_logger.LogFatal("ANSODBase::UpdateDetectionThreshold", e.what(), __FILE__, __LINE__);
return false;
}
}
bool ANSODBase::SetParameters(const Params& param) {
try {
this->_params.ROI_Config.clear();
for (auto& cf : param.ROI_Config) {
this->_params.ROI_Config.push_back(cf);
}
this->_params.ROI_Options.clear();
for (auto& op : param.ROI_Options) {
this->_params.ROI_Options.push_back(op);
}
this->_params.Parameters.clear();
for (auto& par : param.Parameters) {
this->_params.Parameters.push_back(par);
}
this->_params.ROI_Values.clear();
for (auto& roi : param.ROI_Values) {
this->_params.ROI_Values.push_back(roi);
}
return true;
}
catch (std::exception& e) {
this->_logger.LogFatal("ANSODBase::SetParamaters", e.what(), __FILE__, __LINE__);
return false;
}
}
bool ANSODBase::ConfigureParameters(Params& param) {
    // Copies this engine's current ROI / parameter configuration into the
    // caller-supplied struct. Returns false (and logs) on failure.
    try {
        // Take _mutex so the caller gets a consistent snapshot of _params.
        std::lock_guard<std::recursive_mutex> lock(_mutex);
        // Vector copy-assignment replaces the clear() + push_back loops.
        param.ROI_Config = this->_params.ROI_Config;
        param.ROI_Options = this->_params.ROI_Options;
        param.Parameters = this->_params.Parameters;
        param.ROI_Values = this->_params.ROI_Values;
        return true;
    }
    catch (const std::exception& e) {
        // Previously this failure was swallowed silently (unused `e`, no
        // log); report it like the other methods in this class do.
        this->_logger.LogFatal("ANSODBase::ConfigureParameters", e.what(), __FILE__, __LINE__);
        return false;
    }
}
//------------------------------------------------------------------------------------------------------------------------//
}