// File: ANSCORE/modules/ANSODEngine/ANSEngineCommon.cpp
// (extraction header repaired: 3794 lines, 118 KiB, C++)
#pragma once
#include "ANSODEngine.h"
#include "ANSYOLOOD.h"
#include "ANSTENSORRTOD.h"
#include "ANSTENSORRTCL.h"
#include "ANSOPENVINOCL.h"
#include "ANSOPENVINOOD.h"
#include "ANSYOLOV10RTOD.h"
#include "ANSYOLOV10OVOD.h"
#include "ANSCUSTOMDetector.h"
#include "ANSFD.h"
#include "ANSANOMALIB.h"
#include "ANSPOSE.h"
#include "ANSSAM.h"
#include <pipelines/metadata.h>
#include <models/input_data.h>
#include "utils/visualizer.hpp"
#include <turbojpeg.h>
#include <boost/property_tree/ptree.hpp>
#include <boost/property_tree/json_parser.hpp>
#include <openvino/openvino.hpp>
#include <models/image_model.h>
#include <models/super_resolution_model.h>
#include <models/jpeg_restoration_model.h>
#include <utils/ocv_common.hpp>
#include <pipelines/async_pipeline.h>
#include <hwinfo/PCIMapper.h>
#include <hwinfo/hwinfo.h>
#include <opencv2/imgproc.hpp>
#include <opencv2/highgui.hpp>
#include <opencv2/dnn.hpp>
#include <json.hpp>
//ANSENGINE_API void* AllocMemory(size_t size);
//ANSENGINE_API void FreeMemory_(void* ptr);
//#define FreeMemory(ptr) (FreeMemory_(*(ptr)), *(ptr)=0);
static const float lmheight = 112.;
static const float lmwidth = 96.;
// reference landmarks points in the unit square [0,1]x[0,1]
static const float ref_landmarks_normalized[] = {
30.2946f / lmwidth, 51.6963f / lmheight, 65.5318f / lmwidth, 51.5014f / lmheight, 48.0252f / lmwidth,
71.7366f / lmheight, 33.5493f / lmwidth, 92.3655f / lmheight, 62.7299f / lmwidth, 92.2041f / lmheight };
namespace ANSCENTER {
// Reads an optional value from a property tree.
// Returns the value stored at `key`, or a value-initialized T (0, false,
// empty string, ...) when the key is absent.
// FIX: the previous version declared `T ret;` without initialization, so a
// missing key returned an *uninitialized* value for primitive types
// (undefined behavior). `T ret{}` value-initializes instead.
template <typename T> T GetData(const boost::property_tree::ptree& pt, const std::string& key)
{
T ret{};
if (boost::optional<T> data = pt.get_optional<T>(key))
{
ret = data.get();
}
return ret;
}
// ANNHUB
// Element-wise rectified linear unit: oVal[i] = max(iVal[i], 0).
void ANNHUBClassifier::ReLu(std::vector<double>& iVal, std::vector<double>& oVal)
{
const int dim = static_cast<int>(iVal.size());
oVal.resize(dim);
for (int idx = 0; idx < dim; ++idx)
{
oVal[idx] = (iVal[idx] >= 0) ? iVal[idx] : 0.0;
}
}
// Element-wise logistic sigmoid: oVal[i] = 1 / (1 + e^(-iVal[i])).
void ANNHUBClassifier::LogSig(std::vector<double>& iVal, std::vector<double>& oVal)
{
const std::size_t count = iVal.size();
oVal.resize(count);
for (std::size_t k = 0; k < count; ++k)
{
oVal[k] = 1.0 / (1.0 + exp(-iVal[k]));
}
}
// Element-wise hyperbolic-tangent sigmoid: oVal[i] = 2/(1 + e^(-2*iVal[i])) - 1,
// which is algebraically tanh(iVal[i]).
void ANNHUBClassifier::TanSig(std::vector<double>& iVal, std::vector<double>& oVal)
{
const std::size_t count = iVal.size();
oVal.resize(count);
for (std::size_t k = 0; k < count; ++k)
{
oVal[k] = 2.0 / (1.0 + exp(-2.0 * iVal[k])) - 1.0;
}
}
// Identity (linear) activation: copies the input straight through.
void ANNHUBClassifier::PureLin(std::vector<double>& iVal, std::vector<double>& oVal)
{
oVal.assign(iVal.begin(), iVal.end());
}
// Softmax activation over the output vector.
// For a single output node the network degenerates to a binary classifier,
// so a logistic sigmoid is used instead (preserves original behavior).
// FIX: the multi-node branch now subtracts the maximum input before
// exponentiating — the standard max-shift trick, mathematically identical
// but immune to exp() overflow (inf/inf -> NaN) for large activations.
// Also guards the empty-input case before touching iVal[0].
void ANNHUBClassifier::SoftMax(std::vector<double>& iVal, std::vector<double>& oVal)
{
int dim = iVal.size();
oVal.resize(dim);
if (dim == 0) {
return; // nothing to normalize
}
if (dim == 1) {
oVal[0] = 1 / (1 + exp(-iVal[0]));
}
else {
// Shift by the maximum so every exponent is <= 0
double maxVal = iVal[0];
for (int i = 1; i < dim; i++) {
if (iVal[i] > maxVal) maxVal = iVal[i];
}
double softmaxWeight = 0;
for (int i = 0; i < dim; i++) {
softmaxWeight += exp(iVal[i] - maxVal);
}
for (int i = 0; i < dim; i++) {
oVal[i] = exp(iVal[i] - maxVal) / softmaxWeight;
}
}
}
// Dispatches to the activation selected by `mode`:
// 0 = linear, 1 = ReLU, 2 = log-sigmoid, 3 = tan-sigmoid, 4 = softmax.
// Any unrecognized mode falls back to tan-sigmoid, same as mode 3.
void ANNHUBClassifier::ActivationFunction(std::vector<double>& iVal, std::vector<double>& oVal, int mode) {
if (mode == 0) {
PureLin(iVal, oVal);
}
else if (mode == 1) {
ReLu(iVal, oVal);
}
else if (mode == 2) {
LogSig(iVal, oVal);
}
else if (mode == 4) {
SoftMax(iVal, oVal);
}
else {
// mode 3 and every unknown value
TanSig(iVal, oVal);
}
}
// Validates the license for this classifier instance and records the
// result in _licenseValid (read by Init and Inference).
// NOTE(review): currently a stub — it unconditionally marks the license
// as valid and the catch block can never trigger. Presumably real
// validation is performed elsewhere or still to be implemented; confirm.
void ANNHUBClassifier::CheckLicense() {
try {
_licenseValid = true;
}
catch (std::exception& e) {
// unreachable with the current body; kept for future validation logic
}
}
// Constructs a classifier with the library defaults: a 1-10-1 topology,
// log-sigmoid activations on both layers, and linear (identity) data
// normalization. No weight storage is allocated until Create() runs.
ANNHUBClassifier::ANNHUBClassifier()
{
// Control creation
isCreated = 0;
// Structural parameters
nInputNodes = 1;
nHiddenNodes = 10;
nOutputNodes = 1;
hiddenActivation = 2; // default = 2 (log-sigmoid, see ActivationFunction)
outputActivation = 2; // default = 2
dataNormalisationModeInput = 0;  // 0 = linear / no scaling
dataNormalisationModeOutput = 0;
}
// Releases all network storage via Destroy(); safe to call on an instance
// that was never created (Destroy checks the isCreated flag first).
ANNHUBClassifier::~ANNHUBClassifier()
{
Destroy();
}
// Frees the network's weight and state vectors if a network exists.
void ANNHUBClassifier::Destroy()
{
if (isCreated == 0) {
return; // nothing was ever allocated
}
FreeNeuralNetwork();
}
// Releases every weight, bias, activation and normalization vector and
// marks the network as not created. Never propagates exceptions, keeping
// the destructor path safe.
void ANNHUBClassifier::FreeNeuralNetwork()
{
try {
// Weights and biases
IW.clear();
Ib.clear();
LW.clear();
Lb.clear();
// Layer activations
nInput.clear();
nOutput.clear();
// Normalization ranges
xminInput.clear();
xmaxInput.clear();
xminOutput.clear();
xmaxOutput.clear();
// Network must be re-created before further use
isCreated = 0;
}
catch (const std::exception&) {
// intentionally swallowed: clearing vectors cannot realistically fail
}
}
// Normalizes the raw input vector in place according to
// dataNormalisationModeInput. Mode 0 is linear (identity); every other
// mode applies mapminmax scaling of each channel from its recorded
// [xminInput, xmaxInput] range onto [yminInput, ymaxInput]. Channels with
// a zero-width range are left untouched. (The original switch had
// identical bodies for case 1 and default, so they are folded together.)
void ANNHUBClassifier::PreProcessing(std::vector<double>& Input)
{
if (dataNormalisationModeInput == 0) {
return; // linear mode: no scaling
}
const double spanY = ymaxInput - yminInput;
for (int i = 0; i < nInputNodes; i++) {
const double spanX = xmaxInput[i] - xminInput[i];
if (spanX != 0.0) {
Input[i] = (Input[i] - xminInput[i]) * spanY / spanX + yminInput;
}
}
}
// Denormalizes the network output in place — the inverse of PreProcessing.
// Mode 0 is linear (identity); every other mode maps each output from the
// [yminOutput, ymaxOutput] range back to its recorded [xminOutput,
// xmaxOutput] range. A degenerate (zero-width) y-range leaves the outputs
// unchanged. (The original switch had identical bodies for case 1 and
// default, so they are folded together; the loop-invariant range check is
// hoisted out of the loop.)
void ANNHUBClassifier::PostProcessing(std::vector<double>& Output)
{
if (dataNormalisationModeOutput == 0) {
return; // linear mode: no scaling
}
const double spanY = ymaxOutput - yminOutput;
if (spanY == 0.0) {
return;
}
for (int i = 0; i < nOutputNodes; i++) {
Output[i] = (Output[i] - yminOutput) * (xmaxOutput[i] - xminOutput[i]) / spanY + xminOutput[i];
}
}
// Loads an ANNHUB-exported neural network from a whitespace-separated text
// model file. Layout: type flag, obfuscation offset ("randNum", added to
// every stored value at export time and subtracted here), structure IDs,
// layer sizes, IW / Ib / LW / Lb weights, then optional pre/post
// normalization ranges.
// Returns 0 on success, -2 if the file cannot be opened, -1 if the type
// flag is wrong or an exception occurs.
// NOTE(review): fscanf_s return values are never checked, so a truncated
// file silently yields partially loaded weights — confirm callers can
// tolerate this or add validation.
int ANNHUBClassifier::ImportANNFromFile(std::string filename)
{
try {
FILE* fp;
int err = fopen_s(&fp, filename.c_str(), "r"); // r: Opens for reading.
if (err != 0) return -2;
float value, randNum;
//0. Check if it the correct type for C language
fscanf_s(fp, "%f", &value);
if (static_cast<int>(value) != 1) { fclose(fp); return -1; } // Assume that the type C is 0
//1. Load random number
// "randNum" is the obfuscation offset subtracted from every later value.
fscanf_s(fp, "%f", &randNum);
//2. Structure (IDs)
int trainingEngine, hlAct, olAct, costFunct, prePro, postPro, evalModel;
fscanf_s(fp, "%f", &value); trainingEngine = static_cast<int>(value);
fscanf_s(fp, "%f", &value); hlAct = static_cast<int>(value);
fscanf_s(fp, "%f", &value); olAct = static_cast<int>(value);
fscanf_s(fp, "%f", &value); costFunct = static_cast<int>(value);
fscanf_s(fp, "%f", &value); prePro = static_cast<int>(value);
fscanf_s(fp, "%f", &value); postPro = static_cast<int>(value);
fscanf_s(fp, "%f", &value); evalModel = static_cast<int>(value);
//2.1 Activation function
// Activation IDs are stored with a +10 offset (e.g. 12 -> mode 2).
hiddenActivation = hlAct - 10;
outputActivation = olAct - 10;
//2.2 Data Processing
// Normalization IDs are stored with a +1000 offset.
dataNormalisationModeInput = prePro - 1000;
dataNormalisationModeOutput = postPro - 1000;
// 3. Load neural network structure and min max inputs value for pre-post processing
int ipNodes, hdNodes, opNodes;
fscanf_s(fp, "%f", &value); ipNodes = static_cast<int>(value);
fscanf_s(fp, "%f", &value); hdNodes = static_cast<int>(value);
fscanf_s(fp, "%f", &value); opNodes = static_cast<int>(value);
Create(ipNodes, hdNodes, opNodes);
// 4. Load Input-Hidden weights (extraction formula)
// Stored input-major: all hidden weights for input 0 first, etc.
for (int j = 0; j < nInputNodes; j++)
{
for (int i = 0; i < nHiddenNodes; i++)
{
fscanf_s(fp, "%f", &value);
IW[i][j] = value - randNum;
}
}
// 4.1. Load bias Hidden weights
for (int i = 0; i < nHiddenNodes; i++)
{
fscanf_s(fp, "%f", &value);
Ib[i] = value - randNum;
}
// 4.2. Load Hidden_Output weights
for (int j = 0; j < nHiddenNodes; j++)
{
for (int i = 0; i < nOutputNodes; i++)
{
fscanf_s(fp, "%f", &value);
LW[i][j] = value - randNum;
}
}
// 4.3. Load bias Output weights
for (int i = 0; i < nOutputNodes; i++)
{
fscanf_s(fp, "%f", &value);
Lb[i] = value - randNum;
}
// 5. For Pre-processing
// These sections are only present when the file declared a mode >= 0.
if (dataNormalisationModeInput >= 0) {
// Range settings
fscanf_s(fp, "%f", &value);
yminInput = value - randNum;
fscanf_s(fp, "%f", &value);
ymaxInput = value - randNum;
// Min and max
for (int i = 0; i < nInputNodes; i++) {
fscanf_s(fp, "%f", &value);
xminInput[i] = value - randNum;
}
for (int i = 0; i < nInputNodes; i++) {
fscanf_s(fp, "%f", &value);
xmaxInput[i] = value - randNum;
}
}
// 6. For Post-processing
if (dataNormalisationModeOutput >= 0) {
// Range settings
fscanf_s(fp, "%f", &value);
yminOutput = value - randNum;
fscanf_s(fp, "%f", &value);
ymaxOutput = value - randNum;
for (int i = 0; i < nOutputNodes; i++) {
fscanf_s(fp, "%f", &value);
xminOutput[i] = value - randNum;
}
for (int i = 0; i < nOutputNodes; i++) {
fscanf_s(fp, "%f", &value);
xmaxOutput[i] = value - randNum;
}
}
fclose(fp);
return 0;
}
catch (std::exception& e) {
//this->_logger.LogFatal("ANNHUBAPI::ImportANNFromFile. Error:", e.what(), __FILE__, __LINE__);
return -1;
}
}
// Allocates storage for a network with the given topology. Any previously
// created network is released first. Sets isCreated = 1 on success; an
// allocation failure leaves the object in the not-created state.
void ANNHUBClassifier::Create(int inputNodes, int hiddenNodes, int outputNodes)
{
try {
if (isCreated != 0) {
FreeNeuralNetwork();
}
nInputNodes = inputNodes;
nHiddenNodes = hiddenNodes;
nOutputNodes = outputNodes;
// Layer activation buffers
nInput.resize(inputNodes);
nOutput.resize(outputNodes);
// Weight matrices: IW is hidden x input, LW is output x hidden
IW.resize(hiddenNodes, std::vector<double>(inputNodes));
LW.resize(outputNodes, std::vector<double>(hiddenNodes));
// Bias vectors
Ib.resize(hiddenNodes);
Lb.resize(outputNodes);
// Normalization range buffers
xminInput.resize(inputNodes);
xmaxInput.resize(inputNodes);
xminOutput.resize(outputNodes);
xmaxOutput.resize(outputNodes);
isCreated = 1;
}
catch (std::exception&) {
// swallow: caller observes failure through isCreated remaining 0
}
}
// Loads the ANN model at modelFilePath after validating the license key.
// Returns true only when the license check passes AND the model file
// parses successfully.
bool ANNHUBClassifier::Init(std::string licenseKey, std::string modelFilePath) {
try {
_licenseKey = licenseKey;
_modelFilePath = modelFilePath;
CheckLicense();
if (!_licenseValid) {
return false;
}
return ImportANNFromFile(_modelFilePath) == 0;
}
catch (std::exception&) {
return false;
}
}
// Runs a forward pass through the two-layer network and returns the
// post-processed output vector.
// Returns an empty vector when: the license is invalid, the network has
// not been created, or the input size does not match the input layer.
// FIX: the input-size check was only a TODO comment before; a mismatched
// input vector indexed ip[j] out of bounds in the hidden-layer loop.
std::vector<double> ANNHUBClassifier::Inference(std::vector<double> ip) {
try {
if (!_licenseValid) return {}; // Invalid license
if (isCreated == 0) return {};
if (static_cast<int>(ip.size()) != nInputNodes) return {}; // size mismatch guard
std::vector<double> a1(nHiddenNodes), n1(nHiddenNodes), n2(nOutputNodes);
PreProcessing(ip);
// 1. Hidden-layer pre-activations: n1 = IW * ip + Ib
for (int i = 0; i < nHiddenNodes; i++) {
n1[i] = 0;
for (int j = 0; j < nInputNodes; j++)
{
// Diagnostic for corrupted weights or inputs
if (std::isnan(IW[i][j]) || std::isnan(ip[j])) {
std::cerr << "NaN detected: IW[" << i << "][" << j << "] = " << IW[i][j] << ", ip[" << j << "] = " << ip[j] << std::endl;
}
n1[i] = n1[i] + IW[i][j] * ip[j];
}
n1[i] = n1[i] + Ib[i];
}
// 2. Hidden activation
ActivationFunction(n1, a1, hiddenActivation);
// 3. Output-layer pre-activations: n2 = LW * a1 + Lb
for (int i = 0; i < nOutputNodes; i++) {
n2[i] = 0;
for (int j = 0; j < nHiddenNodes; j++) {
n2[i] = n2[i] + LW[i][j] * a1[j];
}
n2[i] = n2[i] + Lb[i];
}
// 4. Output activation and denormalization
ActivationFunction(n2, nOutput, outputActivation);
PostProcessing(nOutput);
return nOutput;
}
catch (std::exception&) {
//this->_logger.LogFatal("ANNHUBAPI::Inference. Error:", e.what(), __FILE__, __LINE__);
return {};
}
}
// For multiple cameras
//MoveDetectsHandler::MoveDetectsHandler()
//{
// // Initialize shared parameters here if needed
//}
//MoveDetectsHandler::~MoveDetectsHandler() {
// for (auto& [key, cameraData] : cameras)
// {
// cameraData.clear(); // Clear each CameraData instance
// }
// cameras.clear(); // Clear the map itself
//}
//bool MoveDetectsHandler::empty(const std::string& camera_id) const
//{
// auto it = cameras.find(camera_id);
// return it == cameras.end() || it->second.control_frames.empty();
//}
//MoveDetectsHandler& MoveDetectsHandler::clear(const std::string& camera_id)
//{
// cameras[camera_id] = CameraData();
// return *this;
//}
//cv::Mat MoveDetectsHandler::simple_colour_balance(const cv::Mat& src)
//{
// if (src.empty() || src.channels() != 3)
// {
// return src;
// }
// const float full_percent = 1.0;
// const float half_percent = full_percent / 200.0f;
// std::vector<cv::Mat> tmpsplit;
// cv::split(src, tmpsplit);
// for (size_t idx = 0; idx < 3; idx++)
// {
// // find the low and high precentile values (based on the input percentile)
// cv::Mat flat;
// tmpsplit[idx].reshape(1, 1).copyTo(flat);
// cv::sort(flat, flat, cv::SORT_EVERY_ROW + cv::SORT_ASCENDING);
// const int lo = flat.at<uchar>(cvFloor(((float)flat.cols) * (0.0 + half_percent)));
// const int hi = flat.at<uchar>(cvCeil(((float)flat.cols) * (1.0 - half_percent)));
// // saturate below the low percentile and above the high percentile
// tmpsplit[idx].setTo(lo, tmpsplit[idx] < lo);
// tmpsplit[idx].setTo(hi, tmpsplit[idx] > hi);
// // scale the channel
// cv::normalize(tmpsplit[idx], tmpsplit[idx], 0, 255, cv::NORM_MINMAX);
// }
// cv::Mat output;
// cv::merge(tmpsplit, output);
// return output;
//}
//cv::Rect MoveDetectsHandler::BoundingBoxFromContour(std::vector<cv::Point> contour) {
// if (contour.size() <= 0) {
// cv::Rect empty;
// return empty;
// }
// if (cv::contourArea(contour) < 1000) {
// cv::Rect empty;
// return empty;
// }
// cv::Point minp(contour[0].x, contour[0].y),
// maxp(contour[0].x, contour[0].y);
// for (unsigned int i = 0; i < contour.size(); i++) {
// if (contour[i].x < minp.x) {
// minp.x = contour[i].x;
// }
// if (contour[i].y < minp.y) {
// minp.y = contour[i].y;
// }
// if (contour[i].x > maxp.x) {
// maxp.x = contour[i].x;
// }
// if (contour[i].y > maxp.y) {
// maxp.y = contour[i].y;
// }
// }
// cv::Rect box(minp, maxp);
// return box;
//}
//cv::Mat MoveDetectsHandler::getOutput(const std::string& camera_id) const
//{
// return cameras.at(camera_id).output;
//}
//const std::vector<std::vector<cv::Point>>& MoveDetectsHandler::getContours(const std::string& camera_id) const
//{
// return cameras.at(camera_id).contours;
//}
//std::vector<Object> MoveDetectsHandler::MovementDetect(const std::string& camera_id, cv::Mat& next_image) {
// return MovementDetect(camera_id, cameras[camera_id].next_frame_index, next_image);
//}
//double MoveDetectsHandler::psnr(const cv::Mat& src, const cv::Mat& dst)
//{
// if (src.empty() || dst.empty())
// {
// return 0.0;
// }
// if (src.type() != dst.type() ||
// src.cols != dst.cols ||
// src.rows != dst.rows)
// {
// return 0.0;
// }
// cv::Mat s1;
// cv::absdiff(src, dst, s1); // |src - dst|
// s1.convertTo(s1, CV_32F); // cannot make a square on 8 bits
// s1 = s1.mul(s1); // |src - dst|^2
// cv::Scalar s = sum(s1); // sum elements per channel
// const double sse = s.val[0] + s.val[1] + s.val[2]; // sum channels
// if (sse <= 1e-10) // for small values return zero
// {
// return 0.0;
// }
// const double mse = sse / static_cast<double>(src.channels() * src.total());
// const double psnr = 10.0 * std::log10((255 * 255) / mse);
// return psnr;
//}
//std::vector<Object> MoveDetectsHandler::MovementDetect(const std::string& camera_id, const size_t frame_index, cv::Mat& image) {
// std::vector<Object> outputObjects;
// CameraData& camera = getCameraData(camera_id);
// camera.contours.clear();
// if (image.empty()) {
// return outputObjects;
// }
// // Set thumbnail size for faster comparison
// if (camera.thumbnail_size.area() <= 1) {
// camera.thumbnail_ratio = std::clamp(camera.thumbnail_ratio, 0.01, 1.0);
// camera.thumbnail_size.width = static_cast<int>(image.cols * camera.thumbnail_ratio);
// camera.thumbnail_size.height = static_cast<int>(image.rows * camera.thumbnail_ratio);
// }
// cv::Mat scb = image; //simple_colour_balance(image);
// cv::Mat thumbnail;
// cv::resize(scb, thumbnail, camera.thumbnail_size, 0, 0, cv::INTER_AREA);
// bool previous_movement_detected = camera.movement_detected;
// camera.movement_detected = false;
// for (auto iter = camera.control_frames.rbegin(); iter != camera.control_frames.rend(); ++iter) {
// const cv::Mat& control_image = iter->second;
// // Compute PSNR between control frame and current thumbnail
// camera.most_recent_psnr_score = psnr(control_image, thumbnail);
// if (camera.most_recent_psnr_score < camera.psnr_threshold) {
// camera.movement_detected = true;
// camera.movement_last_detected = std::chrono::high_resolution_clock::now();
// camera.frame_index_with_movement = frame_index;
// // Compute movement mask
// cv::Mat differences;
// cv::absdiff(control_image, thumbnail, differences);
// // This gives us a very tiny 3-channel image.
// // Now resize it to match the original image size.
// cv::Mat differences_resized;
// cv::resize(differences, differences_resized, image.size(), 0, 0, cv::INTER_CUBIC);
// // We'd like to generate a binary threshold, but that requires us to convert
// // the image to greyscale first.
// cv::Mat greyscale;
// cv::cvtColor(differences_resized, greyscale, cv::COLOR_BGR2GRAY);
// cv::Mat threshold;
// cv::threshold(greyscale, threshold, 0.0, 255.0, cv::THRESH_BINARY | cv::THRESH_OTSU);
// // And finally we dilate + erode the results to combine regions.
// cv::Mat dilated;
// cv::dilate(threshold, dilated, cv::Mat(), cv::Point(-1, -1), 10);
// cv::Mat eroded;
// cv::erode(dilated, eroded, cv::Mat(), cv::Point(-1, -1), 10);
// // Store movement mask
// camera.mask = eroded.clone();
// break;
// }
// }
// // Detect transition states
// camera.transition_detected = (previous_movement_detected != camera.movement_detected);
// if (camera.mask.empty() || (camera.transition_detected && !camera.movement_detected)) {
// camera.mask = cv::Mat(image.size(), CV_8UC1, cv::Scalar(0));
// }
// // Clone image for output
// camera.output = image.clone();
// std::vector<cv::Vec4i> hierarchy;
// cv::findContours(camera.mask, camera.contours, hierarchy, cv::RETR_EXTERNAL, cv::CHAIN_APPROX_SIMPLE);
// // Process each detected movement region
// for (const auto& contour : camera.contours) {
// double area = cv::contourArea(contour);
// //if (area < 1000) continue; // Filter small areas
// // Draw movement contours
// cv::polylines(camera.output, contour, true, cv::Scalar(0, 0, 255), camera.contours_size, camera.line_type);
// // Get bounding rectangle for each movement region
// cv::Rect boundingRect = cv::boundingRect(contour);
// // Ensure bounding box stays within image
// boundingRect.x = std::max(0, std::min(boundingRect.x, image.cols - 1));
// boundingRect.y = std::max(0, std::min(boundingRect.y, image.rows - 1));
// boundingRect.width = std::max(1, std::min(boundingRect.width, image.cols - boundingRect.x));
// boundingRect.height = std::max(1, std::min(boundingRect.height, image.rows - boundingRect.y));
// Object obj;
// obj.box = boundingRect;
// obj.classId = 0;
// obj.className = "movement";
// obj.confidence = 1.0;
// obj.mask = image(boundingRect).clone(); // Extract movement region
// int minObjectSize = std::min(boundingRect.width, boundingRect.height);
// int objectSize = boundingRect.width * boundingRect.height;
// // Ensure detected objects meet size criteria before adding
// if ((minObjectSize > 5) && (objectSize > 25)) {
// outputObjects.push_back(obj);
// }
// }
// // Update control frames (store key frames for PSNR comparison)
// if (frame_index >= camera.next_key_frame || camera.control_frames.size() < camera.number_of_control_frames) {
// camera.control_frames[frame_index] = thumbnail.clone();
// while (camera.control_frames.size() > camera.number_of_control_frames) {
// auto iter = camera.control_frames.begin();
// camera.control_frames.erase(iter);
// }
// camera.next_key_frame = frame_index + camera.key_frame_frequency;
// }
// camera.next_frame_index = frame_index + 1;
// return outputObjects;
//}
//
// Helper class
// Decodes an in-memory JPEG byte string into a BGR cv::Mat.
// Returns an empty Mat on empty input or decode failure.
cv::Mat ANSCENTER::ANSUtilityHelper::JpegStringToMat(const std::string& jpegString) {
if (jpegString.empty()) {
return cv::Mat();
}
try {
// Copy the raw bytes into a buffer imdecode can read
const std::vector<uchar> buffer(jpegString.begin(), jpegString.end());
return cv::imdecode(buffer, cv::IMREAD_COLOR);
}
catch (const std::exception& e) {
std::cerr << "Error decoding JPEG string: " << e.what() << std::endl;
return cv::Mat();
}
}
// Splits `s` on `delimiter` and returns the pieces in order
// (std::getline semantics: consecutive delimiters yield empty tokens).
std::vector<std::string> ANSCENTER::ANSUtilityHelper::Split(const std::string& s, char delimiter) {
std::vector<std::string> parts;
std::istringstream stream(s);
for (std::string piece; getline(stream, piece, delimiter); ) {
parts.push_back(piece);
}
return parts;
}
std::vector<cv::Point> ANSCENTER::ANSUtilityHelper::StringToPolygon(const std::string& input) {
std::vector<cv::Point> points;
size_t delimiterPos = input.find('|');
if (delimiterPos == std::string::npos) {
return points; // Return empty vector if format is incorrect
}
std::string xPart = input.substr(0, delimiterPos);
std::string yPart = input.substr(delimiterPos + 1);
std::vector<std::string> xTokens = ANSCENTER::ANSUtilityHelper::Split(xPart, ';');
std::vector<std::string> yTokens = ANSCENTER::ANSUtilityHelper::Split(yPart, ';');
if (xTokens.size() != yTokens.size()) {
return points; // Return empty if sizes don't match
}
for (size_t i = 0; i < xTokens.size(); ++i) {
int x = std::stoi(xTokens[i]);
int y = std::stoi(yTokens[i]);
points.emplace_back(x, y);
}
return points;
}
// Masks the image to the given polygon: pixels inside the polygon keep
// their value, everything outside is black. The result has the same size
// as the input. Returns an empty Mat when the image or polygon is empty.
// FIX: the point pointer was previously obtained by casting the data
// pointer of a *temporary* cv::Mat(polygon) — needlessly fragile and hard
// to audit. We now point directly at the caller's vector. An explicit
// empty-polygon guard also prevents passing a null points pointer to
// cv::fillPoly.
cv::Mat ANSCENTER::ANSUtilityHelper::CropPolygon(const cv::Mat& image, const std::vector<cv::Point>& polygon) {
if (image.empty() || polygon.empty()) {
return cv::Mat();
}
// Mask with the same size as the image, initially all black
cv::Mat mask = cv::Mat::zeros(image.size(), CV_8UC1);
// Draw the filled polygon (white) on the mask
const cv::Point* pts = polygon.data();
int npts = static_cast<int>(polygon.size());
cv::fillPoly(mask, &pts, &npts, 1, cv::Scalar(255));
// Copy only the masked region
cv::Mat croppedImage;
image.copyTo(croppedImage, mask);
return croppedImage;
}
// Convenience wrapper: decodes an "x1;x2;...|y1;y2;..." polygon string and
// returns the image masked to that polygon.
cv::Mat ANSCENTER::ANSUtilityHelper::CropFromStringPolygon(const cv::Mat& image, const std::string& strPolygon) {
const std::vector<cv::Point> region = ANSCENTER::ANSUtilityHelper::StringToPolygon(strPolygon);
return ANSCENTER::ANSUtilityHelper::CropPolygon(image, region);
}
std::vector<cv::Point2f> ANSUtilityHelper::PolygonFromString(const std::string& str) {
std::vector<cv::Point2f> points;
if (str.empty()) {
return points;
}
std::stringstream ss(str);
std::string token;
while (std::getline(ss, token, ';')) {
std::stringstream pairStream(token);
std::string xStr, yStr;
if (std::getline(pairStream, xStr, ',') && std::getline(pairStream, yStr, ',')) {
try {
float x = std::stof(xStr);
float y = std::stof(yStr);
points.emplace_back(x, y);
}
catch (...) {
// Skip invalid entries
}
}
}
return points;
}
// Parses a ';'-separated list of floats; tokens that fail to parse are
// silently dropped.
std::vector<float> ANSUtilityHelper::StringToKeypoints(const std::string& str) {
std::vector<float> values;
if (!str.empty()) {
std::stringstream reader(str);
for (std::string item; std::getline(reader, item, ';'); ) {
try {
values.push_back(std::stof(item));
}
catch (...) {
// ignore non-numeric entries
}
}
}
return values;
}
std::vector<ANSCENTER::Object> ANSUtilityHelper::GetDetectionsFromString(const std::string& strDets) {
boost::property_tree::ptree root;
boost::property_tree::ptree trackingObjects;
boost::property_tree::ptree pt;
std::vector<ANSCENTER::Object> detectionObjects;
//1. Get input
std::stringstream ss;
ss << strDets;
boost::property_tree::read_json(ss, pt);
//2. Parsing
BOOST_FOREACH(const boost::property_tree::ptree::value_type & child, pt.get_child("results"))
{
const boost::property_tree::ptree& result = child.second;
const auto class_id = GetData<int>(result, "class_id");
const auto track_id = GetData<int>(result, "track_id");
const auto class_name = GetData<std::string>(result, "class_name");
const auto prob = GetData<float>(result, "prob");
const auto x = GetData<float>(result, "x");
const auto y = GetData<float>(result, "y");
const auto width = GetData<float>(result, "width");
const auto height = GetData<float>(result, "height");
const auto mask = GetData<std::string>(result, "mask");
const auto extra_info = GetData<std::string>(result, "extra_info");
const auto camera_id = GetData<std::string>(result, "camera_id");
const auto polygon = GetData<std::string>(result, "polygon");
const auto kps = GetData<std::string>(result, "kps");
//3. Create object
ANSCENTER::Object temp;
temp.classId = class_id;
temp.trackId = track_id;
temp.className = class_name;
temp.confidence = prob;
temp.box.x = x;
temp.box.y = y;
temp.box.width = width;
temp.box.height = height;
temp.extraInfo = extra_info;
temp.cameraId = camera_id;
temp.mask = ANSCENTER::ANSUtilityHelper::JpegStringToMat(mask);
temp.polygon = ANSCENTER::ANSUtilityHelper::PolygonFromString(polygon);
temp.kps = ANSCENTER::ANSUtilityHelper::StringToKeypoints(kps);
detectionObjects.push_back(temp);
}
return detectionObjects;
}
std::vector<cv::Rect> ANSCENTER::ANSUtilityHelper::GetBoundingBoxesFromString(std::string strBBoxes) {
std::vector<cv::Rect> bBoxes;
bBoxes.clear();
std::stringstream ss;
ss << strBBoxes;
boost::property_tree::ptree pt;
boost::property_tree::read_json(ss, pt);
BOOST_FOREACH(const boost::property_tree::ptree::value_type & child, pt.get_child("results"))
{
const boost::property_tree::ptree& result = child.second;
const auto x = GetData<float>(result, "x");
const auto y = GetData<float>(result, "y");
const auto width = GetData<float>(result, "width");
const auto height = GetData<float>(result, "height");
cv::Rect rectTemp;
rectTemp.x = x;
rectTemp.y = y;
rectTemp.width = width;
rectTemp.height = height;
bBoxes.push_back(rectTemp);
}
return bBoxes;
}
// Parses the module's custom-parameter JSON document into a Params struct.
// The document must contain the top-level arrays "ROI_Config",
// "ROI_Options", "Parameters" (each entry carrying a "ListItems" array)
// and "ROI_Values" (each entry carrying a "ROIPoints" array).
// NOTE(review): get_child() throws when a section is missing and the
// exception propagates to the caller — confirm every producer always
// emits all four sections (and "ListItems"/"ROIPoints" in each entry).
ANSCENTER::Params ANSCENTER::ANSUtilityHelper::ParseCustomParameters(const std::string& jsonStr) {
Params params;
std::stringstream ss(jsonStr);
boost::property_tree::ptree root;
read_json(ss, root);
// ROI_Config: which ROI shapes are allowed and their item limits
for (auto& item : root.get_child("ROI_Config")) {
const auto& pt = item.second;
ROIConfig cfg;
cfg.Rectangle = GetData<bool>(pt, "Rectangle");
cfg.Polygon = GetData<bool>(pt, "Polygon");
cfg.Line = GetData<bool>(pt, "Line");
cfg.MinItems = GetData<int>(pt, "MinItems");
cfg.MaxItems = GetData<int>(pt, "MaxItems");
cfg.Name = GetData<std::string>(pt, "Name");
cfg.ROIMatch = GetData<std::string>(pt, "ROI-Match");
params.ROI_Config.push_back(cfg);
}
// ROI_Options: plain list of option strings
for (auto& item : root.get_child("ROI_Options")) {
params.ROI_Options.push_back(item.second.get_value<std::string>());
}
// Parameters: typed, range-bounded configuration entries
for (auto& item : root.get_child("Parameters")) {
const auto& pt = item.second;
Parameter p;
p.Name = GetData<std::string>(pt, "Name");
p.DataType = GetData<std::string>(pt, "DataType");
p.NoOfDecimals = GetData<int>(pt, "NoOfdecimals");
p.MaxValue = GetData<int>(pt, "MaxValue");
p.MinValue = GetData<int>(pt, "MinValue");
p.StartValue = GetData<std::string>(pt, "StartValue");
p.DefaultValue = GetData<std::string>(pt, "DefaultValue");
p.Value = GetData<std::string>(pt, "Value");
for (const auto& li : pt.get_child("ListItems")) {
p.ListItems.push_back(li.second.get_value<std::string>());
}
params.Parameters.push_back(p);
}
// ROI_Values: concrete ROI geometry
for (auto& item : root.get_child("ROI_Values")) {
const auto& pt = item.second;
ROIValue rv;
rv.ROIMatch = GetData<std::string>(pt, "ROI-Match");
rv.Option = GetData<std::string>(pt, "Option");
rv.Name = GetData<std::string>(pt, "Name");
rv.OriginalImageSize = GetData<int>(pt, "OriginalImageSize");
for (const auto& pointNode : pt.get_child("ROIPoints")) {
Point ptObj;
ptObj.x = GetData<int>(pointNode.second, "x");
ptObj.y = GetData<int>(pointNode.second, "y");
rv.ROIPoints.push_back(ptObj);
}
params.ROI_Values.push_back(rv);
}
return params;
}
// Parses an active-ROI descriptor of the form
// {"Mode":int, "Score":double, "ObjectIds":[int, ...]}.
// Empty input is treated as "mode 0, no tracked objects" and returns true.
// Malformed JSON sets mode to -1 and returns false.
// FIX: detectionScore is now assigned on every path — it was previously
// left untouched on the empty-input and error paths, silently keeping
// whatever value the caller passed in.
bool ANSCENTER::ANSUtilityHelper::ParseActiveROIMode(const std::string activeROIMode, int& mode, double &detectionScore,std::vector<int>& trackingObjectIds) {
if (activeROIMode.empty()) {
mode = 0;
detectionScore = 0.0;
trackingObjectIds.clear();
return true;
}
try
{
std::istringstream iss(activeROIMode);
boost::property_tree::ptree pt;
boost::property_tree::read_json(iss, pt);
// Scalar fields
mode = GetData<int>(pt, "Mode");
detectionScore = GetData<double>(pt, "Score");
// ObjectIds array
trackingObjectIds.clear();
for (const auto& item : pt.get_child("ObjectIds"))
{
trackingObjectIds.push_back(item.second.get_value<int>());
}
return true;
}
catch (const std::exception&)
{
// Malformed JSON or missing required fields
mode = -1;
detectionScore = 0.0;
trackingObjectIds.clear();
return false;
}
}
// Serializes a Params struct to a compact single-line JSON string — the
// inverse of ParseCustomParameters.
// FIX: the previous version stripped ALL space characters from the final
// JSON, which corrupted any string value containing a space (e.g. a Name
// of "My ROI" became "MyROI"). Newline/CR/tab removal is kept: write_json
// escapes control characters inside string values, so those characters can
// only be formatting.
std::string ANSCENTER::ANSUtilityHelper::SerializeCustomParamters(const ANSCENTER::Params& params) {
boost::property_tree::ptree root;
boost::property_tree::ptree roiConfig, roiOptions, parameters, roiValues;
// ROI_Config
for (const auto& cfg : params.ROI_Config) {
boost::property_tree::ptree pt;
pt.put("Rectangle", cfg.Rectangle); // convert to string if needed
pt.put("Polygon", cfg.Polygon);
pt.put("Line", cfg.Line);
pt.put("MinItems", cfg.MinItems);
pt.put("MaxItems", cfg.MaxItems);
pt.put("Name", cfg.Name);
pt.put("ROI-Match", cfg.ROIMatch);
roiConfig.push_back(std::make_pair("", pt));
}
root.add_child("ROI_Config", roiConfig);
// ROI_Options
for (const auto& opt : params.ROI_Options) {
boost::property_tree::ptree optNode;
optNode.put("", opt);
roiOptions.push_back(std::make_pair("", optNode));
}
root.add_child("ROI_Options", roiOptions);
// Parameters
for (const auto& param : params.Parameters) {
boost::property_tree::ptree pt;
pt.put("Name", param.Name);
pt.put("DataType", param.DataType);
pt.put("NoOfdecimals", param.NoOfDecimals);
pt.put("MaxValue", param.MaxValue);
pt.put("MinValue", param.MinValue);
pt.put("StartValue", param.StartValue);
boost::property_tree::ptree listItemsNode;
for (const auto& item : param.ListItems) {
boost::property_tree::ptree itemNode;
itemNode.put("", item);
listItemsNode.push_back(std::make_pair("", itemNode));
}
pt.add_child("ListItems", listItemsNode);
pt.put("DefaultValue", param.DefaultValue);
pt.put("Value", param.Value);
parameters.push_back(std::make_pair("", pt));
}
root.add_child("Parameters", parameters);
// ROI_Values
for (const auto& rv : params.ROI_Values) {
boost::property_tree::ptree pt;
pt.put("ROI-Match", rv.ROIMatch);
boost::property_tree::ptree pointsNode;
for (const auto& point : rv.ROIPoints) {
boost::property_tree::ptree pointNode;
pointNode.put("x", point.x);
pointNode.put("y", point.y);
pointsNode.push_back(std::make_pair("", pointNode));
}
pt.add_child("ROIPoints", pointsNode);
pt.put("Option", rv.Option);
pt.put("Name", rv.Name);
pt.put("OriginalImageSize", rv.OriginalImageSize);
roiValues.push_back(std::make_pair("", pt));
}
root.add_child("ROI_Values", roiValues);
// Serialize to JSON (pretty-printing disabled)
std::ostringstream oss;
boost::property_tree::write_json(oss, root, false);
std::string jsonString = oss.str();
// Remove formatting control characters only; spaces inside string values
// must be preserved.
jsonString.erase(std::remove(jsonString.begin(), jsonString.end(), '\n'), jsonString.end());
jsonString.erase(std::remove(jsonString.begin(), jsonString.end(), '\r'), jsonString.end());
jsonString.erase(std::remove(jsonString.begin(), jsonString.end(), '\t'), jsonString.end());
return jsonString;
}
// Returns the axis-aligned bounding rectangle of a polygon, or a
// default-constructed (empty) rect when the polygon has no points.
cv::Rect ANSCENTER::ANSUtilityHelper::GetBoundingBoxFromPolygon(const std::vector<cv::Point>& polygon) {
if (polygon.empty()) {
return cv::Rect();
}
return cv::boundingRect(polygon);
}
std::string ANSCENTER::ANSUtilityHelper::VectorDetectionToJsonString(const std::vector<Object>& dets)
{
if (dets.empty()) {
return R"({"results":[]})";
}
try {
nlohmann::json root;
auto& results = root["results"] = nlohmann::json::array();
for (const auto& det : dets) {
results.push_back({
{"class_id", std::to_string(det.classId)},
//{"track_id", std::to_string(det.trackId)},
{"track_id", "0"},
{"class_name", det.className},
{"prob", std::to_string(det.confidence)},
{"x", std::to_string(det.box.x)},
{"y", std::to_string(det.box.y)},
{"width", std::to_string(det.box.width)},
{"height", std::to_string(det.box.height)},
{"mask", ""}, // TODO: convert masks to comma separated string
{"extra_info",det.extraInfo},
{"camera_id", det.cameraId},
{"polygon", PolygonToString(det.polygon)},
{"kps", KeypointsToString(det.kps)}
});
}
return root.dump();
}
catch (const std::exception& e) {
// Add your error logging here if needed
return R"({"results":[],"error":"Serialization failed"})";
}
}
cv::Mat ANSCENTER::ANSUtilityHelper::ReadImagePath(const std::string& imagePath) {
    // Thin wrapper around cv::imread; yields an empty Mat when the path
    // cannot be read (same contract as cv::imread itself).
    cv::Mat loaded = cv::imread(imagePath);
    return loaded;
}
std::vector<unsigned char> ANSCENTER::ANSUtilityHelper::DecodeBase64(const std::string& base64) {
    // Decodes a standard (RFC 4648) base64 string.
    // Behavior (unchanged from the previous implementation):
    //  - decoding stops at the first '=' (padding),
    //  - whitespace and any non-alphabet byte are silently skipped,
    //  - a truncated final quantum is decoded as far as its bits allow.
    // Fix: isspace() was called with a plain (possibly negative) char,
    // which is undefined behavior for bytes >= 0x80; the argument is now
    // cast to unsigned char first.
    static const std::string base64_chars =
        "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
        "abcdefghijklmnopqrstuvwxyz"
        "0123456789+/";
    std::vector<unsigned char> result;
    result.reserve((base64.size() / 4) * 3);
    unsigned char quad[4];  // decoded 6-bit groups of the current quantum
    int count = 0;          // how many groups are filled
    for (const char c : base64) {
        if (c == '=') break; // padding terminates the payload
        if (std::isspace(static_cast<unsigned char>(c))) continue;
        const std::string::size_type idx = base64_chars.find(c);
        if (idx == std::string::npos) continue; // ignore non-alphabet bytes
        quad[count++] = static_cast<unsigned char>(idx);
        if (count == 4) {
            // 4 x 6 bits -> 3 bytes.
            result.push_back(static_cast<unsigned char>((quad[0] << 2) | ((quad[1] & 0x30) >> 4)));
            result.push_back(static_cast<unsigned char>(((quad[1] & 0x0f) << 4) | ((quad[2] & 0x3c) >> 2)));
            result.push_back(static_cast<unsigned char>(((quad[2] & 0x03) << 6) | quad[3]));
            count = 0;
        }
    }
    if (count > 1) {
        // Trailing partial quantum: `count` input chars yield count-1 bytes.
        for (int j = count; j < 4; j++) quad[j] = 0;
        const unsigned char bytes[3] = {
            static_cast<unsigned char>((quad[0] << 2) | ((quad[1] & 0x30) >> 4)),
            static_cast<unsigned char>(((quad[1] & 0x0f) << 4) | ((quad[2] & 0x3c) >> 2)),
            static_cast<unsigned char>(((quad[2] & 0x03) << 6) | quad[3])
        };
        for (int j = 0; j < count - 1; j++) result.push_back(bytes[j]);
    }
    return result;
}
cv::Mat ANSCENTER::ANSUtilityHelper::ReadImageStreamBase64(const std::string& imageStreamBase64) {
    // Decode the base64 payload to raw bytes, then let OpenCV parse the
    // encoded image (JPEG/PNG/...) into a 3-channel BGR Mat.
    const std::vector<unsigned char> rawBytes = DecodeBase64(imageStreamBase64);
    return cv::imdecode(rawBytes, cv::IMREAD_COLOR);
}
cv::Mat ANSCENTER::ANSUtilityHelper::FormatToSquare(const cv::Mat& source) {
    // Embed the image in a black square whose side is the larger of the two
    // dimensions; the original content sits at the top-left corner.
    const int side = MAX(source.cols, source.rows);
    cv::Mat squared = cv::Mat::zeros(side, side, CV_8UC3);
    source.copyTo(squared(cv::Rect(0, 0, source.cols, source.rows)));
    return squared;
}
unsigned char* ANSCENTER::ANSUtilityHelper::CVMatToBytes(cv::Mat image, unsigned int& bufferLengh)
{
    // Copies the Mat's pixel buffer into a freshly allocated byte array.
    // The CALLER owns the returned pointer and must delete[] it.
    // Fix: the byte count is now computed in size_t (it was int, which can
    // overflow/truncate for large images); the redundant
    // `* sizeof(unsigned char)` factors were dropped.
    // NOTE(review): this assumes the Mat is continuous (a plain memcpy from
    // image.data) — confirm callers never pass ROI views.
    const size_t size = image.total() * image.elemSize();
    unsigned char* bytes = new unsigned char[size]; // caller must delete[]
    std::memcpy(bytes, image.data, size);
    bufferLengh = static_cast<unsigned int>(size);
    return bytes;
}
std::vector<std::string> ANSCENTER::ANSUtilityHelper::GetConfigFileContent(std::string modelConfigFile, ModelType& modelType, std::vector<int>& inputShape) {
/* {
"engine_type": "NVIDIA_GPU",
"detection_type" : "DETECTION",
"model_type" : "YOLOV8",
"input_size" : [640, 640] ,
"labels" : ["person", "bicycle", "car", "motorcycle"]
}*/
boost::property_tree::ptree pt;
std::vector <std::string> labels;
boost::property_tree::read_json(modelConfigFile, pt);
// 0.Get labels vector first
for (auto& item : pt.get_child("labels")) {
labels.push_back(item.second.get_value<std::string>());
}
inputShape.clear();
for (auto& itemInput : pt.get_child("input_size")) {
inputShape.push_back(itemInput.second.get_value<int>());
}
// TENSORFLOW = 0,
// YOLOV4 = 1,
// YOLOV5 = 2,
// YOLOV8 = 3,
// TENSORRT = 4,
// OPENVINO = 5
// FACEDETECT = 6,
// FACERECOGNIZE = 7,
// ALPR = 8,
// OCR = 9,
// ANOMALIB = 10,
// POSE = 11,
// SAM = 12,
// ODHUBMODEL = 13,
// YOLOV10RTOD = 14, // TensorRT Object Detection for Yolov10
// YOLOV10OVOD = 15, // OpenVINO Object Detection for Yolov10
// 1. Get engine type
//auto it = pt.get_child("engine_type");
//std::string value = it.get_value<std::string>();
std::string stModel = GetData<std::string>(pt, "model_type");
if (stModel == "TENSORFLOW") {
modelType = ModelType::TENSORFLOW;
std::cout << "Model Type: Tensorflow" << std::endl;
}
else if (stModel == "YOLOV4") {
modelType = ModelType::YOLOV4;
std::cout << "Model Type: yolov4" << std::endl;
}
else if (stModel == "YOLOV5") {
modelType = ModelType::YOLOV5;
std::cout << "Model Type: yolov5" << std::endl;
}
else if (stModel == "YOLOV8") {
modelType = ModelType::YOLOV8;
std::cout << "Model Type: yolov8" << std::endl;
}
else if (stModel == "TENSORRT") {
modelType = ModelType::TENSORRT;
std::cout << "Model Type: tensorrt" << std::endl;
}
else if (stModel == "OPENVINO") {
modelType = ModelType::OPENVINO;
std::cout << "Model Type: openvino" << std::endl;
}
else if (stModel == "FACEDETECT") {
modelType = ModelType::FACEDETECT;
std::cout << "Model Type: face detection" << std::endl;
}
else if (stModel == "FACERECOGNIZE") {
modelType = ModelType::FACERECOGNIZE;
std::cout << "Model Type: face recogniser" << std::endl;
}
else if (stModel == "ALPR") {
modelType = ModelType::ALPR;
std::cout << "Model Type: license plate recognition" << std::endl;
}
else if (stModel == "OCR") {
modelType = ModelType::OCR;
std::cout << "Model Type: ocr" << std::endl;
}
else if (stModel == "ANOMALIB") {
modelType = ModelType::ANOMALIB;
std::cout << "Model Type: anomalib" << std::endl;
}
else if (stModel == "POSE") {
modelType = ModelType::POSE;
std::cout << "Model Type: pose" << std::endl;
}
else if (stModel == "SAM") {
modelType = ModelType::SAM;
std::cout << "Model Type: sam" << std::endl;
}
else if (stModel == "ODHUBMODEL") {
modelType = ModelType::ODHUBMODEL;
std::cout << "Model Type: odhub" << std::endl;
}
else if (stModel == "YOLOV10RTOD") {
modelType = ModelType::YOLOV10RTOD;
std::cout << "Model Type: yolov10rt" << std::endl;
}
else if (stModel == "YOLOV10OVOD") {
modelType = ModelType::YOLOV10OVOD;
std::cout << "Model Type: yolov10ov" << std::endl;
}
else if (stModel == "CUSTOMDETECTOR") {
modelType = ModelType::CUSTOMDETECTOR;
std::cout << "Model Type: custom detector" << std::endl;
}
else {
modelType = ModelType::YOLOV8;
}
return labels;
}
MetaData ANSCENTER::ANSUtilityHelper::GetJson(const std::string& jsonPath) {
    // Loads anomalib-style inference metadata (score thresholds,
    // normalization min/max, inference size, per-channel mean/std) from a
    // JSON file. When the file does not exist, hard-coded defaults are
    // returned instead of failing.
    if (FileExist(jsonPath)) {
        MetaData metaData;
        std::vector<float> _mean;
        std::vector<float> _std;
        // NOTE(review): _transforms and ptTransform are declared but never used.
        std::vector<std::string> _transforms;
        boost::property_tree::ptree pt, ptTransform;
        boost::property_tree::read_json(jsonPath, pt);
        // Top-level anomaly-score / pixel thresholds and the min/max range
        // used for normalization. get_child throws if a key is missing.
        auto itImageThreshold = pt.get_child("image_threshold");
        auto itPixelThreshold = pt.get_child("pixel_threshold");
        auto itMin = pt.get_child("min");
        auto itMax = pt.get_child("max");
        float image_threshold = itImageThreshold.get_value<float>();
        float pixel_threshold = itPixelThreshold.get_value<float>();
        float min = itMin.get_value<float>();
        float max = itMax.get_value<float>();
        // Inference size is read from the FIRST entry of
        // transform.transform.transforms — assumed to carry height/width.
        auto rootNode = pt.get_child("transform");
        auto childNode1 = rootNode.get_child("transform");
        auto childNode2 = childNode1.get_child("transforms");
        auto itHeight = childNode2.begin()->second.get_child("height");
        auto itWidth = childNode2.begin()->second.get_child("width");
        int infer_height = itHeight.get_value<int>();
        int infer_width = itWidth.get_value<int>();
        _mean.clear();
        _std.clear();
        std::string _classFullName;
        // Scan the transform list for the "Normalize" step and pick up its
        // per-channel mean/std arrays.
        for (auto& itemInput : childNode2) {
            auto itClassFullName = itemInput.second.get_child("__class_fullname__");
            _classFullName = itClassFullName.get_value<std::string>();
            std::cout << "Class Full Name:" << _classFullName << std::endl;
            if (_classFullName == "Normalize") {
                _mean.clear();
                for (auto& item : itemInput.second.get_child("mean")) {
                    _mean.push_back(item.second.get_value<float>());
                }
                _std.clear();
                for (auto& item : itemInput.second.get_child("std")) {
                    _std.push_back(item.second.get_value<float>());
                }
            }
        }
        metaData.imageThreshold = image_threshold;
        metaData.pixelThreshold = pixel_threshold;
        metaData.min = min;
        metaData.max = max;
        metaData.inferSize[0] = infer_height;
        metaData.inferSize[1] = infer_width;
        metaData._mean = _mean;
        metaData._std = _std;
        return metaData;
    }
    else {
        // Fallback defaults: 256x256 input with ImageNet mean/std.
        MetaData metaData;
        metaData.imageThreshold = 15;
        metaData.pixelThreshold = 12;
        metaData.min = 0;
        metaData.max = 50;
        metaData.inferSize[0] = 256;
        metaData.inferSize[1] = 256;
        metaData._mean = { 0.485,0.456,0.406 };
        metaData._std = { 0.229 ,0.224 ,0.225 };
        return metaData;
    }
}
cv::Mat ANSCENTER::ANSUtilityHelper::Resize(const cv::Mat& src, int dst_height, int dst_width,
    const std::string& interpolation) {
    // Resizes `src` to (dst_width x dst_height) using the named method.
    // Supported names: "bilinear", "nearest", "area", "bicubic", "lanczos".
    // Fix: an unknown name previously hit assert(0), which is a no-op in
    // NDEBUG builds and returned an UNINITIALIZED buffer; it now throws.
    int flag;
    if (interpolation == "bilinear") {
        flag = cv::INTER_LINEAR;
    }
    else if (interpolation == "nearest") {
        flag = cv::INTER_NEAREST;
    }
    else if (interpolation == "area") {
        flag = cv::INTER_AREA;
    }
    else if (interpolation == "bicubic") {
        flag = cv::INTER_CUBIC;
    }
    else if (interpolation == "lanczos") {
        flag = cv::INTER_LANCZOS4;
    }
    else {
        throw std::runtime_error("Resize: unsupported interpolation '" + interpolation + "'");
    }
    cv::Mat dst;
    cv::resize(src, dst, cv::Size(dst_width, dst_height), 0, 0, flag);
    return dst;
}
cv::Mat ANSCENTER::ANSUtilityHelper::Crop(const cv::Mat& src, int top, int left, int bottom, int right) {
    // Inclusive crop: both (top,left) and (bottom,right) rows/cols are kept,
    // hence the +1 on the upper bounds. clone() detaches from src's buffer.
    const cv::Range rowSpan(top, bottom + 1);
    const cv::Range colSpan(left, right + 1);
    return src(rowSpan, colSpan).clone();
}
cv::Mat ANSCENTER::ANSUtilityHelper::Divide(const cv::Mat& src, float divide) {
    // Scale every pixel by 1/divide while converting to 3-channel float.
    cv::Mat scaled;
    src.convertTo(scaled, CV_32FC3, 1.0 / divide);
    return scaled;
}
cv::Mat ANSCENTER::ANSUtilityHelper::Normalize(cv::Mat& src, const std::vector<float>& mean, const std::vector<float>& std,
    bool to_rgb, bool inplace) {
    // Per-channel normalization in 32-bit float: dst = (src - mean) / std.
    // `inplace` only takes effect when src is already CV_32F; otherwise a
    // converted copy is produced. `to_rgb` swaps BGR->RGB for 3-channel input.
    assert(src.channels() == mean.size());
    assert(mean.size() == std.size());
    cv::Mat dst;
    if (src.depth() == CV_32F) {
        dst = inplace ? src : src.clone();
    }
    else {
        src.convertTo(dst, CV_32FC(src.channels()));
    }
    if (to_rgb && dst.channels() == 3) {
        cv::cvtColor(dst, dst, cv::ColorConversionCodes::COLOR_BGR2RGB);
    }
    // Pad mean/std up to 3 entries with identity values (0 mean, 1 std)
    // because the cv::Scalar constructors below always read 3 components.
    auto _mean = mean;
    auto _std = std;
    for (auto i = mean.size(); i < 3; ++i) {
        _mean.push_back(0.);
        _std.push_back(1.0);
    }
    // Division by std is expressed as multiplication by its reciprocal.
    cv::Scalar mean_scalar(_mean[0], _mean[1], _mean[2]);
    cv::Scalar std_scalar(1.0 / _std[0], 1.0 / _std[1], 1.0 / _std[2]);
    cv::subtract(dst, mean_scalar, dst);
    cv::multiply(dst, std_scalar, dst);
    return dst;
}
cv::Mat ANSCENTER::ANSUtilityHelper::Transpose(const cv::Mat& src) {
    // Reinterprets an HxWxC image as a (H*W) x C matrix — the Mat ctor wraps
    // src.data WITHOUT copying — then transposes it to C x (H*W), i.e. an
    // HWC -> CHW flattening. cv::transpose allocates _dst, so the returned
    // Mat owns its own buffer and does not alias src.
    cv::Mat _src{ src.rows * src.cols, src.channels(), CV_MAKETYPE(src.depth(), 1), src.data };
    cv::Mat _dst;
    cv::transpose(_src, _dst);
    return _dst;
}
cv::Mat ANSCENTER::ANSUtilityHelper::Pad(const cv::Mat& src, int top, int left, int bottom, int right, int border_type,
    float val) {
    // Add a border on each side; `val` fills constant borders (replicated
    // across up to four channels).
    const cv::Scalar fill{ val, val, val, val };
    cv::Mat padded;
    cv::copyMakeBorder(src, padded, top, bottom, left, right, border_type, fill);
    return padded;
}
cv::Mat ANSUtilityHelper::MeanAxis0(const cv::Mat& src) {
    // Column-wise mean of a CV_32F matrix: returns a 1 x cols row vector.
    const int rowCount = src.rows;
    const int colCount = src.cols;
    cv::Mat columnMeans(1, colCount, CV_32F);
    for (int c = 0; c < colCount; c++) {
        float acc = 0;
        for (int r = 0; r < rowCount; r++) {
            acc += src.at<float>(r, c);
        }
        columnMeans.at<float>(0, c) = acc / rowCount;
    }
    return columnMeans;
}
cv::Mat ANSUtilityHelper::ElementwiseMinus(const cv::Mat& A, const cv::Mat& B) {
    // Subtract row vector B (1 x cols) from every row of A (CV_32F).
    // NOTE: if the widths disagree the result is returned unmodified
    // (uninitialized) — the assert only fires in debug builds.
    cv::Mat diff(A.rows, A.cols, A.type());
    assert(B.cols == A.cols);
    if (B.cols == A.cols) {
        for (int r = 0; r < A.rows; r++) {
            for (int c = 0; c < B.cols; c++) {
                diff.at<float>(r, c) = A.at<float>(r, c) - B.at<float>(0, c);
            }
        }
    }
    return diff;
}
cv::Mat ANSUtilityHelper::VarAxis0(const cv::Mat& src) {
    // Column-wise (population) variance: mean of squared deviations.
    cv::Mat centered = ElementwiseMinus(src, MeanAxis0(src));
    cv::multiply(centered, centered, centered);
    return MeanAxis0(centered);
}
int ANSUtilityHelper::MatrixRank(cv::Mat M) {
    // Numerical rank: number of singular values above a small tolerance.
    cv::Mat singularValues, leftVectors, rightVectorsT;
    cv::SVD::compute(M, singularValues, leftVectors, rightVectorsT);
    const cv::Mat1b significant = singularValues > 0.0001;
    return cv::countNonZero(significant);
}
cv::Mat ANSUtilityHelper::SimilarTransform(cv::Mat& dst, cv::Mat& src) {
    // Estimates a (dim+1)x(dim+1) similarity transform (rotation + uniform
    // scale + translation) between the two landmark sets, in the style of a
    // NumPy Umeyama-algorithm port (see the np.dot comments below).
    // NOTE(review): the *_mean/*_demean names are swapped relative to the
    // parameters (src_mean is computed from `dst` and vice versa). The math
    // follows the variables, not the names — confirm before renaming.
    int num = dst.rows;
    int dim = dst.cols;
    cv::Mat src_mean = MeanAxis0(dst);
    cv::Mat dst_mean = MeanAxis0(src);
    cv::Mat src_demean = ElementwiseMinus(dst, src_mean);
    cv::Mat dst_demean = ElementwiseMinus(src, dst_mean);
    // Cross-covariance of the demeaned point sets.
    cv::Mat A = (dst_demean.t() * src_demean) / static_cast<float>(num);
    // d carries the reflection-correcting sign for the last axis.
    cv::Mat d(dim, 1, CV_32F);
    d.setTo(1.0f);
    if (cv::determinant(A) < 0) {
        d.at<float>(dim - 1, 0) = -1;
    }
    cv::Mat T = cv::Mat::eye(dim + 1, dim + 1, CV_32F);
    cv::Mat U, S, V;
    cv::SVD::compute(A, S, U, V);
    int rank = MatrixRank(A);
    if (rank == 0) {
        // Degenerate input: assert is a no-op in release builds, so T
        // silently stays close to identity here.
        assert(rank == 0);
    }
    else if (rank == dim - 1) {
        if (cv::determinant(U) * cv::determinant(V) > 0) {
            T.rowRange(0, dim).colRange(0, dim) = U * V;
        }
        else {
            // NOTE(review): the chained assignment below stores -1 into both
            // d and s, d is re-set to -1 again, and the U*V assignment is
            // immediately overwritten by U*twp — this looks like a literal
            // port artifact; verify against the reference implementation.
            int s = d.at<float>(dim - 1, 0) = -1;
            d.at<float>(dim - 1, 0) = -1;
            T.rowRange(0, dim).colRange(0, dim) = U * V;
            cv::Mat diag_ = cv::Mat::diag(d);
            cv::Mat twp = diag_ * V; // np.dot(np.diag(d), V.T)
            // NOTE(review): B and C are computed but never used.
            cv::Mat B = cv::Mat::zeros(3, 3, CV_8UC1);
            cv::Mat C = B.diag(0);
            T.rowRange(0, dim).colRange(0, dim) = U * twp;
            d.at<float>(dim - 1, 0) = s;
        }
    }
    else {
        cv::Mat diag_ = cv::Mat::diag(d);
        cv::Mat twp = diag_ * V.t(); // np.dot(np.diag(d), V.T)
        cv::Mat res = U * twp; // U  -- NOTE(review): res is unused
        T.rowRange(0, dim).colRange(0, dim) = -U.t() * twp;
    }
    // Scale factor derived from the variance of the demeaned points.
    cv::Mat var_ = VarAxis0(src_demean);
    float val = cv::sum(var_).val[0];
    cv::Mat res;
    cv::multiply(d, S, res);
    float scale = 1.0 / val * cv::sum(res).val[0];
    T.rowRange(0, dim).colRange(0, dim) =
        -T.rowRange(0, dim).colRange(0, dim).t();
    cv::Mat temp1 = T.rowRange(0, dim).colRange(0, dim); // T[:dim, :dim]
    cv::Mat temp2 = src_mean.t(); // src_mean.T
    cv::Mat temp3 = temp1 * temp2; // np.dot(T[:dim, :dim], src_mean.T)
    cv::Mat temp4 = scale * temp3;
    // Translation column, then apply the scale to the rotation block.
    T.rowRange(0, dim).colRange(dim, dim + 1) = -(temp4 - dst_mean.t());
    T.rowRange(0, dim).colRange(0, dim) *= scale;
    return T;
}
std::vector<cv::Mat> ANSUtilityHelper::AlignFaceWithFivePoints(const cv::Mat& image,
    const std::vector<std::array<int, 4>> boxes,
    std::vector<std::array<float, 2>> landmarks) {
    // Produces one 112x112 aligned face crop per group of five landmarks by
    // estimating a similarity transform to the reference template below.
    // `boxes` is only used for the reserve() capacity and the empty check —
    // the warp is driven entirely by `landmarks`, consumed five points at a
    // time (assumes landmarks.size() is a multiple of 5 — TODO confirm callers).
    std::vector<cv::Mat> output_images;
    output_images.reserve(boxes.size());
    if (image.empty() || !image.data || !image.u) return output_images;
    else {
        // Reference landmark template in 112x112 pixel coordinates
        // (eyes, nose tip, mouth corners).
        std::vector<std::array<float, 2>> std_landmarks = { {38.2946f, 51.6963f},
            {73.5318f, 51.5014f},
            {56.0252f, 71.7366f},
            {41.5493f, 92.3655f},
            {70.7299f, 92.2041f}
        };
        if (boxes.empty())return output_images;
        // Wraps std_landmarks' contiguous storage without copying.
        cv::Mat src(5, 2, CV_32FC1, std_landmarks.data());
        for (int i = 0; i < landmarks.size(); i += 5) {
            // View over the current face's five landmark points.
            cv::Mat dst(5, 2, CV_32FC1, landmarks.data() + i);
            cv::Mat m = SimilarTransform(dst, src);
            cv::Mat map_matrix;
            // Top 2x3 block of the 3x3 similarity matrix is the affine warp.
            cv::Rect map_matrix_r = cv::Rect(0, 0, 3, 2);
            cv::Mat(m, map_matrix_r).copyTo(map_matrix);
            cv::Mat cropped_image_aligned;
            int outputSize = 112;// MAX(width, height);
            cv::warpAffine(image, cropped_image_aligned, map_matrix, { outputSize,outputSize });
            if (cropped_image_aligned.empty()) {
                std::cout << "croppedImageAligned is empty." << std::endl;
            }
            output_images.emplace_back(cropped_image_aligned);
        }
        return output_images;
    }
}
std::vector<cv::Mat>ANSUtilityHelper::GetCroppedFaces(const cv::Mat& image, const std::vector<std::array<int, 4>> boxes) {
    // Crops one sub-image per box ([x1, y1, x2, y2] corner coordinates).
    // Fix: each box is now clamped to the image bounds first, so
    // out-of-range coordinates no longer throw cv::Exception; a box fully
    // outside the image yields an empty Mat so the output stays aligned
    // with `boxes` by index. Each crop is cloned (independent buffer).
    std::vector<cv::Mat> croppedFaces;
    if (image.empty() || !image.data || !image.u) return croppedFaces;
    const cv::Rect imageRect(0, 0, image.cols, image.rows);
    croppedFaces.reserve(boxes.size());
    for (const auto& b : boxes) {
        cv::Rect facePos(cv::Point(b[0], b[1]), cv::Point(b[2], b[3]));
        facePos &= imageRect; // intersect with the valid image area
        if (facePos.width > 0 && facePos.height > 0) {
            croppedFaces.push_back(image(facePos).clone());
        }
        else {
            croppedFaces.push_back(cv::Mat());
        }
    }
    return croppedFaces;
}
cv::Mat ANSUtilityHelper::GetCroppedFace(const cv::Mat& image, const int x1, const int y1, const int x2, const int y2) {
    // Crops the rectangle spanned by (x1,y1)-(x2,y2).
    // Fix: the rect is now clamped to the image bounds, so out-of-range
    // coordinates no longer throw cv::Exception; an invalid/degenerate box
    // returns an empty Mat. The crop is cloned (independent buffer).
    if (image.empty() || !image.data || !image.u) return cv::Mat();
    cv::Rect facePos(cv::Point(x1, y1), cv::Point(x2, y2));
    facePos &= cv::Rect(0, 0, image.cols, image.rows);
    if (facePos.width <= 0 || facePos.height <= 0) return cv::Mat();
    return image(facePos).clone();
}
cv::Mat ANSUtilityHelper::GetCroppedFaceScale(const cv::Mat& image,
    const int x1, const int y1,
    const int x2, const int y2,
    int croppedImageSize)
{
    // Crops the face rectangle and resizes it to a square of at least
    // 112x112 (minimum required by the OpenVINO FaceNet model).
    // Returns an empty Mat on any failure.
    // Fixes: (a) the whole frame was cloned BEFORE the empty-check and even
    // though only a small ROI is needed — the clone is gone; (b) the rect
    // is clamped to the image bounds so out-of-range boxes cannot throw.
    cv::Mat resizedImage;
    try {
        if (image.empty() || !image.data) return resizedImage;
        // 1. Bounding rectangle for the crop, clamped to the image.
        cv::Rect facePos(cv::Point(x1, y1), cv::Point(x2, y2));
        facePos &= cv::Rect(0, 0, image.cols, image.rows);
        // Ensure the rectangle has valid dimensions after clamping.
        if (facePos.width <= 0 || facePos.height <= 0) return resizedImage;
        // 2. Enforce the minimum output size.
        croppedImageSize = std::max(croppedImageSize, 112);
        // 3. Crop (zero-copy ROI view) and resize to the requested square.
        cv::resize(image(facePos), resizedImage,
            cv::Size(croppedImageSize, croppedImageSize), 0, 0, cv::INTER_LANCZOS4);
    }
    catch (const std::exception& e) {
        std::cerr << "Error in GetCroppedFaceScale: " << e.what() << std::endl;
    }
    return resizedImage;
}
cv::Mat ANSUtilityHelper::GetTransform(cv::Mat* src, cv::Mat* dst) {
    // Computes a 2x3 affine transform aligning the *src point set to *dst
    // (Procrustes-style: demean both sets, normalize by their standard
    // deviations, recover the rotation via SVD, then rebuild translation).
    // WARNING: mutates *src and *dst IN PLACE — both are demeaned and
    // divided by their deviation before the SVD.
    cv::Mat col_mean_src;
    reduce(*src, col_mean_src, 0, cv::REDUCE_AVG);
    for (int i = 0; i < src->rows; i++) {
        src->row(i) -= col_mean_src;
    }
    cv::Mat col_mean_dst;
    reduce(*dst, col_mean_dst, 0, cv::REDUCE_AVG);
    for (int i = 0; i < dst->rows; i++) {
        dst->row(i) -= col_mean_dst;
    }
    cv::Scalar mean, dev_src, dev_dst;
    // Guard against division by ~zero deviation with float epsilon.
    cv::meanStdDev(*src, mean, dev_src);
    dev_src(0) =
        std::max(static_cast<double>(std::numeric_limits<float>::epsilon()), dev_src(0));
    *src /= dev_src(0);
    cv::meanStdDev(*dst, mean, dev_dst);
    dev_dst(0) =
        std::max(static_cast<double>(std::numeric_limits<float>::epsilon()), dev_dst(0));
    *dst /= dev_dst(0);
    // Rotation from the SVD of the cross-covariance of the normalized sets.
    cv::Mat w, u, vt;
    cv::SVD::compute((*src).t() * (*dst), w, u, vt);
    cv::Mat r = (u * vt).t();
    // Assemble [scaled rotation | translation] into a 2x3 matrix.
    cv::Mat m(2, 3, CV_32F);
    m.colRange(0, 2) = r * (dev_dst(0) / dev_src(0));
    m.col(2) = (col_mean_dst.t() - m.colRange(0, 2) * col_mean_src.t());
    return m;
}
void ANSUtilityHelper::AlignFacesExt(std::vector<cv::Mat>* face_images, std::vector<cv::Mat>* landmarks_vec) {
    // Aligns each face image IN PLACE by warping it directly to 112x112
    // using its five landmarks against the file-level
    // ref_landmarks_normalized template.
    // Side effect: landmarks_vec entries (given normalized to [0,1]) are
    // scaled to pixel coordinates in place — the caller's data is modified.
    if (landmarks_vec->size() == 0) {
        return;
    }
    CV_Assert(face_images->size() == landmarks_vec->size());
    cv::Mat ref_landmarks = cv::Mat(5, 2, CV_32F);
    for (size_t j = 0; j < face_images->size(); j++) {
        // Scale both the reference template and this face's landmarks to
        // the current image's pixel dimensions.
        for (int i = 0; i < ref_landmarks.rows; i++) {
            ref_landmarks.at<float>(i, 0) =
                ref_landmarks_normalized[2 * i] * face_images->at(j).cols;
            ref_landmarks.at<float>(i, 1) =
                ref_landmarks_normalized[2 * i + 1] * face_images->at(j).rows;
            landmarks_vec->at(j).at<float>(i, 0) *= face_images->at(j).cols;
            landmarks_vec->at(j).at<float>(i, 1) *= face_images->at(j).rows;
        }
        cv::Mat m = GetTransform(&ref_landmarks, &landmarks_vec->at(j));
        // Warp straight to the 112x112 recognizer input size.
        cv::warpAffine(face_images->at(j), face_images->at(j), m, { 112,112 }, cv::WARP_INVERSE_MAP);
    }
}
void ANSUtilityHelper::AlignFaces(std::vector<cv::Mat>* face_images, std::vector<cv::Mat>* landmarks_vec) {
    // Aligns each face image IN PLACE using its five landmarks against the
    // file-level ref_landmarks_normalized template; the warp is performed
    // at the image's original size and the result is then resized to
    // 112x112.
    // Side effect: landmarks_vec entries (given normalized to [0,1]) are
    // scaled to pixel coordinates in place — the caller's data is modified.
    if (landmarks_vec->size() == 0) {
        return;
    }
    CV_Assert(face_images->size() == landmarks_vec->size());
    cv::Mat ref_landmarks = cv::Mat(5, 2, CV_32F);
    for (size_t j = 0; j < face_images->size(); j++) {
        // Scale both the reference template and this face's landmarks to
        // the current image's pixel dimensions.
        for (int i = 0; i < ref_landmarks.rows; i++) {
            ref_landmarks.at<float>(i, 0) =
                ref_landmarks_normalized[2 * i] * face_images->at(j).cols;
            ref_landmarks.at<float>(i, 1) =
                ref_landmarks_normalized[2 * i + 1] * face_images->at(j).rows;
            landmarks_vec->at(j).at<float>(i, 0) *= face_images->at(j).cols;
            landmarks_vec->at(j).at<float>(i, 1) *= face_images->at(j).rows;
        }
        cv::Mat m = GetTransform(&ref_landmarks, &landmarks_vec->at(j));
        cv::warpAffine(face_images->at(j), face_images->at(j), m, face_images->at(j).size(), cv::WARP_INVERSE_MAP);
        cv::resize(face_images->at(j), face_images->at(j), cv::Size(112, 112));
    }
}
std::pair<cv::Mat, cv::Mat> ANSUtilityHelper::AlignFacesSCRFD(
    const cv::Mat& input_mat,
    const std::vector<cv::Point2f>& face_landmark_5)
{
    // Aligns a face to the canonical 112x112 ArcFace layout from the five
    // SCRFD landmarks (eyes, nose tip, mouth corners).
    // Returns {aligned 112x112 face, 2x3 affine matrix used for the warp}.
    // Throws std::runtime_error on bad input or a failed estimation.
    if (face_landmark_5.size() != 5) {
        throw std::runtime_error(
            "AlignFacesSCRFD: Expected 5 landmarks, got " +
            std::to_string(face_landmark_5.size()));
    }
    // ArcFace template landmarks, normalized to [0,1], scaled once to the
    // 112x112 output on first use (thread-safe static initialization).
    static const std::vector<cv::Point2f> kTemplate112 = [] {
        const cv::Point2f normalized[5] = {
            {0.34191607f, 0.46157411f}, // left eye
            {0.65653393f, 0.45983393f}, // right eye
            {0.50022500f, 0.64050536f}, // nose tip
            {0.37097589f, 0.82469196f}, // left mouth
            {0.63151696f, 0.82325089f}  // right mouth
        };
        std::vector<cv::Point2f> scaled;
        scaled.reserve(5);
        for (const cv::Point2f& p : normalized) {
            scaled.emplace_back(p.x * 112.0f, p.y * 112.0f);
        }
        return scaled;
    }();
    // Least-squares similarity transform. With exactly 5 correspondences
    // there are no outliers to reject, so RANSAC is unnecessary.
    cv::Mat affine_matrix = cv::estimateAffinePartial2D(
        face_landmark_5,  // src points
        kTemplate112      // dst points (precomputed)
    );
    if (affine_matrix.empty()) {
        throw std::runtime_error(
            "AlignFacesSCRFD: Failed to estimate affine transformation");
    }
    // Warp the input into the canonical 112x112 frame.
    cv::Mat aligned_face;
    cv::warpAffine(
        input_mat,
        aligned_face,
        affine_matrix,
        cv::Size(112, 112),
        cv::INTER_AREA,
        cv::BORDER_REPLICATE
    );
    return std::make_pair(aligned_face, affine_matrix);
}
//std::pair<cv::Mat, cv::Mat> ANSUtilityHelper::AlignFacesSCRFD(
// const cv::Mat& input_mat,
// const std::vector<cv::Point2f>& face_landmark_5)
//{
// // Sanity check
// if (face_landmark_5.size() != 5) {
// throw std::runtime_error(
// "AlignFacesSCRFD: Expected 5 landmarks, got " +
// std::to_string(face_landmark_5.size()));
// }
// // ---------------------------------------------------------------------
// // Precomputed ArcFace template landmarks scaled to 112x112
// // ---------------------------------------------------------------------
// static const std::vector<cv::Point2f> kTemplate112 = []() {
// // ArcFace template landmarks (normalized to [0, 1])
// const std::vector<cv::Point2f> face_template = {
// {0.34191607f, 0.46157411f}, // left eye
// {0.65653393f, 0.45983393f}, // right eye
// {0.50022500f, 0.64050536f}, // nose tip
// {0.37097589f, 0.82469196f}, // left mouth
// {0.63151696f, 0.82325089f} // right mouth
// };
// std::vector<cv::Point2f> normed_template;
// normed_template.reserve(face_template.size());
// for (const auto& pt : face_template) {
// normed_template.emplace_back(pt.x * 112.0f, pt.y * 112.0f);
// }
// return normed_template;
// }();
// // ---------------------------------------------------------------------
// // Use 3 keypoints (eyes + mouth center) to compute affine transform
// // ---------------------------------------------------------------------
// // input landmarks: [Leye, Reye, Nose, Lmouth, Rmouth]
// std::vector<cv::Point2f> src(3), dst(3);
// // source: eyes + mouth center
// src[0] = face_landmark_5[0]; // left eye
// src[1] = face_landmark_5[1]; // right eye
// src[2] = (face_landmark_5[3] + face_landmark_5[4]) * 0.5f; // mouth center
// // destination: corresponding template points
// dst[0] = kTemplate112[0]; // left eye
// dst[1] = kTemplate112[1]; // right eye
// dst[2] = (kTemplate112[3] + kTemplate112[4]) * 0.5f; // mouth center
// cv::Mat affine_matrix = cv::getAffineTransform(src, dst);
// if (affine_matrix.empty()) {
// throw std::runtime_error("AlignFacesSCRFD: Failed to compute affine matrix");
// }
// // ---------------------------------------------------------------------
// // Apply affine warp to align face to 112x112
// // ---------------------------------------------------------------------
// cv::Mat aligned_face;
// cv::warpAffine(
// input_mat,
// aligned_face,
// affine_matrix,
// cv::Size(112, 112),
// cv::INTER_AREA,
// cv::BORDER_REPLICATE
// );
// return std::make_pair(aligned_face, affine_matrix);
//}
//std::pair<cv::Mat, cv::Mat> ANSUtilityHelper::AlignFacesSCRFD(const cv::Mat& input_mat,const std::vector<cv::Point2f>& face_landmark_5)
//{
// // Sanity check
// if (face_landmark_5.size() != 5) {
// throw std::runtime_error("AlignFacesSCRFD: Expected 5 landmarks, got " + std::to_string(face_landmark_5.size()));
// }
// // ArcFace template landmarks (normalized to [0, 1])
// const std::vector<cv::Point2f> face_template = {
// cv::Point2f(0.34191607f, 0.46157411f), // left eye
// cv::Point2f(0.65653393f, 0.45983393f), // right eye
// cv::Point2f(0.50022500f, 0.64050536f), // nose tip
// cv::Point2f(0.37097589f, 0.82469196f), // left mouth
// cv::Point2f(0.63151696f, 0.82325089f) // right mouth
// };
// // Scale template to 112x112 output image
// std::vector<cv::Point2f> normed_template;
// for (const auto& pt : face_template) {
// normed_template.emplace_back(pt.x * 112.0f, pt.y * 112.0f);
// }
// // Estimate affine transform from input landmarks to template
// cv::Mat inliers;
// cv::Mat affine_matrix = cv::estimateAffinePartial2D(
// face_landmark_5,
// normed_template,
// inliers,
// cv::RANSAC,
// 100.0
// );
// if (affine_matrix.empty()) {
// throw std::runtime_error("AlignFacesSCRFD: Failed to estimate affine transformation");
// }
// // Apply affine warp to align face
// cv::Mat aligned_face;
// cv::warpAffine(
// input_mat,
// aligned_face,
// affine_matrix,
// cv::Size(112, 112),
// cv::INTER_AREA,
// cv::BORDER_REPLICATE
// );
// return std::make_pair(aligned_face, affine_matrix);
//}
bool ANSUtilityHelper::ModelOptimizer(std::string modelZipFilePath, std::string modelFileZipPassword, int fp16, std::string& optimisedModelFolder, int inputImageHeight, int inputImageWidth) {
    // Extracts a (password-protected) model archive, locates train_last.onnx
    // inside it, and builds a TensorRT engine at FP16 or FP32 precision for
    // a fixed batch size of 1 at the given input resolution.
    // Returns true on success; `optimisedModelFolder` receives the folder
    // the archive was extracted into.
    // SECURITY NOTE(review): fallback archive passwords are hard-coded
    // below — anyone with this binary can extract protected model files.
    // Consider moving these to secure configuration.
    try {
        bool _fp16 = false;
        if (fp16 == 1) _fp16 = true;
        //A. Unzip model folder
        std::string _modelFolder = "";
        _modelFolder.clear();
        // 0. Check if the modelZipFilePath exist?
        if (!FileExist(modelZipFilePath)) {
            return false;
        }
        // 1. Unzip model zip file to a special location with folder name as model file (and version)
        // The caller-supplied password (if any) is tried first, then the
        // built-in fallbacks, until one succeeds.
        std::string outputFolder;
        std::vector<std::string> passwordArray;
        if (!modelFileZipPassword.empty()) passwordArray.push_back(modelFileZipPassword);
        passwordArray.push_back("Sh7O7nUe7vJ/417W0gWX+dSdfcP9hUqtf/fEqJGqxYL3PedvHubJag==");
        passwordArray.push_back("3LHxGrjQ7kKDJBD9MX86H96mtKLJaZcTYXrYRdQgW8BKGt7enZHYMg==");
        passwordArray.push_back("AnsCustomModels20@$");
        passwordArray.push_back("AnsDemoModels20@!");
        std::string modelName = GetFileNameWithoutExtension(modelZipFilePath);
        size_t vectorSize = passwordArray.size();
        for (size_t i = 0; i < vectorSize; i++) {
            if (ExtractPasswordProtectedZip(modelZipFilePath, passwordArray[i], modelName, _modelFolder, false))
                break; // Break the loop when the condition is met.
        }
        // 2. Check if the outputFolder exist
        if (!std::filesystem::exists(_modelFolder)) {
            return false; // That means the model file is not exist or the password is not correct
        }
        // 3. Create model file path
        std::string _modelFilePath;
        std::string onnxfile = CreateFilePath(_modelFolder, "train_last.onnx");
        if (std::filesystem::exists(onnxfile)) {
            _modelFilePath = onnxfile;
        }
        else {
            return false;
        }
        optimisedModelFolder = _modelFolder;
        // Specify options for GPU inference
        ANSCENTER::Options options;
        // This particular model only supports a fixed batch size of 1
        options.optBatchSize = 1;
        options.maxBatchSize = 1;
        // Min/opt/max shapes are all pinned to the requested resolution.
        options.maxInputHeight = inputImageHeight;
        options.minInputHeight = inputImageHeight;
        options.optInputHeight = inputImageHeight;
        options.maxInputWidth = inputImageWidth;
        options.minInputWidth = inputImageWidth;
        options.optInputWidth = inputImageWidth;
        options.engineFileDir = _modelFolder;
        // Use FP16 precision to speed up inference
        if (_fp16) {
            options.precision = ANSCENTER::Precision::FP16;
        }
        else {
            options.precision = ANSCENTER::Precision::FP32;
        }
        // Identity normalization (subtract 0, divide by 1) + normalize flag.
        const std::array<float, 3> SUB_VALS{ 0.f, 0.f, 0.f };
        const std::array<float, 3> DIV_VALS{ 1.f, 1.f, 1.f };
        const bool NORMALIZE = true;
        // Create our TensorRT inference engine
        std::unique_ptr<Engine<float>> m_trtEngine = std::make_unique<Engine<float>>(options);
        // Build the onnx model into a TensorRT engine file
        auto succ = m_trtEngine->buildWithRetry(_modelFilePath, SUB_VALS, DIV_VALS, NORMALIZE);
        if (!succ) {
            // NOTE(review): errMsg is constructed but never logged or used.
            const std::string errMsg =
                "Error: Unable to build the TensorRT engine. "
                "Try increasing TensorRT log severity to kVERBOSE.";
            return false;
        }
        return true;
    }
    catch (std::exception& e) {
        std::cout << "TENSORRTOD::OptimizeModel" << e.what();;
        return false;
    }
}
cv::Rect ANSUtilityHelper::BoundingBoxFromContour(std::vector<cv::Point> contour) {
    // Axis-aligned bounding box of a contour. Empty contours, or contours
    // with area below 1000 px^2, yield a default-constructed (empty) Rect.
    if (contour.empty()) {
        return cv::Rect();
    }
    if (cv::contourArea(contour) < 1000) {
        return cv::Rect();
    }
    /*
    NOTE:
    - Track the coordinate extrema over all contour points.
    */
    cv::Point minp = contour[0];
    cv::Point maxp = contour[0];
    for (const cv::Point& p : contour) {
        minp.x = std::min(minp.x, p.x);
        minp.y = std::min(minp.y, p.y);
        maxp.x = std::max(maxp.x, p.x);
        maxp.y = std::max(maxp.y, p.y);
    }
    /*
    NOTE:
    - cv::Rect(minp, maxp) treats maxp as exclusive (width = maxp.x - minp.x),
      matching the original implementation's behavior.
    */
    return cv::Rect(minp, maxp);
}
std::string ANSUtilityHelper::PolygonToString(const std::vector<cv::Point2f>& polygon) {
    // Serialize as "x;y;x;y;..." with 3 decimal places and no trailing
    // separator; an empty polygon yields an empty string.
    if (polygon.empty()) {
        return "";
    }
    std::string out;
    out.reserve(polygon.size() * 20);
    char scratch[64];
    bool first = true;
    for (const cv::Point2f& pt : polygon) {
        snprintf(scratch, sizeof(scratch), first ? "%.3f;%.3f" : ";%.3f;%.3f", pt.x, pt.y);
        out += scratch;
        first = false;
    }
    return out;
}
std::string ANSUtilityHelper::KeypointsToString(const std::vector<float>& kps) {
    // Serialize as "v;v;..." with 3 decimal places and no trailing
    // separator; an empty vector yields an empty string.
    if (kps.empty()) {
        return "";
    }
    std::string out;
    out.reserve(kps.size() * 10);
    char scratch[32];
    bool first = true;
    for (const float value : kps) {
        if (!first) out += ';';
        snprintf(scratch, sizeof(scratch), "%.3f", value);
        out += scratch;
        first = false;
    }
    return out;
}
//std::string ANSUtilityHelper::PolygonToString(const std::vector<cv::Point2f>& polygon) {
// if (polygon.empty()) {
// return ""; // Return empty string if kps is empty
// }
// std::ostringstream oss;
// for (const auto& point : polygon) {
// oss << point.x << ";" << point.y << ";";
// }
// std::string result = oss.str();
// // Remove the trailing semicolon
// if (!result.empty()) {
// result.pop_back();
// }
// return result;
//}
//std::string ANSUtilityHelper::KeypointsToString(const std::vector<float>& kps) {
// if (kps.empty()) {
// return ""; // Return empty string if kps is empty
// }
// std::ostringstream oss;
// for (const auto& kp : kps) {
// oss << kp << ";";
// }
// std::string result = oss.str();
// // Remove the trailing semicolon
// if (!result.empty()) {
// result.pop_back();
// }
// return result;
//}
std::vector<cv::Point2f> ANSUtilityHelper::RectToNormalizedPolygon(const cv::Rect& rect, float imageWidth, float imageHeight) {
    // Convert a pixel-space rect into a closed, normalized ([0,1]) polygon
    // (clockwise from top-left; the first point is repeated as the last).
    // Invalid image dimensions yield an empty polygon.
    if (imageWidth <= 0 || imageHeight <= 0) {
        return {};
    }
    const float left = rect.x / imageWidth;
    const float top = rect.y / imageHeight;
    const float right = (rect.x + rect.width) / imageWidth;
    const float bottom = (rect.y + rect.height) / imageHeight;
    return {
        { left,  top },    // Top-left
        { right, top },    // Top-right
        { right, bottom }, // Bottom-right
        { left,  bottom }, // Bottom-left
        { left,  top }     // Close
    };
}
std::vector<cv::Point2f> ANSUtilityHelper::MaskToNormalizedPolygon(
const cv::Mat& binaryMask, const cv::Rect& boundingBox,
float imageWidth, float imageHeight,
float simplificationEpsilon, int minContourArea, int maxPoints)
{
std::vector<cv::Point2f> polygon;
try {
if (binaryMask.empty() || imageWidth <= 0 || imageHeight <= 0)
return polygon;
// The mask is already cropped to bounding box size
cv::Mat mask8u;
if (binaryMask.type() != CV_8UC1)
binaryMask.convertTo(mask8u, CV_8UC1, 255.0);
else
mask8u = binaryMask;
// Find contours
std::vector<std::vector<cv::Point>> contours;
cv::findContours(mask8u.clone(), contours, cv::RETR_EXTERNAL, cv::CHAIN_APPROX_SIMPLE);
if (contours.empty()) return polygon;
// Find largest contour
int largestIdx = 0;
double largestArea = 0.0;
for (size_t i = 0; i < contours.size(); ++i) {
double area = cv::contourArea(contours[i]);
if (area > largestArea && area >= minContourArea) {
largestArea = area;
largestIdx = static_cast<int>(i);
}
}
if (largestArea < minContourArea) return polygon;
// Simplify contour with approxPolyDP
std::vector<cv::Point> simplified;
cv::approxPolyDP(contours[largestIdx], simplified, simplificationEpsilon, true);
if (simplified.size() < 3) return polygon;
// If still too many points after simplification, progressively increase
// epsilon until we fit within maxPoints, then uniformly downsample as fallback
if (maxPoints > 0 && static_cast<int>(simplified.size()) > maxPoints) {
// Try increasing epsilon to reduce points naturally (preserves shape better)
double eps = simplificationEpsilon;
for (int attempt = 0; attempt < 10 && static_cast<int>(simplified.size()) > maxPoints; ++attempt) {
eps *= 1.5;
cv::approxPolyDP(contours[largestIdx], simplified, eps, true);
}
// Fallback: uniformly sample maxPoints from the contour
if (static_cast<int>(simplified.size()) > maxPoints) {
std::vector<cv::Point> downsampled;
downsampled.reserve(maxPoints);
const int n = static_cast<int>(simplified.size());
for (int i = 0; i < maxPoints; ++i) {
int idx = static_cast<int>(std::round(static_cast<double>(i) * (n - 1) / (maxPoints - 1)));
downsampled.push_back(simplified[idx]);
}
simplified = std::move(downsampled);
}
}
// Convert to normalized coordinates:
// mask pixel (mx, my) -> image pixel (boundingBox.x + mx, boundingBox.y + my) -> normalized
polygon.reserve(simplified.size() + 1); // +1 for closing point
for (const auto& pt : simplified) {
float nx = static_cast<float>(pt.x + boundingBox.x) / imageWidth;
float ny = static_cast<float>(pt.y + boundingBox.y) / imageHeight;
polygon.emplace_back(
std::clamp(nx, 0.0f, 1.0f),
std::clamp(ny, 0.0f, 1.0f));
}
// Close the polygon: append first point at end so first == last
if (polygon.size() >= 3) {
polygon.push_back(polygon.front());
}
}
catch (...) {
polygon.clear();
}
return polygon;
}
float ANSUtilityHelper::calculate_intersection_area(const cv::Rect& box1, const cv::Rect& box2) {
int xx1 = std::max(box1.x, box2.x);
int yy1 = std::max(box1.y, box2.y);
int xx2 = std::min(box1.x + box1.width, box2.x + box2.width);
int yy2 = std::min(box1.y + box1.height, box2.y + box2.height);
int width = std::max(0, xx2 - xx1);
int height = std::max(0, yy2 - yy1);
return static_cast<float>(width * height);
}
float ANSUtilityHelper::calculate_bbox_iou(const Object& pred1, const Object& pred2) {
float area1 = pred1.box.width * pred1.box.height;
float area2 = pred2.box.width * pred2.box.height;
float intersect = calculate_intersection_area(pred1.box, pred2.box);
return intersect / (area1 + area2 - intersect);
}
float ANSUtilityHelper::calculate_bbox_ios(const Object& pred1, const Object& pred2) {
float area1 = pred1.box.width * pred1.box.height;
float area2 = pred2.box.width * pred2.box.height;
float intersect = calculate_intersection_area(pred1.box, pred2.box);
float smaller_area = std::min(area1, area2);
return intersect / smaller_area;
}
bool ANSUtilityHelper::has_match(const Object& pred1, const Object& pred2, const std::string& match_type, float match_threshold) {
bool threshold_condition = false;
if (match_type == "IOU") {
threshold_condition = calculate_bbox_iou(pred1, pred2) > match_threshold;
}
else if (match_type == "IOS") {
threshold_condition = calculate_bbox_ios(pred1, pred2) > match_threshold;
}
else {// Default
threshold_condition = calculate_bbox_iou(pred1, pred2) > match_threshold;
}
return threshold_condition;
}
float ANSUtilityHelper::get_merged_score(const Object& pred1, const Object& pred2) {
return std::max(pred1.confidence, pred2.confidence);
}
cv::Rect ANSUtilityHelper::calculate_box_union(const cv::Rect& box1, const cv::Rect& box2) {
int x1 = std::min(box1.x, box2.x);
int y1 = std::min(box1.y, box2.y);
int x2 = std::max(box1.x + box1.width, box2.x + box2.width);
int y2 = std::max(box1.y + box1.height, box2.y + box2.height);
return cv::Rect(cv::Point(x1, y1), cv::Point(x2, y2));
}
cv::Rect ANSUtilityHelper::get_merged_bbox(const Object& pred1, const Object& pred2) {
return calculate_box_union(pred1.box, pred2.box);
}
Object ANSUtilityHelper::merge_object_pair(const Object& obj1, const Object& obj2) {
cv::Rect merged_bbox = get_merged_bbox(obj1, obj2);
float merged_score = get_merged_score(obj1, obj2);
int merged_class_id = get_merged_class_id(obj1, obj2);
std::string merged_class_name = get_merged_category(obj1, obj2);
Object merged_obj;
merged_obj.box = merged_bbox;
merged_obj.confidence = merged_score;
merged_obj.classId = merged_class_id;
merged_obj.className = merged_class_name;
if (obj1.confidence > obj2.confidence) {
merged_obj.trackId = obj1.trackId;
merged_obj.mask = obj1.mask;
merged_obj.polygon = obj1.polygon;
merged_obj.kps = obj1.kps;
merged_obj.extraInfo = obj1.extraInfo;
}
else {
merged_obj.trackId = obj2.trackId;
merged_obj.mask = obj2.mask;
merged_obj.polygon = obj2.polygon;
merged_obj.kps = obj2.kps;
merged_obj.extraInfo = obj2.extraInfo;
}
return merged_obj;
}
std::vector<Object> ANSUtilityHelper::select_object_predictions(const std::vector<Object>& object_prediction_list,
const std::map<int, std::vector<int>>& keep_to_merge_list,
const std::string& match_metric,
float match_threshold
)
{
std::vector<Object> selected_object_predictions;
// Create a modifiable copy of object_prediction_list
std::vector<Object> modifiable_predictions = object_prediction_list;
for (const auto& [keep_ind, merge_ind_list] : keep_to_merge_list) {
for (int merge_ind : merge_ind_list) {
if (has_match(modifiable_predictions[keep_ind], modifiable_predictions[merge_ind], match_metric, match_threshold)) {
modifiable_predictions[keep_ind] = merge_object_pair(modifiable_predictions[keep_ind], modifiable_predictions[merge_ind]);
}
}
selected_object_predictions.push_back(modifiable_predictions[keep_ind]);
}
return selected_object_predictions;
}
	// Greedy non-maximum merging: repeatedly pops the highest-confidence
	// remaining detection and records which of the lower-confidence detections
	// overlap it (IoU or IoS) at or above `match_threshold`. Returns a map of
	// kept index -> indices of detections to merge into it.
	// NOTE(review): worst case is O(n^2) over `object_predictions`.
	std::map<int, std::vector<int>> ANSUtilityHelper::Greedy_NMM(const std::vector<Object>& object_predictions, const std::string& match_metric, float match_threshold) {
		std::map<int, std::vector<int>> keep_to_merge_list;
		std::vector<float> areas(object_predictions.size());
		// Calculate the area of each bounding box
		for (size_t i = 0; i < object_predictions.size(); ++i) {
			areas[i] = static_cast<float>(object_predictions[i].box.width) * object_predictions[i].box.height;
		}
		// Sort indices based on confidence scores in ascending order
		// (so the highest-confidence index can be popped from the back).
		std::vector<int> order(object_predictions.size());
		iota(order.begin(), order.end(), 0); // Fill order with 0, 1, ..., n-1
		sort(order.begin(), order.end(), [&object_predictions](int a, int b) { return object_predictions[a].confidence < object_predictions[b].confidence; });
		while (!order.empty()) {
			// Index of the prediction with the highest score
			int idx = order.back();
			order.pop_back();
			// Sanity check
			if (order.empty()) {
				keep_to_merge_list[idx] = {};
				break;
			}
			// Create a list of indices for matched and unmatched boxes
			std::vector<int> matched_indices;
			std::vector<int> unmatched_indices;
			for (int other_idx : order) {
				// Calculate intersection area between `idx` and `other_idx`
				int xx1 = std::max(object_predictions[idx].box.x, object_predictions[other_idx].box.x);
				int yy1 = std::max(object_predictions[idx].box.y, object_predictions[other_idx].box.y);
				int xx2 = std::min(object_predictions[idx].box.x + object_predictions[idx].box.width, object_predictions[other_idx].box.x + object_predictions[other_idx].box.width);
				int yy2 = std::min(object_predictions[idx].box.y + object_predictions[idx].box.height, object_predictions[other_idx].box.y + object_predictions[other_idx].box.height);
				int w = std::max(0, xx2 - xx1);
				int h = std::max(0, yy2 - yy1);
				float inter = static_cast<float>(w * h);
				float match_metric_value;
				if (match_metric == "IOU") {
					// Union for IoU
					float union_area = areas[idx] + areas[other_idx] - inter;
					match_metric_value = inter / union_area;
				}
				else if (match_metric == "IOS") {
					// Smaller area for IoS
					float smaller_area = std::min(areas[idx], areas[other_idx]);
					match_metric_value = inter / smaller_area;
				}
				else {// Default
					// Union for IoU
					float union_area = areas[idx] + areas[other_idx] - inter;
					match_metric_value = inter / union_area;
				}
				// Check if the match metric value exceeds the threshold
				if (match_metric_value >= match_threshold) {
					matched_indices.push_back(other_idx);
				}
				else {
					unmatched_indices.push_back(other_idx);
				}
			}
			// Update `order` to contain only unmatched indices, sorted by confidence scores
			sort(unmatched_indices.begin(), unmatched_indices.end(), [&object_predictions](int a, int b) { return object_predictions[a].confidence < object_predictions[b].confidence; });
			order = unmatched_indices;
			// Add mapping for the current `idx`
			keep_to_merge_list[idx] = matched_indices;
		}
		return keep_to_merge_list;
	}
std::vector<Object> ANSUtilityHelper::ApplyNMS(const std::vector<Object>& objects, float nmsThreshold) {
std::vector<Object> finalObjects;
std::map<int, std::vector<int>> keep_to_merge_list;
keep_to_merge_list = Greedy_NMM(objects, "IOU", nmsThreshold);
finalObjects = select_object_predictions(objects, keep_to_merge_list, "IOU", nmsThreshold);
return finalObjects;
}
std::vector<cv::Rect> ANSUtilityHelper::GetPatches(cv::Mat& image, int tileWidth, int tileHeight, double overlap) {
std::vector<cv::Rect> patches;
int overLapWidth = static_cast<int>(tileWidth * overlap);
int overLapHeight = static_cast<int>(tileHeight * overlap);
int stepX = tileWidth - overLapWidth;
int stepY = tileHeight - overLapHeight;
for (int y = 0; y < image.rows; y += stepY) {
for (int x = 0; x < image.cols; x += stepX) {
int width = std::min(tileWidth, image.cols - x);
int height = std::min(tileHeight, image.rows - y);
patches.push_back(cv::Rect(x, y, width, height));
}
}
return patches;
}
std::vector<cv::Mat> ANSUtilityHelper::ExtractPatches(cv::Mat& image, std::vector<cv::Rect>& patchRegions) {
std::vector<cv::Mat> patches;
for (const auto& region : patchRegions) {
patches.push_back(image(region).clone());
}
return patches;
}
cv::Mat ANSUtilityHelper::ResizePatch(cv::Mat& patch, int modelWidth, int modelHeight) {
cv::Mat resizedPatch;
double aspectRatio = static_cast<double>(patch.cols) / patch.rows;
int newWidth, newHeight;
if (aspectRatio > 1) { // Width > Height
newWidth = modelWidth;
newHeight = static_cast<int>(modelWidth / aspectRatio);
}
else {
newHeight = modelHeight;
newWidth = static_cast<int>(modelHeight * aspectRatio);
}
cv::resize(patch, resizedPatch, cv::Size(newWidth, newHeight));
return resizedPatch;
}
void ANSUtilityHelper::AdjustBoudingBox(Object& obj, int offsetX, int offsetY) {
obj.box.x += offsetX;
obj.box.y += offsetY;
}
/// MOVEMENT DETECTION FUNCTIONS
// ============================================================================
// Constructor and Destructor
// ============================================================================
	MoveDetectsHandler::MoveDetectsHandler()
	{
		// Nothing to initialize eagerly: per-camera state is created lazily
		// by getCameraData() on first use.
	}
MoveDetectsHandler::~MoveDetectsHandler()
{
std::lock_guard<std::recursive_mutex> lock(cameras_mutex);
for (auto& [key, cameraData] : cameras)
{
cameraData.clear();
}
cameras.clear();
}
// ============================================================================
// Camera Management
// ============================================================================
bool MoveDetectsHandler::hasCameraData(const std::string& camera_id) const
{
std::lock_guard<std::recursive_mutex> lock(cameras_mutex);
return cameras.find(camera_id) != cameras.end();
}
void MoveDetectsHandler::removeCamera(const std::string& camera_id)
{
std::lock_guard<std::recursive_mutex> lock(cameras_mutex);
auto it = cameras.find(camera_id);
if (it != cameras.end())
{
it->second.clear();
cameras.erase(it);
}
}
std::vector<std::string> MoveDetectsHandler::getCameraIds() const
{
std::lock_guard<std::recursive_mutex> lock(cameras_mutex);
std::vector<std::string> ids;
ids.reserve(cameras.size());
for (const auto& [id, _] : cameras)
{
ids.push_back(id);
}
return ids;
}
bool MoveDetectsHandler::empty(const std::string& camera_id) const
{
std::lock_guard<std::recursive_mutex> lock(cameras_mutex);
auto it = cameras.find(camera_id);
return it == cameras.end() || it->second.control_frames.empty();
}
void MoveDetectsHandler::clear(const std::string& camera_id)
{
std::lock_guard<std::recursive_mutex> lock(cameras_mutex);
auto it = cameras.find(camera_id);
if (it != cameras.end())
{
it->second.clear();
}
}
void MoveDetectsHandler::clearAll()
{
std::lock_guard<std::recursive_mutex> lock(cameras_mutex);
for (auto& [key, cameraData] : cameras)
{
cameraData.clear();
}
cameras.clear();
}
// ============================================================================
// Configuration Methods
// ============================================================================
void MoveDetectsHandler::setThreshold(const std::string& camera_id, double threshold)
{
std::lock_guard<std::recursive_mutex> lock(cameras_mutex);
auto it = cameras.find(camera_id);
if (it != cameras.end())
{
it->second.psnr_threshold = std::clamp(threshold, 0.0, 100.0);
}
}
void MoveDetectsHandler::setKeyFrameFrequency(const std::string& camera_id, size_t frequency)
{
std::lock_guard<std::recursive_mutex> lock(cameras_mutex);
auto it = cameras.find(camera_id);
if (it != cameras.end())
{
it->second.key_frame_frequency = std::max(size_t(1), frequency);
}
}
void MoveDetectsHandler::setNumberOfControlFrames(const std::string& camera_id, size_t count)
{
std::lock_guard<std::recursive_mutex> lock(cameras_mutex);
auto it = cameras.find(camera_id);
if (it != cameras.end())
{
it->second.number_of_control_frames = std::clamp(count, size_t(1), size_t(100));
}
}
void MoveDetectsHandler::setThumbnailRatio(const std::string& camera_id, double ratio)
{
std::lock_guard<std::recursive_mutex> lock(cameras_mutex);
auto it = cameras.find(camera_id);
if (it != cameras.end())
{
it->second.thumbnail_ratio = std::clamp(ratio, 0.01, 1.0);
it->second.thumbnail_size = cv::Size(0, 0); // Reset to recalculate
}
}
void MoveDetectsHandler::setMaskEnabled(const std::string& camera_id, bool enabled)
{
std::lock_guard<std::recursive_mutex> lock(cameras_mutex);
auto it = cameras.find(camera_id);
if (it != cameras.end())
{
it->second.mask_enabled = enabled;
}
}
void MoveDetectsHandler::setContoursEnabled(const std::string& camera_id, bool enabled)
{
std::lock_guard<std::recursive_mutex> lock(cameras_mutex);
auto it = cameras.find(camera_id);
if (it != cameras.end())
{
it->second.contours_enabled = enabled;
}
}
void MoveDetectsHandler::setBboxEnabled(const std::string& camera_id, bool enabled)
{
std::lock_guard<std::recursive_mutex> lock(cameras_mutex);
auto it = cameras.find(camera_id);
if (it != cameras.end())
{
it->second.bbox_enabled = enabled;
}
}
void MoveDetectsHandler::setContourThickness(const std::string& camera_id, int thickness)
{
std::lock_guard<std::recursive_mutex> lock(cameras_mutex);
auto it = cameras.find(camera_id);
if (it != cameras.end())
{
it->second.contours_size = std::max(1, thickness);
}
}
void MoveDetectsHandler::setBboxThickness(const std::string& camera_id, int thickness)
{
std::lock_guard<std::recursive_mutex> lock(cameras_mutex);
auto it = cameras.find(camera_id);
if (it != cameras.end())
{
it->second.bbox_size = std::max(1, thickness);
}
}
void MoveDetectsHandler::setMinObjectArea(const std::string& camera_id, double area)
{
std::lock_guard<std::recursive_mutex> lock(cameras_mutex);
auto it = cameras.find(camera_id);
if (it != cameras.end())
{
it->second.min_object_area = std::max(0.0, area);
}
}
void MoveDetectsHandler::setMinObjectSize(const std::string& camera_id, int size)
{
std::lock_guard<std::recursive_mutex> lock(cameras_mutex);
auto it = cameras.find(camera_id);
if (it != cameras.end())
{
it->second.min_object_dimension = std::max(0, size);
}
}
void MoveDetectsHandler::setMorphologyIterations(const std::string& camera_id, int iterations)
{
std::lock_guard<std::recursive_mutex> lock(cameras_mutex);
auto it = cameras.find(camera_id);
if (it != cameras.end())
{
it->second.morphology_iterations = std::clamp(iterations, 1, 50);
}
}
// ============================================================================
// Configuration Getters
// ============================================================================
double MoveDetectsHandler::getThreshold(const std::string& camera_id) const
{
std::lock_guard<std::recursive_mutex> lock(cameras_mutex);
auto it = cameras.find(camera_id);
if (it != cameras.end())
{
return it->second.psnr_threshold;
}
return 0.0;
}
size_t MoveDetectsHandler::getKeyFrameFrequency(const std::string& camera_id) const
{
std::lock_guard<std::recursive_mutex> lock(cameras_mutex);
auto it = cameras.find(camera_id);
if (it != cameras.end())
{
return it->second.key_frame_frequency;
}
return 0;
}
size_t MoveDetectsHandler::getNumberOfControlFrames(const std::string& camera_id) const
{
std::lock_guard<std::recursive_mutex> lock(cameras_mutex);
auto it = cameras.find(camera_id);
if (it != cameras.end())
{
return it->second.number_of_control_frames;
}
return 0;
}
double MoveDetectsHandler::getThumbnailRatio(const std::string& camera_id) const
{
std::lock_guard<std::recursive_mutex> lock(cameras_mutex);
auto it = cameras.find(camera_id);
if (it != cameras.end())
{
return it->second.thumbnail_ratio;
}
return 0.0;
}
bool MoveDetectsHandler::isMaskEnabled(const std::string& camera_id) const
{
std::lock_guard<std::recursive_mutex> lock(cameras_mutex);
auto it = cameras.find(camera_id);
if (it != cameras.end())
{
return it->second.mask_enabled;
}
return false;
}
bool MoveDetectsHandler::isContoursEnabled(const std::string& camera_id) const
{
std::lock_guard<std::recursive_mutex> lock(cameras_mutex);
auto it = cameras.find(camera_id);
if (it != cameras.end())
{
return it->second.contours_enabled;
}
return false;
}
bool MoveDetectsHandler::isBboxEnabled(const std::string& camera_id) const
{
std::lock_guard<std::recursive_mutex> lock(cameras_mutex);
auto it = cameras.find(camera_id);
if (it != cameras.end())
{
return it->second.bbox_enabled;
}
return false;
}
// ============================================================================
// State Query Methods
// ============================================================================
bool MoveDetectsHandler::isMovementDetected(const std::string& camera_id) const
{
std::lock_guard<std::recursive_mutex> lock(cameras_mutex);
auto it = cameras.find(camera_id);
return it != cameras.end() && it->second.movement_detected;
}
bool MoveDetectsHandler::wasTransitionDetected(const std::string& camera_id) const
{
std::lock_guard<std::recursive_mutex> lock(cameras_mutex);
auto it = cameras.find(camera_id);
return it != cameras.end() && it->second.transition_detected;
}
double MoveDetectsHandler::getPSNRScore(const std::string& camera_id) const
{
std::lock_guard<std::recursive_mutex> lock(cameras_mutex);
auto it = cameras.find(camera_id);
if (it != cameras.end())
{
return it->second.most_recent_psnr_score;
}
return 0.0;
}
size_t MoveDetectsHandler::getFrameIndexWithMovement(const std::string& camera_id) const
{
std::lock_guard<std::recursive_mutex> lock(cameras_mutex);
auto it = cameras.find(camera_id);
if (it != cameras.end())
{
return it->second.frame_index_with_movement;
}
return 0;
}
std::chrono::milliseconds MoveDetectsHandler::getTimeSinceLastMovement(const std::string& camera_id) const
{
std::lock_guard<std::recursive_mutex> lock(cameras_mutex);
auto it = cameras.find(camera_id);
if (it == cameras.end() || it->second.frame_index_with_movement == 0)
{
return std::chrono::milliseconds::max();
}
auto now = std::chrono::high_resolution_clock::now();
return std::chrono::duration_cast<std::chrono::milliseconds>(
now - it->second.movement_last_detected
);
}
size_t MoveDetectsHandler::getControlFrameCount(const std::string& camera_id) const
{
std::lock_guard<std::recursive_mutex> lock(cameras_mutex);
auto it = cameras.find(camera_id);
if (it != cameras.end())
{
return it->second.control_frames.size();
}
return 0;
}
size_t MoveDetectsHandler::getNextFrameIndex(const std::string& camera_id) const
{
std::lock_guard<std::recursive_mutex> lock(cameras_mutex);
auto it = cameras.find(camera_id);
if (it != cameras.end())
{
return it->second.next_frame_index;
}
return 0;
}
cv::Mat MoveDetectsHandler::getOutput(const std::string& camera_id) const
{
std::lock_guard<std::recursive_mutex> lock(cameras_mutex);
auto it = cameras.find(camera_id);
if (it != cameras.end())
{
return it->second.output.clone();
}
return cv::Mat();
}
cv::Mat MoveDetectsHandler::getMask(const std::string& camera_id) const
{
std::lock_guard<std::recursive_mutex> lock(cameras_mutex);
auto it = cameras.find(camera_id);
if (it != cameras.end())
{
return it->second.mask.clone();
}
return cv::Mat();
}
std::vector<std::vector<cv::Point>> MoveDetectsHandler::getContours(const std::string& camera_id) const
{
std::lock_guard<std::recursive_mutex> lock(cameras_mutex);
auto it = cameras.find(camera_id);
if (it != cameras.end())
{
return it->second.contours;
}
return std::vector<std::vector<cv::Point>>();
}
// ============================================================================
// Statistics
// ============================================================================
MoveDetectsHandler::CameraStats MoveDetectsHandler::getStats(const std::string& camera_id) const
{
std::lock_guard<std::recursive_mutex> lock(cameras_mutex);
auto it = cameras.find(camera_id);
if (it != cameras.end())
{
return it->second.stats;
}
return CameraStats();
}
void MoveDetectsHandler::resetStats(const std::string& camera_id)
{
std::lock_guard<std::recursive_mutex> lock(cameras_mutex);
auto it = cameras.find(camera_id);
if (it != cameras.end())
{
it->second.stats.reset();
}
}
// ============================================================================
// Helper Functions
// ============================================================================
bool MoveDetectsHandler::cameraExists(const std::string& camera_id) const
{
// Note: cameras_mutex should already be locked by caller
return cameras.find(camera_id) != cameras.end();
}
MoveDetectsHandler::CameraData& MoveDetectsHandler::getCameraData(const std::string& camera_id)
{
// Note: cameras_mutex should already be locked by caller
return cameras.try_emplace(camera_id, CameraData{}).first->second;
}
const MoveDetectsHandler::CameraData* MoveDetectsHandler::getCameraDataConst(const std::string& camera_id) const
{
// Note: cameras_mutex should already be locked by caller
auto it = cameras.find(camera_id);
if (it == cameras.end())
{
return nullptr;
}
return &it->second;
}
// ============================================================================
// Core Algorithm Functions
// ============================================================================
double MoveDetectsHandler::psnr(const cv::Mat& src, const cv::Mat& dst)
{
if (src.empty() || dst.empty())
{
std::cerr << "PSNR: Empty image(s)" << std::endl;
return 0.0;
}
if (src.type() != dst.type())
{
std::cerr << "PSNR: Type mismatch - src: " << src.type()
<< " dst: " << dst.type() << std::endl;
return 0.0;
}
if (src.size() != dst.size())
{
std::cerr << "PSNR: Size mismatch - src: " << src.size()
<< " dst: " << dst.size() << std::endl;
return 0.0;
}
cv::Mat s1;
cv::absdiff(src, dst, s1);
s1.convertTo(s1, CV_32F);
s1 = s1.mul(s1);
cv::Scalar s = cv::sum(s1);
double sse = s.val[0] + s.val[1] + s.val[2];
if (sse <= 1e-10)
{
// Identical images - return very high PSNR
return 100.0; // Use a high but finite value instead of infinity
}
const double mse = sse / static_cast<double>(src.channels() * src.total());
const double psnr_value = 10.0 * std::log10((255.0 * 255.0) / mse);
// Sanity check
if (std::isnan(psnr_value) || std::isinf(psnr_value))
{
std::cerr << "PSNR: Invalid result (NaN or Inf), MSE=" << mse << std::endl;
return 0.0;
}
return psnr_value;
}
cv::Mat MoveDetectsHandler::simple_colour_balance(const cv::Mat& src)
{
if (src.empty() || src.channels() != 3)
{
return src.clone();
}
const float full_percent = 1.0f;
const float half_percent = full_percent / 200.0f;
std::vector<cv::Mat> tmpsplit;
cv::split(src, tmpsplit);
for (size_t idx = 0; idx < 3; idx++)
{
cv::Mat flat;
tmpsplit[idx].reshape(1, 1).copyTo(flat);
cv::sort(flat, flat, cv::SORT_EVERY_ROW + cv::SORT_ASCENDING);
const int lo = flat.at<uchar>(cvFloor(static_cast<float>(flat.cols) * half_percent));
const int hi = flat.at<uchar>(cvCeil(static_cast<float>(flat.cols) * (1.0f - half_percent)));
tmpsplit[idx].setTo(lo, tmpsplit[idx] < lo);
tmpsplit[idx].setTo(hi, tmpsplit[idx] > hi);
cv::normalize(tmpsplit[idx], tmpsplit[idx], 0, 255, cv::NORM_MINMAX);
}
cv::Mat output;
cv::merge(tmpsplit, output);
return output;
}
cv::Rect MoveDetectsHandler::BoundingBoxFromContour(const std::vector<cv::Point>& contour)
{
if (contour.empty())
{
return cv::Rect();
}
if (cv::contourArea(contour) < 1000)
{
return cv::Rect();
}
return cv::boundingRect(contour);
}
	// Builds a binary movement mask from the absolute difference between a
	// control frame and the current frame: blur -> threshold (adaptive, with
	// Otsu and then a fixed level as progressively less noisy fallbacks) ->
	// morphological cleanup. Returns an empty Mat when either input is empty.
	// NOTE(review): the std::cout diagnostics run on every frame; consider
	// routing them through a log level before shipping.
	cv::Mat MoveDetectsHandler::computeMovementMask(const cv::Mat& control_frame,
		const cv::Mat& current_frame,
		const cv::Size& output_size,
		int morphology_iterations)
	{
		if (control_frame.empty() || current_frame.empty())
		{
			std::cerr << "computeMovementMask: Empty input frame(s)" << std::endl;
			return cv::Mat();
		}
		// Compute difference
		cv::Mat differences;
		cv::absdiff(control_frame, current_frame, differences);
		// Resize to output size
		cv::Mat differences_resized;
		cv::resize(differences, differences_resized, output_size, 0, 0, cv::INTER_CUBIC);
		// Convert to grayscale
		cv::Mat greyscale;
		cv::cvtColor(differences_resized, greyscale, cv::COLOR_BGR2GRAY);
		// NEW: Apply Gaussian blur to reduce noise BEFORE thresholding
		cv::Mat blurred;
		cv::GaussianBlur(greyscale, blurred, cv::Size(5, 5), 0);
		// NEW: Use adaptive threshold instead of Otsu for better noise handling
		cv::Mat threshold;
		// Try adaptive threshold first
		cv::adaptiveThreshold(blurred, threshold, 255,
			cv::ADAPTIVE_THRESH_GAUSSIAN_C,
			cv::THRESH_BINARY, 11, 2);
		// Check if adaptive threshold produced too much noise
		int non_zero_adaptive = cv::countNonZero(threshold);
		double adaptive_percent = (100.0 * non_zero_adaptive) / threshold.total();
		std::cout << "Adaptive threshold: " << adaptive_percent << "% of frame" << std::endl;
		// If adaptive produces too much noise, fall back to Otsu with higher threshold
		if (adaptive_percent > 25.0)
		{
			std::cout << "Adaptive too noisy, using Otsu..." << std::endl;
			cv::threshold(blurred, threshold, 0.0, 255.0, cv::THRESH_BINARY | cv::THRESH_OTSU);
			// Check Otsu result
			int non_zero_otsu = cv::countNonZero(threshold);
			double otsu_percent = (100.0 * non_zero_otsu) / threshold.total();
			std::cout << "Otsu threshold: " << otsu_percent << "% of frame" << std::endl;
			// If still too noisy, use fixed high threshold
			if (otsu_percent > 25.0)
			{
				std::cout << "Still too noisy, using fixed threshold 50" << std::endl;
				cv::threshold(blurred, threshold, 50, 255.0, cv::THRESH_BINARY);
			}
		}
		// NEW: More aggressive morphology
		cv::Mat element = cv::getStructuringElement(cv::MORPH_ELLIPSE, cv::Size(5, 5));
		// Close small gaps first
		cv::Mat closed;
		cv::morphologyEx(threshold, closed, cv::MORPH_CLOSE, element, cv::Point(-1, -1), 2);
		// Then dilate and erode
		cv::Mat dilated, eroded;
		cv::dilate(closed, dilated, element, cv::Point(-1, -1), morphology_iterations);
		cv::erode(dilated, eroded, element, cv::Point(-1, -1), morphology_iterations);
		// NEW: Open to remove small noise
		cv::Mat opened;
		cv::morphologyEx(eroded, opened, cv::MORPH_OPEN, element, cv::Point(-1, -1), 3);
		// Final check
		int final_pixels = cv::countNonZero(opened);
		double final_percent = (100.0 * final_pixels) / opened.total();
		std::cout << "Final mask: " << final_percent << "% of frame" << std::endl;
		return opened;
	}
	// Turns the binary movement mask into Object detections: finds external
	// contours (stored in camera.contours as a side effect), filters them by
	// the camera's configured minimum area/size, optionally draws contours and
	// boxes onto camera.output, and emits one "movement" Object per surviving
	// region (confidence fixed at 1.0, mask set to the image crop).
	// NOTE(review): the final 25x25 px filter is hard-coded and overlaps the
	// configurable min_object_dimension check above it - confirm intent.
	std::vector<Object> MoveDetectsHandler::extractObjectsFromMask(const cv::Mat& mask,
		const cv::Mat& image,
		CameraData& camera,
		const std::string& camera_id)
	{
		std::vector<Object> outputObjects;
		if (mask.empty())
		{
			return outputObjects;
		}
		std::vector<cv::Vec4i> hierarchy;
		cv::findContours(mask, camera.contours, hierarchy, cv::RETR_EXTERNAL, cv::CHAIN_APPROX_SIMPLE);
		for (const auto& contour : camera.contours)
		{
			double area = cv::contourArea(contour);
			if (area < camera.min_object_area)
			{
				continue;
			}
			cv::Rect boundingRect = cv::boundingRect(contour);
			// Clamp to image bounds
			boundingRect &= cv::Rect(0, 0, image.cols, image.rows);
			if (boundingRect.width < camera.min_object_dimension ||
				boundingRect.height < camera.min_object_dimension ||
				boundingRect.area() < camera.min_object_total_size)
			{
				continue;
			}
			// Draw visualizations
			if (camera.contours_enabled)
			{
				cv::polylines(camera.output, contour, true, cv::Scalar(0, 0, 255),
					camera.contours_size, camera.line_type);
			}
			if (camera.bbox_enabled)
			{
				cv::rectangle(camera.output, boundingRect, cv::Scalar(0, 255, 0),
					camera.bbox_size, camera.line_type);
			}
			Object obj;
			obj.classId = 0;
			obj.trackId = 0;
			obj.className = "movement";
			obj.confidence = 1.0f;
			obj.box = boundingRect;
			obj.mask = image(boundingRect).clone();
			obj.cameraId = camera_id;
			// Create polygon from contour (simplified to bounding box corners)
			obj.polygon = {
				cv::Point2f(static_cast<float>(boundingRect.x), static_cast<float>(boundingRect.y)),
				cv::Point2f(static_cast<float>(boundingRect.x + boundingRect.width), static_cast<float>(boundingRect.y)),
				cv::Point2f(static_cast<float>(boundingRect.x + boundingRect.width), static_cast<float>(boundingRect.y + boundingRect.height)),
				cv::Point2f(static_cast<float>(boundingRect.x), static_cast<float>(boundingRect.y + boundingRect.height))
			};
			// Hard-coded minimum detection size (see NOTE above).
			if ((boundingRect.width >= 25) &&
				(boundingRect.height >= 25))outputObjects.push_back(obj);
		}
		return outputObjects;
	}
	// Stores `thumbnail` as a control frame when either the key-frame interval
	// has elapsed or the ring is not yet full, then evicts the oldest frames
	// down to the configured capacity and schedules the next key frame.
	void MoveDetectsHandler::updateControlFrames(CameraData& camera, size_t frame_index,
		const cv::Mat& thumbnail)
	{
		if (thumbnail.empty())
		{
			std::cerr << "updateControlFrames: Empty thumbnail!" << std::endl;
			return;
		}
		if (frame_index >= camera.next_key_frame ||
			camera.control_frames.size() < camera.number_of_control_frames)
		{
			// Store a deep copy
			cv::Mat thumbnail_copy = thumbnail.clone();
			// Ensure continuous memory
			// NOTE(review): clone() already yields a continuous Mat, so this
			// branch looks unreachable - confirm before removing.
			if (!thumbnail_copy.isContinuous())
			{
				thumbnail_copy = thumbnail_copy.clone();
			}
			camera.control_frames[frame_index] = thumbnail_copy;
			// Remove oldest frames if exceeding limit
			// (map is keyed by frame index, so begin() is the oldest entry)
			while (camera.control_frames.size() > camera.number_of_control_frames)
			{
				auto oldest = camera.control_frames.begin();
				oldest->second.release(); // Explicitly release memory
				camera.control_frames.erase(oldest);
			}
			camera.next_key_frame = frame_index + camera.key_frame_frequency;
		}
	}
	// Folds one processed frame into the camera's statistics: counters,
	// last-movement timestamp, PSNR min/max/running-average, and total
	// processing time. Must run after the frame's PSNR has been computed.
	void MoveDetectsHandler::updateStatistics(CameraData& camera, double psnr_value,
		bool movement_detected,
		std::chrono::milliseconds processing_time)
	{
		// Incremented first: the running average below uses the new count.
		camera.stats.total_frames_processed++;
		if (movement_detected)
		{
			camera.stats.frames_with_movement++;
			camera.stats.last_movement_time = std::chrono::high_resolution_clock::now();
		}
		camera.stats.control_frames_count = camera.control_frames.size();
		// Update PSNR statistics (skip infinity values from identical frames)
		if (!std::isinf(psnr_value))
		{
			camera.stats.min_psnr = std::min(camera.stats.min_psnr, psnr_value);
			camera.stats.max_psnr = std::max(camera.stats.max_psnr, psnr_value);
			// Running average (Welford-style incremental mean over n frames)
			double n = static_cast<double>(camera.stats.total_frames_processed);
			camera.stats.average_psnr =
				(camera.stats.average_psnr * (n - 1.0) + psnr_value) / n;
		}
		camera.stats.total_processing_time += processing_time;
	}
//void MoveDetectsHandler::updateStatistics(CameraData& camera, double psnr_value,
// bool movement_detected,
// std::chrono::milliseconds processing_time)
//{
// camera.stats.total_frames_processed++;
// if (movement_detected)
// {
// camera.stats.frames_with_movement++;
// camera.stats.last_movement_time = std::chrono::high_resolution_clock::now();
// }
// camera.stats.control_frames_count = camera.control_frames.size();
// // Update PSNR statistics (skip infinity values from identical frames)
// if (!std::isinf(psnr_value))
// {
// camera.stats.min_psnr = std::min(camera.stats.min_psnr, psnr_value);
// camera.stats.max_psnr = std::max(camera.stats.max_psnr, psnr_value);
// // Running average
// double n = static_cast<double>(camera.stats.total_frames_processed);
// camera.stats.average_psnr =
// (camera.stats.average_psnr * (n - 1.0) + psnr_value) / n;
// }
// // ========================================================================
// // UPDATE TEMPORAL CONSISTENCY STATISTICS - THIS WAS MISSING!
// // ========================================================================
// double n = static_cast<double>(camera.stats.total_frames_processed);
// camera.stats.average_temporal_consistency =
// (camera.stats.average_temporal_consistency * (n - 1.0) +
// camera.last_temporal_consistency_score) / n;
// camera.stats.total_processing_time += processing_time;
//}
// ============================================================================
// Main Detection Methods
// ============================================================================
std::vector<Object> MoveDetectsHandler::MovementDetect(const std::string& camera_id,
cv::Mat& next_image)
{
std::lock_guard<std::recursive_mutex> lock(cameras_mutex);
CameraData& camera = getCameraData(camera_id);
return MovementDetect(camera_id, camera.next_frame_index, next_image);
}
std::vector<Object> MoveDetectsHandler::MovementDetect(const std::string& camera_id,
const size_t frame_index,
cv::Mat& image)
{
std::lock_guard<std::recursive_mutex> lock(cameras_mutex);
if (image.empty())
{
return std::vector<Object>();
}
// CRITICAL: Ensure consistent format
cv::Mat processed_image;
// Convert to BGR if needed
if (image.channels() == 1)
{
cv::cvtColor(image, processed_image, cv::COLOR_GRAY2BGR);
}
else if (image.channels() == 4)
{
cv::cvtColor(image, processed_image, cv::COLOR_BGRA2BGR);
}
else if (image.channels() == 3)
{
// Already BGR, but ensure it's a continuous clone
processed_image = image.clone();
}
else
{
// Unsupported format
return std::vector<Object>();
}
// Ensure continuous memory layout
if (!processed_image.isContinuous())
{
processed_image = processed_image.clone();
}
CameraData& camera = getCameraData(camera_id);
return MovementDetectInternal(camera_id, frame_index, processed_image, camera);
}
void MoveDetectsHandler::setTemporalConsistency(const std::string& camera_id, bool enabled)
{
std::lock_guard<std::recursive_mutex> lock(cameras_mutex);
auto it = cameras.find(camera_id);
if (it != cameras.end())
{
it->second.temporal_consistency_enabled = enabled;
}
}
void MoveDetectsHandler::setMaskOverlapThreshold(const std::string& camera_id, double threshold)
{
std::lock_guard<std::recursive_mutex> lock(cameras_mutex);
auto it = cameras.find(camera_id);
if (it != cameras.end())
{
it->second.mask_overlap_threshold = std::clamp(threshold, 0.0, 1.0);
}
}
void MoveDetectsHandler::setTemporalHistorySize(const std::string& camera_id, size_t size)
{
std::lock_guard<std::recursive_mutex> lock(cameras_mutex);
auto it = cameras.find(camera_id);
if (it != cameras.end())
{
it->second.temporal_history_size = std::clamp(size, size_t(1), size_t(20));
}
}
void MoveDetectsHandler::setMinConsistentFrames(const std::string& camera_id, size_t frames)
{
std::lock_guard<std::recursive_mutex> lock(cameras_mutex);
auto it = cameras.find(camera_id);
if (it != cameras.end())
{
it->second.min_consistent_frames = std::max(size_t(1), frames);
}
}
void MoveDetectsHandler::setLocationStabilityEnabled(const std::string& camera_id, bool enabled)
{
std::lock_guard<std::recursive_mutex> lock(cameras_mutex);
auto it = cameras.find(camera_id);
if (it != cameras.end())
{
it->second.location_stability_enabled = enabled;
}
}
void MoveDetectsHandler::setMaxLocationJitter(const std::string& camera_id, double pixels)
{
std::lock_guard<std::recursive_mutex> lock(cameras_mutex);
auto it = cameras.find(camera_id);
if (it != cameras.end())
{
it->second.max_location_jitter = std::max(0.0, pixels);
}
}
bool MoveDetectsHandler::isTemporalConsistencyEnabled(const std::string& camera_id) const
{
std::lock_guard<std::recursive_mutex> lock(cameras_mutex);
auto it = cameras.find(camera_id);
if (it != cameras.end())
{
return it->second.temporal_consistency_enabled;
}
return false;
}
double MoveDetectsHandler::getTemporalConsistencyScore(const std::string& camera_id) const
{
std::lock_guard<std::recursive_mutex> lock(cameras_mutex);
auto it = cameras.find(camera_id);
if (it != cameras.end())
{
return it->second.last_temporal_consistency_score;
}
return 0.0;
}
// ============================================================================
// Temporal Consistency Helper Functions
// ============================================================================
double MoveDetectsHandler::calculateMaskOverlap(const cv::Mat& mask1, const cv::Mat& mask2)
{
if (mask1.empty() || mask2.empty())
{
return 0.0;
}
if (mask1.size() != mask2.size())
{
return 0.0;
}
// Calculate intersection
cv::Mat intersection;
cv::bitwise_and(mask1, mask2, intersection);
int intersection_pixels = cv::countNonZero(intersection);
// Calculate union
cv::Mat union_mask;
cv::bitwise_or(mask1, mask2, union_mask);
int union_pixels = cv::countNonZero(union_mask);
if (union_pixels == 0)
{
return 0.0;
}
// IoU (Intersection over Union)
double overlap = static_cast<double>(intersection_pixels) / static_cast<double>(union_pixels);
return overlap;
}
cv::Point MoveDetectsHandler::calculateMaskCentroid(const cv::Mat& mask)
{
if (mask.empty())
{
return cv::Point(-1, -1);
}
cv::Moments m = cv::moments(mask, true);
if (m.m00 == 0)
{
return cv::Point(-1, -1);
}
cv::Point centroid;
centroid.x = static_cast<int>(m.m10 / m.m00);
centroid.y = static_cast<int>(m.m01 / m.m00);
return centroid;
}
double MoveDetectsHandler::calculateLocationStability(const std::deque<cv::Point>& centroids)
{
if (centroids.size() < 2)
{
return 0.0;
}
// Calculate total movement distance
double total_distance = 0.0;
for (size_t i = 1; i < centroids.size(); i++)
{
if (centroids[i].x < 0 || centroids[i - 1].x < 0)
{
continue; // Skip invalid centroids
}
double dx = centroids[i].x - centroids[i - 1].x;
double dy = centroids[i].y - centroids[i - 1].y;
double distance = std::sqrt(dx * dx + dy * dy);
total_distance += distance;
}
// Average movement per frame
double avg_movement = total_distance / (centroids.size() - 1);
return avg_movement;
}
void MoveDetectsHandler::updateTemporalHistory(CameraData& camera, const cv::Mat& mask)
{
if (mask.empty())
{
return;
}
// Add mask to history
camera.mask_history.push_back(mask.clone());
// Maintain history size limit
while (camera.mask_history.size() > camera.temporal_history_size)
{
camera.mask_history.front().release();
camera.mask_history.pop_front();
}
// Calculate and store centroid
cv::Point centroid = calculateMaskCentroid(mask);
camera.centroid_history.push_back(centroid);
// Maintain centroid history size
while (camera.centroid_history.size() > camera.temporal_history_size)
{
camera.centroid_history.pop_front();
}
}
bool MoveDetectsHandler::checkTemporalConsistency(CameraData& camera, const cv::Mat& current_mask)
{
if (!camera.temporal_consistency_enabled)
{
camera.last_temporal_consistency_score = 1.0;
return true; // Always pass if disabled
}
if (current_mask.empty())
{
camera.last_temporal_consistency_score = 0.0;
return false;
}
// If no history yet, accept the first detection
if (camera.mask_history.empty())
{
camera.last_temporal_consistency_score = 1.0;
return true;
}
// Calculate overlap with recent masks
double total_overlap = 0.0;
int valid_comparisons = 0;
for (const auto& historical_mask : camera.mask_history)
{
if (!historical_mask.empty())
{
double overlap = calculateMaskOverlap(current_mask, historical_mask);
total_overlap += overlap;
valid_comparisons++;
}
}
double avg_overlap = (valid_comparisons > 0) ? (total_overlap / valid_comparisons) : 0.0;
camera.last_temporal_consistency_score = avg_overlap;
std::cout << "Temporal consistency check:" << std::endl;
std::cout << " Average overlap with history: " << (avg_overlap * 100) << "%" << std::endl;
std::cout << " Required threshold: " << (camera.mask_overlap_threshold * 100) << "%" << std::endl;
// Check mask overlap criterion
bool overlap_passed = avg_overlap >= camera.mask_overlap_threshold;
// Check location stability if enabled
bool location_stable = true;
if (camera.location_stability_enabled && camera.centroid_history.size() >= 2)
{
cv::Point current_centroid = calculateMaskCentroid(current_mask);
if (current_centroid.x >= 0)
{
// Check jitter with most recent centroid
const cv::Point& last_centroid = camera.centroid_history.back();
if (last_centroid.x >= 0)
{
double dx = current_centroid.x - last_centroid.x;
double dy = current_centroid.y - last_centroid.y;
double distance = std::sqrt(dx * dx + dy * dy);
location_stable = distance <= camera.max_location_jitter;
std::cout << " Location jitter: " << distance << " pixels (max: "
<< camera.max_location_jitter << ")" << std::endl;
std::cout << " Location stable: " << (location_stable ? "YES" : "NO") << std::endl;
}
}
}
// Both criteria must pass
bool passed = overlap_passed && location_stable;
// Update consistent frame counter
if (passed)
{
camera.consistent_frame_count++;
std::cout << " Consistent frames: " << camera.consistent_frame_count
<< " / " << camera.min_consistent_frames << std::endl;
}
else
{
camera.consistent_frame_count = 0;
std::cout << " Consistency check FAILED - resetting counter" << std::endl;
}
// Movement is only confirmed after min consecutive consistent frames
bool confirmed = camera.consistent_frame_count >= camera.min_consistent_frames;
std::cout << " Movement confirmed: " << (confirmed ? "YES" : "NO") << std::endl;
return confirmed;
}
	/// Core per-frame movement-detection pass. Callers hold cameras_mutex and
	/// have already normalised `image` to 3-channel BGR.
	/// Pipeline: build a downscaled thumbnail -> PSNR-compare it against the
	/// stored control frames (newest first) -> when the score drops below
	/// psnr_threshold, build a movement mask and extract objects from it ->
	/// refresh the control-frame set and per-camera statistics.
	/// @param camera_id   camera identifier (used for logging and extraction)
	/// @param frame_index index of this frame in the camera's stream
	/// @param image       full-resolution BGR frame; may be cloned in place
	/// @param camera      per-camera state, mutated throughout
	/// @return objects extracted from the movement mask; empty when no movement
	std::vector<Object> MoveDetectsHandler::MovementDetectInternal(const std::string& camera_id,
		const size_t frame_index,
		cv::Mat& image,
		CameraData& camera)
	{
		auto start_time = std::chrono::high_resolution_clock::now();
		std::vector<Object> outputObjects;
		// DEBUG: Log input
		std::cout << "\n=== MovementDetect Debug ===" << std::endl;
		std::cout << "Camera ID: " << camera_id << std::endl;
		std::cout << "Frame Index: " << frame_index << std::endl;
		std::cout << "Image size: " << image.cols << "x" << image.rows << std::endl;
		std::cout << "Image type: " << image.type() << " (CV_8UC3=" << CV_8UC3 << ")" << std::endl;
		std::cout << "Image channels: " << image.channels() << std::endl;
		std::cout << "Image empty: " << (image.empty() ? "YES" : "NO") << std::endl;
		if (image.empty())
		{
			std::cout << "ERROR: Empty image!" << std::endl;
			return outputObjects;
		}
		// Ensure image is continuous in memory
		if (!image.isContinuous())
		{
			std::cout << "WARNING: Image is not continuous, cloning..." << std::endl;
			image = image.clone();
		}
		camera.contours.clear();
		// Initialize thumbnail size if needed (first frame only): derive the
		// comparison resolution from the input size and thumbnail_ratio,
		// clamped so the thumbnail is at least 1x1.
		if (camera.thumbnail_size.area() == 0)
		{
			camera.thumbnail_ratio = std::clamp(camera.thumbnail_ratio, 0.01, 1.0);
			camera.thumbnail_size.width = static_cast<int>(image.cols * camera.thumbnail_ratio);
			camera.thumbnail_size.height = static_cast<int>(image.rows * camera.thumbnail_ratio);
			camera.thumbnail_size.width = std::max(1, camera.thumbnail_size.width);
			camera.thumbnail_size.height = std::max(1, camera.thumbnail_size.height);
			std::cout << "Initialized thumbnail size: " << camera.thumbnail_size.width
				<< "x" << camera.thumbnail_size.height << std::endl;
		}
		// Create thumbnail for comparison (INTER_AREA for downscaling)
		cv::Mat thumbnail;
		cv::resize(image, thumbnail, camera.thumbnail_size, 0, 0, cv::INTER_AREA);
		std::cout << "Thumbnail created: " << thumbnail.cols << "x" << thumbnail.rows
			<< " type: " << thumbnail.type() << std::endl;
		// DEBUG: Control frames info
		std::cout << "Control frames count: " << camera.control_frames.size() << std::endl;
		std::cout << "Control frame indices: ";
		for (const auto& [idx, frame] : camera.control_frames)
		{
			std::cout << idx << " ";
		}
		std::cout << std::endl;
		// First frame for this camera: seed the control set and return early —
		// there is nothing to compare against yet.
		if (camera.control_frames.empty())
		{
			std::cout << "First frame - initializing control frame" << std::endl;
			camera.control_frames[frame_index] = thumbnail.clone();
			camera.next_key_frame = frame_index + camera.key_frame_frequency;
			camera.next_frame_index = frame_index + 1;
			camera.output = image.clone();
			camera.mask = cv::Mat(image.size(), CV_8UC1, cv::Scalar(0));
			auto end_time = std::chrono::high_resolution_clock::now();
			auto processing_time = std::chrono::duration_cast<std::chrono::milliseconds>(
				end_time - start_time
			);
			updateStatistics(camera, 0.0, false, processing_time);
			return outputObjects;
		}
		bool previous_movement_detected = camera.movement_detected;
		camera.movement_detected = false;
		std::cout << "Previous movement: " << (previous_movement_detected ? "YES" : "NO") << std::endl;
		std::cout << "PSNR Threshold: " << camera.psnr_threshold << std::endl;
		// Compare with control frames, newest first; stop at the first frame
		// that indicates movement.
		for (auto iter = camera.control_frames.rbegin(); iter != camera.control_frames.rend(); ++iter)
		{
			const cv::Mat& control_image = iter->second;
			// DEBUG: Verify control frame
			std::cout << "Comparing with control frame " << iter->first
				<< " size: " << control_image.cols << "x" << control_image.rows
				<< " type: " << control_image.type() << std::endl;
			if (control_image.empty())
			{
				std::cout << "ERROR: Control frame is empty!" << std::endl;
				continue;
			}
			if (control_image.size() != thumbnail.size())
			{
				std::cout << "ERROR: Size mismatch! Control: " << control_image.size()
					<< " Thumbnail: " << thumbnail.size() << std::endl;
				continue;
			}
			if (control_image.type() != thumbnail.type())
			{
				std::cout << "ERROR: Type mismatch! Control: " << control_image.type()
					<< " Thumbnail: " << thumbnail.type() << std::endl;
				continue;
			}
			camera.most_recent_psnr_score = psnr(control_image, thumbnail);
			std::cout << "PSNR Score: " << std::fixed << std::setprecision(2)
				<< camera.most_recent_psnr_score << std::endl;
			// Infinite PSNR means the frames are identical — not a usable sample.
			if (std::isinf(camera.most_recent_psnr_score))
			{
				std::cout << "PSNR is infinity (identical frames), skipping..." << std::endl;
				continue;
			}
			// Low PSNR = large difference from the control frame => movement.
			if (camera.most_recent_psnr_score < camera.psnr_threshold)
			{
				std::cout << "*** MOVEMENT DETECTED *** (PSNR < threshold)" << std::endl;
				camera.movement_detected = true;
				camera.movement_last_detected = std::chrono::high_resolution_clock::now();
				camera.frame_index_with_movement = frame_index;
				camera.mask = computeMovementMask(control_image, thumbnail, image.size(),
					camera.morphology_iterations);
				std::cout << "Mask created: " << camera.mask.cols << "x" << camera.mask.rows
					<< " non-zero pixels: " << cv::countNonZero(camera.mask) << std::endl;
				break;
			}
			else
			{
				std::cout << "No movement (PSNR >= threshold)" << std::endl;
			}
		}
		// A transition is any movement <-> no-movement state change between
		// consecutive calls.
		camera.transition_detected = (previous_movement_detected != camera.movement_detected);
		std::cout << "Current movement: " << (camera.movement_detected ? "YES" : "NO") << std::endl;
		std::cout << "Transition detected: " << (camera.transition_detected ? "YES" : "NO") << std::endl;
		// Clear the mask when no mask exists yet, or when movement just ended.
		if (camera.mask.empty() || (camera.transition_detected && !camera.movement_detected))
		{
			std::cout << "Resetting mask to zeros" << std::endl;
			camera.mask = cv::Mat(image.size(), CV_8UC1, cv::Scalar(0));
		}
		camera.output = image.clone();
		if (!camera.mask.empty() && camera.movement_detected)
		{
			outputObjects = extractObjectsFromMask(camera.mask, image, camera, camera_id);
			std::cout << "Objects extracted: " << outputObjects.size() << std::endl;
		}
		// Update control frames when the key-frame interval has elapsed, or
		// while the control set has not reached its configured size.
		if (frame_index >= camera.next_key_frame ||
			camera.control_frames.size() < camera.number_of_control_frames)
		{
			std::cout << "Adding new control frame at index " << frame_index << std::endl;
			updateControlFrames(camera, frame_index, thumbnail);
			std::cout << "Next key frame will be at: " << camera.next_key_frame << std::endl;
		}
		camera.next_frame_index = frame_index + 1;
		auto end_time = std::chrono::high_resolution_clock::now();
		auto processing_time = std::chrono::duration_cast<std::chrono::milliseconds>(
			end_time - start_time
		);
		updateStatistics(camera, camera.most_recent_psnr_score, camera.movement_detected,
			processing_time);
		std::cout << "Processing time: " << processing_time.count() << "ms" << std::endl;
		std::cout << "=== End Debug ===" << std::endl;
		return outputObjects;
	}
} // namespace ANSCENTER