#pragma once #include "ANSODEngine.h" #include "ANSYOLOOD.h" #include "ANSTENSORRTOD.h" #include "ANSTENSORRTCL.h" #include "ANSOPENVINOCL.h" #include "ANSOPENVINOOD.h" #include "ANSYOLOV10RTOD.h" #include "ANSYOLOV10OVOD.h" #include "ANSCUSTOMDetector.h" #include "ANSFD.h" #include "ANSANOMALIB.h" #include "ANSPOSE.h" #include "ANSSAM.h" #include #include #include "utils/visualizer.hpp" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include //ANSENGINE_API void* AllocMemory(size_t size); //ANSENGINE_API void FreeMemory_(void* ptr); //#define FreeMemory(ptr) (FreeMemory_(*(ptr)), *(ptr)=0); static const float lmheight = 112.; static const float lmwidth = 96.; // reference landmarks points in the unit square [0,1]x[0,1] static const float ref_landmarks_normalized[] = { 30.2946f / lmwidth, 51.6963f / lmheight, 65.5318f / lmwidth, 51.5014f / lmheight, 48.0252f / lmwidth, 71.7366f / lmheight, 33.5493f / lmwidth, 92.3655f / lmheight, 62.7299f / lmwidth, 92.2041f / lmheight }; namespace ANSCENTER { template T GetData(const boost::property_tree::ptree& pt, const std::string& key) { T ret; if (boost::optional data = pt.get_optional(key)) { ret = data.get(); } return ret; } // ANNHUB void ANNHUBClassifier::ReLu(std::vector& iVal, std::vector& oVal) { int dim = iVal.size(); oVal.resize(dim); for (int i = 0; i < dim; i++) { if (iVal[i] >= 0) oVal[i] = iVal[i]; else oVal[i] = 0; } } void ANNHUBClassifier::LogSig(std::vector& iVal, std::vector& oVal) { int dim = iVal.size(); oVal.resize(dim); for (int i = 0; i < dim; i++) { oVal[i] = 1 / (1 + exp(-iVal[i])); } } void ANNHUBClassifier::TanSig(std::vector& iVal, std::vector& oVal) { int dim = iVal.size(); oVal.resize(dim); for (int i = 0; i < dim; i++) { oVal[i] = 2 / (1 + exp(-2 * iVal[i])) - 1; } } void ANNHUBClassifier::PureLin(std::vector& iVal, std::vector& oVal) { oVal = iVal; } void ANNHUBClassifier::SoftMax(std::vector& iVal, std::vector& oVal) { double softmaxWeight = 0; int dim = iVal.size(); oVal.resize(dim); if (dim == 1) { oVal[0] = 1 / (1 + exp(-iVal[0])); } else { for (int i = 0; i < dim; i++) { softmaxWeight = softmaxWeight + exp(iVal[i]); } for (int i = 0; i < dim; i++) { oVal[i] = exp(iVal[i]) / softmaxWeight; } } } void ANNHUBClassifier::ActivationFunction(std::vector& iVal, std::vector& oVal, int mode) { switch (mode) { case 0: PureLin(iVal, oVal); break; case 1: ReLu(iVal, oVal); break; case 2: LogSig(iVal, oVal); break; case 3: TanSig(iVal, oVal); break; case 4: SoftMax(iVal, oVal); break; default: TanSig(iVal, oVal); break; } } void ANNHUBClassifier::CheckLicense() { try { _licenseValid = true; } catch (std::exception& e) { } } ANNHUBClassifier::ANNHUBClassifier() { // Control creation isCreated = 0; // Structrural parameters nInputNodes = 1; nHiddenNodes = 10; nOutputNodes = 1; hiddenActivation = 2; // default =2 outputActivation = 2; // default =2; dataNormalisationModeInput = 0; dataNormalisationModeOutput = 0; } ANNHUBClassifier::~ANNHUBClassifier() { Destroy(); } void ANNHUBClassifier::Destroy() { if (isCreated != 0) { FreeNeuralNetwork(); } } void ANNHUBClassifier::FreeNeuralNetwork() { try { // Clear vectors IW.clear(); LW.clear(); nInput.clear(); nOutput.clear(); Ib.clear(); Lb.clear(); xminInput.clear(); xmaxInput.clear(); xminOutput.clear(); xmaxOutput.clear(); // Reset the flag indicating whether the network is created isCreated = 0; } catch (const std::exception& e) { //this->_logger.LogFatal("ANNHUBAPI::FreeNeuralNetwork. Error:", e.what(), __FILE__, __LINE__); } } void ANNHUBClassifier::PreProcessing(std::vector& Input) { switch (dataNormalisationModeInput) { case 0: // linear break; case 1: // mapminmax for (int i = 0; i < nInputNodes; i++) { if (xmaxInput[i] != xminInput[i]) { Input[i] = (Input[i] - xminInput[i]) * (ymaxInput - yminInput) / (xmaxInput[i] - xminInput[i]) + yminInput; } } break; default:// minmaxmap for (int i = 0; i < nInputNodes; i++) { if (xmaxInput[i] != xminInput[i]) { Input[i] = (Input[i] - xminInput[i]) * (ymaxInput - yminInput) / (xmaxInput[i] - xminInput[i]) + yminInput; } } break; } } void ANNHUBClassifier::PostProcessing(std::vector& Output) { switch (dataNormalisationModeOutput) { case 0: // linear break; case 1: for (int i = 0; i < nOutputNodes; i++) { if (ymaxOutput != yminOutput) { Output[i] = (Output[i] - yminOutput) * (xmaxOutput[i] - xminOutput[i]) / (ymaxOutput - yminOutput) + xminOutput[i]; } } break; default: for (int i = 0; i < nOutputNodes; i++) { if (ymaxOutput != yminOutput) { Output[i] = (Output[i] - yminOutput) * (xmaxOutput[i] - xminOutput[i]) / (ymaxOutput - yminOutput) + xminOutput[i]; } } break; } } int ANNHUBClassifier::ImportANNFromFile(std::string filename) { try { FILE* fp; int err = fopen_s(&fp, filename.c_str(), "r"); // r: Opens for reading. if (err != 0) return -2; float value, randNum; //0. Check if it the correct type for C language fscanf_s(fp, "%f", &value); if (static_cast(value) != 1) { fclose(fp); return -1; } // Assume that the type C is 0 //1. Load random number fscanf_s(fp, "%f", &randNum); //2. Structure (IDs) int trainingEngine, hlAct, olAct, costFunct, prePro, postPro, evalModel; fscanf_s(fp, "%f", &value); trainingEngine = static_cast(value); fscanf_s(fp, "%f", &value); hlAct = static_cast(value); fscanf_s(fp, "%f", &value); olAct = static_cast(value); fscanf_s(fp, "%f", &value); costFunct = static_cast(value); fscanf_s(fp, "%f", &value); prePro = static_cast(value); fscanf_s(fp, "%f", &value); postPro = static_cast(value); fscanf_s(fp, "%f", &value); evalModel = static_cast(value); //2.1 Activation function hiddenActivation = hlAct - 10; outputActivation = olAct - 10; //2.2 Data Processing dataNormalisationModeInput = prePro - 1000; dataNormalisationModeOutput = postPro - 1000; // 3. Load neural network structure and min max inputs value for pre-post processing int ipNodes, hdNodes, opNodes; fscanf_s(fp, "%f", &value); ipNodes = static_cast(value); fscanf_s(fp, "%f", &value); hdNodes = static_cast(value); fscanf_s(fp, "%f", &value); opNodes = static_cast(value); Create(ipNodes, hdNodes, opNodes); // 4. Load Input-Hidden weights (extraction formula) for (int j = 0; j < nInputNodes; j++) { for (int i = 0; i < nHiddenNodes; i++) { fscanf_s(fp, "%f", &value); IW[i][j] = value - randNum; } } // 4.1. Load bias Hidden weights for (int i = 0; i < nHiddenNodes; i++) { fscanf_s(fp, "%f", &value); Ib[i] = value - randNum; } // 4.2. Load Hidden_Output weights for (int j = 0; j < nHiddenNodes; j++) { for (int i = 0; i < nOutputNodes; i++) { fscanf_s(fp, "%f", &value); LW[i][j] = value - randNum; } } // 4.3. Load bias Output weights for (int i = 0; i < nOutputNodes; i++) { fscanf_s(fp, "%f", &value); Lb[i] = value - randNum; } // 5. For Pre-processing if (dataNormalisationModeInput >= 0) { // Range settings fscanf_s(fp, "%f", &value); yminInput = value - randNum; fscanf_s(fp, "%f", &value); ymaxInput = value - randNum; // Min and max for (int i = 0; i < nInputNodes; i++) { fscanf_s(fp, "%f", &value); xminInput[i] = value - randNum; } for (int i = 0; i < nInputNodes; i++) { fscanf_s(fp, "%f", &value); xmaxInput[i] = value - randNum; } } // 6. For Post-processing if (dataNormalisationModeOutput >= 0) { // Range settings fscanf_s(fp, "%f", &value); yminOutput = value - randNum; fscanf_s(fp, "%f", &value); ymaxOutput = value - randNum; for (int i = 0; i < nOutputNodes; i++) { fscanf_s(fp, "%f", &value); xminOutput[i] = value - randNum; } for (int i = 0; i < nOutputNodes; i++) { fscanf_s(fp, "%f", &value); xmaxOutput[i] = value - randNum; } } fclose(fp); return 0; } catch (std::exception& e) { //this->_logger.LogFatal("ANNHUBAPI::ImportANNFromFile. Error:", e.what(), __FILE__, __LINE__); return -1; } } void ANNHUBClassifier::Create(int inputNodes, int hiddenNodes, int outputNodes) { try { if (isCreated != 0) { FreeNeuralNetwork(); } nInputNodes = inputNodes; nHiddenNodes = hiddenNodes; nOutputNodes = outputNodes; nInput.resize(inputNodes); nOutput.resize(outputNodes); IW.resize(hiddenNodes, std::vector(inputNodes)); LW.resize(outputNodes, std::vector(hiddenNodes)); Ib.resize(hiddenNodes); Lb.resize(outputNodes); xminInput.resize(inputNodes); xmaxInput.resize(inputNodes); xminOutput.resize(outputNodes); xmaxOutput.resize(outputNodes); isCreated = 1; } catch (std::exception& e) { //this->_logger.LogFatal("ANNHUBAPI::Create. Error:", e.what(), __FILE__, __LINE__); } } bool ANNHUBClassifier::Init(std::string licenseKey, std::string modelFilePath) { try { _licenseKey = licenseKey; _modelFilePath = modelFilePath; CheckLicense(); if (_licenseValid) { int result = ImportANNFromFile(_modelFilePath); if (result == 0) return true; else return false; } else { return false; } } catch (std::exception& e) { //this->_logger.LogFatal("ANNHUBAPI::Init. Error:", e.what(), __FILE__, __LINE__); return false; } } std::vector ANNHUBClassifier::Inference(std::vector ip) { try { int i, j; std::vector a1(nHiddenNodes), n1(nHiddenNodes), n2(nOutputNodes); if (!_licenseValid) return {}; // Invalid license if (isCreated == 0) return {}; // Need to check the input size as well, return {} if it fails PreProcessing(ip); //1. Calculate n1 for (i = 0; i < nHiddenNodes; i++) { n1[i] = 0; for (j = 0; j < nInputNodes; j++) { if (std::isnan(IW[i][j]) || std::isnan(ip[j])) { std::cerr << "NaN detected: IW[" << i << "][" << j << "] = " << IW[i][j] << ", ip[" << j << "] = " << ip[j] << std::endl; } n1[i] = n1[i] + IW[i][j] * ip[j]; } n1[i] = n1[i] + Ib[i]; } ActivationFunction(n1, a1, hiddenActivation); // 3. Calculate n2 for (i = 0; i < nOutputNodes; i++) { n2[i] = 0; for (j = 0; j < nHiddenNodes; j++) { n2[i] = n2[i] + LW[i][j] * a1[j]; } n2[i] = n2[i] + Lb[i]; } ActivationFunction(n2, nOutput, outputActivation); PostProcessing(nOutput); return nOutput; } catch (std::exception& e) { //this->_logger.LogFatal("ANNHUBAPI::Inference. Error:", e.what(), __FILE__, __LINE__); return {}; } } // For multiple cameras //MoveDetectsHandler::MoveDetectsHandler() //{ // // Initialize shared parameters here if needed //} //MoveDetectsHandler::~MoveDetectsHandler() { // for (auto& [key, cameraData] : cameras) // { // cameraData.clear(); // Clear each CameraData instance // } // cameras.clear(); // Clear the map itself //} //bool MoveDetectsHandler::empty(const std::string& camera_id) const //{ // auto it = cameras.find(camera_id); // return it == cameras.end() || it->second.control_frames.empty(); //} //MoveDetectsHandler& MoveDetectsHandler::clear(const std::string& camera_id) //{ // cameras[camera_id] = CameraData(); // return *this; //} //cv::Mat MoveDetectsHandler::simple_colour_balance(const cv::Mat& src) //{ // if (src.empty() || src.channels() != 3) // { // return src; // } // const float full_percent = 1.0; // const float half_percent = full_percent / 200.0f; // std::vector tmpsplit; // cv::split(src, tmpsplit); // for (size_t idx = 0; idx < 3; idx++) // { // // find the low and high precentile values (based on the input percentile) // cv::Mat flat; // tmpsplit[idx].reshape(1, 1).copyTo(flat); // cv::sort(flat, flat, cv::SORT_EVERY_ROW + cv::SORT_ASCENDING); // const int lo = flat.at(cvFloor(((float)flat.cols) * (0.0 + half_percent))); // const int hi = flat.at(cvCeil(((float)flat.cols) * (1.0 - half_percent))); // // saturate below the low percentile and above the high percentile // tmpsplit[idx].setTo(lo, tmpsplit[idx] < lo); // tmpsplit[idx].setTo(hi, tmpsplit[idx] > hi); // // scale the channel // cv::normalize(tmpsplit[idx], tmpsplit[idx], 0, 255, cv::NORM_MINMAX); // } // cv::Mat output; // cv::merge(tmpsplit, output); // return output; //} //cv::Rect MoveDetectsHandler::BoundingBoxFromContour(std::vector contour) { // if (contour.size() <= 0) { // cv::Rect empty; // return empty; // } // if (cv::contourArea(contour) < 1000) { // cv::Rect empty; // return empty; // } // cv::Point minp(contour[0].x, contour[0].y), // maxp(contour[0].x, contour[0].y); // for (unsigned int i = 0; i < contour.size(); i++) { // if (contour[i].x < minp.x) { // minp.x = contour[i].x; // } // if (contour[i].y < minp.y) { // minp.y = contour[i].y; // } // if (contour[i].x > maxp.x) { // maxp.x = contour[i].x; // } // if (contour[i].y > maxp.y) { // maxp.y = contour[i].y; // } // } // cv::Rect box(minp, maxp); // return box; //} //cv::Mat MoveDetectsHandler::getOutput(const std::string& camera_id) const //{ // return cameras.at(camera_id).output; //} //const std::vector>& MoveDetectsHandler::getContours(const std::string& camera_id) const //{ // return cameras.at(camera_id).contours; //} //std::vector MoveDetectsHandler::MovementDetect(const std::string& camera_id, cv::Mat& next_image) { // return MovementDetect(camera_id, cameras[camera_id].next_frame_index, next_image); //} //double MoveDetectsHandler::psnr(const cv::Mat& src, const cv::Mat& dst) //{ // if (src.empty() || dst.empty()) // { // return 0.0; // } // if (src.type() != dst.type() || // src.cols != dst.cols || // src.rows != dst.rows) // { // return 0.0; // } // cv::Mat s1; // cv::absdiff(src, dst, s1); // |src - dst| // s1.convertTo(s1, CV_32F); // cannot make a square on 8 bits // s1 = s1.mul(s1); // |src - dst|^2 // cv::Scalar s = sum(s1); // sum elements per channel // const double sse = s.val[0] + s.val[1] + s.val[2]; // sum channels // if (sse <= 1e-10) // for small values return zero // { // return 0.0; // } // const double mse = sse / static_cast(src.channels() * src.total()); // const double psnr = 10.0 * std::log10((255 * 255) / mse); // return psnr; //} //std::vector MoveDetectsHandler::MovementDetect(const std::string& camera_id, const size_t frame_index, cv::Mat& image) { // std::vector outputObjects; // CameraData& camera = getCameraData(camera_id); // camera.contours.clear(); // if (image.empty()) { // return outputObjects; // } // // Set thumbnail size for faster comparison // if (camera.thumbnail_size.area() <= 1) { // camera.thumbnail_ratio = std::clamp(camera.thumbnail_ratio, 0.01, 1.0); // camera.thumbnail_size.width = static_cast(image.cols * camera.thumbnail_ratio); // camera.thumbnail_size.height = static_cast(image.rows * camera.thumbnail_ratio); // } // cv::Mat scb = image; //simple_colour_balance(image); // cv::Mat thumbnail; // cv::resize(scb, thumbnail, camera.thumbnail_size, 0, 0, cv::INTER_AREA); // bool previous_movement_detected = camera.movement_detected; // camera.movement_detected = false; // for (auto iter = camera.control_frames.rbegin(); iter != camera.control_frames.rend(); ++iter) { // const cv::Mat& control_image = iter->second; // // Compute PSNR between control frame and current thumbnail // camera.most_recent_psnr_score = psnr(control_image, thumbnail); // if (camera.most_recent_psnr_score < camera.psnr_threshold) { // camera.movement_detected = true; // camera.movement_last_detected = std::chrono::high_resolution_clock::now(); // camera.frame_index_with_movement = frame_index; // // Compute movement mask // cv::Mat differences; // cv::absdiff(control_image, thumbnail, differences); // // This gives us a very tiny 3-channel image. // // Now resize it to match the original image size. // cv::Mat differences_resized; // cv::resize(differences, differences_resized, image.size(), 0, 0, cv::INTER_CUBIC); // // We'd like to generate a binary threshold, but that requires us to convert // // the image to greyscale first. // cv::Mat greyscale; // cv::cvtColor(differences_resized, greyscale, cv::COLOR_BGR2GRAY); // cv::Mat threshold; // cv::threshold(greyscale, threshold, 0.0, 255.0, cv::THRESH_BINARY | cv::THRESH_OTSU); // // And finally we dilate + erode the results to combine regions. // cv::Mat dilated; // cv::dilate(threshold, dilated, cv::Mat(), cv::Point(-1, -1), 10); // cv::Mat eroded; // cv::erode(dilated, eroded, cv::Mat(), cv::Point(-1, -1), 10); // // Store movement mask // camera.mask = eroded.clone(); // break; // } // } // // Detect transition states // camera.transition_detected = (previous_movement_detected != camera.movement_detected); // if (camera.mask.empty() || (camera.transition_detected && !camera.movement_detected)) { // camera.mask = cv::Mat(image.size(), CV_8UC1, cv::Scalar(0)); // } // // Clone image for output // camera.output = image.clone(); // std::vector hierarchy; // cv::findContours(camera.mask, camera.contours, hierarchy, cv::RETR_EXTERNAL, cv::CHAIN_APPROX_SIMPLE); // // Process each detected movement region // for (const auto& contour : camera.contours) { // double area = cv::contourArea(contour); // //if (area < 1000) continue; // Filter small areas // // Draw movement contours // cv::polylines(camera.output, contour, true, cv::Scalar(0, 0, 255), camera.contours_size, camera.line_type); // // Get bounding rectangle for each movement region // cv::Rect boundingRect = cv::boundingRect(contour); // // Ensure bounding box stays within image // boundingRect.x = std::max(0, std::min(boundingRect.x, image.cols - 1)); // boundingRect.y = std::max(0, std::min(boundingRect.y, image.rows - 1)); // boundingRect.width = std::max(1, std::min(boundingRect.width, image.cols - boundingRect.x)); // boundingRect.height = std::max(1, std::min(boundingRect.height, image.rows - boundingRect.y)); // Object obj; // obj.box = boundingRect; // obj.classId = 0; // obj.className = "movement"; // obj.confidence = 1.0; // obj.mask = image(boundingRect).clone(); // Extract movement region // int minObjectSize = std::min(boundingRect.width, boundingRect.height); // int objectSize = boundingRect.width * boundingRect.height; // // Ensure detected objects meet size criteria before adding // if ((minObjectSize > 5) && (objectSize > 25)) { // outputObjects.push_back(obj); // } // } // // Update control frames (store key frames for PSNR comparison) // if (frame_index >= camera.next_key_frame || camera.control_frames.size() < camera.number_of_control_frames) { // camera.control_frames[frame_index] = thumbnail.clone(); // while (camera.control_frames.size() > camera.number_of_control_frames) { // auto iter = camera.control_frames.begin(); // camera.control_frames.erase(iter); // } // camera.next_key_frame = frame_index + camera.key_frame_frequency; // } // camera.next_frame_index = frame_index + 1; // return outputObjects; //} // // Helper class cv::Mat ANSCENTER::ANSUtilityHelper::JpegStringToMat(const std::string& jpegString) { // Check if the input string is empty if (jpegString.empty()) { return cv::Mat(); // Return an empty Mat if the input string is empty } try { // Convert the string to a vector of bytes std::vector jpegData(jpegString.begin(), jpegString.end()); // Decode the JPEG data into a cv::Mat cv::Mat image = cv::imdecode(jpegData, cv::IMREAD_COLOR); return image; } catch (const std::exception& e) { std::cerr << "Error decoding JPEG string: " << e.what() << std::endl; return cv::Mat(); // Return an empty Mat if decoding fails } } std::vector ANSCENTER::ANSUtilityHelper::Split(const std::string& s, char delimiter) { std::vector tokens; std::string token; std::istringstream tokenStream(s); while (getline(tokenStream, token, delimiter)) { tokens.push_back(token); } return tokens; } std::vector ANSCENTER::ANSUtilityHelper::StringToPolygon(const std::string& input) { std::vector points; size_t delimiterPos = input.find('|'); if (delimiterPos == std::string::npos) { return points; // Return empty vector if format is incorrect } std::string xPart = input.substr(0, delimiterPos); std::string yPart = input.substr(delimiterPos + 1); std::vector xTokens = ANSCENTER::ANSUtilityHelper::Split(xPart, ';'); std::vector yTokens = ANSCENTER::ANSUtilityHelper::Split(yPart, ';'); if (xTokens.size() != yTokens.size()) { return points; // Return empty if sizes don't match } for (size_t i = 0; i < xTokens.size(); ++i) { int x = std::stoi(xTokens[i]); int y = std::stoi(yTokens[i]); points.emplace_back(x, y); } return points; } cv::Mat ANSCENTER::ANSUtilityHelper::CropPolygon(const cv::Mat& image, const std::vector& polygon) { // Check if the image is empty if (image.empty()) { return cv::Mat(); } // Create a mask with the same size as the image, initially filled with 0 (black) cv::Mat mask = cv::Mat::zeros(image.size(), CV_8UC1); // Define the region of interest using the polygon points const cv::Point* pts = (const cv::Point*)cv::Mat(polygon).data; int npts = cv::Mat(polygon).rows; // Draw the polygon on the mask cv::fillPoly(mask, &pts, &npts, 1, cv::Scalar(255)); // Crop the image using the mask cv::Mat croppedImage; image.copyTo(croppedImage, mask); return croppedImage; } cv::Mat ANSCENTER::ANSUtilityHelper::CropFromStringPolygon(const cv::Mat& image, const std::string& strPolygon) { //1. Convert string to polygon std::vector polygon = ANSCENTER::ANSUtilityHelper::StringToPolygon(strPolygon); // 2. To cropped image cv::Mat croppedImage = ANSCENTER::ANSUtilityHelper::CropPolygon(image, polygon); return croppedImage; } std::vector ANSUtilityHelper::PolygonFromString(const std::string& str) { std::vector points; if (str.empty()) { return points; } std::stringstream ss(str); std::string token; while (std::getline(ss, token, ';')) { std::stringstream pairStream(token); std::string xStr, yStr; if (std::getline(pairStream, xStr, ',') && std::getline(pairStream, yStr, ',')) { try { float x = std::stof(xStr); float y = std::stof(yStr); points.emplace_back(x, y); } catch (...) { // Skip invalid entries } } } return points; } std::vector ANSUtilityHelper::StringToKeypoints(const std::string& str) { std::vector keypoints; if (str.empty()) { return keypoints; } std::stringstream ss(str); std::string token; while (std::getline(ss, token, ';')) { try { keypoints.push_back(std::stof(token)); } catch (...) { // Skip invalid numbers } } return keypoints; } std::vector ANSUtilityHelper::GetDetectionsFromString(const std::string& strDets) { boost::property_tree::ptree root; boost::property_tree::ptree trackingObjects; boost::property_tree::ptree pt; std::vector detectionObjects; //1. Get input std::stringstream ss; ss << strDets; boost::property_tree::read_json(ss, pt); //2. Parsing BOOST_FOREACH(const boost::property_tree::ptree::value_type & child, pt.get_child("results")) { const boost::property_tree::ptree& result = child.second; const auto class_id = GetData(result, "class_id"); const auto track_id = GetData(result, "track_id"); const auto class_name = GetData(result, "class_name"); const auto prob = GetData(result, "prob"); const auto x = GetData(result, "x"); const auto y = GetData(result, "y"); const auto width = GetData(result, "width"); const auto height = GetData(result, "height"); const auto mask = GetData(result, "mask"); const auto extra_info = GetData(result, "extra_info"); const auto camera_id = GetData(result, "camera_id"); const auto polygon = GetData(result, "polygon"); const auto kps = GetData(result, "kps"); //3. Create object ANSCENTER::Object temp; temp.classId = class_id; temp.trackId = track_id; temp.className = class_name; temp.confidence = prob; temp.box.x = x; temp.box.y = y; temp.box.width = width; temp.box.height = height; temp.extraInfo = extra_info; temp.cameraId = camera_id; temp.mask = ANSCENTER::ANSUtilityHelper::JpegStringToMat(mask); temp.polygon = ANSCENTER::ANSUtilityHelper::PolygonFromString(polygon); temp.kps = ANSCENTER::ANSUtilityHelper::StringToKeypoints(kps); detectionObjects.push_back(temp); } return detectionObjects; } std::vector ANSCENTER::ANSUtilityHelper::GetBoundingBoxesFromString(std::string strBBoxes) { std::vector bBoxes; bBoxes.clear(); std::stringstream ss; ss << strBBoxes; boost::property_tree::ptree pt; boost::property_tree::read_json(ss, pt); BOOST_FOREACH(const boost::property_tree::ptree::value_type & child, pt.get_child("results")) { const boost::property_tree::ptree& result = child.second; const auto x = GetData(result, "x"); const auto y = GetData(result, "y"); const auto width = GetData(result, "width"); const auto height = GetData(result, "height"); cv::Rect rectTemp; rectTemp.x = x; rectTemp.y = y; rectTemp.width = width; rectTemp.height = height; bBoxes.push_back(rectTemp); } return bBoxes; } ANSCENTER::Params ANSCENTER::ANSUtilityHelper::ParseCustomParameters(const std::string& jsonStr) { Params params; std::stringstream ss(jsonStr); boost::property_tree::ptree root; read_json(ss, root); for (auto& item : root.get_child("ROI_Config")) { const auto& pt = item.second; ROIConfig cfg; cfg.Rectangle = GetData(pt, "Rectangle"); cfg.Polygon = GetData(pt, "Polygon"); cfg.Line = GetData(pt, "Line"); cfg.MinItems = GetData(pt, "MinItems"); cfg.MaxItems = GetData(pt, "MaxItems"); cfg.Name = GetData(pt, "Name"); cfg.ROIMatch = GetData(pt, "ROI-Match"); params.ROI_Config.push_back(cfg); } for (auto& item : root.get_child("ROI_Options")) { params.ROI_Options.push_back(item.second.get_value()); } for (auto& item : root.get_child("Parameters")) { const auto& pt = item.second; Parameter p; p.Name = GetData(pt, "Name"); p.DataType = GetData(pt, "DataType"); p.NoOfDecimals = GetData(pt, "NoOfdecimals"); p.MaxValue = GetData(pt, "MaxValue"); p.MinValue = GetData(pt, "MinValue"); p.StartValue = GetData(pt, "StartValue"); p.DefaultValue = GetData(pt, "DefaultValue"); p.Value = GetData(pt, "Value"); for (const auto& li : pt.get_child("ListItems")) { p.ListItems.push_back(li.second.get_value()); } params.Parameters.push_back(p); } for (auto& item : root.get_child("ROI_Values")) { const auto& pt = item.second; ROIValue rv; rv.ROIMatch = GetData(pt, "ROI-Match"); rv.Option = GetData(pt, "Option"); rv.Name = GetData(pt, "Name"); rv.OriginalImageSize = GetData(pt, "OriginalImageSize"); for (const auto& pointNode : pt.get_child("ROIPoints")) { Point ptObj; ptObj.x = GetData(pointNode.second, "x"); ptObj.y = GetData(pointNode.second, "y"); rv.ROIPoints.push_back(ptObj); } params.ROI_Values.push_back(rv); } return params; } bool ANSCENTER::ANSUtilityHelper::ParseActiveROIMode(const std::string activeROIMode, int& mode, double &detectionScore,std::vector& trackingObjectIds) { // Check if the input string is empty if (activeROIMode.empty()) { mode = 0; trackingObjectIds.clear(); return true; } try { std::istringstream iss(activeROIMode); boost::property_tree::ptree pt; boost::property_tree::read_json(iss, pt); // Parse Mode using the helper mode = GetData(pt, "Mode"); // DetectionScore detectionScore = GetData(pt, "Score"); // Parse ObjectIds manually trackingObjectIds.clear(); const auto& idsNode = pt.get_child("ObjectIds"); for (const auto& item : idsNode) { trackingObjectIds.push_back(item.second.get_value()); } return true; } catch (const std::exception& ex) { // Handle malformed JSON or missing fields mode = -1; trackingObjectIds.clear(); return false; } } std::string ANSCENTER::ANSUtilityHelper::SerializeCustomParamters(const ANSCENTER::Params& params) { boost::property_tree::ptree root; boost::property_tree::ptree roiConfig, roiOptions, parameters, roiValues; // ROI_Config for (const auto& cfg : params.ROI_Config) { boost::property_tree::ptree pt; pt.put("Rectangle", cfg.Rectangle); // convert to string if needed pt.put("Polygon", cfg.Polygon); pt.put("Line", cfg.Line); pt.put("MinItems", cfg.MinItems); pt.put("MaxItems", cfg.MaxItems); pt.put("Name", cfg.Name); pt.put("ROI-Match", cfg.ROIMatch); roiConfig.push_back(std::make_pair("", pt)); } root.add_child("ROI_Config", roiConfig); // ROI_Options for (const auto& opt : params.ROI_Options) { boost::property_tree::ptree optNode; optNode.put("", opt); roiOptions.push_back(std::make_pair("", optNode)); } root.add_child("ROI_Options", roiOptions); // Parameters for (const auto& param : params.Parameters) { boost::property_tree::ptree pt; pt.put("Name", param.Name); pt.put("DataType", param.DataType); pt.put("NoOfdecimals", param.NoOfDecimals); pt.put("MaxValue", param.MaxValue); pt.put("MinValue", param.MinValue); pt.put("StartValue", param.StartValue); boost::property_tree::ptree listItemsNode; for (const auto& item : param.ListItems) { boost::property_tree::ptree itemNode; itemNode.put("", item); listItemsNode.push_back(std::make_pair("", itemNode)); } pt.add_child("ListItems", listItemsNode); pt.put("DefaultValue", param.DefaultValue); pt.put("Value", param.Value); parameters.push_back(std::make_pair("", pt)); } root.add_child("Parameters", parameters); // ROI_Values for (const auto& rv : params.ROI_Values) { boost::property_tree::ptree pt; pt.put("ROI-Match", rv.ROIMatch); boost::property_tree::ptree pointsNode; for (const auto& point : rv.ROIPoints) { boost::property_tree::ptree pointNode; pointNode.put("x", point.x); pointNode.put("y", point.y); pointsNode.push_back(std::make_pair("", pointNode)); } pt.add_child("ROIPoints", pointsNode); pt.put("Option", rv.Option); pt.put("Name", rv.Name); pt.put("OriginalImageSize", rv.OriginalImageSize); roiValues.push_back(std::make_pair("", pt)); } root.add_child("ROI_Values", roiValues); // Serialize to JSON std::ostringstream oss; boost::property_tree::write_json(oss, root, false); std::string jsonString = oss.str(); // Remove whitespace characters jsonString.erase(std::remove(jsonString.begin(), jsonString.end(), '\n'), jsonString.end()); jsonString.erase(std::remove(jsonString.begin(), jsonString.end(), '\r'), jsonString.end()); jsonString.erase(std::remove(jsonString.begin(), jsonString.end(), '\t'), jsonString.end()); jsonString.erase(std::remove(jsonString.begin(), jsonString.end(), ' '), jsonString.end()); return jsonString; } cv::Rect ANSCENTER::ANSUtilityHelper::GetBoundingBoxFromPolygon(const std::vector& polygon) { if (polygon.empty()) { return cv::Rect(); // Return an empty rectangle if the polygon has no points } // Compute the bounding rectangle cv::Rect boundingBox = cv::boundingRect(polygon); return boundingBox; } std::string ANSCENTER::ANSUtilityHelper::VectorDetectionToJsonString(const std::vector& dets) { if (dets.empty()) { return R"({"results":[]})"; } try { nlohmann::json root; auto& results = root["results"] = nlohmann::json::array(); for (const auto& det : dets) { results.push_back({ {"class_id", std::to_string(det.classId)}, //{"track_id", std::to_string(det.trackId)}, {"track_id", "0"}, {"class_name", det.className}, {"prob", std::to_string(det.confidence)}, {"x", std::to_string(det.box.x)}, {"y", std::to_string(det.box.y)}, {"width", std::to_string(det.box.width)}, {"height", std::to_string(det.box.height)}, {"mask", ""}, // TODO: convert masks to comma separated string {"extra_info",det.extraInfo}, {"camera_id", det.cameraId}, {"polygon", PolygonToString(det.polygon)}, {"kps", KeypointsToString(det.kps)} }); } // ensure_ascii=true escapes non-ASCII chars as \uXXXX for LabVIEW compatibility return root.dump(-1, ' ', true); } catch (const std::exception& e) { return R"({"results":[],"error":"Serialization failed"})"; } } cv::Mat ANSCENTER::ANSUtilityHelper::ReadImagePath(const std::string& imagePath) { return cv::imread(imagePath); } std::vector ANSCENTER::ANSUtilityHelper::DecodeBase64(const std::string& base64) { static const std::string base64_chars = "ABCDEFGHIJKLMNOPQRSTUVWXYZ" "abcdefghijklmnopqrstuvwxyz" "0123456789+/"; std::vector result; int i = 0, j = 0, in = 0; unsigned char char_array_4[4], char_array_3[3]; for (const auto& c : base64) { if (c == '=') break; if (isspace(c)) continue; // ignore whitespace if (c == '+' || c == '/') { char_array_4[i++] = c; } else if (isalnum(c)) { char_array_4[i++] = c; } if (i == 4) { for (i = 0; i < 4; i++) { char_array_4[i] = base64_chars.find(char_array_4[i]); } char_array_3[0] = (char_array_4[0] << 2) + ((char_array_4[1] & 0x30) >> 4); char_array_3[1] = ((char_array_4[1] & 0x0f) << 4) + ((char_array_4[2] & 0x3c) >> 2); char_array_3[2] = ((char_array_4[2] & 0x03) << 6) + char_array_4[3]; for (i = 0; i < 3; i++) { result.push_back(char_array_3[i]); } i = 0; } } if (i) { for (j = i; j < 4; j++) { char_array_4[j] = 0; } for (j = 0; j < 4; j++) { char_array_4[j] = base64_chars.find(char_array_4[j]); } char_array_3[0] = (char_array_4[0] << 2) + ((char_array_4[1] & 0x30) >> 4); char_array_3[1] = ((char_array_4[1] & 0x0f) << 4) + ((char_array_4[2] & 0x3c) >> 2); char_array_3[2] = ((char_array_4[2] & 0x03) << 6) + char_array_4[3]; for (j = 0; j < i - 1; j++) { result.push_back(char_array_3[j]); } } return result; } cv::Mat ANSCENTER::ANSUtilityHelper::ReadImageStreamBase64(const std::string& imageStreamBase64) { std::vector decoded_data = DecodeBase64(imageStreamBase64); return cv::imdecode(decoded_data, cv::IMREAD_COLOR); } cv::Mat ANSCENTER::ANSUtilityHelper::FormatToSquare(const cv::Mat& source) { int col = source.cols; int row = source.rows; int _max = MAX(col, row); cv::Mat result = cv::Mat::zeros(_max, _max, CV_8UC3); source.copyTo(result(cv::Rect(0, 0, col, row))); return result; } unsigned char* ANSCENTER::ANSUtilityHelper::CVMatToBytes(cv::Mat image, unsigned int& bufferLengh) { int size = image.total() * image.elemSize(); unsigned char* bytes = new unsigned char[size]; // you will have to delete[] that later std::memcpy(bytes, image.data, size * sizeof(unsigned char)); bufferLengh = size * sizeof(unsigned char); return bytes; } std::vector ANSCENTER::ANSUtilityHelper::GetConfigFileContent(std::string modelConfigFile, ModelType& modelType, std::vector& inputShape) { /* { "engine_type": "NVIDIA_GPU", "detection_type" : "DETECTION", "model_type" : "YOLOV8", "input_size" : [640, 640] , "labels" : ["person", "bicycle", "car", "motorcycle"] }*/ boost::property_tree::ptree pt; std::vector labels; boost::property_tree::read_json(modelConfigFile, pt); // 0.Get labels vector first for (auto& item : pt.get_child("labels")) { labels.push_back(item.second.get_value()); } inputShape.clear(); for (auto& itemInput : pt.get_child("input_size")) { inputShape.push_back(itemInput.second.get_value()); } // TENSORFLOW = 0, // YOLOV4 = 1, // YOLOV5 = 2, // YOLOV8 = 3, // TENSORRT = 4, // OPENVINO = 5 // FACEDETECT = 6, // FACERECOGNIZE = 7, // ALPR = 8, // OCR = 9, // ANOMALIB = 10, // POSE = 11, // SAM = 12, // ODHUBMODEL = 13, // YOLOV10RTOD = 14, // TensorRT Object Detection for Yolov10 // YOLOV10OVOD = 15, // OpenVINO Object Detection for Yolov10 // 1. Get engine type //auto it = pt.get_child("engine_type"); //std::string value = it.get_value(); std::string stModel = GetData(pt, "model_type"); if (stModel == "TENSORFLOW") { modelType = ModelType::TENSORFLOW; std::cout << "Model Type: Tensorflow" << std::endl; } else if (stModel == "YOLOV4") { modelType = ModelType::YOLOV4; std::cout << "Model Type: yolov4" << std::endl; } else if (stModel == "YOLOV5") { modelType = ModelType::YOLOV5; std::cout << "Model Type: yolov5" << std::endl; } else if (stModel == "YOLOV8") { modelType = ModelType::YOLOV8; std::cout << "Model Type: yolov8" << std::endl; } else if (stModel == "TENSORRT") { modelType = ModelType::TENSORRT; std::cout << "Model Type: tensorrt" << std::endl; } else if (stModel == "OPENVINO") { modelType = ModelType::OPENVINO; std::cout << "Model Type: openvino" << std::endl; } else if (stModel == "FACEDETECT") { modelType = ModelType::FACEDETECT; std::cout << "Model Type: face detection" << std::endl; } else if (stModel == "FACERECOGNIZE") { modelType = ModelType::FACERECOGNIZE; std::cout << "Model Type: face recogniser" << std::endl; } else if (stModel == "ALPR") { modelType = ModelType::ALPR; std::cout << "Model Type: license plate recognition" << std::endl; } else if (stModel == "OCR") { modelType = ModelType::OCR; std::cout << "Model Type: ocr" << std::endl; } else if (stModel == "ANOMALIB") { modelType = ModelType::ANOMALIB; std::cout << "Model Type: anomalib" << std::endl; } else if (stModel == "POSE") { modelType = ModelType::POSE; std::cout << "Model Type: pose" << std::endl; } else if (stModel == "SAM") { modelType = ModelType::SAM; std::cout << "Model Type: sam" << std::endl; } else if (stModel == "ODHUBMODEL") { modelType = ModelType::ODHUBMODEL; std::cout << "Model Type: odhub" << std::endl; } else if (stModel == "YOLOV10RTOD") { modelType = ModelType::YOLOV10RTOD; std::cout << "Model Type: yolov10rt" << std::endl; } else if (stModel == "YOLOV10OVOD") { modelType = ModelType::YOLOV10OVOD; std::cout << "Model Type: yolov10ov" << std::endl; } else if (stModel == "CUSTOMDETECTOR") { modelType = ModelType::CUSTOMDETECTOR; std::cout << "Model Type: custom detector" << std::endl; } else { modelType = ModelType::YOLOV8; } return labels; } MetaData ANSCENTER::ANSUtilityHelper::GetJson(const std::string& jsonPath) { if (FileExist(jsonPath)) { MetaData metaData; std::vector _mean; std::vector _std; std::vector _transforms; boost::property_tree::ptree pt, ptTransform; boost::property_tree::read_json(jsonPath, pt); auto itImageThreshold = pt.get_child("image_threshold"); auto itPixelThreshold = pt.get_child("pixel_threshold"); auto itMin = pt.get_child("min"); auto itMax = pt.get_child("max"); float image_threshold = itImageThreshold.get_value(); float pixel_threshold = itPixelThreshold.get_value(); float min = itMin.get_value(); float max = itMax.get_value(); auto rootNode = pt.get_child("transform"); auto childNode1 = rootNode.get_child("transform"); auto childNode2 = childNode1.get_child("transforms"); auto itHeight = childNode2.begin()->second.get_child("height"); auto itWidth = childNode2.begin()->second.get_child("width"); int infer_height = itHeight.get_value(); int infer_width = itWidth.get_value(); _mean.clear(); _std.clear(); std::string _classFullName; for (auto& itemInput : childNode2) { auto itClassFullName = itemInput.second.get_child("__class_fullname__"); _classFullName = itClassFullName.get_value(); std::cout << "Class Full Name:" << _classFullName << std::endl; if (_classFullName == "Normalize") { _mean.clear(); for (auto& item : itemInput.second.get_child("mean")) { _mean.push_back(item.second.get_value()); } _std.clear(); for (auto& item : itemInput.second.get_child("std")) { _std.push_back(item.second.get_value()); } } } metaData.imageThreshold = image_threshold; metaData.pixelThreshold = pixel_threshold; metaData.min = min; metaData.max = max; metaData.inferSize[0] = infer_height; metaData.inferSize[1] = infer_width; metaData._mean = _mean; metaData._std = _std; return metaData; } else { MetaData metaData; metaData.imageThreshold = 15; metaData.pixelThreshold = 12; metaData.min = 0; metaData.max = 50; metaData.inferSize[0] = 256; metaData.inferSize[1] = 256; metaData._mean = { 0.485,0.456,0.406 }; metaData._std = { 0.229 ,0.224 ,0.225 }; return metaData; } } cv::Mat ANSCENTER::ANSUtilityHelper::Resize(const cv::Mat& src, int dst_height, int dst_width, const std::string& interpolation) { cv::Mat dst(dst_height, dst_width, src.type()); if (interpolation == "bilinear") { cv::resize(src, dst, dst.size(), 0, 0, cv::INTER_LINEAR); } else if (interpolation == "nearest") { cv::resize(src, dst, dst.size(), 0, 0, cv::INTER_NEAREST); } else if (interpolation == "area") { cv::resize(src, dst, dst.size(), 0, 0, cv::INTER_AREA); } else if (interpolation == "bicubic") { cv::resize(src, dst, dst.size(), 0, 0, cv::INTER_CUBIC); } else if (interpolation == "lanczos") { cv::resize(src, dst, dst.size(), 0, 0, cv::INTER_LANCZOS4); } else { assert(0); } return dst; } cv::Mat ANSCENTER::ANSUtilityHelper::Crop(const cv::Mat& src, int top, int left, int bottom, int right) { return src(cv::Range(top, bottom + 1), cv::Range(left, right + 1)).clone(); } cv::Mat ANSCENTER::ANSUtilityHelper::Divide(const cv::Mat& src, float divide) { cv::Mat dst; src.convertTo(dst, CV_32FC3, 1.0 / divide); return dst; } cv::Mat ANSCENTER::ANSUtilityHelper::Normalize(cv::Mat& src, const std::vector& mean, const std::vector& std, bool to_rgb, bool inplace) { assert(src.channels() == mean.size()); assert(mean.size() == std.size()); cv::Mat dst; if (src.depth() == CV_32F) { dst = inplace ? src : src.clone(); } else { src.convertTo(dst, CV_32FC(src.channels())); } if (to_rgb && dst.channels() == 3) { cv::cvtColor(dst, dst, cv::ColorConversionCodes::COLOR_BGR2RGB); } auto _mean = mean; auto _std = std; for (auto i = mean.size(); i < 3; ++i) { _mean.push_back(0.); _std.push_back(1.0); } cv::Scalar mean_scalar(_mean[0], _mean[1], _mean[2]); cv::Scalar std_scalar(1.0 / _std[0], 1.0 / _std[1], 1.0 / _std[2]); cv::subtract(dst, mean_scalar, dst); cv::multiply(dst, std_scalar, dst); return dst; } cv::Mat ANSCENTER::ANSUtilityHelper::Transpose(const cv::Mat& src) { cv::Mat _src{ src.rows * src.cols, src.channels(), CV_MAKETYPE(src.depth(), 1), src.data }; cv::Mat _dst; cv::transpose(_src, _dst); return _dst; } cv::Mat ANSCENTER::ANSUtilityHelper::Pad(const cv::Mat& src, int top, int left, int bottom, int right, int border_type, float val) { cv::Mat dst; cv::Scalar scalar = { val, val, val, val }; cv::copyMakeBorder(src, dst, top, bottom, left, right, border_type, scalar); return dst; } cv::Mat ANSUtilityHelper::MeanAxis0(const cv::Mat& src) { int num = src.rows; int dim = src.cols; cv::Mat output(1, dim, CV_32F); for (int i = 0; i < dim; i++) { float sum = 0; for (int j = 0; j < num; j++) { sum += src.at(j, i); } output.at(0, i) = sum / num; } return output; } cv::Mat ANSUtilityHelper::ElementwiseMinus(const cv::Mat& A, const cv::Mat& B) { cv::Mat output(A.rows, A.cols, A.type()); assert(B.cols == A.cols); if (B.cols == A.cols) { for (int i = 0; i < A.rows; i++) { for (int j = 0; j < B.cols; j++) { output.at(i, j) = A.at(i, j) - B.at(0, j); } } } return output; } cv::Mat ANSUtilityHelper::VarAxis0(const cv::Mat& src) { cv::Mat temp_ = ElementwiseMinus(src, MeanAxis0(src)); cv::multiply(temp_, temp_, temp_); return MeanAxis0(temp_); } int ANSUtilityHelper::MatrixRank(cv::Mat M) { cv::Mat w, u, vt; cv::SVD::compute(M, w, u, vt); cv::Mat1b non_zero_singular_values = w > 0.0001; int rank = countNonZero(non_zero_singular_values); return rank; } cv::Mat ANSUtilityHelper::SimilarTransform(cv::Mat& dst, cv::Mat& src) { int num = dst.rows; int dim = dst.cols; cv::Mat src_mean = MeanAxis0(dst); cv::Mat dst_mean = MeanAxis0(src); cv::Mat src_demean = ElementwiseMinus(dst, src_mean); cv::Mat dst_demean = ElementwiseMinus(src, dst_mean); cv::Mat A = (dst_demean.t() * src_demean) / static_cast(num); cv::Mat d(dim, 1, CV_32F); d.setTo(1.0f); if (cv::determinant(A) < 0) { d.at(dim - 1, 0) = -1; } cv::Mat T = cv::Mat::eye(dim + 1, dim + 1, CV_32F); cv::Mat U, S, V; cv::SVD::compute(A, S, U, V); int rank = MatrixRank(A); if (rank == 0) { assert(rank == 0); } else if (rank == dim - 1) { if (cv::determinant(U) * cv::determinant(V) > 0) { T.rowRange(0, dim).colRange(0, dim) = U * V; } else { int s = d.at(dim - 1, 0) = -1; d.at(dim - 1, 0) = -1; T.rowRange(0, dim).colRange(0, dim) = U * V; cv::Mat diag_ = cv::Mat::diag(d); cv::Mat twp = diag_ * V; // np.dot(np.diag(d), V.T) cv::Mat B = cv::Mat::zeros(3, 3, CV_8UC1); cv::Mat C = B.diag(0); T.rowRange(0, dim).colRange(0, dim) = U * twp; d.at(dim - 1, 0) = s; } } else { cv::Mat diag_ = cv::Mat::diag(d); cv::Mat twp = diag_ * V.t(); // np.dot(np.diag(d), V.T) cv::Mat res = U * twp; // U T.rowRange(0, dim).colRange(0, dim) = -U.t() * twp; } cv::Mat var_ = VarAxis0(src_demean); float val = cv::sum(var_).val[0]; cv::Mat res; cv::multiply(d, S, res); float scale = 1.0 / val * cv::sum(res).val[0]; T.rowRange(0, dim).colRange(0, dim) = -T.rowRange(0, dim).colRange(0, dim).t(); cv::Mat temp1 = T.rowRange(0, dim).colRange(0, dim); // T[:dim, :dim] cv::Mat temp2 = src_mean.t(); // src_mean.T cv::Mat temp3 = temp1 * temp2; // np.dot(T[:dim, :dim], src_mean.T) cv::Mat temp4 = scale * temp3; T.rowRange(0, dim).colRange(dim, dim + 1) = -(temp4 - dst_mean.t()); T.rowRange(0, dim).colRange(0, dim) *= scale; return T; } std::vector ANSUtilityHelper::AlignFaceWithFivePoints(const cv::Mat& image, const std::vector> boxes, std::vector> landmarks) { std::vector output_images; output_images.reserve(boxes.size()); if (image.empty() || !image.data || !image.u) return output_images; else { std::vector> std_landmarks = { {38.2946f, 51.6963f}, {73.5318f, 51.5014f}, {56.0252f, 71.7366f}, {41.5493f, 92.3655f}, {70.7299f, 92.2041f} }; if (boxes.empty())return output_images; cv::Mat src(5, 2, CV_32FC1, std_landmarks.data()); for (int i = 0; i < landmarks.size(); i += 5) { cv::Mat dst(5, 2, CV_32FC1, landmarks.data() + i); cv::Mat m = SimilarTransform(dst, src); cv::Mat map_matrix; cv::Rect map_matrix_r = cv::Rect(0, 0, 3, 2); cv::Mat(m, map_matrix_r).copyTo(map_matrix); cv::Mat cropped_image_aligned; int outputSize = 112;// MAX(width, height); cv::warpAffine(image, cropped_image_aligned, map_matrix, { outputSize,outputSize }); if (cropped_image_aligned.empty()) { std::cout << "croppedImageAligned is empty." << std::endl; } output_images.emplace_back(cropped_image_aligned); } return output_images; } } std::vectorANSUtilityHelper::GetCroppedFaces(const cv::Mat& image, const std::vector> boxes) { if (image.empty() || !image.data || !image.u) return std::vector(); else { cv::Mat frame = image.clone(); std::vector croppedFaces; if (boxes.size() > 0) { for (int i = 0; i < boxes.size(); i++) { int x1, y1, x2, y2; x1 = boxes[i][0]; y1 = boxes[i][1]; x2 = boxes[i][2]; y2 = boxes[i][3]; cv::Rect facePos(cv::Point(x1, y1), cv::Point(x2, y2)); cv::Mat tempCrop = frame(facePos); croppedFaces.push_back(tempCrop); } } frame.release(); return croppedFaces; } } cv::Mat ANSUtilityHelper::GetCroppedFace(const cv::Mat& image, const int x1, const int y1, const int x2, const int y2) { if (image.empty() || !image.data || !image.u) return cv::Mat(); else { cv::Mat frame = image.clone(); cv::Rect facePos(cv::Point(x1, y1), cv::Point(x2, y2)); cv::Mat tempCrop = frame(facePos); frame.release(); return tempCrop; } } cv::Mat ANSUtilityHelper::GetCroppedFaceScale(const cv::Mat& image, const int x1, const int y1, const int x2, const int y2, int croppedImageSize) { cv::Mat resizedImage; cv::Mat frame = image.clone(); try { if (image.empty() || !image.data) return resizedImage; // 1. Define the bounding rectangle for cropping cv::Rect facePos(cv::Point(x1, y1), cv::Point(x2, y2)); // Ensure the rectangle has valid dimensions if (facePos.width <= 0 || facePos.height <= 0) return resizedImage; // 2. Validate crop size; minimum 112x112 for OpenVINO FaceNet model croppedImageSize = std::max(croppedImageSize, 112); // 3. Apply Gaussian blur to the cropped region for smoother output cv::Mat tempCrop = frame(facePos); // Use clone to avoid modifying the input image //cv::GaussianBlur(tempCrop, tempCrop, cv::Size(3, 3), 0); // 4. Resize the cropped face to the required size cv::resize(tempCrop, resizedImage, cv::Size(croppedImageSize, croppedImageSize), 0, 0, cv::INTER_LANCZOS4); tempCrop.release(); } catch (const std::exception& e) { std::cerr << "Error in GetCroppedFaceScale: " << e.what() << std::endl; } frame.release(); return resizedImage; } cv::Mat ANSUtilityHelper::GetTransform(cv::Mat* src, cv::Mat* dst) { cv::Mat col_mean_src; reduce(*src, col_mean_src, 0, cv::REDUCE_AVG); for (int i = 0; i < src->rows; i++) { src->row(i) -= col_mean_src; } cv::Mat col_mean_dst; reduce(*dst, col_mean_dst, 0, cv::REDUCE_AVG); for (int i = 0; i < dst->rows; i++) { dst->row(i) -= col_mean_dst; } cv::Scalar mean, dev_src, dev_dst; cv::meanStdDev(*src, mean, dev_src); dev_src(0) = std::max(static_cast(std::numeric_limits::epsilon()), dev_src(0)); *src /= dev_src(0); cv::meanStdDev(*dst, mean, dev_dst); dev_dst(0) = std::max(static_cast(std::numeric_limits::epsilon()), dev_dst(0)); *dst /= dev_dst(0); cv::Mat w, u, vt; cv::SVD::compute((*src).t() * (*dst), w, u, vt); cv::Mat r = (u * vt).t(); cv::Mat m(2, 3, CV_32F); m.colRange(0, 2) = r * (dev_dst(0) / dev_src(0)); m.col(2) = (col_mean_dst.t() - m.colRange(0, 2) * col_mean_src.t()); return m; } void ANSUtilityHelper::AlignFacesExt(std::vector* face_images, std::vector* landmarks_vec) { if (landmarks_vec->size() == 0) { return; } CV_Assert(face_images->size() == landmarks_vec->size()); cv::Mat ref_landmarks = cv::Mat(5, 2, CV_32F); for (size_t j = 0; j < face_images->size(); j++) { for (int i = 0; i < ref_landmarks.rows; i++) { ref_landmarks.at(i, 0) = ref_landmarks_normalized[2 * i] * face_images->at(j).cols; ref_landmarks.at(i, 1) = ref_landmarks_normalized[2 * i + 1] * face_images->at(j).rows; landmarks_vec->at(j).at(i, 0) *= face_images->at(j).cols; landmarks_vec->at(j).at(i, 1) *= face_images->at(j).rows; } cv::Mat m = GetTransform(&ref_landmarks, &landmarks_vec->at(j)); cv::warpAffine(face_images->at(j), face_images->at(j), m, { 112,112 }, cv::WARP_INVERSE_MAP); } } void ANSUtilityHelper::AlignFaces(std::vector* face_images, std::vector* landmarks_vec) { if (landmarks_vec->size() == 0) { return; } CV_Assert(face_images->size() == landmarks_vec->size()); cv::Mat ref_landmarks = cv::Mat(5, 2, CV_32F); for (size_t j = 0; j < face_images->size(); j++) { for (int i = 0; i < ref_landmarks.rows; i++) { ref_landmarks.at(i, 0) = ref_landmarks_normalized[2 * i] * face_images->at(j).cols; ref_landmarks.at(i, 1) = ref_landmarks_normalized[2 * i + 1] * face_images->at(j).rows; landmarks_vec->at(j).at(i, 0) *= face_images->at(j).cols; landmarks_vec->at(j).at(i, 1) *= face_images->at(j).rows; } cv::Mat m = GetTransform(&ref_landmarks, &landmarks_vec->at(j)); cv::warpAffine(face_images->at(j), face_images->at(j), m, face_images->at(j).size(), cv::WARP_INVERSE_MAP); cv::resize(face_images->at(j), face_images->at(j), cv::Size(112, 112)); } } std::pair ANSUtilityHelper::AlignFacesSCRFD( const cv::Mat& input_mat, const std::vector& face_landmark_5) { // Sanity check if (face_landmark_5.size() != 5) { throw std::runtime_error( "AlignFacesSCRFD: Expected 5 landmarks, got " + std::to_string(face_landmark_5.size())); } // --------------------------------------------------------------------- // Precomputed ArcFace template landmarks scaled to 112x112 // --------------------------------------------------------------------- static const std::vector kTemplate112 = []() { // ArcFace template landmarks (normalized to [0, 1]) const std::vector face_template = { {0.34191607f, 0.46157411f}, // left eye {0.65653393f, 0.45983393f}, // right eye {0.50022500f, 0.64050536f}, // nose tip {0.37097589f, 0.82469196f}, // left mouth {0.63151696f, 0.82325089f} // right mouth }; std::vector tpl; tpl.reserve(face_template.size()); for (const auto& pt : face_template) { tpl.emplace_back(pt.x * 112.0f, pt.y * 112.0f); } return tpl; }(); // --------------------------------------------------------------------- // Estimate similarity transform from input landmarks to template. // With exactly 5 point correspondences, RANSAC is unnecessary — // there are no outliers to reject. Use default method (least-squares). // --------------------------------------------------------------------- cv::Mat affine_matrix = cv::estimateAffinePartial2D( face_landmark_5, // src points kTemplate112 // dst points (precomputed) ); if (affine_matrix.empty()) { throw std::runtime_error( "AlignFacesSCRFD: Failed to estimate affine transformation"); } // --------------------------------------------------------------------- // Apply affine warp to align face to 112x112 // --------------------------------------------------------------------- cv::Mat aligned_face; cv::warpAffine( input_mat, aligned_face, affine_matrix, cv::Size(112, 112), cv::INTER_AREA, cv::BORDER_REPLICATE ); return std::make_pair(aligned_face, affine_matrix); } //std::pair ANSUtilityHelper::AlignFacesSCRFD( // const cv::Mat& input_mat, // const std::vector& face_landmark_5) //{ // // Sanity check // if (face_landmark_5.size() != 5) { // throw std::runtime_error( // "AlignFacesSCRFD: Expected 5 landmarks, got " + // std::to_string(face_landmark_5.size())); // } // // --------------------------------------------------------------------- // // Precomputed ArcFace template landmarks scaled to 112x112 // // --------------------------------------------------------------------- // static const std::vector kTemplate112 = []() { // // ArcFace template landmarks (normalized to [0, 1]) // const std::vector face_template = { // {0.34191607f, 0.46157411f}, // left eye // {0.65653393f, 0.45983393f}, // right eye // {0.50022500f, 0.64050536f}, // nose tip // {0.37097589f, 0.82469196f}, // left mouth // {0.63151696f, 0.82325089f} // right mouth // }; // std::vector normed_template; // normed_template.reserve(face_template.size()); // for (const auto& pt : face_template) { // normed_template.emplace_back(pt.x * 112.0f, pt.y * 112.0f); // } // return normed_template; // }(); // // --------------------------------------------------------------------- // // Use 3 keypoints (eyes + mouth center) to compute affine transform // // --------------------------------------------------------------------- // // input landmarks: [Leye, Reye, Nose, Lmouth, Rmouth] // std::vector src(3), dst(3); // // source: eyes + mouth center // src[0] = face_landmark_5[0]; // left eye // src[1] = face_landmark_5[1]; // right eye // src[2] = (face_landmark_5[3] + face_landmark_5[4]) * 0.5f; // mouth center // // destination: corresponding template points // dst[0] = kTemplate112[0]; // left eye // dst[1] = kTemplate112[1]; // right eye // dst[2] = (kTemplate112[3] + kTemplate112[4]) * 0.5f; // mouth center // cv::Mat affine_matrix = cv::getAffineTransform(src, dst); // if (affine_matrix.empty()) { // throw std::runtime_error("AlignFacesSCRFD: Failed to compute affine matrix"); // } // // --------------------------------------------------------------------- // // Apply affine warp to align face to 112x112 // // --------------------------------------------------------------------- // cv::Mat aligned_face; // cv::warpAffine( // input_mat, // aligned_face, // affine_matrix, // cv::Size(112, 112), // cv::INTER_AREA, // cv::BORDER_REPLICATE // ); // return std::make_pair(aligned_face, affine_matrix); //} //std::pair ANSUtilityHelper::AlignFacesSCRFD(const cv::Mat& input_mat,const std::vector& face_landmark_5) //{ // // Sanity check // if (face_landmark_5.size() != 5) { // throw std::runtime_error("AlignFacesSCRFD: Expected 5 landmarks, got " + std::to_string(face_landmark_5.size())); // } // // ArcFace template landmarks (normalized to [0, 1]) // const std::vector face_template = { // cv::Point2f(0.34191607f, 0.46157411f), // left eye // cv::Point2f(0.65653393f, 0.45983393f), // right eye // cv::Point2f(0.50022500f, 0.64050536f), // nose tip // cv::Point2f(0.37097589f, 0.82469196f), // left mouth // cv::Point2f(0.63151696f, 0.82325089f) // right mouth // }; // // Scale template to 112x112 output image // std::vector normed_template; // for (const auto& pt : face_template) { // normed_template.emplace_back(pt.x * 112.0f, pt.y * 112.0f); // } // // Estimate affine transform from input landmarks to template // cv::Mat inliers; // cv::Mat affine_matrix = cv::estimateAffinePartial2D( // face_landmark_5, // normed_template, // inliers, // cv::RANSAC, // 100.0 // ); // if (affine_matrix.empty()) { // throw std::runtime_error("AlignFacesSCRFD: Failed to estimate affine transformation"); // } // // Apply affine warp to align face // cv::Mat aligned_face; // cv::warpAffine( // input_mat, // aligned_face, // affine_matrix, // cv::Size(112, 112), // cv::INTER_AREA, // cv::BORDER_REPLICATE // ); // return std::make_pair(aligned_face, affine_matrix); //} bool ANSUtilityHelper::ModelOptimizer(std::string modelZipFilePath, std::string modelFileZipPassword, int fp16, std::string& optimisedModelFolder, int inputImageHeight, int inputImageWidth) { try { bool _fp16 = false; if (fp16 == 1) _fp16 = true; //A. Unzip model folder std::string _modelFolder = ""; _modelFolder.clear(); // 0. Check if the modelZipFilePath exist? if (!FileExist(modelZipFilePath)) { return false; } // 1. Unzip model zip file to a special location with folder name as model file (and version) std::string outputFolder; std::vector passwordArray; if (!modelFileZipPassword.empty()) passwordArray.push_back(modelFileZipPassword); passwordArray.push_back("Sh7O7nUe7vJ/417W0gWX+dSdfcP9hUqtf/fEqJGqxYL3PedvHubJag=="); passwordArray.push_back("3LHxGrjQ7kKDJBD9MX86H96mtKLJaZcTYXrYRdQgW8BKGt7enZHYMg=="); passwordArray.push_back("AnsCustomModels20@$"); passwordArray.push_back("AnsDemoModels20@!"); std::string modelName = GetFileNameWithoutExtension(modelZipFilePath); size_t vectorSize = passwordArray.size(); for (size_t i = 0; i < vectorSize; i++) { if (ExtractPasswordProtectedZip(modelZipFilePath, passwordArray[i], modelName, _modelFolder, false)) break; // Break the loop when the condition is met. } // 2. Check if the outputFolder exist if (!std::filesystem::exists(_modelFolder)) { return false; // That means the model file is not exist or the password is not correct } // 3. Create model file path std::string _modelFilePath; std::string onnxfile = CreateFilePath(_modelFolder, "train_last.onnx"); if (std::filesystem::exists(onnxfile)) { _modelFilePath = onnxfile; } else { return false; } optimisedModelFolder = _modelFolder; // Specify options for GPU inference ANSCENTER::Options options; // This particular model only supports a fixed batch size of 1 options.optBatchSize = 1; options.maxBatchSize = 1; options.maxInputHeight = inputImageHeight; options.minInputHeight = inputImageHeight; options.optInputHeight = inputImageHeight; options.maxInputWidth = inputImageWidth; options.minInputWidth = inputImageWidth; options.optInputWidth = inputImageWidth; options.engineFileDir = _modelFolder; // Use FP16 precision to speed up inference if (_fp16) { options.precision = ANSCENTER::Precision::FP16; } else { options.precision = ANSCENTER::Precision::FP32; } const std::array SUB_VALS{ 0.f, 0.f, 0.f }; const std::array DIV_VALS{ 1.f, 1.f, 1.f }; const bool NORMALIZE = true; // Create our TensorRT inference engine std::unique_ptr> m_trtEngine = std::make_unique>(options); // Build the onnx model into a TensorRT engine file auto succ = m_trtEngine->buildWithRetry(_modelFilePath, SUB_VALS, DIV_VALS, NORMALIZE); if (!succ) { const std::string errMsg = "Error: Unable to build the TensorRT engine. " "Try increasing TensorRT log severity to kVERBOSE."; return false; } return true; } catch (std::exception& e) { std::cout << "TENSORRTOD::OptimizeModel" << e.what();; return false; } } cv::Rect ANSUtilityHelper::BoundingBoxFromContour(std::vector contour) { if (contour.size() <= 0) { cv::Rect empty; return empty; } if (cv::contourArea(contour) < 1000) { cv::Rect empty; return empty; } /* NOTE: - Finding exremas. */ cv::Point minp(contour[0].x, contour[0].y), maxp(contour[0].x, contour[0].y); for (unsigned int i = 0; i < contour.size(); i++) { /* NOTE: - Updating local minima. */ if (contour[i].x < minp.x) { minp.x = contour[i].x; } if (contour[i].y < minp.y) { minp.y = contour[i].y; } /* NOTE: - Updating local maxima. */ if (contour[i].x > maxp.x) { maxp.x = contour[i].x; } if (contour[i].y > maxp.y) { maxp.y = contour[i].y; } } /* NOTE: - Creating a box and returning the box based on the extremas found. */ cv::Rect box(minp, maxp); return box; } std::string ANSUtilityHelper::PolygonToString(const std::vector& polygon) { if (polygon.empty()) { return ""; } std::string result; result.reserve(polygon.size() * 20); char buffer[64]; for (size_t i = 0; i < polygon.size(); ++i) { if (i > 0) { snprintf(buffer, sizeof(buffer), ";%.3f;%.3f", polygon[i].x, polygon[i].y); } else { snprintf(buffer, sizeof(buffer), "%.3f;%.3f", polygon[i].x, polygon[i].y); } result += buffer; } return result; } std::string ANSUtilityHelper::KeypointsToString(const std::vector& kps) { if (kps.empty()) { return ""; } std::string result; result.reserve(kps.size() * 10); char buffer[32]; for (size_t i = 0; i < kps.size(); ++i) { if (i > 0) result += ';'; snprintf(buffer, sizeof(buffer), "%.3f", kps[i]); result += buffer; } return result; } //std::string ANSUtilityHelper::PolygonToString(const std::vector& polygon) { // if (polygon.empty()) { // return ""; // Return empty string if kps is empty // } // std::ostringstream oss; // for (const auto& point : polygon) { // oss << point.x << ";" << point.y << ";"; // } // std::string result = oss.str(); // // Remove the trailing semicolon // if (!result.empty()) { // result.pop_back(); // } // return result; //} //std::string ANSUtilityHelper::KeypointsToString(const std::vector& kps) { // if (kps.empty()) { // return ""; // Return empty string if kps is empty // } // std::ostringstream oss; // for (const auto& kp : kps) { // oss << kp << ";"; // } // std::string result = oss.str(); // // Remove the trailing semicolon // if (!result.empty()) { // result.pop_back(); // } // return result; //} std::vector ANSUtilityHelper::RectToNormalizedPolygon(const cv::Rect& rect, float imageWidth, float imageHeight) { if (imageWidth <= 0 || imageHeight <= 0) { return {}; } // Normalized [0,1] corners of the rectangle (closed polygon: first == last) std::vector polygon = { { rect.x / imageWidth, rect.y / imageHeight }, // Top-left { (rect.x + rect.width) / imageWidth, rect.y / imageHeight }, // Top-right { (rect.x + rect.width) / imageWidth, (rect.y + rect.height) / imageHeight }, // Bottom-right { rect.x / imageWidth, (rect.y + rect.height) / imageHeight }, // Bottom-left { rect.x / imageWidth, rect.y / imageHeight } // Close }; return polygon; } std::vector ANSUtilityHelper::MaskToNormalizedPolygon( const cv::Mat& binaryMask, const cv::Rect& boundingBox, float imageWidth, float imageHeight, float simplificationEpsilon, int minContourArea, int maxPoints) { std::vector polygon; try { if (binaryMask.empty() || imageWidth <= 0 || imageHeight <= 0) return polygon; // The mask is already cropped to bounding box size cv::Mat mask8u; if (binaryMask.type() != CV_8UC1) binaryMask.convertTo(mask8u, CV_8UC1, 255.0); else mask8u = binaryMask; // Find contours std::vector> contours; cv::findContours(mask8u.clone(), contours, cv::RETR_EXTERNAL, cv::CHAIN_APPROX_SIMPLE); if (contours.empty()) return polygon; // Find largest contour int largestIdx = 0; double largestArea = 0.0; for (size_t i = 0; i < contours.size(); ++i) { double area = cv::contourArea(contours[i]); if (area > largestArea && area >= minContourArea) { largestArea = area; largestIdx = static_cast(i); } } if (largestArea < minContourArea) return polygon; // Simplify contour with approxPolyDP std::vector simplified; cv::approxPolyDP(contours[largestIdx], simplified, simplificationEpsilon, true); if (simplified.size() < 3) return polygon; // If still too many points after simplification, progressively increase // epsilon until we fit within maxPoints, then uniformly downsample as fallback if (maxPoints > 0 && static_cast(simplified.size()) > maxPoints) { // Try increasing epsilon to reduce points naturally (preserves shape better) double eps = simplificationEpsilon; for (int attempt = 0; attempt < 10 && static_cast(simplified.size()) > maxPoints; ++attempt) { eps *= 1.5; cv::approxPolyDP(contours[largestIdx], simplified, eps, true); } // Fallback: uniformly sample maxPoints from the contour if (static_cast(simplified.size()) > maxPoints) { std::vector downsampled; downsampled.reserve(maxPoints); const int n = static_cast(simplified.size()); for (int i = 0; i < maxPoints; ++i) { int idx = static_cast(std::round(static_cast(i) * (n - 1) / (maxPoints - 1))); downsampled.push_back(simplified[idx]); } simplified = std::move(downsampled); } } // Convert to normalized coordinates: // mask pixel (mx, my) -> image pixel (boundingBox.x + mx, boundingBox.y + my) -> normalized polygon.reserve(simplified.size() + 1); // +1 for closing point for (const auto& pt : simplified) { float nx = static_cast(pt.x + boundingBox.x) / imageWidth; float ny = static_cast(pt.y + boundingBox.y) / imageHeight; polygon.emplace_back( std::clamp(nx, 0.0f, 1.0f), std::clamp(ny, 0.0f, 1.0f)); } // Close the polygon: append first point at end so first == last if (polygon.size() >= 3) { polygon.push_back(polygon.front()); } } catch (...) { polygon.clear(); } return polygon; } float ANSUtilityHelper::calculate_intersection_area(const cv::Rect& box1, const cv::Rect& box2) { int xx1 = std::max(box1.x, box2.x); int yy1 = std::max(box1.y, box2.y); int xx2 = std::min(box1.x + box1.width, box2.x + box2.width); int yy2 = std::min(box1.y + box1.height, box2.y + box2.height); int width = std::max(0, xx2 - xx1); int height = std::max(0, yy2 - yy1); return static_cast(width * height); } float ANSUtilityHelper::calculate_bbox_iou(const Object& pred1, const Object& pred2) { float area1 = pred1.box.width * pred1.box.height; float area2 = pred2.box.width * pred2.box.height; float intersect = calculate_intersection_area(pred1.box, pred2.box); return intersect / (area1 + area2 - intersect); } float ANSUtilityHelper::calculate_bbox_ios(const Object& pred1, const Object& pred2) { float area1 = pred1.box.width * pred1.box.height; float area2 = pred2.box.width * pred2.box.height; float intersect = calculate_intersection_area(pred1.box, pred2.box); float smaller_area = std::min(area1, area2); return intersect / smaller_area; } bool ANSUtilityHelper::has_match(const Object& pred1, const Object& pred2, const std::string& match_type, float match_threshold) { bool threshold_condition = false; if (match_type == "IOU") { threshold_condition = calculate_bbox_iou(pred1, pred2) > match_threshold; } else if (match_type == "IOS") { threshold_condition = calculate_bbox_ios(pred1, pred2) > match_threshold; } else {// Default threshold_condition = calculate_bbox_iou(pred1, pred2) > match_threshold; } return threshold_condition; } float ANSUtilityHelper::get_merged_score(const Object& pred1, const Object& pred2) { return std::max(pred1.confidence, pred2.confidence); } cv::Rect ANSUtilityHelper::calculate_box_union(const cv::Rect& box1, const cv::Rect& box2) { int x1 = std::min(box1.x, box2.x); int y1 = std::min(box1.y, box2.y); int x2 = std::max(box1.x + box1.width, box2.x + box2.width); int y2 = std::max(box1.y + box1.height, box2.y + box2.height); return cv::Rect(cv::Point(x1, y1), cv::Point(x2, y2)); } cv::Rect ANSUtilityHelper::get_merged_bbox(const Object& pred1, const Object& pred2) { return calculate_box_union(pred1.box, pred2.box); } Object ANSUtilityHelper::merge_object_pair(const Object& obj1, const Object& obj2) { cv::Rect merged_bbox = get_merged_bbox(obj1, obj2); float merged_score = get_merged_score(obj1, obj2); int merged_class_id = get_merged_class_id(obj1, obj2); std::string merged_class_name = get_merged_category(obj1, obj2); Object merged_obj; merged_obj.box = merged_bbox; merged_obj.confidence = merged_score; merged_obj.classId = merged_class_id; merged_obj.className = merged_class_name; if (obj1.confidence > obj2.confidence) { merged_obj.trackId = obj1.trackId; merged_obj.mask = obj1.mask; merged_obj.polygon = obj1.polygon; merged_obj.kps = obj1.kps; merged_obj.extraInfo = obj1.extraInfo; } else { merged_obj.trackId = obj2.trackId; merged_obj.mask = obj2.mask; merged_obj.polygon = obj2.polygon; merged_obj.kps = obj2.kps; merged_obj.extraInfo = obj2.extraInfo; } return merged_obj; } std::vector ANSUtilityHelper::select_object_predictions(const std::vector& object_prediction_list, const std::map>& keep_to_merge_list, const std::string& match_metric, float match_threshold ) { std::vector selected_object_predictions; // Create a modifiable copy of object_prediction_list std::vector modifiable_predictions = object_prediction_list; for (const auto& [keep_ind, merge_ind_list] : keep_to_merge_list) { for (int merge_ind : merge_ind_list) { if (has_match(modifiable_predictions[keep_ind], modifiable_predictions[merge_ind], match_metric, match_threshold)) { modifiable_predictions[keep_ind] = merge_object_pair(modifiable_predictions[keep_ind], modifiable_predictions[merge_ind]); } } selected_object_predictions.push_back(modifiable_predictions[keep_ind]); } return selected_object_predictions; } std::map> ANSUtilityHelper::Greedy_NMM(const std::vector& object_predictions, const std::string& match_metric, float match_threshold) { std::map> keep_to_merge_list; std::vector areas(object_predictions.size()); // Calculate the area of each bounding box for (size_t i = 0; i < object_predictions.size(); ++i) { areas[i] = static_cast(object_predictions[i].box.width) * object_predictions[i].box.height; } // Sort indices based on confidence scores in ascending order std::vector order(object_predictions.size()); iota(order.begin(), order.end(), 0); // Fill order with 0, 1, ..., n-1 sort(order.begin(), order.end(), [&object_predictions](int a, int b) { return object_predictions[a].confidence < object_predictions[b].confidence; }); while (!order.empty()) { // Index of the prediction with the highest score int idx = order.back(); order.pop_back(); // Sanity check if (order.empty()) { keep_to_merge_list[idx] = {}; break; } // Create a list of indices for matched and unmatched boxes std::vector matched_indices; std::vector unmatched_indices; for (int other_idx : order) { // Calculate intersection area between `idx` and `other_idx` int xx1 = std::max(object_predictions[idx].box.x, object_predictions[other_idx].box.x); int yy1 = std::max(object_predictions[idx].box.y, object_predictions[other_idx].box.y); int xx2 = std::min(object_predictions[idx].box.x + object_predictions[idx].box.width, object_predictions[other_idx].box.x + object_predictions[other_idx].box.width); int yy2 = std::min(object_predictions[idx].box.y + object_predictions[idx].box.height, object_predictions[other_idx].box.y + object_predictions[other_idx].box.height); int w = std::max(0, xx2 - xx1); int h = std::max(0, yy2 - yy1); float inter = static_cast(w * h); float match_metric_value; if (match_metric == "IOU") { // Union for IoU float union_area = areas[idx] + areas[other_idx] - inter; match_metric_value = inter / union_area; } else if (match_metric == "IOS") { // Smaller area for IoS float smaller_area = std::min(areas[idx], areas[other_idx]); match_metric_value = inter / smaller_area; } else {// Default // Union for IoU float union_area = areas[idx] + areas[other_idx] - inter; match_metric_value = inter / union_area; } // Check if the match metric value exceeds the threshold if (match_metric_value >= match_threshold) { matched_indices.push_back(other_idx); } else { unmatched_indices.push_back(other_idx); } } // Update `order` to contain only unmatched indices, sorted by confidence scores sort(unmatched_indices.begin(), unmatched_indices.end(), [&object_predictions](int a, int b) { return object_predictions[a].confidence < object_predictions[b].confidence; }); order = unmatched_indices; // Add mapping for the current `idx` keep_to_merge_list[idx] = matched_indices; } return keep_to_merge_list; } std::vector ANSUtilityHelper::ApplyNMS(const std::vector& objects, float nmsThreshold) { std::vector finalObjects; std::map> keep_to_merge_list; keep_to_merge_list = Greedy_NMM(objects, "IOU", nmsThreshold); finalObjects = select_object_predictions(objects, keep_to_merge_list, "IOU", nmsThreshold); return finalObjects; } std::vector ANSUtilityHelper::GetPatches(cv::Mat& image, int tileWidth, int tileHeight, double overlap) { std::vector patches; int overLapWidth = static_cast(tileWidth * overlap); int overLapHeight = static_cast(tileHeight * overlap); int stepX = tileWidth - overLapWidth; int stepY = tileHeight - overLapHeight; for (int y = 0; y < image.rows; y += stepY) { for (int x = 0; x < image.cols; x += stepX) { int width = std::min(tileWidth, image.cols - x); int height = std::min(tileHeight, image.rows - y); patches.push_back(cv::Rect(x, y, width, height)); } } return patches; } std::vector ANSUtilityHelper::ExtractPatches(cv::Mat& image, std::vector& patchRegions) { std::vector patches; for (const auto& region : patchRegions) { patches.push_back(image(region).clone()); } return patches; } cv::Mat ANSUtilityHelper::ResizePatch(cv::Mat& patch, int modelWidth, int modelHeight) { cv::Mat resizedPatch; double aspectRatio = static_cast(patch.cols) / patch.rows; int newWidth, newHeight; if (aspectRatio > 1) { // Width > Height newWidth = modelWidth; newHeight = static_cast(modelWidth / aspectRatio); } else { newHeight = modelHeight; newWidth = static_cast(modelHeight * aspectRatio); } cv::resize(patch, resizedPatch, cv::Size(newWidth, newHeight)); return resizedPatch; } void ANSUtilityHelper::AdjustBoudingBox(Object& obj, int offsetX, int offsetY) { obj.box.x += offsetX; obj.box.y += offsetY; } /// MOVEMENT DETECTION FUNCTIONS // ============================================================================ // Constructor and Destructor // ============================================================================ MoveDetectsHandler::MoveDetectsHandler() { // Initialize any shared resources if needed } MoveDetectsHandler::~MoveDetectsHandler() { std::lock_guard lock(cameras_mutex); for (auto& [key, cameraData] : cameras) { cameraData.clear(); } cameras.clear(); } // ============================================================================ // Camera Management // ============================================================================ bool MoveDetectsHandler::hasCameraData(const std::string& camera_id) const { std::lock_guard lock(cameras_mutex); return cameras.find(camera_id) != cameras.end(); } void MoveDetectsHandler::removeCamera(const std::string& camera_id) { std::lock_guard lock(cameras_mutex); auto it = cameras.find(camera_id); if (it != cameras.end()) { it->second.clear(); cameras.erase(it); } } std::vector MoveDetectsHandler::getCameraIds() const { std::lock_guard lock(cameras_mutex); std::vector ids; ids.reserve(cameras.size()); for (const auto& [id, _] : cameras) { ids.push_back(id); } return ids; } bool MoveDetectsHandler::empty(const std::string& camera_id) const { std::lock_guard lock(cameras_mutex); auto it = cameras.find(camera_id); return it == cameras.end() || it->second.control_frames.empty(); } void MoveDetectsHandler::clear(const std::string& camera_id) { std::lock_guard lock(cameras_mutex); auto it = cameras.find(camera_id); if (it != cameras.end()) { it->second.clear(); } } void MoveDetectsHandler::clearAll() { std::lock_guard lock(cameras_mutex); for (auto& [key, cameraData] : cameras) { cameraData.clear(); } cameras.clear(); } // ============================================================================ // Configuration Methods // ============================================================================ void MoveDetectsHandler::setThreshold(const std::string& camera_id, double threshold) { std::lock_guard lock(cameras_mutex); auto it = cameras.find(camera_id); if (it != cameras.end()) { it->second.psnr_threshold = std::clamp(threshold, 0.0, 100.0); } } void MoveDetectsHandler::setKeyFrameFrequency(const std::string& camera_id, size_t frequency) { std::lock_guard lock(cameras_mutex); auto it = cameras.find(camera_id); if (it != cameras.end()) { it->second.key_frame_frequency = std::max(size_t(1), frequency); } } void MoveDetectsHandler::setNumberOfControlFrames(const std::string& camera_id, size_t count) { std::lock_guard lock(cameras_mutex); auto it = cameras.find(camera_id); if (it != cameras.end()) { it->second.number_of_control_frames = std::clamp(count, size_t(1), size_t(100)); } } void MoveDetectsHandler::setThumbnailRatio(const std::string& camera_id, double ratio) { std::lock_guard lock(cameras_mutex); auto it = cameras.find(camera_id); if (it != cameras.end()) { it->second.thumbnail_ratio = std::clamp(ratio, 0.01, 1.0); it->second.thumbnail_size = cv::Size(0, 0); // Reset to recalculate } } void MoveDetectsHandler::setMaskEnabled(const std::string& camera_id, bool enabled) { std::lock_guard lock(cameras_mutex); auto it = cameras.find(camera_id); if (it != cameras.end()) { it->second.mask_enabled = enabled; } } void MoveDetectsHandler::setContoursEnabled(const std::string& camera_id, bool enabled) { std::lock_guard lock(cameras_mutex); auto it = cameras.find(camera_id); if (it != cameras.end()) { it->second.contours_enabled = enabled; } } void MoveDetectsHandler::setBboxEnabled(const std::string& camera_id, bool enabled) { std::lock_guard lock(cameras_mutex); auto it = cameras.find(camera_id); if (it != cameras.end()) { it->second.bbox_enabled = enabled; } } void MoveDetectsHandler::setContourThickness(const std::string& camera_id, int thickness) { std::lock_guard lock(cameras_mutex); auto it = cameras.find(camera_id); if (it != cameras.end()) { it->second.contours_size = std::max(1, thickness); } } void MoveDetectsHandler::setBboxThickness(const std::string& camera_id, int thickness) { std::lock_guard lock(cameras_mutex); auto it = cameras.find(camera_id); if (it != cameras.end()) { it->second.bbox_size = std::max(1, thickness); } } void MoveDetectsHandler::setMinObjectArea(const std::string& camera_id, double area) { std::lock_guard lock(cameras_mutex); auto it = cameras.find(camera_id); if (it != cameras.end()) { it->second.min_object_area = std::max(0.0, area); } } void MoveDetectsHandler::setMinObjectSize(const std::string& camera_id, int size) { std::lock_guard lock(cameras_mutex); auto it = cameras.find(camera_id); if (it != cameras.end()) { it->second.min_object_dimension = std::max(0, size); } } void MoveDetectsHandler::setMorphologyIterations(const std::string& camera_id, int iterations) { std::lock_guard lock(cameras_mutex); auto it = cameras.find(camera_id); if (it != cameras.end()) { it->second.morphology_iterations = std::clamp(iterations, 1, 50); } } // ============================================================================ // Configuration Getters // ============================================================================ double MoveDetectsHandler::getThreshold(const std::string& camera_id) const { std::lock_guard lock(cameras_mutex); auto it = cameras.find(camera_id); if (it != cameras.end()) { return it->second.psnr_threshold; } return 0.0; } size_t MoveDetectsHandler::getKeyFrameFrequency(const std::string& camera_id) const { std::lock_guard lock(cameras_mutex); auto it = cameras.find(camera_id); if (it != cameras.end()) { return it->second.key_frame_frequency; } return 0; } size_t MoveDetectsHandler::getNumberOfControlFrames(const std::string& camera_id) const { std::lock_guard lock(cameras_mutex); auto it = cameras.find(camera_id); if (it != cameras.end()) { return it->second.number_of_control_frames; } return 0; } double MoveDetectsHandler::getThumbnailRatio(const std::string& camera_id) const { std::lock_guard lock(cameras_mutex); auto it = cameras.find(camera_id); if (it != cameras.end()) { return it->second.thumbnail_ratio; } return 0.0; } bool MoveDetectsHandler::isMaskEnabled(const std::string& camera_id) const { std::lock_guard lock(cameras_mutex); auto it = cameras.find(camera_id); if (it != cameras.end()) { return it->second.mask_enabled; } return false; } bool MoveDetectsHandler::isContoursEnabled(const std::string& camera_id) const { std::lock_guard lock(cameras_mutex); auto it = cameras.find(camera_id); if (it != cameras.end()) { return it->second.contours_enabled; } return false; } bool MoveDetectsHandler::isBboxEnabled(const std::string& camera_id) const { std::lock_guard lock(cameras_mutex); auto it = cameras.find(camera_id); if (it != cameras.end()) { return it->second.bbox_enabled; } return false; } // ============================================================================ // State Query Methods // ============================================================================ bool MoveDetectsHandler::isMovementDetected(const std::string& camera_id) const { std::lock_guard lock(cameras_mutex); auto it = cameras.find(camera_id); return it != cameras.end() && it->second.movement_detected; } bool MoveDetectsHandler::wasTransitionDetected(const std::string& camera_id) const { std::lock_guard lock(cameras_mutex); auto it = cameras.find(camera_id); return it != cameras.end() && it->second.transition_detected; } double MoveDetectsHandler::getPSNRScore(const std::string& camera_id) const { std::lock_guard lock(cameras_mutex); auto it = cameras.find(camera_id); if (it != cameras.end()) { return it->second.most_recent_psnr_score; } return 0.0; } size_t MoveDetectsHandler::getFrameIndexWithMovement(const std::string& camera_id) const { std::lock_guard lock(cameras_mutex); auto it = cameras.find(camera_id); if (it != cameras.end()) { return it->second.frame_index_with_movement; } return 0; } std::chrono::milliseconds MoveDetectsHandler::getTimeSinceLastMovement(const std::string& camera_id) const { std::lock_guard lock(cameras_mutex); auto it = cameras.find(camera_id); if (it == cameras.end() || it->second.frame_index_with_movement == 0) { return std::chrono::milliseconds::max(); } auto now = std::chrono::high_resolution_clock::now(); return std::chrono::duration_cast( now - it->second.movement_last_detected ); } size_t MoveDetectsHandler::getControlFrameCount(const std::string& camera_id) const { std::lock_guard lock(cameras_mutex); auto it = cameras.find(camera_id); if (it != cameras.end()) { return it->second.control_frames.size(); } return 0; } size_t MoveDetectsHandler::getNextFrameIndex(const std::string& camera_id) const { std::lock_guard lock(cameras_mutex); auto it = cameras.find(camera_id); if (it != cameras.end()) { return it->second.next_frame_index; } return 0; } cv::Mat MoveDetectsHandler::getOutput(const std::string& camera_id) const { std::lock_guard lock(cameras_mutex); auto it = cameras.find(camera_id); if (it != cameras.end()) { return it->second.output.clone(); } return cv::Mat(); } cv::Mat MoveDetectsHandler::getMask(const std::string& camera_id) const { std::lock_guard lock(cameras_mutex); auto it = cameras.find(camera_id); if (it != cameras.end()) { return it->second.mask.clone(); } return cv::Mat(); } std::vector> MoveDetectsHandler::getContours(const std::string& camera_id) const { std::lock_guard lock(cameras_mutex); auto it = cameras.find(camera_id); if (it != cameras.end()) { return it->second.contours; } return std::vector>(); } // ============================================================================ // Statistics // ============================================================================ MoveDetectsHandler::CameraStats MoveDetectsHandler::getStats(const std::string& camera_id) const { std::lock_guard lock(cameras_mutex); auto it = cameras.find(camera_id); if (it != cameras.end()) { return it->second.stats; } return CameraStats(); } void MoveDetectsHandler::resetStats(const std::string& camera_id) { std::lock_guard lock(cameras_mutex); auto it = cameras.find(camera_id); if (it != cameras.end()) { it->second.stats.reset(); } } // ============================================================================ // Helper Functions // ============================================================================ bool MoveDetectsHandler::cameraExists(const std::string& camera_id) const { // Note: cameras_mutex should already be locked by caller return cameras.find(camera_id) != cameras.end(); } MoveDetectsHandler::CameraData& MoveDetectsHandler::getCameraData(const std::string& camera_id) { // Note: cameras_mutex should already be locked by caller return cameras.try_emplace(camera_id, CameraData{}).first->second; } const MoveDetectsHandler::CameraData* MoveDetectsHandler::getCameraDataConst(const std::string& camera_id) const { // Note: cameras_mutex should already be locked by caller auto it = cameras.find(camera_id); if (it == cameras.end()) { return nullptr; } return &it->second; } // ============================================================================ // Core Algorithm Functions // ============================================================================ double MoveDetectsHandler::psnr(const cv::Mat& src, const cv::Mat& dst) { if (src.empty() || dst.empty()) { std::cerr << "PSNR: Empty image(s)" << std::endl; return 0.0; } if (src.type() != dst.type()) { std::cerr << "PSNR: Type mismatch - src: " << src.type() << " dst: " << dst.type() << std::endl; return 0.0; } if (src.size() != dst.size()) { std::cerr << "PSNR: Size mismatch - src: " << src.size() << " dst: " << dst.size() << std::endl; return 0.0; } cv::Mat s1; cv::absdiff(src, dst, s1); s1.convertTo(s1, CV_32F); s1 = s1.mul(s1); cv::Scalar s = cv::sum(s1); double sse = s.val[0] + s.val[1] + s.val[2]; if (sse <= 1e-10) { // Identical images - return very high PSNR return 100.0; // Use a high but finite value instead of infinity } const double mse = sse / static_cast(src.channels() * src.total()); const double psnr_value = 10.0 * std::log10((255.0 * 255.0) / mse); // Sanity check if (std::isnan(psnr_value) || std::isinf(psnr_value)) { std::cerr << "PSNR: Invalid result (NaN or Inf), MSE=" << mse << std::endl; return 0.0; } return psnr_value; } cv::Mat MoveDetectsHandler::simple_colour_balance(const cv::Mat& src) { if (src.empty() || src.channels() != 3) { return src.clone(); } const float full_percent = 1.0f; const float half_percent = full_percent / 200.0f; std::vector tmpsplit; cv::split(src, tmpsplit); for (size_t idx = 0; idx < 3; idx++) { cv::Mat flat; tmpsplit[idx].reshape(1, 1).copyTo(flat); cv::sort(flat, flat, cv::SORT_EVERY_ROW + cv::SORT_ASCENDING); const int lo = flat.at(cvFloor(static_cast(flat.cols) * half_percent)); const int hi = flat.at(cvCeil(static_cast(flat.cols) * (1.0f - half_percent))); tmpsplit[idx].setTo(lo, tmpsplit[idx] < lo); tmpsplit[idx].setTo(hi, tmpsplit[idx] > hi); cv::normalize(tmpsplit[idx], tmpsplit[idx], 0, 255, cv::NORM_MINMAX); } cv::Mat output; cv::merge(tmpsplit, output); return output; } cv::Rect MoveDetectsHandler::BoundingBoxFromContour(const std::vector& contour) { if (contour.empty()) { return cv::Rect(); } if (cv::contourArea(contour) < 1000) { return cv::Rect(); } return cv::boundingRect(contour); } cv::Mat MoveDetectsHandler::computeMovementMask(const cv::Mat& control_frame, const cv::Mat& current_frame, const cv::Size& output_size, int morphology_iterations) { if (control_frame.empty() || current_frame.empty()) { std::cerr << "computeMovementMask: Empty input frame(s)" << std::endl; return cv::Mat(); } // Compute difference cv::Mat differences; cv::absdiff(control_frame, current_frame, differences); // Resize to output size cv::Mat differences_resized; cv::resize(differences, differences_resized, output_size, 0, 0, cv::INTER_CUBIC); // Convert to grayscale cv::Mat greyscale; cv::cvtColor(differences_resized, greyscale, cv::COLOR_BGR2GRAY); // NEW: Apply Gaussian blur to reduce noise BEFORE thresholding cv::Mat blurred; cv::GaussianBlur(greyscale, blurred, cv::Size(5, 5), 0); // NEW: Use adaptive threshold instead of Otsu for better noise handling cv::Mat threshold; // Try adaptive threshold first cv::adaptiveThreshold(blurred, threshold, 255, cv::ADAPTIVE_THRESH_GAUSSIAN_C, cv::THRESH_BINARY, 11, 2); // Check if adaptive threshold produced too much noise int non_zero_adaptive = cv::countNonZero(threshold); double adaptive_percent = (100.0 * non_zero_adaptive) / threshold.total(); std::cout << "Adaptive threshold: " << adaptive_percent << "% of frame" << std::endl; // If adaptive produces too much noise, fall back to Otsu with higher threshold if (adaptive_percent > 25.0) { std::cout << "Adaptive too noisy, using Otsu..." << std::endl; cv::threshold(blurred, threshold, 0.0, 255.0, cv::THRESH_BINARY | cv::THRESH_OTSU); // Check Otsu result int non_zero_otsu = cv::countNonZero(threshold); double otsu_percent = (100.0 * non_zero_otsu) / threshold.total(); std::cout << "Otsu threshold: " << otsu_percent << "% of frame" << std::endl; // If still too noisy, use fixed high threshold if (otsu_percent > 25.0) { std::cout << "Still too noisy, using fixed threshold 50" << std::endl; cv::threshold(blurred, threshold, 50, 255.0, cv::THRESH_BINARY); } } // NEW: More aggressive morphology cv::Mat element = cv::getStructuringElement(cv::MORPH_ELLIPSE, cv::Size(5, 5)); // Close small gaps first cv::Mat closed; cv::morphologyEx(threshold, closed, cv::MORPH_CLOSE, element, cv::Point(-1, -1), 2); // Then dilate and erode cv::Mat dilated, eroded; cv::dilate(closed, dilated, element, cv::Point(-1, -1), morphology_iterations); cv::erode(dilated, eroded, element, cv::Point(-1, -1), morphology_iterations); // NEW: Open to remove small noise cv::Mat opened; cv::morphologyEx(eroded, opened, cv::MORPH_OPEN, element, cv::Point(-1, -1), 3); // Final check int final_pixels = cv::countNonZero(opened); double final_percent = (100.0 * final_pixels) / opened.total(); std::cout << "Final mask: " << final_percent << "% of frame" << std::endl; return opened; } std::vector MoveDetectsHandler::extractObjectsFromMask(const cv::Mat& mask, const cv::Mat& image, CameraData& camera, const std::string& camera_id) { std::vector outputObjects; if (mask.empty()) { return outputObjects; } std::vector hierarchy; cv::findContours(mask, camera.contours, hierarchy, cv::RETR_EXTERNAL, cv::CHAIN_APPROX_SIMPLE); for (const auto& contour : camera.contours) { double area = cv::contourArea(contour); if (area < camera.min_object_area) { continue; } cv::Rect boundingRect = cv::boundingRect(contour); // Clamp to image bounds boundingRect &= cv::Rect(0, 0, image.cols, image.rows); if (boundingRect.width < camera.min_object_dimension || boundingRect.height < camera.min_object_dimension || boundingRect.area() < camera.min_object_total_size) { continue; } // Draw visualizations if (camera.contours_enabled) { cv::polylines(camera.output, contour, true, cv::Scalar(0, 0, 255), camera.contours_size, camera.line_type); } if (camera.bbox_enabled) { cv::rectangle(camera.output, boundingRect, cv::Scalar(0, 255, 0), camera.bbox_size, camera.line_type); } Object obj; obj.classId = 0; obj.trackId = 0; obj.className = "movement"; obj.confidence = 1.0f; obj.box = boundingRect; obj.mask = image(boundingRect).clone(); obj.cameraId = camera_id; // Create polygon from contour (simplified to bounding box corners) obj.polygon = { cv::Point2f(static_cast(boundingRect.x), static_cast(boundingRect.y)), cv::Point2f(static_cast(boundingRect.x + boundingRect.width), static_cast(boundingRect.y)), cv::Point2f(static_cast(boundingRect.x + boundingRect.width), static_cast(boundingRect.y + boundingRect.height)), cv::Point2f(static_cast(boundingRect.x), static_cast(boundingRect.y + boundingRect.height)) }; if ((boundingRect.width >= 25) && (boundingRect.height >= 25))outputObjects.push_back(obj); } return outputObjects; } void MoveDetectsHandler::updateControlFrames(CameraData& camera, size_t frame_index, const cv::Mat& thumbnail) { if (thumbnail.empty()) { std::cerr << "updateControlFrames: Empty thumbnail!" << std::endl; return; } if (frame_index >= camera.next_key_frame || camera.control_frames.size() < camera.number_of_control_frames) { // Store a deep copy cv::Mat thumbnail_copy = thumbnail.clone(); // Ensure continuous memory if (!thumbnail_copy.isContinuous()) { thumbnail_copy = thumbnail_copy.clone(); } camera.control_frames[frame_index] = thumbnail_copy; // Remove oldest frames if exceeding limit while (camera.control_frames.size() > camera.number_of_control_frames) { auto oldest = camera.control_frames.begin(); oldest->second.release(); // Explicitly release memory camera.control_frames.erase(oldest); } camera.next_key_frame = frame_index + camera.key_frame_frequency; } } void MoveDetectsHandler::updateStatistics(CameraData& camera, double psnr_value, bool movement_detected, std::chrono::milliseconds processing_time) { camera.stats.total_frames_processed++; if (movement_detected) { camera.stats.frames_with_movement++; camera.stats.last_movement_time = std::chrono::high_resolution_clock::now(); } camera.stats.control_frames_count = camera.control_frames.size(); // Update PSNR statistics (skip infinity values from identical frames) if (!std::isinf(psnr_value)) { camera.stats.min_psnr = std::min(camera.stats.min_psnr, psnr_value); camera.stats.max_psnr = std::max(camera.stats.max_psnr, psnr_value); // Running average double n = static_cast(camera.stats.total_frames_processed); camera.stats.average_psnr = (camera.stats.average_psnr * (n - 1.0) + psnr_value) / n; } camera.stats.total_processing_time += processing_time; } //void MoveDetectsHandler::updateStatistics(CameraData& camera, double psnr_value, // bool movement_detected, // std::chrono::milliseconds processing_time) //{ // camera.stats.total_frames_processed++; // if (movement_detected) // { // camera.stats.frames_with_movement++; // camera.stats.last_movement_time = std::chrono::high_resolution_clock::now(); // } // camera.stats.control_frames_count = camera.control_frames.size(); // // Update PSNR statistics (skip infinity values from identical frames) // if (!std::isinf(psnr_value)) // { // camera.stats.min_psnr = std::min(camera.stats.min_psnr, psnr_value); // camera.stats.max_psnr = std::max(camera.stats.max_psnr, psnr_value); // // Running average // double n = static_cast(camera.stats.total_frames_processed); // camera.stats.average_psnr = // (camera.stats.average_psnr * (n - 1.0) + psnr_value) / n; // } // // ======================================================================== // // UPDATE TEMPORAL CONSISTENCY STATISTICS - THIS WAS MISSING! // // ======================================================================== // double n = static_cast(camera.stats.total_frames_processed); // camera.stats.average_temporal_consistency = // (camera.stats.average_temporal_consistency * (n - 1.0) + // camera.last_temporal_consistency_score) / n; // camera.stats.total_processing_time += processing_time; //} // ============================================================================ // Main Detection Methods // ============================================================================ std::vector MoveDetectsHandler::MovementDetect(const std::string& camera_id, cv::Mat& next_image) { std::lock_guard lock(cameras_mutex); CameraData& camera = getCameraData(camera_id); return MovementDetect(camera_id, camera.next_frame_index, next_image); } std::vector MoveDetectsHandler::MovementDetect(const std::string& camera_id, const size_t frame_index, cv::Mat& image) { std::lock_guard lock(cameras_mutex); if (image.empty()) { return std::vector(); } // CRITICAL: Ensure consistent format cv::Mat processed_image; // Convert to BGR if needed if (image.channels() == 1) { cv::cvtColor(image, processed_image, cv::COLOR_GRAY2BGR); } else if (image.channels() == 4) { cv::cvtColor(image, processed_image, cv::COLOR_BGRA2BGR); } else if (image.channels() == 3) { // Already BGR, but ensure it's a continuous clone processed_image = image.clone(); } else { // Unsupported format return std::vector(); } // Ensure continuous memory layout if (!processed_image.isContinuous()) { processed_image = processed_image.clone(); } CameraData& camera = getCameraData(camera_id); return MovementDetectInternal(camera_id, frame_index, processed_image, camera); } void MoveDetectsHandler::setTemporalConsistency(const std::string& camera_id, bool enabled) { std::lock_guard lock(cameras_mutex); auto it = cameras.find(camera_id); if (it != cameras.end()) { it->second.temporal_consistency_enabled = enabled; } } void MoveDetectsHandler::setMaskOverlapThreshold(const std::string& camera_id, double threshold) { std::lock_guard lock(cameras_mutex); auto it = cameras.find(camera_id); if (it != cameras.end()) { it->second.mask_overlap_threshold = std::clamp(threshold, 0.0, 1.0); } } void MoveDetectsHandler::setTemporalHistorySize(const std::string& camera_id, size_t size) { std::lock_guard lock(cameras_mutex); auto it = cameras.find(camera_id); if (it != cameras.end()) { it->second.temporal_history_size = std::clamp(size, size_t(1), size_t(20)); } } void MoveDetectsHandler::setMinConsistentFrames(const std::string& camera_id, size_t frames) { std::lock_guard lock(cameras_mutex); auto it = cameras.find(camera_id); if (it != cameras.end()) { it->second.min_consistent_frames = std::max(size_t(1), frames); } } void MoveDetectsHandler::setLocationStabilityEnabled(const std::string& camera_id, bool enabled) { std::lock_guard lock(cameras_mutex); auto it = cameras.find(camera_id); if (it != cameras.end()) { it->second.location_stability_enabled = enabled; } } void MoveDetectsHandler::setMaxLocationJitter(const std::string& camera_id, double pixels) { std::lock_guard lock(cameras_mutex); auto it = cameras.find(camera_id); if (it != cameras.end()) { it->second.max_location_jitter = std::max(0.0, pixels); } } bool MoveDetectsHandler::isTemporalConsistencyEnabled(const std::string& camera_id) const { std::lock_guard lock(cameras_mutex); auto it = cameras.find(camera_id); if (it != cameras.end()) { return it->second.temporal_consistency_enabled; } return false; } double MoveDetectsHandler::getTemporalConsistencyScore(const std::string& camera_id) const { std::lock_guard lock(cameras_mutex); auto it = cameras.find(camera_id); if (it != cameras.end()) { return it->second.last_temporal_consistency_score; } return 0.0; } // ============================================================================ // Temporal Consistency Helper Functions // ============================================================================ double MoveDetectsHandler::calculateMaskOverlap(const cv::Mat& mask1, const cv::Mat& mask2) { if (mask1.empty() || mask2.empty()) { return 0.0; } if (mask1.size() != mask2.size()) { return 0.0; } // Calculate intersection cv::Mat intersection; cv::bitwise_and(mask1, mask2, intersection); int intersection_pixels = cv::countNonZero(intersection); // Calculate union cv::Mat union_mask; cv::bitwise_or(mask1, mask2, union_mask); int union_pixels = cv::countNonZero(union_mask); if (union_pixels == 0) { return 0.0; } // IoU (Intersection over Union) double overlap = static_cast(intersection_pixels) / static_cast(union_pixels); return overlap; } cv::Point MoveDetectsHandler::calculateMaskCentroid(const cv::Mat& mask) { if (mask.empty()) { return cv::Point(-1, -1); } cv::Moments m = cv::moments(mask, true); if (m.m00 == 0) { return cv::Point(-1, -1); } cv::Point centroid; centroid.x = static_cast(m.m10 / m.m00); centroid.y = static_cast(m.m01 / m.m00); return centroid; } double MoveDetectsHandler::calculateLocationStability(const std::deque& centroids) { if (centroids.size() < 2) { return 0.0; } // Calculate total movement distance double total_distance = 0.0; for (size_t i = 1; i < centroids.size(); i++) { if (centroids[i].x < 0 || centroids[i - 1].x < 0) { continue; // Skip invalid centroids } double dx = centroids[i].x - centroids[i - 1].x; double dy = centroids[i].y - centroids[i - 1].y; double distance = std::sqrt(dx * dx + dy * dy); total_distance += distance; } // Average movement per frame double avg_movement = total_distance / (centroids.size() - 1); return avg_movement; } void MoveDetectsHandler::updateTemporalHistory(CameraData& camera, const cv::Mat& mask) { if (mask.empty()) { return; } // Add mask to history camera.mask_history.push_back(mask.clone()); // Maintain history size limit while (camera.mask_history.size() > camera.temporal_history_size) { camera.mask_history.front().release(); camera.mask_history.pop_front(); } // Calculate and store centroid cv::Point centroid = calculateMaskCentroid(mask); camera.centroid_history.push_back(centroid); // Maintain centroid history size while (camera.centroid_history.size() > camera.temporal_history_size) { camera.centroid_history.pop_front(); } } bool MoveDetectsHandler::checkTemporalConsistency(CameraData& camera, const cv::Mat& current_mask) { if (!camera.temporal_consistency_enabled) { camera.last_temporal_consistency_score = 1.0; return true; // Always pass if disabled } if (current_mask.empty()) { camera.last_temporal_consistency_score = 0.0; return false; } // If no history yet, accept the first detection if (camera.mask_history.empty()) { camera.last_temporal_consistency_score = 1.0; return true; } // Calculate overlap with recent masks double total_overlap = 0.0; int valid_comparisons = 0; for (const auto& historical_mask : camera.mask_history) { if (!historical_mask.empty()) { double overlap = calculateMaskOverlap(current_mask, historical_mask); total_overlap += overlap; valid_comparisons++; } } double avg_overlap = (valid_comparisons > 0) ? (total_overlap / valid_comparisons) : 0.0; camera.last_temporal_consistency_score = avg_overlap; std::cout << "Temporal consistency check:" << std::endl; std::cout << " Average overlap with history: " << (avg_overlap * 100) << "%" << std::endl; std::cout << " Required threshold: " << (camera.mask_overlap_threshold * 100) << "%" << std::endl; // Check mask overlap criterion bool overlap_passed = avg_overlap >= camera.mask_overlap_threshold; // Check location stability if enabled bool location_stable = true; if (camera.location_stability_enabled && camera.centroid_history.size() >= 2) { cv::Point current_centroid = calculateMaskCentroid(current_mask); if (current_centroid.x >= 0) { // Check jitter with most recent centroid const cv::Point& last_centroid = camera.centroid_history.back(); if (last_centroid.x >= 0) { double dx = current_centroid.x - last_centroid.x; double dy = current_centroid.y - last_centroid.y; double distance = std::sqrt(dx * dx + dy * dy); location_stable = distance <= camera.max_location_jitter; std::cout << " Location jitter: " << distance << " pixels (max: " << camera.max_location_jitter << ")" << std::endl; std::cout << " Location stable: " << (location_stable ? "YES" : "NO") << std::endl; } } } // Both criteria must pass bool passed = overlap_passed && location_stable; // Update consistent frame counter if (passed) { camera.consistent_frame_count++; std::cout << " Consistent frames: " << camera.consistent_frame_count << " / " << camera.min_consistent_frames << std::endl; } else { camera.consistent_frame_count = 0; std::cout << " Consistency check FAILED - resetting counter" << std::endl; } // Movement is only confirmed after min consecutive consistent frames bool confirmed = camera.consistent_frame_count >= camera.min_consistent_frames; std::cout << " Movement confirmed: " << (confirmed ? "YES" : "NO") << std::endl; return confirmed; } std::vector MoveDetectsHandler::MovementDetectInternal(const std::string& camera_id, const size_t frame_index, cv::Mat& image, CameraData& camera) { auto start_time = std::chrono::high_resolution_clock::now(); std::vector outputObjects; // DEBUG: Log input std::cout << "\n=== MovementDetect Debug ===" << std::endl; std::cout << "Camera ID: " << camera_id << std::endl; std::cout << "Frame Index: " << frame_index << std::endl; std::cout << "Image size: " << image.cols << "x" << image.rows << std::endl; std::cout << "Image type: " << image.type() << " (CV_8UC3=" << CV_8UC3 << ")" << std::endl; std::cout << "Image channels: " << image.channels() << std::endl; std::cout << "Image empty: " << (image.empty() ? "YES" : "NO") << std::endl; if (image.empty()) { std::cout << "ERROR: Empty image!" << std::endl; return outputObjects; } // Ensure image is continuous in memory if (!image.isContinuous()) { std::cout << "WARNING: Image is not continuous, cloning..." << std::endl; image = image.clone(); } camera.contours.clear(); // Initialize thumbnail size if needed if (camera.thumbnail_size.area() == 0) { camera.thumbnail_ratio = std::clamp(camera.thumbnail_ratio, 0.01, 1.0); camera.thumbnail_size.width = static_cast(image.cols * camera.thumbnail_ratio); camera.thumbnail_size.height = static_cast(image.rows * camera.thumbnail_ratio); camera.thumbnail_size.width = std::max(1, camera.thumbnail_size.width); camera.thumbnail_size.height = std::max(1, camera.thumbnail_size.height); std::cout << "Initialized thumbnail size: " << camera.thumbnail_size.width << "x" << camera.thumbnail_size.height << std::endl; } // Create thumbnail for comparison cv::Mat thumbnail; cv::resize(image, thumbnail, camera.thumbnail_size, 0, 0, cv::INTER_AREA); std::cout << "Thumbnail created: " << thumbnail.cols << "x" << thumbnail.rows << " type: " << thumbnail.type() << std::endl; // DEBUG: Control frames info std::cout << "Control frames count: " << camera.control_frames.size() << std::endl; std::cout << "Control frame indices: "; for (const auto& [idx, frame] : camera.control_frames) { std::cout << idx << " "; } std::cout << std::endl; if (camera.control_frames.empty()) { std::cout << "First frame - initializing control frame" << std::endl; camera.control_frames[frame_index] = thumbnail.clone(); camera.next_key_frame = frame_index + camera.key_frame_frequency; camera.next_frame_index = frame_index + 1; camera.output = image.clone(); camera.mask = cv::Mat(image.size(), CV_8UC1, cv::Scalar(0)); auto end_time = std::chrono::high_resolution_clock::now(); auto processing_time = std::chrono::duration_cast( end_time - start_time ); updateStatistics(camera, 0.0, false, processing_time); return outputObjects; } bool previous_movement_detected = camera.movement_detected; camera.movement_detected = false; std::cout << "Previous movement: " << (previous_movement_detected ? "YES" : "NO") << std::endl; std::cout << "PSNR Threshold: " << camera.psnr_threshold << std::endl; // Compare with control frames for (auto iter = camera.control_frames.rbegin(); iter != camera.control_frames.rend(); ++iter) { const cv::Mat& control_image = iter->second; // DEBUG: Verify control frame std::cout << "Comparing with control frame " << iter->first << " size: " << control_image.cols << "x" << control_image.rows << " type: " << control_image.type() << std::endl; if (control_image.empty()) { std::cout << "ERROR: Control frame is empty!" << std::endl; continue; } if (control_image.size() != thumbnail.size()) { std::cout << "ERROR: Size mismatch! Control: " << control_image.size() << " Thumbnail: " << thumbnail.size() << std::endl; continue; } if (control_image.type() != thumbnail.type()) { std::cout << "ERROR: Type mismatch! Control: " << control_image.type() << " Thumbnail: " << thumbnail.type() << std::endl; continue; } camera.most_recent_psnr_score = psnr(control_image, thumbnail); std::cout << "PSNR Score: " << std::fixed << std::setprecision(2) << camera.most_recent_psnr_score << std::endl; if (std::isinf(camera.most_recent_psnr_score)) { std::cout << "PSNR is infinity (identical frames), skipping..." << std::endl; continue; } if (camera.most_recent_psnr_score < camera.psnr_threshold) { std::cout << "*** MOVEMENT DETECTED *** (PSNR < threshold)" << std::endl; camera.movement_detected = true; camera.movement_last_detected = std::chrono::high_resolution_clock::now(); camera.frame_index_with_movement = frame_index; camera.mask = computeMovementMask(control_image, thumbnail, image.size(), camera.morphology_iterations); std::cout << "Mask created: " << camera.mask.cols << "x" << camera.mask.rows << " non-zero pixels: " << cv::countNonZero(camera.mask) << std::endl; break; } else { std::cout << "No movement (PSNR >= threshold)" << std::endl; } } camera.transition_detected = (previous_movement_detected != camera.movement_detected); std::cout << "Current movement: " << (camera.movement_detected ? "YES" : "NO") << std::endl; std::cout << "Transition detected: " << (camera.transition_detected ? "YES" : "NO") << std::endl; if (camera.mask.empty() || (camera.transition_detected && !camera.movement_detected)) { std::cout << "Resetting mask to zeros" << std::endl; camera.mask = cv::Mat(image.size(), CV_8UC1, cv::Scalar(0)); } camera.output = image.clone(); if (!camera.mask.empty() && camera.movement_detected) { outputObjects = extractObjectsFromMask(camera.mask, image, camera, camera_id); std::cout << "Objects extracted: " << outputObjects.size() << std::endl; } // Update control frames if (frame_index >= camera.next_key_frame || camera.control_frames.size() < camera.number_of_control_frames) { std::cout << "Adding new control frame at index " << frame_index << std::endl; updateControlFrames(camera, frame_index, thumbnail); std::cout << "Next key frame will be at: " << camera.next_key_frame << std::endl; } camera.next_frame_index = frame_index + 1; auto end_time = std::chrono::high_resolution_clock::now(); auto processing_time = std::chrono::duration_cast( end_time - start_time ); updateStatistics(camera, camera.most_recent_psnr_score, camera.movement_detected, processing_time); std::cout << "Processing time: " << processing_time.count() << "ms" << std::endl; std::cout << "=== End Debug ===" << std::endl; return outputObjects; } } // namespace ANSCENTER