// ANSEngineCommon.h — common types, constants and utility/base-class
// declarations shared by the ANS engine components.
#ifndef ANSENGINECOMMON_H
|
||
|
|
#define ANSENGINECOMMON_H
|
||
|
|
#define ANSENGINE_API __declspec(dllexport)
|
||
|
|
#pragma once
|
||
|
|
#include <opencv2/opencv.hpp>

#include <opencv2/core/cuda.hpp>

#include <openvino/openvino.hpp>

#include "ANSLicense.h"

#include "Utility.h"

#include <algorithm>

#include <fstream>

#include <exception>

#include <random>

#include <stdlib.h>

#include <stdio.h>

#include <vector>

#include <string>

#include <map>

#include "ANSMOT.h"

#include "onnxruntime_cxx_api.h"

// Standard-library headers used directly by declarations in this file but
// previously only reached transitively through other includes (IWYU):
#include <array>         // std::array (face-alignment helpers)
#include <chrono>        // std::chrono durations / time_points
#include <deque>         // std::deque (mask/centroid histories, detection queue)
#include <iostream>      // std::cout / std::endl in DEBUG_PRINT
#include <limits>        // std::numeric_limits
#include <mutex>         // std::recursive_mutex
#include <unordered_map> // std::unordered_map (per-camera storage)
#include <utility>       // std::pair
|
||
|
|
|
||
|
|
// Build switches.
#define USEONNXOV

// Number of frames a previously detected area is retained before being discarded.
#define RETAINDETECTEDFRAMES 80

// Debug logging helper. Wrapped in do { } while (0) so the macro expands to a
// single statement and stays safe inside unbraced if/else chains; the original
// definition embedded a trailing semicolon, which made
// `if (cond) DEBUG_PRINT(x); else ...` fail to compile (double statement).
// Call sites supply the trailing semicolon as with any statement.
#define DEBUG_PRINT(x) do { std::cout << x << std::endl; } while (0)

//#define DEBUGENGINE

//#define USE_TV_MODEL // Use model to detect if person is on TV screen or not
|
||
|
|
// Engine-wide tuning constants.
// `inline constexpr` (C++17, already used elsewhere in this file) gives one
// compile-time constant per program instead of a separate internal-linkage
// copy in every translation unit that includes this header.
inline constexpr int MAX_HISTORY_FACE = 5;      // history samples kept per face track
inline constexpr int MAX_MISSING_FACE = 30;     // frames a face may be missing before its track is dropped
inline constexpr int MAX_TRACKS = 200;          // upper bound on concurrently tracked objects
inline constexpr float PAD_TRACK_RATIO = 0.5f;  // box padding ratio applied while tracking
inline constexpr float PAD_DETECT_RATIO = 1.0f; // box padding ratio applied while detecting
inline constexpr int MAX_MISSING_SCREEN = 1000; // frames a screen region may stay unseen (see USE_TV_MODEL)
|
||
|
|
|
||
|
|
namespace ANSCENTER
|
||
|
|
{
|
||
|
|
// Clamp `value` into the closed interval spanned by `low` and `high`.
// Unlike std::clamp, the bounds may be supplied in either order: they are
// normalised first, so clamp(v, 10, 0) behaves exactly like clamp(v, 0, 10).
// Restricted to arithmetic types via SFINAE.
template <typename T>
typename std::enable_if<std::is_arithmetic<T>::value, T>::type
inline clamp(const T& value, const T& low, const T& high)
{
    const bool inOrder = low < high;
    const T lowerBound = inOrder ? low : high;
    const T upperBound = inOrder ? high : low;

    // Same comparison operators and evaluation order as a guard-clause form,
    // so floating-point edge cases (e.g. NaN falls through to `value`) match.
    return (value < lowerBound) ? lowerBound
         : (value > upperBound) ? upperBound
                                : value;
}
|
||
|
|
// Lightweight integer 2-D point used by the ROI structures below
// (see ROIValue::ROIPoints and the JSON example further down).
struct Point {
    int x, y; // pixel coordinates
};
|
||
|
|
// Describes one configurable ROI "slot": which shapes are allowed and how
// many instances may be drawn. Mirrors the "ROI_Config" entries of the JSON
// example later in this file.
struct ROIConfig {
    bool Rectangle;       // true if a rectangular ROI may be drawn
    bool Polygon;         // true if a polygonal ROI may be drawn
    bool Line;            // true if a line ROI may be drawn
    int MinItems;         // minimum number of ROIs of this kind
    int MaxItems;         // maximum number of ROIs of this kind
    std::string Name;     // display name, e.g. "Traffic Light"
    std::string ROIMatch; // matching rule, e.g. "All Corners" (JSON key "ROI-Match")
};
|
||
|
|
|
||
|
|
// One user-tunable parameter. Mirrors the "Parameters" entries of the JSON
// example later in this file; all values are carried as strings.
struct Parameter {
    std::string Name;       // identifier, e.g. "Para1"
    std::string DataType;   // "Boolean", "Integer", "List-Single", "Range", ... (per JSON example)
    int NoOfDecimals;       // decimal places (JSON key is spelled "NoOfdecimals")
    int MaxValue;           // upper bound for numeric/range types
    int MinValue;           // lower bound for numeric/range types
    std::string StartValue; // initial value, e.g. "2" or ">,60" for ranges
    std::vector<std::string> ListItems; // choices for list-type parameters
    std::string DefaultValue;
    std::string Value;      // current value, stored as text
};
|
||
|
|
|
||
|
|
// A concrete ROI instance. Mirrors the "ROI_Values" entries of the JSON
// example later in this file.
struct ROIValue {
    std::string ROIMatch;         // matching rule (JSON key "ROI-Match"), e.g. "Centre Point"
    std::vector<Point> ROIPoints; // vertices: 2 points for a line, 4 for a rectangle/polygon
    std::string Option;           // e.g. "Inside ROI", "Above", "Left side"
    std::string Name;             // instance name, e.g. "Car Zone 1"
    int OriginalImageSize;        // reference image size the points were captured at
                                  // (1920 in the example — presumably the width; confirm)
};
|
||
|
|
// Full parameter payload exchanged as JSON (see the example below):
// ROI slot definitions, the available ROI options, the tunable parameters,
// and the concrete ROI instances the user has drawn.
struct Params {
    std::vector<ROIConfig> ROI_Config;
    std::vector<std::string> ROI_Options;
    std::vector<Parameter> Parameters;
    std::vector<ROIValue> ROI_Values;
};
|
||
|
|
|
||
|
|
|
||
|
|
/* Example
|
||
|
|
{
|
||
|
|
"ROI_Config":[
|
||
|
|
{
|
||
|
|
"Rectangle":true,
|
||
|
|
"Polygon":true,
|
||
|
|
"Line":false,
|
||
|
|
"MinItems":0,
|
||
|
|
"MaxItems":3,
|
||
|
|
"Name":"Traffic Light",
|
||
|
|
"ROI-Match":"All Corners"
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"Rectangle":true,
|
||
|
|
"Polygon":false,
|
||
|
|
"Line":false,
|
||
|
|
"MinItems":1,
|
||
|
|
"MaxItems":1,
|
||
|
|
"Name":"Car Zone",
|
||
|
|
"ROI-Match":"All Corners"
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"Rectangle":false,
|
||
|
|
"Polygon":false,
|
||
|
|
"Line":true,
|
||
|
|
"MinItems":1,
|
||
|
|
"MaxItems":2,
|
||
|
|
"Name":"Cross Line",
|
||
|
|
"ROI-Match":"All Corners"
|
||
|
|
}
|
||
|
|
],
|
||
|
|
"ROI_Options":[
|
||
|
|
"Inside ROI",
|
||
|
|
"Inside ROI",
|
||
|
|
"Both Directions"
|
||
|
|
],
|
||
|
|
"Parameters":[
|
||
|
|
{
|
||
|
|
"Name":"Para1",
|
||
|
|
"DataType":"Boolean",
|
||
|
|
"NoOfdecimals":0,
|
||
|
|
"MaxValue":0,
|
||
|
|
"MinValue":0,
|
||
|
|
"StartValue":"",
|
||
|
|
"ListItems":[],
|
||
|
|
"DefaultValue":"",
|
||
|
|
"Value":"true"
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"Name":"Para2",
|
||
|
|
"DataType":"Integer",
|
||
|
|
"NoOfdecimals":0,
|
||
|
|
"MaxValue":5,
|
||
|
|
"MinValue":1,
|
||
|
|
"StartValue":"2",
|
||
|
|
"ListItems":[],
|
||
|
|
"DefaultValue":"",
|
||
|
|
"Value":"3"
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"Name":"Para3",
|
||
|
|
"DataType":"List-Single",
|
||
|
|
"NoOfdecimals":0,
|
||
|
|
"MaxValue":0,
|
||
|
|
"MinValue":0,
|
||
|
|
"StartValue":"",
|
||
|
|
"ListItems":["A","B","C"],
|
||
|
|
"DefaultValue":"",
|
||
|
|
"Value":"A"
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"Name":"Para4",
|
||
|
|
"DataType":"Range",
|
||
|
|
"NoOfdecimals":0,
|
||
|
|
"MaxValue":100,
|
||
|
|
"MinValue":50,
|
||
|
|
"StartValue":">,60",
|
||
|
|
"ListItems":[">","<"],
|
||
|
|
"DefaultValue":"",
|
||
|
|
"Value":">,52.000000"
|
||
|
|
}
|
||
|
|
],
|
||
|
|
"ROI_Values":[
|
||
|
|
{
|
||
|
|
"ROI-Match":"Centre Point",
|
||
|
|
"ROIPoints":[
|
||
|
|
{"x":269,"y":134},
|
||
|
|
{"x":777,"y":134},
|
||
|
|
{"x":777,"y":457},
|
||
|
|
{"x":269,"y":457}
|
||
|
|
],
|
||
|
|
"Option":"Inside ROI",
|
||
|
|
"Name":"Car Zone 1",
|
||
|
|
"OriginalImageSize":1920
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"ROI-Match":"Centre Point",
|
||
|
|
"ROIPoints":[{"x":280,"y":613},{"x":1108,"y":280}],
|
||
|
|
"Option":"Above",
|
||
|
|
"Name":"Cross Line 1",
|
||
|
|
"OriginalImageSize":1920
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"ROI-Match":"Centre Point",
|
||
|
|
"ROIPoints":[{"x":1511,"y":383},{"x":1283,"y":754}],
|
||
|
|
"Option":"Left side",
|
||
|
|
"Name":"Cross Line 2",
|
||
|
|
"OriginalImageSize":1920
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"ROI-Match":"Centre Point",
|
||
|
|
"ROIPoints":[
|
||
|
|
{"x":229,"y":161},
|
||
|
|
{"x":964,"y":161},
|
||
|
|
{"x":964,"y":628},
|
||
|
|
{"x":229,"y":628}
|
||
|
|
],
|
||
|
|
"Option":"Left side",
|
||
|
|
"Name":"Traffic Light 1",
|
||
|
|
"OriginalImageSize":1920
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"ROI-Match":"Centre Point",
|
||
|
|
"ROIPoints":[
|
||
|
|
{"x":1115,"y":304},
|
||
|
|
{"x":1730,"y":304},
|
||
|
|
{"x":1730,"y":695},
|
||
|
|
{"x":1115,"y":695}
|
||
|
|
],
|
||
|
|
"Option":"Left side",
|
||
|
|
"Name":"Traffic Light 2",
|
||
|
|
"OriginalImageSize":1920
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"ROI-Match":"Centre Point",
|
||
|
|
"ROIPoints":[
|
||
|
|
{"x":678,"y":683},
|
||
|
|
{"x":1217,"y":683},
|
||
|
|
{"x":1217,"y":1026},
|
||
|
|
{"x":678,"y":1026}
|
||
|
|
],
|
||
|
|
"Option":"Left side",
|
||
|
|
"Name":"Traffic Light 3",
|
||
|
|
"OriginalImageSize":1920
|
||
|
|
}
|
||
|
|
]
|
||
|
|
}
|
||
|
|
*/
|
||
|
|
// Threshold/normalisation metadata loaded from a model's JSON sidecar
// (see ANSUtilityHelper::GetJson).
struct MetaData
{
    float imageThreshold; // image-level decision threshold
    float pixelThreshold; // pixel-level decision threshold
    float min;            // normalisation range minimum
    float max;            // normalisation range maximum
    int inferSize[2]; // h w
    int imageSize[2]; // h w
    std::vector<float> _mean; // presumably per-channel mean — confirm against GetJson
    std::vector<float> _std;  // presumably per-channel std-dev — confirm against GetJson
};
|
||
|
|
// Result of a resize operation: the resized image plus the dw/dh deltas
// (presumably letterbox padding offsets used to map coordinates back to the
// original image — confirm against the code that fills this struct).
struct Resize
{
    cv::Mat resizedImage;
    int dw; // horizontal delta
    int dh; // vertical delta
};
|
||
|
|
// One detection/tracking result: class identity, confidence, geometry
// (box / polygon / keypoints) and optional crops used by downstream stages
// such as face recognition.
struct Object
{
    int classId{ 0 };        // model class index
    int trackId{ 0 };        // tracker-assigned id (0 until a tracker assigns one — confirm)
    std::string className{}; // human-readable class label
    float confidence{ 0.0 }; // detection score
    cv::Rect box{};          // axis-aligned bounding box in image coordinates
    std::vector<cv::Point2f> polygon; // polygon that contain x1,y1,x2,y2,x3,y3,x4,y4 (for both segmentation and pose estimation)
    cv::Mat mask{}; // image in box (cropped)
    cv::cuda::GpuMat gpuMask{}; // GPU-resident face crop (set by NV12 affine warp, avoids re-upload)
    std::vector<float> kps{}; // Pose estimate keypoints, containing x1,y1,x2,y2,... or oriented box x,y,width,height,angle
    std::string extraInfo; // More information such as facial recognition
    std::string cameraId; // Use to check if this object belongs to any camera
    // std::string attributes;
};
|
||
|
|
// Result of matching one detected face against the enrolled gallery
// (see ANSFRBase::Match): identity, similarity score and face geometry.
struct FaceResultObject {
    int trackId{ 0 };     // tracker id of the face, if tracked
    std::string userId;   // matched user's id
    std::string userName; // matched user's display name
    float similarity;     // similarity score against the matched identity
    bool isUnknown;       // true when no gallery identity matched
    bool isMasked; // If the face is masked
    cv::Rect box{}; // Face bounding box
    cv::Mat mask;   // face crop (naming mirrors Object::mask)
    std::vector<cv::Point2f> polygon; // polygon that contain x1 ,y1,x2,y2,x3,y3,x4,y4
    std::vector<float> kps{}; // Containing landmarks
    float confidence{ 0.0 };  // face-detection confidence
    std::string extraInformation;
    std::string cameraId;
    // std::string attributes; // Face attributes (in Json format)
};
|
||
|
|
// Plain integer rectangle: a position (x, y) plus extents (width, height).
// A default-constructed box is all zeros.
struct BoundingBox {
    int x{ 0 };
    int y{ 0 };
    int width{ 0 };
    int height{ 0 };

    // Default: empty box (all components zero via the member initializers).
    BoundingBox() = default;

    // Construct from explicit position and size.
    BoundingBox(int x_, int y_, int width_, int height_)
        : x{ x_ }, y{ y_ }, width{ width_ }, height{ height_ } {}
};
|
||
|
|
// A single 2-D keypoint plus its detection confidence.
struct KeyPoint {
    float x;          ///< X-coordinate of the keypoint
    float y;          ///< Y-coordinate of the keypoint
    float confidence; ///< Confidence score of the keypoint

    /// Every component defaults to zero, so `KeyPoint{}` is a valid
    /// "empty" keypoint.
    KeyPoint(float x_ = 0, float y_ = 0, float conf_ = 0)
    {
        x = x_;
        y = y_;
        confidence = conf_;
    }
};
|
||
|
|
// A group holds a set of objects and the union (bounding box) of all their boxes.
struct Group {
    std::vector<Object> objects; // members of the group
    cv::Rect unionBox;           // bounding box covering every member's box
};

// Pair of cv::Scalar bounds (low, high) — exact semantics (e.g. a colour
// range) are defined by the code that uses it.
typedef std::pair<cv::Scalar, cv::Scalar> Range;
|
||
|
|
|
||
|
|
class ANSENGINE_API ANNHUBClassifier
|
||
|
|
{
|
||
|
|
private:
|
||
|
|
std::vector<double> nInput; //ANN inputs
|
||
|
|
std::vector<double> nOutput; //ANN outputs
|
||
|
|
std::vector<std::vector<double>> IW;
|
||
|
|
std::vector<std::vector<double>> LW;
|
||
|
|
std::vector<double> Ib;
|
||
|
|
std::vector<double> Lb;
|
||
|
|
// Structural parameters
|
||
|
|
int nInputNodes, nHiddenNodes, nOutputNodes;
|
||
|
|
int hiddenActivation; // default =2
|
||
|
|
int outputActivation; // default =2
|
||
|
|
int dataNormalisationModeInput; // default =1;
|
||
|
|
int dataNormalisationModeOutput; // default =1;
|
||
|
|
|
||
|
|
// Preprocessing and postprocessing settings
|
||
|
|
std::vector<double> xmaxInput, xminInput; // Maximum and minimum of inputs
|
||
|
|
double ymaxInput, yminInput; // Maximum and minimum of inputs
|
||
|
|
std::vector<double> xmaxOutput, xminOutput; // Maximum and minimum of outputs
|
||
|
|
double ymaxOutput, yminOutput; // Maximum and minimum of outputs
|
||
|
|
|
||
|
|
// Control creation
|
||
|
|
unsigned char isCreated;
|
||
|
|
std::string _licenseKey;
|
||
|
|
bool _licenseValid{ false };
|
||
|
|
bool _isInitialized{ false };
|
||
|
|
std::string _modelFilePath;
|
||
|
|
|
||
|
|
private:
|
||
|
|
|
||
|
|
void PreProcessing(std::vector<double>& Input); // mode =0--> linear, mode =1 mapminmax, mode =2 standarddev
|
||
|
|
void PostProcessing(std::vector<double>& Output); // mode =0--> linear, mode =1 mapminmax, mode =2 standarddev
|
||
|
|
void Create(int inputNodes, int HiddenNodes, int outputNodes);
|
||
|
|
void FreeNeuralNetwork();
|
||
|
|
void CheckLicense();
|
||
|
|
int ImportANNFromFile(std::string filename);
|
||
|
|
|
||
|
|
public:
|
||
|
|
ANNHUBClassifier();
|
||
|
|
~ANNHUBClassifier();
|
||
|
|
bool Init(std::string licenseKey, std::string modelFilePath);
|
||
|
|
std::vector<double> Inference(std::vector<double> ip);
|
||
|
|
void Destroy();
|
||
|
|
int GetOutputNode() { return nOutputNodes; };
|
||
|
|
private:
|
||
|
|
void ReLu(std::vector<double>& iVal, std::vector<double>& oVal);
|
||
|
|
void LogSig(std::vector<double>& iVal, std::vector<double>& oVal);
|
||
|
|
void TanSig(std::vector<double>& iVal, std::vector<double>& oVal);
|
||
|
|
void PureLin(std::vector<double>& iVal, std::vector<double>& oVal);
|
||
|
|
void SoftMax(std::vector<double>& iVal, std::vector<double>& oVal);
|
||
|
|
void ActivationFunction(std::vector<double>& iVal, std::vector<double>& oVal, int mode);
|
||
|
|
};
|
||
|
|
// Multi-camera movement detector. Each camera keeps a rolling set of
// thumbnail "control frames"; incoming frames are compared against them via
// PSNR, and detected movement regions can be filtered by a temporal
// consistency check (mask overlap + centroid stability across recent frames)
// to suppress flicker. A recursive mutex guards the per-camera map —
// presumably taken by the public methods (implementations not shown here).
class ANSENGINE_API MoveDetectsHandler
{
public:
    // Constructor and Destructor
    MoveDetectsHandler();
    ~MoveDetectsHandler();

    // Main detection methods. The first overload auto-increments the frame
    // index; the second lets the caller supply it.
    std::vector<Object> MovementDetect(const std::string& camera_id, cv::Mat& next_image);
    std::vector<Object> MovementDetect(const std::string& camera_id, const size_t frame_index, cv::Mat& image);

    // Camera management
    bool hasCameraData(const std::string& camera_id) const;
    void removeCamera(const std::string& camera_id);
    std::vector<std::string> getCameraIds() const;

    // Configuration methods (per camera)
    void setThreshold(const std::string& camera_id, double threshold);
    void setKeyFrameFrequency(const std::string& camera_id, size_t frequency);
    void setNumberOfControlFrames(const std::string& camera_id, size_t count);
    void setThumbnailRatio(const std::string& camera_id, double ratio);
    void setMaskEnabled(const std::string& camera_id, bool enabled);
    void setContoursEnabled(const std::string& camera_id, bool enabled);
    void setBboxEnabled(const std::string& camera_id, bool enabled);
    void setContourThickness(const std::string& camera_id, int thickness);
    void setBboxThickness(const std::string& camera_id, int thickness);
    void setMinObjectArea(const std::string& camera_id, double area);
    void setMinObjectSize(const std::string& camera_id, int size);
    void setMorphologyIterations(const std::string& camera_id, int iterations);

    // Temporal consistency settings
    void setTemporalConsistency(const std::string& camera_id, bool enabled);
    void setMaskOverlapThreshold(const std::string& camera_id, double threshold);
    void setTemporalHistorySize(const std::string& camera_id, size_t size);
    void setMinConsistentFrames(const std::string& camera_id, size_t frames);
    void setLocationStabilityEnabled(const std::string& camera_id, bool enabled);
    void setMaxLocationJitter(const std::string& camera_id, double pixels);

    // Getters for configuration
    double getThreshold(const std::string& camera_id) const;
    size_t getKeyFrameFrequency(const std::string& camera_id) const;
    size_t getNumberOfControlFrames(const std::string& camera_id) const;
    double getThumbnailRatio(const std::string& camera_id) const;
    bool isMaskEnabled(const std::string& camera_id) const;
    bool isContoursEnabled(const std::string& camera_id) const;
    bool isBboxEnabled(const std::string& camera_id) const;
    bool isTemporalConsistencyEnabled(const std::string& camera_id) const;

    // State query methods
    bool isMovementDetected(const std::string& camera_id) const;
    bool wasTransitionDetected(const std::string& camera_id) const;
    double getPSNRScore(const std::string& camera_id) const;
    size_t getFrameIndexWithMovement(const std::string& camera_id) const;
    std::chrono::milliseconds getTimeSinceLastMovement(const std::string& camera_id) const;
    size_t getControlFrameCount(const std::string& camera_id) const;
    size_t getNextFrameIndex(const std::string& camera_id) const;
    double getTemporalConsistencyScore(const std::string& camera_id) const;

    // Public utility methods
    bool empty(const std::string& camera_id) const;
    void clear(const std::string& camera_id);
    void clearAll();
    cv::Mat getOutput(const std::string& camera_id) const;
    cv::Mat getMask(const std::string& camera_id) const;
    std::vector<std::vector<cv::Point>> getContours(const std::string& camera_id) const;

    // Statistics accumulated per camera.
    struct CameraStats {
        size_t total_frames_processed = 0;
        size_t frames_with_movement = 0;
        size_t frames_rejected_by_temporal_check = 0;
        size_t control_frames_count = 0;
        double average_psnr = 0.0;
        double min_psnr = std::numeric_limits<double>::max();
        double max_psnr = 0.0;
        double average_temporal_consistency = 0.0;
        std::chrono::milliseconds total_processing_time{ 0 };
        std::chrono::high_resolution_clock::time_point last_movement_time;

        // Reset stats to their freshly-constructed values
        // (last_movement_time is intentionally left untouched).
        void reset()
        {
            total_frames_processed = 0;
            frames_with_movement = 0;
            frames_rejected_by_temporal_check = 0;
            control_frames_count = 0;
            average_psnr = 0.0;
            min_psnr = std::numeric_limits<double>::max();
            max_psnr = 0.0;
            average_temporal_consistency = 0.0;
            total_processing_time = std::chrono::milliseconds{ 0 };
        }
    };

    CameraStats getStats(const std::string& camera_id) const;
    void resetStats(const std::string& camera_id);

private:
    // All mutable state held for a single camera.
    struct CameraData
    {
        // Detection state
        bool movement_detected = false;
        bool transition_detected = false;
        size_t next_frame_index = 0;
        size_t next_key_frame = 0;
        double most_recent_psnr_score = 0.0;
        size_t frame_index_with_movement = 0;
        double max_change_percentage = 20.0; // Max % of frame that can change
        double min_change_percentage = 1.0; // Min % of frame that must change

        std::chrono::high_resolution_clock::time_point movement_last_detected;

        // Control frames storage (frame index -> thumbnail)
        std::map<size_t, cv::Mat> control_frames;

        // Output data
        cv::Mat output;
        cv::Mat mask;
        std::vector<std::vector<cv::Point>> contours;

        // Configuration parameters
        size_t key_frame_frequency = 20;
        size_t number_of_control_frames = 10;
        double psnr_threshold = 45.0;
        double thumbnail_ratio = 0.05;
        cv::Size thumbnail_size = cv::Size(0, 0);

        // Visual options
        bool mask_enabled = true;
        bool contours_enabled = true;
        bool bbox_enabled = true;
        cv::LineTypes line_type = cv::LINE_4;
        int contours_size = 1;
        int bbox_size = 1;

        // Filtering parameters
        double min_object_area = 1000.0;
        int min_object_dimension = 5;
        int min_object_total_size = 25;

        // Morphology parameters
        int morphology_iterations = 10;

        // Temporal consistency parameters
        bool temporal_consistency_enabled = true;
        double mask_overlap_threshold = 0.05; // 5% overlap with previous required
        size_t temporal_history_size = 5; // Keep last N masks
        size_t min_consistent_frames = 3; // Need N consecutive consistent frames
        bool location_stability_enabled = true;
        double max_location_jitter = 50.0; // Max pixel movement between frames

        // Temporal consistency state
        std::deque<cv::Mat> mask_history;
        std::deque<cv::Point> centroid_history;
        size_t consistent_frame_count = 0;
        double last_temporal_consistency_score = 0.0;

        // Statistics
        CameraStats stats;

        // Clear function to release memory: explicitly releases every
        // cv::Mat (dropping its refcount) before clearing the containers,
        // then resets the detection state. Configuration values are kept.
        void clear()
        {
            for (auto& [index, frame] : control_frames)
            {
                frame.release();
            }
            control_frames.clear();
            output.release();
            mask.release();
            contours.clear();

            // Clear temporal history
            for (auto& m : mask_history)
            {
                m.release();
            }
            mask_history.clear();
            centroid_history.clear();

            // Reset state
            movement_detected = false;
            transition_detected = false;
            most_recent_psnr_score = 0.0;
            frame_index_with_movement = 0;
            thumbnail_size = cv::Size(0, 0);
            consistent_frame_count = 0;
            last_temporal_consistency_score = 0.0;
        }
    };

    // Private member functions
    double psnr(const cv::Mat& src, const cv::Mat& dst);
    cv::Mat simple_colour_balance(const cv::Mat& src);
    cv::Rect BoundingBoxFromContour(const std::vector<cv::Point>& contour);

    // Multi-camera data storage
    std::unordered_map<std::string, CameraData> cameras;
    // mutable so const query methods can still lock it.
    mutable std::recursive_mutex cameras_mutex;

    // Helper functions
    CameraData& getCameraData(const std::string& camera_id);
    const CameraData* getCameraDataConst(const std::string& camera_id) const;
    bool cameraExists(const std::string& camera_id) const;

    // Processing helpers
    cv::Mat computeMovementMask(const cv::Mat& control_frame, const cv::Mat& current_frame,
        const cv::Size& output_size, int morphology_iterations);
    std::vector<Object> extractObjectsFromMask(const cv::Mat& mask, const cv::Mat& image,
        CameraData& camera, const std::string& camera_id);
    void updateControlFrames(CameraData& camera, size_t frame_index, const cv::Mat& thumbnail);
    void updateStatistics(CameraData& camera, double psnr, bool movement_detected,
        std::chrono::milliseconds processing_time);

    // Temporal consistency helpers
    bool checkTemporalConsistency(CameraData& camera, const cv::Mat& current_mask);
    double calculateMaskOverlap(const cv::Mat& mask1, const cv::Mat& mask2);
    cv::Point calculateMaskCentroid(const cv::Mat& mask);
    double calculateLocationStability(const std::deque<cv::Point>& centroids);
    void updateTemporalHistory(CameraData& camera, const cv::Mat& mask);

    // Shared implementation behind both public MovementDetect overloads.
    std::vector<Object> MovementDetectInternal(const std::string& camera_id,
        const size_t frame_index,
        cv::Mat& image,
        CameraData& camera);
};
|
||
|
|
// Stateless grab-bag of static helpers: string <-> geometry/detection
// (de)serialisation, image preprocessing, face alignment/cropping, model
// optimisation, and tiled-inference NMS/NMM utilities.
// NOTE(review): several names carry historical typos kept for source/ABI
// compatibility — SerializeCustomParamters, AdjustBoudingBox, bufferLengh,
// cropedImageSize.
class ANSENGINE_API ANSUtilityHelper {
public:
    // ── String parsing / serialisation ──────────────────────────────
    static std::vector<std::string> Split(const std::string& s, char delimiter);
    static std::vector<cv::Point> StringToPolygon(const std::string& input);
    static cv::Mat CropPolygon(const cv::Mat& image, const std::vector<cv::Point>& polygon);
    static cv::Mat CropFromStringPolygon(const cv::Mat& image, const std::string& strPolygon);
    static std::vector<cv::Rect> GetBoundingBoxesFromString(std::string strBBoxes);
    static std::vector<ANSCENTER::Object> GetDetectionsFromString(const std::string& strDets);
    static std::vector<float> StringToKeypoints(const std::string& str);
    static std::vector<cv::Point2f> PolygonFromString(const std::string& str);
    static ANSCENTER::Params ParseCustomParameters(const std::string& paramsJson);
    static std::string SerializeCustomParamters(const ANSCENTER::Params& params);

    static bool ParseActiveROIMode(const std::string activeROIMode,int & mode,double & detectionScore, std::vector<int> & trackingObjectIds);
    static cv::Rect GetBoundingBoxFromPolygon(const std::vector<cv::Point>& polygon);
    static std::string VectorDetectionToJsonString(const std::vector<Object>& dets);
    // ── Image loading / preprocessing ──────────────────────────────
    static cv::Mat ReadImagePath(const std::string& imagePath);
    static cv::Mat ReadImageStreamBase64(const std::string& imageStreamBase64);
    static cv::Mat FormatToSquare(const cv::Mat& source);
    // Caller is responsible for the returned buffer's lifetime — TODO confirm
    // ownership/deallocation contract in the implementation.
    static unsigned char* CVMatToBytes(cv::Mat image, unsigned int& bufferLengh);
    static std::vector<std::string> GetConfigFileContent(std::string modelConfigFile, ModelType& modelType, std::vector<int>& inputShape);
    static MetaData GetJson(const std::string& jsonPath);
    static std::vector<unsigned char> DecodeBase64(const std::string& base64);
    static cv::Mat Resize(const cv::Mat& src, int dst_height, int dst_width, const std::string& interpolation);
    static cv::Mat Crop(const cv::Mat& src, int top, int left, int bottom, int right);
    static cv::Mat Divide(const cv::Mat& src, float divide = 255.0);
    static cv::Mat Normalize(cv::Mat& src, const std::vector<float>& mean, const std::vector<float>& std, bool to_rgb = false, bool inplace = true);
    static cv::Mat Transpose(const cv::Mat& src);
    static cv::Mat Pad(const cv::Mat& src, int top, int left, int bottom, int right, int border_type, float val);
    static cv::Mat JpegStringToMat(const std::string& jpegString);
    // ── Matrix helpers (used by the similarity-transform alignment) ─
    static cv::Mat MeanAxis0(const cv::Mat& src);
    static cv::Mat ElementwiseMinus(const cv::Mat& A, const cv::Mat& B);
    static cv::Mat VarAxis0(const cv::Mat& src);
    static int MatrixRank(cv::Mat M);
    static cv::Mat SimilarTransform(cv::Mat& dst, cv::Mat& src);
    // ── Face alignment / cropping ──────────────────────────────────
    static std::vector<cv::Mat> AlignFaceWithFivePoints(const cv::Mat& image,
        const std::vector<std::array<int, 4>> boxes,
        std::vector<std::array<float, 2>> landmarks);
    static std::vector<cv::Mat>GetCroppedFaces(const cv::Mat& image, const std::vector<std::array<int, 4>> boxes);
    static cv::Mat GetCroppedFace(const cv::Mat& image, const int x1, const int y1, const int x2, const int y2);
    static cv::Mat GetCroppedFaceScale(const cv::Mat& image, const int x1, const int y1, const int x2, const int y2, int cropedImageSize);

    // For openVINO face alignment
    static cv::Mat GetTransform(cv::Mat* src, cv::Mat* dst);
    static void AlignFaces(std::vector<cv::Mat>* face_images, std::vector<cv::Mat>* landmarks_vec);
    static void AlignFacesExt(std::vector<cv::Mat>* face_images, std::vector<cv::Mat>* landmarks_vec);
    static std::pair<cv::Mat, cv::Mat> AlignFacesSCRFD(const cv::Mat& input_mat, const std::vector<cv::Point2f>& face_landmark_5);

    // Model optimsation (for TensorRT)
    static bool ModelOptimizer(std::string modelZipFilePath, std::string modelFileZipPassword, int fp16, std::string& optimisedModelFolder, int inputImageHeight=640, int inputImageWidth=640);

    // For tiled inference
    static std::vector<Object> ApplyNMS(const std::vector<Object>& detections, float nmsThreshold = 0.4);
    static void AdjustBoudingBox(Object& obj, int offsetX, int offsetY);
    static std::vector<cv::Rect> GetPatches(cv::Mat& image, int tileWidth, int tileHeight, double overlap);
    static std::vector<cv::Mat> ExtractPatches(cv::Mat& image, std::vector<cv::Rect>& patchRegions);
    static cv::Mat ResizePatch(cv::Mat& patch, int modelWidth, int modelHeight);
    // Greedy non-maximum merging: maps each kept prediction index to the
    // indices merged into it.
    static std::map<int, std::vector<int>> Greedy_NMM(const std::vector<Object>& object_predictions, const std::string& match_metric = "IOU", float match_threshold = 0.5);
    static float calculate_intersection_area(const cv::Rect& box1, const cv::Rect& box2);
    static float calculate_bbox_iou(const Object& pred1, const Object& pred2);
    static float calculate_bbox_ios(const Object& pred1, const Object& pred2);
    static bool has_match(const Object& pred1, const Object& pred2, const std::string& match_type = "IOU", float match_threshold = 0.5);
    static float get_merged_score(const Object& pred1, const Object& pred2);
    static cv::Rect calculate_box_union(const cv::Rect& box1, const cv::Rect& box2);
    static cv::Rect get_merged_bbox(const Object& pred1, const Object& pred2);
    // The class id of the higher-confidence prediction wins.
    static int get_merged_class_id(const Object& pred1, const Object& pred2) {
        return (pred1.confidence > pred2.confidence) ? pred1.classId : pred2.classId;
    }
    // The class name of the higher-confidence prediction wins.
    static std::string get_merged_category(const Object& pred1, const Object& pred2) {
        return (pred1.confidence > pred2.confidence) ? pred1.className : pred2.className;
    }
    static Object merge_object_pair(const Object& obj1, const Object& obj2);
    static std::vector<Object> select_object_predictions(const std::vector<Object>& object_prediction_list,const std::map<int, std::vector<int>>& keep_to_merge_list,const std::string& match_metric,float match_threshold);
    static cv::Rect BoundingBoxFromContour(std::vector<cv::Point> contour);
    static std::string PolygonToString(const std::vector<cv::Point2f>& polygon);
    static std::string KeypointsToString(const std::vector<float>& kps);
    static std::vector<cv::Point2f> RectToNormalizedPolygon(const cv::Rect& rect, float imageWidth, float imageHeight);
    static std::vector<cv::Point2f> MaskToNormalizedPolygon(const cv::Mat& binaryMask, const cv::Rect& boundingBox, float imageWidth, float imageHeight, float simplificationEpsilon = 2.0f, int minContourArea = 10, int maxPoints = 50);

};
|
||
|
|
|
||
|
|
class ANSENGINE_API ANSFRBase {
|
||
|
|
protected:
|
||
|
|
bool _licenseValid{ false };
|
||
|
|
bool _isInitialized{ false };
|
||
|
|
std::string _licenseKey;
|
||
|
|
std::string _modelFolder;
|
||
|
|
std::string _modelConfigFile;
|
||
|
|
SPDLogger& _logger = SPDLogger::GetInstance("ANSFR", false);
|
||
|
|
void CheckLicense();
|
||
|
|
|
||
|
|
public:
|
||
|
|
virtual bool Initialize(std::string licenseKey, ModelConfig modelConfig, const std::string& modelZipFilePath, const std::string& modelZipPassword, std::string& labelMap);
|
||
|
|
virtual bool LoadModel(const std::string& modelZipFilePath, const std::string& modelZipPassword);
|
||
|
|
virtual bool OptimizeModel(bool fp16, std::string& optimizedModelFolder);
|
||
|
|
virtual std::vector<FaceResultObject> Match(const cv::Mat& input, const std::vector<ANSCENTER::Object>& bBox, const std::map<std::string, std::string>& userDict) = 0; //
|
||
|
|
virtual std::vector<float> Feature(const cv::Mat& image, const ANSCENTER::Object& bBox) = 0; // Run inference and get embedding information from a cropped image (the first bbox)
|
||
|
|
virtual cv::Mat GetCropFace(const cv::Mat& input, const ANSCENTER::Object& bBox) = 0;
|
||
|
|
std::string GetModelFolder() { return _modelFolder; };
|
||
|
|
virtual bool UpdateParamater(double knownPersonThreshold)=0;
|
||
|
|
// For face
|
||
|
|
virtual void Init()=0;
|
||
|
|
virtual void AddEmbedding(const std::string& className, float embedding[])=0;
|
||
|
|
virtual void AddEmbedding(const std::string& className, const std::vector<float>& embedding) =0;
|
||
|
|
virtual bool Destroy()=0;
|
||
|
|
virtual void SetMaxSlotsPerGpu(int n) {} // override in TRT-based subclasses
|
||
|
|
~ANSFRBase();
|
||
|
|
|
||
|
|
// Utility functions
|
||
|
|
std::vector<float> L2Normalize(const std::vector<float>& values);
|
||
|
|
float CosineSimilarity(const std::vector<float>& a, const std::vector<float>& b, bool normalized);
|
||
|
|
};
|
||
|
|
class ANSENGINE_API ANSODBase {
|
||
|
|
protected:
|
||
|
|
bool _licenseValid{ false };
|
||
|
|
bool _isInitialized{ false };
|
||
|
|
Params _params;
|
||
|
|
std::string _licenseKey;
|
||
|
|
std::string _modelFolder;
|
||
|
|
std::string _modelConfigFile;
|
||
|
|
ModelConfig _modelConfig;
|
||
|
|
SPDLogger& _logger = SPDLogger::GetInstance("ANSOD", false);
|
||
|
|
// Debug benchmarking flag — when enabled, logs per-stage timing
|
||
|
|
// for the full inference pipeline (preprocess, inference, postprocess, tracking, etc.)
|
||
|
|
bool _debugFlag{ false };
|
||
|
|
std::string _classFilePath;
|
||
|
|
std::vector <std::string> _classes;
|
||
|
|
bool _loadEngineOnCreation{ true };
|
||
|
|
bool _skipEngineCache{ false };
|
||
|
|
bool _forceNoPool{ false };
|
||
|
|
|
||
|
|
void CheckLicense();
|
||
|
|
void LoadClassesFromString();
|
||
|
|
void LoadClassesFromFile();
|
||
|
|
std::recursive_mutex _mutex;
|
||
|
|
MoveDetectsHandler _handler;
|
||
|
|
size_t QUEUE_SIZE = 20;
|
||
|
|
|
||
|
|
// Multi-object tracker (MOT) — per-camera instances lazy-created in ApplyTracking
|
||
|
|
bool _trackerEnabled = false;
|
||
|
|
TrackerType _trackerType = TrackerType::BYTETRACK;
|
||
|
|
int _trackerMotType = 1; // ANSMOT motType int
|
||
|
|
std::string _trackerParams; // JSON params applied to each per-camera tracker
|
||
|
|
|
||
|
|
// Detection stabilization config
|
||
|
|
bool _stabilizationEnabled = false;
|
||
|
|
size_t _stabilizationQueueSize = 20; // history depth (frames)
|
||
|
|
int _stabilizationMaxConsecutiveMisses = 5; // stop interpolating after N misses
|
||
|
|
float _stabilizationConfidenceDecay = 0.85f; // per-miss confidence multiplier
|
||
|
|
float _stabilizationMinConfidence = 0.15f; // floor below which ghosts are dropped
|
||
|
|
|
||
|
|
// #1 Confidence hysteresis: two-threshold system
|
||
|
|
float _hysteresisEnterThreshold = 0.0f; // 0 = auto (use detectionScoreThreshold)
|
||
|
|
float _hysteresisKeepThreshold = 0.0f; // 0 = auto (enterThreshold * 0.65)
|
||
|
|
|
||
|
|
// #2 Temporal confidence smoothing (EMA)
|
||
|
|
float _emaAlpha = 0.3f; // EMA weight for new observation (0..1)
|
||
|
|
|
||
|
|
// #5 Track-aware confidence boost for established tracks
|
||
|
|
int _trackBoostMinFrames = 10; // frames before boost kicks in
|
||
|
|
float _trackBoostAmount = 0.05f; // confidence bonus for established tracks
|
||
|
|
|
||
|
|
// #7 Class consistency — prevent sudden class switches on established tracks
|
||
|
|
int _classConsistencyMinFrames = 5; // consecutive frames of new class required to accept switch
|
||
|
|
// Adaptive screens
|
||
|
|
struct ImageSection {
|
||
|
|
cv::Rect region;
|
||
|
|
int priority;
|
||
|
|
ImageSection(const cv::Rect& r) : region(r), priority(0) {}
|
||
|
|
};
|
||
|
|
cv::Size previousImageSize = cv::Size(0, 0);
|
||
|
|
|
||
|
|
// For active windows
|
||
|
|
std::vector<ImageSection> cachedSections;
|
||
|
|
int _currentPriority{ 0 }; // None
|
||
|
|
cv::Rect _detectedArea;// This is active windows.
|
||
|
|
|
||
|
|
int _retainDetectedArea{ 0 };
|
||
|
|
bool _isObjectDetected{ false };
|
||
|
|
struct CameraData
|
||
|
|
{
|
||
|
|
std::deque<std::vector<Object>> _detectionQueue; // That stores the detection results
|
||
|
|
|
||
|
|
// Per-camera tracker instance (lazy-created in ApplyTracking)
|
||
|
|
ANSCENTER::ANSMOT* _tracker = nullptr;
|
||
|
|
|
||
|
|
// ── Stabilization state ──────────────────────────────
|
||
|
|
struct TrackedObjectHistory {
|
||
|
|
int classId = 0;
|
||
|
|
std::string className;
|
||
|
|
std::string extraInfo; // preserve last-known extraInfo for ghost objects
|
||
|
|
cv::Rect lastBox;
|
||
|
|
float lastConfidence = 0.f;
|
||
|
|
float smoothedConfidence = 0.f; // EMA-smoothed confidence
|
||
|
|
int consecutiveMisses = 0; // frames since last raw detection
|
||
|
|
int totalDetections = 0; // lifetime detection count
|
||
|
|
int frameFirstSeen = 0; // frame counter when first detected
|
||
|
|
bool isEstablished = false; // true once totalDetections >= trackBoostMinFrames
|
||
|
|
|
||
|
|
// #7 Class consistency — resist sudden class switches
|
||
|
|
int pendingClassId = -1; // candidate new class (-1 = none)
|
||
|
|
std::string pendingClassName;
|
||
|
|
int pendingClassStreak = 0; // consecutive frames with pendingClass
|
||
|
|
};
|
||
|
|
std::unordered_map<int, TrackedObjectHistory> _trackHistories; // trackId -> history
|
||
|
|
int _stabilizationFrameCounter = 0;
|
||
|
|
|
||
|
|
void clear()
|
||
|
|
{
|
||
|
|
if (_tracker) {
|
||
|
|
ReleaseANSMOTHandle(&_tracker);
|
||
|
|
_tracker = nullptr;
|
||
|
|
}
|
||
|
|
for (auto& detectionVector : _detectionQueue)
|
||
|
|
{
|
||
|
|
detectionVector.clear(); // Clear each vector of Objects
|
||
|
|
}
|
||
|
|
_detectionQueue.clear(); // Clear the deque itself
|
||
|
|
_trackHistories.clear();
|
||
|
|
_stabilizationFrameCounter = 0;
|
||
|
|
}
|
||
|
|
};
|
||
|
|
// Multi-camera data storage
|
||
|
|
std::unordered_map<std::string, CameraData> _cameras;
|
||
|
|
CameraData _defaultCamera; // Default camera data if camera_id is not provided
|
||
|
|
|
||
|
|
// Tracker: convert detections → TrackerObject, run tracker, assign trackIds via IoU match
|
||
|
|
std::vector<Object> ApplyTracking(std::vector<Object>& detections, const std::string& camera_id);
|
||
|
|
|
||
|
|
// Stabilize detections: fill gaps with tracker-predicted objects, apply confidence decay
|
||
|
|
std::vector<Object> StabilizeDetections(std::vector<Object>& detections, const std::string& camera_id);
|
||
|
|
|
||
|
|
// extraInfo stabilization tag helpers
|
||
|
|
static void TagStabilized(std::string& extraInfo);
|
||
|
|
static void UntagStabilized(std::string& extraInfo);
|
||
|
|
static bool IsTaggedStabilized(const std::string& extraInfo);
|
||
|
|
|
||
|
|
// Other functions can be used;
|
||
|
|
bool isSimilarObject(const Object& obj1, const Object& obj2);
|
||
|
|
bool isOverlayObject(const Object& obj1, const Object& obj2);
|
||
|
|
// Helper: returns the Euclidean distance between two points.
|
||
|
|
float distance(const cv::Point2f& a, const cv::Point2f& b) {
|
||
|
|
float dx = a.x - b.x;
|
||
|
|
float dy = a.y - b.y;
|
||
|
|
return std::sqrt(dx * dx + dy * dy);
|
||
|
|
}
|
||
|
|
// Compute the union (bounding box) of two rectangles.
|
||
|
|
cv::Rect unionRect(const cv::Rect& a, const cv::Rect& b) {
|
||
|
|
int x = std::min(a.x, b.x);
|
||
|
|
int y = std::min(a.y, b.y);
|
||
|
|
int x2 = std::max(a.x + a.width, b.x + b.width);
|
||
|
|
int y2 = std::max(a.y + a.height, b.y + b.height);
|
||
|
|
return cv::Rect(x, y, x2 - x, y2 - y);
|
||
|
|
}
|
||
|
|
cv::Rect computeCandidateROI(const cv::Rect& unionBox, int fixedWidth, int fixedHeight, int imageWidth, int imageHeight);
|
||
|
|
// Check if two rectangles overlap (i.e. intersection area > 0).
|
||
|
|
bool isOverlap(const cv::Rect& a, const cv::Rect& b) {
|
||
|
|
return ((a & b).area() > 0);
|
||
|
|
}
|
||
|
|
std::string GetOpenVINODevice(ov::Core &core);
|
||
|
|
|
||
|
|
// Function to seperate screen size
|
||
|
|
double calculateDistanceToCenter(const cv::Point& center, const cv::Rect& rect);
|
||
|
|
std::vector<ImageSection> divideImage(const cv::Mat& image);
|
||
|
|
std::vector<ImageSection> createSlideScreens(const cv::Mat& image);
|
||
|
|
int getHighestPriorityRegion();
|
||
|
|
int getLowestPriorityRegion();
|
||
|
|
cv::Rect getRegionByPriority(int priority);
|
||
|
|
std::vector<Object> AdjustDetectedBoundingBoxes(const std::vector<Object>& detectionsInROI,
|
||
|
|
const cv::Rect& roi, const cv::Size& fullImageSize,
|
||
|
|
float aspectRatio = 0.9f, // width at least 2x height
|
||
|
|
int padding = 10 // base padding
|
||
|
|
);
|
||
|
|
void UpdateNoDetectionCondition();
|
||
|
|
void UpdateActiveROI(const cv::Mat& frame, ANSCENTER::Object detectedObj);
|
||
|
|
bool IsValidObject(const Object& obj, std::vector<int> objectIds);
|
||
|
|
public:
|
||
|
|
[[nodiscard]] virtual bool Initialize(std::string licenseKey, ModelConfig modelConfig, const std::string& modelZipFilePath, const std::string& modelZipPassword, std::string& labelMap);
|
||
|
|
[[nodiscard]] virtual bool LoadModel(const std::string& modelZipFilePath, const std::string& modelZipPassword);
|
||
|
|
[[nodiscard]] virtual bool LoadModelFromFolder(std::string licenseKey, ModelConfig modelConfig, std::string modelName, std::string className, const std::string& modelFolder, std::string& labelMap);
|
||
|
|
[[nodiscard]] virtual bool OptimizeModel(bool fp16, std::string& optimizedModelFolder);
|
||
|
|
[[nodiscard]] virtual std::vector<Object> RunInference(const cv::Mat& input) = 0;
|
||
|
|
[[nodiscard]] virtual std::vector<Object> RunInference(const cv::Mat& input, const std::string& camera_id) = 0;
|
||
|
|
[[nodiscard]] virtual std::vector<std::vector<Object>> RunInferencesBatch(const std::vector<cv::Mat>& inputs, const std::string& camera_id);
|
||
|
|
|
||
|
|
[[nodiscard]] std::vector<Object> RunInference(const cv::Mat& input, std::vector<cv::Rect> Bbox, const std::string& camera_id);
|
||
|
|
[[nodiscard]] std::vector<Object> RunInference(const cv::Mat& input, std::vector<cv::Point> Polygon, const std::string& camera_id);
|
||
|
|
[[nodiscard]] std::vector<Object> RunInferences(const cv::Mat& input, int tiledWidth, int tiledHeight, double overLap, const std::string& camera_id); // split image to slides and run inference
|
||
|
|
[[nodiscard]] std::vector<Object> RunInferenceFromJpegString(const char* jpegData, unsigned long jpegSize, const std::string& camera_id) ;
|
||
|
|
[[nodiscard]] std::vector<Object> RunTiledInferenceFromJpegString(const char* jpegData, unsigned long jpegSize, int tiledWith, int tiledHeight, double overLap, const std::string& camera_id);
|
||
|
|
[[nodiscard]] std::vector<Object> DetectMovement(const cv::Mat& input, const std::string& camera_id);
|
||
|
|
[[nodiscard]] std::vector<cv::Rect> GenerateFixedROIs(const std::vector<Object>& movementObjects, int fixedWidth, int fixedHeight, int imageWidth, int imageHeight);
|
||
|
|
[[nodiscard]] cv::Rect GenerateMinimumSquareBoundingBox(const std::vector<ANSCENTER::Object>& detectedObjects, int minSize = 640);
|
||
|
|
void UpdateAndFilterDetectionObjects(std::vector<Object>& detectionObjects, int threshold);
|
||
|
|
[[nodiscard]] bool ContainsIntersectingObject(const std::vector<Object>& movementObjects, const Object& result);
|
||
|
|
[[nodiscard]] cv::Rect GetActiveWindow(const cv::Mat& input);
|
||
|
|
[[nodiscard]] Params GetParameters() { return _params; } //
|
||
|
|
[[nodiscard]] virtual bool ConfigureParameters(Params& param); //
|
||
|
|
[[nodiscard]] virtual bool SetParameters(const Params& param); //ANSVIS will set the parameters
|
||
|
|
[[nodiscard]] bool UpdateDetectionThreshold(float detectionScore);
|
||
|
|
[[nodiscard]] std::vector<Object> RunInferenceWithOption(const cv::Mat& input, const std::string& camera_id, const std::string activeROIMode);// Get detected objects
|
||
|
|
// New API to suppor dynamic inference
|
||
|
|
[[nodiscard]] std::vector<Object> RunDynamicInference(const cv::Mat& input, cv::Rect Bbox, const std::string& camera_id);
|
||
|
|
[[nodiscard]] std::vector<Object> RunStaticInference(const cv::Mat& input, cv::Rect Bbox, const std::string& camera_id);
|
||
|
|
void SetLoadEngineOnCreation(bool loadEngineOnCreation) { _loadEngineOnCreation = loadEngineOnCreation; }
|
||
|
|
virtual void SetMaxSlotsPerGpu(int n) {} // override in TRT-based subclasses
|
||
|
|
void SetSkipEngineCache(bool skip) { _skipEngineCache = skip; } // propagated to Engine<T> before buildLoadNetwork
|
||
|
|
void SetForceNoPool(bool force) { _forceNoPool = force; } // propagated to Engine<T> before buildLoadNetwork
|
||
|
|
|
||
|
|
/// Enable/disable internal debug benchmarking.
|
||
|
|
/// When enabled, per-stage timing (preprocess, inference, postprocess, tracking, etc.)
|
||
|
|
/// is logged via _logger at info level for every inference call.
|
||
|
|
void ActivateDebugger(bool debugFlag) { _debugFlag = debugFlag; }
|
||
|
|
|
||
|
|
// Multi-object tracker (MOT) control
|
||
|
|
bool SetTracker(TrackerType trackerType, bool enabled);
|
||
|
|
|
||
|
|
// Detection stabilization control
|
||
|
|
// Auto-enables tracker if not already enabled. Tracker auto-enables stabilization too.
|
||
|
|
bool SetStabilization(bool enabled, int historySize = 20, int maxMisses = 5);
|
||
|
|
|
||
|
|
// Fine-tune all stabilization parameters at once (JSON input).
|
||
|
|
// Keys (all optional — omit to keep current value):
|
||
|
|
// "hysteresis_enter" : float — confidence to start tracking (0=auto from model threshold)
|
||
|
|
// "hysteresis_keep" : float — confidence to keep tracking (0=auto, 65% of enter)
|
||
|
|
// "ema_alpha" : float — EMA weight for new observation (0..1, default 0.3)
|
||
|
|
// "track_boost_min_frames" : int — frames before boost kicks in (default 10)
|
||
|
|
// "track_boost_amount" : float — confidence bonus for established tracks (default 0.05)
|
||
|
|
// "class_consistency_frames": int — consecutive frames of new class to accept switch (default 5)
|
||
|
|
// "confidence_decay" : float — per-miss decay multiplier for ghosts (default 0.85)
|
||
|
|
// "min_confidence" : float — floor below which ghosts are dropped (default 0.15)
|
||
|
|
bool SetStabilizationParameters(const std::string& jsonParams);
|
||
|
|
|
||
|
|
bool SetTrackerParameters(const std::string& jsonParams);
|
||
|
|
|
||
|
|
/// Set the text prompt for segmentation (pre-tokenized).
|
||
|
|
/// Override in subclasses that support text-prompted segmentation (e.g. ANSSAM3, ANSONNXSAM3).
|
||
|
|
virtual bool SetPrompt(const std::vector<int64_t>& inputIds,
|
||
|
|
const std::vector<int64_t>& attentionMask) {
|
||
|
|
return true;
|
||
|
|
}
|
||
|
|
|
||
|
|
/// Set the text prompt by tokenizing the given text.
|
||
|
|
/// Requires merges.txt (CLIP BPE vocabulary) in the model folder.
|
||
|
|
/// Override in subclasses that support text-prompted segmentation.
|
||
|
|
virtual bool SetPrompt(const std::string& text) { return true; }
|
||
|
|
|
||
|
|
std::vector<Object> _detectedObjects;
|
||
|
|
[[nodiscard]] ModelConfig GetModelConfig();
|
||
|
|
// Function to add or retrieve camera data by ID
|
||
|
|
CameraData& GetCameraData(const std::string& cameraId) {
|
||
|
|
std::lock_guard<std::recursive_mutex> lock(_mutex);
|
||
|
|
try
|
||
|
|
{
|
||
|
|
if (_cameras.empty())
|
||
|
|
{
|
||
|
|
std::cerr << "Warning: _cameras is initially empty." << std::endl;
|
||
|
|
}
|
||
|
|
|
||
|
|
// Use try_emplace to insert a default CameraData if cameraId does not exist
|
||
|
|
auto [iterator, inserted] = _cameras.try_emplace(cameraId, CameraData{});
|
||
|
|
|
||
|
|
if (inserted)
|
||
|
|
{
|
||
|
|
std::cout << "Added new CameraData for cameraId: " << cameraId << std::endl;
|
||
|
|
}
|
||
|
|
|
||
|
|
return iterator->second; // Return the reference to CameraData
|
||
|
|
}
|
||
|
|
catch (const std::exception& ex)
|
||
|
|
{
|
||
|
|
std::cerr << "Exception in GetCameraData: " << ex.what() << std::endl;
|
||
|
|
return _defaultCamera;
|
||
|
|
}
|
||
|
|
}
|
||
|
|
void EnqueueDetection(const std::vector<Object>& detectedObjects, const std::string& cameraId);
|
||
|
|
[[nodiscard]] bool RunInference(const cv::Mat& input, const std::string& camera_id, std::string& detectionResult);
|
||
|
|
[[nodiscard]] std::deque<std::vector<Object>> DequeueDetection(const std::string& cameraId);
|
||
|
|
[[nodiscard]] virtual bool Destroy()=0;
|
||
|
|
~ANSODBase();
|
||
|
|
protected:
|
||
|
|
std::vector<Object> RunInferenceInScanningMode(const cv::Mat& input, const std::string& camera_id);// Get detected objects
|
||
|
|
std::vector<Object> RunInferenceInTrackingMode(const cv::Mat& input, const std::string& camera_id, std::vector<int> trackingObjectIds);// Get detected objects
|
||
|
|
};
|
||
|
|
class ANSENGINE_API ANSFDBase {
|
||
|
|
protected:
|
||
|
|
bool _licenseValid{ false };
|
||
|
|
bool _isInitialized{ false };
|
||
|
|
std::string _licenseKey;
|
||
|
|
std::string _modelFolder;
|
||
|
|
std::string _faceAttrModelFolder;
|
||
|
|
|
||
|
|
ModelConfig _modelConfig;
|
||
|
|
std::string _modelConfigFile;
|
||
|
|
std::string _imageProcessingModelFile;
|
||
|
|
MoveDetectsHandler _handler;
|
||
|
|
const size_t QUEUE_SIZE = 10;
|
||
|
|
std::recursive_mutex _mutex;
|
||
|
|
ANSCENTER::EngineType engineType;
|
||
|
|
|
||
|
|
bool _facelivenessEngineValid{ false };
|
||
|
|
Ort::Env* _ortLivenessEnv = nullptr;
|
||
|
|
Ort::SessionOptions* _ortLivenessSessionOptions = nullptr;
|
||
|
|
Ort::Session* _livenessSession = nullptr;
|
||
|
|
std::string _livenessInputName;
|
||
|
|
std::string _livenessOutputName;
|
||
|
|
ANSCENTER::ANSMOT* _faceTracker = nullptr;
|
||
|
|
#ifdef USE_TV_MODEL
|
||
|
|
ANSCENTER::ANSMOT* _tvTracker = nullptr;
|
||
|
|
std::unique_ptr<ANSODBase>_tvDetector = nullptr;
|
||
|
|
std::vector<Object> TrackTVScreens(const std::vector<Object>& tvObjects);
|
||
|
|
bool InsideScreen(const cv::Rect& tvBox, const cv::Rect& faceBox);
|
||
|
|
#endif
|
||
|
|
bool _useTvDetector{ false };
|
||
|
|
std::unordered_map<int, int> _mMissingTrackFrames; // Track ID and number of missing face
|
||
|
|
std::unordered_map<int, int> _mMissingTrackScreen; // Track ID and number of missing screen
|
||
|
|
std::unordered_map<int, std::deque<int>> _mTrackHistory; //History of liveness attribute
|
||
|
|
std::unordered_map<int, std::vector<Object>> _mTrackScreen; //History of Screen's position
|
||
|
|
// Adaptive screens
|
||
|
|
struct ImageSection {
|
||
|
|
cv::Rect region;
|
||
|
|
int priority;
|
||
|
|
ImageSection(const cv::Rect& r) : region(r), priority(0) {}
|
||
|
|
};
|
||
|
|
cv::Size previousImageSize = cv::Size(0, 0);
|
||
|
|
std::vector<ImageSection> cachedSections;
|
||
|
|
int _currentPriority{ 0 }; // None
|
||
|
|
cv::Rect _detectedArea;// Area where license plate are detected
|
||
|
|
|
||
|
|
//AsyncPipeline* _pipeline = nullptr;
|
||
|
|
SPDLogger& _logger = SPDLogger::GetInstance("ANSFD", false);
|
||
|
|
EngineType _engineType;
|
||
|
|
void CheckLicense();
|
||
|
|
void Cleanup();
|
||
|
|
// Other functions can be used;
|
||
|
|
bool isSimilarObject(const Object& obj1, const Object& obj2);
|
||
|
|
bool isOverlayObject(const Object& obj1, const Object& obj2);
|
||
|
|
struct CameraData
|
||
|
|
{
|
||
|
|
std::deque<std::vector<Object>> _detectionQueue; // That stores the detection results
|
||
|
|
void clear()
|
||
|
|
{
|
||
|
|
for (auto& detectionVector : _detectionQueue)
|
||
|
|
{
|
||
|
|
detectionVector.clear(); // Clear each vector of Objects
|
||
|
|
}
|
||
|
|
_detectionQueue.clear(); // Clear the deque itself
|
||
|
|
}
|
||
|
|
};
|
||
|
|
CameraData _defaultCamera;
|
||
|
|
|
||
|
|
// Helper: returns the Euclidean distance between two points.
|
||
|
|
float distance(const cv::Point2f& a, const cv::Point2f& b) {
|
||
|
|
float dx = a.x - b.x;
|
||
|
|
float dy = a.y - b.y;
|
||
|
|
return std::sqrt(dx * dx + dy * dy);
|
||
|
|
}
|
||
|
|
// Compute the union (bounding box) of two rectangles.
|
||
|
|
cv::Rect unionRect(const cv::Rect& a, const cv::Rect& b) {
|
||
|
|
int x = std::min(a.x, b.x);
|
||
|
|
int y = std::min(a.y, b.y);
|
||
|
|
int x2 = std::max(a.x + a.width, b.x + b.width);
|
||
|
|
int y2 = std::max(a.y + a.height, b.y + b.height);
|
||
|
|
return cv::Rect(x, y, x2 - x, y2 - y);
|
||
|
|
}
|
||
|
|
cv::Rect computeCandidateROI(const cv::Rect& unionBox, int fixedWidth, int fixedHeight, int imageWidth, int imageHeight);
|
||
|
|
// Check if two rectangles overlap (i.e. intersection area > 0).
|
||
|
|
bool isOverlap(const cv::Rect& a, const cv::Rect& b) {
|
||
|
|
return ((a & b).area() > 0);
|
||
|
|
}
|
||
|
|
bool isValidFace(const std::vector<cv::Point2f>& landmarks, const cv::Rect& faceRect, float maxEyeAngle = 25, int offsetX = 0, int offsetY = 0, const cv::Mat& frame = cv::Mat(), float minBlurScore = 15.0f);
|
||
|
|
// Multi-camera data storage
|
||
|
|
std::unordered_map<std::string, CameraData> _cameras;
|
||
|
|
//cv::Mat EnhanceImage(const cv::Mat inputImage, int cropedImageSize);
|
||
|
|
cv::Mat GetCroppedFaceScale(const cv::Mat& image, const int x1, const int y1, const int x2, const int y2, int cropedImageSize);
|
||
|
|
std::string GetOpenVINODevice(ov::Core& core);
|
||
|
|
|
||
|
|
cv::Mat Preprocess(cv::Mat& input_mat, std::vector<cv::Point2f>& face_landmark_5, cv::Mat& preprocessed_mat);
|
||
|
|
|
||
|
|
// Function to seperate screen size
|
||
|
|
double calculateDistanceToCenter(const cv::Point& center, const cv::Rect& rect);
|
||
|
|
std::vector<ImageSection> divideImage(const cv::Mat& image);
|
||
|
|
std::vector<ANSFDBase::ImageSection> createSlideScreens(const cv::Mat& image);
|
||
|
|
int getHighestPriorityRegion();
|
||
|
|
int getLowestPriorityRegion();
|
||
|
|
cv::Rect getRegionByPriority(int priority);
|
||
|
|
std::vector<Object> AdjustDetectedBoundingBoxes(const std::vector<Object>& detectionsInROI,
|
||
|
|
const cv::Rect& roi, const cv::Size& fullImageSize,
|
||
|
|
float aspectRatio = 0.9f, // width at least 2x height
|
||
|
|
int padding = 10 // base padding
|
||
|
|
);
|
||
|
|
|
||
|
|
|
||
|
|
// Track faces
|
||
|
|
std::vector<Object> TrackFaces(const cv::Mat& inputImage, const std::vector<Object>& faceObjects);
|
||
|
|
template<typename MapTrackData, typename MapMissingFrames>
|
||
|
|
void CleanUpTracks(std::vector<Object>& currentObjects,
|
||
|
|
MapTrackData& trackDataMap,
|
||
|
|
MapMissingFrames& missingFramesMap,
|
||
|
|
int maxMissing,
|
||
|
|
int maxTracks);
|
||
|
|
public:
|
||
|
|
[[nodiscard]] virtual bool Initialize(std::string licenseKey, ModelConfig modelConfig, const std::string& modelZipFilePath, const std::string& modelZipPassword, std::string& labelMap);
|
||
|
|
[[nodiscard]] virtual bool LoadModel(const std::string& modelZipFilePath, const std::string& modelZipPassword);
|
||
|
|
[[nodiscard]] virtual bool LoadModelFromFolder(std::string licenseKey, ModelConfig modelConfig, std::string modelName, std::string className, const std::string& modelFolder, std::string& labelMap);
|
||
|
|
[[nodiscard]] virtual bool OptimizeModel(bool fp16, std::string& optimizedModelFolder);
|
||
|
|
[[nodiscard]] virtual std::vector<ANSCENTER::Object> RunInference(const cv::Mat& input, bool useDynamicImage = true, bool validateFace = false, bool facelivenessCheck = true) = 0;
|
||
|
|
[[nodiscard]] virtual std::vector<Object> RunInference(const cv::Mat& input, const std::string& camera_id, bool useDynamicImage = true, bool validateFace = false, bool facelivenessCheck = true) = 0;
|
||
|
|
[[nodiscard]] virtual bool Destroy() = 0;
|
||
|
|
[[nodiscard]] std::vector<Object> DetectMovement(const cv::Mat& input, const std::string& camera_id);
|
||
|
|
[[nodiscard]] cv::Rect GenerateMinimumSquareBoundingBox(const std::vector<ANSCENTER::Object>& detectedObjects, int minSize = 640);
|
||
|
|
[[nodiscard]] std::vector<cv::Rect> GenerateFixedROIs(const std::vector<Object>& movementObjects, int fixedWidth, int fixedHeight, int imageWidth, int imageHeight);
|
||
|
|
[[nodiscard]] bool ContainsIntersectingObject(const std::vector<Object>& movementObjects, const Object& result);
|
||
|
|
void UpdateAndFilterDetectionObjects(std::vector<Object>& detectionObjects, int threshold);
|
||
|
|
[[nodiscard]] bool UpdateDetectionThreshold(float detectionScore);
|
||
|
|
[[nodiscard]] float GetDetectionThreshold();
|
||
|
|
[[nodiscard]] ModelConfig GetModelConfig();
|
||
|
|
// Function to add or retrieve camera data by ID
|
||
|
|
CameraData& GetCameraData(const std::string& cameraId)
|
||
|
|
{
|
||
|
|
std::lock_guard<std::recursive_mutex> lock(_mutex);
|
||
|
|
try
|
||
|
|
{
|
||
|
|
if (_cameras.empty())
|
||
|
|
{
|
||
|
|
std::cerr << "Warning: _cameras is initially empty." << std::endl;
|
||
|
|
}
|
||
|
|
|
||
|
|
// Use try_emplace to insert a default CameraData if cameraId does not exist
|
||
|
|
auto [iterator, inserted] = _cameras.try_emplace(cameraId, CameraData{});
|
||
|
|
|
||
|
|
if (inserted)
|
||
|
|
{
|
||
|
|
std::cout << "Added new CameraData for cameraId: " << cameraId << std::endl;
|
||
|
|
}
|
||
|
|
|
||
|
|
return iterator->second; // Return the reference to CameraData
|
||
|
|
}
|
||
|
|
catch (const std::exception& ex)
|
||
|
|
{
|
||
|
|
std::cerr << "Exception in GetCameraData: " << ex.what() << std::endl;
|
||
|
|
return _defaultCamera;
|
||
|
|
}
|
||
|
|
}
|
||
|
|
void EnqueueDetection(const std::vector<Object>& detectedObjects, const std::string& cameraId);
|
||
|
|
std::deque<std::vector<Object>> DequeueDetection(const std::string& cameraId);
|
||
|
|
// Face liveness functions
|
||
|
|
bool LoadLivenessModel(std::string antiSpoofModelPath, bool isGPU = true);
|
||
|
|
bool InitializeLivenessModel(std::string licenseKey, const std::string& modelZipFilePath, const std::string& modelZipPassword);
|
||
|
|
std::pair<int, float> PredictLiveness(const cv::Mat& faceImage);
|
||
|
|
std::pair<int, float> LivenessPostProcessing(const float* pOutput);
|
||
|
|
|
||
|
|
std::vector<Object> ValidateLivenessFaces(const cv::Mat& inputImage, const std::vector<Object>& faceObjects, const std::string& camera_id);
|
||
|
|
float ComputeIoU(const cv::Rect& a, const cv::Rect& b);
|
||
|
|
virtual void SetMaxSlotsPerGpu(int n) {} // override in TRT-based subclasses
|
||
|
|
~ANSFDBase();
|
||
|
|
};
|
||
|
|
}
|
||
|
|
|
||
|
|
#endif
|