#ifndef ANSENGINECOMMON_H #define ANSENGINECOMMON_H #define ANSENGINE_API __declspec(dllexport) #pragma once #include #include #include #include "ANSLicense.h" #include "Utility.h" #include #include #include #include #include #include #include #include #include #include #include #include "ANSMOT.h" #include "onnxruntime_cxx_api.h" #define USEONNXOV #define RETAINDETECTEDFRAMES 80 #define DEBUG_PRINT(x) std::cout << x << std::endl; //#define DEBUGENGINE //#define USE_TV_MODEL // Use model to detect if person is on TV screen or not const int MAX_HISTORY_FACE = 5; const int MAX_MISSING_FACE = 30; const int MAX_TRACKS = 200; const float PAD_TRACK_RATIO = 0.5f; const float PAD_DETECT_RATIO = 1.0f; const int MAX_MISSING_SCREEN = 1000; namespace ANSCENTER { template typename std::enable_if::value, T>::type inline clamp(const T& value, const T& low, const T& high) { T validLow = low < high ? low : high; T validHigh = low < high ? high : low; if (value < validLow) return validLow; if (value > validHigh) return validHigh; return value; } struct Point { int x, y; }; struct ROIConfig { bool Rectangle; bool Polygon; bool Line; int MinItems; int MaxItems; std::string Name; std::string ROIMatch; }; struct Parameter { std::string Name; std::string DataType; int NoOfDecimals; int MaxValue; int MinValue; std::string StartValue; std::vector ListItems; std::string DefaultValue; std::string Value; }; struct ROIValue { std::string ROIMatch; std::vector ROIPoints; std::string Option; std::string Name; int OriginalImageSize; }; struct Params { std::vector ROI_Config; std::vector ROI_Options; std::vector Parameters; std::vector ROI_Values; }; /* Example { "ROI_Config":[ { "Rectangle":true, "Polygon":true, "Line":false, "MinItems":0, "MaxItems":3, "Name":"Traffic Light", "ROI-Match":"All Corners" }, { "Rectangle":true, "Polygon":false, "Line":false, "MinItems":1, "MaxItems":1, "Name":"Car Zone", "ROI-Match":"All Corners" }, { "Rectangle":false, "Polygon":false, "Line":true, "MinItems":1, 
"MaxItems":2, "Name":"Cross Line", "ROI-Match":"All Corners" } ], "ROI_Options":[ "Inside ROI", "Inside ROI", "Both Directions" ], "Parameters":[ { "Name":"Para1", "DataType":"Boolean", "NoOfdecimals":0, "MaxValue":0, "MinValue":0, "StartValue":"", "ListItems":[], "DefaultValue":"", "Value":"true" }, { "Name":"Para2", "DataType":"Integer", "NoOfdecimals":0, "MaxValue":5, "MinValue":1, "StartValue":"2", "ListItems":[], "DefaultValue":"", "Value":"3" }, { "Name":"Para3", "DataType":"List-Single", "NoOfdecimals":0, "MaxValue":0, "MinValue":0, "StartValue":"", "ListItems":["A","B","C"], "DefaultValue":"", "Value":"A" }, { "Name":"Para4", "DataType":"Range", "NoOfdecimals":0, "MaxValue":100, "MinValue":50, "StartValue":">,60", "ListItems":[">","<"], "DefaultValue":"", "Value":">,52.000000" } ], "ROI_Values":[ { "ROI-Match":"Centre Point", "ROIPoints":[ {"x":269,"y":134}, {"x":777,"y":134}, {"x":777,"y":457}, {"x":269,"y":457} ], "Option":"Inside ROI", "Name":"Car Zone 1", "OriginalImageSize":1920 }, { "ROI-Match":"Centre Point", "ROIPoints":[{"x":280,"y":613},{"x":1108,"y":280}], "Option":"Above", "Name":"Cross Line 1", "OriginalImageSize":1920 }, { "ROI-Match":"Centre Point", "ROIPoints":[{"x":1511,"y":383},{"x":1283,"y":754}], "Option":"Left side", "Name":"Cross Line 2", "OriginalImageSize":1920 }, { "ROI-Match":"Centre Point", "ROIPoints":[ {"x":229,"y":161}, {"x":964,"y":161}, {"x":964,"y":628}, {"x":229,"y":628} ], "Option":"Left side", "Name":"Traffic Light 1", "OriginalImageSize":1920 }, { "ROI-Match":"Centre Point", "ROIPoints":[ {"x":1115,"y":304}, {"x":1730,"y":304}, {"x":1730,"y":695}, {"x":1115,"y":695} ], "Option":"Left side", "Name":"Traffic Light 2", "OriginalImageSize":1920 }, { "ROI-Match":"Centre Point", "ROIPoints":[ {"x":678,"y":683}, {"x":1217,"y":683}, {"x":1217,"y":1026}, {"x":678,"y":1026} ], "Option":"Left side", "Name":"Traffic Light 3", "OriginalImageSize":1920 } ] } */ struct MetaData { float imageThreshold; float pixelThreshold; float min; 
float max; int inferSize[2]; // h w int imageSize[2]; // h w std::vector _mean; std::vector _std; }; struct Resize { cv::Mat resizedImage; int dw; int dh; }; struct Object { int classId{ 0 }; int trackId{ 0 }; std::string className{}; float confidence{ 0.0 }; cv::Rect box{}; std::vector polygon; // polygon that contain x1,y1,x2,y2,x3,y3,x4,y4 (for both segmentation and pose estimation) cv::Mat mask{}; // image in box (cropped) cv::cuda::GpuMat gpuMask{}; // GPU-resident face crop (set by NV12 affine warp, avoids re-upload) std::vector kps{}; // Pose exsimate keypoints, containing x1,y1,x2,y2,... or oriented box x,y,width,height,angle std::string extraInfo; // More information such as facial recognition std::string cameraId; // Use to check if this object belongs to any camera // std::string attributes; }; struct FaceResultObject { int trackId{ 0 }; std::string userId; std::string userName; float similarity; bool isUnknown; bool isMasked; // If the face is masked cv::Rect box{}; // Face bounding box cv::Mat mask; std::vector polygon; // polygon that contain x1 ,y1,x2,y2,x3,y3,x4,y4 std::vector kps{}; // Containing landmarks float confidence{ 0.0 }; std::string extraInformation; std::string cameraId; // std::string attributes; // Face attributes (in Json format) }; struct BoundingBox { int x; int y; int width; int height; BoundingBox() : x(0), y(0), width(0), height(0) {} BoundingBox(int x_, int y_, int width_, int height_) : x(x_), y(y_), width(width_), height(height_) {} }; struct KeyPoint { float x; ///< X-coordinate of the keypoint float y; ///< Y-coordinate of the keypoint float confidence; ///< Confidence score of the keypoint KeyPoint(float x_ = 0, float y_ = 0, float conf_ = 0) : x(x_), y(y_), confidence(conf_) { } }; // A group holds a set of objects and the union (bounding box) of all their boxes. 
struct Group { std::vector objects; cv::Rect unionBox; }; typedef std::pair Range; class ANSENGINE_API ANNHUBClassifier { private: std::vector nInput; //ANN inputs std::vector nOutput; //ANN outputs std::vector> IW; std::vector> LW; std::vector Ib; std::vector Lb; // Structural parameters int nInputNodes, nHiddenNodes, nOutputNodes; int hiddenActivation; // default =2 int outputActivation; // default =2 int dataNormalisationModeInput; // default =1; int dataNormalisationModeOutput; // default =1; // Preprocessing and postprocessing settings std::vector xmaxInput, xminInput; // Maximum and minimum of inputs double ymaxInput, yminInput; // Maximum and minimum of inputs std::vector xmaxOutput, xminOutput; // Maximum and minimum of outputs double ymaxOutput, yminOutput; // Maximum and minimum of outputs // Control creation unsigned char isCreated; std::string _licenseKey; bool _licenseValid{ false }; bool _isInitialized{ false }; std::string _modelFilePath; private: void PreProcessing(std::vector& Input); // mode =0--> linear, mode =1 mapminmax, mode =2 standarddev void PostProcessing(std::vector& Output); // mode =0--> linear, mode =1 mapminmax, mode =2 standarddev void Create(int inputNodes, int HiddenNodes, int outputNodes); void FreeNeuralNetwork(); void CheckLicense(); int ImportANNFromFile(std::string filename); public: ANNHUBClassifier(); ~ANNHUBClassifier(); bool Init(std::string licenseKey, std::string modelFilePath); std::vector Inference(std::vector ip); void Destroy(); int GetOutputNode() { return nOutputNodes; }; private: void ReLu(std::vector& iVal, std::vector& oVal); void LogSig(std::vector& iVal, std::vector& oVal); void TanSig(std::vector& iVal, std::vector& oVal); void PureLin(std::vector& iVal, std::vector& oVal); void SoftMax(std::vector& iVal, std::vector& oVal); void ActivationFunction(std::vector& iVal, std::vector& oVal, int mode); }; class ANSENGINE_API MoveDetectsHandler { public: // Constructor and Destructor MoveDetectsHandler(); 
~MoveDetectsHandler(); // Main detection methods std::vector MovementDetect(const std::string& camera_id, cv::Mat& next_image); std::vector MovementDetect(const std::string& camera_id, const size_t frame_index, cv::Mat& image); // Camera management bool hasCameraData(const std::string& camera_id) const; void removeCamera(const std::string& camera_id); std::vector getCameraIds() const; // Configuration methods void setThreshold(const std::string& camera_id, double threshold); void setKeyFrameFrequency(const std::string& camera_id, size_t frequency); void setNumberOfControlFrames(const std::string& camera_id, size_t count); void setThumbnailRatio(const std::string& camera_id, double ratio); void setMaskEnabled(const std::string& camera_id, bool enabled); void setContoursEnabled(const std::string& camera_id, bool enabled); void setBboxEnabled(const std::string& camera_id, bool enabled); void setContourThickness(const std::string& camera_id, int thickness); void setBboxThickness(const std::string& camera_id, int thickness); void setMinObjectArea(const std::string& camera_id, double area); void setMinObjectSize(const std::string& camera_id, int size); void setMorphologyIterations(const std::string& camera_id, int iterations); // Temporal consistency settings void setTemporalConsistency(const std::string& camera_id, bool enabled); void setMaskOverlapThreshold(const std::string& camera_id, double threshold); void setTemporalHistorySize(const std::string& camera_id, size_t size); void setMinConsistentFrames(const std::string& camera_id, size_t frames); void setLocationStabilityEnabled(const std::string& camera_id, bool enabled); void setMaxLocationJitter(const std::string& camera_id, double pixels); // Getters for configuration double getThreshold(const std::string& camera_id) const; size_t getKeyFrameFrequency(const std::string& camera_id) const; size_t getNumberOfControlFrames(const std::string& camera_id) const; double getThumbnailRatio(const std::string& camera_id) 
const; bool isMaskEnabled(const std::string& camera_id) const; bool isContoursEnabled(const std::string& camera_id) const; bool isBboxEnabled(const std::string& camera_id) const; bool isTemporalConsistencyEnabled(const std::string& camera_id) const; // State query methods bool isMovementDetected(const std::string& camera_id) const; bool wasTransitionDetected(const std::string& camera_id) const; double getPSNRScore(const std::string& camera_id) const; size_t getFrameIndexWithMovement(const std::string& camera_id) const; std::chrono::milliseconds getTimeSinceLastMovement(const std::string& camera_id) const; size_t getControlFrameCount(const std::string& camera_id) const; size_t getNextFrameIndex(const std::string& camera_id) const; double getTemporalConsistencyScore(const std::string& camera_id) const; // Public utility methods bool empty(const std::string& camera_id) const; void clear(const std::string& camera_id); void clearAll(); cv::Mat getOutput(const std::string& camera_id) const; cv::Mat getMask(const std::string& camera_id) const; std::vector> getContours(const std::string& camera_id) const; // Statistics struct CameraStats { size_t total_frames_processed = 0; size_t frames_with_movement = 0; size_t frames_rejected_by_temporal_check = 0; size_t control_frames_count = 0; double average_psnr = 0.0; double min_psnr = std::numeric_limits::max(); double max_psnr = 0.0; double average_temporal_consistency = 0.0; std::chrono::milliseconds total_processing_time{ 0 }; std::chrono::high_resolution_clock::time_point last_movement_time; // Reset stats void reset() { total_frames_processed = 0; frames_with_movement = 0; frames_rejected_by_temporal_check = 0; control_frames_count = 0; average_psnr = 0.0; min_psnr = std::numeric_limits::max(); max_psnr = 0.0; average_temporal_consistency = 0.0; total_processing_time = std::chrono::milliseconds{ 0 }; } }; CameraStats getStats(const std::string& camera_id) const; void resetStats(const std::string& camera_id); private: struct 
CameraData { // Detection state bool movement_detected = false; bool transition_detected = false; size_t next_frame_index = 0; size_t next_key_frame = 0; double most_recent_psnr_score = 0.0; size_t frame_index_with_movement = 0; double max_change_percentage = 20.0; // Max % of frame that can change double min_change_percentage = 1.0; // Min % of frame that must change std::chrono::high_resolution_clock::time_point movement_last_detected; // Control frames storage std::map control_frames; // Output data cv::Mat output; cv::Mat mask; std::vector> contours; // Configuration parameters size_t key_frame_frequency = 20; size_t number_of_control_frames = 10; double psnr_threshold = 45.0; double thumbnail_ratio = 0.05; cv::Size thumbnail_size = cv::Size(0, 0); // Visual options bool mask_enabled = true; bool contours_enabled = true; bool bbox_enabled = true; cv::LineTypes line_type = cv::LINE_4; int contours_size = 1; int bbox_size = 1; // Filtering parameters double min_object_area = 1000.0; int min_object_dimension = 5; int min_object_total_size = 25; // Morphology parameters int morphology_iterations = 10; // Temporal consistency parameters bool temporal_consistency_enabled = true; double mask_overlap_threshold = 0.05; // 5% overlap with previous required size_t temporal_history_size = 5; // Keep last N masks size_t min_consistent_frames = 3; // Need N consecutive consistent frames bool location_stability_enabled = true; double max_location_jitter = 50.0; // Max pixel movement between frames // Temporal consistency state std::deque mask_history; std::deque centroid_history; size_t consistent_frame_count = 0; double last_temporal_consistency_score = 0.0; // Statistics CameraStats stats; // Clear function to release memory void clear() { for (auto& [index, frame] : control_frames) { frame.release(); } control_frames.clear(); output.release(); mask.release(); contours.clear(); // Clear temporal history for (auto& m : mask_history) { m.release(); } mask_history.clear(); 
centroid_history.clear(); // Reset state movement_detected = false; transition_detected = false; most_recent_psnr_score = 0.0; frame_index_with_movement = 0; thumbnail_size = cv::Size(0, 0); consistent_frame_count = 0; last_temporal_consistency_score = 0.0; } }; // Private member functions double psnr(const cv::Mat& src, const cv::Mat& dst); cv::Mat simple_colour_balance(const cv::Mat& src); cv::Rect BoundingBoxFromContour(const std::vector& contour); // Multi-camera data storage std::unordered_map cameras; mutable std::recursive_mutex cameras_mutex; // Helper functions CameraData& getCameraData(const std::string& camera_id); const CameraData* getCameraDataConst(const std::string& camera_id) const; bool cameraExists(const std::string& camera_id) const; // Processing helpers cv::Mat computeMovementMask(const cv::Mat& control_frame, const cv::Mat& current_frame, const cv::Size& output_size, int morphology_iterations); std::vector extractObjectsFromMask(const cv::Mat& mask, const cv::Mat& image, CameraData& camera, const std::string& camera_id); void updateControlFrames(CameraData& camera, size_t frame_index, const cv::Mat& thumbnail); void updateStatistics(CameraData& camera, double psnr, bool movement_detected, std::chrono::milliseconds processing_time); // Temporal consistency helpers bool checkTemporalConsistency(CameraData& camera, const cv::Mat& current_mask); double calculateMaskOverlap(const cv::Mat& mask1, const cv::Mat& mask2); cv::Point calculateMaskCentroid(const cv::Mat& mask); double calculateLocationStability(const std::deque& centroids); void updateTemporalHistory(CameraData& camera, const cv::Mat& mask); std::vector MovementDetectInternal(const std::string& camera_id, const size_t frame_index, cv::Mat& image, CameraData& camera); }; class ANSENGINE_API ANSUtilityHelper { public: static std::vector Split(const std::string& s, char delimiter); static std::vector StringToPolygon(const std::string& input); static cv::Mat CropPolygon(const cv::Mat& image, 
const std::vector& polygon); static cv::Mat CropFromStringPolygon(const cv::Mat& image, const std::string& strPolygon); static std::vector GetBoundingBoxesFromString(std::string strBBoxes); static std::vector GetDetectionsFromString(const std::string& strDets); static std::vector StringToKeypoints(const std::string& str); static std::vector PolygonFromString(const std::string& str); static ANSCENTER::Params ParseCustomParameters(const std::string& paramsJson); static std::string SerializeCustomParamters(const ANSCENTER::Params& params); static bool ParseActiveROIMode(const std::string activeROIMode,int & mode,double & detectionScore, std::vector & trackingObjectIds); static cv::Rect GetBoundingBoxFromPolygon(const std::vector& polygon); static std::string VectorDetectionToJsonString(const std::vector& dets); static cv::Mat ReadImagePath(const std::string& imagePath); static cv::Mat ReadImageStreamBase64(const std::string& imageStreamBase64); static cv::Mat FormatToSquare(const cv::Mat& source); static unsigned char* CVMatToBytes(cv::Mat image, unsigned int& bufferLengh); static std::vector GetConfigFileContent(std::string modelConfigFile, ModelType& modelType, std::vector& inputShape); static MetaData GetJson(const std::string& jsonPath); static std::vector DecodeBase64(const std::string& base64); static cv::Mat Resize(const cv::Mat& src, int dst_height, int dst_width, const std::string& interpolation); static cv::Mat Crop(const cv::Mat& src, int top, int left, int bottom, int right); static cv::Mat Divide(const cv::Mat& src, float divide = 255.0); static cv::Mat Normalize(cv::Mat& src, const std::vector& mean, const std::vector& std, bool to_rgb = false, bool inplace = true); static cv::Mat Transpose(const cv::Mat& src); static cv::Mat Pad(const cv::Mat& src, int top, int left, int bottom, int right, int border_type, float val); static cv::Mat JpegStringToMat(const std::string& jpegString); static cv::Mat MeanAxis0(const cv::Mat& src); static cv::Mat 
ElementwiseMinus(const cv::Mat& A, const cv::Mat& B); static cv::Mat VarAxis0(const cv::Mat& src); static int MatrixRank(cv::Mat M); static cv::Mat SimilarTransform(cv::Mat& dst, cv::Mat& src); static std::vector AlignFaceWithFivePoints(const cv::Mat& image, const std::vector> boxes, std::vector> landmarks); static std::vectorGetCroppedFaces(const cv::Mat& image, const std::vector> boxes); static cv::Mat GetCroppedFace(const cv::Mat& image, const int x1, const int y1, const int x2, const int y2); static cv::Mat GetCroppedFaceScale(const cv::Mat& image, const int x1, const int y1, const int x2, const int y2, int cropedImageSize); // For openVINO face alignment static cv::Mat GetTransform(cv::Mat* src, cv::Mat* dst); static void AlignFaces(std::vector* face_images, std::vector* landmarks_vec); static void AlignFacesExt(std::vector* face_images, std::vector* landmarks_vec); static std::pair AlignFacesSCRFD(const cv::Mat& input_mat, const std::vector& face_landmark_5); // Model optimsation (for TensorRT) static bool ModelOptimizer(std::string modelZipFilePath, std::string modelFileZipPassword, int fp16, std::string& optimisedModelFolder, int inputImageHeight=640, int inputImageWidth=640); // For tiled inference static std::vector ApplyNMS(const std::vector& detections, float nmsThreshold = 0.4); static void AdjustBoudingBox(Object& obj, int offsetX, int offsetY); static std::vector GetPatches(cv::Mat& image, int tileWidth, int tileHeight, double overlap); static std::vector ExtractPatches(cv::Mat& image, std::vector& patchRegions); static cv::Mat ResizePatch(cv::Mat& patch, int modelWidth, int modelHeight); static std::map> Greedy_NMM(const std::vector& object_predictions, const std::string& match_metric = "IOU", float match_threshold = 0.5); static float calculate_intersection_area(const cv::Rect& box1, const cv::Rect& box2); static float calculate_bbox_iou(const Object& pred1, const Object& pred2); static float calculate_bbox_ios(const Object& pred1, const Object& 
pred2); static bool has_match(const Object& pred1, const Object& pred2, const std::string& match_type = "IOU", float match_threshold = 0.5); static float get_merged_score(const Object& pred1, const Object& pred2); static cv::Rect calculate_box_union(const cv::Rect& box1, const cv::Rect& box2); static cv::Rect get_merged_bbox(const Object& pred1, const Object& pred2); static int get_merged_class_id(const Object& pred1, const Object& pred2) { return (pred1.confidence > pred2.confidence) ? pred1.classId : pred2.classId; } static std::string get_merged_category(const Object& pred1, const Object& pred2) { return (pred1.confidence > pred2.confidence) ? pred1.className : pred2.className; } static Object merge_object_pair(const Object& obj1, const Object& obj2); static std::vector select_object_predictions(const std::vector& object_prediction_list,const std::map>& keep_to_merge_list,const std::string& match_metric,float match_threshold); static cv::Rect BoundingBoxFromContour(std::vector contour); static std::string PolygonToString(const std::vector& polygon); static std::string KeypointsToString(const std::vector& kps); static std::vector RectToNormalizedPolygon(const cv::Rect& rect, float imageWidth, float imageHeight); // Convert a UTF-8 encoded string to UTF-16LE byte string. // Useful for LabVIEW which can display UTF-16LE Unicode text on Windows. // Returns a std::string containing the raw UTF-16LE bytes (2 bytes per character). static std::string ConvertUTF8ToUTF16LE(const std::string& utf8Str); // Decode JSON Unicode escape sequences (\uXXXX) to UTF-16LE byte string. // Input: ASCII string with \uXXXX escapes (e.g., "\\u6c5f\\u6771 599") // Output: UTF-16LE byte string for LabVIEW display. // ASCII characters pass through as 2-byte UTF-16LE (e.g., 'A' -> 0x41 0x00). 
static std::string DecodeJsonUnicodeToUTF16LE(const std::string& escapedStr); static std::vector MaskToNormalizedPolygon(const cv::Mat& binaryMask, const cv::Rect& boundingBox, float imageWidth, float imageHeight, float simplificationEpsilon = 2.0f, int minContourArea = 10, int maxPoints = 50); }; class ANSENGINE_API ANSFRBase { protected: bool _licenseValid{ false }; bool _isInitialized{ false }; std::string _licenseKey; std::string _modelFolder; std::string _modelConfigFile; SPDLogger& _logger = SPDLogger::GetInstance("ANSFR", false); void CheckLicense(); public: virtual bool Initialize(std::string licenseKey, ModelConfig modelConfig, const std::string& modelZipFilePath, const std::string& modelZipPassword, std::string& labelMap); virtual bool LoadModel(const std::string& modelZipFilePath, const std::string& modelZipPassword); virtual bool OptimizeModel(bool fp16, std::string& optimizedModelFolder); virtual std::vector Match(const cv::Mat& input, const std::vector& bBox, const std::map& userDict) = 0; // virtual std::vector Feature(const cv::Mat& image, const ANSCENTER::Object& bBox) = 0; // Run inference and get embedding information from a cropped image (the first bbox) virtual cv::Mat GetCropFace(const cv::Mat& input, const ANSCENTER::Object& bBox) = 0; std::string GetModelFolder() { return _modelFolder; }; virtual bool UpdateParamater(double knownPersonThreshold)=0; // For face virtual void Init()=0; virtual void AddEmbedding(const std::string& className, float embedding[])=0; virtual void AddEmbedding(const std::string& className, const std::vector& embedding) =0; virtual bool Destroy()=0; virtual void SetMaxSlotsPerGpu(int n) {} // override in TRT-based subclasses ~ANSFRBase(); // Utility functions std::vector L2Normalize(const std::vector& values); float CosineSimilarity(const std::vector& a, const std::vector& b, bool normalized); }; class ANSENGINE_API ANSODBase { protected: bool _licenseValid{ false }; bool _isInitialized{ false }; Params _params; 
std::string _licenseKey; std::string _modelFolder; std::string _modelConfigFile; ModelConfig _modelConfig; SPDLogger& _logger = SPDLogger::GetInstance("ANSOD", false); // Debug benchmarking flag — when enabled, logs per-stage timing // for the full inference pipeline (preprocess, inference, postprocess, tracking, etc.) bool _debugFlag{ false }; std::string _classFilePath; std::vector _classes; bool _loadEngineOnCreation{ true }; bool _skipEngineCache{ false }; bool _forceNoPool{ false }; void CheckLicense(); void LoadClassesFromString(); void LoadClassesFromFile(); std::recursive_mutex _mutex; // Guard: true while Initialize/LoadModel is in progress on any thread. // Inference callers can check this to fail-fast instead of blocking. std::atomic _modelLoading{ false }; // Try to lock _mutex with a timeout. Returns a unique_lock that // evaluates to true on success. On timeout, logs a warning with // the caller name and returns an unlocked unique_lock. std::unique_lock TryLockWithTimeout( const char* caller, unsigned int timeoutMs = 5000) { const auto deadline = std::chrono::steady_clock::now() + std::chrono::milliseconds(timeoutMs); std::unique_lock lk(_mutex, std::defer_lock); while (!lk.try_lock()) { if (std::chrono::steady_clock::now() >= deadline) { _logger.LogWarn(caller, "Mutex acquisition timed out after " + std::to_string(timeoutMs) + " ms" + (_modelLoading.load() ? " (model loading in progress)" : ""), __FILE__, __LINE__); return lk; // unlocked } std::this_thread::sleep_for(std::chrono::milliseconds(1)); } return lk; // locked } // Pre-inference gate: checks _modelLoading, validates state // (_modelLoadValid, _licenseValid, _isInitialized), and acquires // _mutex with a timeout. Returns true if inference may proceed. // On failure (loading in progress, timeout, or invalid state) // returns false and the caller should return {}. 
bool PreInferenceCheck(const char* caller) { if (_modelLoading.load()) return false; auto lk = TryLockWithTimeout(caller); if (!lk.owns_lock()) return false; if (!_licenseValid || !_isInitialized) return false; return true; // lock released here — caller proceeds unlocked } // RAII helper: sets _modelLoading=true on construction, false on destruction. // Use in Initialize/LoadModel/LoadModelFromFolder to guarantee the flag // is always cleared, even on exceptions or early returns. struct ModelLoadingGuard { std::atomic& flag; explicit ModelLoadingGuard(std::atomic& f) : flag(f) { flag.store(true); } ~ModelLoadingGuard() { flag.store(false); } ModelLoadingGuard(const ModelLoadingGuard&) = delete; ModelLoadingGuard& operator=(const ModelLoadingGuard&) = delete; }; MoveDetectsHandler _handler; size_t QUEUE_SIZE = 20; // Multi-object tracker (MOT) — per-camera instances lazy-created in ApplyTracking bool _trackerEnabled = false; TrackerType _trackerType = TrackerType::BYTETRACK; int _trackerMotType = 1; // ANSMOT motType int std::string _trackerParams; // JSON params applied to each per-camera tracker // Detection stabilization config bool _stabilizationEnabled = false; size_t _stabilizationQueueSize = 20; // history depth (frames) int _stabilizationMaxConsecutiveMisses = 5; // stop interpolating after N misses float _stabilizationConfidenceDecay = 0.85f; // per-miss confidence multiplier float _stabilizationMinConfidence = 0.15f; // floor below which ghosts are dropped // #1 Confidence hysteresis: two-threshold system float _hysteresisEnterThreshold = 0.0f; // 0 = auto (use detectionScoreThreshold) float _hysteresisKeepThreshold = 0.0f; // 0 = auto (enterThreshold * 0.65) // #2 Temporal confidence smoothing (EMA) float _emaAlpha = 0.3f; // EMA weight for new observation (0..1) // #5 Track-aware confidence boost for established tracks int _trackBoostMinFrames = 10; // frames before boost kicks in float _trackBoostAmount = 0.05f; // confidence bonus for established 
tracks // #7 Class consistency — prevent sudden class switches on established tracks int _classConsistencyMinFrames = 5; // consecutive frames of new class required to accept switch // Adaptive screens struct ImageSection { cv::Rect region; int priority; ImageSection(const cv::Rect& r) : region(r), priority(0) {} }; cv::Size previousImageSize = cv::Size(0, 0); // For active windows std::vector cachedSections; int _currentPriority{ 0 }; // None cv::Rect _detectedArea;// This is active windows. int _retainDetectedArea{ 0 }; bool _isObjectDetected{ false }; struct CameraData { std::deque> _detectionQueue; // That stores the detection results // Per-camera tracker instance (lazy-created in ApplyTracking) ANSCENTER::ANSMOT* _tracker = nullptr; // ── Stabilization state ────────────────────────────── struct TrackedObjectHistory { int classId = 0; std::string className; std::string extraInfo; // preserve last-known extraInfo for ghost objects cv::Rect lastBox; float lastConfidence = 0.f; float smoothedConfidence = 0.f; // EMA-smoothed confidence int consecutiveMisses = 0; // frames since last raw detection int totalDetections = 0; // lifetime detection count int frameFirstSeen = 0; // frame counter when first detected bool isEstablished = false; // true once totalDetections >= trackBoostMinFrames // #7 Class consistency — resist sudden class switches int pendingClassId = -1; // candidate new class (-1 = none) std::string pendingClassName; int pendingClassStreak = 0; // consecutive frames with pendingClass }; std::unordered_map _trackHistories; // trackId -> history int _stabilizationFrameCounter = 0; void clear() { if (_tracker) { ReleaseANSMOTHandle(&_tracker); _tracker = nullptr; } for (auto& detectionVector : _detectionQueue) { detectionVector.clear(); // Clear each vector of Objects } _detectionQueue.clear(); // Clear the deque itself _trackHistories.clear(); _stabilizationFrameCounter = 0; } }; // Multi-camera data storage std::unordered_map _cameras; CameraData 
_defaultCamera; // Default camera data if camera_id is not provided // Tracker: convert detections → TrackerObject, run tracker, assign trackIds via IoU match std::vector ApplyTracking(std::vector& detections, const std::string& camera_id); // Stabilize detections: fill gaps with tracker-predicted objects, apply confidence decay std::vector StabilizeDetections(std::vector& detections, const std::string& camera_id); // extraInfo stabilization tag helpers static void TagStabilized(std::string& extraInfo); static void UntagStabilized(std::string& extraInfo); static bool IsTaggedStabilized(const std::string& extraInfo); // Other functions can be used; bool isSimilarObject(const Object& obj1, const Object& obj2); bool isOverlayObject(const Object& obj1, const Object& obj2); // Helper: returns the Euclidean distance between two points. float distance(const cv::Point2f& a, const cv::Point2f& b) { float dx = a.x - b.x; float dy = a.y - b.y; return std::sqrt(dx * dx + dy * dy); } // Compute the union (bounding box) of two rectangles. cv::Rect unionRect(const cv::Rect& a, const cv::Rect& b) { int x = std::min(a.x, b.x); int y = std::min(a.y, b.y); int x2 = std::max(a.x + a.width, b.x + b.width); int y2 = std::max(a.y + a.height, b.y + b.height); return cv::Rect(x, y, x2 - x, y2 - y); } cv::Rect computeCandidateROI(const cv::Rect& unionBox, int fixedWidth, int fixedHeight, int imageWidth, int imageHeight); // Check if two rectangles overlap (i.e. intersection area > 0). 
// True when rectangles a and b share a non-empty intersection.
bool isOverlap(const cv::Rect& a, const cv::Rect& b) { return ((a & b).area() > 0); }
// Chooses the OpenVINO device string from the given core — confirm selection policy in the .cpp.
std::string GetOpenVINODevice(ov::Core &core);
// Function to separate screen size
double calculateDistanceToCenter(const cv::Point& center, const cv::Rect& rect);
std::vector divideImage(const cv::Mat& image);
std::vector createSlideScreens(const cv::Mat& image);
int getHighestPriorityRegion();
int getLowestPriorityRegion();
cv::Rect getRegionByPriority(int priority);
// Maps detections found inside `roi` back to full-image coordinates with
// padding/aspect adjustments.
// NOTE(review): the trailing comment on aspectRatio ("width at least 2x height")
// disagrees with the 0.9f default — verify which is intended.
std::vector AdjustDetectedBoundingBoxes(const std::vector& detectionsInROI,
    const cv::Rect& roi,
    const cv::Size& fullImageSize,
    float aspectRatio = 0.9f, // width at least 2x height
    int padding = 10 // base padding
    );
void UpdateNoDetectionCondition();
void UpdateActiveROI(const cv::Mat& frame, ANSCENTER::Object detectedObj);
bool IsValidObject(const Object& obj, std::vector objectIds);
public:
    // ── Model lifecycle ─────────────────────────────────────────────
    [[nodiscard]] virtual bool Initialize(std::string licenseKey, ModelConfig modelConfig, const std::string& modelZipFilePath, const std::string& modelZipPassword, std::string& labelMap);
    [[nodiscard]] virtual bool LoadModel(const std::string& modelZipFilePath, const std::string& modelZipPassword);
    [[nodiscard]] virtual bool LoadModelFromFolder(std::string licenseKey, ModelConfig modelConfig, std::string modelName, std::string className, const std::string& modelFolder, std::string& labelMap);
    [[nodiscard]] virtual bool OptimizeModel(bool fp16, std::string& optimizedModelFolder);
    // ── Inference entry points ──────────────────────────────────────
    [[nodiscard]] virtual std::vector RunInference(const cv::Mat& input) = 0;
    [[nodiscard]] virtual std::vector RunInference(const cv::Mat& input, const std::string& camera_id) = 0;
    [[nodiscard]] virtual std::vector> RunInferencesBatch(const std::vector& inputs, const std::string& camera_id);
    [[nodiscard]] std::vector RunInference(const cv::Mat& input, std::vector Bbox, const std::string& camera_id);
    [[nodiscard]] std::vector RunInference(const cv::Mat& input, std::vector Polygon, const std::string& camera_id);
    [[nodiscard]] std::vector RunInferences(const cv::Mat& input, int tiledWidth, int tiledHeight, double overLap, const std::string& camera_id); // split image to slides and run inference
    [[nodiscard]] std::vector RunInferenceFromJpegString(const char* jpegData, unsigned long jpegSize, const std::string& camera_id);
    [[nodiscard]] std::vector RunTiledInferenceFromJpegString(const char* jpegData, unsigned long jpegSize, int tiledWith, int tiledHeight, double overLap, const std::string& camera_id);
    [[nodiscard]] std::vector DetectMovement(const cv::Mat& input, const std::string& camera_id);
    [[nodiscard]] std::vector GenerateFixedROIs(const std::vector& movementObjects, int fixedWidth, int fixedHeight, int imageWidth, int imageHeight);
    [[nodiscard]] cv::Rect GenerateMinimumSquareBoundingBox(const std::vector& detectedObjects, int minSize = 640);
    void UpdateAndFilterDetectionObjects(std::vector& detectionObjects, int threshold);
    [[nodiscard]] bool ContainsIntersectingObject(const std::vector& movementObjects, const Object& result);
    [[nodiscard]] cv::Rect GetActiveWindow(const cv::Mat& input);
    [[nodiscard]] Params GetParameters() { return _params; }
    // [[nodiscard]] virtual bool ConfigureParameters(Params& param);
    // [[nodiscard]] virtual bool SetParameters(const Params& param); //ANSVIS will set the parameters
    [[nodiscard]] bool UpdateDetectionThreshold(float detectionScore);
    [[nodiscard]] std::vector RunInferenceWithOption(const cv::Mat& input, const std::string& camera_id, const std::string activeROIMode);// Get detected objects
    // New API to support dynamic inference
    [[nodiscard]] std::vector RunDynamicInference(const cv::Mat& input, cv::Rect Bbox, const std::string& camera_id);
    [[nodiscard]] std::vector RunStaticInference(const cv::Mat& input, cv::Rect Bbox, const std::string& camera_id);
    void SetLoadEngineOnCreation(bool loadEngineOnCreation) { _loadEngineOnCreation = loadEngineOnCreation; }
    virtual void SetMaxSlotsPerGpu(int n) {} // override in TRT-based subclasses
    void SetSkipEngineCache(bool skip) { _skipEngineCache = skip; } // propagated to Engine before buildLoadNetwork
    void SetForceNoPool(bool force) { _forceNoPool = force; } // propagated to Engine before buildLoadNetwork
    /// Enable/disable internal debug benchmarking.
    /// When enabled, per-stage timing (preprocess, inference, postprocess, tracking, etc.)
    /// is logged via _logger at info level for every inference call.
    void ActivateDebugger(bool debugFlag) { _debugFlag = debugFlag; }
    // Multi-object tracker (MOT) control
    bool SetTracker(TrackerType trackerType, bool enabled);
    // Detection stabilization control
    // Auto-enables tracker if not already enabled. Tracker auto-enables stabilization too.
    bool SetStabilization(bool enabled, int historySize = 20, int maxMisses = 5);
    // Fine-tune all stabilization parameters at once (JSON input).
    // Keys (all optional — omit to keep current value):
    //   "hysteresis_enter"        : float — confidence to start tracking (0=auto from model threshold)
    //   "hysteresis_keep"         : float — confidence to keep tracking (0=auto, 65% of enter)
    //   "ema_alpha"               : float — EMA weight for new observation (0..1, default 0.3)
    //   "track_boost_min_frames"  : int — frames before boost kicks in (default 10)
    //   "track_boost_amount"      : float — confidence bonus for established tracks (default 0.05)
    //   "class_consistency_frames": int — consecutive frames of new class to accept switch (default 5)
    //   "confidence_decay"        : float — per-miss decay multiplier for ghosts (default 0.85)
    //   "min_confidence"          : float — floor below which ghosts are dropped (default 0.15)
    bool SetStabilizationParameters(const std::string& jsonParams);
    bool SetTrackerParameters(const std::string& jsonParams);
    /// Set the text prompt for segmentation (pre-tokenized).
    /// Override in subclasses that support text-prompted segmentation (e.g. ANSSAM3, ANSONNXSAM3).
    virtual bool SetPrompt(const std::vector& inputIds, const std::vector& attentionMask) { return true; }
    /// Set the text prompt by tokenizing the given text.
    /// Requires merges.txt (CLIP BPE vocabulary) in the model folder.
    /// Override in subclasses that support text-prompted segmentation.
    virtual bool SetPrompt(const std::string& text) { return true; }
    std::vector _detectedObjects;
    [[nodiscard]] ModelConfig GetModelConfig();
    // Function to add or retrieve camera data by ID
    // Inserts a default-constructed CameraData for unseen ids (try_emplace);
    // on exception falls back to the shared _defaultCamera. Guarded by _mutex
    // (CTAD lock_guard over the recursive mutex).
    CameraData& GetCameraData(const std::string& cameraId) {
        std::lock_guard lock(_mutex);
        try {
            if (_cameras.empty()) {
                std::cerr << "Warning: _cameras is initially empty." << std::endl;
            }
            // Use try_emplace to insert a default CameraData if cameraId does not exist
            auto [iterator, inserted] = _cameras.try_emplace(cameraId, CameraData{});
            if (inserted) {
                std::cout << "Added new CameraData for cameraId: " << cameraId << std::endl;
            }
            return iterator->second; // Return the reference to CameraData
        }
        catch (const std::exception& ex) {
            std::cerr << "Exception in GetCameraData: " << ex.what() << std::endl;
            return _defaultCamera;
        }
    }
    void EnqueueDetection(const std::vector& detectedObjects, const std::string& cameraId);
    [[nodiscard]] bool RunInference(const cv::Mat& input, const std::string& camera_id, std::string& detectionResult);
    [[nodiscard]] std::deque> DequeueDetection(const std::string& cameraId);
    [[nodiscard]] virtual bool Destroy()=0;
    // NOTE(review): this class declares virtual (including pure virtual)
    // functions but the destructor is not virtual — deleting a derived engine
    // through a base pointer is undefined behavior. Consider `virtual ~ANSODBase();`.
    ~ANSODBase();
protected:
    std::vector RunInferenceInScanningMode(const cv::Mat& input, const std::string& camera_id);// Get detected objects
    std::vector RunInferenceInTrackingMode(const cv::Mat& input, const std::string& camera_id, std::vector trackingObjectIds);// Get detected objects
};
// Face-detection engine base class: license checking, model lifecycle,
// ONNX Runtime based face-liveness, face tracking and per-camera queues.
class ANSENGINE_API ANSFDBase {
protected:
    bool _licenseValid{ false };
    bool _isInitialized{ false };
    std::string _licenseKey;
    std::string _modelFolder;
    std::string _faceAttrModelFolder;
    ModelConfig _modelConfig;
    std::string _modelConfigFile;
    std::string _imageProcessingModelFile;
    MoveDetectsHandler _handler;
    const size_t QUEUE_SIZE = 10;
    std::recursive_mutex _mutex;
    std::atomic _modelLoading{ false };
    // Pre-inference gate for ANSFDBase subclasses.
    // True only when no model load is in progress, the mutex could be acquired
    // within the timeout, and the license/initialization checks pass.
    // NOTE(review): the lock acquired here is released when `lk` goes out of
    // scope on return, so this gate does NOT keep the mutex held for the
    // actual inference — confirm callers re-lock where needed.
    bool PreInferenceCheck(const char* caller) {
        if (_modelLoading.load()) return false;
        auto lk = TryLockWithTimeout(caller);
        if (!lk.owns_lock()) return false;
        if (!_licenseValid || !_isInitialized) return false;
        return true;
    }
    // RAII flag: holds _modelLoading true for the guard's lifetime.
    struct ModelLoadingGuard {
        std::atomic& flag;
        explicit ModelLoadingGuard(std::atomic& f) : flag(f) { flag.store(true); }
        ~ModelLoadingGuard() { flag.store(false); }
        ModelLoadingGuard(const ModelLoadingGuard&) = delete;
        ModelLoadingGuard& operator=(const ModelLoadingGuard&) = delete;
    };
    // Polls try_lock() once per millisecond until acquired or the deadline
    // passes; on timeout logs a warning and returns a lock owning nothing
    // (caller must check owns_lock()).
    std::unique_lock TryLockWithTimeout(
        const char* caller,
        unsigned int timeoutMs = 5000) {
        const auto deadline = std::chrono::steady_clock::now() + std::chrono::milliseconds(timeoutMs);
        std::unique_lock lk(_mutex, std::defer_lock);
        while (!lk.try_lock()) {
            if (std::chrono::steady_clock::now() >= deadline) {
                _logger.LogWarn(caller,
                    "Mutex acquisition timed out after " + std::to_string(timeoutMs) + " ms" +
                    (_modelLoading.load() ? " (model loading in progress)" : ""),
                    __FILE__, __LINE__);
                return lk;
            }
            std::this_thread::sleep_for(std::chrono::milliseconds(1));
        }
        return lk;
    }
    ANSCENTER::EngineType engineType;
    bool _facelivenessEngineValid{ false };
    // ONNX Runtime objects for the face-liveness (anti-spoof) model.
    Ort::Env* _ortLivenessEnv = nullptr;
    Ort::SessionOptions* _ortLivenessSessionOptions = nullptr;
    Ort::Session* _livenessSession = nullptr;
    std::string _livenessInputName;
    std::string _livenessOutputName;
    ANSCENTER::ANSMOT* _faceTracker = nullptr;
#ifdef USE_TV_MODEL
    ANSCENTER::ANSMOT* _tvTracker = nullptr;
    std::unique_ptr_tvDetector = nullptr; // NOTE(review): template argument stripped — restore e.g. std::unique_ptr<...> _tvDetector
    std::vector TrackTVScreens(const std::vector& tvObjects);
    bool InsideScreen(const cv::Rect& tvBox, const cv::Rect& faceBox);
#endif
    bool _useTvDetector{ false };
    std::unordered_map _mMissingTrackFrames; // Track ID and number of missing face
    std::unordered_map _mMissingTrackScreen; // Track ID and number of missing screen
    std::unordered_map> _mTrackHistory; //History of liveness attribute
    std::unordered_map> _mTrackScreen; //History of Screen's position
    // Adaptive screens
// A screen sub-region paired with a scan priority (0 = none).
struct ImageSection {
    cv::Rect region;
    int priority;
    ImageSection(const cv::Rect& r) : region(r), priority(0) {}
};
cv::Size previousImageSize = cv::Size(0, 0); // presumably used to invalidate cachedSections on frame-size change — TODO confirm in .cpp
std::vector cachedSections; // NOTE(review): template argument appears stripped (likely std::vector<ImageSection>) — restore before compiling
int _currentPriority{ 0 }; // None
cv::Rect _detectedArea;// Area where license plate are detected
//AsyncPipeline* _pipeline = nullptr;
SPDLogger& _logger = SPDLogger::GetInstance("ANSFD", false);
EngineType _engineType;
void CheckLicense();
void Cleanup();
// Other functions can be used;
bool isSimilarObject(const Object& obj1, const Object& obj2);
bool isOverlayObject(const Object& obj1, const Object& obj2);
// Per-camera queue of detection results; clear() empties it completely.
struct CameraData {
    std::deque> _detectionQueue; // That stores the detection results
    void clear() {
        for (auto& detectionVector : _detectionQueue) {
            detectionVector.clear(); // Clear each vector of Objects
        }
        _detectionQueue.clear(); // Clear the deque itself
    }
};
CameraData _defaultCamera; // fallback reference returned by GetCameraData on error
// Helper: returns the Euclidean distance between two points.
float distance(const cv::Point2f& a, const cv::Point2f& b) {
    float dx = a.x - b.x;
    float dy = a.y - b.y;
    return std::sqrt(dx * dx + dy * dy);
}
// Compute the union (bounding box) of two rectangles.
cv::Rect unionRect(const cv::Rect& a, const cv::Rect& b) {
    int x = std::min(a.x, b.x);
    int y = std::min(a.y, b.y);
    int x2 = std::max(a.x + a.width, b.x + b.width);
    int y2 = std::max(a.y + a.height, b.y + b.height);
    return cv::Rect(x, y, x2 - x, y2 - y);
}
// Presumably derives a fixed-size candidate ROI from unionBox clamped to the
// image bounds — confirm against the .cpp implementation.
cv::Rect computeCandidateROI(const cv::Rect& unionBox, int fixedWidth, int fixedHeight, int imageWidth, int imageHeight);
// Check if two rectangles overlap (i.e. intersection area > 0).
// True when rectangles a and b share a non-empty intersection.
bool isOverlap(const cv::Rect& a, const cv::Rect& b) { return ((a & b).area() > 0); }
// Validates a detected face using landmark geometry (max eye angle), optional
// pixel offsets, and — when `frame` is provided — a minimum blur score.
bool isValidFace(const std::vector& landmarks, const cv::Rect& faceRect, float maxEyeAngle = 25,
    int offsetX = 0, int offsetY = 0, const cv::Mat& frame = cv::Mat(), float minBlurScore = 15.0f);
// Multi-camera data storage
std::unordered_map _cameras; // NOTE(review): template arguments appear stripped (likely std::unordered_map<std::string, CameraData>) — restore before compiling
//cv::Mat EnhanceImage(const cv::Mat inputImage, int cropedImageSize);
// Crops the face region [x1,y1]..[x2,y2] and scales it to cropedImageSize.
cv::Mat GetCroppedFaceScale(const cv::Mat& image, const int x1, const int y1, const int x2, const int y2, int cropedImageSize);
std::string GetOpenVINODevice(ov::Core& core);
// Aligns/preprocesses a face image from its 5-point landmarks — confirm details in the .cpp.
cv::Mat Preprocess(cv::Mat& input_mat, std::vector& face_landmark_5, cv::Mat& preprocessed_mat);
// Function to separate screen size
double calculateDistanceToCenter(const cv::Point& center, const cv::Rect& rect);
std::vector divideImage(const cv::Mat& image);
std::vector createSlideScreens(const cv::Mat& image);
int getHighestPriorityRegion();
int getLowestPriorityRegion();
cv::Rect getRegionByPriority(int priority);
std::vector AdjustDetectedBoundingBoxes(const std::vector& detectionsInROI,
    const cv::Rect& roi,
    const cv::Size& fullImageSize,
    float aspectRatio = 0.9f, // width at least 2x height
    int padding = 10 // base padding
    );
// Track faces
std::vector TrackFaces(const cv::Mat& inputImage, const std::vector& faceObjects);
// Presumably drops tracks missing longer than maxMissing and caps the table at
// maxTracks — confirm in the .cpp.
// NOTE(review): template parameter list appears stripped (likely
// template<class MapTrackData, class MapMissingFrames>).
template void CleanUpTracks(std::vector& currentObjects, MapTrackData& trackDataMap, MapMissingFrames& missingFramesMap, int maxMissing, int maxTracks);
public:
    // ── Model lifecycle ─────────────────────────────────────────────
    [[nodiscard]] virtual bool Initialize(std::string licenseKey, ModelConfig modelConfig, const std::string& modelZipFilePath, const std::string& modelZipPassword, std::string& labelMap);
    [[nodiscard]] virtual bool LoadModel(const std::string& modelZipFilePath, const std::string& modelZipPassword);
    [[nodiscard]] virtual bool LoadModelFromFolder(std::string licenseKey, ModelConfig modelConfig, std::string modelName, std::string className, const std::string& modelFolder, std::string& labelMap);
    [[nodiscard]] virtual bool OptimizeModel(bool fp16, std::string& optimizedModelFolder);
    // ── Inference entry points ──────────────────────────────────────
    [[nodiscard]] virtual std::vector RunInference(const cv::Mat& input, bool useDynamicImage = true, bool validateFace = false, bool facelivenessCheck = true) = 0;
    [[nodiscard]] virtual std::vector RunInference(const cv::Mat& input, const std::string& camera_id, bool useDynamicImage = true, bool validateFace = false, bool facelivenessCheck = true) = 0;
    [[nodiscard]] virtual bool Destroy() = 0;
    [[nodiscard]] std::vector DetectMovement(const cv::Mat& input, const std::string& camera_id);
    [[nodiscard]] cv::Rect GenerateMinimumSquareBoundingBox(const std::vector& detectedObjects, int minSize = 640);
    [[nodiscard]] std::vector GenerateFixedROIs(const std::vector& movementObjects, int fixedWidth, int fixedHeight, int imageWidth, int imageHeight);
    [[nodiscard]] bool ContainsIntersectingObject(const std::vector& movementObjects, const Object& result);
    void UpdateAndFilterDetectionObjects(std::vector& detectionObjects, int threshold);
    [[nodiscard]] bool UpdateDetectionThreshold(float detectionScore);
    [[nodiscard]] float GetDetectionThreshold();
    [[nodiscard]] ModelConfig GetModelConfig();
    // Function to add or retrieve camera data by ID
    // Inserts a default-constructed CameraData for unseen ids (try_emplace);
    // on exception falls back to the shared _defaultCamera. Guarded by _mutex.
    CameraData& GetCameraData(const std::string& cameraId) {
        std::lock_guard lock(_mutex);
        try {
            if (_cameras.empty()) {
                std::cerr << "Warning: _cameras is initially empty." << std::endl;
            }
            // Use try_emplace to insert a default CameraData if cameraId does not exist
            auto [iterator, inserted] = _cameras.try_emplace(cameraId, CameraData{});
            if (inserted) {
                std::cout << "Added new CameraData for cameraId: " << cameraId << std::endl;
            }
            return iterator->second; // Return the reference to CameraData
        }
        catch (const std::exception& ex) {
            std::cerr << "Exception in GetCameraData: " << ex.what() << std::endl;
            return _defaultCamera;
        }
    }
    void EnqueueDetection(const std::vector& detectedObjects, const std::string& cameraId);
    std::deque> DequeueDetection(const std::string& cameraId);
    // Face liveness functions
    bool LoadLivenessModel(std::string antiSpoofModelPath, bool isGPU = true);
    bool InitializeLivenessModel(std::string licenseKey, const std::string& modelZipFilePath, const std::string& modelZipPassword);
    // Runs the anti-spoof model on a cropped face; the returned pair is
    // presumably (label, score) — confirm in the .cpp.
    std::pair PredictLiveness(const cv::Mat& faceImage);
    std::pair LivenessPostProcessing(const float* pOutput);
    std::vector ValidateLivenessFaces(const cv::Mat& inputImage, const std::vector& faceObjects, const std::string& camera_id);
    float ComputeIoU(const cv::Rect& a, const cv::Rect& b);
    virtual void SetMaxSlotsPerGpu(int n) {} // override in TRT-based subclasses
    // NOTE(review): this class declares pure virtual functions but the
    // destructor is not virtual — deleting a derived object through an
    // ANSFDBase pointer is undefined behavior. Consider `virtual ~ANSFDBase();`.
    ~ANSFDBase();
};
} // namespace ANSCENTER
// Unicode conversion utilities for LabVIEW wrapper classes
extern "C" ANSENGINE_API int ANSEngine_ConvertUTF8ToUTF16LE(const char* utf8Str, LStrHandle result, int includeBOM = 1);
extern "C" ANSENGINE_API int ANSEngine_ConvertUTF16LEToUTF8(const unsigned char* utf16leBytes, int byteLen, LStrHandle result);
#endif // ANSENGINECOMMON_H