// ANSCORE/modules/ANSODEngine/ANSEngineCommon.h
// NOTE(review): repository-viewer residue ("Files" listing header, size/line
// counts, and a blame timestamp) was captured with this source; it has been
// converted into this comment so the header remains valid C++.
#ifndef ANSENGINECOMMON_H
#define ANSENGINECOMMON_H
#define ANSENGINE_API __declspec(dllexport)
#pragma once
#include <opencv2/opencv.hpp>
#include <opencv2/core/cuda.hpp>
#include <openvino/openvino.hpp>
#include "ANSLicense.h"
#include "Utility.h"
#include <algorithm>
#include <fstream>
#include <exception>
#include <random>
#include <stdlib.h>
#include <stdio.h>
#include <vector>
#include <string>
#include <map>
#include "ANSMOT.h"
#include "onnxruntime_cxx_api.h"
#define USEONNXOV // Select the ONNX Runtime / OpenVINO build path (checked elsewhere in the engine)
#define RETAINDETECTEDFRAMES 80 // Frames to retain a detected active-window area before it expires
#define DEBUG_PRINT(x) std::cout << x << std::endl; // Simple debug logging helper (note: flushes via endl)
//#define DEBUGENGINE
//#define USE_TV_MODEL // Use model to detect if person is on TV screen or not
// Face-tracking tuning constants (presumably consumed by the FR/tracking code — confirm against the .cpp):
const int MAX_HISTORY_FACE = 5;       // history depth kept per tracked face
const int MAX_MISSING_FACE = 30;      // frames a face may go undetected before its track is dropped
const int MAX_TRACKS = 200;           // upper bound on simultaneously tracked objects
const float PAD_TRACK_RATIO = 0.5f;   // box padding ratio applied while tracking
const float PAD_DETECT_RATIO = 1.0f;  // box padding ratio applied at detection time
const int MAX_MISSING_SCREEN = 1000;  // frames a screen/region may stay unseen before reset
namespace ANSCENTER
{
// Clamp `value` into the inclusive range spanned by `low` and `high`.
// Unlike std::clamp, the two bounds may be supplied in either order:
// the smaller of the pair is always treated as the lower limit.
// Enabled only for arithmetic types.
template <typename T>
typename std::enable_if<std::is_arithmetic<T>::value, T>::type
inline clamp(const T& value, const T& low, const T& high)
{
    const T lo = (low < high) ? low : high;
    const T hi = (low < high) ? high : low;
    return (value < lo) ? lo : ((hi < value) ? hi : value);
}
// Plain integer 2-D point used for ROI polygon/line vertices
// (kept independent of cv::Point for JSON (de)serialization).
struct Point {
    int x, y; // pixel coordinates
};
// Describes one configurable ROI type: which shapes may be drawn,
// how many instances are allowed, and how matches are evaluated.
// Mirrors the "ROI_Config" entries of the JSON example below.
struct ROIConfig {
    bool Rectangle;        // true if rectangular ROIs are permitted
    bool Polygon;          // true if polygonal ROIs are permitted
    bool Line;             // true if line ROIs are permitted
    int MinItems;          // minimum number of instances of this ROI
    int MaxItems;          // maximum number of instances of this ROI
    std::string Name;      // display name, e.g. "Traffic Light"
    std::string ROIMatch;  // match rule, e.g. "All Corners" (serialized as "ROI-Match")
};
// A single user-configurable engine parameter.
// Mirrors the "Parameters" entries of the JSON example below.
struct Parameter {
    std::string Name;                    // parameter identifier, e.g. "Para1"
    std::string DataType;                // e.g. "Boolean", "Integer", "List-Single", "Range" (per example JSON)
    int NoOfDecimals;                    // decimal places for numeric display
    int MaxValue;                        // upper bound for numeric types
    int MinValue;                        // lower bound for numeric types
    std::string StartValue;              // initial value, e.g. ">,60" for Range types
    std::vector<std::string> ListItems;  // choices for list/range operators
    std::string DefaultValue;            // fallback value when unset
    std::string Value;                   // current value (always string-encoded)
};
// One concrete ROI instance drawn by the user.
// Mirrors the "ROI_Values" entries of the JSON example below.
struct ROIValue {
    std::string ROIMatch;          // match rule, e.g. "Centre Point" (serialized as "ROI-Match")
    std::vector<Point> ROIPoints;  // vertices (2 points for a line, 4+ for rect/polygon)
    std::string Option;            // evaluation option, e.g. "Inside ROI", "Above", "Left side"
    std::string Name;              // instance name, e.g. "Car Zone 1"
    int OriginalImageSize;         // image width the points were captured at (e.g. 1920) — presumably used for rescaling; confirm
};
// Full parameter bundle exchanged as JSON (see example below):
// ROI type definitions, per-type options, scalar parameters, and drawn ROIs.
struct Params {
    std::vector<ROIConfig> ROI_Config;    // available ROI types
    std::vector<std::string> ROI_Options; // one option string per ROI type
    std::vector<Parameter> Parameters;    // scalar/list parameters
    std::vector<ROIValue> ROI_Values;     // ROIs actually drawn by the user
};
/* Example
{
"ROI_Config":[
{
"Rectangle":true,
"Polygon":true,
"Line":false,
"MinItems":0,
"MaxItems":3,
"Name":"Traffic Light",
"ROI-Match":"All Corners"
},
{
"Rectangle":true,
"Polygon":false,
"Line":false,
"MinItems":1,
"MaxItems":1,
"Name":"Car Zone",
"ROI-Match":"All Corners"
},
{
"Rectangle":false,
"Polygon":false,
"Line":true,
"MinItems":1,
"MaxItems":2,
"Name":"Cross Line",
"ROI-Match":"All Corners"
}
],
"ROI_Options":[
"Inside ROI",
"Inside ROI",
"Both Directions"
],
"Parameters":[
{
"Name":"Para1",
"DataType":"Boolean",
"NoOfdecimals":0,
"MaxValue":0,
"MinValue":0,
"StartValue":"",
"ListItems":[],
"DefaultValue":"",
"Value":"true"
},
{
"Name":"Para2",
"DataType":"Integer",
"NoOfdecimals":0,
"MaxValue":5,
"MinValue":1,
"StartValue":"2",
"ListItems":[],
"DefaultValue":"",
"Value":"3"
},
{
"Name":"Para3",
"DataType":"List-Single",
"NoOfdecimals":0,
"MaxValue":0,
"MinValue":0,
"StartValue":"",
"ListItems":["A","B","C"],
"DefaultValue":"",
"Value":"A"
},
{
"Name":"Para4",
"DataType":"Range",
"NoOfdecimals":0,
"MaxValue":100,
"MinValue":50,
"StartValue":">,60",
"ListItems":[">","<"],
"DefaultValue":"",
"Value":">,52.000000"
}
],
"ROI_Values":[
{
"ROI-Match":"Centre Point",
"ROIPoints":[
{"x":269,"y":134},
{"x":777,"y":134},
{"x":777,"y":457},
{"x":269,"y":457}
],
"Option":"Inside ROI",
"Name":"Car Zone 1",
"OriginalImageSize":1920
},
{
"ROI-Match":"Centre Point",
"ROIPoints":[{"x":280,"y":613},{"x":1108,"y":280}],
"Option":"Above",
"Name":"Cross Line 1",
"OriginalImageSize":1920
},
{
"ROI-Match":"Centre Point",
"ROIPoints":[{"x":1511,"y":383},{"x":1283,"y":754}],
"Option":"Left side",
"Name":"Cross Line 2",
"OriginalImageSize":1920
},
{
"ROI-Match":"Centre Point",
"ROIPoints":[
{"x":229,"y":161},
{"x":964,"y":161},
{"x":964,"y":628},
{"x":229,"y":628}
],
"Option":"Left side",
"Name":"Traffic Light 1",
"OriginalImageSize":1920
},
{
"ROI-Match":"Centre Point",
"ROIPoints":[
{"x":1115,"y":304},
{"x":1730,"y":304},
{"x":1730,"y":695},
{"x":1115,"y":695}
],
"Option":"Left side",
"Name":"Traffic Light 2",
"OriginalImageSize":1920
},
{
"ROI-Match":"Centre Point",
"ROIPoints":[
{"x":678,"y":683},
{"x":1217,"y":683},
{"x":1217,"y":1026},
{"x":678,"y":1026}
],
"Option":"Left side",
"Name":"Traffic Light 3",
"OriginalImageSize":1920
}
]
}
*/
// Model metadata loaded from JSON (see ANSUtilityHelper::GetJson).
// NOTE(review): field names suggest anomaly-detection metadata
// (image/pixel thresholds, normalization range) — confirm against the loader.
struct MetaData
{
    float imageThreshold;       // image-level score threshold
    float pixelThreshold;       // pixel-level score threshold
    float min;                  // lower normalization bound — presumably; confirm
    float max;                  // upper normalization bound — presumably; confirm
    int inferSize[2]; // h w    (model input size)
    int imageSize[2]; // h w    (original image size)
    std::vector<float> _mean;   // per-channel normalization mean
    std::vector<float> _std;    // per-channel normalization std-dev
};
// Result of a letterbox-style resize: the resized image plus the
// horizontal/vertical padding added to preserve aspect ratio.
struct Resize
{
    cv::Mat resizedImage; // resized (and possibly padded) image
    int dw;               // padding added in width
    int dh;               // padding added in height
};
// Generic detection/tracking result produced by the inference engines.
struct Object
{
    int classId{ 0 };            // model class index
    int trackId{ 0 };            // multi-object-tracker id (0 when untracked)
    std::string className{};     // human-readable class label
    float confidence{ 0.0 };     // detection score
    cv::Rect box{};              // axis-aligned bounding box in image coordinates
    std::vector<cv::Point2f> polygon; // polygon that contain x1,y1,x2,y2,x3,y3,x4,y4 (for both segmentation and pose estimation)
    cv::Mat mask{}; // image in box (cropped)
    cv::cuda::GpuMat gpuMask{}; // GPU-resident face crop (set by NV12 affine warp, avoids re-upload)
    std::vector<float> kps{}; // Pose exsimate keypoints, containing x1,y1,x2,y2,... or oriented box x,y,width,height,angle
    std::string extraInfo; // More information such as facial recognition
    std::string cameraId; // Use to check if this object belongs to any camera
    // std::string attributes;
};
// Result of matching a detected face against the enrolled-user database.
struct FaceResultObject {
    int trackId{ 0 };        // tracker id of the face
    std::string userId;      // matched user's id (meaning when isUnknown is false — confirm)
    std::string userName;    // matched user's display name
    float similarity;        // similarity score against the best-matching embedding
    bool isUnknown;          // true when no enrolled user matched above threshold
    bool isMasked; // If the face is masked
    cv::Rect box{}; // Face bounding box
    cv::Mat mask;            // cropped face image — presumably; confirm against producer
    std::vector<cv::Point2f> polygon; // polygon that contain x1 ,y1,x2,y2,x3,y3,x4,y4
    std::vector<float> kps{}; // Containing landmarks
    float confidence{ 0.0 };  // face detection score
    std::string extraInformation; // extra result payload (e.g. JSON)
    std::string cameraId;     // camera this face belongs to
    // std::string attributes; // Face attributes (in Json format)
};
// Axis-aligned integer bounding box; (x, y) is the top-left corner.
struct BoundingBox {
    int x{ 0 };
    int y{ 0 };
    int width{ 0 };
    int height{ 0 };
    // Zero-initialized box.
    BoundingBox() = default;
    // Box from explicit position and size.
    BoundingBox(int x_, int y_, int width_, int height_)
        : x(x_), y(y_), width(width_), height(height_) {
    }
};
// A single 2-D keypoint with an associated confidence score.
struct KeyPoint {
    float x; ///< X-coordinate of the keypoint
    float y; ///< Y-coordinate of the keypoint
    float confidence; ///< Confidence score of the keypoint
    // All components default to zero when omitted.
    KeyPoint(float x_ = 0, float y_ = 0, float conf_ = 0) {
        x = x_;
        y = y_;
        confidence = conf_;
    }
};
// A group holds a set of objects and the union (bounding box) of all their boxes.
struct Group {
    std::vector<Object> objects; // members of the group
    cv::Rect unionBox;           // bounding box enclosing every member's box
};
// A scalar range expressed as a (lower, upper) pair of cv::Scalar values.
typedef std::pair<cv::Scalar, cv::Scalar> Range;
// Small fully-connected neural-network classifier (one hidden layer)
// loaded from an ANN model file. Layout of IW/LW/Ib/Lb follows the
// MATLAB-style convention (input weights / layer weights / biases) —
// presumed from the naming; confirm against ImportANNFromFile.
// Usage: Init(licenseKey, modelFilePath) then Inference(inputs).
class ANSENGINE_API ANNHUBClassifier
{
private:
    std::vector<double> nInput; //ANN inputs
    std::vector<double> nOutput; //ANN outputs
    std::vector<std::vector<double>> IW; // input->hidden weight matrix
    std::vector<std::vector<double>> LW; // hidden->output weight matrix
    std::vector<double> Ib;              // hidden-layer biases
    std::vector<double> Lb;              // output-layer biases
    // Structural parameters
    int nInputNodes, nHiddenNodes, nOutputNodes;
    int hiddenActivation; // default =2
    int outputActivation; // default =2
    int dataNormalisationModeInput; // default =1;
    int dataNormalisationModeOutput; // default =1;
    // Preprocessing and postprocessing settings
    std::vector<double> xmaxInput, xminInput; // Maximum and minimum of inputs
    double ymaxInput, yminInput; // Maximum and minimum of inputs
    std::vector<double> xmaxOutput, xminOutput; // Maximum and minimum of outputs
    double ymaxOutput, yminOutput; // Maximum and minimum of outputs
    // Control creation
    unsigned char isCreated;        // non-zero once the network structure is allocated
    std::string _licenseKey;
    bool _licenseValid{ false };
    bool _isInitialized{ false };
    std::string _modelFilePath;
private:
    void PreProcessing(std::vector<double>& Input); // mode =0--> linear, mode =1 mapminmax, mode =2 standarddev
    void PostProcessing(std::vector<double>& Output); // mode =0--> linear, mode =1 mapminmax, mode =2 standarddev
    void Create(int inputNodes, int HiddenNodes, int outputNodes);
    void FreeNeuralNetwork();
    void CheckLicense();
    int ImportANNFromFile(std::string filename);
public:
    ANNHUBClassifier();
    ~ANNHUBClassifier();
    // Validate the license and load the ANN model; returns false on failure.
    bool Init(std::string licenseKey, std::string modelFilePath);
    // Run a forward pass; returns the post-processed output vector.
    std::vector<double> Inference(std::vector<double> ip);
    void Destroy();
    int GetOutputNode() { return nOutputNodes; };
private:
    // Activation functions: each maps iVal element-wise into oVal.
    void ReLu(std::vector<double>& iVal, std::vector<double>& oVal);
    void LogSig(std::vector<double>& iVal, std::vector<double>& oVal);
    void TanSig(std::vector<double>& iVal, std::vector<double>& oVal);
    void PureLin(std::vector<double>& iVal, std::vector<double>& oVal);
    void SoftMax(std::vector<double>& iVal, std::vector<double>& oVal);
    void ActivationFunction(std::vector<double>& iVal, std::vector<double>& oVal, int mode);
};
// Multi-camera movement detector. Compares incoming frames against a set of
// stored control frames using PSNR (see psnr_threshold / getPSNRScore) and
// extracts moving regions as Objects, with optional temporal-consistency
// filtering to suppress flicker. All per-camera state is keyed by camera_id;
// public methods are guarded by cameras_mutex (recursive), so the class is
// intended for concurrent use across cameras.
class ANSENGINE_API MoveDetectsHandler
{
public:
    // Constructor and Destructor
    MoveDetectsHandler();
    ~MoveDetectsHandler();
    // Main detection methods
    std::vector<Object> MovementDetect(const std::string& camera_id, cv::Mat& next_image);
    std::vector<Object> MovementDetect(const std::string& camera_id, const size_t frame_index, cv::Mat& image);
    // Camera management
    bool hasCameraData(const std::string& camera_id) const;
    void removeCamera(const std::string& camera_id);
    std::vector<std::string> getCameraIds() const;
    // Configuration methods
    void setThreshold(const std::string& camera_id, double threshold);
    void setKeyFrameFrequency(const std::string& camera_id, size_t frequency);
    void setNumberOfControlFrames(const std::string& camera_id, size_t count);
    void setThumbnailRatio(const std::string& camera_id, double ratio);
    void setMaskEnabled(const std::string& camera_id, bool enabled);
    void setContoursEnabled(const std::string& camera_id, bool enabled);
    void setBboxEnabled(const std::string& camera_id, bool enabled);
    void setContourThickness(const std::string& camera_id, int thickness);
    void setBboxThickness(const std::string& camera_id, int thickness);
    void setMinObjectArea(const std::string& camera_id, double area);
    void setMinObjectSize(const std::string& camera_id, int size);
    void setMorphologyIterations(const std::string& camera_id, int iterations);
    // Temporal consistency settings
    void setTemporalConsistency(const std::string& camera_id, bool enabled);
    void setMaskOverlapThreshold(const std::string& camera_id, double threshold);
    void setTemporalHistorySize(const std::string& camera_id, size_t size);
    void setMinConsistentFrames(const std::string& camera_id, size_t frames);
    void setLocationStabilityEnabled(const std::string& camera_id, bool enabled);
    void setMaxLocationJitter(const std::string& camera_id, double pixels);
    // Getters for configuration
    double getThreshold(const std::string& camera_id) const;
    size_t getKeyFrameFrequency(const std::string& camera_id) const;
    size_t getNumberOfControlFrames(const std::string& camera_id) const;
    double getThumbnailRatio(const std::string& camera_id) const;
    bool isMaskEnabled(const std::string& camera_id) const;
    bool isContoursEnabled(const std::string& camera_id) const;
    bool isBboxEnabled(const std::string& camera_id) const;
    bool isTemporalConsistencyEnabled(const std::string& camera_id) const;
    // State query methods
    bool isMovementDetected(const std::string& camera_id) const;
    bool wasTransitionDetected(const std::string& camera_id) const;
    double getPSNRScore(const std::string& camera_id) const;
    size_t getFrameIndexWithMovement(const std::string& camera_id) const;
    std::chrono::milliseconds getTimeSinceLastMovement(const std::string& camera_id) const;
    size_t getControlFrameCount(const std::string& camera_id) const;
    size_t getNextFrameIndex(const std::string& camera_id) const;
    double getTemporalConsistencyScore(const std::string& camera_id) const;
    // Public utility methods
    bool empty(const std::string& camera_id) const;
    void clear(const std::string& camera_id);
    void clearAll();
    cv::Mat getOutput(const std::string& camera_id) const;
    cv::Mat getMask(const std::string& camera_id) const;
    std::vector<std::vector<cv::Point>> getContours(const std::string& camera_id) const;
    // Statistics
    // Aggregated per-camera processing statistics, returned by getStats().
    struct CameraStats {
        size_t total_frames_processed = 0;
        size_t frames_with_movement = 0;
        size_t frames_rejected_by_temporal_check = 0;
        size_t control_frames_count = 0;
        double average_psnr = 0.0;
        double min_psnr = std::numeric_limits<double>::max(); // starts at max so any real PSNR lowers it
        double max_psnr = 0.0;
        double average_temporal_consistency = 0.0;
        std::chrono::milliseconds total_processing_time{ 0 };
        std::chrono::high_resolution_clock::time_point last_movement_time;
        // Reset stats
        void reset()
        {
            total_frames_processed = 0;
            frames_with_movement = 0;
            frames_rejected_by_temporal_check = 0;
            control_frames_count = 0;
            average_psnr = 0.0;
            min_psnr = std::numeric_limits<double>::max();
            max_psnr = 0.0;
            average_temporal_consistency = 0.0;
            total_processing_time = std::chrono::milliseconds{ 0 };
        }
    };
    CameraStats getStats(const std::string& camera_id) const;
    void resetStats(const std::string& camera_id);
private:
    // All mutable state tracked for a single camera stream.
    struct CameraData
    {
        // Detection state
        bool movement_detected = false;
        bool transition_detected = false;
        size_t next_frame_index = 0;
        size_t next_key_frame = 0;
        double most_recent_psnr_score = 0.0;
        size_t frame_index_with_movement = 0;
        double max_change_percentage = 20.0; // Max % of frame that can change
        double min_change_percentage = 1.0; // Min % of frame that must change
        std::chrono::high_resolution_clock::time_point movement_last_detected;
        // Control frames storage
        std::map<size_t, cv::Mat> control_frames;
        // Output data
        cv::Mat output;
        cv::Mat mask;
        std::vector<std::vector<cv::Point>> contours;
        // Configuration parameters
        size_t key_frame_frequency = 20;
        size_t number_of_control_frames = 10;
        double psnr_threshold = 45.0; // PSNR below this => frames differ => movement
        double thumbnail_ratio = 0.05; // downscale factor used for comparison thumbnails
        cv::Size thumbnail_size = cv::Size(0, 0);
        // Visual options
        bool mask_enabled = true;
        bool contours_enabled = true;
        bool bbox_enabled = true;
        cv::LineTypes line_type = cv::LINE_4;
        int contours_size = 1;
        int bbox_size = 1;
        // Filtering parameters
        double min_object_area = 1000.0;
        int min_object_dimension = 5;
        int min_object_total_size = 25;
        // Morphology parameters
        int morphology_iterations = 10;
        // Temporal consistency parameters
        bool temporal_consistency_enabled = true;
        double mask_overlap_threshold = 0.05; // 5% overlap with previous required
        size_t temporal_history_size = 5; // Keep last N masks
        size_t min_consistent_frames = 3; // Need N consecutive consistent frames
        bool location_stability_enabled = true;
        double max_location_jitter = 50.0; // Max pixel movement between frames
        // Temporal consistency state
        std::deque<cv::Mat> mask_history;
        std::deque<cv::Point> centroid_history;
        size_t consistent_frame_count = 0;
        double last_temporal_consistency_score = 0.0;
        // Statistics
        CameraStats stats;
        // Clear function to release memory
        void clear()
        {
            for (auto& [index, frame] : control_frames)
            {
                frame.release();
            }
            control_frames.clear();
            output.release();
            mask.release();
            contours.clear();
            // Clear temporal history
            for (auto& m : mask_history)
            {
                m.release();
            }
            mask_history.clear();
            centroid_history.clear();
            // Reset state
            movement_detected = false;
            transition_detected = false;
            most_recent_psnr_score = 0.0;
            frame_index_with_movement = 0;
            thumbnail_size = cv::Size(0, 0);
            consistent_frame_count = 0;
            last_temporal_consistency_score = 0.0;
        }
    };
    // Private member functions
    double psnr(const cv::Mat& src, const cv::Mat& dst);
    cv::Mat simple_colour_balance(const cv::Mat& src);
    cv::Rect BoundingBoxFromContour(const std::vector<cv::Point>& contour);
    // Multi-camera data storage
    std::unordered_map<std::string, CameraData> cameras;
    mutable std::recursive_mutex cameras_mutex; // guards `cameras`; recursive so helpers may nest locks
    // Helper functions
    CameraData& getCameraData(const std::string& camera_id);
    const CameraData* getCameraDataConst(const std::string& camera_id) const;
    bool cameraExists(const std::string& camera_id) const;
    // Processing helpers
    cv::Mat computeMovementMask(const cv::Mat& control_frame, const cv::Mat& current_frame,
        const cv::Size& output_size, int morphology_iterations);
    std::vector<Object> extractObjectsFromMask(const cv::Mat& mask, const cv::Mat& image,
        CameraData& camera, const std::string& camera_id);
    void updateControlFrames(CameraData& camera, size_t frame_index, const cv::Mat& thumbnail);
    void updateStatistics(CameraData& camera, double psnr, bool movement_detected,
        std::chrono::milliseconds processing_time);
    // Temporal consistency helpers
    bool checkTemporalConsistency(CameraData& camera, const cv::Mat& current_mask);
    double calculateMaskOverlap(const cv::Mat& mask1, const cv::Mat& mask2);
    cv::Point calculateMaskCentroid(const cv::Mat& mask);
    double calculateLocationStability(const std::deque<cv::Point>& centroids);
    void updateTemporalHistory(CameraData& camera, const cv::Mat& mask);
    std::vector<Object> MovementDetectInternal(const std::string& camera_id,
        const size_t frame_index,
        cv::Mat& image,
        CameraData& camera);
};
// Stateless grab-bag of static helpers shared across the engines:
// string/JSON parsing, image decode/preprocess, face alignment,
// tiled-inference patch handling, NMS/NMM merging, and text encoding.
// FIX: removed two stray VCS blame-timestamp lines that had been captured
// into the class body (invalid C++ tokens that broke compilation).
class ANSENGINE_API ANSUtilityHelper {
public:
    // -- String / JSON parsing helpers --
    static std::vector<std::string> Split(const std::string& s, char delimiter);
    static std::vector<cv::Point> StringToPolygon(const std::string& input);
    static cv::Mat CropPolygon(const cv::Mat& image, const std::vector<cv::Point>& polygon);
    static cv::Mat CropFromStringPolygon(const cv::Mat& image, const std::string& strPolygon);
    static std::vector<cv::Rect> GetBoundingBoxesFromString(std::string strBBoxes);
    static std::vector<ANSCENTER::Object> GetDetectionsFromString(const std::string& strDets);
    static std::vector<float> StringToKeypoints(const std::string& str);
    static std::vector<cv::Point2f> PolygonFromString(const std::string& str);
    static ANSCENTER::Params ParseCustomParameters(const std::string& paramsJson);
    // NOTE: name typo ("Paramters") kept for source/ABI compatibility.
    static std::string SerializeCustomParamters(const ANSCENTER::Params& params);
    static bool ParseActiveROIMode(const std::string activeROIMode,int & mode,double & detectionScore, std::vector<int> & trackingObjectIds);
    static cv::Rect GetBoundingBoxFromPolygon(const std::vector<cv::Point>& polygon);
    static std::string VectorDetectionToJsonString(const std::vector<Object>& dets);
    // -- Image decode / preprocess helpers --
    static cv::Mat ReadImagePath(const std::string& imagePath);
    static cv::Mat ReadImageStreamBase64(const std::string& imageStreamBase64);
    static cv::Mat FormatToSquare(const cv::Mat& source);
    // NOTE: name typo ("Lengh") kept for source/ABI compatibility.
    static unsigned char* CVMatToBytes(cv::Mat image, unsigned int& bufferLengh);
    static std::vector<std::string> GetConfigFileContent(std::string modelConfigFile, ModelType& modelType, std::vector<int>& inputShape);
    static MetaData GetJson(const std::string& jsonPath);
    static std::vector<unsigned char> DecodeBase64(const std::string& base64);
    static cv::Mat Resize(const cv::Mat& src, int dst_height, int dst_width, const std::string& interpolation);
    static cv::Mat Crop(const cv::Mat& src, int top, int left, int bottom, int right);
    static cv::Mat Divide(const cv::Mat& src, float divide = 255.0);
    static cv::Mat Normalize(cv::Mat& src, const std::vector<float>& mean, const std::vector<float>& std, bool to_rgb = false, bool inplace = true);
    static cv::Mat Transpose(const cv::Mat& src);
    static cv::Mat Pad(const cv::Mat& src, int top, int left, int bottom, int right, int border_type, float val);
    static cv::Mat JpegStringToMat(const std::string& jpegString);
    // -- Linear-algebra helpers used by similarity-transform alignment --
    static cv::Mat MeanAxis0(const cv::Mat& src);
    static cv::Mat ElementwiseMinus(const cv::Mat& A, const cv::Mat& B);
    static cv::Mat VarAxis0(const cv::Mat& src);
    static int MatrixRank(cv::Mat M);
    static cv::Mat SimilarTransform(cv::Mat& dst, cv::Mat& src);
    // -- Face cropping / alignment --
    static std::vector<cv::Mat> AlignFaceWithFivePoints(const cv::Mat& image,
        const std::vector<std::array<int, 4>> boxes,
        std::vector<std::array<float, 2>> landmarks);
    static std::vector<cv::Mat>GetCroppedFaces(const cv::Mat& image, const std::vector<std::array<int, 4>> boxes);
    static cv::Mat GetCroppedFace(const cv::Mat& image, const int x1, const int y1, const int x2, const int y2);
    static cv::Mat GetCroppedFaceScale(const cv::Mat& image, const int x1, const int y1, const int x2, const int y2, int cropedImageSize);
    // For openVINO face alignment
    static cv::Mat GetTransform(cv::Mat* src, cv::Mat* dst);
    static void AlignFaces(std::vector<cv::Mat>* face_images, std::vector<cv::Mat>* landmarks_vec);
    static void AlignFacesExt(std::vector<cv::Mat>* face_images, std::vector<cv::Mat>* landmarks_vec);
    static std::pair<cv::Mat, cv::Mat> AlignFacesSCRFD(const cv::Mat& input_mat, const std::vector<cv::Point2f>& face_landmark_5);
    // Model optimsation (for TensorRT)
    static bool ModelOptimizer(std::string modelZipFilePath, std::string modelFileZipPassword, int fp16, std::string& optimisedModelFolder, int inputImageHeight=640, int inputImageWidth=640);
    // For tiled inference
    static std::vector<Object> ApplyNMS(const std::vector<Object>& detections, float nmsThreshold = 0.4);
    static void AdjustBoudingBox(Object& obj, int offsetX, int offsetY);
    static std::vector<cv::Rect> GetPatches(cv::Mat& image, int tileWidth, int tileHeight, double overlap);
    static std::vector<cv::Mat> ExtractPatches(cv::Mat& image, std::vector<cv::Rect>& patchRegions);
    static cv::Mat ResizePatch(cv::Mat& patch, int modelWidth, int modelHeight);
    // Greedy non-maximum merging: maps each kept detection index to the indices merged into it.
    static std::map<int, std::vector<int>> Greedy_NMM(const std::vector<Object>& object_predictions, const std::string& match_metric = "IOU", float match_threshold = 0.5);
    static float calculate_intersection_area(const cv::Rect& box1, const cv::Rect& box2);
    static float calculate_bbox_iou(const Object& pred1, const Object& pred2);
    static float calculate_bbox_ios(const Object& pred1, const Object& pred2);
    static bool has_match(const Object& pred1, const Object& pred2, const std::string& match_type = "IOU", float match_threshold = 0.5);
    static float get_merged_score(const Object& pred1, const Object& pred2);
    static cv::Rect calculate_box_union(const cv::Rect& box1, const cv::Rect& box2);
    static cv::Rect get_merged_bbox(const Object& pred1, const Object& pred2);
    // When merging two detections, the higher-confidence one dictates the class.
    static int get_merged_class_id(const Object& pred1, const Object& pred2) {
        return (pred1.confidence > pred2.confidence) ? pred1.classId : pred2.classId;
    }
    static std::string get_merged_category(const Object& pred1, const Object& pred2) {
        return (pred1.confidence > pred2.confidence) ? pred1.className : pred2.className;
    }
    static Object merge_object_pair(const Object& obj1, const Object& obj2);
    static std::vector<Object> select_object_predictions(const std::vector<Object>& object_prediction_list,const std::map<int, std::vector<int>>& keep_to_merge_list,const std::string& match_metric,float match_threshold);
    static cv::Rect BoundingBoxFromContour(std::vector<cv::Point> contour);
    static std::string PolygonToString(const std::vector<cv::Point2f>& polygon);
    static std::string KeypointsToString(const std::vector<float>& kps);
    static std::vector<cv::Point2f> RectToNormalizedPolygon(const cv::Rect& rect, float imageWidth, float imageHeight);
    // Convert a UTF-8 encoded string to UTF-16LE byte string.
    // Useful for LabVIEW which can display UTF-16LE Unicode text on Windows.
    // Returns a std::string containing the raw UTF-16LE bytes (2 bytes per character).
    static std::string ConvertUTF8ToUTF16LE(const std::string& utf8Str);
    // Decode JSON Unicode escape sequences (\uXXXX) to UTF-16LE byte string.
    // Input: ASCII string with \uXXXX escapes (e.g., "\\u6c5f\\u6771 599")
    // Output: UTF-16LE byte string for LabVIEW display.
    // ASCII characters pass through as 2-byte UTF-16LE (e.g., 'A' -> 0x41 0x00).
    static std::string DecodeJsonUnicodeToUTF16LE(const std::string& escapedStr);
    static std::vector<cv::Point2f> MaskToNormalizedPolygon(const cv::Mat& binaryMask, const cv::Rect& boundingBox, float imageWidth, float imageHeight, float simplificationEpsilon = 2.0f, int minContourArea = 10, int maxPoints = 50);
};
// Abstract base class for face-recognition engines: license checking,
// model loading/optimization, embedding extraction and matching.
// Concrete backends implement the pure-virtual Match/Feature/embedding APIs.
class ANSENGINE_API ANSFRBase {
protected:
    bool _licenseValid{ false };   // set by CheckLicense()
    bool _isInitialized{ false };  // set once Initialize() succeeds
    std::string _licenseKey;
    std::string _modelFolder;      // folder the model archive was extracted to
    std::string _modelConfigFile;
    SPDLogger& _logger = SPDLogger::GetInstance("ANSFR", false);
    void CheckLicense();
public:
    virtual bool Initialize(std::string licenseKey, ModelConfig modelConfig, const std::string& modelZipFilePath, const std::string& modelZipPassword, std::string& labelMap);
    virtual bool LoadModel(const std::string& modelZipFilePath, const std::string& modelZipPassword);
    virtual bool OptimizeModel(bool fp16, std::string& optimizedModelFolder);
    virtual std::vector<FaceResultObject> Match(const cv::Mat& input, const std::vector<ANSCENTER::Object>& bBox, const std::map<std::string, std::string>& userDict) = 0; //
    virtual std::vector<float> Feature(const cv::Mat& image, const ANSCENTER::Object& bBox) = 0; // Run inference and get embedding information from a cropped image (the first bbox)
    virtual cv::Mat GetCropFace(const cv::Mat& input, const ANSCENTER::Object& bBox) = 0;
    std::string GetModelFolder() { return _modelFolder; };
    virtual bool UpdateParamater(double knownPersonThreshold)=0;
    // For face
    virtual void Init()=0;
    virtual void AddEmbedding(const std::string& className, float embedding[])=0;
    virtual void AddEmbedding(const std::string& className, const std::vector<float>& embedding) =0;
    virtual bool Destroy()=0;
    virtual void SetMaxSlotsPerGpu(int n) {} // override in TRT-based subclasses
    // NOTE(review): destructor is non-virtual on a polymorphic base —
    // deleting a derived instance through ANSFRBase* is undefined behavior; confirm callers.
    ~ANSFRBase();
    // Utility functions
    std::vector<float> L2Normalize(const std::vector<float>& values);
    float CosineSimilarity(const std::vector<float>& a, const std::vector<float>& b, bool normalized);
};
class ANSENGINE_API ANSODBase {
protected:
bool _licenseValid{ false };
bool _isInitialized{ false };
Params _params;
std::string _licenseKey;
std::string _modelFolder;
std::string _modelConfigFile;
ModelConfig _modelConfig;
SPDLogger& _logger = SPDLogger::GetInstance("ANSOD", false);
// Debug benchmarking flag — when enabled, logs per-stage timing
// for the full inference pipeline (preprocess, inference, postprocess, tracking, etc.)
bool _debugFlag{ false };
std::string _classFilePath;
std::vector <std::string> _classes;
bool _loadEngineOnCreation{ true };
bool _skipEngineCache{ false };
bool _forceNoPool{ false };
void CheckLicense();
void LoadClassesFromString();
void LoadClassesFromFile();
std::recursive_mutex _mutex;
MoveDetectsHandler _handler;
size_t QUEUE_SIZE = 20;
// Multi-object tracker (MOT) — per-camera instances lazy-created in ApplyTracking
bool _trackerEnabled = false;
TrackerType _trackerType = TrackerType::BYTETRACK;
int _trackerMotType = 1; // ANSMOT motType int
std::string _trackerParams; // JSON params applied to each per-camera tracker
// Detection stabilization config
bool _stabilizationEnabled = false;
size_t _stabilizationQueueSize = 20; // history depth (frames)
int _stabilizationMaxConsecutiveMisses = 5; // stop interpolating after N misses
float _stabilizationConfidenceDecay = 0.85f; // per-miss confidence multiplier
float _stabilizationMinConfidence = 0.15f; // floor below which ghosts are dropped
// #1 Confidence hysteresis: two-threshold system
float _hysteresisEnterThreshold = 0.0f; // 0 = auto (use detectionScoreThreshold)
float _hysteresisKeepThreshold = 0.0f; // 0 = auto (enterThreshold * 0.65)
// #2 Temporal confidence smoothing (EMA)
float _emaAlpha = 0.3f; // EMA weight for new observation (0..1)
// #5 Track-aware confidence boost for established tracks
int _trackBoostMinFrames = 10; // frames before boost kicks in
float _trackBoostAmount = 0.05f; // confidence bonus for established tracks
// #7 Class consistency — prevent sudden class switches on established tracks
int _classConsistencyMinFrames = 5; // consecutive frames of new class required to accept switch
// Adaptive screens
struct ImageSection {
cv::Rect region;
int priority;
ImageSection(const cv::Rect& r) : region(r), priority(0) {}
};
cv::Size previousImageSize = cv::Size(0, 0);
// For active windows
std::vector<ImageSection> cachedSections;
int _currentPriority{ 0 }; // None
cv::Rect _detectedArea;// This is active windows.
int _retainDetectedArea{ 0 };
bool _isObjectDetected{ false };
// Per-camera detection state owned by ANSODBase: recent detections,
// the lazily created tracker, and stabilization histories keyed by trackId.
struct CameraData
{
    std::deque<std::vector<Object>> _detectionQueue; // That stores the detection results
    // Per-camera tracker instance (lazy-created in ApplyTracking)
    ANSCENTER::ANSMOT* _tracker = nullptr;
    // ── Stabilization state ──────────────────────────────
    // Rolling history of one tracked object, used for gap-filling,
    // confidence smoothing, and class-switch resistance.
    struct TrackedObjectHistory {
        int classId = 0;
        std::string className;
        std::string extraInfo; // preserve last-known extraInfo for ghost objects
        cv::Rect lastBox;
        float lastConfidence = 0.f;
        float smoothedConfidence = 0.f; // EMA-smoothed confidence
        int consecutiveMisses = 0; // frames since last raw detection
        int totalDetections = 0; // lifetime detection count
        int frameFirstSeen = 0; // frame counter when first detected
        bool isEstablished = false; // true once totalDetections >= trackBoostMinFrames
        // #7 Class consistency — resist sudden class switches
        int pendingClassId = -1; // candidate new class (-1 = none)
        std::string pendingClassName;
        int pendingClassStreak = 0; // consecutive frames with pendingClass
    };
    std::unordered_map<int, TrackedObjectHistory> _trackHistories; // trackId -> history
    int _stabilizationFrameCounter = 0;
    // Release the tracker handle and drop all queued/derived state.
    void clear()
    {
        if (_tracker) {
            ReleaseANSMOTHandle(&_tracker);
            _tracker = nullptr;
        }
        for (auto& detectionVector : _detectionQueue)
        {
            detectionVector.clear(); // Clear each vector of Objects
        }
        _detectionQueue.clear(); // Clear the deque itself
        _trackHistories.clear();
        _stabilizationFrameCounter = 0;
    }
};
// Multi-camera data storage
std::unordered_map<std::string, CameraData> _cameras;
CameraData _defaultCamera; // Default camera data if camera_id is not provided
// Tracker: convert detections → TrackerObject, run tracker, assign trackIds via IoU match
std::vector<Object> ApplyTracking(std::vector<Object>& detections, const std::string& camera_id);
// Stabilize detections: fill gaps with tracker-predicted objects, apply confidence decay
std::vector<Object> StabilizeDetections(std::vector<Object>& detections, const std::string& camera_id);
// extraInfo stabilization tag helpers
static void TagStabilized(std::string& extraInfo);
static void UntagStabilized(std::string& extraInfo);
static bool IsTaggedStabilized(const std::string& extraInfo);
// Other functions can be used;
bool isSimilarObject(const Object& obj1, const Object& obj2);
bool isOverlayObject(const Object& obj1, const Object& obj2);
// Helper: returns the Euclidean distance between two points.
float distance(const cv::Point2f& a, const cv::Point2f& b) {
    const cv::Point2f d = a - b;
    return std::sqrt(d.x * d.x + d.y * d.y);
}
// Compute the union (bounding box) of two rectangles.
cv::Rect unionRect(const cv::Rect& a, const cv::Rect& b) {
    const int left = std::min(a.x, b.x);
    const int top = std::min(a.y, b.y);
    const int right = std::max(a.x + a.width, b.x + b.width);
    const int bottom = std::max(a.y + a.height, b.y + b.height);
    return cv::Rect(left, top, right - left, bottom - top);
}
cv::Rect computeCandidateROI(const cv::Rect& unionBox, int fixedWidth, int fixedHeight, int imageWidth, int imageHeight);
// Check if two rectangles overlap (i.e. intersection area > 0).
bool isOverlap(const cv::Rect& a, const cv::Rect& b) {
    const cv::Rect intersection = a & b;
    return intersection.area() > 0;
}
std::string GetOpenVINODevice(ov::Core &core);
// Screen-partitioning helpers: separate the frame into prioritized regions
// for adaptive-ROI detection (region selection by priority).
double calculateDistanceToCenter(const cv::Point& center, const cv::Rect& rect);
std::vector<ImageSection> divideImage(const cv::Mat& image);
std::vector<ImageSection> createSlideScreens(const cv::Mat& image);
int getHighestPriorityRegion();
int getLowestPriorityRegion();
cv::Rect getRegionByPriority(int priority);
// Adjust boxes detected within `roi` relative to the full image
// (padding / aspect-ratio shaping — see .cpp for the exact policy).
std::vector<Object> AdjustDetectedBoundingBoxes(const std::vector<Object>& detectionsInROI,
    const cv::Rect& roi, const cv::Size& fullImageSize,
    float aspectRatio = 0.9f, // NOTE(review): original note said "width at least 2x height" but the default is 0.9f — confirm intended ratio
    int padding = 10 // base padding
);
void UpdateNoDetectionCondition();
void UpdateActiveROI(const cv::Mat& frame, ANSCENTER::Object detectedObj);
bool IsValidObject(const Object& obj, std::vector<int> objectIds);
public:
    // ---- Model lifecycle -------------------------------------------------
    [[nodiscard]] virtual bool Initialize(std::string licenseKey, ModelConfig modelConfig, const std::string& modelZipFilePath, const std::string& modelZipPassword, std::string& labelMap);
    [[nodiscard]] virtual bool LoadModel(const std::string& modelZipFilePath, const std::string& modelZipPassword);
    [[nodiscard]] virtual bool LoadModelFromFolder(std::string licenseKey, ModelConfig modelConfig, std::string modelName, std::string className, const std::string& modelFolder, std::string& labelMap);
    [[nodiscard]] virtual bool OptimizeModel(bool fp16, std::string& optimizedModelFolder);
    // ---- Inference entry points (overloads by ROI / input form) ----------
    [[nodiscard]] virtual std::vector<Object> RunInference(const cv::Mat& input) = 0;
    [[nodiscard]] virtual std::vector<Object> RunInference(const cv::Mat& input, const std::string& camera_id) = 0;
    [[nodiscard]] virtual std::vector<std::vector<Object>> RunInferencesBatch(const std::vector<cv::Mat>& inputs, const std::string& camera_id);
    [[nodiscard]] std::vector<Object> RunInference(const cv::Mat& input, std::vector<cv::Rect> Bbox, const std::string& camera_id);
    [[nodiscard]] std::vector<Object> RunInference(const cv::Mat& input, std::vector<cv::Point> Polygon, const std::string& camera_id);
    [[nodiscard]] std::vector<Object> RunInferences(const cv::Mat& input, int tiledWidth, int tiledHeight, double overLap, const std::string& camera_id); // split image into tiles and run inference on each
    [[nodiscard]] std::vector<Object> RunInferenceFromJpegString(const char* jpegData, unsigned long jpegSize, const std::string& camera_id) ;
    [[nodiscard]] std::vector<Object> RunTiledInferenceFromJpegString(const char* jpegData, unsigned long jpegSize, int tiledWith, int tiledHeight, double overLap, const std::string& camera_id);
    // ---- Motion-based ROI helpers ----------------------------------------
    [[nodiscard]] std::vector<Object> DetectMovement(const cv::Mat& input, const std::string& camera_id);
    [[nodiscard]] std::vector<cv::Rect> GenerateFixedROIs(const std::vector<Object>& movementObjects, int fixedWidth, int fixedHeight, int imageWidth, int imageHeight);
    [[nodiscard]] cv::Rect GenerateMinimumSquareBoundingBox(const std::vector<ANSCENTER::Object>& detectedObjects, int minSize = 640);
    void UpdateAndFilterDetectionObjects(std::vector<Object>& detectionObjects, int threshold);
    [[nodiscard]] bool ContainsIntersectingObject(const std::vector<Object>& movementObjects, const Object& result);
    [[nodiscard]] cv::Rect GetActiveWindow(const cv::Mat& input);
    // ---- Parameter management --------------------------------------------
    [[nodiscard]] Params GetParameters() { return _params; } // returns a copy of the current parameter set
    [[nodiscard]] virtual bool ConfigureParameters(Params& param); // engine fills in its supported parameters/ROI config
    [[nodiscard]] virtual bool SetParameters(const Params& param); //ANSVIS will set the parameters
    [[nodiscard]] bool UpdateDetectionThreshold(float detectionScore);
    [[nodiscard]] std::vector<Object> RunInferenceWithOption(const cv::Mat& input, const std::string& camera_id, const std::string activeROIMode);// Get detected objects
    // New API to support dynamic inference
    [[nodiscard]] std::vector<Object> RunDynamicInference(const cv::Mat& input, cv::Rect Bbox, const std::string& camera_id);
    [[nodiscard]] std::vector<Object> RunStaticInference(const cv::Mat& input, cv::Rect Bbox, const std::string& camera_id);
    // ---- Engine construction / caching switches --------------------------
    void SetLoadEngineOnCreation(bool loadEngineOnCreation) { _loadEngineOnCreation = loadEngineOnCreation; }
    virtual void SetMaxSlotsPerGpu(int n) {} // override in TRT-based subclasses
    void SetSkipEngineCache(bool skip) { _skipEngineCache = skip; } // propagated to Engine<T> before buildLoadNetwork
    void SetForceNoPool(bool force) { _forceNoPool = force; } // propagated to Engine<T> before buildLoadNetwork
    /// Enable/disable internal debug benchmarking.
    /// When enabled, per-stage timing (preprocess, inference, postprocess, tracking, etc.)
    /// is logged via _logger at info level for every inference call.
    void ActivateDebugger(bool debugFlag) { _debugFlag = debugFlag; }
    // Multi-object tracker (MOT) control
    bool SetTracker(TrackerType trackerType, bool enabled);
    // Detection stabilization control
    // Auto-enables tracker if not already enabled. Tracker auto-enables stabilization too.
    bool SetStabilization(bool enabled, int historySize = 20, int maxMisses = 5);
    // Fine-tune all stabilization parameters at once (JSON input).
    // Keys (all optional — omit to keep current value):
    //   "hysteresis_enter"        : float — confidence to start tracking (0=auto from model threshold)
    //   "hysteresis_keep"         : float — confidence to keep tracking (0=auto, 65% of enter)
    //   "ema_alpha"               : float — EMA weight for new observation (0..1, default 0.3)
    //   "track_boost_min_frames"  : int   — frames before boost kicks in (default 10)
    //   "track_boost_amount"      : float — confidence bonus for established tracks (default 0.05)
    //   "class_consistency_frames": int   — consecutive frames of new class to accept switch (default 5)
    //   "confidence_decay"        : float — per-miss decay multiplier for ghosts (default 0.85)
    //   "min_confidence"          : float — floor below which ghosts are dropped (default 0.15)
    bool SetStabilizationParameters(const std::string& jsonParams);
    bool SetTrackerParameters(const std::string& jsonParams);
    /// Set the text prompt for segmentation (pre-tokenized).
    /// Override in subclasses that support text-prompted segmentation (e.g. ANSSAM3, ANSONNXSAM3).
    /// Base implementation is a no-op that reports success.
    virtual bool SetPrompt(const std::vector<int64_t>& inputIds,
        const std::vector<int64_t>& attentionMask) {
        return true;
    }
    /// Set the text prompt by tokenizing the given text.
    /// Requires merges.txt (CLIP BPE vocabulary) in the model folder.
    /// Override in subclasses that support text-prompted segmentation.
    /// Base implementation is a no-op that reports success.
    virtual bool SetPrompt(const std::string& text) { return true; }
    // NOTE(review): public mutable member holding detection results — consider
    // a private member plus accessor; confirm external users before changing.
    std::vector<Object> _detectedObjects;
    [[nodiscard]] ModelConfig GetModelConfig();
// Function to add or retrieve camera data by ID.
// Looks up (or lazily creates) the per-camera state under the recursive
// mutex; on any std::exception the shared _defaultCamera is returned so
// callers always receive a usable reference.
CameraData& GetCameraData(const std::string& cameraId) {
    std::lock_guard<std::recursive_mutex> guard(_mutex);
    try {
        if (_cameras.empty()) {
            std::cerr << "Warning: _cameras is initially empty." << std::endl;
        }
        // try_emplace leaves the map untouched when the key already exists.
        auto emplaceResult = _cameras.try_emplace(cameraId, CameraData{});
        const bool wasInserted = emplaceResult.second;
        if (wasInserted) {
            std::cout << "Added new CameraData for cameraId: " << cameraId << std::endl;
        }
        return emplaceResult.first->second;   // reference into the map
    } catch (const std::exception& ex) {
        std::cerr << "Exception in GetCameraData: " << ex.what() << std::endl;
        return _defaultCamera;
    }
}
// Push a batch of detections onto the given camera's result queue.
void EnqueueDetection(const std::vector<Object>& detectedObjects, const std::string& cameraId);
// Overload: run inference and serialize the detections into detectionResult.
[[nodiscard]] bool RunInference(const cv::Mat& input, const std::string& camera_id, std::string& detectionResult);
// Retrieve the queued detection batches for the given camera.
[[nodiscard]] std::deque<std::vector<Object>> DequeueDetection(const std::string& cameraId);
[[nodiscard]] virtual bool Destroy()=0;
// Made virtual: ANSODBase is an abstract polymorphic base (pure virtual
// RunInference) and is deleted through base pointers (e.g. the
// std::unique_ptr<ANSODBase> TV detector) — without a virtual destructor
// that deletion is undefined behavior.
virtual ~ANSODBase();
protected:
std::vector<Object> RunInferenceInScanningMode(const cv::Mat& input, const std::string& camera_id);// Get detected objects
std::vector<Object> RunInferenceInTrackingMode(const cv::Mat& input, const std::string& camera_id, std::vector<int> trackingObjectIds);// Get detected objects
};
// Base class for face-detection (FD) engines. Owns model/license state, an
// ONNX Runtime liveness session, a face MOT tracker, per-camera detection
// queues, and adaptive screen-partitioning helpers shared by concrete engines.
class ANSENGINE_API ANSFDBase {
protected:
    bool _licenseValid{ false };
    bool _isInitialized{ false };
    std::string _licenseKey;
    std::string _modelFolder;
    std::string _faceAttrModelFolder;
    ModelConfig _modelConfig;
    std::string _modelConfigFile;
    std::string _imageProcessingModelFile;
    MoveDetectsHandler _handler;
    const size_t QUEUE_SIZE = 10; // presumably caps batches per camera queue — confirm in .cpp
    std::recursive_mutex _mutex;  // guards _cameras (see GetCameraData)
    ANSCENTER::EngineType engineType;
    // ---- Face liveness (ONNX Runtime) ----
    bool _facelivenessEngineValid{ false };
    Ort::Env* _ortLivenessEnv = nullptr;
    Ort::SessionOptions* _ortLivenessSessionOptions = nullptr;
    Ort::Session* _livenessSession = nullptr;
    std::string _livenessInputName;
    std::string _livenessOutputName;
    // ---- Tracking ----
    ANSCENTER::ANSMOT* _faceTracker = nullptr;
#ifdef USE_TV_MODEL
    ANSCENTER::ANSMOT* _tvTracker = nullptr;
    std::unique_ptr<ANSODBase> _tvDetector = nullptr;
    std::vector<Object> TrackTVScreens(const std::vector<Object>& tvObjects);
    bool InsideScreen(const cv::Rect& tvBox, const cv::Rect& faceBox);
#endif
    bool _useTvDetector{ false };
    std::unordered_map<int, int> _mMissingTrackFrames;          // Track ID -> consecutive frames the face was missing
    std::unordered_map<int, int> _mMissingTrackScreen;          // Track ID -> consecutive frames the screen was missing
    std::unordered_map<int, std::deque<int>> _mTrackHistory;    // Track ID -> history of liveness attribute values
    std::unordered_map<int, std::vector<Object>> _mTrackScreen; // Track ID -> history of the screen's position
    // Adaptive screens: a frame region with a detection priority.
    struct ImageSection {
        cv::Rect region;
        int priority;
        ImageSection(const cv::Rect& r) : region(r), priority(0) {}
    };
    cv::Size previousImageSize = cv::Size(0, 0);
    std::vector<ImageSection> cachedSections;
    int _currentPriority{ 0 }; // None
    cv::Rect _detectedArea; // area of last detections (original comment said "license plate" — likely copy/paste from the LP engine; confirm)
    //AsyncPipeline* _pipeline = nullptr;
    SPDLogger& _logger = SPDLogger::GetInstance("ANSFD", false);
    EngineType _engineType;
    void CheckLicense();
    void Cleanup();
    // Similarity / overlap predicates used when merging duplicate detections.
    bool isSimilarObject(const Object& obj1, const Object& obj2);
    bool isOverlayObject(const Object& obj1, const Object& obj2);
    // Per-camera detection result queue.
    struct CameraData
    {
        std::deque<std::vector<Object>> _detectionQueue; // stores the detection results
        void clear()
        {
            for (auto& detectionVector : _detectionQueue)
            {
                detectionVector.clear(); // Clear each vector of Objects
            }
            _detectionQueue.clear(); // Clear the deque itself
        }
    };
    CameraData _defaultCamera; // fallback returned by GetCameraData on failure
    // Helper: Euclidean distance between two points. std::hypot avoids the
    // intermediate overflow/underflow of the naive sqrt(dx*dx + dy*dy) form.
    float distance(const cv::Point2f& a, const cv::Point2f& b) {
        return std::hypot(a.x - b.x, a.y - b.y);
    }
    // Compute the union (bounding box) of two rectangles. cv::Rect::operator|
    // ignores empty operands instead of letting a degenerate (0,0,0,0)
    // rectangle drag the union toward the origin.
    cv::Rect unionRect(const cv::Rect& a, const cv::Rect& b) {
        return a | b;
    }
    cv::Rect computeCandidateROI(const cv::Rect& unionBox, int fixedWidth, int fixedHeight, int imageWidth, int imageHeight);
    // Check if two rectangles overlap (i.e. intersection area > 0).
    bool isOverlap(const cv::Rect& a, const cv::Rect& b) {
        return ((a & b).area() > 0);
    }
    bool isValidFace(const std::vector<cv::Point2f>& landmarks, const cv::Rect& faceRect, float maxEyeAngle = 25, int offsetX = 0, int offsetY = 0, const cv::Mat& frame = cv::Mat(), float minBlurScore = 15.0f);
    // Multi-camera data storage
    std::unordered_map<std::string, CameraData> _cameras;
    //cv::Mat EnhanceImage(const cv::Mat inputImage, int cropedImageSize);
    cv::Mat GetCroppedFaceScale(const cv::Mat& image, const int x1, const int y1, const int x2, const int y2, int cropedImageSize);
    std::string GetOpenVINODevice(ov::Core& core);
    cv::Mat Preprocess(cv::Mat& input_mat, std::vector<cv::Point2f>& face_landmark_5, cv::Mat& preprocessed_mat);
    // Screen-partitioning helpers: separate the frame into prioritized regions.
    double calculateDistanceToCenter(const cv::Point& center, const cv::Rect& rect);
    std::vector<ImageSection> divideImage(const cv::Mat& image);
    std::vector<ANSFDBase::ImageSection> createSlideScreens(const cv::Mat& image);
    int getHighestPriorityRegion();
    int getLowestPriorityRegion();
    cv::Rect getRegionByPriority(int priority);
    std::vector<Object> AdjustDetectedBoundingBoxes(const std::vector<Object>& detectionsInROI,
        const cv::Rect& roi, const cv::Size& fullImageSize,
        float aspectRatio = 0.9f, // NOTE(review): original note said "width at least 2x height" but default is 0.9f — confirm intended ratio
        int padding = 10 // base padding
    );
    // Track faces across frames.
    std::vector<Object> TrackFaces(const cv::Mat& inputImage, const std::vector<Object>& faceObjects);
    // Remove tracks missing for more than maxMissing frames; cap total at maxTracks.
    template<typename MapTrackData, typename MapMissingFrames>
    void CleanUpTracks(std::vector<Object>& currentObjects,
        MapTrackData& trackDataMap,
        MapMissingFrames& missingFramesMap,
        int maxMissing,
        int maxTracks);
public:
    // ---- Model lifecycle ----
    [[nodiscard]] virtual bool Initialize(std::string licenseKey, ModelConfig modelConfig, const std::string& modelZipFilePath, const std::string& modelZipPassword, std::string& labelMap);
    [[nodiscard]] virtual bool LoadModel(const std::string& modelZipFilePath, const std::string& modelZipPassword);
    [[nodiscard]] virtual bool LoadModelFromFolder(std::string licenseKey, ModelConfig modelConfig, std::string modelName, std::string className, const std::string& modelFolder, std::string& labelMap);
    [[nodiscard]] virtual bool OptimizeModel(bool fp16, std::string& optimizedModelFolder);
    // ---- Inference ----
    [[nodiscard]] virtual std::vector<ANSCENTER::Object> RunInference(const cv::Mat& input, bool useDynamicImage = true, bool validateFace = false, bool facelivenessCheck = true) = 0;
    [[nodiscard]] virtual std::vector<Object> RunInference(const cv::Mat& input, const std::string& camera_id, bool useDynamicImage = true, bool validateFace = false, bool facelivenessCheck = true) = 0;
    [[nodiscard]] virtual bool Destroy() = 0;
    // ---- Motion-based ROI helpers ----
    [[nodiscard]] std::vector<Object> DetectMovement(const cv::Mat& input, const std::string& camera_id);
    [[nodiscard]] cv::Rect GenerateMinimumSquareBoundingBox(const std::vector<ANSCENTER::Object>& detectedObjects, int minSize = 640);
    [[nodiscard]] std::vector<cv::Rect> GenerateFixedROIs(const std::vector<Object>& movementObjects, int fixedWidth, int fixedHeight, int imageWidth, int imageHeight);
    [[nodiscard]] bool ContainsIntersectingObject(const std::vector<Object>& movementObjects, const Object& result);
    void UpdateAndFilterDetectionObjects(std::vector<Object>& detectionObjects, int threshold);
    [[nodiscard]] bool UpdateDetectionThreshold(float detectionScore);
    [[nodiscard]] float GetDetectionThreshold();
    [[nodiscard]] ModelConfig GetModelConfig();
    // Function to add or retrieve camera data by ID.
    // Creates a default entry on first use; returns _defaultCamera on failure.
    CameraData& GetCameraData(const std::string& cameraId)
    {
        std::lock_guard<std::recursive_mutex> lock(_mutex);
        try
        {
            if (_cameras.empty())
            {
                std::cerr << "Warning: _cameras is initially empty." << std::endl;
            }
            // Use try_emplace to insert a default CameraData if cameraId does not exist
            auto [iterator, inserted] = _cameras.try_emplace(cameraId, CameraData{});
            if (inserted)
            {
                std::cout << "Added new CameraData for cameraId: " << cameraId << std::endl;
            }
            return iterator->second; // Return the reference to CameraData
        }
        catch (const std::exception& ex)
        {
            std::cerr << "Exception in GetCameraData: " << ex.what() << std::endl;
            return _defaultCamera;
        }
    }
    void EnqueueDetection(const std::vector<Object>& detectedObjects, const std::string& cameraId);
    std::deque<std::vector<Object>> DequeueDetection(const std::string& cameraId);
    // ---- Face liveness functions ----
    bool LoadLivenessModel(std::string antiSpoofModelPath, bool isGPU = true);
    bool InitializeLivenessModel(std::string licenseKey, const std::string& modelZipFilePath, const std::string& modelZipPassword);
    std::pair<int, float> PredictLiveness(const cv::Mat& faceImage);
    std::pair<int, float> LivenessPostProcessing(const float* pOutput);
    std::vector<Object> ValidateLivenessFaces(const cv::Mat& inputImage, const std::vector<Object>& faceObjects, const std::string& camera_id);
    float ComputeIoU(const cv::Rect& a, const cv::Rect& b);
    virtual void SetMaxSlotsPerGpu(int n) {} // override in TRT-based subclasses
    // Made virtual: ANSFDBase is an abstract polymorphic base (pure virtual
    // RunInference), so deleting a derived engine through an ANSFDBase* must
    // run the derived destructor — non-virtual here is undefined behavior.
    virtual ~ANSFDBase();
};
}
// 2026-03-31 14:10:21 +11:00  (stray VCS timestamp artifact — not valid C++; commented out)
// Unicode conversion utilities for LabVIEW wrapper classes.
// Results are written into the LabVIEW string handle (LStrHandle); the int
// return is a status code — exact convention defined in the .cpp (confirm).
extern "C" ANSENGINE_API int ANSEngine_ConvertUTF8ToUTF16LE(const char* utf8Str, LStrHandle result);
extern "C" ANSENGINE_API int ANSEngine_ConvertUTF16LEToUTF8(const unsigned char* utf16leBytes, int byteLen, LStrHandle result);
// 2026-03-28 16:54:11 +11:00  (stray VCS timestamp artifact — not valid C++; commented out)
#endif