ANSCORE/modules/ANSODEngine/Movienet.cpp

#include "Movienet.h"
#include <filesystem>
namespace ANSCENTER {

    // Extract input size from _A<digit> suffix just before the file extension.
    //   _A0 / _A1 → 172    _A2 → 224    _A3 → 256    _A4 → 290    _A5 → 320
    // Examples:  Violence0305_A5.onnx → 320
    //            MyModel0908_A4.onnx  → 290
    // Returns 0 if the suffix is not recognised.
    static int GetMovinetSizeFromFilename(const std::string& filename)
    {
        static constexpr int variantSize[] = {
            172,  // A0
            172,  // A1
            224,  // A2
            256,  // A3
            290,  // A4
            320   // A5
        };

        // Strip extension — find last '.'
        const size_t dotPos = filename.rfind('.');
        if (dotPos == std::string::npos || dotPos < 3) return 0;

        // Expect 3-char suffix just before the dot: _A<digit>
        const char chUnderscore = filename[dotPos - 3];
        const char chA          = filename[dotPos - 2];
        const char chDigit      = filename[dotPos - 1];

        if (chUnderscore == '_' &&
            (chA == 'A' || chA == 'a') &&
            chDigit >= '0' && chDigit <= '5')
        {
            return variantSize[chDigit - '0'];
        }
        return 0;
    }

    // Scan folder for any .onnx whose stem ends with _A0.._A5 (highest first).
    // Returns the full path + matching input dimensions.
    // If modelName is provided (e.g. "Violence0305_A2"), uses that exact model
    // and resolves dimensions from its _A<digit> suffix.
    // Falls back to "movinet.onnx" with 172×172 if nothing matches.
    static std::string ResolveMovinetModel(const std::string& folder,
                                           int& outWidth, int& outHeight,
                                           const std::string& modelName = "")
    {
        // If a specific model name was given, use it directly
        if (!modelName.empty()) {
            std::string fname = modelName;
            // Append .onnx if not already present
            if (fname.size() < 5 || fname.substr(fname.size() - 5) != ".onnx") {
                fname += ".onnx";
            }
            std::string fullPath = CreateFilePath(folder, fname);
            int sz = GetMovinetSizeFromFilename(fname);
            if (sz > 0) {
                outWidth  = sz;
                outHeight = sz;
				std::cout << "ANSMOVIENET: Using specified model '" << fname << "' with input size " << sz << "x" << sz << "\n";
            }
            else {
                // Model name given but no recognized _A<digit> suffix — use default
                outWidth  = 172;
                outHeight = 172;
				std::cout << "ANSMOVIENET: Using specified model '" << fname << "' but failed to detect input size from filename. Defaulting to 172x172.\n";
            }
            return fullPath;
        }

        // No model name specified — scan folder, highest variant first
        static const std::string suffixes[] = { "_A5", "_A4", "_A3", "_A2", "_A1", "_A0" };

        try {
            namespace fs = std::filesystem;
            if (fs::is_directory(folder)) {
                for (const auto& suffix : suffixes) {
                    for (const auto& entry : fs::directory_iterator(folder)) {
                        if (!entry.is_regular_file()) continue;
                        const std::string fname = entry.path().filename().string();
                        // Must be .onnx
                        if (fname.size() < 5) continue;
                        std::string ext = fname.substr(fname.size() - 5);
                        // Case-insensitive .onnx check
                        for (auto& c : ext) c = static_cast<char>(std::tolower(static_cast<unsigned char>(c)));
                        if (ext != ".onnx") continue;

                        // Check if stem ends with the current suffix
                        const std::string stem = fname.substr(0, fname.size() - 5);
                        if (stem.size() >= suffix.size() &&
                            stem.compare(stem.size() - suffix.size(), suffix.size(), suffix) == 0)
                        {
                            int sz = GetMovinetSizeFromFilename(fname);
                            if (sz > 0) {
                                outWidth  = sz;
                                outHeight = sz;
                                return entry.path().string();
                            }
                        }
                    }
                }
            }
        }
        catch (...) {
            // Filesystem error — fall through to legacy
			std::cout << "ANSMOVIENET: Error scanning model folder '. Falling back to default model.\n";
        }

        // Legacy fallback
        outWidth  = 172;
        outHeight = 172;
        return CreateFilePath(folder, "movinet.onnx");
    }

    bool ANSMOVIENET::Initialize(std::string licenseKey, ModelConfig modelConfig, const std::string& modelZipFilePath, const std::string& modelZipPassword, std::string& labelMap) {
        bool result = ANSODBase::Initialize(licenseKey, modelConfig, modelZipFilePath, modelZipPassword, labelMap);
        labelMap = "Face";
        _licenseValid = true;
        std::vector<std::string> labels{ labelMap };
        if (!_licenseValid) return false;
        try {
            _modelConfig = modelConfig;
            _modelConfig.modelType = ModelType::MOVIENET;
            _modelConfig.detectionType = DetectionType::CLASSIFICATION;

            // Auto-detect model variant and matching input size
            int detectedW = 0, detectedH = 0;
            std::string onnxModel = ResolveMovinetModel(_modelFolder, detectedW, detectedH);
            _modelConfig.inpHeight = detectedH;
            _modelConfig.inpWidth = detectedW;

            if (_modelConfig.modelMNSThreshold < 0.2)
                _modelConfig.modelMNSThreshold = 0.5;
            if (_modelConfig.modelConfThreshold < 0.2)
                _modelConfig.modelConfThreshold = 0.5;
            if (_isInitialized) {
                _movienet_detector.reset();
                _isInitialized = false;
            }
            unsigned int numThreads = 1;
            this->_movienet_detector = std::make_unique<MOVINET>(
                onnxModel, TEMPORAL_LENGTH, detectedW, detectedH, 3, numThreads);
            _isInitialized = true;
            return true;
        }
        catch (const std::exception& e) {
            this->_logger.LogFatal("ANSMOVIENET::Initialize", e.what(), __FILE__, __LINE__);
            return false;
        }
    }
    bool ANSMOVIENET::LoadModel(const std::string& modelZipFilePath, const std::string& modelZipPassword) {
        try {
            bool result = ANSODBase::LoadModel(modelZipFilePath, modelZipPassword);
            if (!result) return false;

            int detectedW = 0, detectedH = 0;
            std::string onnxModel = ResolveMovinetModel(_modelFolder, detectedW, detectedH);
            _modelConfig.inpWidth  = detectedW;
            _modelConfig.inpHeight = detectedH;

            unsigned int numThreads = 1;
            _movienet_detector = std::make_unique<MOVINET>(
                onnxModel, TEMPORAL_LENGTH, detectedW, detectedH, 3, numThreads);

            _isInitialized = true;
            return _isInitialized;
        }
        catch (std::exception& e) {
            this->_logger.LogFatal("ANSMOVIENET::LoadModel", e.what(), __FILE__, __LINE__);
            return false;
        }
    }
    bool ANSMOVIENET::LoadModelFromFolder(std::string licenseKey, ModelConfig modelConfig, std::string modelName, std::string className, const std::string& modelFolder, std::string& labelMap) {
        try {
            bool result = ANSODBase::LoadModelFromFolder(licenseKey, modelConfig, modelName, className, modelFolder, labelMap);
            if (!result) return false;

            _modelConfig = modelConfig;
            _modelConfig.modelType = ModelType::MOVIENET;
            _modelConfig.detectionType = DetectionType::CLASSIFICATION;

            // Resolve model path and input dimensions.
            // If modelName is given (e.g. "Violence0305_A2"), uses that exact model;
            // otherwise scans the folder for the best _A<digit> variant.
            int detectedW = 0, detectedH = 0;
            std::string onnxModel = ResolveMovinetModel(modelFolder, detectedW, detectedH, modelName);
            _modelConfig.inpWidth  = detectedW;
            _modelConfig.inpHeight = detectedH;

            if (_modelConfig.modelMNSThreshold < 0.2)
                _modelConfig.modelMNSThreshold = 0.5;
            if (_modelConfig.modelConfThreshold < 0.2)
                _modelConfig.modelConfThreshold = 0.5;
            if (_isInitialized) {
                _movienet_detector.reset();
                _isInitialized = false;
            }
            unsigned int numThreads = 1;
            this->_movienet_detector = std::make_unique<MOVINET>(
                onnxModel, TEMPORAL_LENGTH, detectedW, detectedH, 3, numThreads);
            _isInitialized = true;
            return _isInitialized;
        }
        catch (std::exception& e) {
            this->_logger.LogFatal("ANSMOVIENET::LoadModel", e.what(), __FILE__, __LINE__);
            return false;
        }
    }
    bool ANSMOVIENET::OptimizeModel(bool fp16, std::string& optimizedModelFolder) {
        if (FileExist(_modelFilePath)) {
            optimizedModelFolder = GetParentFolder(_modelFilePath);
            this->_logger.LogDebug("ANSMOVIENET::OptimizeModel", "This model is optimized. No need other optimization.", __FILE__, __LINE__);
            return true;
        }
        else {
            optimizedModelFolder = "";
            this->_logger.LogFatal("ANSMOVIENET::OptimizeModel", "This model is not exist. Please check the model path again.", __FILE__, __LINE__);
            return false;
        }
    }
    ANSMOVIENET::~ANSMOVIENET() {
        try {
            Destroy();
        }
        catch (std::exception& e) {
            this->_logger.LogFatal("ANSMOVIENET::Destroy", e.what(), __FILE__, __LINE__);
        }
    }
    bool ANSMOVIENET::Destroy() {
        try {
            std::lock_guard<std::recursive_mutex> lock(_mutex);
            _cameraQueues.clear();
            _globalFrameCounter = 0;
            _movienet_detector.reset();
            _isInitialized = false;
            return true;
        }
        catch (std::exception& e) {
            this->_logger.LogFatal("ANSMOVIENET::Destroy", e.what(), __FILE__, __LINE__);
            return false;
        }
    }

	// Inference functions
    std::vector<Object> ANSMOVIENET::RunInference(const cv::Mat& input, const std::string& camera_id) {
        std::vector<Object> result = Inference(input, camera_id);
        if (_trackerEnabled) {
            result = ApplyTracking(result, camera_id);
            if (_stabilizationEnabled) result = StabilizeDetections(result, camera_id);
        }
        return result;
    }
    std::vector<Object> ANSMOVIENET::RunInference(const cv::Mat& input) {
        return Inference(input, "MovienetCam");
    }

    void ANSMOVIENET::CleanupStaleQueues() {
        // Called internally <20> already under lock from Inference()
        if (_cameraQueues.empty()) return;
        // 1. Remove queues not accessed for STALE_THRESHOLD frames
        for (auto it = _cameraQueues.begin(); it != _cameraQueues.end(); ) {
            int age = _globalFrameCounter - it->second.lastAccessFrame;
            if (age > STALE_THRESHOLD) {
                it = _cameraQueues.erase(it);
            }
            else {
                ++it;
            }
        }

        // 2. If still over hard cap, remove oldest queues first
        if (static_cast<int>(_cameraQueues.size()) > MAX_QUEUES) {
            // Collect and sort by last access time
            std::vector<std::pair<int, std::string>> accessTimes;
            accessTimes.reserve(_cameraQueues.size());
            for (const auto& [key, state] : _cameraQueues) {
                accessTimes.emplace_back(state.lastAccessFrame, key);
            }
            std::sort(accessTimes.begin(), accessTimes.end());

            // Remove oldest until under cap
            int toRemove = static_cast<int>(_cameraQueues.size()) - MAX_QUEUES;
            for (int i = 0; i < toRemove && i < static_cast<int>(accessTimes.size()); ++i) {
                 _cameraQueues.erase(accessTimes[i].second);
            }
        }
    }
    std::vector<Object> ANSMOVIENET::Inference(const cv::Mat& input, const std::string& camera_id) {
        std::lock_guard<std::recursive_mutex> lock(_mutex);
        std::vector<Object> detectedObjects;
        if (!_isInitialized || !_licenseValid || !_movienet_detector) {
            this->_logger.LogError("ANSMOVIENET::Inference",
                "Model is not initialized or license is not valid", __FILE__, __LINE__);
            return detectedObjects;
        }
        if (input.empty()) {
            this->_logger.LogError("ANSMOVIENET::Inference",
                "Input frame is empty", __FILE__, __LINE__);
            return detectedObjects;
        }

        try {
            _globalFrameCounter++;

            std::string cameraKey = camera_id.empty() ? "" : camera_id;
            auto& state = _cameraQueues[cameraKey];

            // Update access timestamp
            state.lastAccessFrame = _globalFrameCounter;
            state.frameCount++;

            // Add frame to queue
            state.frames.push_back(input.clone());

            // Maintain queue at TEMPORAL_LENGTH
            while (state.frames.size() > static_cast<size_t>(TEMPORAL_LENGTH)) {
                state.frames.pop_front();
            }

            // Run inference when:
            // 1. Full window available
            // 2. Stride condition met
            bool hasFullWindow = (state.frames.size() == static_cast<size_t>(TEMPORAL_LENGTH));
            bool strideReady = (state.frameCount == TEMPORAL_LENGTH)
                || (state.frameCount > TEMPORAL_LENGTH
                    && (state.frameCount - TEMPORAL_LENGTH) % _inferenceStride == 0);

            if (hasFullWindow && strideReady) {
                std::pair<int, float> result;
                _movienet_detector->inference(state.frames, result);

                if (result.first >= 0) {
                    Object obj;
                    obj.classId = result.first;
                    obj.className = (result.first < static_cast<int>(_classes.size())) ?
                        _classes[result.first] : "Unknown";
                    obj.confidence = result.second;
                    obj.box = cv::Rect(0, 0, input.cols, input.rows);
                    detectedObjects.push_back(obj);
                }
            }

            // Store in base class camera data
            CameraData& cameraData = GetCameraData(cameraKey);
            cameraData._detectionQueue.push_back(detectedObjects);
            if (cameraData._detectionQueue.size() > QUEUE_SIZE) {
                cameraData._detectionQueue.pop_front();
            }

            // ----- Periodic self-cleanup -----
            if (_globalFrameCounter % CLEANUP_INTERVAL == 0) {
                CleanupStaleQueues();
            }
        }
        catch (const std::exception& e) {
            this->_logger.LogError("ANSMOVIENET::Inference",
                std::string("Exception during inference: ") + e.what(), __FILE__, __LINE__);
            detectedObjects.clear();
        }
        catch (...) {
            this->_logger.LogError("ANSMOVIENET::Inference",
                "Unknown exception during inference", __FILE__, __LINE__);
            detectedObjects.clear();
        }

        return detectedObjects;
    }

}