#include "Movienet.h"
// NOTE(review): the header name of the directive below is missing in this view of the
// file; the directive is reproduced exactly as found — restore the original header name.
#include
namespace ANSCENTER {

// Maps a MoViNet model filename to the square input resolution of its variant.
//
// The variant is encoded in the three characters immediately before the last
// '.' of the name: an underscore, the letter 'A' (either case) and one digit.
//   _A0 / _A1 → 172    _A2 → 224    _A3 → 256    _A4 → 290    _A5 → 320
// Examples: Violence0305_A5.onnx → 320
//           MyModel0908_A4.onnx  → 290
// Returns 0 when the name has no '.', has fewer than three characters before
// it, or does not carry a recognised variant tag.
static int GetMovinetSizeFromFilename(const std::string& filename) {
    const std::string::size_type extensionDot = filename.find_last_of('.');
    if (extensionDot == std::string::npos) {
        return 0;
    }
    if (extensionDot < 3) {
        return 0;  // not enough room for the three-character tag
    }

    // The tag occupies the three characters that end right before the dot.
    const std::string::size_type tagStart = extensionDot - 3;
    if (filename[tagStart] != '_') {
        return 0;
    }

    const char variantLetter = filename[tagStart + 1];
    if (variantLetter != 'A' && variantLetter != 'a') {
        return 0;
    }

    switch (filename[tagStart + 2]) {
    case '0':
    case '1':
        return 172;
    case '2':
        return 224;
    case '3':
        return 256;
    case '4':
        return 290;
    case '5':
        return 320;
    default:
        return 0;
    }
}

// Scan folder for any .onnx whose stem ends with _A0.._A5 (highest first).
// Returns the full path + matching input dimensions.
// If modelName is provided (e.g. "Violence0305_A2"), uses that exact model
// and resolves dimensions from its _A<N> suffix.
// Falls back to "movinet.onnx" with 172×172 if nothing matches.
// NOTE(review): in this view of the file several template argument lists appear to be
// missing (for example `static_cast(`, `std::make_unique(`, `std::pair result;`,
// `std::vector>`). They are left exactly as found here — confirm against the original
// file before building.

// Picks the ONNX model file to load and reports the input size to use with it.
//
// folder     Directory that holds the model file(s).
// outWidth   Receives the input width; always written before returning.
// outHeight  Receives the input height; always written with the same value as outWidth.
// modelName  Optional explicit model name, with or without a trailing ".onnx".
//
// Returns the path of the chosen model. Existence of the returned file is not checked
// on the explicit-name path or on the legacy fallback path.
static std::string ResolveMovinetModel(const std::string& folder, int& outWidth, int& outHeight, const std::string& modelName = "") {
    // If a specific model name was given, use it directly
    if (!modelName.empty()) {
        std::string fname = modelName;
        // Append .onnx if not already present
        // NOTE(review): this comparison is case-sensitive, so a name ending in ".ONNX"
        // gets ".onnx" appended; the folder scan below lowercases the extension instead.
        if (fname.size() < 5 || fname.substr(fname.size() - 5) != ".onnx") {
            fname += ".onnx";
        }
        std::string fullPath = CreateFilePath(folder, fname);
        int sz = GetMovinetSizeFromFilename(fname);
        if (sz > 0) {
            outWidth = sz;
            outHeight = sz;
            std::cout << "ANSMOVIENET: Using specified model '" << fname << "' with input size " << sz << "x" << sz << "\n";
        }
        else {
            // Model name given but no recognized _A suffix — use default
            outWidth = 172;
            outHeight = 172;
            std::cout << "ANSMOVIENET: Using specified model '" << fname << "' but failed to detect input size from filename. Defaulting to 172x172.\n";
        }
        return fullPath;
    }

    // No model name specified — scan folder, highest variant first
    static const std::string suffixes[] = { "_A5", "_A4", "_A3", "_A2", "_A1", "_A0" };
    try {
        namespace fs = std::filesystem;
        if (fs::is_directory(folder)) {
            // The directory is walked once per suffix so that a higher variant always wins
            // over a lower one. Among several files with the same suffix, the first one
            // the iterator yields is returned.
            for (const auto& suffix : suffixes) {
                for (const auto& entry : fs::directory_iterator(folder)) {
                    if (!entry.is_regular_file()) continue;
                    const std::string fname = entry.path().filename().string();
                    // Must be .onnx
                    if (fname.size() < 5) continue;
                    std::string ext = fname.substr(fname.size() - 5);
                    // Case-insensitive .onnx check
                    for (auto& c : ext) c = static_cast(std::tolower(static_cast(c)));
                    if (ext != ".onnx") continue;
                    // Check if stem ends with the current suffix
                    // (this comparison is case-sensitive: only an upper-case 'A' matches here)
                    const std::string stem = fname.substr(0, fname.size() - 5);
                    if (stem.size() >= suffix.size() && stem.compare(stem.size() - suffix.size(), suffix.size(), suffix) == 0) {
                        int sz = GetMovinetSizeFromFilename(fname);
                        if (sz > 0) {
                            outWidth = sz;
                            outHeight = sz;
                            return entry.path().string();
                        }
                    }
                }
            }
        }
    }
    catch (...) {
        // Filesystem error — fall through to legacy
        // NOTE(review): the message below contains an unmatched single quote and does not
        // print the folder name; it looks like the folder was meant to be inserted there.
        std::cout << "ANSMOVIENET: Error scanning model folder '. Falling back to default model.\n";
    }

    // Legacy fallback
    outWidth = 172;
    outHeight = 172;
    return CreateFilePath(folder, "movinet.onnx");
}

// Runs the base-class initialization, resolves the model file inside _modelFolder and
// creates the detector for it.
//
// labelMap is an in/out parameter: it is passed to the base class and then overwritten
// with "Face".
// Returns true once the detector has been created; returns false if a std::exception is
// thrown while setting up (the exception is logged, not propagated).
bool ANSMOVIENET::Initialize(std::string licenseKey, ModelConfig modelConfig, const std::string& modelZipFilePath, const std::string& modelZipPassword, std::string& labelMap) {
    // NOTE(review): `result` is never read, so a failing base-class Initialize is ignored
    // here; LoadModel and LoadModelFromFolder both return false in that situation.
    bool result = ANSODBase::Initialize(licenseKey, modelConfig, modelZipFilePath, modelZipPassword, labelMap);
    labelMap = "Face";
    // NOTE(review): _licenseValid is forced to true right before it is tested, so the
    // early return two lines below can never be taken — confirm this is intended.
    _licenseValid = true;
    std::vector labels{ labelMap }; // not used anywhere in this function
    if (!_licenseValid) return false;
    try {
        _modelConfig = modelConfig;
        _modelConfig.modelType = ModelType::MOVIENET;
        _modelConfig.detectionType = DetectionType::CLASSIFICATION;

        // Auto-detect model variant and matching input size
        int detectedW = 0, detectedH = 0;
        std::string onnxModel = ResolveMovinetModel(_modelFolder, detectedW, detectedH);
        _modelConfig.inpHeight = detectedH;
        _modelConfig.inpWidth = detectedW;

        // Thresholds below 0.2 are replaced with 0.5.
        if (_modelConfig.modelMNSThreshold < 0.2) _modelConfig.modelMNSThreshold = 0.5;
        if (_modelConfig.modelConfThreshold < 0.2) _modelConfig.modelConfThreshold = 0.5;

        // Drop a detector left over from an earlier initialization before building a new one.
        if (_isInitialized) {
            _movienet_detector.reset();
            _isInitialized = false;
        }

        unsigned int numThreads = 1;
        // The literal 3 is presumably the channel count — TODO confirm against the detector's constructor.
        this->_movienet_detector = std::make_unique( onnxModel, TEMPORAL_LENGTH, detectedW, detectedH, 3, numThreads);
        _isInitialized = true;
        return true;
    }
    catch (const std::exception& e) {
        this->_logger.LogFatal("ANSMOVIENET::Initialize", e.what(), __FILE__, __LINE__);
        return false;
    }
}

// Loads the model through the base class, then resolves the model file inside
// _modelFolder and creates the detector for it.
//
// Returns false when the base-class LoadModel fails or when a std::exception is thrown
// (the exception is logged); returns true otherwise.
// Unlike Initialize and LoadModelFromFolder, this function does not set modelType /
// detectionType and does not adjust the thresholds.
bool ANSMOVIENET::LoadModel(const std::string& modelZipFilePath, const std::string& modelZipPassword) {
    try {
        bool result = ANSODBase::LoadModel(modelZipFilePath, modelZipPassword);
        if (!result) return false;

        int detectedW = 0, detectedH = 0;
        std::string onnxModel = ResolveMovinetModel(_modelFolder, detectedW, detectedH);
        _modelConfig.inpWidth = detectedW;
        _modelConfig.inpHeight = detectedH;

        unsigned int numThreads = 1;
        _movienet_detector = std::make_unique( onnxModel, TEMPORAL_LENGTH, detectedW, detectedH, 3, numThreads);
        _isInitialized = true;
        return _isInitialized;
    }
    catch (std::exception& e) {
        this->_logger.LogFatal("ANSMOVIENET::LoadModel", e.what(), __FILE__, __LINE__);
        return false;
    }
}

// Loads the model through the base class, then resolves the model file inside
// modelFolder (honouring modelName when it is non-empty) and creates the detector.
//
// Returns false when the base-class LoadModelFromFolder fails or when a std::exception
// is thrown (the exception is logged); returns true otherwise.
bool ANSMOVIENET::LoadModelFromFolder(std::string licenseKey, ModelConfig modelConfig, std::string modelName, std::string className, const std::string& modelFolder, std::string& labelMap) {
    try {
        bool result = ANSODBase::LoadModelFromFolder(licenseKey, modelConfig, modelName, className, modelFolder, labelMap);
        if (!result) return false;

        _modelConfig = modelConfig;
        _modelConfig.modelType = ModelType::MOVIENET;
        _modelConfig.detectionType = DetectionType::CLASSIFICATION;

        // Resolve model path and input dimensions.
        // If modelName is given (e.g. "Violence0305_A2"), uses that exact model;
        // otherwise scans the folder for the best _A variant.
        int detectedW = 0, detectedH = 0;
        std::string onnxModel = ResolveMovinetModel(modelFolder, detectedW, detectedH, modelName);
        _modelConfig.inpWidth = detectedW;
        _modelConfig.inpHeight = detectedH;

        // Thresholds below 0.2 are replaced with 0.5.
        if (_modelConfig.modelMNSThreshold < 0.2) _modelConfig.modelMNSThreshold = 0.5;
        if (_modelConfig.modelConfThreshold < 0.2) _modelConfig.modelConfThreshold = 0.5;

        // Drop a detector left over from an earlier initialization before building a new one.
        if (_isInitialized) {
            _movienet_detector.reset();
            _isInitialized = false;
        }

        unsigned int numThreads = 1;
        this->_movienet_detector = std::make_unique( onnxModel, TEMPORAL_LENGTH, detectedW, detectedH, 3, numThreads);
        _isInitialized = true;
        return _isInitialized;
    }
    catch (std::exception& e) {
        // NOTE(review): the log tag names LoadModel although this is LoadModelFromFolder.
        this->_logger.LogFatal("ANSMOVIENET::LoadModel", e.what(), __FILE__, __LINE__);
        return false;
    }
}

// Reports the folder of the existing model file; no optimization work is done here.
//
// fp16 is not read by this function.
// optimizedModelFolder receives the parent folder of _modelFilePath when that file
// exists, or an empty string when it does not.
// Returns true when the file exists, false otherwise.
bool ANSMOVIENET::OptimizeModel(bool fp16, std::string& optimizedModelFolder) {
    if (FileExist(_modelFilePath)) {
        optimizedModelFolder = GetParentFolder(_modelFilePath);
        this->_logger.LogDebug("ANSMOVIENET::OptimizeModel", "This model is optimized. No need other optimization.", __FILE__, __LINE__);
        return true;
    }
    else {
        optimizedModelFolder = "";
        this->_logger.LogFatal("ANSMOVIENET::OptimizeModel", "This model is not exist. Please check the model path again.", __FILE__, __LINE__);
        return false;
    }
}

// Releases the detector and the per-camera queues via Destroy(). A std::exception
// escaping Destroy() is logged here instead of leaving the destructor.
ANSMOVIENET::~ANSMOVIENET() {
    try {
        Destroy();
    }
    catch (std::exception& e) {
        this->_logger.LogFatal("ANSMOVIENET::Destroy", e.what(), __FILE__, __LINE__);
    }
}

// Clears all per-camera queues, resets the frame counter, releases the detector and
// marks the object as not initialized. Takes _mutex for the duration.
// Returns true on success, false if a std::exception was thrown (and logged).
bool ANSMOVIENET::Destroy() {
    try {
        std::lock_guard lock(_mutex);
        _cameraQueues.clear();
        _globalFrameCounter = 0;
        _movienet_detector.reset();
        _isInitialized = false;
        return true;
    }
    catch (std::exception& e) {
        this->_logger.LogFatal("ANSMOVIENET::Destroy", e.what(), __FILE__, __LINE__);
        return false;
    }
}

// Inference functions

// Runs Inference() for the given camera and, when _trackerEnabled is set, passes the
// result through ApplyTracking(); StabilizeDetections() is applied only when tracking
// is enabled and _stabilizationEnabled is set as well.
std::vector ANSMOVIENET::RunInference(const cv::Mat& input, const std::string& camera_id) {
    std::vector result = Inference(input, camera_id);
    if (_trackerEnabled) {
        result = ApplyTracking(result, camera_id);
        if (_stabilizationEnabled) result = StabilizeDetections(result, camera_id);
    }
    return result;
}

// Convenience overload that uses the fixed camera id "MovienetCam". It returns the raw
// Inference() result: tracking and stabilization are not applied on this path.
std::vector ANSMOVIENET::RunInference(const cv::Mat& input) {
    return Inference(input, "MovienetCam");
}

// Drops per-camera frame queues that are no longer in use: first every queue whose last
// access is more than STALE_THRESHOLD frames old, then — if the map is still larger
// than MAX_QUEUES — the least recently accessed ones until the cap is met.
// Does not take _mutex itself.
void ANSMOVIENET::CleanupStaleQueues() {
    // Called internally — already under lock from Inference()
    if (_cameraQueues.empty()) return;

    // 1. Remove queues not accessed for STALE_THRESHOLD frames
    for (auto it = _cameraQueues.begin(); it != _cameraQueues.end(); ) {
        int age = _globalFrameCounter - it->second.lastAccessFrame;
        if (age > STALE_THRESHOLD) {
            it = _cameraQueues.erase(it); // erase returns the next valid iterator
        }
        else {
            ++it;
        }
    }

    // 2. If still over hard cap, remove oldest queues first
    if (static_cast(_cameraQueues.size()) > MAX_QUEUES) {
        // Collect and sort by last access time
        std::vector> accessTimes;
        accessTimes.reserve(_cameraQueues.size());
        for (const auto& [key, state] : _cameraQueues) {
            accessTimes.emplace_back(state.lastAccessFrame, key);
        }
        // Ascending order: the smallest lastAccessFrame (least recently used) comes first.
        std::sort(accessTimes.begin(), accessTimes.end());

        // Remove oldest until under cap
        int toRemove = static_cast(_cameraQueues.size()) - MAX_QUEUES;
        for (int i = 0; i < toRemove && i < static_cast(accessTimes.size()); ++i) {
            _cameraQueues.erase(accessTimes[i].second);
        }
    }
}

// Feeds one frame into the sliding window kept for camera_id and, when the window is
// full and the stride condition holds, runs the detector on the window.
//
// Returns at most one Object whose box covers the whole input frame. The result is
// empty when the model is not ready, the frame is empty, no inference was due for this
// frame, the detector reported a negative class index, or an exception was caught.
// Holds _mutex for the whole call.
std::vector ANSMOVIENET::Inference(const cv::Mat& input, const std::string& camera_id) {
    std::lock_guard lock(_mutex);
    std::vector detectedObjects;

    if (!_isInitialized || !_licenseValid || !_movienet_detector) {
        this->_logger.LogError("ANSMOVIENET::Inference", "Model is not initialized or license is not valid", __FILE__, __LINE__);
        return detectedObjects;
    }
    if (input.empty()) {
        this->_logger.LogError("ANSMOVIENET::Inference", "Input frame is empty", __FILE__, __LINE__);
        return detectedObjects;
    }

    try {
        _globalFrameCounter++;
        // NOTE(review): both branches of this conditional produce the same value as
        // camera_id itself — possibly a default key was intended for the empty case.
        std::string cameraKey = camera_id.empty() ? "" : camera_id;
        auto& state = _cameraQueues[cameraKey]; // creates the entry on first use

        // Update access timestamp
        state.lastAccessFrame = _globalFrameCounter;
        state.frameCount++;

        // Add frame to queue
        // (a copy is stored, so the queue does not share pixel data with the caller's Mat)
        state.frames.push_back(input.clone());

        // Maintain queue at TEMPORAL_LENGTH
        while (state.frames.size() > static_cast(TEMPORAL_LENGTH)) {
            state.frames.pop_front();
        }

        // Run inference when:
        // 1. Full window available
        // 2. Stride condition met
        bool hasFullWindow = (state.frames.size() == static_cast(TEMPORAL_LENGTH));
        // First inference happens on frame number TEMPORAL_LENGTH, then every
        // _inferenceStride frames after that.
        // NOTE(review): if _inferenceStride can be 0 the modulo below divides by zero —
        // confirm it is validated where it is set.
        bool strideReady = (state.frameCount == TEMPORAL_LENGTH) || (state.frameCount > TEMPORAL_LENGTH && (state.frameCount - TEMPORAL_LENGTH) % _inferenceStride == 0);

        if (hasFullWindow && strideReady) {
            std::pair result;
            _movienet_detector->inference(state.frames, result);
            // result.first is used as the class index and result.second as the confidence.
            if (result.first >= 0) {
                Object obj;
                obj.classId = result.first;
                // Fall back to "Unknown" when the index is outside _classes.
                obj.className = (result.first < static_cast(_classes.size())) ? _classes[result.first] : "Unknown";
                obj.confidence = result.second;
                obj.box = cv::Rect(0, 0, input.cols, input.rows);
                detectedObjects.push_back(obj);
            }
        }

        // Store in base class camera data
        // (the result is pushed even when it is empty; the queue is capped at QUEUE_SIZE)
        CameraData& cameraData = GetCameraData(cameraKey);
        cameraData._detectionQueue.push_back(detectedObjects);
        if (cameraData._detectionQueue.size() > QUEUE_SIZE) {
            cameraData._detectionQueue.pop_front();
        }

        // ----- Periodic self-cleanup -----
        if (_globalFrameCounter % CLEANUP_INTERVAL == 0) {
            CleanupStaleQueues();
        }
    }
    catch (const std::exception& e) {
        this->_logger.LogError("ANSMOVIENET::Inference", std::string("Exception during inference: ") + e.what(), __FILE__, __LINE__);
        detectedObjects.clear();
    }
    catch (...) {
        this->_logger.LogError("ANSMOVIENET::Inference", "Unknown exception during inference", __FILE__, __LINE__);
        detectedObjects.clear();
    }
    return detectedObjects;
}
}