369 lines
15 KiB
C++
369 lines
15 KiB
C++
|
|
#include "Movienet.h"
|
|||
|
|
#include <filesystem>
|
|||
|
|
namespace ANSCENTER {
|
|||
|
|
|
|||
|
|
// Extract input size from _A<digit> suffix just before the file extension.
|
|||
|
|
// _A0 / _A1 → 172 _A2 → 224 _A3 → 256 _A4 → 290 _A5 → 320
|
|||
|
|
// Examples: Violence0305_A5.onnx → 320
|
|||
|
|
// MyModel0908_A4.onnx → 290
|
|||
|
|
// Returns 0 if the suffix is not recognised.
|
|||
|
|
static int GetMovinetSizeFromFilename(const std::string& filename)
|
|||
|
|
{
|
|||
|
|
static constexpr int variantSize[] = {
|
|||
|
|
172, // A0
|
|||
|
|
172, // A1
|
|||
|
|
224, // A2
|
|||
|
|
256, // A3
|
|||
|
|
290, // A4
|
|||
|
|
320 // A5
|
|||
|
|
};
|
|||
|
|
|
|||
|
|
// Strip extension — find last '.'
|
|||
|
|
const size_t dotPos = filename.rfind('.');
|
|||
|
|
if (dotPos == std::string::npos || dotPos < 3) return 0;
|
|||
|
|
|
|||
|
|
// Expect 3-char suffix just before the dot: _A<digit>
|
|||
|
|
const char chUnderscore = filename[dotPos - 3];
|
|||
|
|
const char chA = filename[dotPos - 2];
|
|||
|
|
const char chDigit = filename[dotPos - 1];
|
|||
|
|
|
|||
|
|
if (chUnderscore == '_' &&
|
|||
|
|
(chA == 'A' || chA == 'a') &&
|
|||
|
|
chDigit >= '0' && chDigit <= '5')
|
|||
|
|
{
|
|||
|
|
return variantSize[chDigit - '0'];
|
|||
|
|
}
|
|||
|
|
return 0;
|
|||
|
|
}
|
|||
|
|
|
|||
|
|
// Scan folder for any .onnx whose stem ends with _A0.._A5 (highest first).
|
|||
|
|
// Returns the full path + matching input dimensions.
|
|||
|
|
// If modelName is provided (e.g. "Violence0305_A2"), uses that exact model
|
|||
|
|
// and resolves dimensions from its _A<digit> suffix.
|
|||
|
|
// Falls back to "movinet.onnx" with 172×172 if nothing matches.
|
|||
|
|
static std::string ResolveMovinetModel(const std::string& folder,
|
|||
|
|
int& outWidth, int& outHeight,
|
|||
|
|
const std::string& modelName = "")
|
|||
|
|
{
|
|||
|
|
// If a specific model name was given, use it directly
|
|||
|
|
if (!modelName.empty()) {
|
|||
|
|
std::string fname = modelName;
|
|||
|
|
// Append .onnx if not already present
|
|||
|
|
if (fname.size() < 5 || fname.substr(fname.size() - 5) != ".onnx") {
|
|||
|
|
fname += ".onnx";
|
|||
|
|
}
|
|||
|
|
std::string fullPath = CreateFilePath(folder, fname);
|
|||
|
|
int sz = GetMovinetSizeFromFilename(fname);
|
|||
|
|
if (sz > 0) {
|
|||
|
|
outWidth = sz;
|
|||
|
|
outHeight = sz;
|
|||
|
|
std::cout << "ANSMOVIENET: Using specified model '" << fname << "' with input size " << sz << "x" << sz << "\n";
|
|||
|
|
}
|
|||
|
|
else {
|
|||
|
|
// Model name given but no recognized _A<digit> suffix — use default
|
|||
|
|
outWidth = 172;
|
|||
|
|
outHeight = 172;
|
|||
|
|
std::cout << "ANSMOVIENET: Using specified model '" << fname << "' but failed to detect input size from filename. Defaulting to 172x172.\n";
|
|||
|
|
}
|
|||
|
|
return fullPath;
|
|||
|
|
}
|
|||
|
|
|
|||
|
|
// No model name specified — scan folder, highest variant first
|
|||
|
|
static const std::string suffixes[] = { "_A5", "_A4", "_A3", "_A2", "_A1", "_A0" };
|
|||
|
|
|
|||
|
|
try {
|
|||
|
|
namespace fs = std::filesystem;
|
|||
|
|
if (fs::is_directory(folder)) {
|
|||
|
|
for (const auto& suffix : suffixes) {
|
|||
|
|
for (const auto& entry : fs::directory_iterator(folder)) {
|
|||
|
|
if (!entry.is_regular_file()) continue;
|
|||
|
|
const std::string fname = entry.path().filename().string();
|
|||
|
|
// Must be .onnx
|
|||
|
|
if (fname.size() < 5) continue;
|
|||
|
|
std::string ext = fname.substr(fname.size() - 5);
|
|||
|
|
// Case-insensitive .onnx check
|
|||
|
|
for (auto& c : ext) c = static_cast<char>(std::tolower(static_cast<unsigned char>(c)));
|
|||
|
|
if (ext != ".onnx") continue;
|
|||
|
|
|
|||
|
|
// Check if stem ends with the current suffix
|
|||
|
|
const std::string stem = fname.substr(0, fname.size() - 5);
|
|||
|
|
if (stem.size() >= suffix.size() &&
|
|||
|
|
stem.compare(stem.size() - suffix.size(), suffix.size(), suffix) == 0)
|
|||
|
|
{
|
|||
|
|
int sz = GetMovinetSizeFromFilename(fname);
|
|||
|
|
if (sz > 0) {
|
|||
|
|
outWidth = sz;
|
|||
|
|
outHeight = sz;
|
|||
|
|
return entry.path().string();
|
|||
|
|
}
|
|||
|
|
}
|
|||
|
|
}
|
|||
|
|
}
|
|||
|
|
}
|
|||
|
|
}
|
|||
|
|
catch (...) {
|
|||
|
|
// Filesystem error — fall through to legacy
|
|||
|
|
std::cout << "ANSMOVIENET: Error scanning model folder '. Falling back to default model.\n";
|
|||
|
|
}
|
|||
|
|
|
|||
|
|
// Legacy fallback
|
|||
|
|
outWidth = 172;
|
|||
|
|
outHeight = 172;
|
|||
|
|
return CreateFilePath(folder, "movinet.onnx");
|
|||
|
|
}
|
|||
|
|
|
|||
|
|
bool ANSMOVIENET::Initialize(std::string licenseKey, ModelConfig modelConfig, const std::string& modelZipFilePath, const std::string& modelZipPassword, std::string& labelMap) {
|
|||
|
|
bool result = ANSODBase::Initialize(licenseKey, modelConfig, modelZipFilePath, modelZipPassword, labelMap);
|
|||
|
|
labelMap = "Face";
|
|||
|
|
_licenseValid = true;
|
|||
|
|
std::vector<std::string> labels{ labelMap };
|
|||
|
|
if (!_licenseValid) return false;
|
|||
|
|
try {
|
|||
|
|
_modelConfig = modelConfig;
|
|||
|
|
_modelConfig.modelType = ModelType::MOVIENET;
|
|||
|
|
_modelConfig.detectionType = DetectionType::CLASSIFICATION;
|
|||
|
|
|
|||
|
|
// Auto-detect model variant and matching input size
|
|||
|
|
int detectedW = 0, detectedH = 0;
|
|||
|
|
std::string onnxModel = ResolveMovinetModel(_modelFolder, detectedW, detectedH);
|
|||
|
|
_modelConfig.inpHeight = detectedH;
|
|||
|
|
_modelConfig.inpWidth = detectedW;
|
|||
|
|
|
|||
|
|
if (_modelConfig.modelMNSThreshold < 0.2)
|
|||
|
|
_modelConfig.modelMNSThreshold = 0.5;
|
|||
|
|
if (_modelConfig.modelConfThreshold < 0.2)
|
|||
|
|
_modelConfig.modelConfThreshold = 0.5;
|
|||
|
|
if (_isInitialized) {
|
|||
|
|
_movienet_detector.reset();
|
|||
|
|
_isInitialized = false;
|
|||
|
|
}
|
|||
|
|
unsigned int numThreads = 1;
|
|||
|
|
this->_movienet_detector = std::make_unique<MOVINET>(
|
|||
|
|
onnxModel, TEMPORAL_LENGTH, detectedW, detectedH, 3, numThreads);
|
|||
|
|
_isInitialized = true;
|
|||
|
|
return true;
|
|||
|
|
}
|
|||
|
|
catch (const std::exception& e) {
|
|||
|
|
this->_logger.LogFatal("ANSMOVIENET::Initialize", e.what(), __FILE__, __LINE__);
|
|||
|
|
return false;
|
|||
|
|
}
|
|||
|
|
}
|
|||
|
|
bool ANSMOVIENET::LoadModel(const std::string& modelZipFilePath, const std::string& modelZipPassword) {
|
|||
|
|
try {
|
|||
|
|
bool result = ANSODBase::LoadModel(modelZipFilePath, modelZipPassword);
|
|||
|
|
if (!result) return false;
|
|||
|
|
|
|||
|
|
int detectedW = 0, detectedH = 0;
|
|||
|
|
std::string onnxModel = ResolveMovinetModel(_modelFolder, detectedW, detectedH);
|
|||
|
|
_modelConfig.inpWidth = detectedW;
|
|||
|
|
_modelConfig.inpHeight = detectedH;
|
|||
|
|
|
|||
|
|
unsigned int numThreads = 1;
|
|||
|
|
_movienet_detector = std::make_unique<MOVINET>(
|
|||
|
|
onnxModel, TEMPORAL_LENGTH, detectedW, detectedH, 3, numThreads);
|
|||
|
|
|
|||
|
|
_isInitialized = true;
|
|||
|
|
return _isInitialized;
|
|||
|
|
}
|
|||
|
|
catch (std::exception& e) {
|
|||
|
|
this->_logger.LogFatal("ANSMOVIENET::LoadModel", e.what(), __FILE__, __LINE__);
|
|||
|
|
return false;
|
|||
|
|
}
|
|||
|
|
}
|
|||
|
|
bool ANSMOVIENET::LoadModelFromFolder(std::string licenseKey, ModelConfig modelConfig, std::string modelName, std::string className, const std::string& modelFolder, std::string& labelMap) {
|
|||
|
|
try {
|
|||
|
|
bool result = ANSODBase::LoadModelFromFolder(licenseKey, modelConfig, modelName, className, modelFolder, labelMap);
|
|||
|
|
if (!result) return false;
|
|||
|
|
|
|||
|
|
_modelConfig = modelConfig;
|
|||
|
|
_modelConfig.modelType = ModelType::MOVIENET;
|
|||
|
|
_modelConfig.detectionType = DetectionType::CLASSIFICATION;
|
|||
|
|
|
|||
|
|
// Resolve model path and input dimensions.
|
|||
|
|
// If modelName is given (e.g. "Violence0305_A2"), uses that exact model;
|
|||
|
|
// otherwise scans the folder for the best _A<digit> variant.
|
|||
|
|
int detectedW = 0, detectedH = 0;
|
|||
|
|
std::string onnxModel = ResolveMovinetModel(modelFolder, detectedW, detectedH, modelName);
|
|||
|
|
_modelConfig.inpWidth = detectedW;
|
|||
|
|
_modelConfig.inpHeight = detectedH;
|
|||
|
|
|
|||
|
|
if (_modelConfig.modelMNSThreshold < 0.2)
|
|||
|
|
_modelConfig.modelMNSThreshold = 0.5;
|
|||
|
|
if (_modelConfig.modelConfThreshold < 0.2)
|
|||
|
|
_modelConfig.modelConfThreshold = 0.5;
|
|||
|
|
if (_isInitialized) {
|
|||
|
|
_movienet_detector.reset();
|
|||
|
|
_isInitialized = false;
|
|||
|
|
}
|
|||
|
|
unsigned int numThreads = 1;
|
|||
|
|
this->_movienet_detector = std::make_unique<MOVINET>(
|
|||
|
|
onnxModel, TEMPORAL_LENGTH, detectedW, detectedH, 3, numThreads);
|
|||
|
|
_isInitialized = true;
|
|||
|
|
return _isInitialized;
|
|||
|
|
}
|
|||
|
|
catch (std::exception& e) {
|
|||
|
|
this->_logger.LogFatal("ANSMOVIENET::LoadModel", e.what(), __FILE__, __LINE__);
|
|||
|
|
return false;
|
|||
|
|
}
|
|||
|
|
}
|
|||
|
|
bool ANSMOVIENET::OptimizeModel(bool fp16, std::string& optimizedModelFolder) {
|
|||
|
|
if (FileExist(_modelFilePath)) {
|
|||
|
|
optimizedModelFolder = GetParentFolder(_modelFilePath);
|
|||
|
|
this->_logger.LogDebug("ANSMOVIENET::OptimizeModel", "This model is optimized. No need other optimization.", __FILE__, __LINE__);
|
|||
|
|
return true;
|
|||
|
|
}
|
|||
|
|
else {
|
|||
|
|
optimizedModelFolder = "";
|
|||
|
|
this->_logger.LogFatal("ANSMOVIENET::OptimizeModel", "This model is not exist. Please check the model path again.", __FILE__, __LINE__);
|
|||
|
|
return false;
|
|||
|
|
}
|
|||
|
|
}
|
|||
|
|
ANSMOVIENET::~ANSMOVIENET() {
|
|||
|
|
try {
|
|||
|
|
Destroy();
|
|||
|
|
}
|
|||
|
|
catch (std::exception& e) {
|
|||
|
|
this->_logger.LogFatal("ANSMOVIENET::Destroy", e.what(), __FILE__, __LINE__);
|
|||
|
|
}
|
|||
|
|
}
|
|||
|
|
bool ANSMOVIENET::Destroy() {
|
|||
|
|
try {
|
|||
|
|
std::lock_guard<std::recursive_mutex> lock(_mutex);
|
|||
|
|
_cameraQueues.clear();
|
|||
|
|
_globalFrameCounter = 0;
|
|||
|
|
_movienet_detector.reset();
|
|||
|
|
_isInitialized = false;
|
|||
|
|
return true;
|
|||
|
|
}
|
|||
|
|
catch (std::exception& e) {
|
|||
|
|
this->_logger.LogFatal("ANSMOVIENET::Destroy", e.what(), __FILE__, __LINE__);
|
|||
|
|
return false;
|
|||
|
|
}
|
|||
|
|
}
|
|||
|
|
|
|||
|
|
// Inference functions
|
|||
|
|
std::vector<Object> ANSMOVIENET::RunInference(const cv::Mat& input, const std::string& camera_id) {
|
|||
|
|
std::vector<Object> result = Inference(input, camera_id);
|
|||
|
|
if (_trackerEnabled) {
|
|||
|
|
result = ApplyTracking(result, camera_id);
|
|||
|
|
if (_stabilizationEnabled) result = StabilizeDetections(result, camera_id);
|
|||
|
|
}
|
|||
|
|
return result;
|
|||
|
|
}
|
|||
|
|
std::vector<Object> ANSMOVIENET::RunInference(const cv::Mat& input) {
|
|||
|
|
return Inference(input, "MovienetCam");
|
|||
|
|
}
|
|||
|
|
|
|||
|
|
void ANSMOVIENET::CleanupStaleQueues() {
|
|||
|
|
// Called internally <20> already under lock from Inference()
|
|||
|
|
if (_cameraQueues.empty()) return;
|
|||
|
|
// 1. Remove queues not accessed for STALE_THRESHOLD frames
|
|||
|
|
for (auto it = _cameraQueues.begin(); it != _cameraQueues.end(); ) {
|
|||
|
|
int age = _globalFrameCounter - it->second.lastAccessFrame;
|
|||
|
|
if (age > STALE_THRESHOLD) {
|
|||
|
|
it = _cameraQueues.erase(it);
|
|||
|
|
}
|
|||
|
|
else {
|
|||
|
|
++it;
|
|||
|
|
}
|
|||
|
|
}
|
|||
|
|
|
|||
|
|
// 2. If still over hard cap, remove oldest queues first
|
|||
|
|
if (static_cast<int>(_cameraQueues.size()) > MAX_QUEUES) {
|
|||
|
|
// Collect and sort by last access time
|
|||
|
|
std::vector<std::pair<int, std::string>> accessTimes;
|
|||
|
|
accessTimes.reserve(_cameraQueues.size());
|
|||
|
|
for (const auto& [key, state] : _cameraQueues) {
|
|||
|
|
accessTimes.emplace_back(state.lastAccessFrame, key);
|
|||
|
|
}
|
|||
|
|
std::sort(accessTimes.begin(), accessTimes.end());
|
|||
|
|
|
|||
|
|
// Remove oldest until under cap
|
|||
|
|
int toRemove = static_cast<int>(_cameraQueues.size()) - MAX_QUEUES;
|
|||
|
|
for (int i = 0; i < toRemove && i < static_cast<int>(accessTimes.size()); ++i) {
|
|||
|
|
_cameraQueues.erase(accessTimes[i].second);
|
|||
|
|
}
|
|||
|
|
}
|
|||
|
|
}
|
|||
|
|
std::vector<Object> ANSMOVIENET::Inference(const cv::Mat& input, const std::string& camera_id) {
|
|||
|
|
std::lock_guard<std::recursive_mutex> lock(_mutex);
|
|||
|
|
std::vector<Object> detectedObjects;
|
|||
|
|
if (!_isInitialized || !_licenseValid || !_movienet_detector) {
|
|||
|
|
this->_logger.LogError("ANSMOVIENET::Inference",
|
|||
|
|
"Model is not initialized or license is not valid", __FILE__, __LINE__);
|
|||
|
|
return detectedObjects;
|
|||
|
|
}
|
|||
|
|
if (input.empty()) {
|
|||
|
|
this->_logger.LogError("ANSMOVIENET::Inference",
|
|||
|
|
"Input frame is empty", __FILE__, __LINE__);
|
|||
|
|
return detectedObjects;
|
|||
|
|
}
|
|||
|
|
|
|||
|
|
try {
|
|||
|
|
_globalFrameCounter++;
|
|||
|
|
|
|||
|
|
std::string cameraKey = camera_id.empty() ? "" : camera_id;
|
|||
|
|
auto& state = _cameraQueues[cameraKey];
|
|||
|
|
|
|||
|
|
// Update access timestamp
|
|||
|
|
state.lastAccessFrame = _globalFrameCounter;
|
|||
|
|
state.frameCount++;
|
|||
|
|
|
|||
|
|
// Add frame to queue
|
|||
|
|
state.frames.push_back(input.clone());
|
|||
|
|
|
|||
|
|
// Maintain queue at TEMPORAL_LENGTH
|
|||
|
|
while (state.frames.size() > static_cast<size_t>(TEMPORAL_LENGTH)) {
|
|||
|
|
state.frames.pop_front();
|
|||
|
|
}
|
|||
|
|
|
|||
|
|
// Run inference when:
|
|||
|
|
// 1. Full window available
|
|||
|
|
// 2. Stride condition met
|
|||
|
|
bool hasFullWindow = (state.frames.size() == static_cast<size_t>(TEMPORAL_LENGTH));
|
|||
|
|
bool strideReady = (state.frameCount == TEMPORAL_LENGTH)
|
|||
|
|
|| (state.frameCount > TEMPORAL_LENGTH
|
|||
|
|
&& (state.frameCount - TEMPORAL_LENGTH) % _inferenceStride == 0);
|
|||
|
|
|
|||
|
|
if (hasFullWindow && strideReady) {
|
|||
|
|
std::pair<int, float> result;
|
|||
|
|
_movienet_detector->inference(state.frames, result);
|
|||
|
|
|
|||
|
|
if (result.first >= 0) {
|
|||
|
|
Object obj;
|
|||
|
|
obj.classId = result.first;
|
|||
|
|
obj.className = (result.first < static_cast<int>(_classes.size())) ?
|
|||
|
|
_classes[result.first] : "Unknown";
|
|||
|
|
obj.confidence = result.second;
|
|||
|
|
obj.box = cv::Rect(0, 0, input.cols, input.rows);
|
|||
|
|
detectedObjects.push_back(obj);
|
|||
|
|
}
|
|||
|
|
}
|
|||
|
|
|
|||
|
|
// Store in base class camera data
|
|||
|
|
CameraData& cameraData = GetCameraData(cameraKey);
|
|||
|
|
cameraData._detectionQueue.push_back(detectedObjects);
|
|||
|
|
if (cameraData._detectionQueue.size() > QUEUE_SIZE) {
|
|||
|
|
cameraData._detectionQueue.pop_front();
|
|||
|
|
}
|
|||
|
|
|
|||
|
|
// ----- Periodic self-cleanup -----
|
|||
|
|
if (_globalFrameCounter % CLEANUP_INTERVAL == 0) {
|
|||
|
|
CleanupStaleQueues();
|
|||
|
|
}
|
|||
|
|
}
|
|||
|
|
catch (const std::exception& e) {
|
|||
|
|
this->_logger.LogError("ANSMOVIENET::Inference",
|
|||
|
|
std::string("Exception during inference: ") + e.what(), __FILE__, __LINE__);
|
|||
|
|
detectedObjects.clear();
|
|||
|
|
}
|
|||
|
|
catch (...) {
|
|||
|
|
this->_logger.LogError("ANSMOVIENET::Inference",
|
|||
|
|
"Unknown exception during inference", __FILE__, __LINE__);
|
|||
|
|
detectedObjects.clear();
|
|||
|
|
}
|
|||
|
|
|
|||
|
|
return detectedObjects;
|
|||
|
|
}
|
|||
|
|
|
|||
|
|
}
|