Files
ANSCORE/engines/ONNXEngine/ONNXEngine.cpp

1489 lines
60 KiB
C++
Raw Normal View History

2026-03-28 16:54:11 +11:00
#include "ONNXEngine.h"
#include "EPLoader.h"
#include "Utility.h"
#include <algorithm>
#include <filesystem>
#include <fstream>
#include <iostream>
#include <limits>
#include <system_error>
#include <unordered_map>
namespace ANSCENTER {
// ====================================================================
// BasicOrtHandler — constructors
// ====================================================================
// Construct a handler for the given model path; the engine type is left at
// the -1 sentinel so initialize_handler() resolves it from EPLoader.
//
// NOTE(review): onnx_path_w widens the narrow path char-by-char, which is
// only correct for ASCII paths — confirm callers never pass non-ASCII
// model paths on Windows.
// NOTE(review): log_id keeps a pointer into the caller's string; it is
// consumed by Ort::Env inside initialize_handler() while that string is
// still alive, but must not be dereferenced after construction — verify.
BasicOrtHandler::BasicOrtHandler(const std::string& _onnx_path,
    unsigned int _num_threads)
    : log_id(_onnx_path.data()),
    num_threads(_num_threads),
    m_engineType(static_cast<EngineType>(-1)), // sentinel: resolve from EPLoader
    onnx_path_w(_onnx_path.begin(), _onnx_path.end()) // ← stored as member
{
    onnx_path = onnx_path_w.c_str(); // ← safe, member owns storage
    initialize_handler();
}
// Construct a handler with an explicitly chosen execution engine, bypassing
// EPLoader's process-wide default.
//
// NOTE(review): same ASCII-only path widening and log_id lifetime caveats
// as the two-argument constructor above — verify with callers.
BasicOrtHandler::BasicOrtHandler(const std::string& _onnx_path,
    EngineType engineType,
    unsigned int _num_threads)
    : log_id(_onnx_path.data()),
    num_threads(_num_threads),
    m_engineType(engineType),
    onnx_path_w(_onnx_path.begin(), _onnx_path.end()) // ← stored as member
{
    onnx_path = onnx_path_w.c_str(); // ← safe, member owns storage
    initialize_handler();
}
// Tear down in reverse order of creation: session first, then the memory
// info, and finally the environment the session was created in.
// delete on a null pointer is a no-op, so no guards are required.
BasicOrtHandler::~BasicOrtHandler()
{
    delete ort_session;
    ort_session = nullptr;
    delete memory_info_handler;
    memory_info_handler = nullptr;
    delete ort_env;
    ort_env = nullptr;
}
// ====================================================================
// EP appenders
// ====================================================================
// Attach the CUDA execution provider with a memory-safe configuration.
// Returns true on success, false (after logging) on any ORT failure.
//
// FIX: the previous version ignored the OrtStatus* returned by the C-API
// calls (Create/UpdateCUDAProviderOptions), so a failed update went
// unnoticed, and it leaked cuda_options when AppendExecutionProvider_CUDA_V2
// threw (ReleaseCUDAProviderOptions was only on the success path).
bool BasicOrtHandler::TryAppendCUDA(Ort::SessionOptions& session_options)
{
    OrtCUDAProviderOptionsV2* cuda_options = nullptr;
    try {
        Ort::ThrowOnError(Ort::GetApi().CreateCUDAProviderOptions(&cuda_options));
        // Memory-safe GPU configuration for multi-model environments:
        // - arena_extend_strategy = 1 (kSameAsRequested) to avoid
        //   pre-allocating huge GPU memory blocks that may exceed VRAM
        // - cudnn_conv_algo_search = HEURISTIC for faster session init
        // - cudnn_conv_use_max_workspace = 0 — use minimal cuDNN workspace
        //   to prevent CUDNN_BACKEND_API_FAILED when TRT engines already
        //   occupy most VRAM on the same GPU
        // - gpu_mem_limit — cap ONNX Runtime's GPU memory arena to 2 GB
        //   so it doesn't compete with TensorRT for the remaining VRAM
        const char* keys[] = {
            "device_id",
            "arena_extend_strategy",
            "cudnn_conv_algo_search",
            "cudnn_conv_use_max_workspace",
            "gpu_mem_limit"
        };
        const char* values[] = {
            "0",
            "1",          // kSameAsRequested
            "HEURISTIC",  // avoid exhaustive algo search on large model
            "0",          // minimal cuDNN workspace (prevents OOM)
            "2147483648"  // 2 GB arena limit
        };
        Ort::ThrowOnError(Ort::GetApi().UpdateCUDAProviderOptions(
            cuda_options, keys, values, 5));
        session_options.AppendExecutionProvider_CUDA_V2(*cuda_options);
        Ort::GetApi().ReleaseCUDAProviderOptions(cuda_options);
        cuda_options = nullptr;
        std::cout << "[ORT] CUDA EP attached (arena=SameAsRequested, "
            "cudnn=HEURISTIC, maxWorkspace=0, memLimit=2GB)." << std::endl;
        return true;
    }
    catch (const Ort::Exception& e) {
        // Release the options struct on the failure path too (leak fix).
        if (cuda_options)
            Ort::GetApi().ReleaseCUDAProviderOptions(cuda_options);
        std::cerr << "[ORT] CUDA EP failed: " << e.what() << std::endl;
        return false;
    }
}
// Attach the DirectML execution provider on adapter 0.
// Returns true on success, false (after logging) on failure.
bool BasicOrtHandler::TryAppendDirectML(Ort::SessionOptions& session_options)
{
    // AppendExecutionProvider("DML") is the correct API for DirectML —
    // there is no V2 variant, so the string-based option map is intentional.
    const std::unordered_map<std::string, std::string> dml_options{
        { "device_id", "0" }
    };
    try {
        session_options.AppendExecutionProvider("DML", dml_options);
    }
    catch (const Ort::Exception& e) {
        std::cerr << "[ORT] DirectML EP failed: " << e.what() << std::endl;
        return false;
    }
    std::cout << "[ORT] DirectML EP attached (device 0)." << std::endl;
    return true;
}
// Attach the OpenVINO execution provider, trying device configs in
// priority order (NPU → dGPU → iGPU → AUTO) and falling back gracefully.
// Returns true when any config attaches, false when all fail.
//
// FIX: removed commit-timestamp lines from a blame view that had been
// pasted into the function body and broke compilation; logic unchanged.
bool BasicOrtHandler::TryAppendOpenVINO(Ort::SessionOptions& session_options)
{
    // Use AppendExecutionProvider_OpenVINO_V2 instead of the generic string
    // API, matching the pattern used in YOLOOD/YOLO12OD/ANSONNXCL etc.
    //
    // NPU availability is probed once per process. If AUTO:NPU,GPU fails on
    // the first call, we skip it for all subsequent models to avoid repeated
    // "Failed to load shared library" errors cluttering the log.
    // NOTE(review): these statics are mutated without synchronization —
    // confirm models are always initialized from a single thread.
    static bool s_npuProbed = false;
    static bool s_npuAvailable = false;
    const std::string precision = "FP16";
    const std::string numberOfThreads = "4";
    const std::string numberOfStreams = "4";
    auto makeConfig = [&](const std::string& device) {
        return std::unordered_map<std::string, std::string>{
            {"device_type", device}, {"precision", precision},
            {"num_of_threads", numberOfThreads}, {"num_streams", numberOfStreams},
            {"enable_opencl_throttling", "False"}, {"enable_qdq_optimizer", "True"}
        };
    };
    std::vector<std::unordered_map<std::string, std::string>> try_configs;
    // Only try NPU if it hasn't been probed yet or was previously available.
    if (!s_npuProbed || s_npuAvailable) {
        try_configs.push_back(makeConfig("AUTO:NPU,GPU"));
    }
    try_configs.push_back(makeConfig("GPU.0"));
    try_configs.push_back(makeConfig("GPU.1"));
    try_configs.push_back(makeConfig("AUTO:GPU,CPU"));
    for (const auto& config : try_configs) {
        try {
            session_options.AppendExecutionProvider_OpenVINO_V2(config);
            const auto& device = config.at("device_type");
            std::cout << "[ORT] OpenVINO EP attached ("
                << device << ", " << precision << ")." << std::endl;
            ANS_DBG("OrtHandler", "OpenVINO EP attached: %s", device.c_str());
            // If an NPU config succeeded, mark it available.
            if (device.find("NPU") != std::string::npos) {
                s_npuProbed = true;
                s_npuAvailable = true;
            }
            return true;
        }
        catch (const Ort::Exception& e) {
            const auto& device = config.at("device_type");
            // If the NPU config failed, remember so we skip it next time.
            if (device.find("NPU") != std::string::npos) {
                if (!s_npuProbed) {
                    std::cout << "[ORT] NPU not available — skipping NPU configs for subsequent models." << std::endl;
                    ANS_DBG("OrtHandler", "NPU not available, will skip in future");
                }
                s_npuProbed = true;
                s_npuAvailable = false;
            } else {
                std::cerr << "[ORT] OpenVINO EP failed for device "
                    << device << ": " << e.what() << std::endl;
            }
        }
    }
    std::cerr << "[ORT] OpenVINO EP: all device configs failed." << std::endl;
    return false;
}
// ====================================================================
// initialize_handler
// ====================================================================
void BasicOrtHandler::initialize_handler()
{
2026-04-08 13:45:52 +10:00
ANS_DBG("OrtHandler", "initialize_handler: m_engineType=%d", static_cast<int>(m_engineType));
2026-03-28 16:54:11 +11:00
const auto& epInfo = EPLoader::Current();
2026-04-08 13:45:52 +10:00
ANS_DBG("OrtHandler", "initialize_handler: EPLoader type=%d dir=%s",
static_cast<int>(epInfo.type), epInfo.libraryDir.c_str());
2026-03-28 16:54:11 +11:00
if (Ort::Global<void>::api_ == nullptr)
Ort::InitApi(static_cast<const OrtApi*>(EPLoader::GetOrtApiRaw()));
std::cout << "[ORT] api_ = " << (void*)Ort::Global<void>::api_ << std::endl;
EngineType engine = (static_cast<int>(m_engineType) == -1)
? epInfo.type : m_engineType;
2026-04-08 13:45:52 +10:00
// Persist the resolved engine type so subclasses (e.g. ONNXYOLO)
// can branch on the actual EP at inference time (IoBinding for DML).
m_engineType = engine;
ANS_DBG("OrtHandler", "initialize_handler: resolved engine=%d (from %s)",
static_cast<int>(engine),
(static_cast<int>(m_engineType) == -1) ? "EPLoader" : "explicit");
2026-03-28 16:54:11 +11:00
ort_env = new Ort::Env(ORT_LOGGING_LEVEL_ERROR, log_id);
memory_info_handler = new Ort::MemoryInfo(
Ort::MemoryInfo::CreateCpu(OrtArenaAllocator, OrtMemTypeDefault));
Ort::SessionOptions session_options;
session_options.SetIntraOpNumThreads(num_threads);
// Start with full optimization — will be downgraded to DISABLE_ALL
// later if we detect a large external data file (e.g. SAM3's 3.3 GB
// .onnx_data). Normal small models keep ORT_ENABLE_ALL.
session_options.SetGraphOptimizationLevel(
GraphOptimizationLevel::ORT_ENABLE_ALL);
session_options.SetLogSeverityLevel(4);
2026-04-08 13:45:52 +10:00
// DirectML REQUIRES these two settings per ORT documentation:
// - DisableMemPattern: DML manages its own memory; ORT's memory
// pattern optimization conflicts with DML's D3D12 allocator.
// - ORT_SEQUENTIAL: DML uses a single command queue and cannot
// handle parallel execution mode — doing so causes deadlocks
// when synchronizing GPU→CPU data transfers.
if (engine == EngineType::AMD_GPU) {
session_options.DisableMemPattern();
session_options.SetExecutionMode(ExecutionMode::ORT_SEQUENTIAL);
ANS_DBG("OrtHandler", "DirectML: DisableMemPattern + ORT_SEQUENTIAL set");
}
2026-03-28 16:54:11 +11:00
std::vector<std::string> available = Ort::GetAvailableProviders();
std::cout << "[ORT] Available providers: ";
for (auto& p : available) std::cout << p << " ";
std::cout << std::endl;
//std::cout << "[ORT] Selected engine : "
// << EPLoader::EngineTypeToString(engine) << std::endl;
auto hasProvider = [&](const std::string& name) -> bool {
return std::find(available.begin(), available.end(), name)
!= available.end();
};
bool epAttached = false;
switch (engine)
{
// --------------------------------------------------------
case EngineType::NVIDIA_GPU:
2026-04-08 13:45:52 +10:00
ANS_DBG("OrtHandler", "Trying CUDA EP...");
2026-03-28 16:54:11 +11:00
if (hasProvider("CUDAExecutionProvider"))
epAttached = TryAppendCUDA(session_options);
2026-04-08 13:45:52 +10:00
if (!epAttached) {
2026-03-28 16:54:11 +11:00
std::cerr << "[ORT] CUDA EP unavailable — falling back to CPU."
<< std::endl;
2026-04-08 13:45:52 +10:00
ANS_DBG("OrtHandler", "CUDA EP FAILED — fallback to CPU");
}
2026-03-28 16:54:11 +11:00
break;
// --------------------------------------------------------
case EngineType::AMD_GPU:
2026-04-08 13:45:52 +10:00
ANS_DBG("OrtHandler", "Trying DirectML EP...");
2026-03-28 16:54:11 +11:00
if (hasProvider("DmlExecutionProvider"))
epAttached = TryAppendDirectML(session_options);
2026-04-08 13:45:52 +10:00
if (!epAttached) {
2026-03-28 16:54:11 +11:00
std::cerr << "[ORT] DirectML EP unavailable — falling back to CPU."
<< std::endl;
2026-04-08 13:45:52 +10:00
ANS_DBG("OrtHandler", "DirectML EP FAILED — fallback to CPU");
}
2026-03-28 16:54:11 +11:00
break;
// --------------------------------------------------------
case EngineType::OPENVINO_GPU:
2026-04-08 13:45:52 +10:00
ANS_DBG("OrtHandler", "Trying OpenVINO EP...");
2026-03-28 16:54:11 +11:00
if (hasProvider("OpenVINOExecutionProvider"))
epAttached = TryAppendOpenVINO(session_options);
2026-04-08 13:45:52 +10:00
if (!epAttached) {
2026-03-28 16:54:11 +11:00
std::cerr << "[ORT] OpenVINO EP unavailable — falling back to CPU."
<< std::endl;
2026-04-08 13:45:52 +10:00
ANS_DBG("OrtHandler", "OpenVINO EP FAILED — fallback to CPU");
}
2026-03-28 16:54:11 +11:00
break;
// --------------------------------------------------------
case EngineType::CPU:
default:
std::cout << "[ORT] Using CPU EP." << std::endl;
2026-04-08 13:45:52 +10:00
ANS_DBG("OrtHandler", "Using CPU EP");
2026-03-28 16:54:11 +11:00
epAttached = true;
break;
}
2026-04-08 13:45:52 +10:00
if (!epAttached) {
2026-03-28 16:54:11 +11:00
std::cout << "[ORT] Running on CPU EP (fallback)." << std::endl;
2026-04-08 13:45:52 +10:00
ANS_DBG("OrtHandler", "EP not attached — running on CPU fallback");
} else {
ANS_DBG("OrtHandler", "EP attached successfully");
}
2026-03-28 16:54:11 +11:00
// ----------------------------------------------------------------
// Create session
// ----------------------------------------------------------------
// ORT resolves external data files (e.g. .onnx_data) relative to
// the CWD rather than the model file's directory. Temporarily
// switch CWD so ORT can locate them.
//
// Additionally, ORT's internal memory-mapping of very large
// external data files (>2 GB) can crash with an access violation
// on Windows. When we detect a large .onnx_data file, we
// pre-load it with standard file I/O and pass the buffer via
// AddExternalInitializersFromFilesInMemory() so ORT never
// memory-maps the file itself.
// ----------------------------------------------------------------
std::filesystem::path modelFsPath(onnx_path); // wchar_t*
std::filesystem::path modelDir = modelFsPath.parent_path();
std::filesystem::path prevCwd = std::filesystem::current_path();
if (!modelDir.empty() && std::filesystem::is_directory(modelDir)) {
std::filesystem::current_path(modelDir);
std::cout << "[ORT] CWD -> " << modelDir.string() << std::endl;
}
// --- Pre-load external data files if they exist -----------------
// Keep the buffer alive across session creation (must outlive the
// Ort::Session constructor call).
std::vector<char> extDataBuffer;
{
// Build the expected external-data filename:
// <model_stem>.onnx_data (e.g. anssam3.onnx_data)
std::filesystem::path extDataPath =
modelDir / (modelFsPath.stem().wstring() + L".onnx_data");
if (std::filesystem::exists(extDataPath)) {
auto fileSize = std::filesystem::file_size(extDataPath);
std::cout << "[ORT] External data file found: "
<< extDataPath.string()
<< " (" << (fileSize / (1024*1024)) << " MB)" << std::endl;
// Read entire file into memory with standard I/O.
// This avoids ORT's internal memory-mapping which can crash
// with access violation for files > 2 GB on Windows.
try {
std::ifstream ifs(extDataPath, std::ios::binary);
if (!ifs) {
std::cerr << "[ORT] ERROR: Could not open external data file."
<< std::endl;
} else {
extDataBuffer.resize(static_cast<size_t>(fileSize));
std::cout << "[ORT] Reading external data into memory..."
<< std::endl;
ifs.read(extDataBuffer.data(), static_cast<std::streamsize>(fileSize));
ifs.close();
std::cout << "[ORT] External data loaded ("
<< extDataBuffer.size() << " bytes)." << std::endl;
// Tell ORT to use our in-memory buffer instead of
// memory-mapping the file.
std::vector<std::basic_string<ORTCHAR_T>> extFileNames = {
extDataPath.filename().wstring()
};
std::vector<char*> extBuffers = { extDataBuffer.data() };
std::vector<size_t> extLengths = { extDataBuffer.size() };
session_options.AddExternalInitializersFromFilesInMemory(
extFileNames, extBuffers, extLengths);
std::cout << "[ORT] External initializers registered."
<< std::endl;
// Large external-data models crash ORT's CUDA graph
// optimization passes. Disable all optimization for
// these models only. Normal small models (SCRFD, YOLO,
// GlintArcFace, etc.) keep ORT_ENABLE_ALL.
session_options.SetGraphOptimizationLevel(
GraphOptimizationLevel::ORT_DISABLE_ALL);
std::cout << "[ORT] Graph optimization set to DISABLE_ALL "
"(large external data detected)." << std::endl;
}
}
catch (const std::bad_alloc&) {
std::cerr << "[ORT] WARNING: Could not allocate "
<< (fileSize / (1024*1024)) << " MB for external data. "
<< "Falling back to ORT file mapping." << std::endl;
extDataBuffer.clear();
extDataBuffer.shrink_to_fit();
}
}
}
// --- Load the .onnx model file into a memory buffer too ----------
// This avoids ORT opening/mapping ANY files during CreateSession.
std::vector<char> modelBuffer;
bool useModelBuffer = false;
if (!extDataBuffer.empty()) {
// External data was pre-loaded, so also load the .onnx itself
try {
auto modelFileSize = std::filesystem::file_size(modelFsPath);
modelBuffer.resize(static_cast<size_t>(modelFileSize));
std::ifstream mifs(modelFsPath, std::ios::binary);
if (mifs) {
mifs.read(modelBuffer.data(), static_cast<std::streamsize>(modelFileSize));
mifs.close();
useModelBuffer = true;
std::cout << "[ORT] Model proto loaded into memory ("
<< modelBuffer.size() << " bytes)." << std::endl;
}
}
catch (const std::exception& e) {
std::cerr << "[ORT] WARNING: Could not read model file into memory: "
<< e.what() << ". Using file path." << std::endl;
}
}
// --- Attempt session creation (with CUDA → CPU fallback) --------
auto createSession = [&](Ort::SessionOptions& opts, const char* label) {
std::cout << "[ORT] Creating session (" << label << ")..." << std::endl;
if (useModelBuffer) {
ort_session = new Ort::Session(*ort_env,
modelBuffer.data(), modelBuffer.size(), opts);
} else {
ort_session = new Ort::Session(*ort_env, onnx_path, opts);
}
std::cout << "[ORT] Session created OK (" << label << ")." << std::endl;
};
2026-04-08 13:45:52 +10:00
ANS_DBG("OrtHandler", "Creating session for model: %ls", onnx_path);
2026-03-28 16:54:11 +11:00
try {
createSession(session_options, "primary EP");
2026-04-08 13:45:52 +10:00
ANS_DBG("OrtHandler", "Session created OK with primary EP");
2026-03-28 16:54:11 +11:00
}
catch (const Ort::Exception& e) {
2026-04-08 13:45:52 +10:00
ANS_DBG("OrtHandler", "Session FAILED with primary EP: %s", e.what());
2026-03-28 16:54:11 +11:00
std::cerr << "[ORT] Session creation FAILED with primary EP: "
<< e.what() << std::endl;
// If we were using a GPU EP, fall back to CPU
if (engine != EngineType::CPU && epAttached) {
2026-04-08 13:45:52 +10:00
ANS_DBG("OrtHandler", "Retrying with CPU fallback...");
2026-03-28 16:54:11 +11:00
std::cerr << "[ORT] Retrying with CPU EP (fallback)..." << std::endl;
// Build fresh session options — no GPU EP, no graph opt
Ort::SessionOptions cpuOpts;
cpuOpts.SetIntraOpNumThreads(num_threads);
cpuOpts.SetGraphOptimizationLevel(
GraphOptimizationLevel::ORT_DISABLE_ALL);
cpuOpts.SetLogSeverityLevel(4);
// Re-register the in-memory external data if we have it
if (!extDataBuffer.empty()) {
std::filesystem::path extDataPath =
modelDir / (modelFsPath.stem().wstring() + L".onnx_data");
std::vector<std::basic_string<ORTCHAR_T>> extFileNames = {
extDataPath.filename().wstring()
};
std::vector<char*> extBuffers = { extDataBuffer.data() };
std::vector<size_t> extLengths = { extDataBuffer.size() };
cpuOpts.AddExternalInitializersFromFilesInMemory(
extFileNames, extBuffers, extLengths);
}
createSession(cpuOpts, "CPU fallback");
} else {
throw; // re-throw if already on CPU
}
}
catch (const std::exception& e) {
2026-04-08 13:45:52 +10:00
ANS_DBG("OrtHandler", "Session FAILED (std::exception): %s", e.what());
2026-03-28 16:54:11 +11:00
std::cerr << "[ORT] Session creation FAILED (std::exception): "
<< e.what() << std::endl;
throw;
}
// Restore previous CWD & release buffers
std::filesystem::current_path(prevCwd);
extDataBuffer.clear();
extDataBuffer.shrink_to_fit();
modelBuffer.clear();
modelBuffer.shrink_to_fit();
Ort::Allocator allocator(*ort_session, *memory_info_handler);
std::cout << "[ORT] Allocator created OK." << std::endl;
// Input
input_node_names.resize(1);
input_node_names_.resize(1);
input_node_names_[0] = OrtCompatiableGetInputName(0, allocator, ort_session);
input_node_names[0] = input_node_names_[0].data();
Ort::TypeInfo type_info = ort_session->GetInputTypeInfo(0);
auto tensor_info = type_info.GetTensorTypeAndShapeInfo();
input_tensor_size = 1;
input_node_dims = tensor_info.GetShape();
for (auto dim : input_node_dims) {
if (dim > 0) input_tensor_size *= static_cast<size_t>(dim);
}
input_values_handler.resize(input_tensor_size);
// Outputs
num_outputs = static_cast<int>(ort_session->GetOutputCount());
output_node_names.resize(num_outputs);
output_node_names_.resize(num_outputs);
for (int i = 0; i < num_outputs; ++i) {
output_node_names_[i] =
OrtCompatiableGetOutputName(i, allocator, ort_session);
output_node_names[i] = output_node_names_[i].data();
output_node_dims.push_back(
ort_session->GetOutputTypeInfo(i)
.GetTensorTypeAndShapeInfo().GetShape());
}
}
// ====================================================================
// GlintArcFace
// ====================================================================
// Preprocess a single face crop: resize to the network input size,
// BGR→RGB, float conversion, mean/scale normalization, CHW tensor.
Ort::Value GlintArcFace::transform(const cv::Mat& mat)
{
    if (mat.empty())
        throw std::runtime_error("GlintArcFace::transform — input is empty.");
    const int net_w = input_node_dims.at(3);
    const int net_h = input_node_dims.at(2);
    cv::Mat blob;
    cv::resize(mat, blob, cv::Size(net_w, net_h));
    cv::cvtColor(blob, blob, cv::COLOR_BGR2RGB);
    if (blob.type() != CV_32FC3)
        blob.convertTo(blob, CV_32FC3);
    utils::transform::normalize_inplace(blob, mean_val, scale_val);
    // Batch dim may be dynamic (-1); pin it to 1 for a single image.
    std::vector<int64_t> shape(input_node_dims);
    if (shape[0] == -1) shape[0] = 1;
    return utils::transform::create_tensor(
        blob, shape, *memory_info_handler,
        input_values_handler, utils::transform::CHW);
}
// Preprocess every crop identically to transform() and pack them into one
// NCHW tensor whose batch dimension equals images.size().
//
// FIX: the previous version reused three shared temporaries (t1/t2/t3)
// and deep-cloned each image twice (clone into t3, clone again into the
// batch). A per-iteration canvas moved into the batch performs the same
// pixel operations with no redundant copies, and matches the style of
// GlintCosFace::transformBatch / SCRFD::transformBatch.
Ort::Value GlintArcFace::transformBatch(const std::vector<cv::Mat>& images)
{
    if (images.empty())
        throw std::runtime_error("GlintArcFace::transformBatch — batch is empty.");
    const int width = input_node_dims.at(3);
    const int height = input_node_dims.at(2);
    std::vector<cv::Mat> batch;
    batch.reserve(images.size());
    for (const auto& mat : images) {
        if (mat.empty())
            throw std::runtime_error("GlintArcFace::transformBatch — empty image in batch.");
        cv::Mat canvas;
        cv::resize(mat, canvas, cv::Size(width, height));
        cv::cvtColor(canvas, canvas, cv::COLOR_BGR2RGB);
        if (canvas.type() != CV_32FC3)
            canvas.convertTo(canvas, CV_32FC3);
        utils::transform::normalize_inplace(canvas, mean_val, scale_val);
        batch.push_back(std::move(canvas));
    }
    std::vector<int64_t> shape = input_node_dims;
    shape[0] = static_cast<int64_t>(images.size());
    return utils::transform::create_tensor_batch(
        batch, shape, *memory_info_handler,
        input_values_handler, utils::transform::CHW);
}
// Compute one L2-normalized face embedding for a single crop.
// No-op (face_content untouched) when the input image is empty.
void GlintArcFace::detect(const cv::Mat& mat, types::FaceContent& face_content)
{
    if (mat.empty()) return;
    Ort::Value input_tensor = transform(mat);
    auto outputs = ort_session->Run(
        Ort::RunOptions{ nullptr },
        input_node_names.data(), &input_tensor, 1,
        output_node_names.data(), num_outputs);
    const auto dim =
        static_cast<unsigned int>(output_node_dims.at(0).at(1));
    const float* raw = outputs.at(0).GetTensorMutableData<float>();
    std::vector<float> emb(raw, raw + dim);
    cv::normalize(emb, emb); // L2-normalize in place
    face_content.embedding = std::move(emb);
    face_content.dim = dim;
    face_content.flag = true;
}
// Run one batched forward pass and produce one normalized embedding per
// image. On any ORT failure the partially-filled results are discarded
// and the exception is propagated to the caller.
void GlintArcFace::detectBatch(const std::vector<cv::Mat>& images,
    std::vector<types::FaceContent>& face_contents)
{
    if (images.empty()) return;
    const size_t batch_size = images.size();
    face_contents.clear();
    face_contents.reserve(batch_size);
    try {
        Ort::Value input_tensor = transformBatch(images);
        auto output_tensors = ort_session->Run(
            Ort::RunOptions{ nullptr },
            input_node_names.data(), &input_tensor, 1,
            output_node_names.data(), num_outputs);
        // Free the staging buffer only AFTER Run(): the input tensor built
        // by transformBatch references this storage.
        input_values_handler.clear();
        input_values_handler.shrink_to_fit();
        const float* vals = output_tensors[0].GetTensorData<float>();
        const unsigned int hidden_dim =
            static_cast<unsigned int>(output_node_dims.at(0).at(1));
        face_contents.resize(batch_size);
        for (size_t i = 0; i < batch_size; ++i) {
            // Wrap the i-th embedding without copying; const_cast is needed
            // for the cv::Mat ctor, but the data is not written through it.
            cv::Mat emb_mat(1, hidden_dim, CV_32F,
                const_cast<float*>(vals + i * hidden_dim));
            cv::Mat emb_norm;
            cv::normalize(emb_mat, emb_norm); // L2-normalize into a fresh Mat
            face_contents[i].embedding = std::vector<float>(
                emb_norm.begin<float>(), emb_norm.end<float>());
            face_contents[i].dim = hidden_dim;
            face_contents[i].flag = true;
        }
    }
    catch (const Ort::Exception&) {
        // Never hand back a half-filled batch.
        face_contents.clear();
        throw;
    }
}
// ====================================================================
// GlintCosFace
// ====================================================================
// Preprocess a single face crop: resize to the network input size,
// BGR→RGB, float conversion, mean/scale normalization, CHW tensor.
Ort::Value GlintCosFace::transform(const cv::Mat& mat)
{
    if (mat.empty())
        throw std::runtime_error("GlintCosFace::transform — input is empty.");
    const int net_w = input_node_dims.at(3);
    const int net_h = input_node_dims.at(2);
    cv::Mat blob;
    cv::resize(mat, blob, cv::Size(net_w, net_h));
    cv::cvtColor(blob, blob, cv::COLOR_BGR2RGB);
    blob.convertTo(blob, CV_32FC3);
    utils::transform::normalize_inplace(blob, mean_val, scale_val);
    // Batch dim may be dynamic (-1); pin it to 1 for a single image.
    std::vector<int64_t> shape(input_node_dims);
    if (shape[0] == -1) shape[0] = 1;
    return utils::transform::create_tensor(
        blob, shape, *memory_info_handler,
        input_values_handler, utils::transform::CHW);
}
// Preprocess every crop identically to transform(), then pack them into
// one NCHW tensor whose batch dimension equals images.size().
Ort::Value GlintCosFace::transformBatch(const std::vector<cv::Mat>& images)
{
    if (images.empty())
        throw std::runtime_error("GlintCosFace::transformBatch — batch is empty.");
    const int net_w = input_node_dims.at(3);
    const int net_h = input_node_dims.at(2);
    std::vector<cv::Mat> prepared;
    prepared.reserve(images.size());
    for (const auto& img : images) {
        if (img.empty())
            throw std::runtime_error("GlintCosFace::transformBatch — empty image in batch.");
        cv::Mat blob;
        cv::resize(img, blob, cv::Size(net_w, net_h));
        cv::cvtColor(blob, blob, cv::COLOR_BGR2RGB);
        blob.convertTo(blob, CV_32FC3);
        utils::transform::normalize_inplace(blob, mean_val, scale_val);
        prepared.push_back(std::move(blob));
    }
    std::vector<int64_t> shape(input_node_dims);
    shape[0] = static_cast<int64_t>(images.size());
    return utils::transform::create_tensor_batch(
        prepared, shape, *memory_info_handler,
        input_values_handler, utils::transform::CHW);
}
// Compute one L2-normalized face embedding for a single crop.
// No-op (face_content untouched) when the input image is empty.
void GlintCosFace::detect(const cv::Mat& mat, types::FaceContent& face_content)
{
    if (mat.empty()) return;
    Ort::Value input_tensor = transform(mat);
    auto outputs = ort_session->Run(
        Ort::RunOptions{ nullptr },
        input_node_names.data(), &input_tensor, 1,
        output_node_names.data(), num_outputs);
    const auto dim =
        static_cast<unsigned int>(output_node_dims.at(0).at(1));
    const float* raw = outputs.at(0).GetTensorMutableData<float>();
    std::vector<float> emb(raw, raw + dim);
    cv::normalize(emb, emb); // L2-normalize in place
    face_content.embedding = std::move(emb);
    face_content.dim = dim;
    face_content.flag = true;
}
// Run one batched forward pass and append one normalized embedding per
// image to face_contents (cleared first).
void GlintCosFace::detectBatch(const std::vector<cv::Mat>& images,
    std::vector<types::FaceContent>& face_contents)
{
    if (images.empty()) return;
    const size_t n = images.size();
    face_contents.clear();
    face_contents.reserve(n);
    Ort::Value input_tensor = transformBatch(images);
    auto outputs = ort_session->Run(
        Ort::RunOptions{ nullptr },
        input_node_names.data(), &input_tensor, 1,
        output_node_names.data(), num_outputs);
    const float* raw = outputs.at(0).GetTensorMutableData<float>();
    const auto dim =
        static_cast<unsigned int>(output_node_dims.at(0).at(1));
    for (size_t i = 0; i < n; ++i) {
        // Slice out the i-th row of the [batch, dim] output.
        const float* first = raw + i * dim;
        std::vector<float> emb(first, first + dim);
        cv::normalize(emb, emb); // L2-normalize in place
        types::FaceContent fc;
        fc.embedding = std::move(emb);
        fc.dim = dim;
        fc.flag = true;
        face_contents.emplace_back(std::move(fc));
    }
}
// ====================================================================
// SCRFD — constructors
// ====================================================================
// Construct an SCRFD face detector; the execution engine is resolved
// from EPLoader by the BasicOrtHandler base.
SCRFD::SCRFD(const std::string& _onnx_path, unsigned int _num_threads)
    : BasicOrtHandler(_onnx_path, _num_threads)
{
    // Derive stride/anchor configuration from the loaded model's output count.
    initial_context();
}
// Construct an SCRFD face detector with an explicitly chosen execution
// engine, bypassing EPLoader's process-wide default.
SCRFD::SCRFD(const std::string& _onnx_path,
    EngineType engineType,
    unsigned int _num_threads)
    : BasicOrtHandler(_onnx_path, engineType, _num_threads)
{
    // Derive stride/anchor configuration from the loaded model's output count.
    initial_context();
}
// Configure the FPN decode parameters from the model's output count:
//   6 outputs → score + bbox per stride (no landmarks)
//   9 outputs → score + bbox + keypoints per stride
// Any other output count leaves the members at their defaults.
void SCRFD::initial_context()
{
    if (num_outputs == 6 || num_outputs == 9) {
        fmc = 3;
        feat_stride_fpn = { 8, 16, 32 };
        num_anchors = 2;
        use_kps = (num_outputs == 9);
    }
}
// Letterbox `mat` into a target_width×target_height black canvas:
// scale to fit while preserving aspect ratio, centre the result, and
// record the ratio/padding in scale_params so detections can be mapped
// back to the original image.
void SCRFD::resize_unscale(const cv::Mat& mat, cv::Mat& mat_rs,
    int target_height, int target_width,
    SCRFDScaleParams& scale_params)
{
    if (mat.empty()) return;
    const int src_h = mat.rows;
    const int src_w = mat.cols;
    mat_rs = cv::Mat(target_height, target_width, CV_8UC3, cv::Scalar(0, 0, 0));
    // Uniform scale that fits both dimensions inside the target.
    const float scale = std::min(
        static_cast<float>(target_width) / src_w,
        static_cast<float>(target_height) / src_h);
    const int new_w = static_cast<int>(src_w * scale);
    const int new_h = static_cast<int>(src_h * scale);
    const int pad_x = (target_width - new_w) / 2;
    const int pad_y = (target_height - new_h) / 2;
    cv::Mat scaled;
    cv::resize(mat, scaled, cv::Size(new_w, new_h));
    scaled.copyTo(mat_rs(cv::Rect(pad_x, pad_y, new_w, new_h)));
    scale_params.ratio = scale;
    scale_params.dw = pad_x;
    scale_params.dh = pad_y;
    scale_params.flag = true;
}
// Convert the letterboxed frame to a normalized CHW input tensor.
//
// FIX: the previous version cloned mat_rs and then ran cvtColor in place
// on the clone — a redundant full-frame deep copy per inference. cvtColor
// writing into a fresh canvas performs the identical BGR→RGB conversion
// with one copy fewer.
Ort::Value SCRFD::transform(const cv::Mat& mat_rs)
{
    cv::Mat canvas;
    cv::cvtColor(mat_rs, canvas, cv::COLOR_BGR2RGB);
    utils::transform::normalize_inplace(canvas, mean_vals, scale_vals);
    return utils::transform::create_tensor(
        canvas, input_node_dims, *memory_info_handler,
        input_values_handler, utils::transform::CHW);
}
// Preprocess every frame (resize, BGR→RGB, float, normalize) and pack
// them into one NCHW tensor whose batch dimension equals images.size().
Ort::Value SCRFD::transformBatch(const std::vector<cv::Mat>& images)
{
    if (images.empty())
        throw std::runtime_error("SCRFD::transformBatch — batch is empty.");
    const int net_w = input_node_dims.at(3);
    const int net_h = input_node_dims.at(2);
    std::vector<cv::Mat> prepared;
    prepared.reserve(images.size());
    for (const auto& img : images) {
        if (img.empty())
            throw std::runtime_error("SCRFD::transformBatch — empty image in batch.");
        cv::Mat blob;
        cv::resize(img, blob, cv::Size(net_w, net_h));
        cv::cvtColor(blob, blob, cv::COLOR_BGR2RGB);
        blob.convertTo(blob, CV_32FC3);
        utils::transform::normalize_inplace(blob, mean_vals, scale_vals);
        prepared.push_back(std::move(blob));
    }
    std::vector<int64_t> shape(input_node_dims);
    shape[0] = static_cast<int64_t>(images.size());
    return utils::transform::create_tensor_batch(
        prepared, shape, *memory_info_handler,
        input_values_handler, utils::transform::CHW);
}
// Detect faces in one image: letterbox to the network size, run the
// model, decode per-stride predictions back to image space, then NMS.
void SCRFD::detect(const cv::Mat& mat,
    std::vector<types::BoxfWithLandmarks>& detected_boxes_kps,
    float score_threshold, float iou_threshold, unsigned int topk)
{
    if (mat.empty()) return;
    const float src_h = static_cast<float>(mat.rows);
    const float src_w = static_cast<float>(mat.cols);
    const int net_h = static_cast<int>(input_node_dims.at(2));
    const int net_w = static_cast<int>(input_node_dims.at(3));
    SCRFDScaleParams scale_params;
    cv::Mat mat_rs;
    resize_unscale(mat, mat_rs, net_h, net_w, scale_params);
    Ort::Value input_tensor = transform(mat_rs);
    auto outputs = ort_session->Run(
        Ort::RunOptions{ nullptr },
        input_node_names.data(), &input_tensor, 1,
        output_node_names.data(), num_outputs);
    std::vector<types::BoxfWithLandmarks> proposals;
    generate_bboxes_kps(scale_params, proposals, outputs,
        score_threshold, src_h, src_w);
    nms_bboxes_kps(proposals, detected_boxes_kps, iou_threshold, topk);
}
// Lazily build the anchor-centre grid for every FPN stride; runs once
// (guarded by center_points_is_update) and is reused by all later frames.
//
// FIX: the previous version looked up center_points[stride] inside the
// innermost loop (one map lookup per anchor point) and never reserved;
// the vector reference is now hoisted out and capacity reserved up front.
void SCRFD::generate_points(int target_height, int target_width)
{
    if (center_points_is_update) return;
    for (auto stride : feat_stride_fpn) {
        unsigned int num_grid_w = target_width / stride;
        unsigned int num_grid_h = target_height / stride;
        auto& points = center_points[stride]; // one map lookup per stride
        points.reserve(static_cast<size_t>(num_grid_w) * num_grid_h * num_anchors);
        for (unsigned int i = 0; i < num_grid_h; ++i) {
            for (unsigned int j = 0; j < num_grid_w; ++j) {
                // num_anchors identical points per cell, matching the
                // model's per-anchor output layout.
                for (unsigned int k = 0; k < num_anchors; ++k) {
                    SCRFDPoint pt;
                    pt.cx = static_cast<float>(j);
                    pt.cy = static_cast<float>(i);
                    pt.stride = static_cast<float>(stride);
                    points.push_back(pt);
                }
            }
        }
    }
    center_points_is_update = true;
}
// Decode all three FPN strides. Outputs are grouped as
// [scores ×3, bboxes ×3, (kps ×3)] in stride order 8/16/32, so the
// tensors for stride index i live at i, i+3 and (with kps) i+6.
void SCRFD::generate_bboxes_kps(const SCRFDScaleParams& scale_params,
    std::vector<types::BoxfWithLandmarks>& bbox_kps_collection,
    std::vector<Ort::Value>& output_tensors,
    float score_threshold,
    float img_height, float img_width)
{
    const float input_height = static_cast<float>(input_node_dims.at(2));
    const float input_width = static_cast<float>(input_node_dims.at(3));
    generate_points(static_cast<int>(input_height),
        static_cast<int>(input_width));
    bbox_kps_collection.clear();
    static const unsigned int kStrides[3] = { 8, 16, 32 };
    for (int i = 0; i < 3; ++i) {
        if (use_kps) {
            generate_bboxes_kps_single_stride(scale_params,
                output_tensors.at(i), output_tensors.at(i + 3), output_tensors.at(i + 6),
                kStrides[i], score_threshold, img_height, img_width,
                bbox_kps_collection);
        } else {
            generate_bboxes_single_stride(scale_params,
                output_tensors.at(i), output_tensors.at(i + 3),
                kStrides[i], score_threshold, img_height, img_width,
                bbox_kps_collection);
        }
    }
}
void SCRFD::generate_bboxes_single_stride(
const SCRFDScaleParams& scale_params,
Ort::Value& score_pred, Ort::Value& bbox_pred,
unsigned int stride, float score_threshold,
float img_height, float img_width,
std::vector<types::BoxfWithLandmarks>& bbox_kps_collection)
{
unsigned int nms_pre_ = std::max(nms_pre, (stride / 8) * nms_pre);
auto stride_dims = score_pred.GetTypeInfo().GetTensorTypeAndShapeInfo().GetShape();
const unsigned int num_points = static_cast<unsigned int>(stride_dims.at(1));
const float* score_ptr = score_pred.GetTensorMutableData<float>();
const float* bbox_ptr = bbox_pred.GetTensorMutableData<float>();
float ratio = scale_params.ratio;
int dw = scale_params.dw;
int dh = scale_params.dh;
unsigned int count = 0;
auto& stride_points = center_points[stride];
for (unsigned int i = 0; i < num_points; ++i) {
if (score_ptr[i] < score_threshold) continue;
const auto& point = stride_points.at(i);
const float* offsets = bbox_ptr + i * 4;
float x1 = ((point.cx - offsets[0]) * point.stride - dw) / ratio;
float y1 = ((point.cy - offsets[1]) * point.stride - dh) / ratio;
float x2 = ((point.cx + offsets[2]) * point.stride - dw) / ratio;
float y2 = ((point.cy + offsets[3]) * point.stride - dh) / ratio;
types::BoxfWithLandmarks box_kps;
box_kps.box.x1 = std::max(0.f, x1);
box_kps.box.y1 = std::max(0.f, y1);
box_kps.box.x2 = std::min(img_width - 1.f, x2);
box_kps.box.y2 = std::min(img_height - 1.f, y2);
box_kps.box.score = score_ptr[i];
box_kps.box.label = 1;
box_kps.box.label_text = "face";
box_kps.box.flag = true;
box_kps.flag = true;
bbox_kps_collection.push_back(box_kps);
if (++count > max_nms) break;
}
if (bbox_kps_collection.size() > nms_pre_) {
std::sort(bbox_kps_collection.begin(), bbox_kps_collection.end(),
[](const types::BoxfWithLandmarks& a, const types::BoxfWithLandmarks& b) {
return a.box.score > b.box.score; });
bbox_kps_collection.resize(nms_pre_);
}
}
// Decode one FPN level's raw outputs (scores, bbox offsets, 5-point
// landmarks) into image-space detections, appending to the shared
// cumulative collection.
void SCRFD::generate_bboxes_kps_single_stride(
    const SCRFDScaleParams& scale_params,
    Ort::Value& score_pred, Ort::Value& bbox_pred, Ort::Value& kps_pred,
    unsigned int stride, float score_threshold,
    float img_height, float img_width,
    std::vector<types::BoxfWithLandmarks>& bbox_kps_collection)
{
    // Pre-NMS keep limit scales with stride: 8 -> nms_pre,
    // 16 -> 2*nms_pre, 32 -> 4*nms_pre.
    unsigned int nms_pre_ = std::max(nms_pre, (stride / 8) * nms_pre);
    auto stride_dims = score_pred.GetTypeInfo().GetTensorTypeAndShapeInfo().GetShape();
    // Number of anchor points at this stride (score tensor shape [1, N, ...]).
    const unsigned int num_points = static_cast<unsigned int>(stride_dims.at(1));
    const float* score_ptr = score_pred.GetTensorMutableData<float>();
    const float* bbox_ptr = bbox_pred.GetTensorMutableData<float>();
    const float* kps_ptr = kps_pred.GetTensorMutableData<float>();
    // Letterbox parameters; inverse mapping is x_img = (x_net - dw) / ratio.
    float ratio = scale_params.ratio;
    int dw = scale_params.dw;
    int dh = scale_params.dh;
    unsigned int count = 0;
    // Anchor centers precomputed by generate_points() for this stride.
    auto& stride_points = center_points[stride];
    for (unsigned int i = 0; i < num_points; ++i) {
        if (score_ptr[i] < score_threshold) continue;
        const auto& point = stride_points.at(i);
        // bbox regression is distance-to-border: (left, top, right, bottom).
        const float* offsets = bbox_ptr + i * 4;
        float x1 = ((point.cx - offsets[0]) * point.stride - dw) / ratio;
        float y1 = ((point.cy - offsets[1]) * point.stride - dh) / ratio;
        float x2 = ((point.cx + offsets[2]) * point.stride - dw) / ratio;
        float y2 = ((point.cy + offsets[3]) * point.stride - dh) / ratio;
        types::BoxfWithLandmarks box_kps;
        // Clamp the decoded box to image bounds.
        box_kps.box.x1 = std::max(0.f, x1);
        box_kps.box.y1 = std::max(0.f, y1);
        box_kps.box.x2 = std::min(img_width - 1.f, x2);
        box_kps.box.y2 = std::min(img_height - 1.f, y2);
        box_kps.box.score = score_ptr[i];
        box_kps.box.label = 1;
        box_kps.box.label_text = "face";
        box_kps.box.flag = true;
        // 5 facial landmarks packed as 10 floats per point: (x, y) pairs.
        const float* kps_offsets = kps_ptr + i * 10;
        for (unsigned int j = 0; j < 10; j += 2) {
            cv::Point2f kp;
            // Same letterbox inverse mapping as the box, clamped to bounds.
            kp.x = std::min(std::max(0.f,
                ((point.cx + kps_offsets[j]) * point.stride - dw) / ratio),
                img_width - 1.f);
            kp.y = std::min(std::max(0.f,
                ((point.cy + kps_offsets[j + 1]) * point.stride - dh) / ratio),
                img_height - 1.f);
            box_kps.landmarks.points.push_back(kp);
        }
        box_kps.landmarks.flag = true;
        box_kps.flag = true;
        bbox_kps_collection.push_back(box_kps);
        // Hard cap on accepted candidates (admits up to max_nms + 1
        // entries before breaking, since count increments after push).
        if (++count > max_nms) break;
    }
    // Keep only the top-scoring candidates accumulated so far. Note this
    // sorts/truncates the CUMULATIVE collection across strides, not just
    // this level's contribution.
    if (bbox_kps_collection.size() > nms_pre_) {
        std::sort(bbox_kps_collection.begin(), bbox_kps_collection.end(),
            [](const types::BoxfWithLandmarks& a, const types::BoxfWithLandmarks& b) {
                return a.box.score > b.box.score; });
        bbox_kps_collection.resize(nms_pre_);
    }
}
void SCRFD::nms_bboxes_kps(std::vector<types::BoxfWithLandmarks>& input,
    std::vector<types::BoxfWithLandmarks>& output,
    float iou_threshold, unsigned int topk)
{
    // Greedy hard-NMS: repeatedly keep the highest-scoring remaining box
    // and suppress all later boxes overlapping it above iou_threshold.
    // Sorts `input` in place; appends at most `topk` winners to `output`.
    if (input.empty()) return;
    std::sort(input.begin(), input.end(),
        [](const types::BoxfWithLandmarks& lhs,
           const types::BoxfWithLandmarks& rhs) {
            return lhs.box.score > rhs.box.score;
        });
    const unsigned int total = static_cast<unsigned int>(input.size());
    std::vector<int> suppressed(total, 0);
    unsigned int kept = 0;
    for (unsigned int i = 0; i < total; ++i) {
        if (suppressed[i]) continue;
        output.push_back(input[i]);
        suppressed[i] = 1;
        for (unsigned int j = i + 1; j < total; ++j) {
            if (!suppressed[j] &&
                input[i].box.iou_of(input[j].box) > iou_threshold)
                suppressed[j] = 1;
        }
        if (++kept >= topk) break;
    }
}
// ====================================================================
// MOVINET
// ====================================================================
// Constructs a MOVINET handler with default clip geometry; the ONNX
// session itself is created by the BasicOrtHandler base constructor.
MOVINET::MOVINET(const std::string& _onnx_path, unsigned int _num_threads)
    : BasicOrtHandler(_onnx_path, _num_threads)
{
    init_io_names();  // cache the model's input/output tensor names
}
// Constructs a MOVINET handler with an explicit clip geometry:
// _temporal frames of _width x _height with _channels channels each.
MOVINET::MOVINET(const std::string& _onnx_path,
    int _temporal, int _width, int _height, int _channels,
    unsigned int _num_threads)
    : BasicOrtHandler(_onnx_path, _num_threads)
{
    input_params.temporal = _temporal;
    input_params.width = _width;
    input_params.height = _height;
    input_params.channels = _channels;
    init_io_names();  // cache the model's input/output tensor names
}
// Constructs a MOVINET handler on a specific execution provider
// (CPU/CUDA/...) with default clip geometry.
MOVINET::MOVINET(const std::string& _onnx_path,
    EngineType engineType,
    unsigned int _num_threads)
    : BasicOrtHandler(_onnx_path, engineType, _num_threads)
{
    init_io_names();  // cache the model's input/output tensor names
}
// Constructs a MOVINET handler on a specific execution provider with an
// explicit clip geometry (_temporal frames of _width x _height x _channels).
MOVINET::MOVINET(const std::string& _onnx_path,
    EngineType engineType,
    int _temporal, int _width, int _height, int _channels,
    unsigned int _num_threads)
    : BasicOrtHandler(_onnx_path, engineType, _num_threads)
{
    input_params.temporal = _temporal;
    input_params.width = _width;
    input_params.height = _height;
    input_params.channels = _channels;
    init_io_names();  // cache the model's input/output tensor names
}
void MOVINET::init_io_names()
{
    // Fetch and cache the model's first input/output tensor names.
    // Copying into std::string members keeps the names valid after the
    // ORT-allocated buffers (AllocatedStringPtr) go out of scope.
    Ort::AllocatorWithDefaultOptions allocator;
    auto in_name = ort_session->GetInputNameAllocated(0, allocator);
    auto out_name = ort_session->GetOutputNameAllocated(0, allocator);
    _MoviNetInputName.assign(in_name.get());
    _MoviNetOutputName.assign(out_name.get());
}
Ort::Value MOVINET::transform(const std::deque<cv::Mat>& frames)
{
    // Pack a clip of exactly `temporal` frames into the model's 5-D
    // input tensor. Throws if the clip length does not match.
    const size_t expected = static_cast<size_t>(input_params.temporal);
    if (frames.size() != expected)
        throw std::runtime_error("MOVINET::transform — frame count != temporal length.");
    // Tensor layout expected by create_video_tensor_5d: [1, C, T, H, W].
    const std::vector<int64_t> clip_shape{
        1,
        input_params.channels,
        input_params.temporal,
        input_params.height,
        input_params.width };
    auto cpu_mem = Ort::MemoryInfo::CreateCpu(OrtArenaAllocator, OrtMemTypeDefault);
    return utils::transform::create_video_tensor_5d(
        frames, clip_shape, cpu_mem, input_tensor_values);
}
std::pair<int, float> MOVINET::post_processing(const float* p)
{
    // Numerically-stable softmax over the logits, then argmax.
    // Returns {class index, normalized probability of that class}.
    const int num_classes = output_params.num_classes;
    const float peak = *std::max_element(p, p + num_classes);
    std::vector<float> prob(num_classes);
    float denom = 0.f;
    for (int i = 0; i < num_classes; ++i) {
        prob[i] = std::exp(p[i] - peak);  // subtract max to avoid overflow
        denom += prob[i];
    }
    for (float& v : prob) v /= denom;
    const auto best = std::max_element(prob.begin(), prob.end());
    const int label = static_cast<int>(best - prob.begin());
    return { label, *best };
}
void MOVINET::inference(const std::deque<cv::Mat>& frames,
    std::pair<int, float>& out_result)
{
    // Run one classification pass over a clip. On an invalid clip
    // length the result is set to {-1, 0} and an error is logged.
    const size_t want = static_cast<size_t>(input_params.temporal);
    if (frames.empty() || frames.size() != want) {
        std::cerr << "[MOVINET] Invalid frame count." << std::endl;
        out_result = { -1, 0.f };
        return;
    }
    Ort::Value clip = transform(frames);
    const char* input_names[] = { _MoviNetInputName.c_str() };
    const char* output_names[] = { _MoviNetOutputName.c_str() };
    auto output_tensors = ort_session->Run(
        Ort::RunOptions{ nullptr },
        input_names, &clip, 1, output_names, 1);
    // Softmax + argmax over the first (and only) output tensor.
    out_result = post_processing(output_tensors[0].GetTensorData<float>());
}
Ort::Value MOVINET::transform(const cv::Mat& mat)
{
    // Single-image convenience overload: builds a clip by repeating the
    // same frame `temporal` times, then reuses the deque overload.
    std::deque<cv::Mat> clip;
    const int clip_len = input_params.temporal;
    for (int t = 0; t < clip_len; ++t)
        clip.push_back(mat.clone());
    return transform(clip);
}
Ort::Value MOVINET::transformBatch(const std::vector<cv::Mat>& images)
{
    // Builds a `temporal`-frame clip from an arbitrary-length image
    // list by cycling through it (frame t uses images[t % images.size()]),
    // then delegates to transform(deque).
    //
    // @throws std::runtime_error if `images` is empty. Previously an
    //         empty input fell through to transform() with an empty
    //         deque and surfaced as a confusing "frame count != temporal
    //         length" error; fail fast here with a descriptive message.
    if (images.empty())
        throw std::runtime_error(
            "MOVINET::transformBatch — input image list is empty.");
    std::deque<cv::Mat> frames;
    const size_t n = images.size();
    for (int t = 0; t < input_params.temporal; ++t)
        frames.push_back(images[t % n].clone());
    return transform(frames);
}
// ====================================================================
// utils::transform
// ====================================================================
// Converts a single image into a 4-D float tensor: [1, C, H, W] when
// data_format == CHW, or [1, H, W, C] otherwise. The image is converted
// to 32-bit float (channel count preserved) and resized to the tensor's
// spatial dims when they differ.
//
// Returns Ort::Value(nullptr) on invalid input: empty mat, non-4D dims,
// batch dim != 1, or channel-count mismatch. The returned tensor aliases
// `tensor_value_handler`, which must outlive it.
Ort::Value ANSCENTER::utils::transform::create_tensor(
    const cv::Mat& mat,
    const std::vector<int64_t>& tensor_dims,
    const Ort::MemoryInfo& memory_info_handler,
    std::vector<float>& tensor_value_handler,
    unsigned int data_format)
{
    if (mat.empty() || tensor_dims.size() != 4 || tensor_dims.at(0) != 1)
        return Ort::Value(nullptr);
    const unsigned int channels = mat.channels();
    cv::Mat mat_ref;
    if (mat.type() != CV_32FC(channels))
        mat.convertTo(mat_ref, CV_32FC(channels));
    else
        mat_ref = mat;
    if (data_format == CHW) {
        // dims = [1, C, H, W]
        const unsigned int H = tensor_dims.at(2);
        const unsigned int W = tensor_dims.at(3);
        const unsigned int C = tensor_dims.at(1);
        if (C != channels) return Ort::Value(nullptr);
        const unsigned int total = C * H * W;
        tensor_value_handler.resize(total);
        cv::Mat resized;
        if (H != static_cast<unsigned int>(mat_ref.rows) ||
            W != static_cast<unsigned int>(mat_ref.cols))
            cv::resize(mat_ref, resized, cv::Size(W, H));
        else
            resized = mat_ref;
        // cv::split allocates fresh, packed per-channel planes, so the
        // memcpy below is safe even if `resized` aliases a non-continuous
        // ROI of the caller's image.
        std::vector<cv::Mat> chans;
        cv::split(resized, chans);
        for (unsigned int c = 0; c < C; ++c)
            std::memcpy(tensor_value_handler.data() + c * H * W,
                chans[c].data, H * W * sizeof(float));
        return Ort::Value::CreateTensor<float>(
            memory_info_handler, tensor_value_handler.data(),
            total, tensor_dims.data(), tensor_dims.size());
    }
    // HWC: dims = [1, H, W, C]
    const unsigned int H = tensor_dims.at(1);
    const unsigned int W = tensor_dims.at(2);
    const unsigned int C = tensor_dims.at(3);
    if (C != channels) return Ort::Value(nullptr);
    const unsigned int total = C * H * W;
    tensor_value_handler.resize(total);
    cv::Mat resized;
    if (H != static_cast<unsigned int>(mat_ref.rows) ||
        W != static_cast<unsigned int>(mat_ref.cols))
        cv::resize(mat_ref, resized, cv::Size(W, H));
    else
        resized = mat_ref;
    // BUGFIX: a raw memcpy from Mat::data assumes tightly-packed rows.
    // When no resize occurs, `resized` may alias a non-continuous ROI
    // (submatrix) of the caller's image, and the copy would read the
    // row-padding bytes between rows. Force a packed copy first.
    if (!resized.isContinuous())
        resized = resized.clone();
    std::memcpy(tensor_value_handler.data(), resized.data, total * sizeof(float));
    return Ort::Value::CreateTensor<float>(
        memory_info_handler, tensor_value_handler.data(),
        total, tensor_dims.data(), tensor_dims.size());
}
// Converts a batch of images into a [N, C, H, W] float tensor (CHW), or
// copies each image's interleaved HWC bytes into the same-sized buffer
// when data_format != CHW. Each image is converted to 32-bit float and
// resized to (W, H) when needed.
//
// Returns Ort::Value(nullptr) on empty batch, non-4D dims, or any image
// whose channel count mismatches. The returned tensor aliases
// `tensor_value_handler`, which must outlive it.
//
// NOTE(review): unlike create_tensor(), the dims are always read as
// [N, C, H, W] even in HWC mode — confirm callers pass NCHW-ordered dims
// for both formats.
Ort::Value ANSCENTER::utils::transform::create_tensor_batch(
    const std::vector<cv::Mat>& batch_mats,
    const std::vector<int64_t>& tensor_dims,
    const Ort::MemoryInfo& memory_info_handler,
    std::vector<float>& tensor_value_handler,
    unsigned int data_format)
{
    if (batch_mats.empty() || tensor_dims.size() != 4)
        return Ort::Value(nullptr);
    const size_t N = batch_mats.size();
    const unsigned int C = tensor_dims.at(1);
    const unsigned int H = tensor_dims.at(2);
    const unsigned int W = tensor_dims.at(3);
    const unsigned int image_size = C * H * W;
    const unsigned int total = static_cast<unsigned int>(N) * image_size;
    tensor_value_handler.resize(total);
    for (size_t b = 0; b < N; ++b) {
        const cv::Mat& mat = batch_mats[b];
        if (mat.empty() || static_cast<unsigned int>(mat.channels()) != C)
            return Ort::Value(nullptr);
        cv::Mat mat_ref;
        if (mat.type() != CV_32FC(C))
            mat.convertTo(mat_ref, CV_32FC(C));
        else
            mat_ref = mat;
        cv::Mat resized;
        if (static_cast<unsigned int>(mat_ref.rows) != H ||
            static_cast<unsigned int>(mat_ref.cols) != W)
            cv::resize(mat_ref, resized, cv::Size(W, H));
        else
            resized = mat_ref;
        // BUGFIX: both copy paths below assume tightly-packed HWC rows
        // (raw pointer arithmetic over ptr<float>(0) / memcpy of
        // Mat::data). When no resize occurs, `resized` may alias a
        // non-continuous ROI of the caller's image, so the copy would
        // stride over row-padding bytes. Force a packed copy first.
        if (!resized.isContinuous())
            resized = resized.clone();
        const size_t batch_offset = b * image_size;
        if (data_format == CHW) {
            // Interleaved HWC -> planar CHW transpose.
            const float* src = resized.ptr<float>(0);
            const size_t plane_size = H * W;
            for (unsigned int c = 0; c < C; ++c) {
                float* dst = tensor_value_handler.data() + batch_offset + c * plane_size;
                for (size_t i = 0; i < plane_size; ++i)
                    dst[i] = src[i * C + c];
            }
        }
        else {
            std::memcpy(tensor_value_handler.data() + batch_offset,
                resized.data, image_size * sizeof(float));
        }
    }
    return Ort::Value::CreateTensor<float>(
        memory_info_handler, tensor_value_handler.data(),
        total, tensor_dims.data(), tensor_dims.size());
}
// Packs T frames into a [1, C, T, H, W] float tensor for video models.
// Each frame is converted to float, resized to (W, H), converted
// BGR -> RGB, and written plane-major: channel c occupies a contiguous
// T*H*W block, with frame t's H*W plane at offset t*H*W within it.
//
// NOTE(review): the 1/255 scaling is applied only when the frame is NOT
// already CV_32F — a pre-converted float frame is used as-is, unscaled.
// Confirm callers always pass 8-bit frames.
// NOTE(review): cvtColor(BGR2RGB) assumes 3-channel frames; C != 3
// would fail at runtime here.
Ort::Value ANSCENTER::utils::transform::create_video_tensor_5d(
    const std::deque<cv::Mat>& frames,
    const std::vector<int64_t>& tensor_dims,
    const Ort::MemoryInfo& memory_info_handler,
    std::vector<float>& tensor_value_handler)
{
    if (tensor_dims.size() != 5 || tensor_dims[0] != 1)
        throw std::runtime_error("create_video_tensor_5d: expect [1,C,T,H,W]");
    const unsigned int C = tensor_dims[1];
    const unsigned int T = tensor_dims[2];
    const unsigned int H = tensor_dims[3];
    const unsigned int W = tensor_dims[4];
    if (frames.size() != T)
        throw std::runtime_error("create_video_tensor_5d: frame count != T");
    const size_t total = static_cast<size_t>(C) * T * H * W;
    tensor_value_handler.resize(total);
    for (unsigned int t = 0; t < T; ++t) {
        cv::Mat frame_ref;
        // Convert 8-bit -> float with 1/255 scaling (skipped for float input).
        if (frames[t].type() != CV_32FC(C))
            frames[t].convertTo(frame_ref, CV_32FC(C), 1.0 / 255.0);
        else
            frame_ref = frames[t];
        cv::Mat resized;
        if (static_cast<unsigned int>(frame_ref.rows) != H ||
            static_cast<unsigned int>(frame_ref.cols) != W)
            cv::resize(frame_ref, resized, cv::Size(W, H));
        else
            resized = frame_ref;
        cv::cvtColor(resized, resized, cv::COLOR_BGR2RGB);
        // Split into packed per-channel planes; memcpy below is then safe
        // regardless of the source Mat's row layout.
        std::vector<cv::Mat> chans;
        cv::split(resized, chans);
        for (unsigned int c = 0; c < C; ++c) {
            // Destination: channel-c block, frame-t plane within it.
            float* dst = tensor_value_handler.data()
                + c * (T * H * W) + t * (H * W);
            std::memcpy(dst, chans[c].data, H * W * sizeof(float));
        }
    }
    return Ort::Value::CreateTensor<float>(
        memory_info_handler, tensor_value_handler.data(),
        total, tensor_dims.data(), tensor_dims.size());
}
// Returns (pixel - mean) * scale applied uniformly to all channels,
// promoting the input to CV_32FC3 first when needed.
cv::Mat ANSCENTER::utils::transform::normalize(
    const cv::Mat& mat, float mean, float scale)
{
    cv::Mat as_float;
    if (mat.type() == CV_32FC3)
        as_float = mat;
    else
        mat.convertTo(as_float, CV_32FC3);
    return (as_float - mean) * scale;
}
// Per-channel normalization: out[c] = (in[c] - mean[c]) * scale[c].
// `mean` and `scale` must each point to at least 3 floats. The input is
// promoted (or copied) to CV_32FC3; the original mat is never modified.
cv::Mat ANSCENTER::utils::transform::normalize(
    const cv::Mat& mat, const float* mean, const float* scale)
{
    cv::Mat out;
    if (mat.type() == CV_32FC3)
        out = mat.clone();
    else
        mat.convertTo(out, CV_32FC3);
    for (int r = 0; r < out.rows; ++r) {
        cv::Vec3f* row = out.ptr<cv::Vec3f>(r);
        for (int col = 0; col < out.cols; ++col) {
            for (int ch = 0; ch < 3; ++ch)
                row[col][ch] = (row[col][ch] - mean[ch]) * scale[ch];
        }
    }
    return out;
}
// Convenience overload: writes the normalized copy of `inmat` into
// `outmat` by delegating to the value-returning overload.
void ANSCENTER::utils::transform::normalize(
    const cv::Mat& inmat, cv::Mat& outmat, float mean, float scale)
{
    outmat = ANSCENTER::utils::transform::normalize(inmat, mean, scale);
}
// In-place uniform normalization: promotes the mat to CV_32FC3, then
// replaces its contents with (pixel - mean) * scale.
void ANSCENTER::utils::transform::normalize_inplace(
    cv::Mat& mat_inplace, float mean, float scale)
{
    if (mat_inplace.type() != CV_32FC3)
        mat_inplace.convertTo(mat_inplace, CV_32FC3);
    mat_inplace = ANSCENTER::utils::transform::normalize(mat_inplace, mean, scale);
}
// In-place per-channel normalization: promotes the mat to CV_32FC3 and
// applies (x - mean[c]) * scale[c] to every pixel. `mean` and `scale`
// must each point to at least 3 floats.
void ANSCENTER::utils::transform::normalize_inplace(
    cv::Mat& mat_inplace, const float* mean, const float* scale)
{
    if (mat_inplace.type() != CV_32FC3)
        mat_inplace.convertTo(mat_inplace, CV_32FC3);
    for (int r = 0; r < mat_inplace.rows; ++r) {
        cv::Vec3f* row = mat_inplace.ptr<cv::Vec3f>(r);
        for (int col = 0; col < mat_inplace.cols; ++col) {
            for (int ch = 0; ch < 3; ++ch)
                row[col][ch] = (row[col][ch] - mean[ch]) * scale[ch];
        }
    }
}
// ====================================================================
// BoundingBoxType template method implementations
// ====================================================================
// Intersection-over-union between this box and `other`, after converting
// `other` to this box's value/score types. Uses the inclusive-pixel
// convention (+1 on intersection width/height) to match width()/height().
// When the boxes do not overlap, returns
// std::numeric_limits<value_type>::min() — for floating-point types this
// is a tiny positive value, effectively 0 against any practical IoU
// threshold (for signed integer types it is the most negative value).
template<typename T1, typename T2>
template<typename O1, typename O2>
inline typename ANSCENTER::types::BoundingBoxType<T1, T2>::value_type
ANSCENTER::types::BoundingBoxType<T1, T2>::iou_of(
    const BoundingBoxType<O1, O2>& other) const
{
    auto tbox = other.template convert_type<value_type, score_type>();
    // Intersection rectangle corners.
    value_type ix1 = std::max(x1, tbox.x1);
    value_type iy1 = std::max(y1, tbox.y1);
    value_type ix2 = std::min(x2, tbox.x2);
    value_type iy2 = std::min(y2, tbox.y2);
    // Inclusive-pixel intersection extents.
    value_type iw = ix2 - ix1 + static_cast<value_type>(1);
    value_type ih = iy2 - iy1 + static_cast<value_type>(1);
    if (iw <= 0 || ih <= 0)
        return std::numeric_limits<value_type>::min();
    value_type inter = iw * ih;
    // IoU = intersection / union.
    return inter / (area() + tbox.area() - inter);
}
// Integer cv::Rect covering the box: origin at the top-left corner with
// the inclusive width()/height() extents.
template<typename T1, typename T2>
inline ::cv::Rect ANSCENTER::types::BoundingBoxType<T1, T2>::rect() const
{
    const auto ibox = convert_type<int>();
    return ::cv::Rect(ibox.x1, ibox.y1, ibox.width(), ibox.height());
}
// Top-left corner of the box as an integer point.
template<typename T1, typename T2>
inline ::cv::Point2i ANSCENTER::types::BoundingBoxType<T1, T2>::tl() const
{
    const auto ibox = convert_type<int>();
    return ::cv::Point2i(ibox.x1, ibox.y1);
}
// Bottom-right corner of the box as an integer point.
template<typename T1, typename T2>
inline ::cv::Point2i ANSCENTER::types::BoundingBoxType<T1, T2>::rb() const
{
    const auto ibox = convert_type<int>();
    return ::cv::Point2i(ibox.x2, ibox.y2);
}
// Inclusive pixel width: a box with x1 == x2 spans one pixel, hence +1.
template<typename T1, typename T2>
inline typename ANSCENTER::types::BoundingBoxType<T1, T2>::value_type
ANSCENTER::types::BoundingBoxType<T1, T2>::width() const
{
    return x2 - x1 + static_cast<value_type>(1);
}
// Inclusive pixel height: a box with y1 == y2 spans one pixel, hence +1.
template<typename T1, typename T2>
inline typename ANSCENTER::types::BoundingBoxType<T1, T2>::value_type
ANSCENTER::types::BoundingBoxType<T1, T2>::height() const
{
    return y2 - y1 + static_cast<value_type>(1);
}
// Box area under the inclusive-pixel convention (width() * height());
// fabs guards against a negative product from degenerate boxes whose
// corners are swapped.
template<typename T1, typename T2>
inline typename ANSCENTER::types::BoundingBoxType<T1, T2>::value_type
ANSCENTER::types::BoundingBoxType<T1, T2>::area() const
{
    return std::fabs(width() * height());
}
} // namespace ANSCENTER