Refactor project structure
This commit is contained in:
734
modules/ANSODEngine/ANSSAM.cpp
Normal file
734
modules/ANSODEngine/ANSSAM.cpp
Normal file
@@ -0,0 +1,734 @@
|
||||
#include "ANSSAM.h"
|
||||
#include <algorithm>
|
||||
#include <filesystem>
|
||||
#include <algorithm>
|
||||
|
||||
namespace ANSCENTER {
|
||||
// public:
|
||||
bool ANSSAM::OptimizeModel(bool fp16, std::string& optimizedModelFolder) {
|
||||
std::lock_guard<std::recursive_mutex> lock(_mutex);
|
||||
if (!ANSODBase::OptimizeModel(fp16, optimizedModelFolder)) {
|
||||
return false;
|
||||
}
|
||||
if (FileExist(_modelFilePath)) {
|
||||
std::string modelName = GetFileNameWithoutExtension(_modelFilePath);
|
||||
std::string binaryModelName = modelName + ".bin";
|
||||
std::string modelFolder = GetParentFolder(_modelFilePath);
|
||||
std::string optimizedModelPath = CreateFilePath(modelFolder, binaryModelName);
|
||||
if (FileExist(optimizedModelPath)) {
|
||||
this->_logger.LogDebug("ANSSAM::OptimizeModel", "This model is optimized. No need other optimization.", __FILE__, __LINE__);
|
||||
optimizedModelFolder = modelFolder;
|
||||
return true;
|
||||
}
|
||||
else {
|
||||
this->_logger.LogFatal("ANSSAM::OptimizeModel", "This model can not be optimized.", __FILE__, __LINE__);
|
||||
optimizedModelFolder = modelFolder;
|
||||
return false;
|
||||
}
|
||||
}
|
||||
else {
|
||||
this->_logger.LogFatal("ANSSAM::OptimizeModel", "This model is not exist. Please check the model path again.", __FILE__, __LINE__);
|
||||
optimizedModelFolder = "";
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
bool ANSSAM::LoadModel(const std::string& modelZipFilePath, const std::string& modelZipPassword) {
|
||||
std::lock_guard<std::recursive_mutex> lock(_mutex);
|
||||
try {
|
||||
bool result = ANSODBase::LoadModel(modelZipFilePath, modelZipPassword);
|
||||
if (!result) return false;
|
||||
std::string xmlfile = CreateFilePath(_modelFolder, "anssam.xml");
|
||||
std::string binaryfile = CreateFilePath(_modelFolder, "anssam.bin");
|
||||
if (std::filesystem::exists(xmlfile)) {
|
||||
_modelFilePath = xmlfile;
|
||||
this->_logger.LogDebug("ANSSAM::Initialize. Loading OpenVINO weight", _modelFilePath, __FILE__, __LINE__);
|
||||
if (FileExist(binaryfile)) {
|
||||
this->_logger.LogDebug("ANSSAM::Initialize binary weight", binaryfile, __FILE__, __LINE__);
|
||||
}
|
||||
else {
|
||||
this->_logger.LogError("ANSSAM::Initialize. Model binary file does not exist", binaryfile, __FILE__, __LINE__);
|
||||
return false;
|
||||
}
|
||||
}
|
||||
else {
|
||||
this->_logger.LogError("ANSPOSE::Initialize. Model file does exist", _modelFilePath, __FILE__, __LINE__);
|
||||
return false;
|
||||
}
|
||||
if (_modelConfig.modelConfThreshold <= 0)_modelConfig.modelConfThreshold = 0.4;
|
||||
if (_modelConfig.modelMNSThreshold <= 0)_modelConfig.modelMNSThreshold = 0.4;
|
||||
_isInitialized = Init(_modelFilePath, _modelConfig.modelConfThreshold, _modelConfig.modelMNSThreshold, true);
|
||||
return _isInitialized;
|
||||
}
|
||||
catch (std::exception& e) {
|
||||
this->_logger.LogFatal("ANSPOSE::LoadModel", e.what(), __FILE__, __LINE__);
|
||||
return false;
|
||||
}
|
||||
}
|
||||
bool ANSSAM::LoadModelFromFolder(std::string licenseKey, ModelConfig modelConfig, std::string modelName, std::string className, const std::string& modelFolder, std::string& labelMap) {
|
||||
std::lock_guard<std::recursive_mutex> lock(_mutex);
|
||||
try {
|
||||
bool result = ANSODBase::LoadModelFromFolder(licenseKey, modelConfig, modelName, className, modelFolder, labelMap);
|
||||
if (!result) return false;
|
||||
std::string _modelName = modelName;
|
||||
if (_modelName.empty()) {
|
||||
_modelName = "anssam";
|
||||
}
|
||||
std::string xmlFileName = _modelName + ".xml";
|
||||
std::string binFileName = _modelName + ".bin";
|
||||
std::string xmlfile = CreateFilePath(_modelFolder, xmlFileName);
|
||||
std::string binaryfile = CreateFilePath(_modelFolder, binFileName);
|
||||
if (std::filesystem::exists(xmlfile)) {
|
||||
_modelFilePath = xmlfile;
|
||||
this->_logger.LogDebug("ANSSAM::Initialize. Loading OpenVINO weight", _modelFilePath, __FILE__, __LINE__);
|
||||
if (FileExist(binaryfile)) {
|
||||
this->_logger.LogDebug("ANSSAM::Initialize binary weight", binaryfile, __FILE__, __LINE__);
|
||||
}
|
||||
else {
|
||||
this->_logger.LogError("ANSSAM::Initialize. Model binary file does not exist", binaryfile, __FILE__, __LINE__);
|
||||
return false;
|
||||
}
|
||||
}
|
||||
else {
|
||||
this->_logger.LogError("ANSPOSE::Initialize. Model file does exist", _modelFilePath, __FILE__, __LINE__);
|
||||
return false;
|
||||
}
|
||||
if (modelConfig.modelConfThreshold <= 0)modelConfig.modelConfThreshold = 0.4;
|
||||
if (modelConfig.modelMNSThreshold <= 0)modelConfig.modelMNSThreshold = 0.4;
|
||||
labelMap = "SAM";
|
||||
_isInitialized = Init(_modelFilePath, modelConfig.modelConfThreshold, modelConfig.modelMNSThreshold, true);
|
||||
return _isInitialized;
|
||||
}
|
||||
catch (std::exception& e) {
|
||||
this->_logger.LogFatal("ANSPOSE::LoadModel", e.what(), __FILE__, __LINE__);
|
||||
return false;
|
||||
}
|
||||
}
|
||||
bool ANSSAM::Initialize(std::string licenseKey, ModelConfig modelConfig, const std::string& modelZipFilePath, const std::string& modelZipPassword, std::string& labelMap)
|
||||
{
|
||||
try {
|
||||
std::lock_guard<std::recursive_mutex> lock(_mutex);
|
||||
_modelConfig = modelConfig;
|
||||
bool result = ANSODBase::Initialize(licenseKey, modelConfig, modelZipFilePath, modelZipPassword, labelMap);
|
||||
if (!result) return false;
|
||||
std::string xmlfile = CreateFilePath(_modelFolder, "anssam.xml");
|
||||
std::string binaryfile = CreateFilePath(_modelFolder, "anssam.bin");
|
||||
if (std::filesystem::exists(xmlfile)) {
|
||||
_modelFilePath = xmlfile;
|
||||
this->_logger.LogDebug("ANSSAM::Initialize. Loading OpenVINO weight", _modelFilePath, __FILE__, __LINE__);
|
||||
if (FileExist(binaryfile)) {
|
||||
this->_logger.LogDebug("ANSSAM::Initialize binary weight", binaryfile, __FILE__, __LINE__);
|
||||
}
|
||||
else {
|
||||
this->_logger.LogError("ANSSAM::Initialize. Model binary file does not exist", binaryfile, __FILE__, __LINE__);
|
||||
return false;
|
||||
}
|
||||
}
|
||||
else {
|
||||
this->_logger.LogError("ANSPOSE::Initialize. Model file does exist", _modelFilePath, __FILE__, __LINE__);
|
||||
return false;
|
||||
}
|
||||
if (modelConfig.modelConfThreshold <= 0)modelConfig.modelConfThreshold = 0.4;
|
||||
if (modelConfig.modelMNSThreshold <= 0)modelConfig.modelMNSThreshold = 0.4;
|
||||
labelMap = "SAM";
|
||||
_isInitialized = Init(_modelFilePath, modelConfig.modelConfThreshold, modelConfig.modelMNSThreshold, true);
|
||||
return _isInitialized;
|
||||
}
|
||||
catch (std::exception& e) {
|
||||
this->_logger.LogFatal("ANSPOSE::Initialize", e.what(), __FILE__, __LINE__);
|
||||
return false;
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
std::vector<Object> ANSSAM::RunInference(const cv::Mat& input) {
|
||||
return RunInference(input, "CustomCam");
|
||||
}
|
||||
std::vector<Object> ANSSAM::RunInference(const cv::Mat& input,const std::string& camera_id)
|
||||
{
|
||||
std::lock_guard<std::recursive_mutex> lock(_mutex);
|
||||
|
||||
try {
|
||||
if (!_licenseValid || !_isInitialized || input.empty() ||
|
||||
input.cols < 10 || input.rows < 10) {
|
||||
return {};
|
||||
}
|
||||
|
||||
// Convert to BGR
|
||||
cv::Mat processedImage = (input.channels() == 1)
|
||||
? [&] { cv::Mat tmp; cv::cvtColor(input, tmp, cv::COLOR_GRAY2BGR); return tmp; }()
|
||||
: input;
|
||||
|
||||
// Run inference
|
||||
ov::Tensor input_tensor = Preprocess(processedImage);
|
||||
if (input_tensor.get_size() == 0) return {};
|
||||
|
||||
m_request.set_input_tensor(input_tensor);
|
||||
m_request.infer();
|
||||
|
||||
std::vector<cv::Mat> masks = Postprocess(processedImage);
|
||||
if (masks.empty()) return {};
|
||||
|
||||
// Build output
|
||||
std::vector<Object> output;
|
||||
output.reserve(masks.size());
|
||||
|
||||
const cv::Rect imageBounds(0, 0, input.cols, input.rows);
|
||||
|
||||
for (auto& mask : masks) {
|
||||
Object obj;
|
||||
obj.extraInfo = MaskToPolygons(mask, obj.box, obj.polygon);
|
||||
|
||||
obj.box &= imageBounds;
|
||||
if (obj.box.area() == 0) continue;
|
||||
|
||||
obj.confidence = 1.0f;
|
||||
obj.classId = 0;
|
||||
obj.className = "sam";
|
||||
obj.cameraId = camera_id;
|
||||
|
||||
output.push_back(std::move(obj));
|
||||
}
|
||||
|
||||
if (_trackerEnabled) {
|
||||
output = ApplyTracking(output, camera_id);
|
||||
if (_stabilizationEnabled) output = StabilizeDetections(output, camera_id);
|
||||
}
|
||||
return output;
|
||||
|
||||
}
|
||||
catch (const std::exception& e) {
|
||||
_logger.LogFatal("ANSSAM::RunInference", e.what(), __FILE__, __LINE__);
|
||||
return {};
|
||||
}
|
||||
}
|
||||
ANSSAM::~ANSSAM() {
|
||||
Destroy();
|
||||
}
|
||||
bool ANSSAM::Destroy()
|
||||
{
|
||||
try {
|
||||
if (FolderExist(_modelFolder)) {
|
||||
DeleteFolder(_modelFolder);
|
||||
}
|
||||
return true;
|
||||
}
|
||||
catch (std::exception& e) {
|
||||
this->_logger.LogFatal("ANSSAM::Destroy", e.what(), __FILE__, __LINE__);
|
||||
return false;
|
||||
}
|
||||
}
|
||||
//private:
|
||||
bool ANSSAM::Init(const std::string& xml_path, float conf, float iou, bool useGpu)
|
||||
{
|
||||
m_conf = conf;
|
||||
m_iou = iou;
|
||||
if (!std::filesystem::exists(xml_path))
|
||||
return false;
|
||||
|
||||
m_model = m_core.read_model(xml_path);
|
||||
|
||||
if (!ParseArgs())
|
||||
return false;
|
||||
|
||||
if (!BuildProcessor())
|
||||
return false;
|
||||
|
||||
m_compiled_model = m_core.compile_model(m_model, useGpu && IsGpuAvaliable(m_core) ? "GPU" : "CPU");
|
||||
m_request = m_compiled_model.create_infer_request();
|
||||
return true;
|
||||
}
|
||||
void ANSSAM::Infer(const std::string& image_path)
|
||||
{
|
||||
try
|
||||
{
|
||||
cv::Mat image = cv::imread(image_path);
|
||||
|
||||
cv::Mat rendered = Infer(image);
|
||||
|
||||
std::string savedir = std::filesystem::current_path().string() + "/results";
|
||||
if (!std::filesystem::exists(savedir))
|
||||
std::filesystem::create_directory(savedir);
|
||||
|
||||
std::string savepath = savedir + "/" + std::filesystem::path(image_path).filename().string();
|
||||
cv::imwrite(savepath, rendered);
|
||||
|
||||
//slog::info << "result saved in :" << savepath << "\n";
|
||||
}
|
||||
catch (const std::exception& e)
|
||||
{
|
||||
//slog::info << "Failed to Infer! ec: " << e.what() << '\n';
|
||||
}
|
||||
}
|
||||
cv::Mat ANSSAM::Infer(const cv::Mat& image)
|
||||
{
|
||||
cv::Mat processedImg = image.clone();
|
||||
ov::Tensor input_tensor = Preprocess(processedImg);
|
||||
|
||||
assert(input_tensor.get_size() != 0);
|
||||
|
||||
m_request.set_input_tensor(input_tensor);
|
||||
m_request.infer();
|
||||
|
||||
std::vector<cv::Mat> result = Postprocess(image);
|
||||
|
||||
return Render(image, result);
|
||||
}
|
||||
bool ANSSAM::ParseArgs()
|
||||
{
|
||||
try
|
||||
{
|
||||
model_input_shape = m_model->input().get_shape();
|
||||
model_output0_shape = m_model->output(0).get_shape();
|
||||
model_output1_shape = m_model->output(1).get_shape();
|
||||
|
||||
//slog::info << "xml input shape:" << model_input_shape << "\n";
|
||||
//slog::info << "xml output shape 0:" << model_output0_shape << "\n";
|
||||
//slog::info << "xml output shape 1:" << model_output1_shape << "\n";
|
||||
|
||||
// [1, 3, 640, 640]
|
||||
input_channel = model_input_shape[1];
|
||||
input_height = model_input_shape[2];
|
||||
input_width = model_input_shape[3];
|
||||
|
||||
this->input_data.resize(input_channel * input_height * input_height);
|
||||
|
||||
//slog::info << "model input height:" << input_height << " input width:" << input_width << "\n";
|
||||
|
||||
// output0 = [1,37,8400]
|
||||
// output1 = [1,32,160,160]
|
||||
mh = model_output1_shape[2];
|
||||
mw = model_output1_shape[3];
|
||||
|
||||
//slog::info << "model output mh:" << mh << " output mw:" << mw << "\n";
|
||||
}
|
||||
catch (const std::exception& e)
|
||||
{
|
||||
//slog::info << "Failed to Parse Args. " << e.what() << '\n';
|
||||
return false;
|
||||
}
|
||||
|
||||
|
||||
return true;
|
||||
}
|
||||
void ANSSAM::ScaleBoxes(cv::Mat& box, const cv::Size& oriSize)
|
||||
{
|
||||
float oriWidth = static_cast<float>(oriSize.width);
|
||||
float oriHeight = static_cast<float>(oriSize.height);
|
||||
float* pxvec = box.ptr<float>(0);
|
||||
|
||||
for (int i = 0; i < box.rows; i++) {
|
||||
pxvec = box.ptr<float>(i);
|
||||
pxvec[0] -= this->dw;
|
||||
pxvec[0] = std::clamp(pxvec[0] * this->ratio, 0.f, oriWidth);
|
||||
pxvec[1] -= this->dh;
|
||||
pxvec[1] = std::clamp(pxvec[1] * this->ratio, 0.f, oriHeight);
|
||||
pxvec[2] -= this->dw;
|
||||
pxvec[2] = std::clamp(pxvec[2] * this->ratio, 0.f, oriWidth);
|
||||
pxvec[3] -= this->dh;
|
||||
pxvec[3] = std::clamp(pxvec[3] * this->ratio, 0.f, oriHeight);
|
||||
}
|
||||
}
|
||||
std::vector<cv::Mat> ANSSAM::NMS(cv::Mat& prediction, int max_det)
|
||||
{
|
||||
std::vector<cv::Mat> vreMat;
|
||||
cv::Mat temData = cv::Mat();
|
||||
prediction = prediction.t(); // [37, 8400] --> [rows:8400, cols:37]
|
||||
float* pxvec = prediction.ptr<float>(0);
|
||||
|
||||
for (int i = 0; i < prediction.rows; i++) {
|
||||
pxvec = prediction.ptr<float>(i);
|
||||
if (pxvec[4] > m_conf) {
|
||||
temData.push_back(prediction.rowRange(i, i + 1).clone());
|
||||
}
|
||||
}
|
||||
|
||||
if (temData.rows == 0) {
|
||||
return vreMat;
|
||||
}
|
||||
|
||||
cv::Mat box = temData.colRange(0, 4).clone(); //
|
||||
cv::Mat cls = temData.colRange(4, 5).clone(); //
|
||||
cv::Mat mask = temData.colRange(5, temData.cols).clone(); //
|
||||
|
||||
cv::Mat j = cv::Mat::zeros(cls.size(), CV_32F);
|
||||
cv::Mat dst;
|
||||
cv::hconcat(box, cls, dst); // concat
|
||||
cv::hconcat(dst, j, dst);
|
||||
cv::hconcat(dst, mask, dst); // dst = [box class j mask] concat
|
||||
|
||||
std::vector<float> scores;
|
||||
std::vector<cv::Rect> boxes;
|
||||
pxvec = dst.ptr<float>(0);
|
||||
for (int i = 0; i < dst.rows; i++) {
|
||||
pxvec = dst.ptr<float>(i);
|
||||
boxes.push_back(cv::Rect(pxvec[0], pxvec[1], pxvec[2], pxvec[3]));
|
||||
scores.push_back(pxvec[4]);
|
||||
}
|
||||
|
||||
std::vector<int> indices;
|
||||
cv::dnn::NMSBoxes(boxes, scores, m_conf, m_iou, indices);
|
||||
cv::Mat reMat;
|
||||
for (int i = 0; i < indices.size() && i < max_det; i++) {
|
||||
int index = indices[i];
|
||||
reMat.push_back(dst.rowRange(index, index + 1).clone());
|
||||
}
|
||||
box = reMat.colRange(0, 6).clone();
|
||||
xywh2xyxy(box);
|
||||
mask = reMat.colRange(6, reMat.cols).clone();
|
||||
|
||||
vreMat.push_back(box);
|
||||
vreMat.push_back(mask);
|
||||
|
||||
return vreMat;
|
||||
}
|
||||
void ANSSAM::xywh2xyxy(cv::Mat& box)
|
||||
{
|
||||
float* pxvec = box.ptr<float>(0);
|
||||
for (int i = 0; i < box.rows; i++) {
|
||||
pxvec = box.ptr<float>(i);
|
||||
float w = pxvec[2];
|
||||
float h = pxvec[3];
|
||||
float cx = pxvec[0];
|
||||
float cy = pxvec[1];
|
||||
pxvec[0] = cx - w / 2;
|
||||
pxvec[1] = cy - h / 2;
|
||||
pxvec[2] = cx + w / 2;
|
||||
pxvec[3] = cy + h / 2;
|
||||
}
|
||||
}
|
||||
|
||||
cv::Mat ANSSAM::ConvertToBinary(cv::Mat src) {
|
||||
cv::Mat gray, binary;
|
||||
|
||||
// Check if the image is already in grayscale
|
||||
if (src.channels() == 3) {
|
||||
// Convert from BGR to Grayscale
|
||||
cv::cvtColor(src, gray, cv::COLOR_BGR2GRAY);
|
||||
}
|
||||
else if (src.channels() == 1) {
|
||||
gray = src.clone(); // Use the grayscale image as is
|
||||
}
|
||||
else {
|
||||
std::cerr << "Unsupported number of channels: " << src.channels() << std::endl;
|
||||
return cv::Mat(); // Return an empty matrix on failure
|
||||
}
|
||||
|
||||
// Convert grayscale to binary
|
||||
cv::threshold(gray, binary, 127, 255, cv::THRESH_BINARY); // Adjust the threshold as needed
|
||||
cv::Mat binaryMask;
|
||||
cv::threshold(binary, binaryMask, 0.5, 255, cv::THRESH_BINARY);
|
||||
binaryMask.convertTo(binaryMask, CV_8UC1);
|
||||
return binaryMask;
|
||||
}
|
||||
|
||||
|
||||
std::string ANSSAM::MaskToPolygons(const cv::Mat& image, cv::Rect& boundingBox, std::vector<cv::Point2f>& polygon) {
|
||||
try {
|
||||
// image is boxMask: CV_32FC1 with 0.0 / 1.0 → directly convert
|
||||
cv::Mat mask8u;
|
||||
image.convertTo(mask8u, CV_8UC1, 255.0); // 1.0 → 255, 0.0 → 0
|
||||
|
||||
if (mask8u.empty() || mask8u.type() != CV_8UC1) {
|
||||
return "";
|
||||
}
|
||||
|
||||
std::vector<std::vector<cv::Point>> contours;
|
||||
std::vector<cv::Vec4i> hierarchy;
|
||||
|
||||
cv::findContours(mask8u, contours, hierarchy, cv::RETR_EXTERNAL, cv::CHAIN_APPROX_SIMPLE);
|
||||
|
||||
if (contours.empty()) {
|
||||
return "";
|
||||
}
|
||||
|
||||
// Select the largest contour
|
||||
size_t largestContourIdx = 0;
|
||||
double maxArea = 0.0;
|
||||
for (size_t i = 0; i < contours.size(); ++i) {
|
||||
double area = cv::contourArea(contours[i]);
|
||||
if (area > maxArea) {
|
||||
maxArea = area;
|
||||
largestContourIdx = i;
|
||||
}
|
||||
}
|
||||
|
||||
// Approximate polygon
|
||||
std::vector<cv::Point> approxPolygon;
|
||||
cv::approxPolyDP(contours[largestContourIdx], approxPolygon, 3, true);
|
||||
|
||||
// Convert to Point2f
|
||||
polygon.clear();
|
||||
for (const auto& pt : approxPolygon) {
|
||||
polygon.push_back(cv::Point2f(pt.x, pt.y));
|
||||
}
|
||||
|
||||
// Calculate bounding box
|
||||
boundingBox = cv::boundingRect(approxPolygon);
|
||||
|
||||
// Convert polygon to string
|
||||
std::stringstream xCoords, yCoords;
|
||||
for (size_t i = 0; i < polygon.size(); ++i) {
|
||||
if (i > 0) {
|
||||
xCoords << "; ";
|
||||
yCoords << "; ";
|
||||
}
|
||||
xCoords << polygon[i].x;
|
||||
yCoords << polygon[i].y;
|
||||
}
|
||||
|
||||
std::string formattedString = xCoords.str() + " | " + yCoords.str();
|
||||
return formattedString;
|
||||
}
|
||||
catch (const std::exception& e) {
|
||||
//slog::info << "Failed to convert mask to polygons: " << e.what() << '\n';
|
||||
return "";
|
||||
}
|
||||
}
|
||||
|
||||
cv::Mat ANSSAM::Render(const cv::Mat& image, const std::vector<cv::Mat>& vremat)
|
||||
{
|
||||
cv::Mat rendered = image.clone();
|
||||
|
||||
for (const auto& mask : vremat) {
|
||||
auto color = RandomColor();
|
||||
for (int y = 0; y < mask.rows; y++) {
|
||||
const float* mp = mask.ptr<float>(y);
|
||||
uchar* p = rendered.ptr<uchar>(y);
|
||||
for (int x = 0; x < mask.cols; x++) {
|
||||
if (mp[x] == 1.0) { // ??
|
||||
p[0] = cv::saturate_cast<uchar>(p[0] * 0.5 + color[0] * 0.5);
|
||||
p[1] = cv::saturate_cast<uchar>(p[1] * 0.5 + color[1] * 0.5);
|
||||
p[2] = cv::saturate_cast<uchar>(p[2] * 0.5 + color[2] * 0.5);
|
||||
}
|
||||
p += 3;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return rendered;
|
||||
}
|
||||
|
||||
ov::Tensor ANSSAM::Preprocess(cv::Mat& image)
|
||||
{
|
||||
if (!ConvertSize(image)) {
|
||||
//slog::info << "failed to Convert Size!\n";
|
||||
return ov::Tensor();
|
||||
}
|
||||
|
||||
if (!ConvertLayout(image)) {
|
||||
//slog::info << "Failed to Convert Layout!\n";
|
||||
return ov::Tensor();
|
||||
}
|
||||
|
||||
return BuildTensor();
|
||||
}
|
||||
|
||||
|
||||
cv::Mat ANSSAM::BuildOutput0()
|
||||
{
|
||||
auto* ptr = m_request.get_output_tensor(0).data();
|
||||
return cv::Mat(model_output0_shape[1], model_output0_shape[2], CV_32F, ptr);
|
||||
}
|
||||
|
||||
cv::Mat ANSSAM::BuildOutput1()
|
||||
{
|
||||
auto* ptr = m_request.get_output_tensor(1).data();
|
||||
return cv::Mat(model_output1_shape[1], model_output1_shape[2] * model_output1_shape[3], CV_32F, ptr);
|
||||
}
|
||||
std::vector<cv::Mat> ANSSAM::ProcessMaskNative(const cv::Mat& image, cv::Mat& protos, cv::Mat& masks_in, cv::Mat& bboxes, cv::Size shape)
|
||||
{
|
||||
std::vector<cv::Mat> result;
|
||||
cv::Mat matmulRes = (masks_in * protos).t(); //
|
||||
cv::Mat maskMat = matmulRes.reshape(bboxes.rows, { mh, mw });
|
||||
std::vector<cv::Mat> maskChannels;
|
||||
cv::split(maskMat, maskChannels);
|
||||
int scale_dw = this->dw / input_width * mw;
|
||||
int scale_dh = this->dh / input_height * mh;
|
||||
cv::Rect roi(scale_dw, scale_dh, mw - 2 * scale_dw, mh - 2 * scale_dh);
|
||||
float* pxvec = bboxes.ptr<float>(0);
|
||||
for (int i = 0; i < bboxes.rows; i++) {
|
||||
pxvec = bboxes.ptr<float>(i);
|
||||
cv::Mat dest, mask;
|
||||
cv::exp(-maskChannels[i], dest);
|
||||
dest = 1.0 / (1.0 + dest);
|
||||
dest = dest(roi);
|
||||
cv::resize(dest, mask, image.size(), cv::INTER_LINEAR);
|
||||
cv::Rect roi(pxvec[0], pxvec[1], pxvec[2] - pxvec[0], pxvec[3] - pxvec[1]);
|
||||
cv::Mat temmask = mask(roi);
|
||||
cv::Mat boxMask = cv::Mat(image.size(), mask.type(), cv::Scalar(0.0));
|
||||
float rx = std::max(pxvec[0], 0.0f);
|
||||
float ry = std::max(pxvec[1], 0.0f);
|
||||
for (int y = ry, my = 0; my < temmask.rows; y++, my++) {
|
||||
float* ptemmask = temmask.ptr<float>(my);
|
||||
float* pboxmask = boxMask.ptr<float>(y);
|
||||
for (int x = rx, mx = 0; mx < temmask.cols; x++, mx++) {
|
||||
pboxmask[x] = ptemmask[mx] > 0.5 ? 1.0 : 0.0;
|
||||
}
|
||||
}
|
||||
result.push_back(boxMask);
|
||||
}
|
||||
|
||||
return result;
|
||||
}
|
||||
std::vector<cv::Mat> ANSSAM::Postprocess(const cv::Mat& oriImage)
|
||||
{
|
||||
cv::Mat prediction = BuildOutput0();
|
||||
cv::Mat proto = BuildOutput1();
|
||||
std::vector<cv::Mat> remat = NMS(prediction, 100);
|
||||
|
||||
if (remat.size() < 2) {
|
||||
return std::vector<cv::Mat>();
|
||||
}
|
||||
cv::Mat box = remat[0];
|
||||
cv::Mat mask = remat[1];
|
||||
ScaleBoxes(box, oriImage.size());
|
||||
|
||||
return ProcessMaskNative(oriImage, proto, mask, box, oriImage.size());
|
||||
}
|
||||
bool ANSSAM::ConvertSize(cv::Mat& image)
|
||||
{
|
||||
float height = static_cast<float>(image.rows);
|
||||
float width = static_cast<float>(image.cols);
|
||||
float r = std::min(input_height / height, input_width / width);
|
||||
int padw = static_cast<int>(std::round(width * r)); //
|
||||
int padh = static_cast<int>(std::round(height * r));
|
||||
if ((int)width != padw || (int)height != padh)
|
||||
cv::resize(image, image, cv::Size(padw, padh));
|
||||
float _dw = (input_width - padw) / 2.f;
|
||||
float _dh = (input_height - padh) / 2.f;
|
||||
int top = static_cast<int>(std::round(_dh - 0.1f));
|
||||
int bottom = static_cast<int>(std::round(_dh + 0.1f));
|
||||
int left = static_cast<int>(std::round(_dw - 0.1f));
|
||||
int right = static_cast<int>(std::round(_dw + 0.1f));
|
||||
cv::copyMakeBorder(image, image, top, bottom, left, right, cv::BORDER_CONSTANT,
|
||||
cv::Scalar(114, 114, 114));
|
||||
this->ratio = 1 / r;
|
||||
this->dw = _dw;
|
||||
this->dh = _dh;
|
||||
return true;
|
||||
}
|
||||
bool ANSSAM::ConvertLayout(cv::Mat& image)
|
||||
{
|
||||
int row = image.rows;
|
||||
int col = image.cols;
|
||||
int channels = image.channels();
|
||||
|
||||
if (channels != 3)
|
||||
return false;
|
||||
|
||||
cv::cvtColor(image, image, cv::COLOR_BGR2RGB);
|
||||
|
||||
for (int c = 0; c < channels; c++) {
|
||||
for (int i = 0; i < row; i++) {
|
||||
for (int j = 0; j < col; j++) {
|
||||
float pix = image.at<cv::Vec3b>(i, j)[c];
|
||||
input_data[c * row * col + i * col + j] = pix / 255.0;
|
||||
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
ov::Tensor ANSSAM::BuildTensor()
|
||||
{
|
||||
ov::Shape shape = { 1, static_cast<unsigned long>(input_channel), static_cast<unsigned long>(input_height), static_cast<unsigned long>(input_width) };
|
||||
|
||||
return ov::Tensor(ov::element::f32, shape, input_data.data());;
|
||||
}
|
||||
bool ANSSAM::BuildProcessor()
|
||||
{
|
||||
try
|
||||
{
|
||||
m_ppp = std::make_shared<ov::preprocess::PrePostProcessor>(m_model);
|
||||
m_ppp->input().tensor()
|
||||
.set_shape({ 1, input_channel, input_height, input_width })
|
||||
.set_element_type(ov::element::f32)
|
||||
.set_layout("NCHW")
|
||||
.set_color_format(ov::preprocess::ColorFormat::RGB);
|
||||
m_model = m_ppp->build();
|
||||
}
|
||||
catch (const std::exception& e)
|
||||
{
|
||||
std::cerr << "Failed to build the model processor!\n" << e.what() << '\n';
|
||||
return false;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
bool ANSSAM::IsGpuAvaliable(const ov::Core& core)
|
||||
{
|
||||
std::vector<std::string> avaliableDevice = core.get_available_devices();
|
||||
auto iter = std::find(avaliableDevice.begin(), avaliableDevice.end(), "GPU");
|
||||
return iter != avaliableDevice.end();
|
||||
}
|
||||
cv::Scalar ANSSAM::RandomColor()
|
||||
{
|
||||
int b = rand() % 256;
|
||||
int g = rand() % 256;
|
||||
int r = rand() % 256;
|
||||
return cv::Scalar(b, g, r);
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
// std::vector<Object> ANSSAM::RunInference(const cv::Mat& input, const std::string& camera_id) {
|
||||
// std::lock_guard<std::recursive_mutex> lock(_mutex);
|
||||
// std::vector<Object> detectionObjects;
|
||||
// try {
|
||||
// if (!_licenseValid) {
|
||||
// this->_logger.LogError("ANSSAM::RunInference", "Invalid License", __FILE__, __LINE__);
|
||||
// return detectionObjects;
|
||||
// }
|
||||
// if (!_isInitialized) {
|
||||
// this->_logger.LogError("ANSSAM::RunInference", "Model is not initialized", __FILE__, __LINE__);
|
||||
// return detectionObjects;
|
||||
// }
|
||||
// if (input.empty()) return detectionObjects;
|
||||
// if ((input.cols < 10) || (input.rows < 10)) return detectionObjects;
|
||||
// // Convert grayscale to 3-channel BGR if needed
|
||||
// cv::Mat processedImage;
|
||||
// if (input.channels() == 1) {
|
||||
// cv::cvtColor(input, processedImage, cv::COLOR_GRAY2BGR);
|
||||
// }
|
||||
// else {
|
||||
// processedImage = input;
|
||||
// }
|
||||
// ov::Tensor input_tensor = Preprocess(processedImage);
|
||||
|
||||
// assert(input_tensor.get_size() != 0);
|
||||
|
||||
// m_request.set_input_tensor(input_tensor);
|
||||
// m_request.infer();
|
||||
|
||||
// std::vector<cv::Mat> result = Postprocess(input);
|
||||
// for (int i = 0; i < result.size(); i++) {
|
||||
// Object detectedObject;
|
||||
// cv::Rect rect;
|
||||
// std::vector<cv::Point2f> polygon;
|
||||
// std::string extraInfo;
|
||||
// extraInfo = MaskToPolygons(result.at(i), rect, polygon);
|
||||
// detectedObject.box = rect;
|
||||
// detectedObject.box.x = std::max(detectedObject.box.x, 0);
|
||||
// detectedObject.box.y = std::max(detectedObject.box.y, 0);
|
||||
// detectedObject.box.width = std::min(detectedObject.box.width, input.cols - detectedObject.box.x);
|
||||
// detectedObject.box.height = std::min(detectedObject.box.height, input.rows - detectedObject.box.y);
|
||||
// detectedObject.polygon = polygon;
|
||||
// detectedObject.extraInfo = extraInfo;
|
||||
// detectedObject.confidence = 1.0;
|
||||
// detectedObject.classId = 0;
|
||||
// detectedObject.className = "sam";
|
||||
// detectedObject.cameraId = camera_id;
|
||||
// detectionObjects.push_back(detectedObject);
|
||||
|
||||
// }
|
||||
////EnqueueDetection(detectionObjects,camera_id);
|
||||
// return detectionObjects;
|
||||
// }
|
||||
// catch (std::exception& e) {
|
||||
// detectionObjects.clear();
|
||||
// this->_logger.LogFatal("ANSSAM::RunInference", e.what(), __FILE__, __LINE__);
|
||||
// return detectionObjects;
|
||||
// }
|
||||
|
||||
// }
|
||||
Reference in New Issue
Block a user