#include "ANSSAM.h" #include #include #include namespace ANSCENTER { // public: bool ANSSAM::OptimizeModel(bool fp16, std::string& optimizedModelFolder) { std::lock_guard lock(_mutex); if (!ANSODBase::OptimizeModel(fp16, optimizedModelFolder)) { return false; } if (FileExist(_modelFilePath)) { std::string modelName = GetFileNameWithoutExtension(_modelFilePath); std::string binaryModelName = modelName + ".bin"; std::string modelFolder = GetParentFolder(_modelFilePath); std::string optimizedModelPath = CreateFilePath(modelFolder, binaryModelName); if (FileExist(optimizedModelPath)) { this->_logger.LogDebug("ANSSAM::OptimizeModel", "This model is optimized. No need other optimization.", __FILE__, __LINE__); optimizedModelFolder = modelFolder; return true; } else { this->_logger.LogFatal("ANSSAM::OptimizeModel", "This model can not be optimized.", __FILE__, __LINE__); optimizedModelFolder = modelFolder; return false; } } else { this->_logger.LogFatal("ANSSAM::OptimizeModel", "This model is not exist. Please check the model path again.", __FILE__, __LINE__); optimizedModelFolder = ""; return false; } } bool ANSSAM::LoadModel(const std::string& modelZipFilePath, const std::string& modelZipPassword) { std::lock_guard lock(_mutex); ModelLoadingGuard mlg(_modelLoading); try { bool result = ANSODBase::LoadModel(modelZipFilePath, modelZipPassword); if (!result) return false; std::string xmlfile = CreateFilePath(_modelFolder, "anssam.xml"); std::string binaryfile = CreateFilePath(_modelFolder, "anssam.bin"); if (std::filesystem::exists(xmlfile)) { _modelFilePath = xmlfile; this->_logger.LogDebug("ANSSAM::Initialize. Loading OpenVINO weight", _modelFilePath, __FILE__, __LINE__); if (FileExist(binaryfile)) { this->_logger.LogDebug("ANSSAM::Initialize binary weight", binaryfile, __FILE__, __LINE__); } else { this->_logger.LogError("ANSSAM::Initialize. Model binary file does not exist", binaryfile, __FILE__, __LINE__); return false; } } else { this->_logger.LogError("ANSPOSE::Initialize. Model file does exist", _modelFilePath, __FILE__, __LINE__); return false; } if (_modelConfig.modelConfThreshold <= 0)_modelConfig.modelConfThreshold = 0.4; if (_modelConfig.modelMNSThreshold <= 0)_modelConfig.modelMNSThreshold = 0.4; _isInitialized = Init(_modelFilePath, _modelConfig.modelConfThreshold, _modelConfig.modelMNSThreshold, true); return _isInitialized; } catch (std::exception& e) { this->_logger.LogFatal("ANSPOSE::LoadModel", e.what(), __FILE__, __LINE__); return false; } } bool ANSSAM::LoadModelFromFolder(std::string licenseKey, ModelConfig modelConfig, std::string modelName, std::string className, const std::string& modelFolder, std::string& labelMap) { std::lock_guard lock(_mutex); ModelLoadingGuard mlg(_modelLoading); try { bool result = ANSODBase::LoadModelFromFolder(licenseKey, modelConfig, modelName, className, modelFolder, labelMap); if (!result) return false; std::string _modelName = modelName; if (_modelName.empty()) { _modelName = "anssam"; } std::string xmlFileName = _modelName + ".xml"; std::string binFileName = _modelName + ".bin"; std::string xmlfile = CreateFilePath(_modelFolder, xmlFileName); std::string binaryfile = CreateFilePath(_modelFolder, binFileName); if (std::filesystem::exists(xmlfile)) { _modelFilePath = xmlfile; this->_logger.LogDebug("ANSSAM::Initialize. Loading OpenVINO weight", _modelFilePath, __FILE__, __LINE__); if (FileExist(binaryfile)) { this->_logger.LogDebug("ANSSAM::Initialize binary weight", binaryfile, __FILE__, __LINE__); } else { this->_logger.LogError("ANSSAM::Initialize. Model binary file does not exist", binaryfile, __FILE__, __LINE__); return false; } } else { this->_logger.LogError("ANSPOSE::Initialize. Model file does exist", _modelFilePath, __FILE__, __LINE__); return false; } if (modelConfig.modelConfThreshold <= 0)modelConfig.modelConfThreshold = 0.4; if (modelConfig.modelMNSThreshold <= 0)modelConfig.modelMNSThreshold = 0.4; labelMap = "SAM"; _isInitialized = Init(_modelFilePath, modelConfig.modelConfThreshold, modelConfig.modelMNSThreshold, true); return _isInitialized; } catch (std::exception& e) { this->_logger.LogFatal("ANSPOSE::LoadModel", e.what(), __FILE__, __LINE__); return false; } } bool ANSSAM::Initialize(std::string licenseKey, ModelConfig modelConfig, const std::string& modelZipFilePath, const std::string& modelZipPassword, std::string& labelMap) { try { std::lock_guard lock(_mutex); ModelLoadingGuard mlg(_modelLoading); _modelConfig = modelConfig; bool result = ANSODBase::Initialize(licenseKey, modelConfig, modelZipFilePath, modelZipPassword, labelMap); if (!result) return false; std::string xmlfile = CreateFilePath(_modelFolder, "anssam.xml"); std::string binaryfile = CreateFilePath(_modelFolder, "anssam.bin"); if (std::filesystem::exists(xmlfile)) { _modelFilePath = xmlfile; this->_logger.LogDebug("ANSSAM::Initialize. Loading OpenVINO weight", _modelFilePath, __FILE__, __LINE__); if (FileExist(binaryfile)) { this->_logger.LogDebug("ANSSAM::Initialize binary weight", binaryfile, __FILE__, __LINE__); } else { this->_logger.LogError("ANSSAM::Initialize. Model binary file does not exist", binaryfile, __FILE__, __LINE__); return false; } } else { this->_logger.LogError("ANSPOSE::Initialize. Model file does exist", _modelFilePath, __FILE__, __LINE__); return false; } if (modelConfig.modelConfThreshold <= 0)modelConfig.modelConfThreshold = 0.4; if (modelConfig.modelMNSThreshold <= 0)modelConfig.modelMNSThreshold = 0.4; labelMap = "SAM"; _isInitialized = Init(_modelFilePath, modelConfig.modelConfThreshold, modelConfig.modelMNSThreshold, true); return _isInitialized; } catch (std::exception& e) { this->_logger.LogFatal("ANSPOSE::Initialize", e.what(), __FILE__, __LINE__); return false; } } std::vector ANSSAM::RunInference(const cv::Mat& input) { return RunInference(input, "CustomCam"); } std::vector ANSSAM::RunInference(const cv::Mat& input,const std::string& camera_id) { if (!PreInferenceCheck("ANSSAM::RunInference")) return {}; try { if (!_licenseValid || !_isInitialized || input.empty() || input.cols < 10 || input.rows < 10) { return {}; } // Convert to BGR cv::Mat processedImage = (input.channels() == 1) ? [&] { cv::Mat tmp; cv::cvtColor(input, tmp, cv::COLOR_GRAY2BGR); return tmp; }() : input; // Run inference ov::Tensor input_tensor = Preprocess(processedImage); if (input_tensor.get_size() == 0) return {}; m_request.set_input_tensor(input_tensor); m_request.infer(); std::vector masks = Postprocess(processedImage); if (masks.empty()) return {}; // Build output std::vector output; output.reserve(masks.size()); const cv::Rect imageBounds(0, 0, input.cols, input.rows); for (auto& mask : masks) { Object obj; obj.extraInfo = MaskToPolygons(mask, obj.box, obj.polygon); obj.box &= imageBounds; if (obj.box.area() == 0) continue; obj.confidence = 1.0f; obj.classId = 0; obj.className = "sam"; obj.cameraId = camera_id; output.push_back(std::move(obj)); } if (_trackerEnabled) { output = ApplyTracking(output, camera_id); if (_stabilizationEnabled) output = StabilizeDetections(output, camera_id); } return output; } catch (const std::exception& e) { _logger.LogFatal("ANSSAM::RunInference", e.what(), __FILE__, __LINE__); return {}; } } ANSSAM::~ANSSAM() { Destroy(); } bool ANSSAM::Destroy() { try { if (FolderExist(_modelFolder)) { DeleteFolder(_modelFolder); } return true; } catch (std::exception& e) { this->_logger.LogFatal("ANSSAM::Destroy", e.what(), __FILE__, __LINE__); return false; } } //private: bool ANSSAM::Init(const std::string& xml_path, float conf, float iou, bool useGpu) { m_conf = conf; m_iou = iou; if (!std::filesystem::exists(xml_path)) return false; m_model = m_core.read_model(xml_path); if (!ParseArgs()) return false; if (!BuildProcessor()) return false; m_compiled_model = m_core.compile_model(m_model, useGpu && IsGpuAvaliable(m_core) ? "GPU" : "CPU"); m_request = m_compiled_model.create_infer_request(); return true; } void ANSSAM::Infer(const std::string& image_path) { try { cv::Mat image = cv::imread(image_path); cv::Mat rendered = Infer(image); std::string savedir = std::filesystem::current_path().string() + "/results"; if (!std::filesystem::exists(savedir)) std::filesystem::create_directory(savedir); std::string savepath = savedir + "/" + std::filesystem::path(image_path).filename().string(); cv::imwrite(savepath, rendered); //slog::info << "result saved in :" << savepath << "\n"; } catch (const std::exception& e) { //slog::info << "Failed to Infer! ec: " << e.what() << '\n'; } } cv::Mat ANSSAM::Infer(const cv::Mat& image) { cv::Mat processedImg = image.clone(); ov::Tensor input_tensor = Preprocess(processedImg); assert(input_tensor.get_size() != 0); m_request.set_input_tensor(input_tensor); m_request.infer(); std::vector result = Postprocess(image); return Render(image, result); } bool ANSSAM::ParseArgs() { try { model_input_shape = m_model->input().get_shape(); model_output0_shape = m_model->output(0).get_shape(); model_output1_shape = m_model->output(1).get_shape(); //slog::info << "xml input shape:" << model_input_shape << "\n"; //slog::info << "xml output shape 0:" << model_output0_shape << "\n"; //slog::info << "xml output shape 1:" << model_output1_shape << "\n"; // [1, 3, 640, 640] input_channel = model_input_shape[1]; input_height = model_input_shape[2]; input_width = model_input_shape[3]; this->input_data.resize(input_channel * input_height * input_height); //slog::info << "model input height:" << input_height << " input width:" << input_width << "\n"; // output0 = [1,37,8400] // output1 = [1,32,160,160] mh = model_output1_shape[2]; mw = model_output1_shape[3]; //slog::info << "model output mh:" << mh << " output mw:" << mw << "\n"; } catch (const std::exception& e) { //slog::info << "Failed to Parse Args. " << e.what() << '\n'; return false; } return true; } void ANSSAM::ScaleBoxes(cv::Mat& box, const cv::Size& oriSize) { float oriWidth = static_cast(oriSize.width); float oriHeight = static_cast(oriSize.height); float* pxvec = box.ptr(0); for (int i = 0; i < box.rows; i++) { pxvec = box.ptr(i); pxvec[0] -= this->dw; pxvec[0] = std::clamp(pxvec[0] * this->ratio, 0.f, oriWidth); pxvec[1] -= this->dh; pxvec[1] = std::clamp(pxvec[1] * this->ratio, 0.f, oriHeight); pxvec[2] -= this->dw; pxvec[2] = std::clamp(pxvec[2] * this->ratio, 0.f, oriWidth); pxvec[3] -= this->dh; pxvec[3] = std::clamp(pxvec[3] * this->ratio, 0.f, oriHeight); } } std::vector ANSSAM::NMS(cv::Mat& prediction, int max_det) { std::vector vreMat; cv::Mat temData = cv::Mat(); prediction = prediction.t(); // [37, 8400] --> [rows:8400, cols:37] float* pxvec = prediction.ptr(0); for (int i = 0; i < prediction.rows; i++) { pxvec = prediction.ptr(i); if (pxvec[4] > m_conf) { temData.push_back(prediction.rowRange(i, i + 1).clone()); } } if (temData.rows == 0) { return vreMat; } cv::Mat box = temData.colRange(0, 4).clone(); // cv::Mat cls = temData.colRange(4, 5).clone(); // cv::Mat mask = temData.colRange(5, temData.cols).clone(); // cv::Mat j = cv::Mat::zeros(cls.size(), CV_32F); cv::Mat dst; cv::hconcat(box, cls, dst); // concat cv::hconcat(dst, j, dst); cv::hconcat(dst, mask, dst); // dst = [box class j mask] concat std::vector scores; std::vector boxes; pxvec = dst.ptr(0); for (int i = 0; i < dst.rows; i++) { pxvec = dst.ptr(i); boxes.push_back(cv::Rect(pxvec[0], pxvec[1], pxvec[2], pxvec[3])); scores.push_back(pxvec[4]); } std::vector indices; cv::dnn::NMSBoxes(boxes, scores, m_conf, m_iou, indices); cv::Mat reMat; for (int i = 0; i < indices.size() && i < max_det; i++) { int index = indices[i]; reMat.push_back(dst.rowRange(index, index + 1).clone()); } box = reMat.colRange(0, 6).clone(); xywh2xyxy(box); mask = reMat.colRange(6, reMat.cols).clone(); vreMat.push_back(box); vreMat.push_back(mask); return vreMat; } void ANSSAM::xywh2xyxy(cv::Mat& box) { float* pxvec = box.ptr(0); for (int i = 0; i < box.rows; i++) { pxvec = box.ptr(i); float w = pxvec[2]; float h = pxvec[3]; float cx = pxvec[0]; float cy = pxvec[1]; pxvec[0] = cx - w / 2; pxvec[1] = cy - h / 2; pxvec[2] = cx + w / 2; pxvec[3] = cy + h / 2; } } cv::Mat ANSSAM::ConvertToBinary(cv::Mat src) { cv::Mat gray, binary; // Check if the image is already in grayscale if (src.channels() == 3) { // Convert from BGR to Grayscale cv::cvtColor(src, gray, cv::COLOR_BGR2GRAY); } else if (src.channels() == 1) { gray = src.clone(); // Use the grayscale image as is } else { std::cerr << "Unsupported number of channels: " << src.channels() << std::endl; return cv::Mat(); // Return an empty matrix on failure } // Convert grayscale to binary cv::threshold(gray, binary, 127, 255, cv::THRESH_BINARY); // Adjust the threshold as needed cv::Mat binaryMask; cv::threshold(binary, binaryMask, 0.5, 255, cv::THRESH_BINARY); binaryMask.convertTo(binaryMask, CV_8UC1); return binaryMask; } std::string ANSSAM::MaskToPolygons(const cv::Mat& image, cv::Rect& boundingBox, std::vector& polygon) { try { // image is boxMask: CV_32FC1 with 0.0 / 1.0 → directly convert cv::Mat mask8u; image.convertTo(mask8u, CV_8UC1, 255.0); // 1.0 → 255, 0.0 → 0 if (mask8u.empty() || mask8u.type() != CV_8UC1) { return ""; } std::vector> contours; std::vector hierarchy; cv::findContours(mask8u, contours, hierarchy, cv::RETR_EXTERNAL, cv::CHAIN_APPROX_SIMPLE); if (contours.empty()) { return ""; } // Select the largest contour size_t largestContourIdx = 0; double maxArea = 0.0; for (size_t i = 0; i < contours.size(); ++i) { double area = cv::contourArea(contours[i]); if (area > maxArea) { maxArea = area; largestContourIdx = i; } } // Approximate polygon std::vector approxPolygon; cv::approxPolyDP(contours[largestContourIdx], approxPolygon, 3, true); // Convert to Point2f polygon.clear(); for (const auto& pt : approxPolygon) { polygon.push_back(cv::Point2f(pt.x, pt.y)); } // Calculate bounding box boundingBox = cv::boundingRect(approxPolygon); // Convert polygon to string std::stringstream xCoords, yCoords; for (size_t i = 0; i < polygon.size(); ++i) { if (i > 0) { xCoords << "; "; yCoords << "; "; } xCoords << polygon[i].x; yCoords << polygon[i].y; } std::string formattedString = xCoords.str() + " | " + yCoords.str(); return formattedString; } catch (const std::exception& e) { //slog::info << "Failed to convert mask to polygons: " << e.what() << '\n'; return ""; } } cv::Mat ANSSAM::Render(const cv::Mat& image, const std::vector& vremat) { cv::Mat rendered = image.clone(); for (const auto& mask : vremat) { auto color = RandomColor(); for (int y = 0; y < mask.rows; y++) { const float* mp = mask.ptr(y); uchar* p = rendered.ptr(y); for (int x = 0; x < mask.cols; x++) { if (mp[x] == 1.0) { // ?? p[0] = cv::saturate_cast(p[0] * 0.5 + color[0] * 0.5); p[1] = cv::saturate_cast(p[1] * 0.5 + color[1] * 0.5); p[2] = cv::saturate_cast(p[2] * 0.5 + color[2] * 0.5); } p += 3; } } } return rendered; } ov::Tensor ANSSAM::Preprocess(cv::Mat& image) { if (!ConvertSize(image)) { //slog::info << "failed to Convert Size!\n"; return ov::Tensor(); } if (!ConvertLayout(image)) { //slog::info << "Failed to Convert Layout!\n"; return ov::Tensor(); } return BuildTensor(); } cv::Mat ANSSAM::BuildOutput0() { auto* ptr = m_request.get_output_tensor(0).data(); return cv::Mat(model_output0_shape[1], model_output0_shape[2], CV_32F, ptr); } cv::Mat ANSSAM::BuildOutput1() { auto* ptr = m_request.get_output_tensor(1).data(); return cv::Mat(model_output1_shape[1], model_output1_shape[2] * model_output1_shape[3], CV_32F, ptr); } std::vector ANSSAM::ProcessMaskNative(const cv::Mat& image, cv::Mat& protos, cv::Mat& masks_in, cv::Mat& bboxes, cv::Size shape) { std::vector result; cv::Mat matmulRes = (masks_in * protos).t(); // cv::Mat maskMat = matmulRes.reshape(bboxes.rows, { mh, mw }); std::vector maskChannels; cv::split(maskMat, maskChannels); int scale_dw = this->dw / input_width * mw; int scale_dh = this->dh / input_height * mh; cv::Rect roi(scale_dw, scale_dh, mw - 2 * scale_dw, mh - 2 * scale_dh); float* pxvec = bboxes.ptr(0); for (int i = 0; i < bboxes.rows; i++) { pxvec = bboxes.ptr(i); cv::Mat dest, mask; cv::exp(-maskChannels[i], dest); dest = 1.0 / (1.0 + dest); dest = dest(roi); cv::resize(dest, mask, image.size(), cv::INTER_LINEAR); cv::Rect roi(pxvec[0], pxvec[1], pxvec[2] - pxvec[0], pxvec[3] - pxvec[1]); cv::Mat temmask = mask(roi); cv::Mat boxMask = cv::Mat(image.size(), mask.type(), cv::Scalar(0.0)); float rx = std::max(pxvec[0], 0.0f); float ry = std::max(pxvec[1], 0.0f); for (int y = ry, my = 0; my < temmask.rows; y++, my++) { float* ptemmask = temmask.ptr(my); float* pboxmask = boxMask.ptr(y); for (int x = rx, mx = 0; mx < temmask.cols; x++, mx++) { pboxmask[x] = ptemmask[mx] > 0.5 ? 1.0 : 0.0; } } result.push_back(boxMask); } return result; } std::vector ANSSAM::Postprocess(const cv::Mat& oriImage) { cv::Mat prediction = BuildOutput0(); cv::Mat proto = BuildOutput1(); std::vector remat = NMS(prediction, 100); if (remat.size() < 2) { return std::vector(); } cv::Mat box = remat[0]; cv::Mat mask = remat[1]; ScaleBoxes(box, oriImage.size()); return ProcessMaskNative(oriImage, proto, mask, box, oriImage.size()); } bool ANSSAM::ConvertSize(cv::Mat& image) { float height = static_cast(image.rows); float width = static_cast(image.cols); float r = std::min(input_height / height, input_width / width); int padw = static_cast(std::round(width * r)); // int padh = static_cast(std::round(height * r)); if ((int)width != padw || (int)height != padh) cv::resize(image, image, cv::Size(padw, padh)); float _dw = (input_width - padw) / 2.f; float _dh = (input_height - padh) / 2.f; int top = static_cast(std::round(_dh - 0.1f)); int bottom = static_cast(std::round(_dh + 0.1f)); int left = static_cast(std::round(_dw - 0.1f)); int right = static_cast(std::round(_dw + 0.1f)); cv::copyMakeBorder(image, image, top, bottom, left, right, cv::BORDER_CONSTANT, cv::Scalar(114, 114, 114)); this->ratio = 1 / r; this->dw = _dw; this->dh = _dh; return true; } bool ANSSAM::ConvertLayout(cv::Mat& image) { int row = image.rows; int col = image.cols; int channels = image.channels(); if (channels != 3) return false; cv::cvtColor(image, image, cv::COLOR_BGR2RGB); for (int c = 0; c < channels; c++) { for (int i = 0; i < row; i++) { for (int j = 0; j < col; j++) { float pix = image.at(i, j)[c]; input_data[c * row * col + i * col + j] = pix / 255.0; } } } return true; } ov::Tensor ANSSAM::BuildTensor() { ov::Shape shape = { 1, static_cast(input_channel), static_cast(input_height), static_cast(input_width) }; return ov::Tensor(ov::element::f32, shape, input_data.data());; } bool ANSSAM::BuildProcessor() { try { m_ppp = std::make_shared(m_model); m_ppp->input().tensor() .set_shape({ 1, input_channel, input_height, input_width }) .set_element_type(ov::element::f32) .set_layout("NCHW") .set_color_format(ov::preprocess::ColorFormat::RGB); m_model = m_ppp->build(); } catch (const std::exception& e) { std::cerr << "Failed to build the model processor!\n" << e.what() << '\n'; return false; } return true; } bool ANSSAM::IsGpuAvaliable(const ov::Core& core) { std::vector avaliableDevice = core.get_available_devices(); auto iter = std::find(avaliableDevice.begin(), avaliableDevice.end(), "GPU"); return iter != avaliableDevice.end(); } cv::Scalar ANSSAM::RandomColor() { int b = rand() % 256; int g = rand() % 256; int r = rand() % 256; return cv::Scalar(b, g, r); } } // std::vector ANSSAM::RunInference(const cv::Mat& input, const std::string& camera_id) { // std::lock_guard lock(_mutex); // std::vector detectionObjects; // try { // if (!_licenseValid) { // this->_logger.LogError("ANSSAM::RunInference", "Invalid License", __FILE__, __LINE__); // return detectionObjects; // } // if (!_isInitialized) { // this->_logger.LogError("ANSSAM::RunInference", "Model is not initialized", __FILE__, __LINE__); // return detectionObjects; // } // if (input.empty()) return detectionObjects; // if ((input.cols < 10) || (input.rows < 10)) return detectionObjects; // // Convert grayscale to 3-channel BGR if needed // cv::Mat processedImage; // if (input.channels() == 1) { // cv::cvtColor(input, processedImage, cv::COLOR_GRAY2BGR); // } // else { // processedImage = input; // } // ov::Tensor input_tensor = Preprocess(processedImage); // assert(input_tensor.get_size() != 0); // m_request.set_input_tensor(input_tensor); // m_request.infer(); // std::vector result = Postprocess(input); // for (int i = 0; i < result.size(); i++) { // Object detectedObject; // cv::Rect rect; // std::vector polygon; // std::string extraInfo; // extraInfo = MaskToPolygons(result.at(i), rect, polygon); // detectedObject.box = rect; // detectedObject.box.x = std::max(detectedObject.box.x, 0); // detectedObject.box.y = std::max(detectedObject.box.y, 0); // detectedObject.box.width = std::min(detectedObject.box.width, input.cols - detectedObject.box.x); // detectedObject.box.height = std::min(detectedObject.box.height, input.rows - detectedObject.box.y); // detectedObject.polygon = polygon; // detectedObject.extraInfo = extraInfo; // detectedObject.confidence = 1.0; // detectedObject.classId = 0; // detectedObject.className = "sam"; // detectedObject.cameraId = camera_id; // detectionObjects.push_back(detectedObject); // } ////EnqueueDetection(detectionObjects,camera_id); // return detectionObjects; // } // catch (std::exception& e) { // detectionObjects.clear(); // this->_logger.LogFatal("ANSSAM::RunInference", e.what(), __FILE__, __LINE__); // return detectionObjects; // } // }