engines/OpenVINOEngine/src/models/detection_model_retinaface.cpp

/*
// Copyright (C) 2020-2024 Intel Corporation
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//      http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
*/

#include "models/detection_model_retinaface.h"

#include <stddef.h>

#include <algorithm>
#include <cmath>
#include <stdexcept>

#include <opencv2/core.hpp>
#include <openvino/openvino.hpp>

#include <utils/common.hpp>
#include <utils/nms.hpp>

#include "models/internal_model_data.h"
#include "models/results.h"

ModelRetinaFace::ModelRetinaFace(const std::string& modelFileName,
                                 float confidenceThreshold,
                                 bool useAutoResize,
                                 float boxIOUThreshold,
                                 const std::string& layout)
    : DetectionModel(modelFileName, confidenceThreshold, useAutoResize, {"Face"}, layout),  // Default label is "Face"
      shouldDetectMasks(false),
      shouldDetectLandmarks(false),
      boxIOUThreshold(boxIOUThreshold),
      maskThreshold(0.8f),
      landmarkStd(1.0f),
      anchorCfg({{32, {32, 16}, 16, {1}}, {16, {8, 4}, 16, {1}}, {8, {2, 1}, 16, {1}}}) {
    generateAnchorsFpn();
}

void ModelRetinaFace::prepareInputsOutputs(std::shared_ptr<ov::Model>& model) {
    // --------------------------- Configure input & output -------------------------------------------------
    // --------------------------- Prepare input  ------------------------------------------------------
    if (model->inputs().size() != 1) {
        throw std::logic_error("RetinaFace model wrapper expects models that have only 1 input");
    }
    const ov::Shape& inputShape = model->input().get_shape();
    const ov::Layout& inputLayout = getInputLayout(model->input());

    if (inputShape[ov::layout::channels_idx(inputLayout)] != 3) {
        throw std::logic_error("Expected 3-channel input");
    }

    ov::preprocess::PrePostProcessor ppp(model);
    ppp.input().tensor().set_element_type(ov::element::u8).set_layout({"NHWC"});

    if (useAutoResize) {
        ppp.input().tensor().set_spatial_dynamic_shape();

        ppp.input()
            .preprocess()
            .convert_element_type(ov::element::f32)
            .resize(ov::preprocess::ResizeAlgorithm::RESIZE_LINEAR);
    }

    ppp.input().model().set_layout(inputLayout);

    // --------------------------- Reading image input parameters -------------------------------------------
    inputsNames.push_back(model->input().get_any_name());
    netInputWidth = inputShape[ov::layout::width_idx(inputLayout)];
    netInputHeight = inputShape[ov::layout::height_idx(inputLayout)];

    // --------------------------- Prepare output  -----------------------------------------------------

    const ov::OutputVector& outputs = model->outputs();
    if (outputs.size() != 6 && outputs.size() != 9 && outputs.size() != 12) {
        throw std::logic_error("RetinaFace model wrapper expects models that have 6, 9 or 12 outputs");
    }

    const ov::Layout outputLayout{"NCHW"};
    std::vector<size_t> outputsSizes[OUT_MAX];
    for (const auto& output : model->outputs()) {
        auto outTensorName = output.get_any_name();
        outputsNames.push_back(outTensorName);
        ppp.output(outTensorName).tensor().set_element_type(ov::element::f32).set_layout(outputLayout);

        OutputType type = OUT_MAX;
        if (outTensorName.find("box") != std::string::npos) {
            type = OUT_BOXES;
        } else if (outTensorName.find("cls") != std::string::npos) {
            type = OUT_SCORES;
        } else if (outTensorName.find("landmark") != std::string::npos) {
            type = OUT_LANDMARKS;
            shouldDetectLandmarks = true;
        } else if (outTensorName.find("type") != std::string::npos) {
            type = OUT_MASKSCORES;
            labels.clear();
            labels.push_back("No Mask");
            labels.push_back("Mask");
            shouldDetectMasks = true;
            landmarkStd = 0.2f;
        } else {
            continue;
        }

        size_t num = output.get_shape()[ov::layout::height_idx(outputLayout)];
        size_t i = 0;
        for (; i < outputsSizes[type].size(); ++i) {
            if (num < outputsSizes[type][i]) {
                break;
            }
        }
        separateOutputsNames[type].insert(separateOutputsNames[type].begin() + i, outTensorName);
        outputsSizes[type].insert(outputsSizes[type].begin() + i, num);
    }
    model = ppp.build();

    for (size_t idx = 0; idx < outputsSizes[OUT_BOXES].size(); ++idx) {
        size_t width = outputsSizes[OUT_BOXES][idx];
        size_t height = outputsSizes[OUT_BOXES][idx];
        auto s = anchorCfg[idx].stride;
        auto anchorNum = anchorsFpn[s].size();

        anchors.push_back(std::vector<Anchor>(height * width * anchorNum));
        for (size_t iw = 0; iw < width; ++iw) {
            size_t sw = iw * s;
            for (size_t ih = 0; ih < height; ++ih) {
                size_t sh = ih * s;
                for (size_t k = 0; k < anchorNum; ++k) {
                    Anchor& anc = anchors[idx][(ih * width + iw) * anchorNum + k];
                    anc.left = anchorsFpn[s][k].left + sw;
                    anc.top = anchorsFpn[s][k].top + sh;
                    anc.right = anchorsFpn[s][k].right + sw;
                    anc.bottom = anchorsFpn[s][k].bottom + sh;
                }
            }
        }
    }
}

std::vector<Anchor> ratioEnum(const Anchor& anchor, const std::vector<int>& ratios) {
    std::vector<Anchor> retVal;
    const auto w = anchor.getWidth();
    const auto h = anchor.getHeight();
    const auto xCtr = anchor.getXCenter();
    const auto yCtr = anchor.getYCenter();

    for (const auto ratio : ratios) {
        const auto size = w * h;
        const auto sizeRatio = static_cast<float>(size) / ratio;
        const auto ws = sqrt(sizeRatio);
        const auto hs = ws * ratio;
        retVal.push_back({static_cast<float>(xCtr - 0.5f * (ws - 1.0f)),
                          static_cast<float>(yCtr - 0.5f * (hs - 1.0f)),
                          static_cast<float>(xCtr + 0.5f * (ws - 1.0f)),
                          static_cast<float>(yCtr + 0.5f * (hs - 1.0f))});
    }
    return retVal;
}

std::vector<Anchor> scaleEnum(const Anchor& anchor, const std::vector<int>& scales) {
    std::vector<Anchor> retVal;
    const auto w = anchor.getWidth();
    const auto h = anchor.getHeight();
    const auto xCtr = anchor.getXCenter();
    const auto yCtr = anchor.getYCenter();

    for (auto scale : scales) {
        const auto ws = w * scale;
        const auto hs = h * scale;
        retVal.push_back({static_cast<float>(xCtr - 0.5f * (ws - 1.0f)),
                          static_cast<float>(yCtr - 0.5f * (hs - 1.0f)),
                          static_cast<float>(xCtr + 0.5f * (ws - 1.0f)),
                          static_cast<float>(yCtr + 0.5f * (hs - 1.0f))});
    }
    return retVal;
}

std::vector<Anchor> generateAnchors(const int baseSize,
                                                     const std::vector<int>& ratios,
                                                     const std::vector<int>& scales) {
    Anchor baseAnchor{0.0f, 0.0f, baseSize - 1.0f, baseSize - 1.0f};
    auto ratioAnchors = ratioEnum(baseAnchor, ratios);
    std::vector<Anchor> retVal;

    for (const auto& ra : ratioAnchors) {
        auto addon = scaleEnum(ra, scales);
        retVal.insert(retVal.end(), addon.begin(), addon.end());
    }
    return retVal;
}

void ModelRetinaFace::generateAnchorsFpn() {
    auto cfg = anchorCfg;
    std::sort(cfg.begin(), cfg.end(), [](const AnchorCfgLine& x, const AnchorCfgLine& y) {
        return x.stride > y.stride;
    });

    for (const auto& cfgLine : cfg) {
        anchorsFpn.emplace(cfgLine.stride, generateAnchors(cfgLine.baseSize, cfgLine.ratios, cfgLine.scales));
    }
}

std::vector<size_t> thresholding(const ov::Tensor& scoresTensor, const int anchorNum, const float confidenceThreshold) {
    std::vector<size_t> indices;
    indices.reserve(ModelRetinaFace::INIT_VECTOR_SIZE);
    auto shape = scoresTensor.get_shape();
    size_t restAnchors = shape[1] - anchorNum;
    const float* scoresPtr = scoresTensor.data<float>();

    for (size_t x = anchorNum; x < shape[1]; ++x) {
        for (size_t y = 0; y < shape[2]; ++y) {
            for (size_t z = 0; z < shape[3]; ++z) {
                auto idx = (x * shape[2] + y) * shape[3] + z;
                auto score = scoresPtr[idx];
                if (score >= confidenceThreshold) {
                    indices.push_back((y * shape[3] + z) * restAnchors + (x - anchorNum));
                }
            }
        }
    }

    return indices;
}

void filterScores(std::vector<float>& scores,
                  const std::vector<size_t>& indices,
                  const ov::Tensor& scoresTensor,
                  const int anchorNum) {
    const auto& shape = scoresTensor.get_shape();
    const float* scoresPtr = scoresTensor.data<float>();
    const auto start = shape[2] * shape[3] * anchorNum;

    for (auto i : indices) {
        auto offset = (i % anchorNum) * shape[2] * shape[3] + i / anchorNum;
        scores.push_back(scoresPtr[start + offset]);
    }
}

void filterBoxes(std::vector<Anchor>& boxes,
                 const std::vector<size_t>& indices,
                 const ov::Tensor& boxesTensor,
                 int anchorNum,
                 const std::vector<Anchor>& anchors) {
    const auto& shape = boxesTensor.get_shape();
    const float* boxesPtr = boxesTensor.data<float>();
    const auto boxPredLen = shape[1] / anchorNum;
    const auto blockWidth = shape[2] * shape[3];

    for (auto i : indices) {
        auto offset = blockWidth * boxPredLen * (i % anchorNum) + (i / anchorNum);

        const auto dx = boxesPtr[offset];
        const auto dy = boxesPtr[offset + blockWidth];
        const auto dw = boxesPtr[offset + blockWidth * 2];
        const auto dh = boxesPtr[offset + blockWidth * 3];

        const auto predCtrX = dx * anchors[i].getWidth() + anchors[i].getXCenter();
        const auto predCtrY = dy * anchors[i].getHeight() + anchors[i].getYCenter();
        const auto predW = exp(dw) * anchors[i].getWidth();
        const auto predH = exp(dh) * anchors[i].getHeight();

        boxes.push_back({static_cast<float>(predCtrX - 0.5f * (predW - 1.0f)),
                         static_cast<float>(predCtrY - 0.5f * (predH - 1.0f)),
                         static_cast<float>(predCtrX + 0.5f * (predW - 1.0f)),
                         static_cast<float>(predCtrY + 0.5f * (predH - 1.0f))});
    }
}

void filterLandmarks(std::vector<cv::Point2f>& landmarks,
                     const std::vector<size_t>& indices,
                     const ov::Tensor& landmarksTensor,
                     int anchorNum,
                     const std::vector<Anchor>& anchors,
                     const float landmarkStd) {
    const auto& shape = landmarksTensor.get_shape();
    const float* landmarksPtr = landmarksTensor.data<float>();
    const auto landmarkPredLen = shape[1] / anchorNum;
    const auto blockWidth = shape[2] * shape[3];

    for (auto i : indices) {
        for (int j = 0; j < ModelRetinaFace::LANDMARKS_NUM; ++j) {
            auto offset = (i % anchorNum) * landmarkPredLen * shape[2] * shape[3] + i / anchorNum;
            auto deltaX = landmarksPtr[offset + j * 2 * blockWidth] * landmarkStd;
            auto deltaY = landmarksPtr[offset + (j * 2 + 1) * blockWidth] * landmarkStd;
            landmarks.push_back({deltaX * anchors[i].getWidth() + anchors[i].getXCenter(),
                                 deltaY * anchors[i].getHeight() + anchors[i].getYCenter()});
        }
    }
}

void filterMasksScores(std::vector<float>& masks,
                       const std::vector<size_t>& indices,
                       const ov::Tensor& maskScoresTensor,
                       const int anchorNum) {
    auto shape = maskScoresTensor.get_shape();
    const float* maskScoresPtr = maskScoresTensor.data<float>();
    auto start = shape[2] * shape[3] * anchorNum * 2;

    for (auto i : indices) {
        auto offset = (i % anchorNum) * shape[2] * shape[3] + i / anchorNum;
        masks.push_back(maskScoresPtr[start + offset]);
    }
}

std::unique_ptr<ResultBase> ModelRetinaFace::postprocess(InferenceResult& infResult) {
    std::vector<float> scores;
    scores.reserve(INIT_VECTOR_SIZE);
    std::vector<Anchor> boxes;
    boxes.reserve(INIT_VECTOR_SIZE);
    std::vector<cv::Point2f> landmarks;
    std::vector<float> masks;

    if (shouldDetectLandmarks) {
        landmarks.reserve(INIT_VECTOR_SIZE);
    }
    if (shouldDetectMasks) {
        masks.reserve(INIT_VECTOR_SIZE);
    }

    // --------------------------- Gather & Filter output from all levels
    // ----------------------------------------------------------
    for (size_t idx = 0; idx < anchorCfg.size(); ++idx) {
        const auto boxRaw = infResult.outputsData[separateOutputsNames[OUT_BOXES][idx]];
        const auto scoresRaw = infResult.outputsData[separateOutputsNames[OUT_SCORES][idx]];
        auto s = anchorCfg[idx].stride;
        auto anchorNum = anchorsFpn[s].size();

        auto validIndices = thresholding(scoresRaw, anchorNum, confidenceThreshold);
        filterScores(scores, validIndices, scoresRaw, anchorNum);
        filterBoxes(boxes, validIndices, boxRaw, anchorNum, anchors[idx]);
        if (shouldDetectLandmarks) {
            const auto landmarksRaw = infResult.outputsData[separateOutputsNames[OUT_LANDMARKS][idx]];
            filterLandmarks(landmarks, validIndices, landmarksRaw, anchorNum, anchors[idx], landmarkStd);
        }
        if (shouldDetectMasks) {
            const auto masksRaw = infResult.outputsData[separateOutputsNames[OUT_MASKSCORES][idx]];
            filterMasksScores(masks, validIndices, masksRaw, anchorNum);
        }
    }
    // --------------------------- Apply Non-maximum Suppression
    // ---------------------------------------------------------- !shouldDetectLandmarks determines nms behavior, if
    // true - boundaries are included in areas calculation
    const auto keep = nms(boxes, scores, boxIOUThreshold, !shouldDetectLandmarks);

    // --------------------------- Create detection result objects
    // --------------------------------------------------------
    RetinaFaceDetectionResult* result = new RetinaFaceDetectionResult(infResult.frameId, infResult.metaData);

    const auto imgWidth = infResult.internalModelData->asRef<InternalImageModelData>().inputImgWidth;
    const auto imgHeight = infResult.internalModelData->asRef<InternalImageModelData>().inputImgHeight;
    const auto scaleX = static_cast<float>(netInputWidth) / imgWidth;
    const auto scaleY = static_cast<float>(netInputHeight) / imgHeight;

    result->objects.reserve(keep.size());
    result->landmarks.reserve(keep.size() * ModelRetinaFace::LANDMARKS_NUM);
    for (auto i : keep) {
        DetectedObject desc;
        desc.confidence = scores[i];
        //--- Scaling coordinates
        boxes[i].left /= scaleX;
        boxes[i].top /= scaleY;
        boxes[i].right /= scaleX;
        boxes[i].bottom /= scaleY;

        desc.x = clamp(boxes[i].left, 0.f, static_cast<float>(imgWidth));
        desc.y = clamp(boxes[i].top, 0.f, static_cast<float>(imgHeight));
        desc.width = clamp(boxes[i].getWidth(), 0.f, static_cast<float>(imgWidth));
        desc.height = clamp(boxes[i].getHeight(), 0.f, static_cast<float>(imgHeight));
        //--- Default label 0 - Face. If detecting masks then labels would be 0 - No Mask, 1 - Mask
        desc.labelID = shouldDetectMasks ? (masks[i] > maskThreshold) : 0;
        desc.label = labels[desc.labelID];
        result->objects.push_back(desc);

        //--- Scaling landmarks coordinates
        for (size_t l = 0; l < ModelRetinaFace::LANDMARKS_NUM && shouldDetectLandmarks; ++l) {
            landmarks[i * ModelRetinaFace::LANDMARKS_NUM + l].x =
                clamp(landmarks[i * ModelRetinaFace::LANDMARKS_NUM + l].x / scaleX, 0.f, static_cast<float>(imgWidth));
            landmarks[i * ModelRetinaFace::LANDMARKS_NUM + l].y =
                clamp(landmarks[i * ModelRetinaFace::LANDMARKS_NUM + l].y / scaleY, 0.f, static_cast<float>(imgHeight));
            result->landmarks.push_back(landmarks[i * ModelRetinaFace::LANDMARKS_NUM + l]);
        }
    }

    return std::unique_ptr<ResultBase>(result);
}
Initial setup for CLion 2026-03-28 16:54:11 +11:00			`/*`
			`// Copyright (C) 2020-2024 Intel Corporation`
			`//`
			`// Licensed under the Apache License, Version 2.0 (the "License");`
			`// you may not use this file except in compliance with the License.`
			`// You may obtain a copy of the License at`
			`//`
			`// http://www.apache.org/licenses/LICENSE-2.0`
			`//`
			`// Unless required by applicable law or agreed to in writing, software`
			`// distributed under the License is distributed on an "AS IS" BASIS,`
			`// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.`
			`// See the License for the specific language governing permissions and`
			`// limitations under the License.`
			`*/`

			`#include "models/detection_model_retinaface.h"`

			`#include <stddef.h>`

			`#include <algorithm>`
			`#include <cmath>`
			`#include <stdexcept>`

			`#include <opencv2/core.hpp>`
			`#include <openvino/openvino.hpp>`

			`#include <utils/common.hpp>`
			`#include <utils/nms.hpp>`

			`#include "models/internal_model_data.h"`
			`#include "models/results.h"`

			`ModelRetinaFace::ModelRetinaFace(const std::string& modelFileName,`
			`float confidenceThreshold,`
			`bool useAutoResize,`
			`float boxIOUThreshold,`
			`const std::string& layout)`
			`: DetectionModel(modelFileName, confidenceThreshold, useAutoResize, {"Face"}, layout), // Default label is "Face"`
			`shouldDetectMasks(false),`
			`shouldDetectLandmarks(false),`
			`boxIOUThreshold(boxIOUThreshold),`
			`maskThreshold(0.8f),`
			`landmarkStd(1.0f),`
			`anchorCfg({{32, {32, 16}, 16, {1}}, {16, {8, 4}, 16, {1}}, {8, {2, 1}, 16, {1}}}) {`
			`generateAnchorsFpn();`
			`}`

			`void ModelRetinaFace::prepareInputsOutputs(std::shared_ptr<ov::Model>& model) {`
			`// --------------------------- Configure input & output -------------------------------------------------`
			`// --------------------------- Prepare input ------------------------------------------------------`
			`if (model->inputs().size() != 1) {`
			`throw std::logic_error("RetinaFace model wrapper expects models that have only 1 input");`
			`}`
			`const ov::Shape& inputShape = model->input().get_shape();`
			`const ov::Layout& inputLayout = getInputLayout(model->input());`

			`if (inputShape[ov::layout::channels_idx(inputLayout)] != 3) {`
			`throw std::logic_error("Expected 3-channel input");`
			`}`

			`ov::preprocess::PrePostProcessor ppp(model);`
			`ppp.input().tensor().set_element_type(ov::element::u8).set_layout({"NHWC"});`

			`if (useAutoResize) {`
			`ppp.input().tensor().set_spatial_dynamic_shape();`

			`ppp.input()`
			`.preprocess()`
			`.convert_element_type(ov::element::f32)`
			`.resize(ov::preprocess::ResizeAlgorithm::RESIZE_LINEAR);`
			`}`

			`ppp.input().model().set_layout(inputLayout);`

			`// --------------------------- Reading image input parameters -------------------------------------------`
			`inputsNames.push_back(model->input().get_any_name());`
			`netInputWidth = inputShape[ov::layout::width_idx(inputLayout)];`
			`netInputHeight = inputShape[ov::layout::height_idx(inputLayout)];`

			`// --------------------------- Prepare output -----------------------------------------------------`

			`const ov::OutputVector& outputs = model->outputs();`
			`if (outputs.size() != 6 && outputs.size() != 9 && outputs.size() != 12) {`
			`throw std::logic_error("RetinaFace model wrapper expects models that have 6, 9 or 12 outputs");`
			`}`

			`const ov::Layout outputLayout{"NCHW"};`
			`std::vector<size_t> outputsSizes[OUT_MAX];`
			`for (const auto& output : model->outputs()) {`
			`auto outTensorName = output.get_any_name();`
			`outputsNames.push_back(outTensorName);`
			`ppp.output(outTensorName).tensor().set_element_type(ov::element::f32).set_layout(outputLayout);`

			`OutputType type = OUT_MAX;`
			`if (outTensorName.find("box") != std::string::npos) {`
			`type = OUT_BOXES;`
			`} else if (outTensorName.find("cls") != std::string::npos) {`
			`type = OUT_SCORES;`
			`} else if (outTensorName.find("landmark") != std::string::npos) {`
			`type = OUT_LANDMARKS;`
			`shouldDetectLandmarks = true;`
			`} else if (outTensorName.find("type") != std::string::npos) {`
			`type = OUT_MASKSCORES;`
			`labels.clear();`
			`labels.push_back("No Mask");`
			`labels.push_back("Mask");`
			`shouldDetectMasks = true;`
			`landmarkStd = 0.2f;`
			`} else {`
			`continue;`
			`}`

			`size_t num = output.get_shape()[ov::layout::height_idx(outputLayout)];`
			`size_t i = 0;`
			`for (; i < outputsSizes[type].size(); ++i) {`
			`if (num < outputsSizes[type][i]) {`
			`break;`
			`}`
			`}`
			`separateOutputsNames[type].insert(separateOutputsNames[type].begin() + i, outTensorName);`
			`outputsSizes[type].insert(outputsSizes[type].begin() + i, num);`
			`}`
			`model = ppp.build();`

			`for (size_t idx = 0; idx < outputsSizes[OUT_BOXES].size(); ++idx) {`
			`size_t width = outputsSizes[OUT_BOXES][idx];`
			`size_t height = outputsSizes[OUT_BOXES][idx];`
			`auto s = anchorCfg[idx].stride;`
			`auto anchorNum = anchorsFpn[s].size();`

			`anchors.push_back(std::vector<Anchor>(height * width * anchorNum));`
			`for (size_t iw = 0; iw < width; ++iw) {`
			`size_t sw = iw * s;`
			`for (size_t ih = 0; ih < height; ++ih) {`
			`size_t sh = ih * s;`
			`for (size_t k = 0; k < anchorNum; ++k) {`
			`Anchor& anc = anchors[idx][(ih * width + iw) * anchorNum + k];`
			`anc.left = anchorsFpn[s][k].left + sw;`
			`anc.top = anchorsFpn[s][k].top + sh;`
			`anc.right = anchorsFpn[s][k].right + sw;`
			`anc.bottom = anchorsFpn[s][k].bottom + sh;`
			`}`
			`}`
			`}`
			`}`
			`}`

			`std::vector<Anchor> ratioEnum(const Anchor& anchor, const std::vector<int>& ratios) {`
			`std::vector<Anchor> retVal;`
			`const auto w = anchor.getWidth();`
			`const auto h = anchor.getHeight();`
			`const auto xCtr = anchor.getXCenter();`
			`const auto yCtr = anchor.getYCenter();`

			`for (const auto ratio : ratios) {`
			`const auto size = w * h;`
			`const auto sizeRatio = static_cast<float>(size) / ratio;`
			`const auto ws = sqrt(sizeRatio);`
			`const auto hs = ws * ratio;`
			`retVal.push_back({static_cast<float>(xCtr - 0.5f * (ws - 1.0f)),`
			`static_cast<float>(yCtr - 0.5f * (hs - 1.0f)),`
			`static_cast<float>(xCtr + 0.5f * (ws - 1.0f)),`
			`static_cast<float>(yCtr + 0.5f * (hs - 1.0f))});`
			`}`
			`return retVal;`
			`}`

			`std::vector<Anchor> scaleEnum(const Anchor& anchor, const std::vector<int>& scales) {`
			`std::vector<Anchor> retVal;`
			`const auto w = anchor.getWidth();`
			`const auto h = anchor.getHeight();`
			`const auto xCtr = anchor.getXCenter();`
			`const auto yCtr = anchor.getYCenter();`

			`for (auto scale : scales) {`
			`const auto ws = w * scale;`
			`const auto hs = h * scale;`
			`retVal.push_back({static_cast<float>(xCtr - 0.5f * (ws - 1.0f)),`
			`static_cast<float>(yCtr - 0.5f * (hs - 1.0f)),`
			`static_cast<float>(xCtr + 0.5f * (ws - 1.0f)),`
			`static_cast<float>(yCtr + 0.5f * (hs - 1.0f))});`
			`}`
			`return retVal;`
			`}`

			`std::vector<Anchor> generateAnchors(const int baseSize,`
			`const std::vector<int>& ratios,`
			`const std::vector<int>& scales) {`
			`Anchor baseAnchor{0.0f, 0.0f, baseSize - 1.0f, baseSize - 1.0f};`
			`auto ratioAnchors = ratioEnum(baseAnchor, ratios);`
			`std::vector<Anchor> retVal;`

			`for (const auto& ra : ratioAnchors) {`
			`auto addon = scaleEnum(ra, scales);`
			`retVal.insert(retVal.end(), addon.begin(), addon.end());`
			`}`
			`return retVal;`
			`}`

			`void ModelRetinaFace::generateAnchorsFpn() {`
			`auto cfg = anchorCfg;`
			`std::sort(cfg.begin(), cfg.end(), [](const AnchorCfgLine& x, const AnchorCfgLine& y) {`
			`return x.stride > y.stride;`
			`});`

			`for (const auto& cfgLine : cfg) {`
			`anchorsFpn.emplace(cfgLine.stride, generateAnchors(cfgLine.baseSize, cfgLine.ratios, cfgLine.scales));`
			`}`
			`}`

			`std::vector<size_t> thresholding(const ov::Tensor& scoresTensor, const int anchorNum, const float confidenceThreshold) {`
			`std::vector<size_t> indices;`
			`indices.reserve(ModelRetinaFace::INIT_VECTOR_SIZE);`
			`auto shape = scoresTensor.get_shape();`
			`size_t restAnchors = shape[1] - anchorNum;`
			`const float* scoresPtr = scoresTensor.data<float>();`

			`for (size_t x = anchorNum; x < shape[1]; ++x) {`
			`for (size_t y = 0; y < shape[2]; ++y) {`
			`for (size_t z = 0; z < shape[3]; ++z) {`
			`auto idx = (x * shape[2] + y) * shape[3] + z;`
			`auto score = scoresPtr[idx];`
			`if (score >= confidenceThreshold) {`
			`indices.push_back((y * shape[3] + z) * restAnchors + (x - anchorNum));`
			`}`
			`}`
			`}`
			`}`

			`return indices;`
			`}`

			`void filterScores(std::vector<float>& scores,`
			`const std::vector<size_t>& indices,`
			`const ov::Tensor& scoresTensor,`
			`const int anchorNum) {`
			`const auto& shape = scoresTensor.get_shape();`
			`const float* scoresPtr = scoresTensor.data<float>();`
			`const auto start = shape[2] * shape[3] * anchorNum;`

			`for (auto i : indices) {`
			`auto offset = (i % anchorNum) * shape[2] * shape[3] + i / anchorNum;`
			`scores.push_back(scoresPtr[start + offset]);`
			`}`
			`}`

			`void filterBoxes(std::vector<Anchor>& boxes,`
			`const std::vector<size_t>& indices,`
			`const ov::Tensor& boxesTensor,`
			`int anchorNum,`
			`const std::vector<Anchor>& anchors) {`
			`const auto& shape = boxesTensor.get_shape();`
			`const float* boxesPtr = boxesTensor.data<float>();`
			`const auto boxPredLen = shape[1] / anchorNum;`
			`const auto blockWidth = shape[2] * shape[3];`

			`for (auto i : indices) {`
			`auto offset = blockWidth * boxPredLen * (i % anchorNum) + (i / anchorNum);`

			`const auto dx = boxesPtr[offset];`
			`const auto dy = boxesPtr[offset + blockWidth];`
			`const auto dw = boxesPtr[offset + blockWidth * 2];`
			`const auto dh = boxesPtr[offset + blockWidth * 3];`

			`const auto predCtrX = dx * anchors[i].getWidth() + anchors[i].getXCenter();`
			`const auto predCtrY = dy * anchors[i].getHeight() + anchors[i].getYCenter();`
			`const auto predW = exp(dw) * anchors[i].getWidth();`
			`const auto predH = exp(dh) * anchors[i].getHeight();`

			`boxes.push_back({static_cast<float>(predCtrX - 0.5f * (predW - 1.0f)),`
			`static_cast<float>(predCtrY - 0.5f * (predH - 1.0f)),`
			`static_cast<float>(predCtrX + 0.5f * (predW - 1.0f)),`
			`static_cast<float>(predCtrY + 0.5f * (predH - 1.0f))});`
			`}`
			`}`

			`void filterLandmarks(std::vector<cv::Point2f>& landmarks,`
			`const std::vector<size_t>& indices,`
			`const ov::Tensor& landmarksTensor,`
			`int anchorNum,`
			`const std::vector<Anchor>& anchors,`
			`const float landmarkStd) {`
			`const auto& shape = landmarksTensor.get_shape();`
			`const float* landmarksPtr = landmarksTensor.data<float>();`
			`const auto landmarkPredLen = shape[1] / anchorNum;`
			`const auto blockWidth = shape[2] * shape[3];`

			`for (auto i : indices) {`
			`for (int j = 0; j < ModelRetinaFace::LANDMARKS_NUM; ++j) {`
			`auto offset = (i % anchorNum) * landmarkPredLen * shape[2] * shape[3] + i / anchorNum;`
			`auto deltaX = landmarksPtr[offset + j * 2 * blockWidth] * landmarkStd;`
			`auto deltaY = landmarksPtr[offset + (j * 2 + 1) * blockWidth] * landmarkStd;`
			`landmarks.push_back({deltaX * anchors[i].getWidth() + anchors[i].getXCenter(),`
			`deltaY * anchors[i].getHeight() + anchors[i].getYCenter()});`
			`}`
			`}`
			`}`

			`void filterMasksScores(std::vector<float>& masks,`
			`const std::vector<size_t>& indices,`
			`const ov::Tensor& maskScoresTensor,`
			`const int anchorNum) {`
			`auto shape = maskScoresTensor.get_shape();`
			`const float* maskScoresPtr = maskScoresTensor.data<float>();`
			`auto start = shape[2] * shape[3] * anchorNum * 2;`

			`for (auto i : indices) {`
			`auto offset = (i % anchorNum) * shape[2] * shape[3] + i / anchorNum;`
			`masks.push_back(maskScoresPtr[start + offset]);`
			`}`
			`}`

			`std::unique_ptr<ResultBase> ModelRetinaFace::postprocess(InferenceResult& infResult) {`
			`std::vector<float> scores;`
			`scores.reserve(INIT_VECTOR_SIZE);`
			`std::vector<Anchor> boxes;`
			`boxes.reserve(INIT_VECTOR_SIZE);`
			`std::vector<cv::Point2f> landmarks;`
			`std::vector<float> masks;`

			`if (shouldDetectLandmarks) {`
			`landmarks.reserve(INIT_VECTOR_SIZE);`
			`}`
			`if (shouldDetectMasks) {`
			`masks.reserve(INIT_VECTOR_SIZE);`
			`}`

			`// --------------------------- Gather & Filter output from all levels`
			`// ----------------------------------------------------------`
			`for (size_t idx = 0; idx < anchorCfg.size(); ++idx) {`
			`const auto boxRaw = infResult.outputsData[separateOutputsNames[OUT_BOXES][idx]];`
			`const auto scoresRaw = infResult.outputsData[separateOutputsNames[OUT_SCORES][idx]];`
			`auto s = anchorCfg[idx].stride;`
			`auto anchorNum = anchorsFpn[s].size();`

			`auto validIndices = thresholding(scoresRaw, anchorNum, confidenceThreshold);`
			`filterScores(scores, validIndices, scoresRaw, anchorNum);`
			`filterBoxes(boxes, validIndices, boxRaw, anchorNum, anchors[idx]);`
			`if (shouldDetectLandmarks) {`
			`const auto landmarksRaw = infResult.outputsData[separateOutputsNames[OUT_LANDMARKS][idx]];`
			`filterLandmarks(landmarks, validIndices, landmarksRaw, anchorNum, anchors[idx], landmarkStd);`
			`}`
			`if (shouldDetectMasks) {`
			`const auto masksRaw = infResult.outputsData[separateOutputsNames[OUT_MASKSCORES][idx]];`
			`filterMasksScores(masks, validIndices, masksRaw, anchorNum);`
			`}`
			`}`
			`// --------------------------- Apply Non-maximum Suppression`
			`// ---------------------------------------------------------- !shouldDetectLandmarks determines nms behavior, if`
			`// true - boundaries are included in areas calculation`
			`const auto keep = nms(boxes, scores, boxIOUThreshold, !shouldDetectLandmarks);`

			`// --------------------------- Create detection result objects`
			`// --------------------------------------------------------`
			`RetinaFaceDetectionResult* result = new RetinaFaceDetectionResult(infResult.frameId, infResult.metaData);`

			`const auto imgWidth = infResult.internalModelData->asRef<InternalImageModelData>().inputImgWidth;`
			`const auto imgHeight = infResult.internalModelData->asRef<InternalImageModelData>().inputImgHeight;`
			`const auto scaleX = static_cast<float>(netInputWidth) / imgWidth;`
			`const auto scaleY = static_cast<float>(netInputHeight) / imgHeight;`

			`result->objects.reserve(keep.size());`
			`result->landmarks.reserve(keep.size() * ModelRetinaFace::LANDMARKS_NUM);`
			`for (auto i : keep) {`
			`DetectedObject desc;`
			`desc.confidence = scores[i];`
			`//--- Scaling coordinates`
			`boxes[i].left /= scaleX;`
			`boxes[i].top /= scaleY;`
			`boxes[i].right /= scaleX;`
			`boxes[i].bottom /= scaleY;`

			`desc.x = clamp(boxes[i].left, 0.f, static_cast<float>(imgWidth));`
			`desc.y = clamp(boxes[i].top, 0.f, static_cast<float>(imgHeight));`
			`desc.width = clamp(boxes[i].getWidth(), 0.f, static_cast<float>(imgWidth));`
			`desc.height = clamp(boxes[i].getHeight(), 0.f, static_cast<float>(imgHeight));`
			`//--- Default label 0 - Face. If detecting masks then labels would be 0 - No Mask, 1 - Mask`
			`desc.labelID = shouldDetectMasks ? (masks[i] > maskThreshold) : 0;`
			`desc.label = labels[desc.labelID];`
			`result->objects.push_back(desc);`

			`//--- Scaling landmarks coordinates`
			`for (size_t l = 0; l < ModelRetinaFace::LANDMARKS_NUM && shouldDetectLandmarks; ++l) {`
			`landmarks[i * ModelRetinaFace::LANDMARKS_NUM + l].x =`
			`clamp(landmarks[i * ModelRetinaFace::LANDMARKS_NUM + l].x / scaleX, 0.f, static_cast<float>(imgWidth));`
			`landmarks[i * ModelRetinaFace::LANDMARKS_NUM + l].y =`
			`clamp(landmarks[i * ModelRetinaFace::LANDMARKS_NUM + l].y / scaleY, 0.f, static_cast<float>(imgHeight));`
			`result->landmarks.push_back(landmarks[i * ModelRetinaFace::LANDMARKS_NUM + l]);`
			`}`
			`}`

			`return std::unique_ptr<ResultBase>(result);`
			`}`