Initial setup for CLion

This commit is contained in:
2026-03-28 16:54:11 +11:00
parent 239cc02591
commit 7b4134133c
1136 changed files with 811916 additions and 0 deletions

View File

@@ -0,0 +1,201 @@
/*
// Copyright (C) 2021-2024 Intel Corporation
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
*/
#include "models/associative_embedding_decoder.h"
#include <algorithm>
#include <iterator>
#include <limits>
#include <numeric>
#include <vector>
#include <utils/kuhn_munkres.hpp>
void findPeaks(const std::vector<cv::Mat>& nmsHeatMaps,
               const std::vector<cv::Mat>& aembdsMaps,
               std::vector<std::vector<Peak>>& allPeaks,
               size_t jointId,
               size_t maxNumPeople,
               float detectionThreshold) {
    // Extract up to maxNumPeople candidate peaks for one joint from its NMS'ed
    // heatmap, keeping only those whose score exceeds detectionThreshold.
    // Each kept peak also stores the associative-embedding tag read from the
    // matching embedding map; the tag is used later to group joints into poses.
    const cv::Mat& nmsHeatMap = nmsHeatMaps[jointId];
    const float* heatMapData = nmsHeatMap.ptr<float>();
    cv::Size outputSize = nmsHeatMap.size();

    std::vector<int> indices(outputSize.area());
    std::iota(std::begin(indices), std::end(indices), 0);
    // Clamp the candidate count: partial_sort's middle iterator past end()
    // would be undefined behavior when maxNumPeople exceeds the cell count.
    const size_t numCandidates = std::min(maxNumPeople, indices.size());
    std::partial_sort(std::begin(indices),
                      std::begin(indices) + numCandidates,
                      std::end(indices),
                      [heatMapData](int l, int r) {
                          return heatMapData[l] > heatMapData[r];
                      });

    // Reserve once, up front (the original re-reserved on every iteration).
    allPeaks[jointId].reserve(numCandidates);
    for (size_t personId = 0; personId < numCandidates; personId++) {
        int index = indices[personId];
        // NOTE(review): x is derived as the row and y as the column of the flat
        // index, and the same (x, y) order feeds the embedding lookup below —
        // internally consistent; confirm downstream consumers expect this
        // orientation of the keypoint coordinates.
        int x = index / outputSize.width;
        int y = index % outputSize.width;
        float tag = aembdsMaps[jointId].at<float>(x, y);
        float score = heatMapData[index];
        if (score <= detectionThreshold) {
            // Scores are sorted in descending order, so every remaining
            // candidate is below the threshold too.
            break;
        }
        allPeaks[jointId].emplace_back(Peak{cv::Point2f(static_cast<float>(x), static_cast<float>(y)), score, tag});
    }
}
std::vector<Pose> matchByTag(std::vector<std::vector<Peak>>& allPeaks,
                             size_t maxNumPeople,
                             size_t numJoints,
                             float tagThreshold) {
    // Group per-joint peaks into person instances by matching associative-
    // embedding tags. Joints are visited in a fixed order (presumably COCO
    // torso/head before limbs — TODO confirm), and each joint's peaks are
    // assigned to already-built poses via Kuhn-Munkres bipartite matching on a
    // combined tag-distance / spatial-distance cost.
    size_t jointOrder[]{0, 1, 2, 3, 4, 5, 6, 11, 12, 7, 8, 9, 10, 13, 14, 15, 16};
    std::vector<Pose> allPoses;
    for (size_t jointId : jointOrder) {
        std::vector<Peak>& jointPeaks = allPeaks[jointId];
        // Embedding tags of all candidate peaks for this joint.
        std::vector<float> tags;
        for (auto& peak : jointPeaks) {
            tags.push_back(peak.tag);
        }
        if (allPoses.empty()) {
            // First joint that yields peaks: every peak seeds a new pose.
            for (size_t personId = 0; personId < jointPeaks.size(); personId++) {
                Peak peak = jointPeaks[personId];
                Pose pose = Pose(numJoints);
                pose.add(jointId, peak);
                allPoses.push_back(pose);
            }
            continue;
        }
        // Nothing to match, or the person budget is already exhausted.
        if (jointPeaks.empty() || (allPoses.size() == maxNumPeople)) {
            continue;
        }
        // Current tag value and spatial center of each pose built so far.
        std::vector<float> posesTags;
        std::vector<cv::Point2f> posesCenters;
        for (auto& pose : allPoses) {
            posesTags.push_back(pose.getPoseTag());
            posesCenters.push_back(pose.getPoseCenter());
        }
        size_t numAdded = tags.size();         // candidate peaks (matrix rows)
        size_t numGrouped = posesTags.size();  // existing poses (matrix columns)
        cv::Mat tagsDiff(numAdded, numGrouped, CV_32F);
        cv::Mat matchingCost(numAdded, numGrouped, CV_32F);
        std::vector<float> dists(numAdded);
        for (size_t j = 0; j < numGrouped; j++) {
            float minDist = std::numeric_limits<float>::max();
            // Compute euclidean distance (in spatial space) between the pose center and all joints.
            const cv::Point2f center = posesCenters.at(j);
            for (size_t i = 0; i < numAdded; i++) {
                cv::Point2f v = jointPeaks.at(i).keypoint - center;
                float dist = std::sqrt(v.x * v.x + v.y * v.y);
                dists[i] = dist;
                minDist = std::min(dist, minDist);
            }
            // Compute semantic distance (in embedding space) between the pose tag and all joints
            // and corresponding matching costs.
            auto poseTag = posesTags[j];
            for (size_t i = 0; i < numAdded; i++) {
                float diff = static_cast<float>(cv::norm(tags[i] - poseTag));
                // Keep the raw tag distance for the acceptance test after matching.
                tagsDiff.at<float>(i, j) = diff;
                if (diff < tagThreshold) {
                    // Tags are close: scale by spatial distance relative to the
                    // nearest candidate so closer peaks are preferred.
                    diff *= dists[i] / (minDist + 1e-10f);
                }
                // Rounding groups near-equal costs so the peak score breaks ties.
                matchingCost.at<float>(i, j) = std::round(diff) * 100 - jointPeaks[i].score;
            }
        }
        if (numAdded > numGrouped) {
            // Pad with prohibitively expensive dummy columns so the assignment
            // problem has a square cost matrix.
            cv::copyMakeBorder(matchingCost,
                               matchingCost,
                               0,
                               0,
                               0,
                               numAdded - numGrouped,
                               cv::BORDER_CONSTANT,
                               10000000);
        }
        // Get pairs
        auto res = KuhnMunkres().Solve(matchingCost);
        for (size_t row = 0; row < res.size(); row++) {
            size_t col = res[row];
            if (row < numAdded && col < numGrouped && tagsDiff.at<float>(row, col) < tagThreshold) {
                // Matched to an existing pose with a sufficiently close tag.
                allPoses[col].add(jointId, jointPeaks[row]);
            } else {
                // Unmatched (or matched to a dummy / distant tag): new pose.
                Pose pose = Pose(numJoints);
                pose.add(jointId, jointPeaks[row]);
                allPoses.push_back(pose);
            }
        }
    }
    return allPoses;
}
namespace {
// Quarter-pixel refinement: shift the keypoint by +/-0.25 toward the larger of
// its two horizontal/vertical heatmap neighbors. Points on (or next to) the
// border are left untouched.
cv::Point2f adjustLocation(const int x, const int y, const cv::Mat& heatMap) {
    cv::Point2f offset(0.f, 0.f);
    const int width = heatMap.cols;
    const int height = heatMap.rows;
    const bool strictlyInside = (x > 1) && (x < width - 1) && (y > 1) && (y < height - 1);
    if (strictlyInside) {
        const float gradX = heatMap.at<float>(y, x + 1) - heatMap.at<float>(y, x - 1);
        const float gradY = heatMap.at<float>(y + 1, x) - heatMap.at<float>(y - 1, x);
        offset.x = (gradX > 0) ? 0.25f : -0.25f;
        offset.y = (gradY > 0) ? 0.25f : -0.25f;
    }
    return offset;
}
}  // namespace
void adjustAndRefine(std::vector<Pose>& allPoses,
                     const std::vector<cv::Mat>& heatMaps,
                     const std::vector<cv::Mat>& aembdsMaps,
                     int poseId,
                     const float delta) {
    // Post-process a single pose: nudge detected keypoints toward the local
    // heatmap gradient ("adjust"), and recover missed keypoints (score == 0) by
    // searching the embedding map for the location whose tag is closest to the
    // pose tag ("refine").
    Pose& pose = allPoses[poseId];
    float poseTag = pose.getPoseTag();
    for (size_t jointId = 0; jointId < pose.size(); jointId++) {
        Peak& peak = pose.getPeak(jointId);
        const cv::Mat& heatMap = heatMaps[jointId];
        const cv::Mat& aembds = aembdsMaps[jointId];
        if (peak.score > 0) {
            // Adjust
            int x = static_cast<int>(peak.keypoint.x);
            int y = static_cast<int>(peak.keypoint.y);
            peak.keypoint += adjustLocation(x, y, heatMap);
            // A non-zero delta applies a constant sub-pixel shift to both axes
            // (float used as a boolean flag here).
            if (delta) {
                peak.keypoint.x += delta;
                peak.keypoint.y += delta;
            }
        } else {
            // Refine
            // Get position with the closest tag value to the pose tag
            cv::Mat diff = cv::abs(aembds - poseTag);
            // Truncate the tag distance to integers (round-trip via CV_32S) so
            // the heatmap score subtracted below acts as a tie-breaker among
            // equally-close tags.
            diff.convertTo(diff, CV_32S, 1.0, 0.0);
            diff.convertTo(diff, CV_32F);
            diff -= heatMap;
            double min;
            cv::Point2i minLoc;
            cv::minMaxLoc(diff, &min, 0, &minLoc);
            int x = minLoc.x;
            int y = minLoc.y;
            float val = heatMap.at<float>(y, x);
            // Only accept the recovered location if the heatmap supports it.
            if (val > 0) {
                peak.keypoint.x = static_cast<float>(x);
                peak.keypoint.y = static_cast<float>(y);
                peak.keypoint += adjustLocation(x, y, heatMap);
                // Peak score is assigned directly, so it does not affect the pose score.
                peak.score = val;
            }
        }
    }
}

View File

@@ -0,0 +1,196 @@
/*
// Copyright (C) 2020-2024 Intel Corporation
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
*/
#include "models/classification_model.h"
#include <algorithm>
#include <fstream>
#include <iterator>
#include <map>
#include <stdexcept>
#include <string>
#include <utility>
#include <vector>
#include <openvino/op/softmax.hpp>
#include <openvino/op/topk.hpp>
#include <openvino/openvino.hpp>
#include <utils/slog.hpp>
#include "models/results.h"
// Classification wrapper: keeps the requested number of top predictions (nTop)
// and the human-readable class labels used by postprocess().
ClassificationModel::ClassificationModel(const std::string& modelFileName,
                                         size_t nTop,
                                         bool useAutoResize,
                                         const std::vector<std::string>& labels,
                                         const std::string& layout)
    : ImageModel(modelFileName, useAutoResize, layout),
      nTop(nTop),
      labels(labels) {}
std::unique_ptr<ResultBase> ClassificationModel::postprocess(InferenceResult& infResult) {
    // The model graph was augmented with a TopK head in prepareInputsOutputs(),
    // so the outputs are a class-indices tensor and a matching scores tensor.
    const ov::Tensor& indicesTensor = infResult.outputsData.find(outputsNames[0])->second;
    const ov::Tensor& scoresTensor = infResult.outputsData.find(outputsNames[1])->second;
    const int* indicesData = indicesTensor.data<int>();
    const float* scoresData = scoresTensor.data<float>();

    auto* classificationResult = new ClassificationResult(infResult.frameId, infResult.metaData);
    std::unique_ptr<ResultBase> retVal(classificationResult);

    const size_t count = scoresTensor.get_size();
    classificationResult->topLabels.reserve(count);
    for (size_t i = 0; i < count; ++i) {
        const int classIdx = indicesData[i];
        // Reject indices outside the label table rather than reading garbage.
        if (classIdx < 0 || classIdx >= static_cast<int>(labels.size())) {
            throw std::runtime_error("Invalid index for the class label is found during postprocessing");
        }
        classificationResult->topLabels.emplace_back(classIdx, labels[classIdx], scoresData[i]);
    }
    return retVal;
}
std::vector<std::string> ClassificationModel::loadLabels(const std::string& labelFilename) {
    // Parse an ImageNet-style labels file: each line is "<id> <label>[, synonyms...]".
    // The returned label is the text between the first space and the first comma
    // (the whole remainder of the line when there is no comma).
    // Throws std::runtime_error on a missing file or a line with no space,
    // std::logic_error when the file yields no labels.
    std::vector<std::string> labels;
    /* Read labels */
    std::ifstream inputFile(labelFilename);
    if (!inputFile.is_open())
        throw std::runtime_error("Can't open the labels file: " + labelFilename);
    std::string labelsLine;
    while (std::getline(inputFile, labelsLine)) {
        // Tolerate CRLF files: getline keeps the trailing '\r' of Windows-style
        // line endings, which would otherwise end up inside the last label.
        if (!labelsLine.empty() && labelsLine.back() == '\r') {
            labelsLine.pop_back();
        }
        size_t labelBeginIdx = labelsLine.find(' ');
        size_t labelEndIdx = labelsLine.find(',');  // can be npos when class has only one label
        if (labelBeginIdx == std::string::npos) {
            throw std::runtime_error("The labels file has incorrect format.");
        }
        labels.push_back(labelsLine.substr(labelBeginIdx + 1, labelEndIdx - (labelBeginIdx + 1)));
    }
    if (labels.empty())
        throw std::logic_error("File is empty: " + labelFilename);
    return labels;
}
void ClassificationModel::prepareInputsOutputs(std::shared_ptr<ov::Model>& model) {
    // Validates the model's I/O layout, configures preprocessing, and augments
    // the graph with Softmax + TopK so the compiled network directly returns
    // the nTop best class indices and scores (consumed by postprocess()).
    // --------------------------- Configure input & output -------------------------------------------------
    // --------------------------- Prepare input ------------------------------------------------------
    if (model->inputs().size() != 1) {
        throw std::logic_error("Classification model wrapper supports topologies with only 1 input");
    }
    const auto& input = model->input();
    inputsNames.push_back(input.get_any_name());
    const ov::Shape& inputShape = input.get_shape();
    const ov::Layout& inputLayout = getInputLayout(input);
    if (inputShape.size() != 4 || inputShape[ov::layout::channels_idx(inputLayout)] != 3) {
        throw std::logic_error("3-channel 4-dimensional model's input is expected");
    }
    const auto width = inputShape[ov::layout::width_idx(inputLayout)];
    const auto height = inputShape[ov::layout::height_idx(inputLayout)];
    if (height != width) {
        throw std::logic_error("Model input has incorrect image shape. Must be NxN square."
                               " Got " +
                               std::to_string(height) + "x" + std::to_string(width) + ".");
    }
    ov::preprocess::PrePostProcessor ppp(model);
    inputTransform.setPrecision(ppp, model->input().get_any_name());
    ppp.input().tensor().set_layout({ "NHWC" });
    if (useAutoResize) {
        // Let the plugin accept arbitrary spatial sizes and resize on device.
        ppp.input().tensor().set_spatial_dynamic_shape();
        ppp.input()
            .preprocess()
            .convert_element_type(ov::element::f32)
            .resize(ov::preprocess::ResizeAlgorithm::RESIZE_LINEAR);
    }
    ppp.input().model().set_layout(inputLayout);
    // --------------------------- Prepare output -----------------------------------------------------
    if (model->outputs().size() != 1) {
        throw std::logic_error("Classification model wrapper supports topologies with only 1 output");
    }
    const ov::Shape& outputShape = model->output().get_shape();
    if (outputShape.size() != 2 && outputShape.size() != 4) {
        throw std::logic_error("Classification model wrapper supports topologies only with"
                               " 2-dimensional or 4-dimensional output");
    }
    // A 4-D output is accepted only when it is effectively 2-D (H == W == 1).
    const ov::Layout outputLayout("NCHW");
    if (outputShape.size() == 4 && (outputShape[ov::layout::height_idx(outputLayout)] != 1 ||
                                    outputShape[ov::layout::width_idx(outputLayout)] != 1)) {
        throw std::logic_error("Classification model wrapper supports topologies only"
                               " with 4-dimensional output which has last two dimensions of size 1");
    }
    size_t classesNum = outputShape[ov::layout::channels_idx(outputLayout)];
    if (nTop > classesNum) {
        throw std::logic_error("The model provides " + std::to_string(classesNum) + " classes, but " +
                               std::to_string(nTop) + " labels are requested to be predicted");
    }
    // Some models emit one extra leading class not present in the labels file;
    // insert a placeholder so indices keep lining up with labels.
    if (classesNum == labels.size() + 1) {
        labels.insert(labels.begin(), "other");
        slog::warn << "Inserted 'other' label as first." << slog::endl;
    } else if (classesNum != labels.size()) {
        // NOTE(review): the message prints outputShape[1] while the check uses
        // channels_idx(outputLayout); identical for NCHW, but verify if other
        // output layouts can reach this code.
        throw std::logic_error("Model's number of classes and parsed labels must match (" +
                               std::to_string(outputShape[1]) + " and " + std::to_string(labels.size()) + ')');
    }
    ppp.output().tensor().set_element_type(ov::element::f32);
    model = ppp.build();
    // --------------------------- Adding softmax and topK output ---------------------------
    auto nodes = model->get_ops();
    // Reuse an existing Softmax if the topology already ends with one.
    auto softmaxNodeIt = std::find_if(std::begin(nodes), std::end(nodes), [](const std::shared_ptr<ov::Node>& op) {
        return std::string(op->get_type_name()) == "Softmax";
    });
    std::shared_ptr<ov::Node> softmaxNode;
    if (softmaxNodeIt == nodes.end()) {
        // No Softmax found: attach one to the producer of the current output.
        auto logitsNode = model->get_output_op(0)->input(0).get_source_output().get_node();
        softmaxNode = std::make_shared<ov::op::v1::Softmax>(logitsNode->output(0), 1);
    } else {
        softmaxNode = *softmaxNodeIt;
    }
    const auto k = std::make_shared<ov::op::v0::Constant>(ov::element::i32, ov::Shape{}, std::vector<size_t>{nTop});
    std::shared_ptr<ov::Node> topkNode = std::make_shared<ov::op::v3::TopK>(softmaxNode,
                                                                            k,
                                                                            1,
                                                                            ov::op::v3::TopK::Mode::MAX,
                                                                            ov::op::v3::TopK::SortType::SORT_VALUES);
    // NOTE(review): TopK port 0 carries values and port 1 indices, so the local
    // variable names below are cross-wired — but the ResultVector order swaps
    // them back, so outputs()[0] ends up holding TopK's indices and outputs()[1]
    // the scores, matching the names assigned next. Confusing but consistent.
    auto indices = std::make_shared<ov::op::v0::Result>(topkNode->output(0));
    auto scores = std::make_shared<ov::op::v0::Result>(topkNode->output(1));
    ov::ResultVector res({scores, indices});
    model = std::make_shared<ov::Model>(res, model->get_parameters(), "classification");
    // manually set output tensors name for created topK node
    model->outputs()[0].set_names({"indices"});
    outputsNames.push_back("indices");
    model->outputs()[1].set_names({"scores"});
    outputsNames.push_back("scores");
    // set output precisions
    ppp = ov::preprocess::PrePostProcessor(model);
    ppp.output("indices").tensor().set_element_type(ov::element::i32);
    ppp.output("scores").tensor().set_element_type(ov::element::f32);
    model = ppp.build();
}

View File

@@ -0,0 +1,54 @@
/*
// Copyright (C) 2020-2024 Intel Corporation
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
*/
#include "models/detection_model.h"
#include <fstream>
#include <stdexcept>
#include <string>
#include <vector>
#include "models/image_model.h"
// Default construction: the object's state comes entirely from the members'
// (and base class's) own default initialization.
DetectionModel::DetectionModel() = default;
// Base class for detection wrappers: stores the confidence threshold applied
// during postprocessing and the optional class-label strings.
DetectionModel::DetectionModel(const std::string& modelFileName,
                               float confidenceThreshold,
                               bool useAutoResize,
                               const std::vector<std::string>& labels,
                               const std::string& layout)
    : ImageModel(modelFileName, useAutoResize, layout),
      confidenceThreshold(confidenceThreshold),
      labels(labels) {}
std::vector<std::string> DetectionModel::loadLabels(const std::string& labelFilename) {
    // Reads class labels, one per line. An empty filename is allowed and yields
    // an empty list (labels are optional for detection models).
    // Throws std::runtime_error when a given file can't be opened,
    // std::logic_error when a given file contains no labels.
    std::vector<std::string> labelsList;
    /* Read labels (if any) */
    if (!labelFilename.empty()) {
        std::ifstream inputFile(labelFilename);
        if (!inputFile.is_open())
            throw std::runtime_error("Can't open the labels file: " + labelFilename);
        std::string label;
        while (std::getline(inputFile, label)) {
            // Tolerate CRLF files: getline keeps the trailing '\r' of
            // Windows-style line endings, which would corrupt the label text.
            if (!label.empty() && label.back() == '\r') {
                label.pop_back();
            }
            labelsList.push_back(label);
        }
        if (labelsList.empty())
            throw std::logic_error("File is empty: " + labelFilename);
    }
    return labelsList;
}

View File

@@ -0,0 +1,302 @@
/*
// Copyright (C) 2020-2024 Intel Corporation
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
*/
#include "models/detection_model_centernet.h"
#include <stddef.h>
#include <algorithm>
#include <cmath>
#include <map>
#include <stdexcept>
#include <utility>
#include <opencv2/core.hpp>
#include <opencv2/imgproc.hpp>
#include <openvino/openvino.hpp>
#include <utils/common.hpp>
#include <utils/image_utils.h>
#include <utils/ocv_common.hpp>
#include "models/input_data.h"
#include "models/internal_model_data.h"
#include "models/results.h"
// CenterNet detector wrapper. Auto-resize is disabled (false) because
// preprocess() performs its own letterbox resize so that postprocess() can
// invert it with an affine transform.
ModelCenterNet::ModelCenterNet(const std::string& modelFileName,
                               float confidenceThreshold,
                               const std::vector<std::string>& labels,
                               const std::string& layout)
    : DetectionModel(modelFileName, confidenceThreshold, false, labels, layout) {}
void ModelCenterNet::prepareInputsOutputs(std::shared_ptr<ov::Model>& model) {
    // Validates the single RGB input and the three CenterNet heads, configures
    // f32/NCHW outputs, and records the network input size for preprocessing.
    // --------------------------- Configure input & output -------------------------------------------------
    // --------------------------- Prepare input ------------------------------------------------------
    if (model->inputs().size() != 1) {
        throw std::logic_error("CenterNet model wrapper expects models that have only 1 input");
    }
    const ov::Shape& inputShape = model->input().get_shape();
    const ov::Layout& inputLayout = getInputLayout(model->input());
    if (inputShape[ov::layout::channels_idx(inputLayout)] != 3) {
        throw std::logic_error("Expected 3-channel input");
    }
    ov::preprocess::PrePostProcessor ppp(model);
    inputTransform.setPrecision(ppp, model->input().get_any_name());
    ppp.input().tensor().set_layout("NHWC");
    ppp.input().model().set_layout(inputLayout);
    // --------------------------- Reading image input parameters -------------------------------------------
    inputsNames.push_back(model->input().get_any_name());
    netInputWidth = inputShape[ov::layout::width_idx(inputLayout)];
    netInputHeight = inputShape[ov::layout::height_idx(inputLayout)];
    // --------------------------- Prepare output -----------------------------------------------------
    if (model->outputs().size() != 3) {
        throw std::logic_error("CenterNet model wrapper expects models that have 3 outputs");
    }
    const ov::Layout outLayout{"NCHW"};
    for (const auto& output : model->outputs()) {
        auto outTensorName = output.get_any_name();
        outputsNames.push_back(outTensorName);
        ppp.output(outTensorName).tensor().set_element_type(ov::element::f32).set_layout(outLayout);
    }
    // Sorted so postprocess() can address the heads by a stable index
    // (presumably [0] heatmap, [1] regression, [2] width/height — depends on
    // the model's output names; confirm against the deployed model).
    std::sort(outputsNames.begin(), outputsNames.end());
    model = ppp.build();
}
// Rotate a 2-D point by the given angle (radians) around the origin.
cv::Point2f getDir(const cv::Point2f& srcPoint, float rotRadius) {
    const float sn = sinf(rotRadius);
    const float cs = cosf(rotRadius);
    return cv::Point2f(srcPoint.x * cs - srcPoint.y * sn,
                       srcPoint.x * sn + srcPoint.y * cs);
}
// Third corner of a right triangle: (a - b) rotated 90 degrees about b.
cv::Point2f get3rdPoint(const cv::Point2f& a, const cv::Point2f& b) {
    const cv::Point2f edge = a - b;
    return cv::Point2f(b.x - edge.y, b.y + edge.x);
}
// Build the 2x3 affine matrix mapping a crop of width srcW centered at
// (centerX, centerY), rotated by `rot` degrees, onto an outputWidth x
// outputHeight rectangle. With inv == true the inverse mapping is returned.
cv::Mat getAffineTransform(float centerX,
                           float centerY,
                           int srcW,
                           float rot,
                           size_t outputWidth,
                           size_t outputHeight,
                           bool inv = false) {
    const float rotRad = static_cast<float>(CV_PI) * rot / 180.0f;
    const cv::Point2f srcDir = getDir({0.0f, -0.5f * srcW}, rotRad);
    const cv::Point2f dstDir(0.0f, -0.5f * outputWidth);

    // Three point correspondences define the affine map: the center, a point
    // "above" the center, and a third point perpendicular to the first two.
    std::vector<cv::Point2f> src(3, {0.0f, 0.0f});
    src[0] = {centerX, centerY};
    src[1] = srcDir + src[0];
    src[2] = get3rdPoint(src[0], src[1]);

    std::vector<cv::Point2f> dst(3, {0.0f, 0.0f});
    dst[0] = {outputWidth * 0.5f, outputHeight * 0.5f};
    dst[1] = dst[0] + dstDir;
    dst[2] = get3rdPoint(dst[0], dst[1]);

    return inv ? cv::getAffineTransform(dst, src) : cv::getAffineTransform(src, dst);
}
std::shared_ptr<InternalModelData> ModelCenterNet::preprocess(const InputData& inputData, ov::InferRequest& request) {
    // Letterbox-resize the frame to the network input size (keeping the aspect
    // ratio) and stash the original dimensions so postprocess() can project
    // the detections back with the inverse affine transform.
    auto& img = inputData.asRef<ImageInputData>().inputImage;
    const auto& resizedImg = resizeImageExt(img, netInputWidth, netInputHeight, RESIZE_KEEP_ASPECT_LETTERBOX);
    request.set_input_tensor(wrapMat2Tensor(inputTransform(resizedImg)));
    return std::make_shared<InternalImageModelData>(img.cols, img.rows);
}
std::vector<std::pair<size_t, float>> nms(float* scoresPtr, const ov::Shape& shape, float threshold, int kernel = 3) {
    // Converts the raw heatmap logits to probabilities IN PLACE (sigmoid),
    // then keeps only cells that exceed `threshold` and are the maximum of
    // their kernel x kernel neighborhood (maxpool-style NMS).
    // Returns {flat NCHW index, score} pairs.
    std::vector<std::pair<size_t, float>> scores;
    scores.reserve(ModelCenterNet::INIT_VECTOR_SIZE);
    auto chSize = shape[2] * shape[3];
    // In-place sigmoid; expf is evaluated once per element (was twice).
    for (size_t i = 0; i < shape[1] * shape[2] * shape[3]; ++i) {
        const float e = expf(scoresPtr[i]);
        scoresPtr[i] = e / (1 + e);
    }
    for (size_t ch = 0; ch < shape[1]; ++ch) {
        for (size_t w = 0; w < shape[2]; ++w) {
            for (size_t h = 0; h < shape[3]; ++h) {
                // NOTE(review): the flat offset uses a stride of shape[2]; a
                // row-major NCHW tensor would normally use shape[3]. The two
                // coincide for CenterNet's square output maps — confirm before
                // reusing this with non-square outputs.
                float max = scoresPtr[chSize * ch + shape[2] * w + h];
                // --------------------- filter on threshold--------------------------------------
                if (max < threshold) {
                    continue;
                }
                // --------------------- store index and score------------------------------------
                scores.push_back({chSize * ch + shape[2] * w + h, max});
                bool next = true;
                // ---------------------- maxpool2d -----------------------------------------------
                for (int i = -kernel / 2; i < kernel / 2 + 1 && next; ++i) {
                    for (int j = -kernel / 2; j < kernel / 2 + 1; ++j) {
                        // Use signed neighbor coordinates: the original tested
                        // `w + i >= 0` on unsigned operands, which is always
                        // true (it only worked because wraparound then failed
                        // the upper-bound check).
                        const int nw = static_cast<int>(w) + i;
                        const int nh = static_cast<int>(h) + j;
                        if (nw >= 0 && nw < static_cast<int>(shape[2]) && nh >= 0 && nh < static_cast<int>(shape[3])) {
                            if (scoresPtr[chSize * ch + shape[2] * nw + nh] > max) {
                                // A strictly larger neighbor exists: not a local peak.
                                scores.pop_back();
                                next = false;
                                break;
                            }
                        } else {
                            // Out-of-bounds neighbors act as zeros; a negative
                            // score would lose to them. Unreachable after the
                            // sigmoid above (which is strictly positive), kept
                            // for safety.
                            if (max < 0) {
                                scores.pop_back();
                                next = false;
                                break;
                            }
                        }
                    }
                }
            }
        }
    }
    return scores;
}
// Thin adapter: run threshold filtering + peak NMS directly on the heatmap
// tensor's data (the tensor contents are modified in place by nms()).
static std::vector<std::pair<size_t, float>> filterScores(const ov::Tensor& scoresTensor, float threshold) {
    return nms(scoresTensor.data<float>(), scoresTensor.get_shape(), threshold);
}
// Gather the (x, y) center-offset pair for every kept peak. The regression
// tensor stores the x-offset plane followed by the y-offset plane, each of
// chSize elements; the peak's position within its plane selects the element.
std::vector<std::pair<float, float>> filterReg(const ov::Tensor& regressionTensor,
                                               const std::vector<std::pair<size_t, float>>& scores,
                                               size_t chSize) {
    const float* regPtr = regressionTensor.data<float>();
    std::vector<std::pair<float, float>> reg;
    reg.reserve(scores.size());
    for (const auto& peak : scores) {
        const size_t planePos = peak.first % chSize;
        reg.emplace_back(regPtr[planePos], regPtr[chSize + planePos]);
    }
    return reg;
}
// Gather the (width, height) pair for every kept peak. Layout mirrors
// filterReg(): a width plane followed by a height plane of chSize elements.
std::vector<std::pair<float, float>> filterWH(const ov::Tensor& whTensor,
                                              const std::vector<std::pair<size_t, float>>& scores,
                                              size_t chSize) {
    const float* whPtr = whTensor.data<float>();
    std::vector<std::pair<float, float>> wh;
    wh.reserve(scores.size());
    for (const auto& peak : scores) {
        const size_t planePos = peak.first % chSize;
        wh.emplace_back(whPtr[planePos], whPtr[chSize + planePos]);
    }
    return wh;
}
// Turn each kept peak into a box in heatmap coordinates: the peak's position
// within its plane gives the cell, reg the sub-cell center offset, wh the size.
std::vector<ModelCenterNet::BBox> calcBoxes(const std::vector<std::pair<size_t, float>>& scores,
                                            const std::vector<std::pair<float, float>>& reg,
                                            const std::vector<std::pair<float, float>>& wh,
                                            const ov::Shape& shape) {
    std::vector<ModelCenterNet::BBox> boxes(scores.size());
    const size_t planeSize = shape[2] * shape[3];
    for (size_t i = 0; i < boxes.size(); ++i) {
        const size_t planePos = scores[i].first % planeSize;
        const auto xCenter = planePos % shape[3];
        const auto yCenter = planePos / shape[3];
        auto& box = boxes[i];
        box.left = xCenter + reg[i].first - wh[i].first / 2.0f;
        box.top = yCenter + reg[i].second - wh[i].second / 2.0f;
        box.right = xCenter + reg[i].first + wh[i].first / 2.0f;
        box.bottom = yCenter + reg[i].second + wh[i].second / 2.0f;
    }
    return boxes;
}
// Map boxes from heatmap coordinates back to the source image by applying the
// inverse of the letterbox/crop affine transform, in place.
void transform(std::vector<ModelCenterNet::BBox>& boxes,
               const ov::Shape& shape,
               int scale,
               float centerX,
               float centerY) {
    const cv::Mat1f trans = getAffineTransform(centerX, centerY, scale, 0, shape[2], shape[3], true);
    const auto mapX = [&trans](float x, float y) {
        return trans.at<float>(0, 0) * x + trans.at<float>(0, 1) * y + trans.at<float>(0, 2);
    };
    const auto mapY = [&trans](float x, float y) {
        return trans.at<float>(1, 0) * x + trans.at<float>(1, 1) * y + trans.at<float>(1, 2);
    };
    for (auto& b : boxes) {
        // Transform the two defining corners; write back only after both are
        // computed so the second corner uses the original values.
        const float newLeft = mapX(b.left, b.top);
        const float newTop = mapY(b.left, b.top);
        const float newRight = mapX(b.right, b.bottom);
        const float newBottom = mapY(b.right, b.bottom);
        b.left = newLeft;
        b.top = newTop;
        b.right = newRight;
        b.bottom = newBottom;
    }
}
std::unique_ptr<ResultBase> ModelCenterNet::postprocess(InferenceResult& infResult) {
    // Decode the three CenterNet heads into detections in the original image's
    // coordinate system. outputsNames was sorted in prepareInputsOutputs();
    // here [0] is treated as the heatmap, [1] the center-offset regression,
    // [2] the width/height head (depends on the model's output names).
    // --------------------------- Filter data and get valid indices ---------------------------------
    const auto& heatmapTensor = infResult.outputsData[outputsNames[0]];
    const auto& heatmapTensorShape = heatmapTensor.get_shape();
    const auto chSize = heatmapTensorShape[2] * heatmapTensorShape[3];
    const auto scores = filterScores(heatmapTensor, confidenceThreshold);
    const auto& regressionTensor = infResult.outputsData[outputsNames[1]];
    const auto reg = filterReg(regressionTensor, scores, chSize);
    const auto& whTensor = infResult.outputsData[outputsNames[2]];
    const auto wh = filterWH(whTensor, scores, chSize);
    // --------------------------- Calculate bounding boxes & apply inverse affine transform ----------
    auto boxes = calcBoxes(scores, reg, wh, heatmapTensorShape);
    const auto imgWidth = infResult.internalModelData->asRef<InternalImageModelData>().inputImgWidth;
    const auto imgHeight = infResult.internalModelData->asRef<InternalImageModelData>().inputImgHeight;
    // The letterbox preprocessing scaled by the longest side; invert it around
    // the image center to recover source-pixel coordinates.
    const auto scale = std::max(imgWidth, imgHeight);
    const float centerX = imgWidth / 2.0f;
    const float centerY = imgHeight / 2.0f;
    transform(boxes, heatmapTensorShape, scale, centerX, centerY);
    // --------------------------- Create detection result objects ------------------------------------
    DetectionResult* result = new DetectionResult(infResult.frameId, infResult.metaData);
    result->objects.reserve(scores.size());
    for (size_t i = 0; i < scores.size(); ++i) {
        DetectedObject desc;
        desc.confidence = scores[i].second;
        // The heatmap channel of the peak is the class id.
        desc.labelID = scores[i].first / chSize;
        desc.label = getLabelName(desc.labelID);
        // Clamp to the image bounds; boxes may extend outside after the transform.
        desc.x = clamp(boxes[i].left, 0.f, static_cast<float>(imgWidth));
        desc.y = clamp(boxes[i].top, 0.f, static_cast<float>(imgHeight));
        desc.width = clamp(boxes[i].getWidth(), 0.f, static_cast<float>(imgWidth));
        desc.height = clamp(boxes[i].getHeight(), 0.f, static_cast<float>(imgHeight));
        result->objects.push_back(desc);
    }
    return std::unique_ptr<ResultBase>(result);
}

View File

@@ -0,0 +1,261 @@
/*
// Copyright (C) 2020-2024 Intel Corporation
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
*/
#include "models/detection_model_faceboxes.h"
#include <algorithm>
#include <cmath>
#include <map>
#include <stdexcept>
#include <openvino/openvino.hpp>
#include <utils/common.hpp>
#include <utils/nms.hpp>
#include <utils/ocv_common.hpp>
#include "models/internal_model_data.h"
#include "models/results.h"
// FaceBoxes wrapper: a single-class ("Face") detector. steps/minSizes define
// the SSD-style prior (anchor) grid built in priorBoxes(); variance holds the
// box-decoding coefficients used in filterBoxes().
ModelFaceBoxes::ModelFaceBoxes(const std::string& modelFileName,
                               float confidenceThreshold,
                               bool useAutoResize,
                               float boxIOUThreshold,
                               const std::string& layout)
    : DetectionModel(modelFileName, confidenceThreshold, useAutoResize, {"Face"}, layout),
      maxProposalsCount(0),
      boxIOUThreshold(boxIOUThreshold),
      variance({0.1f, 0.2f}),
      steps({32, 64, 128}),
      minSizes({{32, 64, 128}, {256}, {512}}) {}
void ModelFaceBoxes::prepareInputsOutputs(std::shared_ptr<ov::Model>& model) {
    // Validates the single RGB input and two outputs (boxes + scores), sets up
    // preprocessing, and precomputes the prior-box grid used for decoding.
    // --------------------------- Configure input & output -------------------------------------------------
    // --------------------------- Prepare input ------------------------------------------------------
    if (model->inputs().size() != 1) {
        throw std::logic_error("FaceBoxes model wrapper expects models that have only 1 input");
    }
    const ov::Shape& inputShape = model->input().get_shape();
    const ov::Layout& inputLayout = getInputLayout(model->input());
    if (inputShape[ov::layout::channels_idx(inputLayout)] != 3) {
        throw std::logic_error("Expected 3-channel input");
    }
    ov::preprocess::PrePostProcessor ppp(model);
    inputTransform.setPrecision(ppp, model->input().get_any_name());
    ppp.input().tensor().set_layout({"NHWC"});
    if (useAutoResize) {
        // Let the plugin accept arbitrary spatial sizes and resize on device.
        ppp.input().tensor().set_spatial_dynamic_shape();
        ppp.input()
            .preprocess()
            .convert_element_type(ov::element::f32)
            .resize(ov::preprocess::ResizeAlgorithm::RESIZE_LINEAR);
    }
    ppp.input().model().set_layout(inputLayout);
    // --------------------------- Reading image input parameters -------------------------------------------
    inputsNames.push_back(model->input().get_any_name());
    netInputWidth = inputShape[ov::layout::width_idx(inputLayout)];
    netInputHeight = inputShape[ov::layout::height_idx(inputLayout)];
    // --------------------------- Prepare output -----------------------------------------------------
    if (model->outputs().size() != 2) {
        throw std::logic_error("FaceBoxes model wrapper expects models that have 2 outputs");
    }
    // The number of priors comes from the first output's proposal dimension.
    const ov::Layout outputLayout{"CHW"};
    maxProposalsCount = model->outputs().front().get_shape()[ov::layout::height_idx(outputLayout)];
    for (const auto& output : model->outputs()) {
        const auto outTensorName = output.get_any_name();
        outputsNames.push_back(outTensorName);
        ppp.output(outTensorName).tensor().set_element_type(ov::element::f32).set_layout(outputLayout);
    }
    // Sorted so postprocess() can address [0] boxes / [1] scores by index
    // (relies on the models' output naming order — confirm for new models).
    std::sort(outputsNames.begin(), outputsNames.end());
    model = ppp.build();
    // --------------------------- Calculating anchors ----------------------------------------------------
    // One feature map {rows, cols} per stride level.
    std::vector<std::pair<size_t, size_t>> featureMaps;
    for (auto s : steps) {
        featureMaps.push_back({netInputHeight / s, netInputWidth / s});
    }
    priorBoxes(featureMaps);
}
// Append square minSize x minSize anchors centered at every combination of the
// given grid coordinates (scaled by the feature-map stride `step`).
void calculateAnchors(std::vector<Anchor>& anchors,
                      const std::vector<float>& vx,
                      const std::vector<float>& vy,
                      const int minSize,
                      const int step) {
    const float halfW = 0.5f * static_cast<float>(minSize);
    const float halfH = 0.5f * static_cast<float>(minSize);
    std::vector<float> centersX;
    centersX.reserve(vx.size());
    for (const float x : vx) {
        centersX.push_back(x * step);
    }
    std::vector<float> centersY;
    centersY.reserve(vy.size());
    for (const float y : vy) {
        centersY.push_back(y * step);
    }
    for (const float cy : centersY) {
        for (const float cx : centersX) {
            // Anchor layout: {left, top, right, bottom}.
            anchors.push_back({cx - halfW, cy - halfH, cx + halfW, cy + halfH});
        }
    }
}
// Anchors for the finest (stride-32) feature map: smaller anchor sizes get a
// denser sub-cell placement (quarter steps for 32, half steps for 64, one
// centered anchor otherwise).
void calculateAnchorsZeroLevel(std::vector<Anchor>& anchors,
                               const int fx,
                               const int fy,
                               const std::vector<int>& minSizes,
                               const int step) {
    for (const int size : minSizes) {
        std::vector<float> vx;
        std::vector<float> vy;
        switch (size) {
            case 32:
                // Quarter-cell density.
                vx = {static_cast<float>(fx), fx + 0.25f, fx + 0.5f, fx + 0.75f};
                vy = {static_cast<float>(fy), fy + 0.25f, fy + 0.5f, fy + 0.75f};
                break;
            case 64:
                // Half-cell density.
                vx = {static_cast<float>(fx), fx + 0.5f};
                vy = {static_cast<float>(fy), fy + 0.5f};
                break;
            default:
                // Single anchor at the cell center.
                vx = {fx + 0.5f};
                vy = {fy + 0.5f};
                break;
        }
        calculateAnchors(anchors, vx, vy, size, step);
    }
}
void ModelFaceBoxes::priorBoxes(const std::vector<std::pair<size_t, size_t>>& featureMaps) {
    // Precompute the SSD-style prior boxes (anchors) for every cell of every
    // feature-map level. featureMaps[k] holds the {rows, cols} of the k-th
    // output level (strides 32/64/128). Removed an unused local vector that
    // was declared per level in the original.
    anchors.reserve(maxProposalsCount);
    for (size_t k = 0; k < featureMaps.size(); ++k) {
        for (size_t i = 0; i < featureMaps[k].first; ++i) {
            for (size_t j = 0; j < featureMaps[k].second; ++j) {
                if (k == 0) {
                    // The finest level places several densified anchor sizes per cell.
                    calculateAnchorsZeroLevel(anchors, j, i, minSizes[k], steps[k]);
                } else {
                    // Other levels place one cell-centered anchor of a single size.
                    calculateAnchors(anchors, {j + 0.5f}, {i + 0.5f}, minSizes[k][0], steps[k]);
                }
            }
        }
    }
}
// Keep the priors whose face confidence exceeds the threshold. The scores
// tensor interleaves {background, face} confidences per prior, so the face
// score lives at every odd offset; the prior index is the offset halved.
std::pair<std::vector<size_t>, std::vector<float>> filterScores(const ov::Tensor& scoresTensor,
                                                                const float confidenceThreshold) {
    const auto shape = scoresTensor.get_shape();
    const float* scoresPtr = ov::Tensor{scoresTensor}.data<const float>();
    std::vector<size_t> indices;
    indices.reserve(ModelFaceBoxes::INIT_VECTOR_SIZE);
    std::vector<float> scores;
    scores.reserve(ModelFaceBoxes::INIT_VECTOR_SIZE);
    const size_t total = shape[1] * shape[2];
    for (size_t i = 1; i < total; i += 2) {
        const float faceScore = scoresPtr[i];
        if (faceScore > confidenceThreshold) {
            indices.push_back(i / 2);
            scores.push_back(faceScore);
        }
    }
    return {indices, scores};
}
// Decode the SSD-style box regressions (dx, dy, dw, dh) into absolute boxes
// for the priors that survived confidence filtering, using the standard
// center/size parameterization with the configured variances.
std::vector<Anchor> filterBoxes(const ov::Tensor& boxesTensor,
                                const std::vector<Anchor>& anchors,
                                const std::vector<size_t>& validIndices,
                                const std::vector<float>& variance) {
    const auto shape = boxesTensor.get_shape();
    const float* boxesPtr = ov::Tensor{boxesTensor}.data<const float>();
    std::vector<Anchor> boxes;
    boxes.reserve(ModelFaceBoxes::INIT_VECTOR_SIZE);
    for (const size_t idx : validIndices) {
        const float* det = boxesPtr + shape[2] * idx;  // {dx, dy, dw, dh}
        const Anchor& prior = anchors[idx];
        // Offsets shift the prior's center; exponentiated deltas scale its size.
        const auto ctrX = det[0] * variance[0] * prior.getWidth() + prior.getXCenter();
        const auto ctrY = det[1] * variance[0] * prior.getHeight() + prior.getYCenter();
        const auto boxW = exp(det[2] * variance[1]) * prior.getWidth();
        const auto boxH = exp(det[3] * variance[1]) * prior.getHeight();
        boxes.push_back({static_cast<float>(ctrX - 0.5f * boxW),
                         static_cast<float>(ctrY - 0.5f * boxH),
                         static_cast<float>(ctrX + 0.5f * boxW),
                         static_cast<float>(ctrY + 0.5f * boxH)});
    }
    return boxes;
}
std::unique_ptr<ResultBase> ModelFaceBoxes::postprocess(InferenceResult& infResult) {
    // Turns raw network outputs into a DetectionResult:
    // threshold scores -> decode boxes -> NMS -> scale boxes back to the source image.
    // Filter scores and get valid indices for bounding boxes
    const auto scoresTensor = infResult.outputsData[outputsNames[1]];
    const auto scores = filterScores(scoresTensor, confidenceThreshold);
    // Filter bounding boxes on indices
    auto boxesTensor = infResult.outputsData[outputsNames[0]];
    std::vector<Anchor> boxes = filterBoxes(boxesTensor, anchors, scores.first, variance);
    // Apply Non-maximum Suppression
    const std::vector<int> keep = nms(boxes, scores.second, boxIOUThreshold);
    // Create detection result objects.
    // Fix: own the allocation from the start so an exception thrown while filling
    // the result (e.g. during vector growth) cannot leak it.
    std::unique_ptr<DetectionResult> result(new DetectionResult(infResult.frameId, infResult.metaData));
    const auto imgWidth = infResult.internalModelData->asRef<InternalImageModelData>().inputImgWidth;
    const auto imgHeight = infResult.internalModelData->asRef<InternalImageModelData>().inputImgHeight;
    const float scaleX = static_cast<float>(netInputWidth) / imgWidth;
    const float scaleY = static_cast<float>(netInputHeight) / imgHeight;
    result->objects.reserve(keep.size());
    for (auto i : keep) {
        DetectedObject desc;
        desc.confidence = scores.second[i];
        // Map box coordinates from network-input space back to the source image and clamp.
        desc.x = clamp(boxes[i].left / scaleX, 0.f, static_cast<float>(imgWidth));
        desc.y = clamp(boxes[i].top / scaleY, 0.f, static_cast<float>(imgHeight));
        desc.width = clamp(boxes[i].getWidth() / scaleX, 0.f, static_cast<float>(imgWidth));
        desc.height = clamp(boxes[i].getHeight() / scaleY, 0.f, static_cast<float>(imgHeight));
        desc.labelID = 0;
        desc.label = labels[0];
        result->objects.push_back(desc);
    }
    return result;  // unique_ptr<DetectionResult> converts to unique_ptr<ResultBase>
}

View File

@@ -0,0 +1,394 @@
/*
// Copyright (C) 2020-2024 Intel Corporation
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
*/
#include "models/detection_model_retinaface.h"
#include <stddef.h>
#include <algorithm>
#include <cmath>
#include <stdexcept>
#include <opencv2/core.hpp>
#include <openvino/openvino.hpp>
#include <utils/common.hpp>
#include <utils/nms.hpp>
#include "models/internal_model_data.h"
#include "models/results.h"
// Sets up the RetinaFace wrapper. Landmark and mask outputs are discovered later,
// in prepareInputsOutputs(), which flips the corresponding flags and may adjust
// labels and landmarkStd for the mask-detection variant.
ModelRetinaFace::ModelRetinaFace(const std::string& modelFileName,
                                 float confidenceThreshold,
                                 bool useAutoResize,
                                 float boxIOUThreshold,
                                 const std::string& layout)
    : DetectionModel(modelFileName, confidenceThreshold, useAutoResize, {"Face"}, layout),  // Default label is "Face"
      shouldDetectMasks(false),
      shouldDetectLandmarks(false),
      boxIOUThreshold(boxIOUThreshold),
      maskThreshold(0.8f),
      landmarkStd(1.0f),
      // Per-FPN-level anchor configuration; consumed by generateAnchorsFpn().
      anchorCfg({{32, {32, 16}, 16, {1}}, {16, {8, 4}, 16, {1}}, {8, {2, 1}, 16, {1}}}) {
    // Precompute the per-stride base anchors; the dense per-cell grids are built in
    // prepareInputsOutputs() once the output sizes are known.
    generateAnchorsFpn();
}
// Validates model I/O, builds the preprocessing pipeline, classifies the outputs
// (boxes / scores / landmarks / mask scores) by tensor name, and precomputes the
// dense anchor grid for every FPN level.
void ModelRetinaFace::prepareInputsOutputs(std::shared_ptr<ov::Model>& model) {
    // --------------------------- Configure input & output -------------------------------------------------
    // --------------------------- Prepare input ------------------------------------------------------
    if (model->inputs().size() != 1) {
        throw std::logic_error("RetinaFace model wrapper expects models that have only 1 input");
    }
    const ov::Shape& inputShape = model->input().get_shape();
    const ov::Layout& inputLayout = getInputLayout(model->input());
    if (inputShape[ov::layout::channels_idx(inputLayout)] != 3) {
        throw std::logic_error("Expected 3-channel input");
    }
    ov::preprocess::PrePostProcessor ppp(model);
    // Input is fed as u8 NHWC; f32 conversion and resize are added only in auto-resize mode.
    ppp.input().tensor().set_element_type(ov::element::u8).set_layout({"NHWC"});
    if (useAutoResize) {
        ppp.input().tensor().set_spatial_dynamic_shape();
        ppp.input()
            .preprocess()
            .convert_element_type(ov::element::f32)
            .resize(ov::preprocess::ResizeAlgorithm::RESIZE_LINEAR);
    }
    ppp.input().model().set_layout(inputLayout);
    // --------------------------- Reading image input parameters -------------------------------------------
    inputsNames.push_back(model->input().get_any_name());
    netInputWidth = inputShape[ov::layout::width_idx(inputLayout)];
    netInputHeight = inputShape[ov::layout::height_idx(inputLayout)];
    // --------------------------- Prepare output -----------------------------------------------------
    // 6 outputs: boxes + scores per level; 9 adds landmarks; 12 adds mask scores.
    const ov::OutputVector& outputs = model->outputs();
    if (outputs.size() != 6 && outputs.size() != 9 && outputs.size() != 12) {
        throw std::logic_error("RetinaFace model wrapper expects models that have 6, 9 or 12 outputs");
    }
    const ov::Layout outputLayout{"NCHW"};
    std::vector<size_t> outputsSizes[OUT_MAX];
    for (const auto& output : model->outputs()) {
        auto outTensorName = output.get_any_name();
        outputsNames.push_back(outTensorName);
        ppp.output(outTensorName).tensor().set_element_type(ov::element::f32).set_layout(outputLayout);
        // Classify each output by a substring of its tensor name.
        OutputType type = OUT_MAX;
        if (outTensorName.find("box") != std::string::npos) {
            type = OUT_BOXES;
        } else if (outTensorName.find("cls") != std::string::npos) {
            type = OUT_SCORES;
        } else if (outTensorName.find("landmark") != std::string::npos) {
            type = OUT_LANDMARKS;
            shouldDetectLandmarks = true;
        } else if (outTensorName.find("type") != std::string::npos) {
            // A "type" output marks the mask-detection variant of the model.
            type = OUT_MASKSCORES;
            labels.clear();
            labels.push_back("No Mask");
            labels.push_back("Mask");
            shouldDetectMasks = true;
            landmarkStd = 0.2f;
        } else {
            continue;
        }
        // Insertion sort the per-type name lists by ascending feature-map height so that
        // level index 0 is the coarsest map (largest stride), matching anchorCfg order.
        size_t num = output.get_shape()[ov::layout::height_idx(outputLayout)];
        size_t i = 0;
        for (; i < outputsSizes[type].size(); ++i) {
            if (num < outputsSizes[type][i]) {
                break;
            }
        }
        separateOutputsNames[type].insert(separateOutputsNames[type].begin() + i, outTensorName);
        outputsSizes[type].insert(outputsSizes[type].begin() + i, num);
    }
    model = ppp.build();
    // Build dense anchors per level by shifting the per-stride FPN base anchors
    // across the output grid (stride pixels per cell).
    for (size_t idx = 0; idx < outputsSizes[OUT_BOXES].size(); ++idx) {
        size_t width = outputsSizes[OUT_BOXES][idx];
        // NOTE(review): height is read from the same (height-derived) size as width,
        // i.e. square output grids are assumed — verify for non-square network inputs.
        size_t height = outputsSizes[OUT_BOXES][idx];
        auto s = anchorCfg[idx].stride;
        auto anchorNum = anchorsFpn[s].size();
        anchors.push_back(std::vector<Anchor>(height * width * anchorNum));
        for (size_t iw = 0; iw < width; ++iw) {
            size_t sw = iw * s;  // x-offset of this grid cell in input pixels
            for (size_t ih = 0; ih < height; ++ih) {
                size_t sh = ih * s;  // y-offset of this grid cell in input pixels
                for (size_t k = 0; k < anchorNum; ++k) {
                    Anchor& anc = anchors[idx][(ih * width + iw) * anchorNum + k];
                    anc.left = anchorsFpn[s][k].left + sw;
                    anc.top = anchorsFpn[s][k].top + sh;
                    anc.right = anchorsFpn[s][k].right + sw;
                    anc.bottom = anchorsFpn[s][k].bottom + sh;
                }
            }
        }
    }
}
std::vector<Anchor> ratioEnum(const Anchor& anchor, const std::vector<int>& ratios) {
std::vector<Anchor> retVal;
const auto w = anchor.getWidth();
const auto h = anchor.getHeight();
const auto xCtr = anchor.getXCenter();
const auto yCtr = anchor.getYCenter();
for (const auto ratio : ratios) {
const auto size = w * h;
const auto sizeRatio = static_cast<float>(size) / ratio;
const auto ws = sqrt(sizeRatio);
const auto hs = ws * ratio;
retVal.push_back({static_cast<float>(xCtr - 0.5f * (ws - 1.0f)),
static_cast<float>(yCtr - 0.5f * (hs - 1.0f)),
static_cast<float>(xCtr + 0.5f * (ws - 1.0f)),
static_cast<float>(yCtr + 0.5f * (hs - 1.0f))});
}
return retVal;
}
std::vector<Anchor> scaleEnum(const Anchor& anchor, const std::vector<int>& scales) {
std::vector<Anchor> retVal;
const auto w = anchor.getWidth();
const auto h = anchor.getHeight();
const auto xCtr = anchor.getXCenter();
const auto yCtr = anchor.getYCenter();
for (auto scale : scales) {
const auto ws = w * scale;
const auto hs = h * scale;
retVal.push_back({static_cast<float>(xCtr - 0.5f * (ws - 1.0f)),
static_cast<float>(yCtr - 0.5f * (hs - 1.0f)),
static_cast<float>(xCtr + 0.5f * (ws - 1.0f)),
static_cast<float>(yCtr + 0.5f * (hs - 1.0f))});
}
return retVal;
}
std::vector<Anchor> generateAnchors(const int baseSize,
const std::vector<int>& ratios,
const std::vector<int>& scales) {
Anchor baseAnchor{0.0f, 0.0f, baseSize - 1.0f, baseSize - 1.0f};
auto ratioAnchors = ratioEnum(baseAnchor, ratios);
std::vector<Anchor> retVal;
for (const auto& ra : ratioAnchors) {
auto addon = scaleEnum(ra, scales);
retVal.insert(retVal.end(), addon.begin(), addon.end());
}
return retVal;
}
void ModelRetinaFace::generateAnchorsFpn() {
    // Builds the per-stride base anchor sets. The configuration is sorted by
    // descending stride (coarsest FPN level first) before generation.
    auto sortedCfg = anchorCfg;
    std::sort(sortedCfg.begin(), sortedCfg.end(), [](const AnchorCfgLine& a, const AnchorCfgLine& b) {
        return a.stride > b.stride;
    });
    for (const auto& line : sortedCfg) {
        anchorsFpn.emplace(line.stride, generateAnchors(line.baseSize, line.ratios, line.scales));
    }
}
// Scans the per-anchor class planes of an NCHW scores tensor and returns a flat
// candidate index for every score above the threshold. Channels [0, anchorNum)
// are skipped (presumably background scores — the kept channels start at anchorNum).
// The returned index encodes (cell, anchor) as cell * restAnchors + anchor, which
// is the layout the filter* helpers below expect.
std::vector<size_t> thresholding(const ov::Tensor& scoresTensor, const int anchorNum, const float confidenceThreshold) {
    std::vector<size_t> indices;
    indices.reserve(ModelRetinaFace::INIT_VECTOR_SIZE);
    auto shape = scoresTensor.get_shape();
    size_t restAnchors = shape[1] - anchorNum;  // number of kept (face-class) channels
    const float* scoresPtr = scoresTensor.data<float>();
    for (size_t x = anchorNum; x < shape[1]; ++x) {
        for (size_t y = 0; y < shape[2]; ++y) {
            for (size_t z = 0; z < shape[3]; ++z) {
                // Linear offset of element (x, y, z) in the NCHW plane layout.
                auto idx = (x * shape[2] + y) * shape[3] + z;
                auto score = scoresPtr[idx];
                if (score >= confidenceThreshold) {
                    indices.push_back((y * shape[3] + z) * restAnchors + (x - anchorNum));
                }
            }
        }
    }
    return indices;
}
void filterScores(std::vector<float>& scores,
                  const std::vector<size_t>& indices,
                  const ov::Tensor& scoresTensor,
                  const int anchorNum) {
    // Appends the kept candidates' class scores. The first anchorNum channel planes
    // are skipped; the remaining planes hold the scores being collected.
    const auto& shape = scoresTensor.get_shape();
    const float* scoresPtr = scoresTensor.data<float>();
    const auto planeSize = shape[2] * shape[3];
    const auto keptScoresStart = planeSize * anchorNum;
    for (const auto idx : indices) {
        // Decode the flat (cell, anchor) index produced by thresholding().
        const auto offset = (idx % anchorNum) * planeSize + idx / anchorNum;
        scores.push_back(scoresPtr[keptScoresStart + offset]);
    }
}
void filterBoxes(std::vector<Anchor>& boxes,
                 const std::vector<size_t>& indices,
                 const ov::Tensor& boxesTensor,
                 int anchorNum,
                 const std::vector<Anchor>& anchors) {
    // Decodes the box regression deltas of the kept candidates into corner-format
    // boxes via center-size decoding against the dense anchors.
    const auto& shape = boxesTensor.get_shape();
    const float* boxesPtr = boxesTensor.data<float>();
    const auto boxPredLen = shape[1] / anchorNum;
    const auto blockWidth = shape[2] * shape[3];
    for (const auto idx : indices) {
        const auto offset = blockWidth * boxPredLen * (idx % anchorNum) + (idx / anchorNum);
        // Deltas are laid out plane-by-plane: dx, dy, dw, dh.
        const auto dx = boxesPtr[offset];
        const auto dy = boxesPtr[offset + blockWidth];
        const auto dw = boxesPtr[offset + blockWidth * 2];
        const auto dh = boxesPtr[offset + blockWidth * 3];
        const Anchor& prior = anchors[idx];
        const auto ctrX = dx * prior.getWidth() + prior.getXCenter();
        const auto ctrY = dy * prior.getHeight() + prior.getYCenter();
        const auto decodedW = exp(dw) * prior.getWidth();
        const auto decodedH = exp(dh) * prior.getHeight();
        boxes.push_back({static_cast<float>(ctrX - 0.5f * (decodedW - 1.0f)),
                         static_cast<float>(ctrY - 0.5f * (decodedH - 1.0f)),
                         static_cast<float>(ctrX + 0.5f * (decodedW - 1.0f)),
                         static_cast<float>(ctrY + 0.5f * (decodedH - 1.0f))});
    }
}
void filterLandmarks(std::vector<cv::Point2f>& landmarks,
                     const std::vector<size_t>& indices,
                     const ov::Tensor& landmarksTensor,
                     int anchorNum,
                     const std::vector<Anchor>& anchors,
                     const float landmarkStd) {
    // Decodes the LANDMARKS_NUM landmark points of every kept candidate: per-point
    // offsets are scaled by landmarkStd and applied relative to the anchor center.
    const auto& shape = landmarksTensor.get_shape();
    const float* landmarksPtr = landmarksTensor.data<float>();
    const auto landmarkPredLen = shape[1] / anchorNum;
    const auto blockWidth = shape[2] * shape[3];
    for (auto i : indices) {
        // The base offset depends only on the candidate index, so compute it once
        // instead of re-deriving it for every landmark point (was inside the j loop).
        const auto offset = (i % anchorNum) * landmarkPredLen * shape[2] * shape[3] + i / anchorNum;
        for (int j = 0; j < ModelRetinaFace::LANDMARKS_NUM; ++j) {
            // x and y offsets for point j live in consecutive channel planes.
            auto deltaX = landmarksPtr[offset + j * 2 * blockWidth] * landmarkStd;
            auto deltaY = landmarksPtr[offset + (j * 2 + 1) * blockWidth] * landmarkStd;
            landmarks.push_back({deltaX * anchors[i].getWidth() + anchors[i].getXCenter(),
                                 deltaY * anchors[i].getHeight() + anchors[i].getYCenter()});
        }
    }
}
void filterMasksScores(std::vector<float>& masks,
                       const std::vector<size_t>& indices,
                       const ov::Tensor& maskScoresTensor,
                       const int anchorNum) {
    // Appends the mask probability of every kept candidate. The collected planes
    // start after two blocks of anchorNum channels (presumably the background and
    // no-mask scores).
    const auto& shape = maskScoresTensor.get_shape();
    const float* maskScoresPtr = maskScoresTensor.data<float>();
    const auto planeSize = shape[2] * shape[3];
    const auto maskScoresStart = planeSize * anchorNum * 2;
    for (const auto idx : indices) {
        const auto offset = (idx % anchorNum) * planeSize + idx / anchorNum;
        masks.push_back(maskScoresPtr[maskScoresStart + offset]);
    }
}
std::unique_ptr<ResultBase> ModelRetinaFace::postprocess(InferenceResult& infResult) {
    // Gathers candidates from every FPN level, decodes boxes / landmarks / mask
    // scores, applies NMS and packs everything into a RetinaFaceDetectionResult.
    std::vector<float> scores;
    scores.reserve(INIT_VECTOR_SIZE);
    std::vector<Anchor> boxes;
    boxes.reserve(INIT_VECTOR_SIZE);
    std::vector<cv::Point2f> landmarks;
    std::vector<float> masks;
    if (shouldDetectLandmarks) {
        landmarks.reserve(INIT_VECTOR_SIZE);
    }
    if (shouldDetectMasks) {
        masks.reserve(INIT_VECTOR_SIZE);
    }
    // --------------------------- Gather & Filter output from all levels
    // ----------------------------------------------------------
    for (size_t idx = 0; idx < anchorCfg.size(); ++idx) {
        const auto boxRaw = infResult.outputsData[separateOutputsNames[OUT_BOXES][idx]];
        const auto scoresRaw = infResult.outputsData[separateOutputsNames[OUT_SCORES][idx]];
        auto s = anchorCfg[idx].stride;
        auto anchorNum = anchorsFpn[s].size();
        // Candidates surviving the confidence threshold at this level.
        auto validIndices = thresholding(scoresRaw, anchorNum, confidenceThreshold);
        filterScores(scores, validIndices, scoresRaw, anchorNum);
        filterBoxes(boxes, validIndices, boxRaw, anchorNum, anchors[idx]);
        if (shouldDetectLandmarks) {
            const auto landmarksRaw = infResult.outputsData[separateOutputsNames[OUT_LANDMARKS][idx]];
            filterLandmarks(landmarks, validIndices, landmarksRaw, anchorNum, anchors[idx], landmarkStd);
        }
        if (shouldDetectMasks) {
            const auto masksRaw = infResult.outputsData[separateOutputsNames[OUT_MASKSCORES][idx]];
            filterMasksScores(masks, validIndices, masksRaw, anchorNum);
        }
    }
    // --------------------------- Apply Non-maximum Suppression
    // ---------------------------------------------------------- !shouldDetectLandmarks determines nms behavior, if
    // true - boundaries are included in areas calculation
    const auto keep = nms(boxes, scores, boxIOUThreshold, !shouldDetectLandmarks);
    // --------------------------- Create detection result objects
    // Fix: own the allocation from the start so an exception thrown while filling
    // the result (e.g. during vector growth) cannot leak it.
    std::unique_ptr<RetinaFaceDetectionResult> result(
        new RetinaFaceDetectionResult(infResult.frameId, infResult.metaData));
    const auto imgWidth = infResult.internalModelData->asRef<InternalImageModelData>().inputImgWidth;
    const auto imgHeight = infResult.internalModelData->asRef<InternalImageModelData>().inputImgHeight;
    const auto scaleX = static_cast<float>(netInputWidth) / imgWidth;
    const auto scaleY = static_cast<float>(netInputHeight) / imgHeight;
    result->objects.reserve(keep.size());
    result->landmarks.reserve(keep.size() * ModelRetinaFace::LANDMARKS_NUM);
    for (auto i : keep) {
        DetectedObject desc;
        desc.confidence = scores[i];
        //--- Scaling coordinates from network-input space to the source image
        boxes[i].left /= scaleX;
        boxes[i].top /= scaleY;
        boxes[i].right /= scaleX;
        boxes[i].bottom /= scaleY;
        desc.x = clamp(boxes[i].left, 0.f, static_cast<float>(imgWidth));
        desc.y = clamp(boxes[i].top, 0.f, static_cast<float>(imgHeight));
        desc.width = clamp(boxes[i].getWidth(), 0.f, static_cast<float>(imgWidth));
        desc.height = clamp(boxes[i].getHeight(), 0.f, static_cast<float>(imgHeight));
        //--- Default label 0 - Face. If detecting masks then labels would be 0 - No Mask, 1 - Mask
        desc.labelID = shouldDetectMasks ? (masks[i] > maskThreshold) : 0;
        desc.label = labels[desc.labelID];
        result->objects.push_back(desc);
        //--- Scaling landmarks coordinates (loop body is skipped entirely when
        //    landmarks are not detected)
        for (size_t l = 0; l < ModelRetinaFace::LANDMARKS_NUM && shouldDetectLandmarks; ++l) {
            landmarks[i * ModelRetinaFace::LANDMARKS_NUM + l].x =
                clamp(landmarks[i * ModelRetinaFace::LANDMARKS_NUM + l].x / scaleX, 0.f, static_cast<float>(imgWidth));
            landmarks[i * ModelRetinaFace::LANDMARKS_NUM + l].y =
                clamp(landmarks[i * ModelRetinaFace::LANDMARKS_NUM + l].y / scaleY, 0.f, static_cast<float>(imgHeight));
            result->landmarks.push_back(landmarks[i * ModelRetinaFace::LANDMARKS_NUM + l]);
        }
    }
    return result;  // converts to unique_ptr<ResultBase>
}

View File

@@ -0,0 +1,277 @@
/*
// Copyright (C) 2021-2024 Intel Corporation
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
*/
#include "models/detection_model_retinaface_pt.h"
#include <stdint.h>
#include <algorithm>
#include <cmath>
#include <map>
#include <stdexcept>
#include <string>
#include <vector>
#include <openvino/openvino.hpp>
#include <utils/common.hpp>
#include <utils/nms.hpp>
#include <utils/ocv_common.hpp>
#include "models/internal_model_data.h"
#include "models/results.h"
// Sets up the PyTorch-flavored RetinaFace wrapper. landmarksNum starts at 0 and is
// discovered from the landmark output shape in prepareInputsOutputs().
ModelRetinaFacePT::ModelRetinaFacePT(const std::string& modelFileName,
                                     float confidenceThreshold,
                                     bool useAutoResize,
                                     float boxIOUThreshold,
                                     const std::string& layout)
    : DetectionModel(modelFileName, confidenceThreshold, useAutoResize, {"Face"}, layout),  // Default label is "Face"
      landmarksNum(0),
      boxIOUThreshold(boxIOUThreshold) {}
// Configures preprocessing for the single image input, locates the bbox / cls
// (and optional landmark) outputs by name, and precomputes the prior boxes.
void ModelRetinaFacePT::prepareInputsOutputs(std::shared_ptr<ov::Model>& model) {
    // --------------------------- Configure input & output -------------------------------------------------
    // --------------------------- Prepare input ------------------------------------------------------
    if (model->inputs().size() != 1) {
        throw std::logic_error("RetinaFacePT model wrapper expects models that have only 1 input");
    }
    // Max shape is used so the check also works for models with dynamic spatial dims.
    const ov::Shape& inputShape = model->input().get_partial_shape().get_max_shape();
    const ov::Layout& inputLayout = getInputLayout(model->input());
    if (inputShape[ov::layout::channels_idx(inputLayout)] != 3) {
        throw std::logic_error("Expected 3-channel input");
    }
    ov::preprocess::PrePostProcessor ppp(model);
    inputTransform.setPrecision(ppp, model->input().get_any_name());
    ppp.input().tensor().set_layout({"NHWC"});
    if (useAutoResize) {
        ppp.input().tensor().set_spatial_dynamic_shape();
        ppp.input()
            .preprocess()
            .convert_element_type(ov::element::f32)
            .resize(ov::preprocess::ResizeAlgorithm::RESIZE_LINEAR);
    }
    ppp.input().model().set_layout(inputLayout);
    // --------------------------- Reading image input parameters -------------------------------------------
    inputsNames.push_back(model->input().get_any_name());
    netInputWidth = inputShape[ov::layout::width_idx(inputLayout)];
    netInputHeight = inputShape[ov::layout::height_idx(inputLayout)];
    // --------------------------- Prepare output -----------------------------------------------------
    if (model->outputs().size() != 3) {
        throw std::logic_error("RetinaFace model wrapper expects models that have 3 outputs");
    }
    landmarksNum = 0;
    outputsNames.resize(2);
    std::vector<uint32_t> outputsSizes[OUT_MAX];
    const ov::Layout chw("CHW");
    const ov::Layout nchw("NCHW");
    for (auto& output : model->outputs()) {
        auto outTensorName = output.get_any_name();
        // NOTE(review): names are appended here AND assigned by index below, so the
        // vector can hold duplicate entries — verify only the OUT_* slots are consumed.
        outputsNames.push_back(outTensorName);
        ppp.output(outTensorName)
            .tensor()
            .set_element_type(ov::element::f32)
            .set_layout(output.get_partial_shape().size() == 4 ? nchw : chw);
        if (outTensorName.find("bbox") != std::string::npos) {
            outputsNames[OUT_BOXES] = outTensorName;
        } else if (outTensorName.find("cls") != std::string::npos) {
            outputsNames[OUT_SCORES] = outTensorName;
        } else if (outTensorName.find("landmark") != std::string::npos) {
            // Landmarks might be optional, if it is present, resize names array to fit landmarks output name to the
            // last item of array Considering that other outputs names are already filled in or will be filled later
            outputsNames.resize(std::max(outputsNames.size(), (size_t)OUT_LANDMARKS + 1));
            outputsNames[OUT_LANDMARKS] = outTensorName;
            landmarksNum =
                output.get_partial_shape()[ov::layout::width_idx(chw)].get_length() / 2;  // Each landmark consist of 2 variables (x and y)
        } else {
            continue;
        }
    }
    if (outputsNames[OUT_BOXES] == "" || outputsNames[OUT_SCORES] == "") {
        throw std::logic_error("Bbox or cls layers are not found");
    }
    model = ppp.build();
    // Priors depend on the (now known) network input size.
    priors = generatePriorData();
}
std::vector<size_t> ModelRetinaFacePT::filterByScore(const ov::Tensor& scoresTensor, const float confidenceThreshold) {
    // Collects the indices of priors whose face score passes the threshold.
    // Each prior carries shape[2] scores; the face score is at offset 1.
    const auto& shape = scoresTensor.get_shape();
    const float* scoresPtr = scoresTensor.data<float>();
    std::vector<size_t> indicies;
    for (size_t priorIdx = 0; priorIdx < shape[1]; ++priorIdx) {
        const float faceScore = scoresPtr[priorIdx * shape[2] + 1];
        if (faceScore >= confidenceThreshold) {
            indicies.push_back(priorIdx);
        }
    }
    return indicies;
}
std::vector<float> ModelRetinaFacePT::getFilteredScores(const ov::Tensor& scoresTensor,
                                                        const std::vector<size_t>& indicies) {
    // Gathers the face score (offset 1 within each prior's score block) of every
    // kept prior, preserving the input order.
    const auto& shape = scoresTensor.get_shape();
    const float* scoresPtr = scoresTensor.data<float>();
    std::vector<float> scores;
    scores.reserve(indicies.size());
    for (const auto priorIdx : indicies) {
        scores.push_back(scoresPtr[priorIdx * shape[2] + 1]);
    }
    return scores;
}
std::vector<cv::Point2f> ModelRetinaFacePT::getFilteredLandmarks(const ov::Tensor& landmarksTensor,
                                                                 const std::vector<size_t>& indicies,
                                                                 int imgWidth,
                                                                 int imgHeight) {
    // Decodes normalized landmark offsets of the kept priors into absolute pixel
    // coordinates, landmarksNum points per prior, in input order.
    const auto& shape = landmarksTensor.get_shape();
    const float* landmarksPtr = landmarksTensor.data<float>();
    std::vector<cv::Point2f> landmarks;
    landmarks.reserve(landmarksNum * indicies.size());
    for (const size_t priorIdx : indicies) {
        const auto& prior = priors[priorIdx];
        const float* deltas = landmarksPtr + priorIdx * shape[2];
        for (size_t j = 0; j < landmarksNum; j++) {
            // Offsets are relative to the prior center, scaled by the first variance
            // term, clamped to the unit square and finally scaled to the image size.
            const float x = clamp(prior.cX + deltas[j * 2] * variance[0] * prior.width, 0.f, 1.f) * imgWidth;
            const float y = clamp(prior.cY + deltas[j * 2 + 1] * variance[0] * prior.height, 0.f, 1.f) * imgHeight;
            landmarks.push_back({x, y});
        }
    }
    return landmarks;
}
std::vector<ModelRetinaFacePT::Box> ModelRetinaFacePT::generatePriorData() {
    // Reproduces the training-time prior grid: three strides, two box sizes per
    // cell, all coordinates normalized to [0, 1] relative to the network input.
    const float globalMinSizes[][2] = {{16, 32}, {64, 128}, {256, 512}};
    const float steps[] = {8., 16., 32.};
    std::vector<ModelRetinaFacePT::Box> anchors;
    for (size_t level = 0; level < arraySize(steps); level++) {
        const float step = steps[level];
        const int featureW = static_cast<int>(std::round(netInputWidth / step));
        const int featureH = static_cast<int>(std::round(netInputHeight / step));
        for (int row = 0; row < featureH; row++) {
            for (int col = 0; col < featureW; col++) {
                for (const float minSize : globalMinSizes[level]) {
                    // Normalized box size and cell-center position.
                    const float sKX = minSize / netInputWidth;
                    const float sKY = minSize / netInputHeight;
                    const float denseCY = (row + 0.5f) * step / netInputHeight;
                    const float denseCX = (col + 0.5f) * step / netInputWidth;
                    anchors.push_back(ModelRetinaFacePT::Box{denseCX, denseCY, sKX, sKY});
                }
            }
        }
    }
    return anchors;
}
// Decodes the regression deltas of the kept priors into absolute, image-space
// corner boxes (left, top, right, bottom), clamped to the image bounds.
std::vector<Anchor> ModelRetinaFacePT::getFilteredProposals(const ov::Tensor& boxesTensor,
                                                            const std::vector<size_t>& indicies,
                                                            int imgWidth,
                                                            int imgHeight) {
    std::vector<Anchor> rects;
    rects.reserve(indicies.size());
    const auto& shape = boxesTensor.get_shape();
    const float* boxesPtr = boxesTensor.data<float>();
    if (shape[1] != priors.size()) {
        throw std::logic_error("rawBoxes size is not equal to priors size");
    }
    for (auto i : indicies) {
        // Reinterpret this prior's 4 consecutive floats as a Box {cX, cY, width, height}.
        // NOTE(review): relies on Box being layout-compatible with 4 packed floats — verify.
        const auto pRawBox = reinterpret_cast<const Box*>(boxesPtr + i * shape[2]);
        const auto& prior = priors[i];
        // Center-size decoding scaled by the variance terms, still in [0, 1] space.
        const float cX = priors[i].cX + pRawBox->cX * variance[0] * prior.width;
        const float cY = priors[i].cY + pRawBox->cY * variance[0] * prior.height;
        const float width = prior.width * exp(pRawBox->width * variance[1]);
        const float height = prior.height * exp(pRawBox->height * variance[1]);
        // Convert to corners, clamp to the unit square, then scale to pixels.
        rects.push_back(Anchor{clamp(cX - width / 2, 0.f, 1.f) * imgWidth,
                               clamp(cY - height / 2, 0.f, 1.f) * imgHeight,
                               clamp(cX + width / 2, 0.f, 1.f) * imgWidth,
                               clamp(cY + height / 2, 0.f, 1.f) * imgHeight});
    }
    return rects;
}
std::unique_ptr<ResultBase> ModelRetinaFacePT::postprocess(InferenceResult& infResult) {
    // Decodes the raw outputs into a RetinaFaceDetectionResult: threshold by score,
    // decode boxes and (optional) landmarks, run NMS, then fill the result.
    const auto boxesTensor = infResult.outputsData[outputsNames[OUT_BOXES]];
    const auto scoresTensor = infResult.outputsData[outputsNames[OUT_SCORES]];
    const auto& validIndicies = filterByScore(scoresTensor, confidenceThreshold);
    const auto& scores = getFilteredScores(scoresTensor, validIndicies);
    const auto& internalData = infResult.internalModelData->asRef<InternalImageModelData>();
    const auto& landmarks = landmarksNum ? getFilteredLandmarks(infResult.outputsData[outputsNames[OUT_LANDMARKS]],
                                                                validIndicies,
                                                                internalData.inputImgWidth,
                                                                internalData.inputImgHeight)
                                         : std::vector<cv::Point2f>();
    const auto& proposals =
        getFilteredProposals(boxesTensor, validIndicies, internalData.inputImgWidth, internalData.inputImgHeight);
    // When landmarks are absent, the last argument makes NMS include box boundaries
    // in the area calculation.
    const auto& keptIndicies = nms(proposals, scores, boxIOUThreshold, !landmarksNum);
    // --------------------------- Create detection result objects
    // Fix: own the allocation from the start so an exception thrown while filling
    // the result (e.g. during vector growth) cannot leak it.
    std::unique_ptr<RetinaFaceDetectionResult> result(
        new RetinaFaceDetectionResult(infResult.frameId, infResult.metaData));
    result->objects.reserve(keptIndicies.size());
    result->landmarks.reserve(keptIndicies.size() * landmarksNum);
    for (auto i : keptIndicies) {
        DetectedObject desc;
        desc.confidence = scores[i];
        //--- Proposals are already in image coordinates
        desc.x = proposals[i].left;
        desc.y = proposals[i].top;
        desc.width = proposals[i].getWidth();
        desc.height = proposals[i].getHeight();
        desc.labelID = 0;
        desc.label = labels[desc.labelID];
        result->objects.push_back(desc);
        //--- Copy the kept detection's landmark coordinates
        for (uint32_t l = 0; l < landmarksNum; ++l) {
            result->landmarks.emplace_back(landmarks[i * landmarksNum + l].x, landmarks[i * landmarksNum + l].y);
        }
    }
    return result;  // converts to unique_ptr<ResultBase>
}

View File

@@ -0,0 +1,286 @@
/*
// Copyright (C) 2020-2024 Intel Corporation
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
*/
#include "models/detection_model_ssd.h"
#include <algorithm>
#include <map>
#include <stdexcept>
#include <string>
#include <unordered_set>
#include <vector>
#include <openvino/openvino.hpp>
#include <utils/common.hpp>
#include <utils/ocv_common.hpp>
#include "models/internal_model_data.h"
#include "models/results.h"
struct InputData;
// Default constructor: base-class defaults only; configuration happens later.
ModelSSD::ModelSSD() = default;
// Constructs an SSD detection wrapper by forwarding all arguments to DetectionModel;
// model-specific I/O setup happens later in prepareInputsOutputs().
ModelSSD::ModelSSD(const std::string& modelFileName,
                   float confidenceThreshold,
                   bool useAutoResize,
                   const std::vector<std::string>& labels,
                   const std::string& layout)
    : DetectionModel(modelFileName, confidenceThreshold, useAutoResize, labels, layout) {}
std::shared_ptr<InternalModelData> ModelSSD::preprocess(const InputData& inputData, ov::InferRequest& request) {
    // Some SSD variants take a second "image info" input. Fill it with the network
    // input geometry {height, width, scale=1} before the common preprocessing runs.
    if (inputsNames.size() > 1) {
        auto infoTensor = request.get_tensor(inputsNames[1]);
        float* infoData = infoTensor.data<float>();
        infoData[0] = static_cast<float>(netInputHeight);
        infoData[1] = static_cast<float>(netInputWidth);
        infoData[2] = 1;
        request.set_tensor(inputsNames[1], infoTensor);
    }
    return DetectionModel::preprocess(inputData, request);
}
std::unique_ptr<ResultBase> ModelSSD::postprocess(InferenceResult& infResult) {
    // Dispatch on the model flavor: one combined detections blob vs separate
    // boxes/labels(/scores) outputs.
    if (outputsNames.size() > 1) {
        return postprocessMultipleOutputs(infResult);
    }
    return postprocessSingleOutput(infResult);
}
std::unique_ptr<ResultBase> ModelSSD::postprocessSingleOutput(InferenceResult& infResult) {
    // Parses the classic SSD output blob: one row per detection of
    // [image_id, label, confidence, xmin, ymin, xmax, ymax] with normalized coordinates.
    const ov::Tensor& detectionsTensor = infResult.getFirstOutputTensor();
    const size_t detectionsNum = detectionsTensor.get_shape()[detectionsNumId];
    const float* detections = ov::Tensor{detectionsTensor}.data<const float>();
    DetectionResult* result = new DetectionResult(infResult.frameId, infResult.metaData);
    auto retVal = std::unique_ptr<ResultBase>(result);
    const auto& internalData = infResult.internalModelData->asRef<InternalImageModelData>();
    const float maxW = static_cast<float>(internalData.inputImgWidth);
    const float maxH = static_cast<float>(internalData.inputImgHeight);
    for (size_t i = 0; i < detectionsNum; i++) {
        const float* det = detections + i * objectSize;
        // A negative image id marks the end of valid detections.
        if (det[0] < 0) {
            break;
        }
        const float confidence = det[2];
        /** Filtering out objects with confidence < confidence_threshold probability **/
        if (confidence <= confidenceThreshold) {
            continue;
        }
        DetectedObject desc;
        desc.confidence = confidence;
        desc.labelID = static_cast<int>(det[1]);
        desc.label = getLabelName(desc.labelID);
        // Scale normalized corners to image pixels, clamp, then convert to x/y/w/h.
        desc.x = clamp(det[3] * maxW, 0.f, maxW);
        desc.y = clamp(det[4] * maxH, 0.f, maxH);
        desc.width = clamp(det[5] * maxW, 0.f, maxW) - desc.x;
        desc.height = clamp(det[6] * maxH, 0.f, maxH) - desc.y;
        result->objects.push_back(desc);
    }
    return retVal;
}
std::unique_ptr<ResultBase> ModelSSD::postprocessMultipleOutputs(InferenceResult& infResult) {
    // Parses SSD models with separate outputs: boxes (with embedded confidence when
    // no dedicated scores output exists), labels, and optionally scores.
    const float* boxes = infResult.outputsData[outputsNames[0]].data<const float>();
    const size_t detectionsNum = infResult.outputsData[outputsNames[0]].get_shape()[detectionsNumId];
    const float* labels = infResult.outputsData[outputsNames[1]].data<const float>();
    const float* scores = outputsNames.size() > 2 ? infResult.outputsData[outputsNames[2]].data<const float>() : nullptr;
    DetectionResult* result = new DetectionResult(infResult.frameId, infResult.metaData);
    auto retVal = std::unique_ptr<ResultBase>(result);
    const auto& internalData = infResult.internalModelData->asRef<InternalImageModelData>();
    // With a separate scores output the coordinates are normalized to [0,1];
    // otherwise they are normalized to [0,netInputWidth] x [0,netInputHeight].
    const float widthScale = static_cast<float>(internalData.inputImgWidth) / (scores ? 1 : netInputWidth);
    const float heightScale = static_cast<float>(internalData.inputImgHeight) / (scores ? 1 : netInputHeight);
    const float maxW = static_cast<float>(internalData.inputImgWidth);
    const float maxH = static_cast<float>(internalData.inputImgHeight);
    for (size_t i = 0; i < detectionsNum; i++) {
        const float confidence = scores ? scores[i] : boxes[i * objectSize + 4];
        /** Filtering out objects with confidence < confidence_threshold probability **/
        if (confidence <= confidenceThreshold) {
            continue;
        }
        const float* box = boxes + i * objectSize;
        DetectedObject desc;
        desc.confidence = confidence;
        desc.labelID = static_cast<int>(labels[i]);
        desc.label = getLabelName(desc.labelID);
        // Scale corners to image pixels, clamp, then convert to x/y/w/h.
        desc.x = clamp(box[0] * widthScale, 0.f, maxW);
        desc.y = clamp(box[1] * heightScale, 0.f, maxH);
        desc.width = clamp(box[2] * widthScale, 0.f, maxW) - desc.x;
        desc.height = clamp(box[3] * heightScale, 0.f, maxH) - desc.y;
        result->objects.push_back(desc);
    }
    return retVal;
}
// Configures preprocessing for the 4D image input and the optional 2D image-info
// input, then dispatches output preparation based on the number of model outputs.
void ModelSSD::prepareInputsOutputs(std::shared_ptr<ov::Model>& model) {
    // --------------------------- Configure input & output -------------------------------------------------
    // --------------------------- Prepare input ------------------------------------------------------
    ov::preprocess::PrePostProcessor ppp(model);
    for (const auto& input : model->inputs()) {
        auto inputTensorName = input.get_any_name();
        const ov::Shape& shape = input.get_shape();
        ov::Layout inputLayout = getInputLayout(input);
        if (shape.size() == 4) {  // 1st input contains images
            // Keep the image input name at slot 0 regardless of enumeration order.
            if (inputsNames.empty()) {
                inputsNames.push_back(inputTensorName);
            } else {
                inputsNames[0] = inputTensorName;
            }
            inputTransform.setPrecision(ppp, inputTensorName);
            ppp.input(inputTensorName).tensor().set_layout({"NHWC"});
            if (useAutoResize) {
                // Let OpenVINO resize arbitrary input sizes to the network input.
                ppp.input(inputTensorName).tensor().set_spatial_dynamic_shape();
                ppp.input(inputTensorName)
                    .preprocess()
                    .convert_element_type(ov::element::f32)
                    .resize(ov::preprocess::ResizeAlgorithm::RESIZE_LINEAR);
            }
            ppp.input(inputTensorName).model().set_layout(inputLayout);
            netInputWidth = shape[ov::layout::width_idx(inputLayout)];
            netInputHeight = shape[ov::layout::height_idx(inputLayout)];
        } else if (shape.size() == 2) {  // 2nd input contains image info
            inputsNames.resize(2);
            inputsNames[1] = inputTensorName;
            ppp.input(inputTensorName).tensor().set_element_type(ov::element::f32);
        } else {
            throw std::logic_error("Unsupported " + std::to_string(input.get_shape().size()) +
                                   "D "
                                   "input layer '" +
                                   input.get_any_name() +
                                   "'. "
                                   "Only 2D and 4D input layers are supported");
        }
    }
    model = ppp.build();
    // --------------------------- Prepare output -----------------------------------------------------
    // One output -> combined detections blob; multiple -> separate boxes/labels(/scores).
    if (model->outputs().size() == 1) {
        prepareSingleOutput(model);
    } else {
        prepareMultipleOutputs(model);
    }
}
/// Validates and configures a single-output SSD head. Expects a 4D output
/// treated as NCHW, whose last dimension (7) packs one detection:
/// [image_id, label, confidence, x_min, y_min, x_max, y_max].
/// @param model model to wrap with a PrePostProcessor; rebuilt in place
/// @throws std::logic_error if the output rank or per-detection size is unexpected
void ModelSSD::prepareSingleOutput(std::shared_ptr<ov::Model>& model) {
    const auto& output = model->output();
    outputsNames.push_back(output.get_any_name());
    const ov::Shape& shape = output.get_shape();
    // Plain value instead of a const reference bound to a temporary: the
    // original `const ov::Layout&` relied on lifetime extension for no benefit.
    const ov::Layout layout{"NCHW"};
    if (shape.size() != 4) {
        throw std::logic_error("SSD single output must have 4 dimensions, but had " + std::to_string(shape.size()));
    }
    // "Height" of the NCHW-interpreted blob is the number of detections.
    detectionsNumId = ov::layout::height_idx(layout);
    objectSize = shape[ov::layout::width_idx(layout)];
    if (objectSize != 7) {
        throw std::logic_error("SSD single output must have 7 as a last dimension, but had " +
                               std::to_string(objectSize));
    }
    ov::preprocess::PrePostProcessor ppp(model);
    ppp.output().tensor().set_element_type(ov::element::f32).set_layout(layout);
    model = ppp.build();
}
/// Configures a multi-output SSD head: collects at most one tensor name per
/// model output (preferring names containing "boxes"/"labels"/"scores"),
/// derives the detections-count index and per-box element size from the
/// 'boxes' shape, and forces f32 output tensors.
/// @param model model to wrap with a PrePostProcessor; rebuilt in place
/// @throws std::logic_error on unexpected output count or 'boxes' shape
void ModelSSD::prepareMultipleOutputs(std::shared_ptr<ov::Model>& model) {
    const ov::OutputVector& outputs = model->outputs();
    for (auto& output : outputs) {
        const auto& tensorNames = output.get_names();
        // First matching alias wins; break prevents duplicates for one output.
        for (const auto& name : tensorNames) {
            if (name.find("boxes") != std::string::npos) {
                outputsNames.push_back(name);
                break;
            } else if (name.find("labels") != std::string::npos) {
                outputsNames.push_back(name);
                break;
            } else if (name.find("scores") != std::string::npos) {
                outputsNames.push_back(name);
                break;
            }
        }
    }
    if (outputsNames.size() != 2 && outputsNames.size() != 3) {
        throw std::logic_error("SSD model wrapper must have 2 or 3 outputs, but had " +
                               std::to_string(outputsNames.size()));
    }
    // Alphabetical sort puts "boxes" first, so outputsNames[0] is the boxes output.
    std::sort(outputsNames.begin(), outputsNames.end());
    ov::preprocess::PrePostProcessor ppp(model);
    const auto& boxesShape = model->output(outputsNames[0]).get_partial_shape().get_max_shape();
    ov::Layout boxesLayout;
    if (boxesShape.size() == 2) {
        // 2D boxes [n][5]: coordinates plus a trailing confidence
        // (see postprocess(), which reads boxes[i * objectSize + 4] as confidence).
        boxesLayout = "NC";
        detectionsNumId = ov::layout::batch_idx(boxesLayout);
        objectSize = boxesShape[ov::layout::channels_idx(boxesLayout)];
        if (objectSize != 5) {
            throw std::logic_error("Incorrect 'boxes' output shape, [n][5] shape is required");
        }
    } else if (boxesShape.size() == 3) {
        // 3D boxes [b][n][4]: coordinates only; confidences come from 'scores'.
        boxesLayout = "CHW";
        detectionsNumId = ov::layout::height_idx(boxesLayout);
        objectSize = boxesShape[ov::layout::width_idx(boxesLayout)];
        if (objectSize != 4) {
            throw std::logic_error("Incorrect 'boxes' output shape, [b][n][4] shape is required");
        }
    } else {
        throw std::logic_error("Incorrect number of 'boxes' output dimensions, expected 2 or 3, but had " +
                               std::to_string(boxesShape.size()));
    }
    ppp.output(outputsNames[0]).tensor().set_layout(boxesLayout);
    for (const auto& outName : outputsNames) {
        ppp.output(outName).tensor().set_element_type(ov::element::f32);
    }
    model = ppp.build();
}

View File

@@ -0,0 +1,482 @@
/*
// Copyright (C) 2020-2024 Intel Corporation
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
*/
#include "models/detection_model_yolo.h"
#include <algorithm>
#include <cmath>
#include <cstdint>
#include <stdexcept>
#include <string>
#include <utility>
#include <vector>
#include <openvino/openvino.hpp>
#include <utils/common.hpp>
#include <utils/slog.hpp>
#include "models/internal_model_data.h"
#include "models/results.h"
// Default anchor sets as (width, height) pairs; the array index matches the
// YOLO version enum order: V1V2, V3, V4, V4_Tiny, YOLOF.
std::vector<float> defaultAnchors[] = {
    // YOLOv1v2
    {0.57273f, 0.677385f, 1.87446f, 2.06253f, 3.33843f, 5.47434f, 7.88282f, 3.52778f, 9.77052f, 9.16828f},
    // YOLOv3
    {10.0f, 13.0f, 16.0f, 30.0f, 33.0f, 23.0f, 30.0f, 61.0f, 62.0f, 45.0f,
     59.0f, 119.0f, 116.0f, 90.0f, 156.0f, 198.0f, 373.0f, 326.0f},
    // YOLOv4
    {12.0f, 16.0f, 19.0f, 36.0f, 40.0f, 28.0f, 36.0f, 75.0f, 76.0f, 55.0f,
     72.0f, 146.0f, 142.0f, 110.0f, 192.0f, 243.0f, 459.0f, 401.0f},
    // YOLOv4_Tiny
    {10.0f, 14.0f, 23.0f, 27.0f, 37.0f, 58.0f, 81.0f, 82.0f, 135.0f, 169.0f, 344.0f, 319.0f},
    // YOLOF
    {16.0f, 16.0f, 32.0f, 32.0f, 64.0f, 64.0f, 128.0f, 128.0f, 256.0f, 256.0f, 512.0f, 512.0f}};
// Default anchor masks per YOLO version; empty entries mean the mask is taken
// from the model graph itself (RegionYolo ops).
const std::vector<int64_t> defaultMasks[] = {
    {},                           // YOLOv1v2
    {},                           // YOLOv3
    {0, 1, 2, 3, 4, 5, 6, 7, 8},  // YOLOv4
    {1, 2, 3, 3, 4, 5},           // YOLOv4_Tiny
    {0, 1, 2, 3, 4, 5}};          // YOLOF
// Logistic sigmoid. std::exp selects the float overload, avoiding the
// implicit float->double promotion (and back) that unqualified exp() causes.
static inline float sigmoid(float x) {
    return 1.f / (1.f + std::exp(-x));
}
// Identity activation, used where a YOLO variant's output needs no
// postprocessing transform (see postprocessRawData selection below).
static inline float linear(float x) {
    return x;
}
/// Constructs a YOLO detection wrapper.
/// Non-empty anchors/masks override the per-version defaults later, in
/// prepareInputsOutputs(); yoloVersion starts at YOLO_V3 and is refined there
/// from the actual model graph.
ModelYolo::ModelYolo(const std::string& modelFileName,
                     float confidenceThreshold,
                     bool useAutoResize,
                     bool useAdvancedPostprocessing,
                     float boxIOUThreshold,
                     const std::vector<std::string>& labels,
                     const std::vector<float>& anchors,
                     const std::vector<int64_t>& masks,
                     const std::string& layout)
    : DetectionModel(modelFileName, confidenceThreshold, useAutoResize, labels, layout),
      boxIOUThreshold(boxIOUThreshold),
      useAdvancedPostprocessing(useAdvancedPostprocessing),
      yoloVersion(YOLO_V3),
      presetAnchors(anchors),
      presetMasks(masks) {}
/// Configures the YOLO network: NHWC input (optional in-graph resize), f32
/// outputs, then detects the YOLO flavor either from RegionYolo ops embedded
/// in the graph (v1/v2/v3) or, when none are present, from the output count
/// (1 -> YOLOF, 2 -> v4-tiny, 3 -> v4), building a Region per output.
/// @param model model to wrap with a PrePostProcessor; rebuilt in place
/// @throws std::logic_error / std::runtime_error on invalid shapes or masks
void ModelYolo::prepareInputsOutputs(std::shared_ptr<ov::Model>& model) {
    // --------------------------- Configure input & output -------------------------------------------------
    // --------------------------- Prepare input ------------------------------------------------------
    if (model->inputs().size() != 1) {
        throw std::logic_error("YOLO model wrapper accepts models that have only 1 input");
    }
    const auto& input = model->input();
    const ov::Shape& inputShape = model->input().get_shape();
    ov::Layout inputLayout = getInputLayout(input);
    if (inputShape[ov::layout::channels_idx(inputLayout)] != 3) {
        throw std::logic_error("Expected 3-channel input");
    }
    ov::preprocess::PrePostProcessor ppp(model);
    inputTransform.setPrecision(ppp, input.get_any_name());
    ppp.input().tensor().set_layout({"NHWC"});
    if (useAutoResize) {
        // Let the model graph itself resize arbitrary input resolutions.
        ppp.input().tensor().set_spatial_dynamic_shape();
        ppp.input()
            .preprocess()
            .convert_element_type(ov::element::f32)
            .resize(ov::preprocess::ResizeAlgorithm::RESIZE_LINEAR);
    }
    ppp.input().model().set_layout(inputLayout);
    //--- Reading image input parameters
    inputsNames.push_back(model->input().get_any_name());
    netInputWidth = inputShape[ov::layout::width_idx(inputLayout)];
    netInputHeight = inputShape[ov::layout::height_idx(inputLayout)];
    // --------------------------- Prepare output -----------------------------------------------------
    const ov::OutputVector& outputs = model->outputs();
    std::map<std::string, ov::Shape> outShapes;
    for (auto& out : outputs) {
        ppp.output(out.get_any_name()).tensor().set_element_type(ov::element::f32);
        if (out.get_shape().size() == 4) {
            // Heuristic: spatial dims are square only when read as NHWC -> model is NHWC.
            if (out.get_shape()[ov::layout::height_idx("NCHW")] != out.get_shape()[ov::layout::width_idx("NCHW")] &&
                out.get_shape()[ov::layout::height_idx("NHWC")] == out.get_shape()[ov::layout::width_idx("NHWC")]) {
                ppp.output(out.get_any_name()).model().set_layout("NHWC");
                // outShapes are saved before ppp.build() thus set yoloRegionLayout as it is in model before ppp.build()
                yoloRegionLayout = "NHWC";
            }
            // yolo-v1-tiny-tf out shape is [1, 21125] thus set layout only for 4 dim tensors
            ppp.output(out.get_any_name()).tensor().set_layout("NCHW");
        }
        outputsNames.push_back(out.get_any_name());
        outShapes[out.get_any_name()] = out.get_shape();
    }
    model = ppp.build();
    yoloVersion = YOLO_V3;
    bool isRegionFound = false;
    // RegionYolo ops carry anchors/masks directly for v1/v2/v3 graphs.
    for (const auto& op : model->get_ordered_ops()) {
        if (std::string("RegionYolo") == op->get_type_name()) {
            auto regionYolo = std::dynamic_pointer_cast<ov::op::v0::RegionYolo>(op);
            if (regionYolo) {
                if (!regionYolo->get_mask().size()) {
                    yoloVersion = YOLO_V1V2;
                }
                const auto& opName = op->get_friendly_name();
                for (const auto& out : outputs) {
                    if (out.get_node()->get_friendly_name() == opName ||
                        out.get_node()->get_input_node_ptr(0)->get_friendly_name() == opName) {
                        isRegionFound = true;
                        regions.emplace(out.get_any_name(), Region(regionYolo));
                    }
                }
            }
        }
    }
    if (!isRegionFound) {
        // No RegionYolo ops: infer the flavor from the number of outputs.
        switch (outputsNames.size()) {
            case 1:
                yoloVersion = YOLOF;
                break;
            case 2:
                yoloVersion = YOLO_V4_TINY;
                break;
            case 3:
                yoloVersion = YOLO_V4;
                break;
        }
        int num = yoloVersion == YOLOF ? 6 : 3;
        isObjConf = yoloVersion == YOLOF ? 0 : 1;
        int i = 0;
        auto chosenMasks = presetMasks.size() ? presetMasks : defaultMasks[yoloVersion];
        if (chosenMasks.size() != num * outputs.size()) {
            // Report the size that was actually validated: chosenMasks may be
            // the defaults, in which case presetMasks.size() would be 0 and misleading.
            throw std::runtime_error(std::string("Invalid size of masks array, got ") +
                                     std::to_string(chosenMasks.size()) + ", should be " +
                                     std::to_string(num * outputs.size()));
        }
        // Order outputs by descending feature-map height (finer scales first)
        // so mask slices line up with the expected scale order.
        std::sort(outputsNames.begin(),
                  outputsNames.end(),
                  [&outShapes, this](const std::string& x, const std::string& y) {
                      return outShapes[x][ov::layout::height_idx(yoloRegionLayout)] >
                             outShapes[y][ov::layout::height_idx(yoloRegionLayout)];
                  });
        for (const auto& name : outputsNames) {
            const auto& shape = outShapes[name];
            if (shape[ov::layout::channels_idx(yoloRegionLayout)] % num != 0) {
                throw std::logic_error(std::string("Output tensor ") + name + " has wrong channel dimension");
            }
            // channels / anchors-per-scale = 4 coords + optional objectness + classes.
            regions.emplace(
                name,
                Region(shape[ov::layout::channels_idx(yoloRegionLayout)] / num - 4 - (isObjConf ? 1 : 0),
                       4,
                       presetAnchors.size() ? presetAnchors : defaultAnchors[yoloVersion],
                       std::vector<int64_t>(chosenMasks.begin() + i * num, chosenMasks.begin() + (i + 1) * num),
                       shape[ov::layout::width_idx(yoloRegionLayout)],
                       shape[ov::layout::height_idx(yoloRegionLayout)]));
            i++;
        }
    } else {
        // Currently externally set anchors and masks are supported only for YoloV4
        if (presetAnchors.size() || presetMasks.size()) {
            slog::warn << "Preset anchors and mask can be set for YoloV4 model only. "
                          "This model is not YoloV4, so these options will be ignored."
                       << slog::endl;
        }
    }
}
/// Decodes every YOLO output into candidate boxes, then suppresses overlaps.
/// "Advanced" mode keeps a box unless a same-class box with strictly higher
/// confidence overlaps it at >= boxIOUThreshold; "classic" mode sorts by
/// confidence and zeroes out suppressed boxes.
/// @return DetectionResult holding the surviving boxes
std::unique_ptr<ResultBase> ModelYolo::postprocess(InferenceResult& infResult) {
    // Own the result from the start so an exception thrown while parsing
    // cannot leak it (matches the pattern used by ModelSSD::postprocess).
    auto result = std::make_unique<DetectionResult>(infResult.frameId, infResult.metaData);
    std::vector<DetectedObject> objects;
    // Parsing outputs
    const auto& internalData = infResult.internalModelData->asRef<InternalImageModelData>();
    for (auto& output : infResult.outputsData) {
        this->parseYOLOOutput(output.first,
                              output.second,
                              netInputHeight,
                              netInputWidth,
                              internalData.inputImgHeight,
                              internalData.inputImgWidth,
                              objects);
    }
    if (useAdvancedPostprocessing) {
        // Advanced postprocessing
        // Checking IOU threshold conformance
        // For every i-th object we're finding all objects it intersects with, and comparing confidence
        // If i-th object has greater confidence than all others, we include it into result
        for (const auto& obj1 : objects) {
            bool isGoodResult = true;
            for (const auto& obj2 : objects) {
                if (obj1.labelID == obj2.labelID && obj1.confidence < obj2.confidence &&
                    intersectionOverUnion(obj1, obj2) >= boxIOUThreshold) {  // if obj1 is the same as obj2, condition
                                                                             // expression will evaluate to false anyway
                    isGoodResult = false;
                    break;
                }
            }
            if (isGoodResult) {
                result->objects.push_back(obj1);
            }
        }
    } else {
        // Classic postprocessing: greedy NMS over a confidence-sorted list.
        std::sort(objects.begin(), objects.end(), [](const DetectedObject& x, const DetectedObject& y) {
            return x.confidence > y.confidence;
        });
        for (size_t i = 0; i < objects.size(); ++i) {
            if (objects[i].confidence == 0)
                continue;
            // Zero out every lower-confidence box overlapping this one too much.
            for (size_t j = i + 1; j < objects.size(); ++j)
                if (intersectionOverUnion(objects[i], objects[j]) >= boxIOUThreshold)
                    objects[j].confidence = 0;
            result->objects.push_back(objects[i]);
        }
    }
    return result;
}
/// Decodes one raw YOLO region output tensor into detection boxes.
/// For every grid cell and anchor, reads objectness (when present), box
/// geometry and per-class scores, keeping entries whose combined confidence
/// passes confidenceThreshold; coordinates are mapped back to the original
/// image. A single box may be emitted once per class above the threshold.
/// @param output_name name of the output layer; must have a registered Region
/// @param tensor raw f32 output data
/// @param resized_im_h network input height
/// @param resized_im_w network input width
/// @param original_im_h original image height (boxes are scaled to this)
/// @param original_im_w original image width (boxes are scaled to this)
/// @param objects container the decoded detections are appended to
/// @throws std::runtime_error if output_name has no registered Region
void ModelYolo::parseYOLOOutput(const std::string& output_name,
                                const ov::Tensor& tensor,
                                const unsigned long resized_im_h,
                                const unsigned long resized_im_w,
                                const unsigned long original_im_h,
                                const unsigned long original_im_w,
                                std::vector<DetectedObject>& objects) {
    // --------------------------- Extracting layer parameters -------------------------------------
    auto it = regions.find(output_name);
    if (it == regions.end()) {
        throw std::runtime_error(std::string("Can't find output layer with name ") + output_name);
    }
    auto& region = it->second;
    int sideW = 0;
    int sideH = 0;
    unsigned long scaleH;
    unsigned long scaleW;
    switch (yoloVersion) {
        case YOLO_V1V2:
            // v1/v2: coordinates are expressed in grid-cell units.
            sideH = region.outputHeight;
            sideW = region.outputWidth;
            scaleW = region.outputWidth;
            scaleH = region.outputHeight;
            break;
        case YOLO_V3:
        case YOLO_V4:
        case YOLO_V4_TINY:
        case YOLOF:
            // Later versions: coordinates are relative to the network input size.
            sideH = static_cast<int>(tensor.get_shape()[ov::layout::height_idx("NCHW")]);
            sideW = static_cast<int>(tensor.get_shape()[ov::layout::width_idx("NCHW")]);
            scaleW = resized_im_w;
            scaleH = resized_im_h;
            break;
    }
    auto entriesNum = sideW * sideH;
    const float* outData = tensor.data<float>();
    // v4/v4-tiny/YOLOF outputs are raw logits and need sigmoid; others are
    // already activated, so the identity is applied.
    auto postprocessRawData =
        (yoloVersion == YOLO_V4 || yoloVersion == YOLO_V4_TINY || yoloVersion == YOLOF) ? sigmoid : linear;
    // --------------------------- Parsing YOLO Region output -------------------------------------
    for (int i = 0; i < entriesNum; ++i) {
        int row = i / sideW;
        int col = i % sideW;
        for (int n = 0; n < region.num; ++n) {
            //--- Getting region data
            int obj_index = calculateEntryIndex(entriesNum,
                                                region.coords,
                                                region.classes + isObjConf,
                                                n * entriesNum + i,
                                                region.coords);
            int box_index =
                calculateEntryIndex(entriesNum, region.coords, region.classes + isObjConf, n * entriesNum + i, 0);
            float scale = isObjConf ? postprocessRawData(outData[obj_index]) : 1;
            //--- Preliminary check for confidence threshold conformance
            if (scale >= confidenceThreshold) {
                //--- Calculating scaled region's coordinates
                float x, y;
                if (yoloVersion == YOLOF) {
                    // YOLOF predicts center offsets scaled by the anchor size.
                    x = (static_cast<float>(col) / sideW +
                         outData[box_index + 0 * entriesNum] * region.anchors[2 * n] / scaleW) *
                        original_im_w;
                    y = (static_cast<float>(row) / sideH +
                         outData[box_index + 1 * entriesNum] * region.anchors[2 * n + 1] / scaleH) *
                        original_im_h;
                } else {
                    x = static_cast<float>((col + postprocessRawData(outData[box_index + 0 * entriesNum])) / sideW *
                                           original_im_w);
                    y = static_cast<float>((row + postprocessRawData(outData[box_index + 1 * entriesNum])) / sideH *
                                           original_im_h);
                }
                // Width/height: exponential of raw prediction, scaled by the anchor.
                float height = static_cast<float>(std::exp(outData[box_index + 3 * entriesNum]) *
                                                  region.anchors[2 * n + 1] * original_im_h / scaleH);
                float width = static_cast<float>(std::exp(outData[box_index + 2 * entriesNum]) * region.anchors[2 * n] *
                                                 original_im_w / scaleW);
                DetectedObject obj;
                // (x, y) is the box center; convert to top-left and clamp to the image.
                obj.x = clamp(x - width / 2, 0.f, static_cast<float>(original_im_w));
                obj.y = clamp(y - height / 2, 0.f, static_cast<float>(original_im_h));
                obj.width = clamp(width, 0.f, static_cast<float>(original_im_w - obj.x));
                obj.height = clamp(height, 0.f, static_cast<float>(original_im_h - obj.y));
                for (size_t j = 0; j < region.classes; ++j) {
                    int class_index = calculateEntryIndex(entriesNum,
                                                          region.coords,
                                                          region.classes + isObjConf,
                                                          n * entriesNum + i,
                                                          region.coords + isObjConf + j);
                    float prob = scale * postprocessRawData(outData[class_index]);
                    //--- Checking confidence threshold conformance and adding region to the list
                    if (prob >= confidenceThreshold) {
                        obj.confidence = prob;
                        obj.labelID = j;
                        obj.label = getLabelName(obj.labelID);
                        objects.push_back(obj);
                    }
                }
            }
        }
    }
}
// Maps (location, entry) to a linear offset in a region tensor whose values
// are laid out as [anchor][coords + classes (+ objectness)][cell].
int ModelYolo::calculateEntryIndex(int totalCells, int lcoords, size_t lclasses, int location, int entry) {
    const int anchorIdx = location / totalCells;
    const int cellIdx = location % totalCells;
    return (anchorIdx * (lcoords + lclasses) + entry) * totalCells + cellIdx;
}
// Standard IoU between two axis-aligned boxes given as (x, y, width, height).
double ModelYolo::intersectionOverUnion(const DetectedObject& o1, const DetectedObject& o2) {
    const double interW = fmin(o1.x + o1.width, o2.x + o2.width) - fmax(o1.x, o2.x);
    const double interH = fmin(o1.y + o1.height, o2.y + o2.height) - fmax(o1.y, o2.y);
    double intersectionArea = 0;
    // Non-positive extent on either axis means the boxes do not overlap.
    if (interW >= 0 && interH >= 0) {
        intersectionArea = interH * interW;
    }
    const double unionArea = o1.width * o1.height + o2.width * o2.height - intersectionArea;
    return intersectionArea / unionArea;
}
/// Builds a Region descriptor from a RegionYolo op found in the model graph.
/// Ops with a mask (YOLOv3 style) select anchor pairs via the mask; maskless
/// ops are treated as YOLOv1/v2, using the anchors directly with a hard-coded
/// fallback when the op carries none.
ModelYolo::Region::Region(const std::shared_ptr<ov::op::v0::RegionYolo>& regionYolo) {
    coords = regionYolo->get_num_coords();
    classes = regionYolo->get_num_classes();
    auto mask = regionYolo->get_mask();
    num = mask.size();
    // Spatial size of the feature map feeding this op; indices [2]/[3]
    // assume an NCHW-shaped input ([2] = H, [3] = W).
    auto shape = regionYolo->get_input_shape(0);
    outputWidth = shape[3];
    outputHeight = shape[2];
    if (num) {
        // Parsing YoloV3 parameters
        anchors.resize(num * 2);
        for (int i = 0; i < num; ++i) {
            anchors[i * 2] = regionYolo->get_anchors()[mask[i] * 2];
            anchors[i * 2 + 1] = regionYolo->get_anchors()[mask[i] * 2 + 1];
        }
    } else {
        // Parsing YoloV2 parameters
        num = regionYolo->get_num_regions();
        anchors = regionYolo->get_anchors();
        if (anchors.empty()) {
            // Op carries no anchors: fall back to the stock v1/v2 set.
            anchors = defaultAnchors[YOLO_V1V2];
            num = 5;
        }
    }
}
/// Builds a Region descriptor from explicit parameters (used when the graph
/// has no RegionYolo ops). `masks` selects which (w, h) anchor pairs belong
/// to this output scale; an empty mask means all anchor pairs are used.
/// @throws std::runtime_error on an empty/odd anchors vector or a mask index
///         pointing past the anchors vector
ModelYolo::Region::Region(size_t classes,
                          int coords,
                          const std::vector<float>& anchors,
                          const std::vector<int64_t>& masks,
                          size_t outputWidth,
                          size_t outputHeight)
    : classes(classes),
      coords(coords),
      outputWidth(outputWidth),
      outputHeight(outputHeight) {
    num = masks.size();
    if (anchors.size() == 0 || anchors.size() % 2 != 0) {
        // Message fixed: what is validated here is the anchors vector, not "regions".
        throw std::runtime_error("Explicitly initialized region should have non-empty even-sized anchors vector");
    }
    if (num) {
        this->anchors.resize(num * 2);
        for (int i = 0; i < num; ++i) {
            // Fail loudly on a bad mask instead of indexing out of bounds.
            const size_t pairIdx = static_cast<size_t>(masks[i]) * 2;
            if (pairIdx + 1 >= anchors.size()) {
                throw std::runtime_error("Region mask index is out of anchors vector bounds");
            }
            this->anchors[i * 2] = anchors[pairIdx];
            this->anchors[i * 2 + 1] = anchors[pairIdx + 1];
        }
    } else {
        // No mask: take every anchor pair as-is.
        this->anchors = anchors;
        num = anchors.size() / 2;
    }
}

View File

@@ -0,0 +1,180 @@
/*
// Copyright (C) 2022-2024 Intel Corporation
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
*/
#include "models/detection_model_yolov3_onnx.h"
#include <algorithm>
#include <cmath>
#include <cstdint>
#include <stdexcept>
#include <string>
#include <utility>
#include <vector>
#include <openvino/openvino.hpp>
#include <utils/common.hpp>
#include <utils/slog.hpp>
#include "models/input_data.h"
#include "models/internal_model_data.h"
#include "models/results.h"
#include "utils/image_utils.h"
/// Constructs the YOLOv3-ONNX wrapper: in-graph auto-resize is disabled
/// (third DetectionModel argument is false); resizing is done on the host
/// with cubic interpolation and letterbox (keep-aspect) padding.
ModelYoloV3ONNX::ModelYoloV3ONNX(const std::string& modelFileName,
                                 float confidenceThreshold,
                                 const std::vector<std::string>& labels,
                                 const std::string& layout)
    : DetectionModel(modelFileName, confidenceThreshold, false, labels, layout) {
    interpolationMode = cv::INTER_CUBIC;
    resizeMode = RESIZE_KEEP_ASPECT_LETTERBOX;
}
/// Configures the two YOLOv3-ONNX inputs (4D image as u8/NHWC, 2D image-info
/// as i32) and classifies the three outputs (indices / boxes / scores) by
/// their shapes.
/// @param model model to wrap with a PrePostProcessor; rebuilt in place
/// @throws std::logic_error on unexpected input/output counts or shapes
void ModelYoloV3ONNX::prepareInputsOutputs(std::shared_ptr<ov::Model>& model) {
    // --------------------------- Configure input & output -------------------------------------------------
    // --------------------------- Prepare inputs ------------------------------------------------------
    const ov::OutputVector& inputs = model->inputs();
    if (inputs.size() != 2) {
        throw std::logic_error("YoloV3ONNX model wrapper expects models that have 2 inputs");
    }
    ov::preprocess::PrePostProcessor ppp(model);
    // resize(), not reserve(): the loop below assigns inputsNames[0]/[1], and
    // writing through operator[] on a merely-reserved (still empty) vector is
    // undefined behavior.
    inputsNames.resize(inputs.size());
    for (auto& input : inputs) {
        const ov::Shape& currentShape = input.get_shape();
        std::string currentName = input.get_any_name();
        const ov::Layout& currentLayout = getInputLayout(input);
        if (currentShape.size() == 4) {
            // Image input: fixed slot 0 regardless of iteration order.
            if (currentShape[ov::layout::channels_idx(currentLayout)] != 3) {
                throw std::logic_error("Expected 4D image input with 3 channels");
            }
            inputsNames[0] = currentName;
            netInputWidth = currentShape[ov::layout::width_idx(currentLayout)];
            netInputHeight = currentShape[ov::layout::height_idx(currentLayout)];
            ppp.input(currentName).tensor().set_element_type(ov::element::u8).set_layout({"NHWC"});
        } else if (currentShape.size() == 2) {
            // Image-info input (original rows/cols): fixed slot 1.
            if (currentShape[ov::layout::channels_idx(currentLayout)] != 2) {
                throw std::logic_error("Expected 2D image info input with 2 channels");
            }
            inputsNames[1] = currentName;
            ppp.input(currentName).tensor().set_element_type(ov::element::i32);
        }
        ppp.input(currentName).model().set_layout(currentLayout);
    }
    // --------------------------- Prepare outputs -----------------------------------------------------
    const ov::OutputVector& outputs = model->outputs();
    if (outputs.size() != 3) {
        throw std::logic_error("YoloV3ONNX model wrapper expects models that have 3 outputs");
    }
    // Classify each output by shape: [..., 3] -> indices, [:,:,4] -> boxes,
    // [:, numberOfClasses, :] -> scores.
    for (auto& output : outputs) {
        const ov::Shape& currentShape = output.get_partial_shape().get_max_shape();
        std::string currentName = output.get_any_name();
        if (currentShape.back() == 3) {
            indicesOutputName = currentName;
            ppp.output(currentName).tensor().set_element_type(ov::element::i32);
        } else if (currentShape[2] == 4) {
            boxesOutputName = currentName;
            ppp.output(currentName).tensor().set_element_type(ov::element::f32);
        } else if (currentShape[1] == numberOfClasses) {
            scoresOutputName = currentName;
            ppp.output(currentName).tensor().set_element_type(ov::element::f32);
        } else {
            throw std::logic_error("Expected shapes [:,:,4], [:,"
                + std::to_string(numberOfClasses) + ",:] and [:,3] for outputs");
        }
        outputsNames.push_back(currentName);
    }
    model = ppp.build();
}
// Fills the auxiliary image-info input with the original image size
// (rows, cols), then delegates the image preprocessing to the base class.
std::shared_ptr<InternalModelData> ModelYoloV3ONNX::preprocess(const InputData& inputData,
                                                               ov::InferRequest& request) {
    const auto& origImg = inputData.asRef<ImageInputData>().inputImage;
    ov::Tensor info{ov::element::i32, ov::Shape({1, 2})};
    int32_t* infoData = info.data<int32_t>();
    infoData[0] = origImg.rows;
    infoData[1] = origImg.cols;
    request.set_tensor(inputsNames[1], info);
    return ImageModel::preprocess(inputData, request);
}
namespace {
// Reads the score of class `classInd` for box `boxInd`; the scores tensor is
// laid out class-major with shape[2] boxes per class.
float getScore(const ov::Tensor& scoresTensor, size_t classInd, size_t boxInd) {
    const size_t boxesCount = scoresTensor.get_shape()[2];
    const float* scoresData = scoresTensor.data<float>();
    return scoresData[classInd * boxesCount + boxInd];
}
}  // namespace
/// Converts the (indices, boxes, scores) outputs of the ONNX YOLOv3 graph
/// into detections. `indices` rows are (batch, class, box) triples and a
/// batch index of -1 terminates the list; boxes are stored as
/// [y_min, x_min, y_max, x_max] (note the swapped axes below).
std::unique_ptr<ResultBase> ModelYoloV3ONNX::postprocess(InferenceResult& infResult) {
    // Get info about input image
    const auto imgWidth = infResult.internalModelData->asRef<InternalImageModelData>().inputImgWidth;
    const auto imgHeight = infResult.internalModelData->asRef<InternalImageModelData>().inputImgHeight;
    // Get outputs tensors
    const ov::Tensor& boxes = infResult.outputsData[boxesOutputName];
    const float* boxesPtr = boxes.data<float>();
    const ov::Tensor& scores = infResult.outputsData[scoresOutputName];
    const ov::Tensor& indices = infResult.outputsData[indicesOutputName];
    const int* indicesData = indices.data<int>();
    const auto indicesShape = indices.get_shape();
    const auto boxShape = boxes.get_shape();
    // Generate detection results
    DetectionResult* result = new DetectionResult(infResult.frameId, infResult.metaData);
    // Indices may be batched ([1, N, 3]) or flat ([N, 3]).
    size_t numberOfBoxes = indicesShape.size() == 3 ? indicesShape[1] : indicesShape[0];
    size_t indicesStride = indicesShape.size() == 3 ? indicesShape[2] : indicesShape[1];
    for (size_t i = 0; i < numberOfBoxes; ++i) {
        int batchInd = indicesData[i * indicesStride];
        int classInd = indicesData[i * indicesStride + 1];
        int boxInd = indicesData[i * indicesStride + 2];
        // Batch index of -1 marks the end of valid entries.
        if (batchInd == -1) {
            break;
        }
        float score = getScore(scores, classInd, boxInd);
        if (score > confidenceThreshold) {
            DetectedObject obj;
            size_t startPos = boxShape[2] * boxInd;
            // Box layout is (y_min, x_min, y_max, x_max): swap into x/y/width/height.
            auto x = boxesPtr[startPos + 1];
            auto y = boxesPtr[startPos];
            auto width = boxesPtr[startPos + 3] - x;
            auto height = boxesPtr[startPos + 2] - y;
            // Create new detected box
            obj.x = clamp(x, 0.f, static_cast<float>(imgWidth));
            obj.y = clamp(y, 0.f, static_cast<float>(imgHeight));
            obj.height = clamp(height, 0.f, static_cast<float>(imgHeight));
            obj.width = clamp(width, 0.f, static_cast<float>(imgWidth));
            obj.confidence = score;
            obj.labelID = classInd;
            obj.label = getLabelName(classInd);
            result->objects.push_back(obj);
        }
    }
    return std::unique_ptr<ResultBase>(result);
}

View File

@@ -0,0 +1,194 @@
/*
// Copyright (C) 2022-2024 Intel Corporation
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
*/
#include "models/detection_model_yolox.h"
#include <algorithm>
#include <cmath>
#include <cstdint>
#include <stdexcept>
#include <string>
#include <utility>
#include <vector>
#include <openvino/openvino.hpp>
#include <utils/common.hpp>
#include <utils/slog.hpp>
#include "models/input_data.h"
#include "models/internal_model_data.h"
#include "models/results.h"
#include "utils/image_utils.h"
#include "utils/nms.hpp"
/// Constructs the YOLOX wrapper: in-graph auto-resize is disabled (third
/// DetectionModel argument is false) because preprocess() performs its own
/// keep-aspect resize and records the scale factor for postprocessing.
ModelYoloX::ModelYoloX(const std::string& modelFileName,
                       float confidenceThreshold,
                       float boxIOUThreshold,
                       const std::vector<std::string>& labels,
                       const std::string& layout)
    : DetectionModel(modelFileName, confidenceThreshold, false, labels, layout),
      boxIOUThreshold(boxIOUThreshold) {
    resizeMode = RESIZE_KEEP_ASPECT;
}
/// Validates the single 4D, 3-channel input and single 3D output of a YOLOX
/// model, sets u8/NHWC input and f32 output tensors, and precomputes the
/// per-cell stride grids used by postprocess().
/// @param model model to wrap with a PrePostProcessor; rebuilt in place
/// @throws std::logic_error on unexpected input/output ranks
void ModelYoloX::prepareInputsOutputs(std::shared_ptr<ov::Model>& model) {
    // --------------------------- Configure input & output -------------------------------------------------
    // --------------------------- Prepare input ------------------------------------------------------
    const ov::OutputVector& inputs = model->inputs();
    if (inputs.size() != 1) {
        throw std::logic_error("YOLOX model wrapper accepts models that have only 1 input");
    }
    //--- Check image input
    const auto& input = model->input();
    const ov::Shape& inputShape = model->input().get_shape();
    ov::Layout inputLayout = getInputLayout(input);
    // The input must be 4D AND have 3 channels; the original `&&` let
    // non-conforming inputs through. `||` also short-circuits, so the
    // channel index is never applied to a non-4D shape.
    if (inputShape.size() != 4 || inputShape[ov::layout::channels_idx(inputLayout)] != 3) {
        throw std::logic_error("Expected 4D image input with 3 channels");
    }
    ov::preprocess::PrePostProcessor ppp(model);
    ppp.input().tensor().set_element_type(ov::element::u8).set_layout({"NHWC"});
    ppp.input().model().set_layout(inputLayout);
    //--- Reading image input parameters
    inputsNames.push_back(input.get_any_name());
    netInputWidth = inputShape[ov::layout::width_idx(inputLayout)];
    netInputHeight = inputShape[ov::layout::height_idx(inputLayout)];
    setStridesGrids();
    // --------------------------- Prepare output -----------------------------------------------------
    if (model->outputs().size() != 1) {
        throw std::logic_error("YoloX model wrapper expects models that have only 1 output");
    }
    const auto& output = model->output();
    outputsNames.push_back(output.get_any_name());
    const ov::Shape& shape = output.get_shape();
    if (shape.size() != 3) {
        throw std::logic_error("YOLOX single output must have 3 dimensions, but had " + std::to_string(shape.size()));
    }
    ppp.output().tensor().set_element_type(ov::element::f32);
    model = ppp.build();
}
// Precomputes, for every cell of every detection scale (strides 8/16/32),
// its grid coordinate and the stride it belongs to. The flattened order of
// `grids`/`expandedStrides` matches the layout of the single output tensor.
void ModelYoloX::setStridesGrids() {
    const size_t strides[] = {8, 16, 32};
    for (const size_t stride : strides) {
        const size_t gridHeight = netInputHeight / stride;
        const size_t gridWidth = netInputWidth / stride;
        for (size_t y = 0; y < gridHeight; ++y) {
            for (size_t x = 0; x < gridWidth; ++x) {
                grids.emplace_back(x, y);
                expandedStrides.push_back(stride);
            }
        }
    }
}
/// Resizes the image keeping aspect ratio, padding the remainder with gray
/// (114, 114, 114), and records the applied scale so postprocess() can map
/// boxes back to the original image.
/// @return InternalScaleData with the original size and the resize scale
std::shared_ptr<InternalModelData> ModelYoloX::preprocess(const InputData& inputData,
                                                          ov::InferRequest& request) {
    const auto& origImg = inputData.asRef<ImageInputData>().inputImage;
    // One scale for both axes: the limiting dimension defines it.
    float scale = std::min(static_cast<float>(netInputWidth) / origImg.cols,
                           static_cast<float>(netInputHeight) / origImg.rows);
    cv::Mat resizedImage = resizeImageExt(origImg, netInputWidth, netInputHeight, resizeMode,
                                          interpolationMode, nullptr, cv::Scalar(114, 114, 114));
    request.set_input_tensor(wrapMat2Tensor(resizedImage));
    return std::make_shared<InternalScaleData>(origImg.cols, origImg.rows, scale, scale);
}
/// Decodes the single YOLOX output: converts cell-relative predictions to
/// absolute coordinates (in place — the output tensor data is mutated),
/// filters boxes by objectness * best-class score, undoes the preprocess
/// resize scale, and applies NMS.
std::unique_ptr<ResultBase> ModelYoloX::postprocess(InferenceResult& infResult) {
    // Get metadata about input image shape and scale
    const auto& scale = infResult.internalModelData->asRef<InternalScaleData>();
    // Get output tensor
    const ov::Tensor& output = infResult.outputsData[outputsNames[0]];
    const auto& outputShape = output.get_shape();
    float* outputPtr = output.data<float>();
    // Generate detection results
    DetectionResult* result = new DetectionResult(infResult.frameId, infResult.metaData);
    // Update coordinates according to strides
    for (size_t box_index = 0; box_index < expandedStrides.size(); ++box_index) {
        size_t startPos = outputShape[2] * box_index;
        outputPtr[startPos] = (outputPtr[startPos] + grids[box_index].first) * expandedStrides[box_index];
        outputPtr[startPos + 1] = (outputPtr[startPos + 1] + grids[box_index].second) * expandedStrides[box_index];
        outputPtr[startPos + 2] = std::exp(outputPtr[startPos + 2]) * expandedStrides[box_index];
        outputPtr[startPos + 3] = std::exp(outputPtr[startPos + 3]) * expandedStrides[box_index];
    }
    // Filter predictions
    std::vector<Anchor> validBoxes;
    std::vector<float> scores;
    std::vector<size_t> classes;
    for (size_t box_index = 0; box_index < expandedStrides.size(); ++box_index) {
        size_t startPos = outputShape[2] * box_index;
        // Per-box layout: cx, cy, w, h, objectness, then per-class scores.
        float score = outputPtr[startPos + 4];
        if (score < confidenceThreshold)
            continue;
        // Pick the best-scoring class for this box.
        float maxClassScore = -1;
        size_t mainClass = 0;
        for (size_t class_index = 0; class_index < numberOfClasses; ++class_index) {
            if (outputPtr[startPos + 5 + class_index] > maxClassScore) {
                maxClassScore = outputPtr[startPos + 5 + class_index];
                mainClass = class_index;
            }
        }
        // Filter by score
        score *= maxClassScore;
        if (score < confidenceThreshold)
            continue;
        // Add successful boxes
        scores.push_back(score);
        classes.push_back(mainClass);
        // Center/size -> corners, then undo the keep-aspect resize scale.
        Anchor trueBox = {outputPtr[startPos + 0] - outputPtr[startPos + 2] / 2, outputPtr[startPos + 1] - outputPtr[startPos + 3] / 2,
                          outputPtr[startPos + 0] + outputPtr[startPos + 2] / 2, outputPtr[startPos + 1] + outputPtr[startPos + 3] / 2};
        validBoxes.push_back(Anchor({trueBox.left / scale.scaleX, trueBox.top / scale.scaleY,
                                     trueBox.right / scale.scaleX, trueBox.bottom / scale.scaleY}));
    }
    // NMS for valid boxes
    std::vector<int> keep = nms(validBoxes, scores, boxIOUThreshold, true);
    for (auto& index: keep) {
        // Create new detected box
        DetectedObject obj;
        obj.x = clamp(validBoxes[index].left, 0.f, static_cast<float>(scale.inputImgWidth));
        obj.y = clamp(validBoxes[index].top, 0.f, static_cast<float>(scale.inputImgHeight));
        obj.height = clamp(validBoxes[index].bottom - validBoxes[index].top, 0.f, static_cast<float>(scale.inputImgHeight));
        obj.width = clamp(validBoxes[index].right - validBoxes[index].left, 0.f, static_cast<float>(scale.inputImgWidth));
        obj.confidence = scores[index];
        obj.labelID = classes[index];
        obj.label = getLabelName(classes[index]);
        result->objects.push_back(obj);
    }
    return std::unique_ptr<ResultBase>(result);
}

View File

@@ -0,0 +1,264 @@
/*
// Copyright (C) 2021-2024 Intel Corporation
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
*/
#include "models/hpe_model_associative_embedding.h"
#include <stddef.h>
#include <algorithm>
#include <cmath>
#include <map>
#include <stdexcept>
#include <string>
#include <unordered_set>
#include <utility>
#include <vector>
#include <openvino/openvino.hpp>
#include <utils/image_utils.h>
#include <utils/ocv_common.hpp>
#include <utils/slog.hpp>
#include "models/associative_embedding_decoder.h"
#include "models/input_data.h"
#include "models/internal_model_data.h"
#include "models/results.h"
// Model-specific constant: mean pixel value, 128 for each channel.
const cv::Vec3f HpeAssociativeEmbedding::meanPixel = cv::Vec3f::all(128);
// Minimum heatmap score for a local maximum to count as a keypoint candidate.
const float HpeAssociativeEmbedding::detectionThreshold = 0.1f;
// Embedding-tag distance threshold used when grouping keypoints into poses.
const float HpeAssociativeEmbedding::tagThreshold = 1.0f;
HpeAssociativeEmbedding::HpeAssociativeEmbedding(const std::string& modelFileName,
                                                 double aspectRatio,
                                                 int targetSize,
                                                 float confidenceThreshold,
                                                 const std::string& layout,
                                                 float delta,
                                                 RESIZE_MODE resizeMode)
    : ImageModel(modelFileName, false, layout),
      aspectRatio(aspectRatio),
      targetSize(targetSize),
      confidenceThreshold(confidenceThreshold),
      delta(delta) {
    // Bicubic interpolation is used for every resize performed by this wrapper.
    interpolationMode = cv::INTER_CUBIC;
    // The resize policy comes from the caller (field inherited from ImageModel).
    this->resizeMode = resizeMode;
}
void HpeAssociativeEmbedding::prepareInputsOutputs(std::shared_ptr<ov::Model>& model) {
    // Validates the model's I/O, configures pre/post-processing (u8 NHWC input,
    // f32 outputs) and resolves the embeddings/heatmaps output tensor names.
    // --------------------------- Configure input & output -------------------------------------------------
    // --------------------------- Prepare input Tensors ------------------------------------------------------
    if (model->inputs().size() != 1) {
        throw std::logic_error("HPE AE model wrapper supports topologies with only 1 input.");
    }
    inputsNames.push_back(model->input().get_any_name());
    const ov::Shape& inputShape = model->input().get_shape();
    const ov::Layout& inputLayout = getInputLayout(model->input());
    if (inputShape.size() != 4 || inputShape[ov::layout::batch_idx(inputLayout)] != 1 ||
        inputShape[ov::layout::channels_idx(inputLayout)] != 3) {
        throw std::logic_error("3-channel 4-dimensional model's input is expected");
    }
    ov::preprocess::PrePostProcessor ppp(model);
    // The request is fed u8 NHWC images (see preprocess()); conversion to the
    // model's own layout/type is delegated to the plugin.
    ppp.input().tensor().set_element_type(ov::element::u8).set_layout({"NHWC"});
    ppp.input().model().set_layout(inputLayout);
    // --------------------------- Prepare output Tensors -----------------------------------------------------
    const ov::OutputVector& outputs = model->outputs();
    if (outputs.size() != 2 && outputs.size() != 3) {
        throw std::logic_error("HPE AE model model wrapper supports topologies only with 2 or 3 outputs");
    }
    for (const auto& output : model->outputs()) {
        const auto& outTensorName = output.get_any_name();
        ppp.output(outTensorName).tensor().set_element_type(ov::element::f32);
        // An output may carry several names; record all of them so the
        // prefix lookup below can match any alias.
        for (const auto& name : output.get_names()) {
            outputsNames.push_back(name);
        }
        const ov::Shape& outputShape = output.get_shape();
        if (outputShape.size() != 4 && outputShape.size() != 5) {
            throw std::logic_error("output tensors are expected to be 4-dimensional or 5-dimensional");
        }
        // 17 channels: one map per keypoint of the expected model.
        if (outputShape[ov::layout::batch_idx("NC...")] != 1 || outputShape[ov::layout::channels_idx("NC...")] != 17) {
            throw std::logic_error("output tensors are expected to have 1 batch size and 17 channels");
        }
    }
    model = ppp.build();
    // Resolve output tensors by name prefix.
    embeddingsTensorName = findTensorByName("embeddings", outputsNames);
    heatmapsTensorName = findTensorByName("heatmaps", outputsNames);
    try {
        nmsHeatmapsTensorName = findTensorByName("nms_heatmaps", outputsNames);
    } catch (const std::runtime_error&) {
        // No dedicated NMS output: peaks will be searched on the raw heatmaps.
        nmsHeatmapsTensorName = heatmapsTensorName;
    }
    changeInputSize(model);
}
void HpeAssociativeEmbedding::changeInputSize(std::shared_ptr<ov::Model>& model) {
    // Recomputes the static input resolution from targetSize/aspectRatio and
    // reshapes the model accordingly.
    ov::Shape newShape = model->input().get_shape();
    const ov::Layout& layout = ov::layout::get_layout(model->input());
    const auto batchDim = ov::layout::batch_idx(layout);
    const auto heightDim = ov::layout::height_idx(layout);
    const auto widthDim = ov::layout::width_idx(layout);
    // Default target size: the smaller side of the model's current input.
    if (!targetSize) {
        targetSize = static_cast<int>(std::min(newShape[heightDim], newShape[widthDim]));
    }
    // Preserve the requested aspect ratio: the shorter side equals targetSize.
    int desiredHeight = targetSize;
    int desiredWidth = targetSize;
    if (aspectRatio >= 1.0) {
        desiredWidth = static_cast<int>(std::round(targetSize * aspectRatio));
    } else {
        desiredHeight = static_cast<int>(std::round(targetSize / aspectRatio));
    }
    // Round both sides up to a multiple of the network stride.
    const int height = static_cast<int>((desiredHeight + stride - 1) / stride) * stride;
    const int width = static_cast<int>((desiredWidth + stride - 1) / stride) * stride;
    newShape[batchDim] = 1;
    newShape[heightDim] = height;
    newShape[widthDim] = width;
    inputLayerSize = cv::Size(width, height);
    model->reshape(newShape);
}
std::shared_ptr<InternalModelData> HpeAssociativeEmbedding::preprocess(const InputData& inputData,
ov::InferRequest& request) {
auto& image = inputData.asRef<ImageInputData>().inputImage;
cv::Rect roi;
auto paddedImage = resizeImageExt(image, inputLayerSize.width, inputLayerSize.height, resizeMode, interpolationMode, &roi);
if (inputLayerSize.height - stride >= roi.height || inputLayerSize.width - stride >= roi.width) {
slog::warn << "\tChosen model aspect ratio doesn't match image aspect ratio" << slog::endl;
}
request.set_input_tensor(wrapMat2Tensor(paddedImage));
return std::make_shared<InternalScaleData>(paddedImage.cols,
paddedImage.rows,
image.size().width / static_cast<float>(roi.width),
image.size().height / static_cast<float>(roi.height));
}
std::unique_ptr<ResultBase> HpeAssociativeEmbedding::postprocess(InferenceResult& infResult) {
    // Decodes embeddings + heatmaps into multi-person poses and maps keypoints
    // back into the original image coordinate system.
    HumanPoseResult* result = new HumanPoseResult(infResult.frameId, infResult.metaData);
    const auto& aembds = infResult.outputsData[embeddingsTensorName];
    const ov::Shape& aembdsShape = aembds.get_shape();
    float* const aembdsMapped = aembds.data<float>();
    // Per-channel cv::Mat views over the raw tensor memory (no copies).
    std::vector<cv::Mat> aembdsMaps = split(aembdsMapped, aembdsShape);
    const auto& heats = infResult.outputsData[heatmapsTensorName];
    const ov::Shape& heatMapsShape = heats.get_shape();
    float* const heatMapsMapped = heats.data<float>();
    std::vector<cv::Mat> heatMaps = split(heatMapsMapped, heatMapsShape);
    // Use the dedicated NMS heatmaps when the model provides them, otherwise
    // fall back to the raw heatmaps (see prepareInputsOutputs()).
    std::vector<cv::Mat> nmsHeatMaps = heatMaps;
    if (nmsHeatmapsTensorName != heatmapsTensorName) {
        const auto& nmsHeats = infResult.outputsData[nmsHeatmapsTensorName];
        const ov::Shape& nmsHeatMapsShape = nmsHeats.get_shape();
        float* const nmsHeatMapsMapped = nmsHeats.data<float>();
        nmsHeatMaps = split(nmsHeatMapsMapped, nmsHeatMapsShape);
    }
    std::vector<HumanPose> poses = extractPoses(heatMaps, aembdsMaps, nmsHeatMaps);
    // Rescale poses to the original image
    const auto& scale = infResult.internalModelData->asRef<InternalScaleData>();
    // Ratio between network input width and heatmap width (feature-map stride).
    const float outputScale = inputLayerSize.width / static_cast<float>(heatMapsShape[3]);
    float shiftX = 0.0, shiftY = 0.0;
    float scaleX = 1.0, scaleY = 1.0;
    if (resizeMode == RESIZE_KEEP_ASPECT_LETTERBOX) {
        // Letterboxing keeps the aspect ratio with padding; the /2 implies the
        // padding is centered, so undo half of it on the longer side before
        // applying the common scale factor.
        scaleX = scaleY = std::min(scale.scaleX, scale.scaleY);
        if (aspectRatio >= 1.0)
            shiftX = static_cast<float>((targetSize * scaleX * aspectRatio - scale.inputImgWidth * scaleX) / 2);
        else
            shiftY = static_cast<float>((targetSize * scaleY / aspectRatio - scale.inputImgHeight * scaleY) / 2);
        scaleX = scaleY *= outputScale;
    } else {
        scaleX = scale.scaleX * outputScale;
        scaleY = scale.scaleY * outputScale;
    }
    for (auto& pose : poses) {
        for (auto& keypoint : pose.keypoints) {
            // (-1, -1) marks a missing keypoint and must not be rescaled.
            if (keypoint != cv::Point2f(-1, -1)) {
                keypoint.x = keypoint.x * scaleX + shiftX;
                keypoint.y = keypoint.y * scaleY + shiftY;
            }
        }
        result->poses.push_back(pose);
    }
    return std::unique_ptr<ResultBase>(result);
}
std::string HpeAssociativeEmbedding::findTensorByName(const std::string& tensorName,
                                                      const std::vector<std::string>& outputsNames) {
    // Returns the unique output name that starts with the given prefix;
    // throws when there is no match or the match is ambiguous.
    std::vector<std::string> matches;
    for (const auto& candidate : outputsNames) {
        if (candidate.compare(0, tensorName.size(), tensorName) == 0) {
            matches.push_back(candidate);
        }
    }
    if (matches.empty()) {
        throw std::runtime_error("Suitable tensor for " + tensorName + " output is not found");
    }
    if (matches.size() > 1) {
        throw std::runtime_error("More than 1 tensor matched to " + tensorName + " output");
    }
    return matches.front();
}
std::vector<cv::Mat> HpeAssociativeEmbedding::split(float* data, const ov::Shape& shape) {
    // Wraps each channel plane of an NCHW blob as a cv::Mat header (no copy).
    const size_t rows = shape[2];
    const size_t cols = shape[3];
    const size_t planeSize = rows * cols;
    std::vector<cv::Mat> planes;
    planes.reserve(shape[1]);
    for (size_t channel = 0; channel < shape[1]; ++channel) {
        planes.emplace_back(static_cast<int>(rows), static_cast<int>(cols), CV_32FC1, data + channel * planeSize);
    }
    return planes;
}
std::vector<HumanPose> HpeAssociativeEmbedding::extractPoses(std::vector<cv::Mat>& heatMaps,
                                                             const std::vector<cv::Mat>& aembdsMaps,
                                                             const std::vector<cv::Mat>& nmsHeatMaps) const {
    // Decodes keypoint candidates from the NMS'ed heatmaps, groups them into
    // person instances by their associative-embedding tags, refines the kept
    // poses and returns them with their mean scores.
    std::vector<std::vector<Peak>> allPeaks(numJoints);
    for (size_t jointId = 0; jointId < numJoints; jointId++) {  // size_t: avoids the original signed/unsigned mismatch
        findPeaks(nmsHeatMaps, aembdsMaps, allPeaks, jointId, maxNumPeople, detectionThreshold);
    }
    std::vector<Pose> allPoses = matchByTag(allPeaks, maxNumPeople, numJoints, tagThreshold);
    // The decoder works in (row, col); swap to (x, y) image coordinates.
    for (auto& pose : allPoses) {
        for (size_t j = 0; j < numJoints; j++) {
            Peak& peak = pose.getPeak(j);
            std::swap(peak.keypoint.x, peak.keypoint.y);
        }
    }
    std::vector<HumanPose> poses;
    // Refinement needs magnitude heatmaps. The original code re-applied cv::abs
    // to every heatmap once per retained pose, which is redundant work: abs is
    // idempotent, so applying it lazily exactly once (before the first call to
    // adjustAndRefine) produces identical data. Assumes adjustAndRefine does not
    // write to heatMaps — TODO confirm against the decoder implementation.
    bool heatMapsRectified = false;
    for (size_t i = 0; i < allPoses.size(); i++) {
        Pose& pose = allPoses[i];
        // Filtering poses with low mean scores
        if (pose.getMeanScore() <= confidenceThreshold) {
            continue;
        }
        if (!heatMapsRectified) {
            for (auto& heatMap : heatMaps) {
                heatMap = cv::abs(heatMap);
            }
            heatMapsRectified = true;
        }
        adjustAndRefine(allPoses, heatMaps, aembdsMaps, i, delta);
        std::vector<cv::Point2f> keypoints;
        keypoints.reserve(numJoints);
        for (size_t j = 0; j < numJoints; j++) {
            keypoints.push_back(pose.getPeak(j).keypoint);
        }
        poses.push_back({keypoints, pose.getMeanScore()});
    }
    return poses;
}

View File

@@ -0,0 +1,256 @@
/*
// Copyright (C) 2020-2024 Intel Corporation
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
*/
#include "models/hpe_model_openpose.h"
#include <algorithm>
#include <cmath>
#include <map>
#include <stdexcept>
#include <string>
#include <utility>
#include <vector>
#include <opencv2/imgproc.hpp>
#include <openvino/openvino.hpp>
#include <utils/image_utils.h>
#include <utils/ocv_common.hpp>
#include <utils/slog.hpp>
#include "models/input_data.h"
#include "models/internal_model_data.h"
#include "models/openpose_decoder.h"
#include "models/results.h"
// Model-specific constant: mean pixel value, 128 for each channel.
const cv::Vec3f HPEOpenPose::meanPixel = cv::Vec3f::all(128);
// Minimum distance between two kept peaks (see findPeaks in openpose_decoder).
const float HPEOpenPose::minPeaksDistance = 3.0f;
// Decoder thresholds forwarded to groupPeaksToPoses() by extractPoses().
const float HPEOpenPose::midPointsScoreThreshold = 0.05f;
const float HPEOpenPose::foundMidPointsRatioThreshold = 0.8f;
const float HPEOpenPose::minSubsetScore = 0.2f;
HPEOpenPose::HPEOpenPose(const std::string& modelFileName,
                         double aspectRatio,
                         int targetSize,
                         float confidenceThreshold,
                         const std::string& layout)
    : ImageModel(modelFileName, false, layout),
      aspectRatio(aspectRatio),
      targetSize(targetSize),
      confidenceThreshold(confidenceThreshold) {
    // Bicubic resize that preserves the source aspect ratio.
    interpolationMode = cv::INTER_CUBIC;
    resizeMode = RESIZE_KEEP_ASPECT;
}
void HPEOpenPose::prepareInputsOutputs(std::shared_ptr<ov::Model>& model) {
    // Validates I/O, configures u8 NHWC input / f32 NCHW outputs, orders the
    // two outputs as {heatmaps, PAFs} and reshapes the input to the target size.
    // --------------------------- Configure input & output -------------------------------------------------
    // --------------------------- Prepare input ------------------------------------------------------
    if (model->inputs().size() != 1) {
        throw std::logic_error("HPE OpenPose model wrapper supports topologies with only 1 input");
    }
    inputsNames.push_back(model->input().get_any_name());
    const ov::Shape& inputShape = model->input().get_shape();
    const ov::Layout& inputLayout = getInputLayout(model->input());
    if (inputShape.size() != 4 || inputShape[ov::layout::batch_idx(inputLayout)] != 1 ||
        inputShape[ov::layout::channels_idx(inputLayout)] != 3)
        throw std::logic_error("3-channel 4-dimensional model's input is expected");
    ov::preprocess::PrePostProcessor ppp(model);
    // The request is fed u8 NHWC images (see preprocess()).
    ppp.input().tensor().set_element_type(ov::element::u8).set_layout({"NHWC"});
    ppp.input().model().set_layout(inputLayout);
    // --------------------------- Prepare output -----------------------------------------------------
    const ov::OutputVector& outputs = model->outputs();
    if (outputs.size() != 2) {
        throw std::runtime_error("HPE OpenPose supports topologies with only 2 outputs");
    }
    const ov::Layout outputLayout("NCHW");
    for (const auto& output : model->outputs()) {
        const auto& outTensorName = output.get_any_name();
        ppp.output(outTensorName).tensor().set_element_type(ov::element::f32).set_layout(outputLayout);
        outputsNames.push_back(outTensorName);
    }
    model = ppp.build();
    const size_t batchId = ov::layout::batch_idx(outputLayout);
    const size_t channelsId = ov::layout::channels_idx(outputLayout);
    const size_t widthId = ov::layout::width_idx(outputLayout);
    const size_t heightId = ov::layout::height_idx(outputLayout);
    ov::Shape heatmapsOutputShape = model->outputs().front().get_shape();
    ov::Shape pafsOutputShape = model->outputs().back().get_shape();
    // The PAF output has more channels than the heatmap output (see the checks
    // below); swap if the model lists them in the opposite order so that
    // outputsNames[0] is always the heatmaps tensor.
    if (heatmapsOutputShape[channelsId] > pafsOutputShape[channelsId]) {
        std::swap(heatmapsOutputShape, pafsOutputShape);
        std::swap(outputsNames[0], outputsNames[1]);
    }
    // keypointsNumber + 1 heatmap channels: one map per keypoint plus one extra map.
    if (heatmapsOutputShape.size() != 4 || heatmapsOutputShape[batchId] != 1 ||
        heatmapsOutputShape[ov::layout::channels_idx(outputLayout)] != keypointsNumber + 1) {
        throw std::logic_error("1x" + std::to_string(keypointsNumber + 1) +
                               "xHFMxWFM dimension of model's heatmap is expected");
    }
    // PAFs hold a 2D vector field, hence twice as many channels.
    if (pafsOutputShape.size() != 4 || pafsOutputShape[batchId] != 1 ||
        pafsOutputShape[channelsId] != 2 * (keypointsNumber + 1)) {
        throw std::logic_error("1x" + std::to_string(2 * (keypointsNumber + 1)) +
                               "xHFMxWFM dimension of model's output is expected");
    }
    // postprocess() relies on the two outputs sharing spatial dimensions.
    if (pafsOutputShape[heightId] != heatmapsOutputShape[heightId] ||
        pafsOutputShape[widthId] != heatmapsOutputShape[widthId]) {
        throw std::logic_error("output and heatmap are expected to have matching last two dimensions");
    }
    changeInputSize(model);
}
void HPEOpenPose::changeInputSize(std::shared_ptr<ov::Model>& model) {
    // Recomputes the static input resolution from targetSize/aspectRatio and
    // reshapes the model accordingly.
    ov::Shape inputShape = model->input().get_shape();
    const ov::Layout& layout = ov::layout::get_layout(model->inputs().front());
    const auto batchId = ov::layout::batch_idx(layout);
    const auto heightId = ov::layout::height_idx(layout);
    const auto widthId = ov::layout::width_idx(layout);
    if (!targetSize) {
        // Explicit narrowing cast (size_t -> int): matches the sibling
        // HpeAssociativeEmbedding::changeInputSize and silences conversion warnings.
        targetSize = static_cast<int>(inputShape[heightId]);
    }
    // Round both sides up to a multiple of the network stride.
    int height = static_cast<int>((targetSize + stride - 1) / stride) * stride;
    int inputWidth = static_cast<int>(std::round(targetSize * aspectRatio));
    int width = static_cast<int>((inputWidth + stride - 1) / stride) * stride;
    inputShape[batchId] = 1;
    inputShape[heightId] = height;
    inputShape[widthId] = width;
    inputLayerSize = cv::Size(width, height);
    model->reshape(inputShape);
}
std::shared_ptr<InternalModelData> HPEOpenPose::preprocess(const InputData& inputData, ov::InferRequest& request) {
auto& image = inputData.asRef<ImageInputData>().inputImage;
cv::Rect roi;
auto paddedImage =
resizeImageExt(image, inputLayerSize.width, inputLayerSize.height, resizeMode, interpolationMode, &roi);
if (inputLayerSize.width < roi.width)
throw std::runtime_error("The image aspect ratio doesn't fit current model shape");
if (inputLayerSize.width - stride >= roi.width) {
slog::warn << "\tChosen model aspect ratio doesn't match image aspect ratio" << slog::endl;
}
request.set_input_tensor(wrapMat2Tensor(paddedImage));
return std::make_shared<InternalScaleData>(paddedImage.cols,
paddedImage.rows,
image.cols / static_cast<float>(roi.width),
image.rows / static_cast<float>(roi.height));
}
std::unique_ptr<ResultBase> HPEOpenPose::postprocess(InferenceResult& infResult) {
    // Decodes heatmaps + part-affinity fields into poses and rescales the
    // keypoints back into the original image coordinate system.
    HumanPoseResult* result = new HumanPoseResult(infResult.frameId, infResult.metaData);
    auto& heatMapsMapped = infResult.outputsData[outputsNames[0]];
    auto& outputMapped = infResult.outputsData[outputsNames[1]];
    const ov::Shape& outputShape = outputMapped.get_shape();
    const ov::Shape& heatMapShape = heatMapsMapped.get_shape();
    float* const predictions = outputMapped.data<float>();
    float* const heats = heatMapsMapped.data<float>();
    // Wrap each channel as a cv::Mat view over the tensor memory (no copy).
    // Only the first keypointsNumber heatmaps are used; the extra channel is skipped.
    std::vector<cv::Mat> heatMaps(keypointsNumber);
    for (size_t i = 0; i < heatMaps.size(); i++) {
        heatMaps[i] =
            cv::Mat(heatMapShape[2], heatMapShape[3], CV_32FC1, heats + i * heatMapShape[2] * heatMapShape[3]);
    }
    resizeFeatureMaps(heatMaps);
    // PAF spatial dims equal the heatmap dims (verified in prepareInputsOutputs),
    // so heatMapShape can be reused to index into the PAF buffer.
    std::vector<cv::Mat> pafs(outputShape[1]);
    for (size_t i = 0; i < pafs.size(); i++) {
        pafs[i] =
            cv::Mat(heatMapShape[2], heatMapShape[3], CV_32FC1, predictions + i * heatMapShape[2] * heatMapShape[3]);
    }
    resizeFeatureMaps(pafs);
    std::vector<HumanPose> poses = extractPoses(heatMaps, pafs);
    // Map from upsampled feature-map coordinates back to original image pixels.
    const auto& scale = infResult.internalModelData->asRef<InternalScaleData>();
    float scaleX = stride / upsampleRatio * scale.scaleX;
    float scaleY = stride / upsampleRatio * scale.scaleY;
    for (auto& pose : poses) {
        for (auto& keypoint : pose.keypoints) {
            // (-1, -1) marks a missing keypoint and must not be rescaled.
            if (keypoint != cv::Point2f(-1, -1)) {
                keypoint.x *= scaleX;
                keypoint.y *= scaleY;
            }
        }
    }
    for (size_t i = 0; i < poses.size(); ++i) {
        result->poses.push_back(poses[i]);
    }
    return std::unique_ptr<ResultBase>(result);
}
void HPEOpenPose::resizeFeatureMaps(std::vector<cv::Mat>& featureMaps) const {
    // Upsamples every map in place by upsampleRatio with bicubic interpolation.
    for (size_t i = 0; i < featureMaps.size(); ++i) {
        cv::resize(featureMaps[i], featureMaps[i], cv::Size(), upsampleRatio, upsampleRatio, cv::INTER_CUBIC);
    }
}
class FindPeaksBody : public cv::ParallelLoopBody {
public:
FindPeaksBody(const std::vector<cv::Mat>& heatMaps,
float minPeaksDistance,
std::vector<std::vector<Peak>>& peaksFromHeatMap,
float confidenceThreshold)
: heatMaps(heatMaps),
minPeaksDistance(minPeaksDistance),
peaksFromHeatMap(peaksFromHeatMap),
confidenceThreshold(confidenceThreshold) {}
void operator()(const cv::Range& range) const override {
for (int i = range.start; i < range.end; i++) {
findPeaks(heatMaps, minPeaksDistance, peaksFromHeatMap, i, confidenceThreshold);
}
}
private:
const std::vector<cv::Mat>& heatMaps;
float minPeaksDistance;
std::vector<std::vector<Peak>>& peaksFromHeatMap;
float confidenceThreshold;
};
std::vector<HumanPose> HPEOpenPose::extractPoses(const std::vector<cv::Mat>& heatMaps,
                                                 const std::vector<cv::Mat>& pafs) const {
    // Peak detection runs in parallel, one heatmap per task.
    std::vector<std::vector<Peak>> peaksFromHeatMap(heatMaps.size());
    FindPeaksBody findPeaksBody(heatMaps, minPeaksDistance, peaksFromHeatMap, confidenceThreshold);
    cv::parallel_for_(cv::Range(0, static_cast<int>(heatMaps.size())), findPeaksBody);
    // Peak ids are assigned per heatmap; offset them so they are globally unique.
    int idOffset = 0;
    for (size_t heatmapId = 1; heatmapId < heatMaps.size(); heatmapId++) {
        idOffset += static_cast<int>(peaksFromHeatMap[heatmapId - 1].size());
        for (auto& peak : peaksFromHeatMap[heatmapId]) {
            peak.id += idOffset;
        }
    }
    // Assemble the peaks into skeletons using the part-affinity fields.
    return groupPeaksToPoses(peaksFromHeatMap,
                             pafs,
                             keypointsNumber,
                             midPointsScoreThreshold,
                             foundMidPointsRatioThreshold,
                             minJointsNumber,
                             minSubsetScore);
}

View File

@@ -0,0 +1,62 @@
/*
// Copyright (C) 2021-2024 Intel Corporation
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
*/
#include "models/image_model.h"
#include <stdexcept>
#include <vector>
#include <opencv2/core.hpp>
#include <openvino/openvino.hpp>
#include <utils/image_utils.h>
#include <utils/ocv_common.hpp>
#include "models/input_data.h"
#include "models/internal_model_data.h"
// Default constructor: members keep their in-class defaults; no model is loaded.
ImageModel::ImageModel() {
}

// modelFileName - path to the model file
// useAutoResize - when true, preprocess() skips the OpenCV resize step
//                 (the resize is presumably handled elsewhere — see preprocess())
// layout        - user-supplied input layout string (may be empty)
ImageModel::ImageModel(const std::string& modelFileName, bool useAutoResize, const std::string& layout)
    : ModelBase(modelFileName, layout),
      useAutoResize(useAutoResize) {}
std::shared_ptr<InternalModelData> ImageModel::preprocess(const InputData& inputData, ov::InferRequest& request) {
    // Applies the input transform and, unless auto-resize is enabled, resizes
    // the image to the input tensor's spatial size before binding it to the
    // request. Returns the original image dimensions for postprocessing.
    const auto& origImg = inputData.asRef<ImageInputData>().inputImage;
    auto img = inputTransform(origImg);
    if (!useAutoResize) {
        // /* Resize and copy data from the image to the input tensor */
        const ov::Tensor& frameTensor = request.get_tensor(inputsNames[0]);  // first input should be image
        const ov::Shape& tensorShape = frameTensor.get_shape();
        // The input tensor is laid out NHWC (configured by the derived wrappers).
        const ov::Layout layout("NHWC");
        const size_t width = tensorShape[ov::layout::width_idx(layout)];
        const size_t height = tensorShape[ov::layout::height_idx(layout)];
        const size_t channels = tensorShape[ov::layout::channels_idx(layout)];
        if (static_cast<size_t>(img.channels()) != channels) {
            throw std::runtime_error(std::string("The number of channels for model input: ") +
                                     std::to_string(channels) + " and image: " +
                                     std::to_string(img.channels()) + " - must match");
        }
        if (channels != 1 && channels != 3) {
            throw std::runtime_error("Unsupported number of channels");
        }
        img = resizeImageExt(img, width, height, resizeMode, interpolationMode);
    }
    request.set_tensor(inputsNames[0], wrapMat2Tensor(img));
    return std::make_shared<InternalImageModelData>(origImg.cols, origImg.rows);
}

View File

@@ -0,0 +1,167 @@
/*
// Copyright (C) 2021-2024 Intel Corporation
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
*/
#include <stddef.h>
#include <algorithm>
#include <memory>
#include <stdexcept>
#include <string>
#include <vector>
#include <opencv2/core.hpp>
#include <opencv2/imgcodecs.hpp>
#include <opencv2/imgproc.hpp>
#include <openvino/openvino.hpp>
#include <utils/ocv_common.hpp>
#include <utils/slog.hpp>
#include "models/image_model.h"
#include "models/input_data.h"
#include "models/internal_model_data.h"
#include "models/jpeg_restoration_model.h"
#include "models/results.h"
JPEGRestorationModel::JPEGRestorationModel(const std::string& modelFileName,
                                           const cv::Size& inputImgSize,
                                           bool _jpegCompression,
                                           const std::string& layout)
    : ImageModel(modelFileName, false, layout) {
    // Remember the requested network resolution and whether the input should
    // be re-compressed as JPEG before inference (see preprocess()).
    jpegCompression = _jpegCompression;
    netInputWidth = inputImgSize.width;
    netInputHeight = inputImgSize.height;
}
void JPEGRestorationModel::prepareInputsOutputs(std::shared_ptr<ov::Model>& model) {
    // Validates the single-input/single-output topology, configures u8 NHWC
    // input and f32 output, then reshapes the input to the requested size.
    // --------------------------- Configure input & output -------------------------------------------------
    // --------------------------- Prepare input ------------------------------------------------------
    if (model->inputs().size() != 1) {
        throw std::logic_error("The JPEG Restoration model wrapper supports topologies with only 1 input");
    }
    inputsNames.push_back(model->input().get_any_name());
    const ov::Shape& inputShape = model->input().get_shape();
    const ov::Layout& inputLayout = getInputLayout(model->input());
    if (inputShape.size() != 4 || inputShape[ov::layout::batch_idx(inputLayout)] != 1 ||
        inputShape[ov::layout::channels_idx(inputLayout)] != 3) {
        throw std::logic_error("3-channel 4-dimensional model's input is expected");
    }
    ov::preprocess::PrePostProcessor ppp(model);
    ppp.input().tensor().set_element_type(ov::element::u8).set_layout("NHWC");
    ppp.input().model().set_layout(inputLayout);
    // --------------------------- Prepare output -----------------------------------------------------
    const ov::OutputVector& outputs = model->outputs();
    if (outputs.size() != 1) {
        throw std::logic_error("The JPEG Restoration model wrapper supports topologies with only 1 output");
    }
    const ov::Shape& outputShape = model->output().get_shape();
    // NOTE(review): the output layout is assumed to be NCHW — confirm for all
    // supported models; postprocess() indexes the output with this assumption.
    const ov::Layout outputLayout{"NCHW"};
    if (outputShape.size() != 4 || outputShape[ov::layout::batch_idx(outputLayout)] != 1 ||
        outputShape[ov::layout::channels_idx(outputLayout)] != 3) {
        throw std::logic_error("3-channel 4-dimensional model's output is expected");
    }
    outputsNames.push_back(model->output().get_any_name());
    ppp.output().tensor().set_element_type(ov::element::f32);
    model = ppp.build();
    changeInputSize(model);
}
void JPEGRestorationModel::changeInputSize(std::shared_ptr<ov::Model>& model) {
    // Rounds the requested resolution up to a multiple of the network stride
    // and reshapes the model's input accordingly.
    ov::Shape shape = model->input().get_shape();
    const ov::Layout& layout = ov::layout::get_layout(model->input());
    const auto batchDim = ov::layout::batch_idx(layout);
    const auto heightDim = ov::layout::height_idx(layout);
    const auto widthDim = ov::layout::width_idx(layout);
    const bool heightDivisible = shape[heightDim] % stride == 0;
    const bool widthDivisible = shape[widthDim] % stride == 0;
    if (!heightDivisible || !widthDivisible) {
        throw std::logic_error("The shape of the model input must be divisible by stride");
    }
    netInputHeight = static_cast<int>((netInputHeight + stride - 1) / stride) * stride;
    netInputWidth = static_cast<int>((netInputWidth + stride - 1) / stride) * stride;
    shape[batchDim] = 1;
    shape[heightDim] = netInputHeight;
    shape[widthDim] = netInputWidth;
    model->reshape(shape);
}
std::shared_ptr<InternalModelData> JPEGRestorationModel::preprocess(const InputData& inputData,
                                                                    ov::InferRequest& request) {
    // Optionally re-encodes the input as low-quality JPEG (to introduce
    // compression artifacts), then pads or resizes it to the network input
    // resolution and binds it to the request.
    cv::Mat image = inputData.asRef<ImageInputData>().inputImage;
    const size_t h = image.rows;
    const size_t w = image.cols;
    cv::Mat resizedImage;
    if (jpegCompression) {
        std::vector<uchar> encimg;
        std::vector<int> params{cv::IMWRITE_JPEG_QUALITY, 40};
        cv::imencode(".jpg", image, encimg, params);
        // Decode back as a 3-channel color image. The original passed the magic
        // number 3 (== IMREAD_COLOR | IMREAD_ANYDEPTH); for the 8-bit JPEG
        // produced just above that is equivalent to IMREAD_COLOR, which states
        // the intent explicitly.
        image = cv::imdecode(cv::Mat(encimg), cv::IMREAD_COLOR);
    }
    // If the image is at most one stride smaller than the network input on both
    // sides, zero-pad bottom/right; otherwise warn and fall back to a resize.
    if (netInputHeight - stride < h && h <= netInputHeight && netInputWidth - stride < w && w <= netInputWidth) {
        int bottom = netInputHeight - h;
        int right = netInputWidth - w;
        cv::copyMakeBorder(image, resizedImage, 0, bottom, 0, right, cv::BORDER_CONSTANT, 0);
    } else {
        slog::warn << "\tChosen model aspect ratio doesn't match image aspect ratio" << slog::endl;
        cv::resize(image, resizedImage, cv::Size(netInputWidth, netInputHeight));
    }
    request.set_input_tensor(wrapMat2Tensor(resizedImage));
    return std::make_shared<InternalImageModelData>(image.cols, image.rows);
}
std::unique_ptr<ResultBase> JPEGRestorationModel::postprocess(InferenceResult& infResult) {
    // Converts the f32 CHW network output into an interleaved image, crops the
    // padding (or resizes back) to the original size and rescales to 8-bit.
    ImageResult* result = new ImageResult;
    *static_cast<ResultBase*>(result) = static_cast<ResultBase&>(infResult);
    const auto& inputImgSize = infResult.internalModelData->asRef<InternalImageModelData>();
    const auto outputData = infResult.getFirstOutputTensor().data<float>();
    std::vector<cv::Mat> imgPlanes;
    const ov::Shape& outputShape = infResult.getFirstOutputTensor().get_shape();
    const size_t outHeight = static_cast<int>(outputShape[2]);
    const size_t outWidth = static_cast<int>(outputShape[3]);
    const size_t numOfPixels = outWidth * outHeight;
    // Wrap the three CHW planes as cv::Mat views (no copy), then merge to HWC.
    imgPlanes = std::vector<cv::Mat>{cv::Mat(outHeight, outWidth, CV_32FC1, &(outputData[0])),
                                     cv::Mat(outHeight, outWidth, CV_32FC1, &(outputData[numOfPixels])),
                                     cv::Mat(outHeight, outWidth, CV_32FC1, &(outputData[numOfPixels * 2]))};
    cv::Mat resultImg;
    cv::merge(imgPlanes, resultImg);
    // Mirror of the padding decision in preprocess(): crop when the input was
    // zero-padded, otherwise undo the fallback resize.
    if (netInputHeight - stride < static_cast<size_t>(inputImgSize.inputImgHeight) &&
        static_cast<size_t>(inputImgSize.inputImgHeight) <= netInputHeight &&
        netInputWidth - stride < static_cast<size_t>(inputImgSize.inputImgWidth) &&
        static_cast<size_t>(inputImgSize.inputImgWidth) <= netInputWidth) {
        result->resultImage = resultImg(cv::Rect(0, 0, inputImgSize.inputImgWidth, inputImgSize.inputImgHeight));
    } else {
        cv::resize(resultImg, result->resultImage, cv::Size(inputImgSize.inputImgWidth, inputImgSize.inputImgHeight));
    }
    // The scale factor of 255 implies the network output is in [0, 1] — TODO confirm.
    result->resultImage.convertTo(result->resultImage, CV_8UC3, 255);
    return std::unique_ptr<ResultBase>(result);
}

View File

@@ -0,0 +1,65 @@
/*
// Copyright (C) 2021-2024 Intel Corporation
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
*/
#include "models/model_base.h"
#include <utility>
#include <openvino/openvino.hpp>
#include <utils/common.hpp>
#include <utils/config_factory.h>
#include <utils/ocv_common.hpp>
#include <utils/slog.hpp>
std::shared_ptr<ov::Model> ModelBase::prepareModel(ov::Core& core) {
    // Reads the model from disk, logs its basic info, lets the derived class
    // configure the I/O tensors, then forces batch size 1.
    slog::info << "Reading model " << modelFileName << slog::endl;
    auto model = core.read_model(modelFileName);
    logBasicModelInfo(model);
    prepareInputsOutputs(model);
    setBatch(model);
    return model;
}
ov::CompiledModel ModelBase::compileModel(const ModelConfig& config, ov::Core& core) {
    // Remember the configuration, then read/prepare the model and compile it
    // for the configured device.
    this->config = config;
    compiledModel = core.compile_model(prepareModel(core), config.deviceName, config.compiledModelConfig);
    logCompiledModelInfo(compiledModel, modelFileName, config.deviceName);
    return compiledModel;
}
ov::Layout ModelBase::getInputLayout(const ov::Output<ov::Node>& input) {
    // Resolution order: layout stored in the model -> user-provided layouts ->
    // shape-based guess.
    ov::Layout layout = ov::layout::get_layout(input);
    if (!layout.empty()) {
        return layout;
    }
    if (inputsLayouts.empty()) {
        // Nothing provided by the user: guess from the tensor shape and warn.
        layout = getLayoutFromShape(input.get_partial_shape());
        slog::warn << "Automatically detected layout '" << layout.to_string() << "' for input '"
                   << input.get_any_name() << "' will be used." << slog::endl;
        return layout;
    }
    if (inputsLayouts.size() == 1) {
        // A single user layout applies to the only input.
        return inputsLayouts.begin()->second;
    }
    // Several layouts provided: pick the one registered for this input's name.
    return inputsLayouts[input.get_any_name()];
}
void ModelBase::setBatch(std::shared_ptr<ov::Model>& model) {
    // All wrappers derived from this base infer one image at a time.
    ov::set_batch(model, 1);
}

View File

@@ -0,0 +1,345 @@
/*
// Copyright (C) 2020-2024 Intel Corporation
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
*/
#include "models/openpose_decoder.h"
#include <algorithm>
#include <cmath>
#include <memory>
#include <utility>
#include <vector>
#include <utils/common.hpp>
#include "models/results.h"
// Keypoint candidate: unique id, heatmap position and heatmap score.
Peak::Peak(const int id, const cv::Point2f& pos, const float score) : id(id), pos(pos), score(score) {}

// Pose under construction: one peak index per keypoint (-1 = unassigned),
// the number of assigned joints and the accumulated score.
HumanPoseByPeaksIndices::HumanPoseByPeaksIndices(const int keypointsNumber)
    : peaksIndices(std::vector<int>(keypointsNumber, -1)),
      nJoints(0),
      score(0.0f) {}

// Scored link between two keypoint candidates (indices into a flat peak list).
TwoJointsConnection::TwoJointsConnection(const int firstJointIdx, const int secondJointIdx, const float score)
    : firstJointIdx(firstJointIdx),
      secondJointIdx(secondJointIdx),
      score(score) {}
void findPeaks(const std::vector<cv::Mat>& heatMaps,
               const float minPeaksDistance,
               std::vector<std::vector<Peak>>& allPeaks,
               int heatMapId,
               float confidenceThreshold) {
    // Finds local maxima of one heatmap (4-neighbourhood; values below
    // confidenceThreshold are treated as 0), suppresses peaks closer than
    // minPeaksDistance to an already-kept one, and appends the survivors
    // (with sequential per-map ids) to allPeaks[heatMapId].
    std::vector<cv::Point> peaks;
    const cv::Mat& heatMap = heatMaps[heatMapId];
    const float* heatMapData = heatMap.ptr<float>();
    size_t heatMapStep = heatMap.step1();
    // Scan one pixel beyond each border so that border pixels are compared
    // against implicit zeros outside the map.
    for (int y = -1; y < heatMap.rows + 1; y++) {
        for (int x = -1; x < heatMap.cols + 1; x++) {
            float val = 0;
            if (x >= 0 && y >= 0 && x < heatMap.cols && y < heatMap.rows) {
                val = heatMapData[y * heatMapStep + x];
                val = val >= confidenceThreshold ? val : 0;
            }
            // NOTE(review): left_val actually samples (x + 1, y) and right_val
            // samples (x - 1, y) — the names are swapped, but the comparison
            // below is symmetric in the four neighbours so the result is the
            // same either way.
            float left_val = 0;
            if (y >= 0 && x < (heatMap.cols - 1) && y < heatMap.rows) {
                left_val = heatMapData[y * heatMapStep + x + 1];
                left_val = left_val >= confidenceThreshold ? left_val : 0;
            }
            float right_val = 0;
            if (x > 0 && y >= 0 && y < heatMap.rows) {
                right_val = heatMapData[y * heatMapStep + x - 1];
                right_val = right_val >= confidenceThreshold ? right_val : 0;
            }
            float top_val = 0;
            if (x >= 0 && x < heatMap.cols && y < (heatMap.rows - 1)) {
                top_val = heatMapData[(y + 1) * heatMapStep + x];
                top_val = top_val >= confidenceThreshold ? top_val : 0;
            }
            float bottom_val = 0;
            if (x >= 0 && y > 0 && x < heatMap.cols) {
                bottom_val = heatMapData[(y - 1) * heatMapStep + x];
                bottom_val = bottom_val >= confidenceThreshold ? bottom_val : 0;
            }
            // Strictly greater than all four neighbours -> candidate peak.
            if ((val > left_val) && (val > right_val) && (val > top_val) && (val > bottom_val)) {
                peaks.push_back(cv::Point(x, y));
            }
        }
    }
    // Sort by x so the suppression below keeps the leftmost of two close peaks.
    std::sort(peaks.begin(), peaks.end(), [](const cv::Point& a, const cv::Point& b) {
        return a.x < b.x;
    });
    std::vector<bool> isActualPeak(peaks.size(), true);
    int peakCounter = 0;
    std::vector<Peak>& peaksWithScoreAndID = allPeaks[heatMapId];
    // Greedy non-maximum suppression by Euclidean distance.
    for (size_t i = 0; i < peaks.size(); i++) {
        if (isActualPeak[i]) {
            for (size_t j = i + 1; j < peaks.size(); j++) {
                if (sqrt((peaks[i].x - peaks[j].x) * (peaks[i].x - peaks[j].x) +
                         (peaks[i].y - peaks[j].y) * (peaks[i].y - peaks[j].y)) < minPeaksDistance) {
                    isActualPeak[j] = false;
                }
            }
            peaksWithScoreAndID.push_back(Peak(peakCounter++, peaks[i], heatMap.at<float>(peaks[i])));
        }
    }
}
// Assembles per-joint keypoint peaks into complete human poses using the Part
// Affinity Fields (PAFs) predicted by the network (OpenPose-style grouping).
// For every limb in the fixed skeleton, candidate joint pairs are scored by how
// well the PAF vectors along the segment agree with the segment direction;
// greedy matching then grows/creates pose subsets. Poses with too few joints or
// too low a mean score are discarded.
std::vector<HumanPose> groupPeaksToPoses(const std::vector<std::vector<Peak>>& allPeaks,
                                         const std::vector<cv::Mat>& pafs,
                                         const size_t keypointsNumber,
                                         const float midPointsScoreThreshold,
                                         const float foundMidPointsRatioThreshold,
                                         const int minJointsNumber,
                                         const float minSubsetScore) {
    // Skeleton definition: limb endpoints as 1-based keypoint ids...
    static const std::pair<int, int> limbIdsHeatmap[] = {{2, 3},
                                                         {2, 6},
                                                         {3, 4},
                                                         {4, 5},
                                                         {6, 7},
                                                         {7, 8},
                                                         {2, 9},
                                                         {9, 10},
                                                         {10, 11},
                                                         {2, 12},
                                                         {12, 13},
                                                         {13, 14},
                                                         {2, 1},
                                                         {1, 15},
                                                         {15, 17},
                                                         {1, 16},
                                                         {16, 18},
                                                         {3, 17},
                                                         {6, 18}};
    // ...and the corresponding pair of PAF map indices (x/y components) per limb.
    static const std::pair<int, int> limbIdsPaf[] = {{31, 32},
                                                     {39, 40},
                                                     {33, 34},
                                                     {35, 36},
                                                     {41, 42},
                                                     {43, 44},
                                                     {19, 20},
                                                     {21, 22},
                                                     {23, 24},
                                                     {25, 26},
                                                     {27, 28},
                                                     {29, 30},
                                                     {47, 48},
                                                     {49, 50},
                                                     {53, 54},
                                                     {51, 52},
                                                     {55, 56},
                                                     {37, 38},
                                                     {45, 46}};
    // Flatten all peaks; Peak::id values index into this list.
    std::vector<Peak> candidates;
    for (const auto& peaks : allPeaks) {
        candidates.insert(candidates.end(), peaks.begin(), peaks.end());
    }
    std::vector<HumanPoseByPeaksIndices> subset(0, HumanPoseByPeaksIndices(keypointsNumber));
    for (size_t k = 0; k < arraySize(limbIdsPaf); k++) {
        std::vector<TwoJointsConnection> connections;
        // Convert 1-based PAF table ids into indices into the pafs vector.
        const int mapIdxOffset = keypointsNumber + 1;
        std::pair<cv::Mat, cv::Mat> scoreMid = {pafs[limbIdsPaf[k].first - mapIdxOffset],
                                                pafs[limbIdsPaf[k].second - mapIdxOffset]};
        const int idxJointA = limbIdsHeatmap[k].first - 1;
        const int idxJointB = limbIdsHeatmap[k].second - 1;
        const std::vector<Peak>& candA = allPeaks[idxJointA];
        const std::vector<Peak>& candB = allPeaks[idxJointB];
        const size_t nJointsA = candA.size();
        const size_t nJointsB = candB.size();
        if (nJointsA == 0 && nJointsB == 0) {
            continue;
        } else if (nJointsA == 0) {
            // Only the B endpoint exists: start a new one-joint pose for every
            // B candidate not already claimed by an existing subset.
            for (size_t i = 0; i < nJointsB; i++) {
                int num = 0;
                for (size_t j = 0; j < subset.size(); j++) {
                    if (subset[j].peaksIndices[idxJointB] == candB[i].id) {
                        num++;
                        continue;
                    }
                }
                if (num == 0) {
                    HumanPoseByPeaksIndices personKeypoints(keypointsNumber);
                    personKeypoints.peaksIndices[idxJointB] = candB[i].id;
                    personKeypoints.nJoints = 1;
                    personKeypoints.score = candB[i].score;
                    subset.push_back(personKeypoints);
                }
            }
            continue;
        } else if (nJointsB == 0) {
            // Symmetric case: only the A endpoint exists.
            for (size_t i = 0; i < nJointsA; i++) {
                int num = 0;
                for (size_t j = 0; j < subset.size(); j++) {
                    if (subset[j].peaksIndices[idxJointA] == candA[i].id) {
                        num++;
                        continue;
                    }
                }
                if (num == 0) {
                    HumanPoseByPeaksIndices personKeypoints(keypointsNumber);
                    personKeypoints.peaksIndices[idxJointA] = candA[i].id;
                    personKeypoints.nJoints = 1;
                    personKeypoints.score = candA[i].score;
                    subset.push_back(personKeypoints);
                }
            }
            continue;
        }
        // Score every A-B pair by sampling the PAF along the connecting segment.
        std::vector<TwoJointsConnection> tempJointConnections;
        for (size_t i = 0; i < nJointsA; i++) {
            for (size_t j = 0; j < nJointsB; j++) {
                cv::Point2f pt = candA[i].pos * 0.5 + candB[j].pos * 0.5;
                cv::Point mid = cv::Point(cvRound(pt.x), cvRound(pt.y));
                cv::Point2f vec = candB[j].pos - candA[i].pos;
                double norm_vec = cv::norm(vec);
                if (norm_vec == 0) {
                    continue;
                }
                vec /= norm_vec;
                // Quick check at the segment midpoint before full sampling.
                float score = vec.x * scoreMid.first.at<float>(mid) + vec.y * scoreMid.second.at<float>(mid);
                int height_n = pafs[0].rows / 2;
                float suc_ratio = 0.0f;
                float mid_score = 0.0f;
                const int mid_num = 10;
                const float scoreThreshold = -100.0f;
                if (score > scoreThreshold) {
                    float p_sum = 0;
                    int p_count = 0;
                    // Sample mid_num points evenly spaced between the two peaks
                    // and accumulate the PAF projection at each.
                    cv::Size2f step((candB[j].pos.x - candA[i].pos.x) / (mid_num - 1),
                                    (candB[j].pos.y - candA[i].pos.y) / (mid_num - 1));
                    for (int n = 0; n < mid_num; n++) {
                        cv::Point midPoint(cvRound(candA[i].pos.x + n * step.width),
                                           cvRound(candA[i].pos.y + n * step.height));
                        cv::Point2f pred(scoreMid.first.at<float>(midPoint), scoreMid.second.at<float>(midPoint));
                        score = vec.x * pred.x + vec.y * pred.y;
                        if (score > midPointsScoreThreshold) {
                            p_sum += score;
                            p_count++;
                        }
                    }
                    // NOTE(review): integer division — suc_ratio is 1.0 only
                    // when *all* mid_num samples pass, otherwise 0.0. This
                    // matches the upstream decoder; confirm before changing.
                    suc_ratio = static_cast<float>(p_count / mid_num);
                    float ratio = p_count > 0 ? p_sum / p_count : 0.0f;
                    // Penalize limbs longer than half the map height.
                    mid_score = ratio + static_cast<float>(std::min(height_n / norm_vec - 1, 0.0));
                }
                if (mid_score > 0 && suc_ratio > foundMidPointsRatioThreshold) {
                    tempJointConnections.push_back(TwoJointsConnection(i, j, mid_score));
                }
            }
        }
        // Greedy matching: best-scored connections first, each endpoint used once.
        if (!tempJointConnections.empty()) {
            std::sort(tempJointConnections.begin(),
                      tempJointConnections.end(),
                      [](const TwoJointsConnection& a, const TwoJointsConnection& b) {
                          return (a.score > b.score);
                      });
        }
        size_t num_limbs = std::min(nJointsA, nJointsB);
        size_t cnt = 0;
        std::vector<int> occurA(nJointsA, 0);
        std::vector<int> occurB(nJointsB, 0);
        for (size_t row = 0; row < tempJointConnections.size(); row++) {
            if (cnt == num_limbs) {
                break;
            }
            const int& indexA = tempJointConnections[row].firstJointIdx;
            const int& indexB = tempJointConnections[row].secondJointIdx;
            const float& score = tempJointConnections[row].score;
            if (occurA[indexA] == 0 && occurB[indexB] == 0) {
                // Store global peak ids (not candidate-list indices) here.
                connections.push_back(TwoJointsConnection(candA[indexA].id, candB[indexB].id, score));
                cnt++;
                occurA[indexA] = 1;
                occurB[indexB] = 1;
            }
        }
        if (connections.empty()) {
            continue;
        }
        // Limbs 17 and 18 (ear-shoulder) only complete existing poses.
        bool extraJointConnections = (k == 17 || k == 18);
        if (k == 0) {
            // First limb: every connection seeds a new two-joint pose.
            subset = std::vector<HumanPoseByPeaksIndices>(connections.size(), HumanPoseByPeaksIndices(keypointsNumber));
            for (size_t i = 0; i < connections.size(); i++) {
                const int& indexA = connections[i].firstJointIdx;
                const int& indexB = connections[i].secondJointIdx;
                subset[i].peaksIndices[idxJointA] = indexA;
                subset[i].peaksIndices[idxJointB] = indexB;
                subset[i].nJoints = 2;
                subset[i].score = candidates[indexA].score + candidates[indexB].score + connections[i].score;
            }
        } else if (extraJointConnections) {
            // Fill in the missing endpoint of poses that already have the other.
            for (size_t i = 0; i < connections.size(); i++) {
                const int& indexA = connections[i].firstJointIdx;
                const int& indexB = connections[i].secondJointIdx;
                for (size_t j = 0; j < subset.size(); j++) {
                    if (subset[j].peaksIndices[idxJointA] == indexA && subset[j].peaksIndices[idxJointB] == -1) {
                        subset[j].peaksIndices[idxJointB] = indexB;
                    } else if (subset[j].peaksIndices[idxJointB] == indexB && subset[j].peaksIndices[idxJointA] == -1) {
                        subset[j].peaksIndices[idxJointA] = indexA;
                    }
                }
            }
            continue;
        } else {
            // Attach the B endpoint to every pose already holding the A peak,
            // or start a new pose when none does.
            for (size_t i = 0; i < connections.size(); i++) {
                const int& indexA = connections[i].firstJointIdx;
                const int& indexB = connections[i].secondJointIdx;
                bool num = false;
                for (size_t j = 0; j < subset.size(); j++) {
                    if (subset[j].peaksIndices[idxJointA] == indexA) {
                        subset[j].peaksIndices[idxJointB] = indexB;
                        subset[j].nJoints++;
                        subset[j].score += candidates[indexB].score + connections[i].score;
                        num = true;
                    }
                }
                if (!num) {
                    HumanPoseByPeaksIndices hpWithScore(keypointsNumber);
                    hpWithScore.peaksIndices[idxJointA] = indexA;
                    hpWithScore.peaksIndices[idxJointB] = indexB;
                    hpWithScore.nJoints = 2;
                    hpWithScore.score = candidates[indexA].score + candidates[indexB].score + connections[i].score;
                    subset.push_back(hpWithScore);
                }
            }
        }
    }
    // Convert surviving subsets into HumanPose results; unfound keypoints stay (-1,-1).
    std::vector<HumanPose> poses;
    for (const auto& subsetI : subset) {
        if (subsetI.nJoints < minJointsNumber || subsetI.score / subsetI.nJoints < minSubsetScore) {
            continue;
        }
        int position = -1;
        HumanPose pose{std::vector<cv::Point2f>(keypointsNumber, cv::Point2f(-1.0f, -1.0f)),
                       subsetI.score * std::max(0, subsetI.nJoints - 1)};
        for (const auto& peakIdx : subsetI.peaksIndices) {
            position++;
            if (peakIdx >= 0) {
                // +0.5 shifts from cell index to cell center.
                pose.keypoints[position] = candidates[peakIdx].pos;
                pose.keypoints[position].x += 0.5;
                pose.keypoints[position].y += 0.5;
            }
        }
        poses.push_back(pose);
    }
    return poses;
}

View File

@@ -0,0 +1,160 @@
/*
// Copyright (C) 2020-2024 Intel Corporation
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
*/
#include "models/segmentation_model.h"
#include <stddef.h>
#include <stdint.h>
#include <fstream>
#include <stdexcept>
#include <string>
#include <vector>
#include <opencv2/core.hpp>
#include <opencv2/imgproc.hpp>
#include <openvino/openvino.hpp>
#include "models/internal_model_data.h"
#include "models/results.h"
// Delegates to ImageModel; segmentation adds no construction-time state of its own.
SegmentationModel::SegmentationModel(const std::string& modelFileName, bool useAutoResize, const std::string& layout)
    : ImageModel(modelFileName, useAutoResize, layout) {}
// Reads one label per line from labelFilename.
// An empty filename is allowed and yields an empty list; an unreadable file
// throws std::runtime_error and an empty file throws std::logic_error.
std::vector<std::string> SegmentationModel::loadLabels(const std::string& labelFilename) {
    std::vector<std::string> labelsList;
    /* Read labels (if any) */
    if (!labelFilename.empty()) {
        std::ifstream inputFile(labelFilename);
        if (!inputFile.is_open())
            throw std::runtime_error("Can't open the labels file: " + labelFilename);
        std::string label;
        while (std::getline(inputFile, label)) {
            // Strip a trailing '\r' so label files with CRLF (Windows) line
            // endings do not produce labels with an invisible carriage return.
            if (!label.empty() && label.back() == '\r') {
                label.pop_back();
            }
            labelsList.push_back(label);
        }
        if (labelsList.empty())
            throw std::logic_error("File is empty: " + labelFilename);
    }
    return labelsList;
}
// Validates the model topology (exactly one 3-channel 4D input, one output)
// and wires up the pre/post-processing steps: NHWC u8-compatible input (with
// optional on-device resize) and a float32 output laid out for argmax decoding.
// Note: the duplicated single-output check of the previous version is folded
// into one check in the output section.
void SegmentationModel::prepareInputsOutputs(std::shared_ptr<ov::Model>& model) {
    // --------------------------- Configure input & output ---------------------------------------------
    // --------------------------- Prepare input -----------------------------------------------------
    if (model->inputs().size() != 1) {
        throw std::logic_error("Segmentation model wrapper supports topologies with only 1 input");
    }
    const auto& input = model->input();
    inputsNames.push_back(input.get_any_name());
    const ov::Layout& inputLayout = getInputLayout(input);
    const ov::Shape& inputShape = input.get_shape();
    if (inputShape.size() != 4 || inputShape[ov::layout::channels_idx(inputLayout)] != 3) {
        throw std::logic_error("3-channel 4-dimensional model's input is expected");
    }
    ov::preprocess::PrePostProcessor ppp(model);
    inputTransform.setPrecision(ppp, model->input().get_any_name());
    ppp.input().tensor().set_layout("NHWC");
    if (useAutoResize) {
        // Accept frames of arbitrary size and let the plugin resize them on device.
        ppp.input().tensor().set_spatial_dynamic_shape();
        ppp.input()
            .preprocess()
            .convert_element_type(ov::element::f32)
            .resize(ov::preprocess::ResizeAlgorithm::RESIZE_LINEAR);
    }
    ppp.input().model().set_layout(inputLayout);
    // --------------------------- Prepare output -----------------------------------------------------
    if (model->outputs().size() != 1) {
        throw std::logic_error("Segmentation model wrapper supports topologies with only 1 output");
    }
    const auto& output = model->output();
    outputsNames.push_back(output.get_any_name());
    // Use the maximum shape so dynamic output dimensions still yield usable sizes.
    const ov::Shape& outputShape = output.get_partial_shape().get_max_shape();
    ov::Layout outputLayout = getLayoutFromShape(outputShape);
    outHeight = static_cast<int>(outputShape[ov::layout::height_idx(outputLayout)]);
    outWidth = static_cast<int>(outputShape[ov::layout::width_idx(outputLayout)]);
    ppp.output().model().set_layout(outputLayout);
    ppp.output().tensor().set_element_type(ov::element::f32);
    if (ov::layout::has_channels(outputLayout)) {
        // Per-class score planes: request NCHW so postprocess can argmax over channels.
        ppp.output().tensor().set_layout("NCHW");
        outChannels = static_cast<int>(outputShape[ov::layout::channels_idx(outputLayout)]);
    } else {
        // deeplabv3
        ppp.output().tensor().set_layout("NHW");
        outChannels = 1;
    }
    model = ppp.build();
}
// Builds an 8-bit class-index image from the network output and rescales it to
// the original frame size. Three output forms are handled: int32 class maps,
// int64 class maps, and float32 per-class score planes (decoded by per-pixel
// arg-max over channels).
std::unique_ptr<ResultBase> SegmentationModel::postprocess(InferenceResult& infResult) {
    ImageResult* result = new ImageResult(infResult.frameId, infResult.metaData);
    const auto& srcSize = infResult.internalModelData->asRef<InternalImageModelData>();
    const auto& tensor = infResult.getFirstOutputTensor();
    const auto elemType = tensor.get_element_type();
    result->resultImage = cv::Mat(outHeight, outWidth, CV_8UC1);
    if (outChannels == 1 && elemType == ov::element::i32) {
        // Class indices arrive as int32: narrow straight to 8 bit.
        cv::Mat classMap(outHeight, outWidth, CV_32SC1, tensor.data<int32_t>());
        classMap.convertTo(result->resultImage, CV_8UC1);
    } else if (outChannels == 1 && elemType == ov::element::i64) {
        // int64 indices: copy element-wise into an int32 Mat, then narrow.
        cv::Mat classMap(outHeight, outWidth, CV_32SC1);
        const int64_t* src = tensor.data<int64_t>();
        int32_t* dst = reinterpret_cast<int32_t*>(classMap.data);
        for (size_t i = 0; i < classMap.total(); ++i) {
            dst[i] = int32_t(src[i]);
        }
        classMap.convertTo(result->resultImage, CV_8UC1);
    } else if (elemType == ov::element::f32) {
        // CHW score planes: pick the highest-scoring channel per pixel.
        const float* scores = tensor.data<float>();
        for (int row = 0; row < outHeight; ++row) {
            for (int col = 0; col < outWidth; ++col) {
                int best = 0;
                float bestScore = -1.0f;
                for (int ch = 0; ch < outChannels; ++ch) {
                    const float score = scores[ch * outHeight * outWidth + row * outWidth + col];
                    if (score > bestScore) {
                        best = ch;
                        bestScore = score;
                    }
                }
                result->resultImage.at<uint8_t>(row, col) = best;
            }
        }
    }
    // Nearest-neighbor keeps class labels intact while matching the source size.
    cv::resize(result->resultImage,
               result->resultImage,
               cv::Size(srcSize.inputImgWidth, srcSize.inputImgHeight),
               0,
               0,
               cv::INTER_NEAREST);
    return std::unique_ptr<ResultBase>(result);
}

View File

@@ -0,0 +1,107 @@
/*
// Copyright (C) 2021-2024 Intel Corporation
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
*/
#include "models/style_transfer_model.h"
#include <stddef.h>
#include <memory>
#include <stdexcept>
#include <string>
#include <vector>
#include <opencv2/core.hpp>
#include <opencv2/imgproc.hpp>
#include <openvino/openvino.hpp>
#include <utils/image_utils.h>
#include <utils/ocv_common.hpp>
#include "models/input_data.h"
#include "models/internal_model_data.h"
#include "models/results.h"
// Style transfer always resizes on the host (useAutoResize is fixed to false).
StyleTransferModel::StyleTransferModel(const std::string& modelFileName, const std::string& layout)
    : ImageModel(modelFileName, false, layout) {}
// Validates a 1-input/1-output, 3-channel 4D topology, records the network
// input size, and configures preprocessing: u8 NHWC tensors converted to f32
// for the model, f32 output tensors.
void StyleTransferModel::prepareInputsOutputs(std::shared_ptr<ov::Model>& model) {
    // --------------------------- Configure input & output ---------------------------------------------
    // --------------------------- Prepare input --------------------------------------------------
    if (model->inputs().size() != 1) {
        throw std::logic_error("Style transfer model wrapper supports topologies with only 1 input");
    }
    inputsNames.push_back(model->input().get_any_name());
    const ov::Shape& inputShape = model->input().get_shape();
    ov::Layout inputLayout = getInputLayout(model->input());
    if (inputShape.size() != 4 || inputShape[ov::layout::batch_idx(inputLayout)] != 1 ||
        inputShape[ov::layout::channels_idx(inputLayout)] != 3) {
        throw std::logic_error("3-channel 4-dimensional model's input is expected");
    }
    // Cache the network's spatial size for preprocess-time resizing.
    netInputWidth = inputShape[ov::layout::width_idx(inputLayout)];
    netInputHeight = inputShape[ov::layout::height_idx(inputLayout)];
    ov::preprocess::PrePostProcessor ppp(model);
    ppp.input().preprocess().convert_element_type(ov::element::f32);
    ppp.input().tensor().set_element_type(ov::element::u8).set_layout("NHWC");
    ppp.input().model().set_layout(inputLayout);
    // --------------------------- Prepare output -----------------------------------------------------
    const ov::OutputVector& outputs = model->outputs();
    if (outputs.size() != 1) {
        throw std::logic_error("Style transfer model wrapper supports topologies with only 1 output");
    }
    outputsNames.push_back(model->output().get_any_name());
    const ov::Shape& outputShape = model->output().get_shape();
    // The output is assumed NCHW; validate batch 1 and 3 channels against it.
    ov::Layout outputLayout{"NCHW"};
    if (outputShape.size() != 4 || outputShape[ov::layout::batch_idx(outputLayout)] != 1 ||
        outputShape[ov::layout::channels_idx(outputLayout)] != 3) {
        throw std::logic_error("3-channel 4-dimensional model's output is expected");
    }
    ppp.output().tensor().set_element_type(ov::element::f32);
    model = ppp.build();
}
// Converts the NCHW float output into an 8-bit 3-channel image resized to the
// original frame. Fix vs. previous version: dimensions were cast to int and
// then stored in size_t before being passed to cv::Mat (which takes int); they
// are now plain int throughout, with the pixel count widened explicitly.
std::unique_ptr<ResultBase> StyleTransferModel::postprocess(InferenceResult& infResult) {
    auto result = std::make_unique<ImageResult>();
    *static_cast<ResultBase*>(result.get()) = static_cast<ResultBase&>(infResult);
    const auto& inputImgSize = infResult.internalModelData->asRef<InternalImageModelData>();
    const auto outputData = infResult.getFirstOutputTensor().data<float>();
    const ov::Shape& outputShape = infResult.getFirstOutputTensor().get_shape();
    // NCHW output: dims 2 and 3 are height and width.
    const int outHeight = static_cast<int>(outputShape[2]);
    const int outWidth = static_cast<int>(outputShape[3]);
    const size_t numOfPixels = static_cast<size_t>(outWidth) * outHeight;
    // Planes are merged in reverse order (2, 1, 0) — presumably converting the
    // model's RGB planes to OpenCV's BGR; confirm against the model's spec.
    std::vector<cv::Mat> imgPlanes{cv::Mat(outHeight, outWidth, CV_32FC1, &(outputData[numOfPixels * 2])),
                                   cv::Mat(outHeight, outWidth, CV_32FC1, &(outputData[numOfPixels])),
                                   cv::Mat(outHeight, outWidth, CV_32FC1, &(outputData[0]))};
    cv::Mat resultImg;
    cv::merge(imgPlanes, resultImg);
    cv::resize(resultImg, result->resultImage, cv::Size(inputImgSize.inputImgWidth, inputImgSize.inputImgHeight));
    result->resultImage.convertTo(result->resultImage, CV_8UC3);
    return result;
}

View File

@@ -0,0 +1,310 @@
/*
// Copyright (C) 2021-2024 Intel Corporation
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
*/
#include "models/super_resolution_model.h"
#include <stddef.h>
#include <map>
#include <stdexcept>
#include <string>
#include <utility>
#include <vector>
#include <opencv2/imgproc.hpp>
#include <openvino/openvino.hpp>
#include <utils/image_utils.h>
#include <utils/ocv_common.hpp>
#include <utils/slog.hpp>
#include "models/input_data.h"
#include "models/internal_model_data.h"
#include "models/results.h"
// Remembers the requested input resolution; prepareInputsOutputs() later
// reshapes the network so its low-resolution input matches it exactly.
SuperResolutionModel::SuperResolutionModel(const std::string& modelFileName,
                                           const cv::Size& inputImgSize,
                                           const std::string& layout)
    : ImageModel(modelFileName, false, layout) {
    netInputHeight = inputImgSize.height;
    netInputWidth = inputImgSize.width;
}
// Validates a 1- or 2-input super-resolution topology, orders the inputs so the
// low-resolution (LR) one comes first, configures NHWC preprocessing and f32
// output, then reshapes the model to the requested input size.
// Fix vs. previous version: the LR input width was read with the *output*
// layout's width index; it is now read with the input layout's index (widthId),
// which matters whenever the input layout is not NCHW.
void SuperResolutionModel::prepareInputsOutputs(std::shared_ptr<ov::Model>& model) {
    // --------------------------- Configure input & output ---------------------------------------------
    // --------------------------- Prepare input --------------------------------------------------
    const ov::OutputVector& inputs = model->inputs();
    if (inputs.size() != 1 && inputs.size() != 2) {
        throw std::logic_error("Super resolution model wrapper supports topologies with 1 or 2 inputs only");
    }
    std::string lrInputTensorName = inputs.begin()->get_any_name();
    inputsNames.push_back(lrInputTensorName);
    ov::Shape lrShape = inputs.begin()->get_shape();
    if (lrShape.size() != 4) {
        throw std::logic_error("Number of dimensions for an input must be 4");
    }
    // in case of 2 inputs they have the same layouts
    ov::Layout inputLayout = getInputLayout(model->inputs().front());
    auto channelsId = ov::layout::channels_idx(inputLayout);
    auto heightId = ov::layout::height_idx(inputLayout);
    auto widthId = ov::layout::width_idx(inputLayout);
    if (lrShape[channelsId] != 1 && lrShape[channelsId] != 3) {
        throw std::logic_error("Input layer is expected to have 1 or 3 channels");
    }
    // A model like single-image-super-resolution-???? may take bicubic interpolation of the input image as the
    // second input
    std::string bicInputTensorName;
    if (inputs.size() == 2) {
        bicInputTensorName = (++inputs.begin())->get_any_name();
        inputsNames.push_back(bicInputTensorName);
        ov::Shape bicShape = (++inputs.begin())->get_shape();
        if (bicShape.size() != 4) {
            throw std::logic_error("Number of dimensions for both inputs must be 4");
        }
        // Ensure inputsNames[0]/lrShape refer to the smaller (LR) input.
        if (lrShape[widthId] >= bicShape[widthId] && lrShape[heightId] >= bicShape[heightId]) {
            std::swap(bicShape, lrShape);
            inputsNames[0].swap(inputsNames[1]);
        } else if (!(lrShape[widthId] <= bicShape[widthId] && lrShape[heightId] <= bicShape[heightId])) {
            throw std::logic_error("Each spatial dimension of one input must surpass or be equal to a spatial"
                                   "dimension of another input");
        }
    }
    ov::preprocess::PrePostProcessor ppp(model);
    for (const auto& input : inputs) {
        inputTransform.setPrecision(ppp, input.get_any_name());
        ppp.input(input.get_any_name()).tensor().set_layout("NHWC");
        ppp.input(input.get_any_name()).model().set_layout(inputLayout);
    }
    // --------------------------- Prepare output -----------------------------------------------------
    const ov::OutputVector& outputs = model->outputs();
    if (outputs.size() != 1) {
        throw std::logic_error("Super resolution model wrapper supports topologies with only 1 output");
    }
    outputsNames.push_back(outputs.begin()->get_any_name());
    ppp.output().tensor().set_element_type(ov::element::f32);
    model = ppp.build();
    // Derive the upscale factor from output width / LR input width and reshape
    // the network to the requested input resolution.
    const ov::Shape& outShape = model->output().get_shape();
    const ov::Layout outputLayout("NCHW");
    const auto outWidth = outShape[ov::layout::width_idx(outputLayout)];
    const auto inWidth = lrShape[widthId];
    changeInputSize(model, static_cast<int>(outWidth / inWidth));
}
// Reshapes the model so the LR input becomes 1 x netInputHeight x netInputWidth
// (in its layout) and, when a bicubic second input exists, that input becomes
// coeff times larger spatially. 'coeff' is the upscale factor.
void SuperResolutionModel::changeInputSize(std::shared_ptr<ov::Model>& model, int coeff) {
    std::map<std::string, ov::PartialShape> shapes;
    // NOTE(review): assumes the first input carries an explicit layout at this
    // point (prepareInputsOutputs calls this after ppp.build(), which sets the
    // tensor layout) — confirm, since an empty layout has no batch/height/width
    // indices to query.
    const ov::Layout& layout = ov::layout::get_layout(model->inputs().front());
    const auto batchId = ov::layout::batch_idx(layout);
    const auto heightId = ov::layout::height_idx(layout);
    const auto widthId = ov::layout::width_idx(layout);
    const ov::OutputVector& inputs = model->inputs();
    std::string lrInputTensorName = inputs.begin()->get_any_name();
    ov::Shape lrShape = inputs.begin()->get_shape();
    if (inputs.size() == 2) {
        std::string bicInputTensorName = (++inputs.begin())->get_any_name();
        ov::Shape bicShape = (++inputs.begin())->get_shape();
        // Make sure lrShape/lrInputTensorName refer to the smaller (LR) input.
        if (lrShape[heightId] >= bicShape[heightId] && lrShape[widthId] >= bicShape[widthId]) {
            std::swap(bicShape, lrShape);
            std::swap(bicInputTensorName, lrInputTensorName);
        }
        // The bicubic input is the LR size scaled by the upscale factor.
        bicShape[batchId] = 1;
        bicShape[heightId] = coeff * netInputHeight;
        bicShape[widthId] = coeff * netInputWidth;
        shapes[bicInputTensorName] = ov::PartialShape(bicShape);
    }
    lrShape[batchId] = 1;
    lrShape[heightId] = netInputHeight;
    lrShape[widthId] = netInputWidth;
    shapes[lrInputTensorName] = ov::PartialShape(lrShape);
    model->reshape(shapes);
}
// Fills the request's input tensor(s) from the source image: converts to
// grayscale when the model expects 1 channel, resizes to the LR input size,
// and, for two-input models, also supplies a bicubic upscale of the image.
// Returns the (post-conversion) image size for postprocess().
std::shared_ptr<InternalModelData> SuperResolutionModel::preprocess(const InputData& inputData,
                                                                    ov::InferRequest& request) {
    auto imgData = inputData.asRef<ImageInputData>();
    auto img = inputTransform(imgData.inputImage);
    const ov::Tensor lrInputTensor = request.get_tensor(inputsNames[0]);
    // Input tensors were configured as NHWC in prepareInputsOutputs.
    const ov::Layout layout("NHWC");
    // Single-channel model but multi-channel frame: collapse to grayscale.
    if (img.channels() != static_cast<int>(lrInputTensor.get_shape()[ov::layout::channels_idx(layout)])) {
        cv::cvtColor(img, img, cv::COLOR_BGR2GRAY);
    }
    // NOTE(review): this compares exact sizes, not aspect ratios, although the
    // warning text mentions aspect ratio.
    if (static_cast<size_t>(img.cols) != netInputWidth ||
        static_cast<size_t>(img.rows) != netInputHeight) {
        slog::warn << "\tChosen model aspect ratio doesn't match image aspect ratio" << slog::endl;
    }
    const size_t height = lrInputTensor.get_shape()[ov::layout::height_idx(layout)];
    const size_t width = lrInputTensor.get_shape()[ov::layout::width_idx(layout)];
    img = resizeImageExt(img, width, height);
    request.set_tensor(inputsNames[0], wrapMat2Tensor(img));
    if (inputsNames.size() == 2) {
        // Second input: bicubic upscale of the (already resized) LR image.
        const ov::Tensor bicInputTensor = request.get_tensor(inputsNames[1]);
        const int h = static_cast<int>(bicInputTensor.get_shape()[ov::layout::height_idx(layout)]);
        const int w = static_cast<int>(bicInputTensor.get_shape()[ov::layout::width_idx(layout)]);
        cv::Mat resized;
        cv::resize(img, resized, cv::Size(w, h), 0, 0, cv::INTER_CUBIC);
        request.set_tensor(inputsNames[1], wrapMat2Tensor(resized));
    }
    return std::make_shared<InternalImageModelData>(img.cols, img.rows);
}
// Converts the NCHW float output into an 8-bit image: a 3-channel merge for
// color models, or a single thresholded plane for text-SR models.
// Fix vs. previous version: dimensions were cast to int and then stored in
// size_t before being handed to cv::Mat (which takes int); they are now plain
// int, with the pixel count widened explicitly.
std::unique_ptr<ResultBase> SuperResolutionModel::postprocess(InferenceResult& infResult) {
    auto result = std::make_unique<ImageResult>();
    *static_cast<ResultBase*>(result.get()) = static_cast<ResultBase&>(infResult);
    const auto outputData = infResult.getFirstOutputTensor().data<float>();
    const ov::Shape& outShape = infResult.getFirstOutputTensor().get_shape();
    // NCHW output: dims 1-3 are channels, height, width.
    const int outChannels = static_cast<int>(outShape[1]);
    const int outHeight = static_cast<int>(outShape[2]);
    const int outWidth = static_cast<int>(outShape[3]);
    const size_t numOfPixels = static_cast<size_t>(outWidth) * outHeight;
    std::vector<cv::Mat> imgPlanes;
    if (outChannels == 3) {
        imgPlanes = std::vector<cv::Mat>{cv::Mat(outHeight, outWidth, CV_32FC1, &(outputData[0])),
                                         cv::Mat(outHeight, outWidth, CV_32FC1, &(outputData[numOfPixels])),
                                         cv::Mat(outHeight, outWidth, CV_32FC1, &(outputData[numOfPixels * 2]))};
    } else {
        imgPlanes = std::vector<cv::Mat>{cv::Mat(outHeight, outWidth, CV_32FC1, &(outputData[0]))};
        // Post-processing for text-image-super-resolution models
        cv::threshold(imgPlanes[0], imgPlanes[0], 0.5f, 1.0f, cv::THRESH_BINARY);
    }
    // Scale the float planes by 255 into 8-bit.
    for (auto& img : imgPlanes) {
        img.convertTo(img, CV_8UC1, 255);
    }
    cv::Mat resultImg;
    cv::merge(imgPlanes, resultImg);
    result->resultImage = resultImg;
    return result;
}
// Reassembles the three color channels — inferred as a batch of 3
// single-channel images (see the matching preprocess(), which packs a
// {3,1,H,W} tensor) — into one 3-channel 8-bit image.
// Fix vs. previous version: int-cast dimensions were stored in size_t before
// being handed to cv::Mat (which takes int); they are now plain int, with the
// per-plane element count widened explicitly.
std::unique_ptr<ResultBase> SuperResolutionChannelJoint::postprocess(InferenceResult& infResult) {
    auto result = std::make_unique<ImageResult>();
    *static_cast<ResultBase*>(result.get()) = static_cast<ResultBase&>(infResult);
    const auto outputData = infResult.getFirstOutputTensor().data<float>();
    const ov::Shape& outShape = infResult.getFirstOutputTensor().get_shape();
    const int outHeight = static_cast<int>(outShape[2]);
    const int outWidth = static_cast<int>(outShape[3]);
    const size_t planeSize = static_cast<size_t>(outHeight) * outWidth;
    std::vector<cv::Mat> channels;
    for (int i = 0; i < 3; ++i) {
        // Each batch item's plane starts i * H * W floats into the tensor.
        channels.emplace_back(outHeight, outWidth, CV_32FC1, &(outputData[0]) + i * planeSize);
    }
    cv::Mat resultImg(outHeight, outWidth, CV_32FC3);
    cv::merge(channels, resultImg);
    resultImg.convertTo(resultImg, CV_8UC3);
    result->resultImage = resultImg;
    return result;
}
// Splits the input image into its color channels and packs them as a batch of
// three single-channel planes ({3,1,H,W}, u8) so all channels are inferred in
// one request. Returns the source image size for postprocess().
std::shared_ptr<InternalModelData> SuperResolutionChannelJoint::preprocess(const InputData& inputData,
                                                                           ov::InferRequest& request) {
    auto imgData = inputData.asRef<ImageInputData>();
    auto& img = imgData.inputImage;
    const ov::Tensor lrInputTensor = request.get_tensor(inputsNames[0]);
    // Input tensor was configured as NCHW in prepareInputsOutputs.
    const ov::Layout layout("NCHW");
    // NOTE(review): this compares exact sizes, not aspect ratios, although the
    // warning text mentions aspect ratio. Unlike the base class, the image is
    // NOT resized here — presumably it must already match the network size.
    if (static_cast<size_t>(img.cols) != netInputWidth || static_cast<size_t>(img.rows) != netInputHeight) {
        slog::warn << "\tChosen model aspect ratio doesn't match image aspect ratio" << slog::endl;
    }
    const size_t height = lrInputTensor.get_shape()[ov::layout::height_idx(layout)];
    const size_t width = lrInputTensor.get_shape()[ov::layout::width_idx(layout)];
    // cv::split yields planes in OpenCV channel order (B first for BGR images);
    // each plane becomes one batch item of the input tensor.
    std::vector<cv::Mat> channels;
    cv::split(img, channels);
    ov::Tensor tensor(ov::element::u8, {3, 1, height, width});
    for (int i = 0; i < 3; ++i) {
        matToTensor(channels[i], tensor, i);
    }
    request.set_tensor(inputsNames[0], tensor);
    return std::make_shared<InternalImageModelData>(img.cols, img.rows);
}
// Validates a single-input, single-channel, 4D topology, configures u8 NCHW
// input / f32 output, reshapes to the requested input size and fixes the batch
// at 3 (one batch item per color channel; see preprocess()).
// Fix vs. previous version: the input width was read with the *output* layout's
// width index; it is now read with the input layout's index, which matters
// whenever the input layout is not NCHW.
void SuperResolutionChannelJoint::prepareInputsOutputs(std::shared_ptr<ov::Model>& model) {
    // --------------------------- Configure input & output ---------------------------------------------
    // --------------------------- Prepare input --------------------------------------------------
    const ov::OutputVector& inputs = model->inputs();
    if (inputs.size() != 1) {
        throw std::logic_error("Super resolution channel joint model wrapper supports topologies with 1 inputs only");
    }
    std::string lrInputTensorName = inputs.begin()->get_any_name();
    inputsNames.push_back(lrInputTensorName);
    ov::Shape lrShape = inputs.begin()->get_shape();
    if (lrShape.size() != 4) {
        throw std::logic_error("Number of dimensions for an input must be 4");
    }
    ov::Layout inputLayout = getInputLayout(model->inputs().front());
    auto channelsId = ov::layout::channels_idx(inputLayout);
    // Each color channel is fed as a separate batch item, so the network itself
    // must be single-channel.
    if (lrShape[channelsId] != 1) {
        throw std::logic_error("Input layer is expected to have 1 channel");
    }
    ov::preprocess::PrePostProcessor ppp(model);
    const auto& input = inputs.front();
    ppp.input(input.get_any_name()).tensor().set_element_type(ov::element::u8).set_layout("NCHW");
    ppp.input(input.get_any_name()).model().set_layout(inputLayout);
    // --------------------------- Prepare output -----------------------------------------------------
    const ov::OutputVector& outputs = model->outputs();
    if (outputs.size() != 1) {
        throw std::logic_error("Super resolution model wrapper supports topologies with only 1 output");
    }
    outputsNames.push_back(outputs.begin()->get_any_name());
    ppp.output().tensor().set_element_type(ov::element::f32);
    model = ppp.build();
    // Derive the upscale factor, reshape to the requested size, then set batch 3.
    const ov::Shape& outShape = model->output().get_shape();
    const ov::Layout outputLayout("NCHW");
    const auto outWidth = outShape[ov::layout::width_idx(outputLayout)];
    const auto inWidth = lrShape[ov::layout::width_idx(inputLayout)];
    changeInputSize(model, static_cast<int>(outWidth / inWidth));
    ov::set_batch(model, 3);
}
// The channel-joint pipeline infers all three color channels as one batch, so
// the batch size is fixed at 3 (matching the {3,1,H,W} tensor built in preprocess()).
void SuperResolutionChannelJoint::setBatch(std::shared_ptr<ov::Model>& model) {
    ov::set_batch(model, 3);
}