Initial setup for CLion

This commit is contained in:
2026-03-28 16:54:11 +11:00
parent 239cc02591
commit 7b4134133c
1136 changed files with 811916 additions and 0 deletions

View File

@@ -0,0 +1,217 @@
// Copyright (C) 2018-2024 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#pragma once
#include <map>
#include <string>
#include <vector>
#include <opencv2/core/core.hpp>
#include "cnn.hpp"
#include "openvino/openvino.hpp"
/**
* @brief Class for detection with action info
*/
struct DetectedAction {
/** @brief BBox of detection */
cv::Rect rect;
/** @brief Action label */
int label;
/** @brief Confidence of detection */
float detection_conf;
/** @brief Confidence of predicted action */
float action_conf;
/**
* @brief Constructor
*/
DetectedAction(const cv::Rect& rect, int label,
float detection_conf, float action_conf)
: rect(rect), label(label), detection_conf(detection_conf),
action_conf(action_conf) {}
};
using DetectedActions = std::vector<DetectedAction>;
/**
* @brief Class to store SSD-based head info
*/
struct SSDHead {
/** @brief Step size for the head */
int step;
/** @brief Vector of anchors */
std::vector<cv::Size2f> anchors;
/**
* @brief Constructor
*/
SSDHead(int step, const std::vector<cv::Size2f>& anchors) : step(step), anchors(anchors) {}
};
using SSDHeads = std::vector<SSDHead>;
/**
 * @brief Configuration of the Action Detection model.
 */
struct ActionDetectorConfig : public CnnConfig {
    explicit ActionDetectorConfig(const std::string& path_to_model, const std::string& model_type)
        : CnnConfig(path_to_model, model_type) {}

    /// Output tensor with bbox locations (old network version)
    std::string old_loc_blob_name{"mbox_loc1/out/conv/flat"};
    /// Output tensor with detection confidences (old network version)
    std::string old_det_conf_blob_name{"mbox_main_conf/out/conv/flat/softmax/flat"};
    /// Prefix of the output tensors with action confidences (old network version)
    std::string old_action_conf_blob_name_prefix{"out/anchor"};
    /// Output tensor with prior boxes (old network version)
    std::string old_priorbox_blob_name{"mbox/priorbox"};
    /// Output tensor with bbox locations (new network version)
    std::string new_loc_blob_name{"ActionNet/out_detection_loc"};
    /// Output tensor with detection confidences (new network version)
    std::string new_det_conf_blob_name{"ActionNet/out_detection_conf"};
    /// Prefix of the output tensors with action confidences (new network version)
    std::string new_action_conf_blob_name_prefix{"ActionNet/action_heads/out_head_"};
    /// Suffix of the output tensors with action confidences (new network version)
    std::string new_action_conf_blob_name_suffix{"_anchor_"};
    /// Scale parameter of the Soft-NMS algorithm
    float nms_sigma{0.6f};
    /// Confidence threshold for detected objects
    float detection_confidence_threshold{0.4f};
    /// Confidence threshold for recognized actions
    float action_confidence_threshold{0.75f};
    /// Scale of action logits (old network version)
    float old_action_scale{3.f};
    /// Scale of action logits (new network version)
    float new_action_scale{16.f};
    /// Default action class label
    int default_action_id{0};
    /// Number of top-score bboxes kept in the output
    size_t keep_top_k{200};
    /// Numbers of SSD anchors per head (old network version)
    std::vector<int> old_anchors{4};
    /// Numbers of SSD anchors per head (new network version)
    std::vector<int> new_anchors{1, 4};
    /// Number of action classes to detect
    size_t num_action_classes{3};
    /// Whether inference is submitted asynchronously
    bool is_async{true};
    /// SSD bbox encoding variances
    float variances[4]{0.1f, 0.1f, 0.2f, 0.2f};
    /// Detection heads of the new network version: stride plus anchor sizes
    SSDHeads new_det_heads{{8, {{26.17863728f, 58.670372f}}},
                           {16, {{35.36f, 81.829632f},
                                 {45.8114572f, 107.651852f},
                                 {63.31491832f, 142.595732f},
                                 {93.5070856f, 201.107692f}}}};
};
/**
 * @brief Asynchronous detector that yields person detections with action labels.
 */
class ActionDetection : public AsyncDetection<DetectedAction>, public BaseCnnDetection {
public:
    explicit ActionDetection(const ActionDetectorConfig& config);

    void submitRequest() override;
    void enqueue(const cv::Mat& frame) override;
    void wait() override { BaseCnnDetection::wait(); }
    DetectedActions fetchResults() override;

private:
    /** @brief Detector configuration */
    ActionDetectorConfig m_config;
    /** @brief Compiled model */
    ov::CompiledModel m_model;
    /** @brief Layout of the model input */
    ov::Layout m_modelLayout;
    /** @brief Name of the input tensor */
    std::string m_input_name;
    /** @brief Output tensors keyed by name */
    std::map<std::string, ov::Tensor> m_outputs;
    /** @brief Number of frames currently enqueued */
    int m_enqueued_frames = 0;
    /** @brief Width of the last enqueued frame */
    float m_width = 0;
    /** @brief Height of the last enqueued frame */
    float m_height = 0;
    /** @brief True when the new network version is in use (see new_* config fields) */
    bool m_new_model = false;
    // Per-head bookkeeping used to map flat anchor outputs back to heads
    // (NOTE(review): exact semantics live in the implementation — confirm there).
    std::vector<int> m_head_ranges;
    std::vector<int> m_head_step_sizes;
    std::vector<cv::Size> m_head_blob_sizes;
    std::vector<std::vector<int>> m_glob_anchor_map;
    std::vector<std::string> m_glob_anchor_names;
    int m_num_glob_anchors = 0;
    /** @brief Spatial size of the network input */
    cv::Size m_network_input_size;
    /** @brief Number of candidate detections (set in the implementation) */
    int m_num_candidates;
    /** @brief Binary-classification mode flag (set in the implementation) */
    bool m_binary_task;

    /**
     * @brief BBox in normalized form (each coordinate is in range [0;1]).
     */
    struct NormalizedBBox {
        float xmin;
        float ymin;
        float xmax;
        float ymax;
    };
    typedef std::vector<NormalizedBBox> NormalizedBBoxes;

    /**
     * @brief Translates the detections from the network outputs
     *
     * @param loc Location buffer
     * @param main_conf Detection conf buffer
     * @param priorboxes Priorboxes buffer
     * @param add_conf Action conf buffer
     * @param frame_size Size of input image (WxH)
     * @return Detected objects
     */
    DetectedActions GetDetections(const cv::Mat& loc,
                                  const cv::Mat& main_conf,
                                  const cv::Mat& priorboxes,
                                  const std::vector<cv::Mat>& add_conf,
                                  const cv::Size& frame_size) const;

    /**
     * @brief Translates an input buffer record to a BBox
     *
     * @param data Input buffer
     * @param inverse If true, reads the record with inverted coordinate order
     *                (NOTE(review): inferred from the name — confirm in implementation)
     * @return BBox
     */
    inline NormalizedBBox
    ParseBBoxRecord(const float* data, bool inverse) const;

    /**
     * @brief Generates an SSD prior box for a grid position
     *
     * @param pos Flat position in the head's output grid
     * @param step Stride of the head
     * @param anchor Anchor size
     * @param blob_size Spatial size of the head's output blob
     * @return Prior box in normalized form
     */
    inline NormalizedBBox
    GeneratePriorBox(int pos, int step, const cv::Size2f& anchor, const cv::Size& blob_size) const;

    /**
     * @brief Translates input blobs in SSD format to bbox in CV_Rect
     *
     * @param prior_bbox Prior boxes in SSD format
     * @param variances Variances of prior boxes in SSD format
     * @param encoded_bbox BBox to decode
     * @param frame_size Size of input image (WxH)
     * @return BBox in CV_Rect format
     */
    cv::Rect ConvertToRect(const NormalizedBBox& prior_bbox,
                           const NormalizedBBox& variances,
                           const NormalizedBBox& encoded_bbox,
                           const cv::Size& frame_size) const;

    /**
     * @brief Carry out Soft Non-Maximum Suppression algorithm under detected actions
     *
     * @param detections Detected actions
     * @param sigma Scale parameter
     * @param top_k Number of top-score bboxes
     * @param min_det_conf Minimum detection confidence
     * @param out_indices Out indices of valid detections
     */
    void SoftNonMaxSuppression(const DetectedActions& detections,
                               const float sigma,
                               size_t top_k,
                               const float min_det_conf,
                               std::vector<int>* out_indices) const;
};

View File

@@ -0,0 +1,50 @@
// Copyright (C) 2018-2024 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#pragma once
#include <vector>
/**
 * @brief Action label (index into the action class list).
 */
using Action = int;

/**
 * @brief Single-frame event: an action observed on one frame.
 */
struct FrameEvent {
    /** @brief Index of the frame the action was observed on */
    int frame_id;
    /** @brief Observed action label */
    Action action;

    /**
     * @brief Constructs an event from a frame index and an action label.
     */
    FrameEvent(int frame, Action act) : frame_id(frame), action(act) {}
};

using FrameEventsTrack = std::vector<FrameEvent>;
/**
 * @brief Frame range [begin_frame_id, end_frame_id) over which the same
 *        action was observed.
 */
struct RangeEvent {
    /** @brief First frame of the range */
    int begin_frame_id;
    /** @brief One past the last valid frame of the range */
    int end_frame_id;
    /** @brief Action label shared by the whole range */
    Action action;

    /**
     * @brief Constructs a range event from its boundaries and action label.
     */
    RangeEvent(int begin_frame, int end_frame, Action act)
        : begin_frame_id(begin_frame), end_frame_id(end_frame), action(act) {}
};

using RangeEventsTrack = std::vector<RangeEvent>;
// Category of tracked persons/statistics the application distinguishes
// (NOTE(review): meaning of TOP_K inferred from the name — confirm against usage).
enum ActionsType { STUDENT, TEACHER, TOP_K };

View File

@@ -0,0 +1,145 @@
// Copyright (C) 2018-2024 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#pragma once
#include <map>
#include <memory>
#include <string>
#include <vector>
#include <functional>
#include "openvino/openvino.hpp"
#include "utils/ocv_common.hpp"
/**
 * @brief Base class of config for network
 */
struct CnnConfig {
    /**
     * @brief Constructor
     *
     * @param path_to_model Path to the model file
     * @param model_type Free-form model type tag (empty by default)
     */
    explicit CnnConfig(const std::string& path_to_model, const std::string& model_type = "") :
        m_path_to_model(path_to_model), m_model_type(model_type) {}

    /** @brief Path to model description */
    std::string m_path_to_model;
    /** @brief Model type */
    std::string m_model_type;
    /** @brief Maximal size of batch */
    int m_max_batch_size{1};
    /** @brief OpenVINO Core instance */
    ov::Core m_core;
    /** @brief Device name */
    std::string m_deviceName;
};
/**
 * @brief Base class of model
 */
class CnnDLSDKBase {
public:
    using Config = CnnConfig;

    /**
     * @brief Constructor
     */
    explicit CnnDLSDKBase(const Config& config);

    /**
     * @brief Destructor
     */
    ~CnnDLSDKBase() {}

    /**
     * @brief Loads network
     */
    void Load();

protected:
    /**
     * @brief Run model in batch mode
     *
     * @param frames Vector of input images
     * @param results_fetcher Callback to fetch inference results; receives the
     *        output tensors keyed by name and a size_t count
     *        (NOTE(review): presumably the batch size — confirm in implementation)
     */
    void InferBatch(const std::vector<cv::Mat>& frames,
                    const std::function<void(const std::map<std::string, ov::Tensor>&, size_t)>& results_fetcher);

    /** @brief Config */
    Config m_config;
    /** @brief Model inputs info */
    ov::OutputVector m_inInfo;
    /** @brief Model outputs info */
    ov::OutputVector m_outInfo_;
    /** @brief Desired model layout */
    ov::Layout m_desired_layout;
    /** @brief Model input shape */
    ov::Shape m_modelShape;
    /** @brief Compiled model */
    ov::CompiledModel m_compiled_model;
    /** @brief Inference request */
    ov::InferRequest m_infer_request;
    /** @brief Input tensor */
    ov::Tensor m_in_tensor;
    /** @brief Names of output tensors */
    std::vector<std::string> m_output_tensors_names;
};
/**
 * @brief Model wrapper that computes a feature vector (descriptor) per image.
 */
class VectorCNN : public CnnDLSDKBase {
public:
    explicit VectorCNN(const CnnConfig& config);

    /**
     * @brief Computes a descriptor for a single image
     * @param outp_shape Requested output shape; empty cv::Size() by default
     *        (NOTE(review): exact reshaping semantics live in the implementation)
     */
    void Compute(const cv::Mat& image,
                 cv::Mat* vector, cv::Size outp_shape = cv::Size());
    /** @brief Computes descriptors for a batch of images */
    void Compute(const std::vector<cv::Mat>& images,
                 std::vector<cv::Mat>* vectors, cv::Size outp_shape = cv::Size());

    /** @brief Returns the maximal batch size */
    int maxBatchSize() const;
};
/**
 * @brief Interface for algorithms that consume frames, possibly asynchronously.
 */
class AsyncAlgorithm {
public:
    virtual ~AsyncAlgorithm() = default;

    /** @brief Queues a frame for processing */
    virtual void enqueue(const cv::Mat& frame) = 0;
    /** @brief Submits the queued work for execution */
    virtual void submitRequest() = 0;
    /** @brief Waits for the submitted work */
    virtual void wait() = 0;
};
/**
 * @brief Asynchronous algorithm that produces detection results of type T.
 */
template <typename T>
class AsyncDetection : public AsyncAlgorithm {
public:
    /** @brief Returns the collected detection results */
    virtual std::vector<T> fetchResults() = 0;
};
/**
 * @brief No-op detector: accepts frames, does nothing, returns no results
 *        (presumably a stand-in when a detection stage is disabled — confirm at call sites).
 */
template <typename T>
class NullDetection : public AsyncDetection<T> {
public:
    void enqueue(const cv::Mat&) override {}
    void submitRequest() override {}
    void wait() override {}
    std::vector<T> fetchResults() override { return {}; }
};
/**
 * @brief Shared sync/async submission logic for CNN-based detectors.
 */
class BaseCnnDetection : public AsyncAlgorithm {
protected:
    /** @brief Inference request; may be empty before the first enqueue */
    std::shared_ptr<ov::InferRequest> m_request;
    /** @brief True when requests are submitted asynchronously */
    const bool m_isAsync;
    /** @brief Human-readable detector name */
    std::string m_detectorName;

public:
    explicit BaseCnnDetection(bool isAsync = false) : m_isAsync(isAsync) {}

    /** @brief Submits the current request, asynchronously when configured so */
    void submitRequest() override {
        if (!m_request) {
            return;
        }
        if (m_isAsync) {
            m_request->start_async();
        } else {
            m_request->infer();
        }
    }

    /** @brief Waits for an in-flight async request; no-op in sync mode */
    void wait() override {
        if (m_request && m_isAsync) {
            m_request->wait();
        }
    }
};

View File

@@ -0,0 +1,63 @@
// Copyright (C) 2018-2024 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#pragma once
#include <map>
#include <string>
#include <vector>
#include <opencv2/core/core.hpp>
#include "openvino/openvino.hpp"
#include "cnn.hpp"
namespace detection {

/**
 * @brief Single detection: bounding box plus detector confidence.
 */
struct DetectedObject {
    /** @brief Bounding box of the detection */
    cv::Rect rect;
    /** @brief Detector confidence; defaults to -1.0f */
    float confidence;

    explicit DetectedObject(const cv::Rect& rect = cv::Rect(), float confidence = -1.0f) :
        rect(rect), confidence(confidence) {}
};

using DetectedObjects = std::vector<DetectedObject>;

/**
 * @brief Configuration of the face detection model.
 */
struct DetectorConfig : public CnnConfig {
    explicit DetectorConfig(const std::string& path_to_model) :
        CnnConfig(path_to_model) {}

    /** @brief Confidence threshold for detections */
    float confidence_threshold{0.6f};
    /** @brief Bbox enlargement factor along x (assumed — confirm in implementation) */
    float increase_scale_x{1.15f};
    /** @brief Bbox enlargement factor along y (assumed — confirm in implementation) */
    float increase_scale_y{1.15f};
    /** @brief Whether inference is submitted asynchronously */
    bool is_async = true;
    /** @brief Network input height */
    int input_h = 600;
    /** @brief Network input width */
    int input_w = 600;
};

/**
 * @brief Asynchronous face detector.
 */
class FaceDetection : public AsyncDetection<DetectedObject>, public BaseCnnDetection {
private:
    /** @brief Detector configuration */
    DetectorConfig m_config;
    /** @brief Compiled model */
    ov::CompiledModel m_model;
    /** @brief Name of the input tensor */
    std::string m_input_name;
    /** @brief Name of the output tensor */
    std::string m_output_name;
    /** @brief Maximal number of detections the model can emit */
    int m_max_detections_count = 0;
    /** @brief Size of a single detection record in the output */
    int m_object_size = 0;
    /** @brief Number of frames currently enqueued */
    int m_enqueued_frames = 0;
    /** @brief Width of the last enqueued frame */
    float m_width = 0;
    /** @brief Height of the last enqueued frame */
    float m_height = 0;

public:
    explicit FaceDetection(const DetectorConfig& config);

    void submitRequest() override;
    void enqueue(const cv::Mat& frame) override;
    void wait() override { BaseCnnDetection::wait(); }
    DetectedObjects fetchResults() override;
};

}  // namespace detection

View File

@@ -0,0 +1,63 @@
// Copyright (C) 2018-2024 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#pragma once
#include <map>
#include <string>
#include <vector>
#include <opencv2/core/core.hpp>
#include "cnn.hpp"
#include "detector.hpp"
/**
 * @brief Outcome of registering an identity into the gallery.
 */
enum class RegistrationStatus {
    SUCCESS,
    FAILURE_LOW_QUALITY,
    FAILURE_NOT_DETECTED,
};
/**
 * @brief Identity stored in the gallery: embeddings plus label and numeric id.
 */
struct GalleryObject {
    /** @brief Face embeddings of this identity */
    std::vector<cv::Mat> embeddings;
    /** @brief Human-readable label */
    std::string label;
    /** @brief Numeric identifier */
    int id;

    GalleryObject(const std::vector<cv::Mat>& embs,
                  const std::string& name, int obj_id)
        : embeddings(embs), label(name), id(obj_id) {}
};
/**
 * @brief Gallery of known identities and their face embeddings.
 */
class EmbeddingsGallery {
public:
    /** @brief Sentinel label for an unknown identity */
    static const char unknown_label[];
    /** @brief Sentinel id for an unknown identity */
    static const int unknown_id;

    /**
     * @brief Builds the gallery from an identities list.
     *
     * @param ids_list Identities list (NOTE(review): file format assumed — see implementation)
     * @param threshold Re-identification match threshold
     * @param min_size_fr Minimal face size for registration
     * @param crop_gallery Whether to crop gallery images with the detector
     * @param detector_config Config for the face detector used during registration
     * @param landmarks_det Landmarks regression model
     * @param image_reid Re-identification model
     * @param use_greedy_matcher Whether to use the greedy matching strategy
     */
    EmbeddingsGallery(const std::string& ids_list, double threshold, int min_size_fr,
                      bool crop_gallery, const detection::DetectorConfig& detector_config,
                      VectorCNN& landmarks_det,
                      VectorCNN& image_reid,
                      bool use_greedy_matcher=false);

    /** @brief Number of registered identities */
    size_t size() const;
    /** @brief Matches embeddings against the gallery; returns one id per embedding */
    std::vector<int> GetIDsByEmbeddings(const std::vector<cv::Mat>& embeddings) const;
    /** @brief Returns the label for an id */
    std::string GetLabelByID(int id) const;
    /** @brief Returns labels indexed by id */
    std::vector<std::string> GetIDToLabelMap() const;
    /** @brief True if a label is already registered */
    bool LabelExists(const std::string& label) const;

private:
    /** @brief Registers a single identity; writes its embedding on success */
    RegistrationStatus RegisterIdentity(const std::string& identity_label,
                                        const cv::Mat& image,
                                        int min_size_fr,
                                        bool crop_gallery,
                                        detection::FaceDetection& detector,
                                        VectorCNN& landmarks_det,
                                        VectorCNN& image_reid,
                                        cv::Mat& embedding);

    // Maps internal index to identity id
    std::vector<int> idx_to_id;
    // Re-identification match threshold
    double reid_threshold;
    // Registered identities
    std::vector<GalleryObject> identities;
    // Whether the greedy matching strategy is used
    bool use_greedy_matcher;
};
/**
 * @brief Aligns face crops using their facial landmarks.
 *
 * @param face_images Face crops, modified in place
 * @param landmarks_vec Landmarks per face
 *        (NOTE(review): layout of each landmarks Mat assumed — confirm in implementation)
 */
void AlignFaces(std::vector<cv::Mat>* face_images,
                std::vector<cv::Mat>* landmarks_vec);

View File

@@ -0,0 +1,62 @@
// Copyright (C) 2018-2024 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#pragma once
#include <fstream>
#include <iostream>
#include <map>
#include <vector>
#include <opencv2/opencv.hpp>
#include <utils/slog.hpp>
#include "actions.hpp"
#include "tracker/tracker.hpp"
/**
 * @brief Writes per-frame detection/action records and track summaries to logs.
 */
class DetectionsLogger {
private:
    /** @brief Master switch for writing logs */
    bool m_write_logs;
    /** @brief Stream for the action statistics log */
    std::ofstream m_act_stat_log_stream;
    /** @brief Storage for the action detection log (cv::FileStorage format) */
    cv::FileStorage m_act_det_log_stream;
    /** @brief General-purpose log sink */
    slog::LogStream& m_log_stream;

public:
    /**
     * @brief Constructor
     *
     * @param stream General-purpose log sink
     * @param enabled Enables writing of the statistics/detection log files
     * @param act_stat_log_file Path of the action statistics log file
     * @param act_det_log_file Path of the action detection log file
     */
    explicit DetectionsLogger(slog::LogStream& stream, bool enabled,
                              const std::string& act_stat_log_file,
                              const std::string& act_det_log_file);
    ~DetectionsLogger();

    /** @brief Opens a record for the next frame */
    void CreateNextFrameRecord(const std::string& path, const int frame_idx,
                               const size_t width, const size_t height);
    /** @brief Logs a recognized face with its id and action */
    void AddFaceToFrame(const cv::Rect& rect, const std::string& id, const std::string& action);
    /** @brief Logs a detected person with its action and id */
    void AddPersonToFrame(const cv::Rect& rect, const std::string& action, const std::string& id);
    /** @brief Logs a raw tracked detection for a frame */
    void AddDetectionToFrame(const TrackedObject& object, const int frame_idx);
    /** @brief Closes the current frame record */
    void FinalizeFrameRecord();
    /** @brief Dumps all per-frame detections for a processed video */
    void DumpDetections(const std::string& video_path,
                        const cv::Size frame_size,
                        const size_t num_frames,
                        const std::vector<Track>& face_tracks,
                        const std::map<int, int>& track_id_to_label_faces,
                        const std::vector<std::string>& action_idx_to_label,
                        const std::vector<std::string>& person_id_to_label,
                        const std::vector<std::map<int, int>>& frame_face_obj_id_to_action_maps);
    /** @brief Dumps per-object range-event tracks */
    void DumpTracks(const std::map<int, RangeEventsTrack>& obj_id_to_events,
                    const std::vector<std::string>& action_idx_to_label,
                    const std::map<int, int>& track_id_to_label_faces,
                    const std::vector<std::string>& person_id_to_label);
};
// Assertion helpers built on OpenCV's CV_Assert.
//
// Fixes two macro pitfalls present in the original definitions:
//  * trailing ';' inside the macro body produced an empty extra statement and
//    broke usage such as `if (c) SCR_CHECK(x); else ...` (dangling-else);
//  * unparenthesized operands mis-bound for low-precedence expressions,
//    e.g. SCR_CHECK_EQ(a & 1, 0) expanded to `a & 1 == 0`.
// Callers that terminate the macro with ';' (the conventional usage, which the
// code must already follow for SCR_CHECK_BINARY-based macros) are unaffected.
#define SCR_CHECK(cond) CV_Assert(cond)

#define SCR_CHECK_BINARY(actual, expected, op) \
    CV_Assert((actual) op (expected))

#define SCR_CHECK_EQ(actual, expected) SCR_CHECK_BINARY(actual, expected, ==)
#define SCR_CHECK_NE(actual, expected) SCR_CHECK_BINARY(actual, expected, !=)
#define SCR_CHECK_LT(actual, expected) SCR_CHECK_BINARY(actual, expected, <)
#define SCR_CHECK_GT(actual, expected) SCR_CHECK_BINARY(actual, expected, >)
#define SCR_CHECK_LE(actual, expected) SCR_CHECK_BINARY(actual, expected, <=)
#define SCR_CHECK_GE(actual, expected) SCR_CHECK_BINARY(actual, expected, >=)

View File

@@ -0,0 +1,627 @@
// Copyright (c) 2006, Google Inc.
// All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above
// copyright notice, this list of conditions and the following disclaimer
// in the documentation and/or other materials provided with the
// distribution.
// * Neither the name of Google Inc. nor the names of its
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
// ---
// Revamped and reorganized by Craig Silverstein
//
// This is the file that should be included by any file which declares
// or defines a command line flag or wants to parse command line flags
// or print a program usage message (which will include information about
// flags). Executive summary, in the form of an example foo.cc file:
//
// #include "foo.h" // foo.h has a line "DECLARE_int32(start);"
// #include "validators.h" // hypothetical file defining ValidateIsFile()
//
// DEFINE_int32(end, 1000, "The last record to read");
//
// DEFINE_string(filename, "my_file.txt", "The file to read");
// // Crash if the specified file does not exist.
// static bool dummy = RegisterFlagValidator(&FLAGS_filename,
// &ValidateIsFile);
//
// DECLARE_bool(verbose); // some other file has a DEFINE_bool(verbose, ...)
//
// void MyFunc() {
// if (FLAGS_verbose) printf("Records %d-%d\n", FLAGS_start, FLAGS_end);
// }
//
// Then, at the command-line:
// ./foo --noverbose --start=5 --end=100
//
// For more details, see
// doc/gflags.html
//
// --- A note about thread-safety:
//
// We describe many functions in this routine as being thread-hostile,
// thread-compatible, or thread-safe. Here are the meanings we use:
//
// thread-safe: it is safe for multiple threads to call this routine
// (or, when referring to a class, methods of this class)
// concurrently.
// thread-hostile: it is not safe for multiple threads to call this
// routine (or methods of this class) concurrently. In gflags,
// most thread-hostile routines are intended to be called early in,
// or even before, main() -- that is, before threads are spawned.
// thread-compatible: it is safe for multiple threads to read from
// this variable (when applied to variables), or to call const
// methods of this class (when applied to classes), as long as no
// other thread is writing to the variable or calling non-const
// methods of this class.
#ifndef GFLAGS_GFLAGS_H_
#define GFLAGS_GFLAGS_H_
#include <string>
#include <vector>
#include "gflags/gflags_declare.h" // IWYU pragma: export
// We always want to export variables defined in user code
#ifndef GFLAGS_DLL_DEFINE_FLAG
# if GFLAGS_IS_A_DLL && defined(_MSC_VER)
# define GFLAGS_DLL_DEFINE_FLAG __declspec(dllexport)
# else
# define GFLAGS_DLL_DEFINE_FLAG
# endif
#endif
namespace GFLAGS_NAMESPACE {
// --------------------------------------------------------------------
// To actually define a flag in a file, use DEFINE_bool,
// DEFINE_string, etc. at the bottom of this file. You may also find
// it useful to register a validator with the flag. This ensures that
// when the flag is parsed from the commandline, or is later set via
// SetCommandLineOption, we call the validation function. It is _not_
// called when you assign the value to the flag directly using the = operator.
//
// The validation function should return true if the flag value is valid, and
// false otherwise. If the function returns false for the new setting of the
// flag, the flag will retain its current value. If it returns false for the
// default value, ParseCommandLineFlags() will die.
//
// This function is safe to call at global construct time (as in the
// example below).
//
// Example use:
// static bool ValidatePort(const char* flagname, int32 value) {
// if (value > 0 && value < 32768) // value is ok
// return true;
// printf("Invalid value for --%s: %d\n", flagname, (int)value);
// return false;
// }
// DEFINE_int32(port, 0, "What port to listen on");
// static bool dummy = RegisterFlagValidator(&FLAGS_port, &ValidatePort);
// Returns true if successfully registered, false if not (because the
// first argument doesn't point to a command-line flag, or because a
// validator is already registered for this flag).
extern GFLAGS_DLL_DECL bool RegisterFlagValidator(const bool* flag, bool (*validate_fn)(const char*, bool));
extern GFLAGS_DLL_DECL bool RegisterFlagValidator(const int32* flag, bool (*validate_fn)(const char*, int32));
extern GFLAGS_DLL_DECL bool RegisterFlagValidator(const uint32* flag, bool (*validate_fn)(const char*, uint32));
extern GFLAGS_DLL_DECL bool RegisterFlagValidator(const int64* flag, bool (*validate_fn)(const char*, int64));
extern GFLAGS_DLL_DECL bool RegisterFlagValidator(const uint64* flag, bool (*validate_fn)(const char*, uint64));
extern GFLAGS_DLL_DECL bool RegisterFlagValidator(const double* flag, bool (*validate_fn)(const char*, double));
extern GFLAGS_DLL_DECL bool RegisterFlagValidator(const std::string* flag, bool (*validate_fn)(const char*, const std::string&));
// Convenience macro for the registration of a flag validator.
// Expands to a file-scope bool whose initializer performs the registration
// during static initialization (before main()).
#define DEFINE_validator(name, validator) \
    static const bool name##_validator_registered = \
        GFLAGS_NAMESPACE::RegisterFlagValidator(&FLAGS_##name, validator)
// --------------------------------------------------------------------
// These methods are the best way to get access to info about the
// list of commandline flags. Note that these routines are pretty slow.
// GetAllFlags: mostly-complete info about the list, sorted by file.
// ShowUsageWithFlags: pretty-prints the list to stdout (what --help does)
// ShowUsageWithFlagsRestrict: limit to filenames with restrict as a substr
//
// In addition to accessing flags, you can also access argv[0] (the program
// name) and argv (the entire commandline), which we sock away a copy of.
// These variables are static, so you should only set them once.
//
// No need to export this data only structure from DLL, avoiding VS warning 4251.
// Snapshot of one flag's metadata and current state, as filled in by
// GetAllFlags() / GetCommandLineFlagInfo().
struct CommandLineFlagInfo {
  std::string name;            // the name of the flag
  std::string type;            // the type of the flag: int32, etc
  std::string description;     // the "help text" associated with the flag
  std::string current_value;   // the current value, as a string
  std::string default_value;   // the default value, as a string
  std::string filename;        // 'cleaned' version of filename holding the flag
  bool has_validator_fn;       // true if RegisterFlagValidator called on this flag
  bool is_default;             // true if the flag has the default value and
                               // has not been set explicitly from the cmdline
                               // or via SetCommandLineOption
  const void* flag_ptr;        // pointer to the flag's current value (i.e. FLAGS_foo)
};
// Using this inside of a validator is a recipe for a deadlock.
// TODO(user) Fix locking when validators are running, to make it safe to
// call validators during ParseAllFlags.
// Also make sure then to uncomment the corresponding unit test in
// gflags_unittest.sh
extern GFLAGS_DLL_DECL void GetAllFlags(std::vector<CommandLineFlagInfo>* OUTPUT);
// These two are actually defined in gflags_reporting.cc.
extern GFLAGS_DLL_DECL void ShowUsageWithFlags(const char *argv0); // what --help does
extern GFLAGS_DLL_DECL void ShowUsageWithFlagsRestrict(const char *argv0, const char *restrict);
// Create a descriptive string for a flag.
// Goes to some trouble to make pretty line breaks.
extern GFLAGS_DLL_DECL std::string DescribeOneFlag(const CommandLineFlagInfo& flag);
// Thread-hostile; meant to be called before any threads are spawned.
extern GFLAGS_DLL_DECL void SetArgv(int argc, const char** argv);
// The following functions are thread-safe as long as SetArgv() is
// only called before any threads start.
extern GFLAGS_DLL_DECL const std::vector<std::string>& GetArgvs();
extern GFLAGS_DLL_DECL const char* GetArgv(); // all of argv as a string
extern GFLAGS_DLL_DECL const char* GetArgv0(); // only argv0
extern GFLAGS_DLL_DECL uint32 GetArgvSum(); // simple checksum of argv
extern GFLAGS_DLL_DECL const char* ProgramInvocationName(); // argv0, or "UNKNOWN" if not set
extern GFLAGS_DLL_DECL const char* ProgramInvocationShortName(); // basename(argv0)
// ProgramUsage() is thread-safe as long as SetUsageMessage() is only
// called before any threads start.
extern GFLAGS_DLL_DECL const char* ProgramUsage(); // string set by SetUsageMessage()
// VersionString() is thread-safe as long as SetVersionString() is only
// called before any threads start.
extern GFLAGS_DLL_DECL const char* VersionString(); // string set by SetVersionString()
// --------------------------------------------------------------------
// Normally you access commandline flags by just saying "if (FLAGS_foo)"
// or whatever, and set them by calling "FLAGS_foo = bar" (or, more
// commonly, via the DEFINE_foo macro). But if you need a bit more
// control, we have programmatic ways to get/set the flags as well.
// These programmatic ways to access flags are thread-safe, but direct
// access is only thread-compatible.
// Return true iff the flagname was found.
// OUTPUT is set to the flag's value, or unchanged if we return false.
extern GFLAGS_DLL_DECL bool GetCommandLineOption(const char* name, std::string* OUTPUT);
// Return true iff the flagname was found. OUTPUT is set to the flag's
// CommandLineFlagInfo or unchanged if we return false.
extern GFLAGS_DLL_DECL bool GetCommandLineFlagInfo(const char* name, CommandLineFlagInfo* OUTPUT);
// Return the CommandLineFlagInfo of the flagname. exit() if name not found.
// Example usage, to check if a flag's value is currently the default value:
// if (GetCommandLineFlagInfoOrDie("foo").is_default) ...
extern GFLAGS_DLL_DECL CommandLineFlagInfo GetCommandLineFlagInfoOrDie(const char* name);
// How SetCommandLineOptionWithMode() should apply a new value to a flag.
enum GFLAGS_DLL_DECL FlagSettingMode {
  // update the flag's value (can call this multiple times).
  SET_FLAGS_VALUE,
  // update the flag's value, but *only if* it has not yet been updated
  // with SET_FLAGS_VALUE, SET_FLAG_IF_DEFAULT, or "FLAGS_xxx = nondef".
  SET_FLAG_IF_DEFAULT,
  // set the flag's default value to this.  If the flag has not yet updated
  // yet (via SET_FLAGS_VALUE, SET_FLAG_IF_DEFAULT, or "FLAGS_xxx = nondef")
  // change the flag's current value to the new default value as well.
  SET_FLAGS_DEFAULT
};
// Set a particular flag ("command line option"). Returns a string
// describing the new value that the option has been set to. The
// return value API is not well-specified, so basically just depend on
// it to be empty if the setting failed for some reason -- the name is
// not a valid flag name, or the value is not a valid value -- and
// non-empty else.
// SetCommandLineOption uses set_mode == SET_FLAGS_VALUE (the common case)
extern GFLAGS_DLL_DECL std::string SetCommandLineOption (const char* name, const char* value);
extern GFLAGS_DLL_DECL std::string SetCommandLineOptionWithMode(const char* name, const char* value, FlagSettingMode set_mode);
// --------------------------------------------------------------------
// Saves the states (value, default value, whether the user has set
// the flag, registered validators, etc) of all flags, and restores
// them when the FlagSaver is destroyed. This is very useful in
// tests, say, when you want to let your tests change the flags, but
// make sure that they get reverted to the original states when your
// test is complete.
//
// Example usage:
// void TestFoo() {
// FlagSaver s1;
// FLAG_foo = false;
// FLAG_bar = "some value";
//
// // test happens here. You can return at any time
// // without worrying about restoring the FLAG values.
// }
//
// Note: This class is marked with GFLAGS_ATTRIBUTE_UNUSED because all
// the work is done in the constructor and destructor, so in the standard
// usage example above, the compiler would complain that it's an
// unused variable.
//
// This class is thread-safe. However, its destructor writes to
// exactly the set of flags that have changed value during its
// lifetime, so concurrent _direct_ access to those flags
// (i.e. FLAGS_foo instead of {Get,Set}CommandLineOption()) is unsafe.
class GFLAGS_DLL_DECL FlagSaver {
 public:
  FlagSaver();   // captures the state of all flags
  ~FlagSaver();  // restores every flag to the captured state

 private:
  class FlagSaverImpl* impl_;  // we use pimpl here to keep API steady

  FlagSaver(const FlagSaver&);  // no copying!
  void operator=(const FlagSaver&);
};
// --------------------------------------------------------------------
// Some deprecated or hopefully-soon-to-be-deprecated functions.
// This is often used for logging. TODO(csilvers): figure out a better way
extern GFLAGS_DLL_DECL std::string CommandlineFlagsIntoString();
// Usually where this is used, a FlagSaver should be used instead.
extern GFLAGS_DLL_DECL
bool ReadFlagsFromString(const std::string& flagfilecontents,
const char* prog_name,
bool errors_are_fatal); // uses SET_FLAGS_VALUE
// These let you manually implement --flagfile functionality.
// DEPRECATED.
// NOTE(review): 'prog_name' is presumably argv[0], used to select the
// program-specific sections of a flagfile -- confirm against gflags docs.
extern GFLAGS_DLL_DECL bool AppendFlagsIntoFile(const std::string& filename, const char* prog_name);
extern GFLAGS_DLL_DECL bool ReadFromFlagsFile(const std::string& filename, const char* prog_name, bool errors_are_fatal); // uses SET_FLAGS_VALUE
// --------------------------------------------------------------------
// Useful routines for initializing flags from the environment.
// In each case, if 'varname' does not exist in the environment
// return defval. If 'varname' does exist but is not valid
// (e.g., not a number for an int32 flag), abort with an error.
// Otherwise, return the value. NOTE: for booleans, for true use
// 't' or 'T' or 'true' or '1', for false 'f' or 'F' or 'false' or '0'.
extern GFLAGS_DLL_DECL bool BoolFromEnv(const char *varname, bool defval);
extern GFLAGS_DLL_DECL int32 Int32FromEnv(const char *varname, int32 defval);
extern GFLAGS_DLL_DECL uint32 Uint32FromEnv(const char *varname, uint32 defval);
extern GFLAGS_DLL_DECL int64 Int64FromEnv(const char *varname, int64 defval);
extern GFLAGS_DLL_DECL uint64 Uint64FromEnv(const char *varname, uint64 defval);
extern GFLAGS_DLL_DECL double DoubleFromEnv(const char *varname, double defval);
extern GFLAGS_DLL_DECL const char *StringFromEnv(const char *varname, const char *defval);
// --------------------------------------------------------------------
// The next two functions parse gflags from main():
// Set the "usage" message for this program. For example:
// string usage("This program does nothing. Sample usage:\n");
// usage += argv[0] + " <uselessarg1> <uselessarg2>";
// SetUsageMessage(usage);
// Do not include commandline flags in the usage: we do that for you!
// Thread-hostile; meant to be called before any threads are spawned.
extern GFLAGS_DLL_DECL void SetUsageMessage(const std::string& usage);
// Sets the version string, which is emitted with --version.
// For instance: SetVersionString("1.3");
// Thread-hostile; meant to be called before any threads are spawned.
extern GFLAGS_DLL_DECL void SetVersionString(const std::string& version);
// Looks for flags in argv and parses them. Rearranges argv to put
// flags first, or removes them entirely if remove_flags is true.
// If a flag is defined more than once in the command line or flag
// file, the last definition is used. Returns the index (into argv)
// of the first non-flag argument.
// See top-of-file for more details on this function.
// Note: argc/argv are passed by pointer because they may be modified
// in place (flags can be reordered or removed from argv).
#ifndef SWIG // In swig, use ParseCommandLineFlagsScript() instead.
extern GFLAGS_DLL_DECL uint32 ParseCommandLineFlags(int *argc, char*** argv, bool remove_flags);
#endif
// Calls to ParseCommandLineNonHelpFlags and then to
// HandleCommandLineHelpFlags can be used instead of a call to
// ParseCommandLineFlags during initialization, in order to allow for
// changing default values for some FLAGS (via
// e.g. SetCommandLineOptionWithMode calls) between the time of
// command line parsing and the time of dumping help information for
// the flags as a result of command line parsing. If a flag is
// defined more than once in the command line or flag file, the last
// definition is used. Returns the index (into argv) of the first
// non-flag argument. (If remove_flags is true, will always return 1.)
extern GFLAGS_DLL_DECL uint32 ParseCommandLineNonHelpFlags(int *argc, char*** argv, bool remove_flags);
// This is actually defined in gflags_reporting.cc.
// This function is misnamed (it also handles --version, etc.), but
// it's too late to change that now. :-(
extern GFLAGS_DLL_DECL void HandleCommandLineHelpFlags(); // in gflags_reporting.cc
// Allow command line reparsing. Disables the error normally
// generated when an unknown flag is found, since it may be found in a
// later parse. Thread-hostile; meant to be called before any threads
// are spawned.
extern GFLAGS_DLL_DECL void AllowCommandLineReparsing();
// Reparse the flags that have not yet been recognized. Only flags
// registered since the last parse will be recognized. Any flag value
// must be provided as part of the argument using "=", not as a
// separate command line argument that follows the flag argument.
// Intended for handling flags from dynamically loaded libraries,
// since their flags are not registered until they are loaded.
extern GFLAGS_DLL_DECL void ReparseCommandLineNonHelpFlags();
// Clean up memory allocated by flags. This is only needed to reduce
// the quantity of "potentially leaked" reports emitted by memory
// debugging tools such as valgrind. It is not required for normal
// operation, or for the google perftools heap-checker. It must only
// be called when the process is about to exit, and all threads that
// might access flags are quiescent. Referencing flags after this is
// called will have unexpected consequences. This is not safe to run
// when multiple threads might be running: the function is
// thread-hostile.
extern GFLAGS_DLL_DECL void ShutDownCommandLineFlags();
// --------------------------------------------------------------------
// Now come the command line flag declaration/definition macros that
// will actually be used. They're kind of hairy. A major reason
// for this is initialization: we want people to be able to access
// variables in global constructors and have that not crash, even if
// their global constructor runs before the global constructor here.
// (Obviously, we can't guarantee the flags will have the correct
// default value in that case, but at least accessing them is safe.)
// The only way to do that is have flags point to a static buffer.
// So we make one, using a union to ensure proper alignment, and
// then use placement-new to actually set up the flag with the
// correct default value. In the same vein, we have to worry about
// flag access in global destructors, so FlagRegisterer has to be
// careful never to destroy the flag-values it constructs.
//
// Note that when we define a flag variable FLAGS_<name>, we also
// preemptively define a junk variable, FLAGS_no<name>. This is to
// cause a link-time error if someone tries to define 2 flags with
// names like "logging" and "nologging". We do this because a bool
// flag FLAG can be set from the command line to true with a "-FLAG"
// argument, and to false with a "-noFLAG" argument, and so this can
// potentially avert confusion.
//
// We also put flags into their own namespace. It is purposefully
// named in an opaque way that people should have trouble typing
// directly. The idea is that DEFINE puts the flag in the weird
// namespace, and DECLARE imports the flag from there into the current
// namespace. The net result is to force people to use DECLARE to get
// access to a flag, rather than saying "extern GFLAGS_DLL_DECL bool FLAGS_whatever;"
// or some such instead. We want this so we can put extra
// functionality (like sanity-checking) in DECLARE if we want, and
// make sure it is picked up everywhere.
//
// We also put the type of the variable in the namespace, so that
// people can't DECLARE_int32 something that they DEFINE_bool'd
// elsewhere.
// Registers one command-line flag with the global registry. A FlagRegisterer
// object is constructed (at static-initialization time) by each DEFINE_*
// macro expansion below; the ctor records the flag's name, help text,
// defining file, and the addresses of the storage for the flag's current
// and default values.
class GFLAGS_DLL_DECL FlagRegisterer {
 public:
  // We instantiate this template ctor for all supported types,
  // so it is possible to place implementation of the FlagRegisterer ctor in
  // .cc file.
  // Calling this constructor with unsupported type will produce linker error.
  template <typename FlagType>
  FlagRegisterer(const char* name,
                 const char* help, const char* filename,
                 FlagType* current_storage, FlagType* defvalue_storage);
};
// Force compiler to not generate code for the given template specialization.
// 'extern template' suppresses implicit instantiation here; the explicit
// instantiations live in the library's .cc file (see the ctor comment above).
// Pre-2013 MSVC lacks 'extern template' support, hence the empty fallback.
#if defined(_MSC_VER) && _MSC_VER < 1800 // Visual Studio 2013 version 12.0
#define GFLAGS_DECLARE_FLAG_REGISTERER_CTOR(type)
#else
#define GFLAGS_DECLARE_FLAG_REGISTERER_CTOR(type) \
    extern template GFLAGS_DLL_DECL FlagRegisterer::FlagRegisterer( \
        const char* name, const char* help, const char* filename, \
        type* current_storage, type* defvalue_storage)
#endif
// Do this for all supported flag types.
GFLAGS_DECLARE_FLAG_REGISTERER_CTOR(bool);
GFLAGS_DECLARE_FLAG_REGISTERER_CTOR(int32);
GFLAGS_DECLARE_FLAG_REGISTERER_CTOR(uint32);
GFLAGS_DECLARE_FLAG_REGISTERER_CTOR(int64);
GFLAGS_DECLARE_FLAG_REGISTERER_CTOR(uint64);
GFLAGS_DECLARE_FLAG_REGISTERER_CTOR(double);
GFLAGS_DECLARE_FLAG_REGISTERER_CTOR(std::string);
#undef GFLAGS_DECLARE_FLAG_REGISTERER_CTOR
// If your application #defines STRIP_FLAG_HELP to a non-zero value
// before #including this file, we remove the help message from the
// binary file. This can reduce the size of the resulting binary
// somewhat, and may also be useful for security reasons.
extern GFLAGS_DLL_DECL const char kStrippedFlagHelp[];
} // namespace GFLAGS_NAMESPACE
#ifndef SWIG // In swig, ignore the main flag declarations
#if defined(STRIP_FLAG_HELP) && STRIP_FLAG_HELP > 0
// Need this construct to avoid the 'defined but not used' warning.
#define MAYBE_STRIPPED_HELP(txt) \
    (false ? (txt) : GFLAGS_NAMESPACE::kStrippedFlagHelp)
#else
#define MAYBE_STRIPPED_HELP(txt) txt
#endif
// Each command-line flag has two variables associated with it: one
// with the current value, and one with the default value. However,
// we have a third variable, which is where value is assigned; it's a
// constant. This guarantees that FLAG_##value is initialized at
// static initialization time (e.g. before program-start) rather than
// than global construction time (which is after program-start but
// before main), at least when 'value' is a compile-time constant. We
// use a small trick for the "default value" variable, and call it
// FLAGS_no<name>. This serves the second purpose of assuring a
// compile error if someone tries to define a flag named no<name>
// which is illegal (--foo and --nofoo both affect the "foo" flag).
// DEFINE_VARIABLE is the common machinery behind every DEFINE_<type> macro
// below. For a flag <name> it creates, inside namespace fL<shorttype>:
//   FLAGS_nono<name> -- static copy of the compile-time default value;
//   FLAGS_<name>     -- the exported current value of the flag;
//   FLAGS_no<name>   -- storage for the default value (its existence also
//                       causes a link error if someone DEFINEs "no<name>");
//   o_<name>         -- a FlagRegisterer whose ctor registers the flag
//                       during static initialization.
#define DEFINE_VARIABLE(type, shorttype, name, value, help) \
  namespace fL##shorttype { \
    static const type FLAGS_nono##name = value; \
    /* We always want to export defined variables, dll or no */ \
    GFLAGS_DLL_DEFINE_FLAG type FLAGS_##name = FLAGS_nono##name; \
    static type FLAGS_no##name = FLAGS_nono##name; \
    static GFLAGS_NAMESPACE::FlagRegisterer o_##name( \
      #name, MAYBE_STRIPPED_HELP(help), __FILE__, \
      &FLAGS_##name, &FLAGS_no##name); \
  } \
  using fL##shorttype::FLAGS_##name
// For DEFINE_bool, we want to do the extra check that the passed-in
// value is actually a bool, and not a string or something that can be
// coerced to a bool. These declarations (no definition needed!) will
// help us do that, and never evaluate From, which is important.
// We'll use 'sizeof(IsBool(val))' to distinguish. This code requires
// that the compiler have different sizes for bool & double. Since
// this is not guaranteed by the standard, we check it with a
// COMPILE_ASSERT.
namespace fLB {
struct CompileAssert {};
// Negative array size if the assumption fails -> compile error.
typedef CompileAssert expected_sizeof_double_neq_sizeof_bool[
    (sizeof(double) != sizeof(bool)) ? 1 : -1];
template<typename From> double GFLAGS_DLL_DECL IsBoolFlag(const From& from);
GFLAGS_DLL_DECL bool IsBoolFlag(bool from);
} // namespace fLB
// Here are the actual DEFINE_*-macros. The respective DECLARE_*-macros
// are in a separate include, gflags_declare.h, for reducing
// the physical transitive size for DECLARE use.
#define DEFINE_bool(name, val, txt) \
  namespace fLB { \
    typedef ::fLB::CompileAssert FLAG_##name##_value_is_not_a_bool[ \
      (sizeof(::fLB::IsBoolFlag(val)) != sizeof(double))? 1: -1]; \
  } \
  DEFINE_VARIABLE(bool, B, name, val, txt)
#define DEFINE_int32(name, val, txt) \
  DEFINE_VARIABLE(GFLAGS_NAMESPACE::int32, I, \
                  name, val, txt)
#define DEFINE_uint32(name,val, txt) \
  DEFINE_VARIABLE(GFLAGS_NAMESPACE::uint32, U, \
                  name, val, txt)
#define DEFINE_int64(name, val, txt) \
  DEFINE_VARIABLE(GFLAGS_NAMESPACE::int64, I64, \
                  name, val, txt)
#define DEFINE_uint64(name,val, txt) \
  DEFINE_VARIABLE(GFLAGS_NAMESPACE::uint64, U64, \
                  name, val, txt)
#define DEFINE_double(name, val, txt) \
  DEFINE_VARIABLE(double, D, name, val, txt)
// Strings are trickier, because they're not a POD, so we can't
// construct them at static-initialization time (instead they get
// constructed at global-constructor time, which is much later). To
// try to avoid crashes in that case, we use a char buffer to store
// the string, which we can static-initialize, and then placement-new
// into it later. It's not perfect, but the best we can do.
namespace fLS {
// Constructs the flag's clstring in a caller-provided static buffer via
// placement new. Overloads exist for 'const char*' and 'clstring' default
// values; the 'int' overload is declared but never defined, so passing a
// literal 0 (i.e. NULL) as the default of a DEFINE_string fails at link time.
inline clstring* dont_pass0toDEFINE_string(char *stringspot,
                                           const char *value) {
  return new(stringspot) clstring(value);
}
inline clstring* dont_pass0toDEFINE_string(char *stringspot,
                                           const clstring &value) {
  return new(stringspot) clstring(value);
}
inline clstring* dont_pass0toDEFINE_string(char *stringspot,
                                           int value);
// Auxiliary class used to explicitly call destructor of string objects
// allocated using placement new during static program deinitialization.
// The destructor MUST be an inline function such that the explicit
// destruction occurs in the same compilation unit as the placement new.
class StringFlagDestructor {
  void *current_storage_;
  void *defvalue_storage_;
public:
  StringFlagDestructor(void *current, void *defvalue)
  : current_storage_(current), defvalue_storage_(defvalue) {}
  ~StringFlagDestructor() {
    reinterpret_cast<clstring*>(current_storage_ )->~clstring();
    reinterpret_cast<clstring*>(defvalue_storage_)->~clstring();
  }
};
} // namespace fLS
// We need to define a var named FLAGS_no##name so people don't define
// --string and --nostring. And we need a temporary place to put val
// so we don't have to evaluate it twice. Two great needs that go
// great together!
// The weird 'using' + 'extern' inside the fLS namespace is to work around
// an unknown compiler bug/issue with the gcc 4.2.1 on SUSE 10. See
// http://code.google.com/p/google-gflags/issues/detail?id=20
// Buffer layout: s_<name>[0] holds the current value (aliased by the
// FLAGS_##name reference), s_<name>[1] holds the default value; both are
// destroyed by the static StringFlagDestructor d_<name>.
#define DEFINE_string(name, val, txt) \
  namespace fLS { \
    using ::fLS::clstring; \
    using ::fLS::StringFlagDestructor; \
    static union { void* align; char s[sizeof(clstring)]; } s_##name[2]; \
    clstring* const FLAGS_no##name = ::fLS:: \
                                   dont_pass0toDEFINE_string(s_##name[0].s, \
                                                             val); \
    static GFLAGS_NAMESPACE::FlagRegisterer o_##name( \
        #name, MAYBE_STRIPPED_HELP(txt), __FILE__, \
        FLAGS_no##name, new (s_##name[1].s) clstring(*FLAGS_no##name)); \
    static StringFlagDestructor d_##name(s_##name[0].s, s_##name[1].s); \
    extern GFLAGS_DLL_DEFINE_FLAG clstring& FLAGS_##name; \
    using fLS::FLAGS_##name; \
    clstring& FLAGS_##name = *FLAGS_no##name; \
  } \
  using fLS::FLAGS_##name
#endif // SWIG
// Import gflags library symbols into alternative/deprecated namespace(s)
#include "gflags_gflags.h"
#endif // GFLAGS_GFLAGS_H_

View File

@@ -0,0 +1,121 @@
// Copyright (c) 2008, Google Inc.
// All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above
// copyright notice, this list of conditions and the following disclaimer
// in the documentation and/or other materials provided with the
// distribution.
// * Neither the name of Google Inc. nor the names of its
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// ---
//
// Implement helpful bash-style command line flag completions
//
// ** Functional API:
// HandleCommandLineCompletions() should be called early during
// program startup, but after command line flag code has been
// initialized, such as the beginning of HandleCommandLineHelpFlags().
// It checks the value of the flag --tab_completion_word. If this
// flag is empty, nothing happens here. If it contains a string,
// however, then HandleCommandLineCompletions() will hijack the
// process, attempting to identify the intention behind this
// completion. Regardless of the outcome of this deduction, the
// process will be terminated, similar to --helpshort flag
// handling.
//
// ** Overview of Bash completions:
// Bash can be told to programatically determine completions for the
// current 'cursor word'. It does this by (in this case) invoking a
// command with some additional arguments identifying the command
// being executed, the word being completed, and the previous word
// (if any). Bash then expects a sequence of output lines to be
// printed to stdout. If these lines all contain a common prefix
// longer than the cursor word, bash will replace the cursor word
// with that common prefix, and display nothing. If there isn't such
// a common prefix, bash will display the lines in pages using 'more'.
//
// ** Strategy taken for command line completions:
// If we can deduce either the exact flag intended, or a common flag
// prefix, we'll output exactly that. Otherwise, if information
// must be displayed to the user, we'll take the opportunity to add
// some helpful information beyond just the flag name (specifically,
// we'll include the default flag value and as much of the flag's
// description as can fit on a single terminal line width, as specified
// by the flag --tab_completion_columns). Furthermore, we'll try to
// make bash order the output such that the most useful or relevent
// flags are the most likely to be shown at the top.
//
// ** Additional features:
// To assist in finding that one really useful flag, substring matching
// was implemented. Before pressing a <TAB> to get completion for the
// current word, you can append one or more '?' to the flag to do
// substring matching. Here's the semantics:
// --foo<TAB> Show me all flags with names prefixed by 'foo'
// --foo?<TAB> Show me all flags with 'foo' somewhere in the name
// --foo??<TAB> Same as prior case, but also search in module
// definition path for 'foo'
// --foo???<TAB> Same as prior case, but also search in flag
// descriptions for 'foo'
// Finally, we'll trim the output to a relatively small number of
// flags to keep bash quiet about the verbosity of output. If one
// really wanted to see all possible matches, appending a '+' to the
// search word will force the exhaustive list of matches to be printed.
//
// ** How to have bash accept completions from a binary:
// Bash requires that it be informed about each command that programmatic
// completion should be enabled for. Example addition to a .bashrc
// file would be (your path to gflags_completions.sh file may differ):
/*
$ complete -o bashdefault -o default -o nospace -C \
'/home/build/eng/bash/bash_completions.sh --tab_completion_columns $COLUMNS' \
time env binary_name another_binary [...]
*/
// This would allow the following to work:
// $ /path/to/binary_name --vmodule<TAB>
// Or:
// $ ./bin/path/another_binary --gfs_u<TAB>
// (etc)
//
// Sadly, it appears that bash gives no easy way to force this behavior for
// all commands. That's where the "time" in the above example comes in.
// If you haven't specifically added a command to the list of completion
// supported commands, you can still get completions by prefixing the
// entire command with "env".
// $ env /some/brand/new/binary --vmod<TAB>
// Assuming that "binary" is a newly compiled binary, this should still
// produce the expected completion output.
#ifndef GFLAGS_COMPLETIONS_H_
#define GFLAGS_COMPLETIONS_H_
namespace google {
// Implements the bash tab-completion support described in the file comment
// above: inspects --tab_completion_word and, if it is non-empty, prints
// completion candidates and terminates the process (similar to --helpshort).
// Call early during startup, after flag parsing machinery is initialized,
// e.g. from the beginning of HandleCommandLineHelpFlags().
extern void HandleCommandLineCompletions(void);
}
#endif // GFLAGS_COMPLETIONS_H_

View File

@@ -0,0 +1,156 @@
// Copyright (c) 1999, Google Inc.
// All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above
// copyright notice, this list of conditions and the following disclaimer
// in the documentation and/or other materials provided with the
// distribution.
// * Neither the name of Google Inc. nor the names of its
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
// ---
//
// Revamped and reorganized by Craig Silverstein
//
// This is the file that should be included by any file which declares
// command line flag.
#ifndef GFLAGS_DECLARE_H_
#define GFLAGS_DECLARE_H_
// ---------------------------------------------------------------------------
// Namespace of gflags library symbols.
#define GFLAGS_NAMESPACE google
// ---------------------------------------------------------------------------
// Windows DLL import/export.
// Whether gflags library is a DLL.
//
// Set to 1 by default when the shared gflags library was built on Windows.
// Must be overwritten when this header file is used with the optionally also
// built static library instead; set by CMake's INTERFACE_COMPILE_DEFINITIONS.
#ifndef GFLAGS_IS_A_DLL
# define GFLAGS_IS_A_DLL 1
#endif
// We always want to import the symbols of the gflags library.
#ifndef GFLAGS_DLL_DECL
# if GFLAGS_IS_A_DLL && defined(_MSC_VER)
# define GFLAGS_DLL_DECL __declspec(dllimport)
# elif defined(__GNUC__) && __GNUC__ >= 4
# define GFLAGS_DLL_DECL __attribute__((visibility("default")))
# else
# define GFLAGS_DLL_DECL
# endif
#endif
// We always want to import variables declared in user code.
#ifndef GFLAGS_DLL_DECLARE_FLAG
# if GFLAGS_IS_A_DLL && defined(_MSC_VER)
# define GFLAGS_DLL_DECLARE_FLAG __declspec(dllimport)
# elif defined(__GNUC__) && __GNUC__ >= 4
# define GFLAGS_DLL_DECLARE_FLAG __attribute__((visibility("default")))
# else
# define GFLAGS_DLL_DECLARE_FLAG
# endif
#endif
// ---------------------------------------------------------------------------
// Flag types
#include <string>
#if 1
# include <stdint.h> // the normal place uint32_t is defined
#elif 1
# include <sys/types.h> // the normal place u_int32_t is defined
#elif 1
# include <inttypes.h> // a third place for uint32_t or u_int32_t
#endif
// NOTE(review): the "#if 1/#elif 1" and "#if 0/#elif 1" chains here were
// filled in by the configure/CMake step; this particular copy is configured
// for the Windows branch (__int32/__int64) -- regenerate rather than edit.
namespace GFLAGS_NAMESPACE {
#if 0 // C99
typedef int32_t int32;
typedef uint32_t uint32;
typedef int64_t int64;
typedef uint64_t uint64;
#elif 0 // BSD
typedef int32_t int32;
typedef u_int32_t uint32;
typedef int64_t int64;
typedef u_int64_t uint64;
#elif 1 // Windows
typedef __int32 int32;
typedef unsigned __int32 uint32;
typedef __int64 int64;
typedef unsigned __int64 uint64;
#else
# error Do not know how to define a 32-bit integer quantity on your system
#endif
} // namespace GFLAGS_NAMESPACE
namespace fLS {
// The meaning of "string" might be different between now and when the
// macros below get invoked (e.g., if someone is experimenting with
// other string implementations that get defined after this file is
// included). Save the current meaning now and use it in the macros.
typedef std::string clstring;
} // namespace fLS
// DECLARE_VARIABLE imports into the current translation unit a flag that was
// defined (via DEFINE_*) elsewhere; the namespace dance mirrors DEFINE_VARIABLE.
#define DECLARE_VARIABLE(type, shorttype, name) \
  /* We always want to import declared variables, dll or no */ \
  namespace fL##shorttype { extern GFLAGS_DLL_DECLARE_FLAG type FLAGS_##name; } \
  using fL##shorttype::FLAGS_##name
#define DECLARE_bool(name) \
  DECLARE_VARIABLE(bool, B, name)
#define DECLARE_int32(name) \
  DECLARE_VARIABLE(::GFLAGS_NAMESPACE::int32, I, name)
#define DECLARE_uint32(name) \
  DECLARE_VARIABLE(::GFLAGS_NAMESPACE::uint32, U, name)
#define DECLARE_int64(name) \
  DECLARE_VARIABLE(::GFLAGS_NAMESPACE::int64, I64, name)
#define DECLARE_uint64(name) \
  DECLARE_VARIABLE(::GFLAGS_NAMESPACE::uint64, U64, name)
#define DECLARE_double(name) \
  DECLARE_VARIABLE(double, D, name)
#define DECLARE_string(name) \
  /* We always want to import declared variables, dll or no */ \
  namespace fLS { \
  extern GFLAGS_DLL_DECLARE_FLAG ::fLS::clstring& FLAGS_##name; \
  } \
  using fLS::FLAGS_##name
#endif // GFLAGS_DECLARE_H_

View File

@@ -0,0 +1,102 @@
// Copyright (c) 2014, Andreas Schuh
// All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above
// copyright notice, this list of conditions and the following disclaimer
// in the documentation and/or other materials provided with the
// distribution.
// * Neither the name of Google Inc. nor the names of its
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
// -----------------------------------------------------------------------------
// Imports the gflags library symbols into an alternative/deprecated namespace.
#ifndef GFLAGS_GFLAGS_H_
# error The internal header gflags_gflags.h may only be included by gflags.h
#endif
#ifndef GFLAGS_NS_GFLAGS_H_
#define GFLAGS_NS_GFLAGS_H_
namespace gflags {
// Fixed-width integer typedefs re-exported from the primary namespace.
using GFLAGS_NAMESPACE::int32;
using GFLAGS_NAMESPACE::uint32;
using GFLAGS_NAMESPACE::int64;
using GFLAGS_NAMESPACE::uint64;
// Flag registration, validation and introspection.
using GFLAGS_NAMESPACE::RegisterFlagValidator;
using GFLAGS_NAMESPACE::CommandLineFlagInfo;
using GFLAGS_NAMESPACE::GetAllFlags;
using GFLAGS_NAMESPACE::ShowUsageWithFlags;
using GFLAGS_NAMESPACE::ShowUsageWithFlagsRestrict;
using GFLAGS_NAMESPACE::DescribeOneFlag;
// argv bookkeeping and program identification.
using GFLAGS_NAMESPACE::SetArgv;
using GFLAGS_NAMESPACE::GetArgvs;
using GFLAGS_NAMESPACE::GetArgv;
using GFLAGS_NAMESPACE::GetArgv0;
using GFLAGS_NAMESPACE::GetArgvSum;
using GFLAGS_NAMESPACE::ProgramInvocationName;
using GFLAGS_NAMESPACE::ProgramInvocationShortName;
using GFLAGS_NAMESPACE::ProgramUsage;
using GFLAGS_NAMESPACE::VersionString;
// Programmatic flag get/set.
using GFLAGS_NAMESPACE::GetCommandLineOption;
using GFLAGS_NAMESPACE::GetCommandLineFlagInfo;
using GFLAGS_NAMESPACE::GetCommandLineFlagInfoOrDie;
using GFLAGS_NAMESPACE::FlagSettingMode;
using GFLAGS_NAMESPACE::SET_FLAGS_VALUE;
using GFLAGS_NAMESPACE::SET_FLAG_IF_DEFAULT;
using GFLAGS_NAMESPACE::SET_FLAGS_DEFAULT;
using GFLAGS_NAMESPACE::SetCommandLineOption;
using GFLAGS_NAMESPACE::SetCommandLineOptionWithMode;
using GFLAGS_NAMESPACE::FlagSaver;
// Flagfile and environment helpers.
using GFLAGS_NAMESPACE::CommandlineFlagsIntoString;
using GFLAGS_NAMESPACE::ReadFlagsFromString;
using GFLAGS_NAMESPACE::AppendFlagsIntoFile;
using GFLAGS_NAMESPACE::ReadFromFlagsFile;
using GFLAGS_NAMESPACE::BoolFromEnv;
using GFLAGS_NAMESPACE::Int32FromEnv;
using GFLAGS_NAMESPACE::Uint32FromEnv;
using GFLAGS_NAMESPACE::Int64FromEnv;
using GFLAGS_NAMESPACE::Uint64FromEnv;
using GFLAGS_NAMESPACE::DoubleFromEnv;
using GFLAGS_NAMESPACE::StringFromEnv;
// Command-line parsing entry points and shutdown.
using GFLAGS_NAMESPACE::SetUsageMessage;
using GFLAGS_NAMESPACE::SetVersionString;
using GFLAGS_NAMESPACE::ParseCommandLineNonHelpFlags;
using GFLAGS_NAMESPACE::HandleCommandLineHelpFlags;
using GFLAGS_NAMESPACE::AllowCommandLineReparsing;
using GFLAGS_NAMESPACE::ReparseCommandLineNonHelpFlags;
using GFLAGS_NAMESPACE::ShutDownCommandLineFlags;
using GFLAGS_NAMESPACE::FlagRegisterer;
#ifndef SWIG
using GFLAGS_NAMESPACE::ParseCommandLineFlags;
#endif
} // namespace gflags
#endif // GFLAGS_NS_GFLAGS_H_

View File

@@ -0,0 +1,94 @@
/*
// Copyright (C) 2021-2024 Intel Corporation
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
*/
#pragma once
#include <stddef.h>
#include <memory>
#include <vector>
#include <opencv2/core.hpp>
struct Peak {
explicit Peak(const cv::Point2f& keypoint = cv::Point2f(-1, -1), const float score = 0.0f, const float tag = 0.0f)
: keypoint(keypoint),
score(score),
tag(tag) {}
cv::Point2f keypoint;
float score;
float tag;
};
class Pose {
public:
explicit Pose(size_t numJoints) : peaks(numJoints) {}
void add(size_t index, Peak peak) {
peaks[index] = peak;
sum += peak.score;
poseTag = poseTag * static_cast<float>(validPointsNum) + peak.tag;
poseCenter = poseCenter * static_cast<float>(validPointsNum) + peak.keypoint;
validPointsNum += 1;
poseTag = poseTag / static_cast<float>(validPointsNum);
poseCenter = poseCenter / static_cast<float>(validPointsNum);
}
float getPoseTag() const {
return poseTag;
}
float getMeanScore() const {
return sum / static_cast<float>(size());
}
Peak& getPeak(size_t index) {
return peaks[index];
}
cv::Point2f& getPoseCenter() {
return poseCenter;
}
size_t size() const {
return peaks.size();
}
private:
std::vector<Peak> peaks;
cv::Point2f poseCenter = cv::Point2f(0.f, 0.f);
int validPointsNum = 0;
float poseTag = 0;
float sum = 0;
};
// Collects keypoint candidates for joint `jointId` from its NMS-suppressed
// heat map, keeping at most `maxNumPeople` peaks above `detectionThreshold`
// and reading each peak's grouping tag from `aembdsMaps`; results go into
// `allPeaks`. NOTE(review): described from the signature -- confirm in the .cpp.
void findPeaks(const std::vector<cv::Mat>& nmsHeatMaps,
               const std::vector<cv::Mat>& aembdsMaps,
               std::vector<std::vector<Peak>>& allPeaks,
               size_t jointId,
               size_t maxNumPeople,
               float detectionThreshold);
// Groups per-joint peaks into at most `maxNumPeople` poses of `numJoints`
// joints by matching associative-embedding tags within `tagThreshold`.
// NOTE(review): described from the signature -- confirm in the .cpp.
std::vector<Pose> matchByTag(std::vector<std::vector<Peak>>& allPeaks,
                             size_t maxNumPeople,
                             size_t numJoints,
                             float tagThreshold);
// Refines the keypoints of pose `poseId` in `allPoses` using the raw heat
// maps and embedding maps; `delta` is presumably a sub-pixel adjustment
// offset. NOTE(review): described from the signature -- confirm in the .cpp.
void adjustAndRefine(std::vector<Pose>& allPoses,
                     const std::vector<cv::Mat>& heatMaps,
                     const std::vector<cv::Mat>& aembdsMaps,
                     int poseId,
                     float delta);

View File

@@ -0,0 +1,57 @@
/*
// Copyright (C) 2020-2024 Intel Corporation
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
*/
#pragma once
#include <stddef.h>
#include <memory>
#include <string>
#include <vector>
#include "models/image_model.h"
namespace ov {
class Model;
} // namespace ov
struct InferenceResult;
struct ResultBase;
class ClassificationModel : public ImageModel {
public:
    /// Constructor
    /// @param modelFileName name of model to load.
    /// @param nTop - number of top-scoring classes to keep in the result.
    /// @param useAutoResize - if true, image will be resized by openvino.
    /// Otherwise, image will be preprocessed and resized using OpenCV routines.
    /// @param labels - array of labels for every class.
    /// @param layout - model input layout
    ClassificationModel(const std::string& modelFileName,
                        size_t nTop,
                        bool useAutoResize,
                        const std::vector<std::string>& labels,
                        const std::string& layout = "");
    /// Converts raw inference output into a classification result.
    std::unique_ptr<ResultBase> postprocess(InferenceResult& infResult) override;
    /// Loads class labels from `labelFilename`.
    /// NOTE(review): file format assumed to be one label per line -- confirm in the .cpp.
    static std::vector<std::string> loadLabels(const std::string& labelFilename);
protected:
    size_t nTop;  // How many top-scoring classes to report (set by the constructor).
    std::vector<std::string> labels;  // Per-class display labels.
    void prepareInputsOutputs(std::shared_ptr<ov::Model>& model) override;
};

View File

@@ -0,0 +1,52 @@
/*
// Copyright (C) 2020-2024 Intel Corporation
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
*/
#pragma once
#include <stddef.h>
#include <string>
#include <vector>
#include "models/image_model.h"
// Common base for object-detection model wrappers: stores the confidence
// threshold and class labels shared by all detector implementations.
class DetectionModel : public ImageModel {
public:
    /// Constructor
    /// @param modelFileName name of model to load
    /// @param confidenceThreshold - threshold to eliminate low-confidence detections.
    /// Any detected object with confidence lower than this threshold will be ignored.
    /// @param useAutoResize - if true, image will be resized by openvino.
    /// Otherwise, image will be preprocessed and resized using OpenCV routines.
    /// @param labels - array of labels for every class. If this array is empty or contains less elements
    /// than actual classes number, default "Label #N" will be shown for missing items.
    /// @param layout - model input layout
    DetectionModel();
    DetectionModel(const std::string& modelFileName,
                   float confidenceThreshold,
                   bool useAutoResize,
                   const std::vector<std::string>& labels,
                   const std::string& layout = "");

    // Reads class labels from the given file.
    static std::vector<std::string> loadLabels(const std::string& labelFilename);

protected:
    float confidenceThreshold;        // detections below this confidence are dropped
    std::vector<std::string> labels;  // class id -> label; may be shorter than class count

    // Maps a class id to its label, falling back to "Label #N" for ids the
    // labels array does not cover.
    std::string getLabelName(size_t labelID) {
        return labelID < labels.size() ? labels[labelID] : std::string("Label #") + std::to_string(labelID);
    }
};

View File

@@ -0,0 +1,59 @@
/*
// Copyright (C) 2020-2024 Intel Corporation
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
*/
#pragma once
#include <memory>
#include <string>
#include <vector>
#include "models/detection_model.h"
namespace ov {
class InferRequest;
class Model;
} // namespace ov
struct InferenceResult;
struct InputData;
struct InternalModelData;
struct ResultBase;
// CenterNet object-detector wrapper.
class ModelCenterNet : public DetectionModel {
public:
    // Axis-aligned box in pixel coordinates. Width/height are inclusive:
    // the +1.0f counts both edge pixels.
    struct BBox {
        float left;
        float top;
        float right;
        float bottom;

        float getWidth() const {
            return (right - left) + 1.0f;
        }
        float getHeight() const {
            return (bottom - top) + 1.0f;
        }
    };

    // Initial capacity reserved for detection vectors (presumably to avoid
    // reallocations during postprocessing — confirm against the .cpp).
    static const int INIT_VECTOR_SIZE = 200;

    ModelCenterNet(const std::string& modelFileName,
                   float confidenceThreshold,
                   const std::vector<std::string>& labels = std::vector<std::string>(),
                   const std::string& layout = "");

    // Custom preprocessing of the input image before inference.
    std::shared_ptr<InternalModelData> preprocess(const InputData& inputData, ov::InferRequest& request) override;
    // Converts raw network output into detection results.
    std::unique_ptr<ResultBase> postprocess(InferenceResult& infResult) override;

protected:
    void prepareInputsOutputs(std::shared_ptr<ov::Model>& model) override;
};

View File

@@ -0,0 +1,55 @@
/*
// Copyright (C) 2020-2024 Intel Corporation
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
*/
#pragma once
#include <stddef.h>
#include <memory>
#include <string>
#include <utility>
#include <vector>
#include <utils/nms.hpp>
#include "models/detection_model.h"
namespace ov {
class Model;
} // namespace ov
struct InferenceResult;
struct ResultBase;
// FaceBoxes face-detector wrapper: decodes prior-box ("anchor") based outputs
// and filters overlapping boxes by IOU.
class ModelFaceBoxes : public DetectionModel {
public:
    // Initial capacity reserved for result vectors.
    static const int INIT_VECTOR_SIZE = 200;

    ModelFaceBoxes(const std::string& modelFileName,
                   float confidenceThreshold,
                   bool useAutoResize,
                   float boxIOUThreshold,
                   const std::string& layout = "");

    std::unique_ptr<ResultBase> postprocess(InferenceResult& infResult) override;

protected:
    size_t maxProposalsCount;                      // upper bound on proposals considered
    const float boxIOUThreshold;                   // IOU threshold for NMS box filtering
    const std::vector<float> variance;             // presumably prior-box decode variances — confirm in .cpp
    const std::vector<int> steps;                  // presumably feature-map strides — confirm in .cpp
    const std::vector<std::vector<int>> minSizes;  // presumably anchor sizes per feature map — confirm in .cpp
    std::vector<Anchor> anchors;                   // generated prior boxes (filled by priorBoxes())

    void prepareInputsOutputs(std::shared_ptr<ov::Model>& model) override;
    // Fills `anchors` from the given feature-map (width, height) pairs.
    void priorBoxes(const std::vector<std::pair<size_t, size_t>>& featureMaps);
};

View File

@@ -0,0 +1,74 @@
/*
// Copyright (C) 2020-2024 Intel Corporation
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
*/
#pragma once
#include <map>
#include <memory>
#include <string>
#include <vector>
#include <utils/nms.hpp>
#include "models/detection_model.h"
namespace ov {
class Model;
} // namespace ov
struct InferenceResult;
struct ResultBase;
// RetinaFace face-detector wrapper: multi-level FPN anchors, optional facial
// landmarks and optional mask-score outputs.
class ModelRetinaFace : public DetectionModel {
public:
    static const int LANDMARKS_NUM = 5;       // facial landmarks per detection
    static const int INIT_VECTOR_SIZE = 200;  // initial capacity for result vectors

    /// Loads model and performs required initialization
    /// @param model_name name of model to load
    /// @param confidenceThreshold - threshold to eliminate low-confidence detections.
    /// Any detected object with confidence lower than this threshold will be ignored.
    /// @param useAutoResize - if true, image will be resized by openvino.
    /// @param boxIOUThreshold - threshold for NMS boxes filtering, varies in [0.0, 1.0] range.
    /// @param layout - model input layout
    ModelRetinaFace(const std::string& model_name,
                    float confidenceThreshold,
                    bool useAutoResize,
                    float boxIOUThreshold,
                    const std::string& layout = "");

    std::unique_ptr<ResultBase> postprocess(InferenceResult& infResult) override;

protected:
    // One anchor-configuration entry per FPN level.
    struct AnchorCfgLine {
        int stride;
        std::vector<int> scales;
        int baseSize;
        std::vector<int> ratios;
    };

    bool shouldDetectMasks;       // true when the model exposes mask-score outputs
    bool shouldDetectLandmarks;   // true when the model exposes landmark outputs
    const float boxIOUThreshold;  // IOU threshold for NMS box filtering
    const float maskThreshold;    // threshold applied to mask scores
    float landmarkStd;            // scale factor applied to landmark deltas — confirm in .cpp

    enum OutputType { OUT_BOXES, OUT_SCORES, OUT_LANDMARKS, OUT_MASKSCORES, OUT_MAX };

    // Output tensor names grouped by output kind (one list per OutputType).
    std::vector<std::string> separateOutputsNames[OUT_MAX];
    const std::vector<AnchorCfgLine> anchorCfg;     // per-FPN-level anchor configuration
    std::map<int, std::vector<Anchor>> anchorsFpn;  // base anchors keyed by stride
    std::vector<std::vector<Anchor>> anchors;       // anchors expanded per output location

    // Builds anchorsFpn from anchorCfg.
    void generateAnchorsFpn();
    void prepareInputsOutputs(std::shared_ptr<ov::Model>& model) override;
};

View File

@@ -0,0 +1,81 @@
/*
// Copyright (C) 2021-2024 Intel Corporation
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
*/
#pragma once
#include <stddef.h>
#include <memory>
#include <string>
#include <vector>
#include <opencv2/core/types.hpp>
#include <utils/nms.hpp>
#include "models/detection_model.h"
namespace ov {
class Model;
class Tensor;
} // namespace ov
struct InferenceResult;
struct ResultBase;
// Wrapper for the PyTorch variant of RetinaFace: decodes center-size encoded
// priors, filters by score, and produces boxes plus facial landmarks.
class ModelRetinaFacePT : public DetectionModel {
public:
    // Prior/detection box in center-size encoding.
    struct Box {
        float cX;
        float cY;
        float width;
        float height;
    };

    /// Loads model and performs required initialization
    /// @param model_name name of model to load
    /// @param confidenceThreshold - threshold to eliminate low-confidence detections.
    /// Any detected object with confidence lower than this threshold will be ignored.
    /// @param useAutoResize - if true, image will be resized by openvino.
    /// @param boxIOUThreshold - threshold for NMS boxes filtering, varies in [0.0, 1.0] range.
    /// @param layout - model input layout
    ModelRetinaFacePT(const std::string& modelFileName,
                      float confidenceThreshold,
                      bool useAutoResize,
                      float boxIOUThreshold,
                      const std::string& layout = "");

    std::unique_ptr<ResultBase> postprocess(InferenceResult& infResult) override;

protected:
    size_t landmarksNum;                 // number of landmarks per face
    const float boxIOUThreshold;         // IOU threshold for NMS box filtering
    float variance[2] = {0.1f, 0.2f};    // center/size decode variances

    enum OutputType { OUT_BOXES, OUT_SCORES, OUT_LANDMARKS, OUT_MAX };

    std::vector<ModelRetinaFacePT::Box> priors;  // precomputed prior boxes

    // Returns indices of detections whose score exceeds confidenceThreshold.
    std::vector<size_t> filterByScore(const ov::Tensor& scoresTensor, const float confidenceThreshold);
    // Gathers the scores at the given indices.
    std::vector<float> getFilteredScores(const ov::Tensor& scoresTensor, const std::vector<size_t>& indicies);
    // Gathers and rescales landmarks at the given indices to image coordinates.
    std::vector<cv::Point2f> getFilteredLandmarks(const ov::Tensor& landmarksTensor,
                                                  const std::vector<size_t>& indicies,
                                                  int imgWidth,
                                                  int imgHeight);
    // Builds the prior boxes for the current input resolution.
    std::vector<ModelRetinaFacePT::Box> generatePriorData();
    // Decodes the selected boxes to image coordinates.
    std::vector<Anchor> getFilteredProposals(const ov::Tensor& boxesTensor,
                                             const std::vector<size_t>& indicies,
                                             int imgWidth,
                                             int imgHeight);

    void prepareInputsOutputs(std::shared_ptr<ov::Model>& model) override;
};

View File

@@ -0,0 +1,73 @@
/*
// Copyright (C) 2020-2024 Intel Corporation
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
*/
#pragma once
#include <stddef.h>
#include <memory>
#include <string>
#include <vector>
#include "models/detection_model.h"
namespace ov {
class InferRequest;
class Model;
} // namespace ov
struct InferenceResult;
struct InputData;
struct InternalModelData;
struct ResultBase;
// SSD-family object-detector wrapper. Handles both networks with a single
// combined detection output and networks exposing multiple output tensors.
class ModelSSD : public DetectionModel {
public:
    ModelSSD();
    /// Constructor
    /// @param modelFileName name of model to load
    /// @param confidenceThreshold - threshold to eliminate low-confidence detections.
    /// Any detected object with confidence lower than this threshold will be ignored.
    /// @param useAutoResize - if true, image will be resized by openvino.
    /// Otherwise, image will be preprocessed and resized using OpenCV routines.
    /// @param labels - array of labels for every class. If this array is empty or contains less elements
    /// than actual classes number, default "Label #N" will be shown for missing items.
    /// @param layout - model input layout
    ModelSSD(const std::string& modelFileName,
             float confidenceThreshold,
             bool useAutoResize,
             const std::vector<std::string>& labels = std::vector<std::string>(),
             const std::string& layout = "");

    /// Two-phase initialization with the same semantics as the parameterized
    /// constructor: image-related settings go to the ImageModel base; the
    /// detector settings are stored here.
    void Initialise(const std::string& modelFileName,
                    float _confidenceThreshold,
                    bool useAutoResize,
                    const std::vector<std::string>& _labels = std::vector<std::string>(),
                    const std::string& layout = "") {
        // NOTE: the base method is spelled "Initalise" in ImageModel; the call
        // must match that spelling.
        ImageModel::Initalise(modelFileName, useAutoResize, layout);
        // Bug fix: this used to read `confidenceThreshold = confidenceThreshold;`
        // (member self-assignment joined to the previous call by a comma
        // operator), so the caller-supplied threshold was silently dropped.
        confidenceThreshold = _confidenceThreshold;
        labels = _labels;
    }

    std::shared_ptr<InternalModelData> preprocess(const InputData& inputData, ov::InferRequest& request) override;
    std::unique_ptr<ResultBase> postprocess(InferenceResult& infResult) override;

protected:
    // Postprocessing for networks with one combined detection output.
    std::unique_ptr<ResultBase> postprocessSingleOutput(InferenceResult& infResult);
    // Postprocessing for networks exposing several output tensors.
    std::unique_ptr<ResultBase> postprocessMultipleOutputs(InferenceResult& infResult);
    void prepareInputsOutputs(std::shared_ptr<ov::Model>& model) override;
    void prepareSingleOutput(std::shared_ptr<ov::Model>& model);
    void prepareMultipleOutputs(std::shared_ptr<ov::Model>& model);

    size_t objectSize = 0;       // per-detection element count in the output — set during output preparation
    size_t detectionsNumId = 0;  // id of the output reporting the detection count — set during output preparation
};

View File

@@ -0,0 +1,107 @@
/*
// Copyright (C) 2020-2024 Intel Corporation
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
*/
#pragma once
#include <stddef.h>
#include <stdint.h>
#include <map>
#include <memory>
#include <string>
#include <vector>
#include <openvino/op/region_yolo.hpp>
#include <openvino/openvino.hpp>
#include "models/detection_model.h"
struct DetectedObject;
struct InferenceResult;
struct ResultBase;
// YOLO-family object-detector wrapper (v1/v2, v3, v4, v4-tiny, YOLOF).
class ModelYolo : public DetectionModel {
protected:
    // Per-output-layer region parameters (anchors, grid size, class count),
    // either extracted from an ov RegionYolo op or built from explicit values.
    class Region {
    public:
        int num = 0;                 // number of anchors used by this output
        size_t classes = 0;          // number of classes predicted per anchor
        int coords = 0;              // number of box coordinates per prediction
        std::vector<float> anchors;  // anchor sizes for this output layer
        size_t outputWidth = 0;      // grid width of the output feature map
        size_t outputHeight = 0;     // grid height of the output feature map

        // Extracts the parameters from a RegionYolo operation of the model graph.
        Region(const std::shared_ptr<ov::op::v0::RegionYolo>& regionYolo);
        // Builds the parameters from explicit values.
        Region(size_t classes,
               int coords,
               const std::vector<float>& anchors,
               const std::vector<int64_t>& masks,
               size_t outputWidth,
               size_t outputHeight);
    };

public:
    enum YoloVersion { YOLO_V1V2, YOLO_V3, YOLO_V4, YOLO_V4_TINY, YOLOF };

    /// Constructor.
    /// @param modelFileName name of model to load
    /// @param confidenceThreshold - threshold to eliminate low-confidence detections.
    /// Any detected object with confidence lower than this threshold will be ignored.
    /// @param useAutoResize - if true, image will be resized by openvino.
    /// Otherwise, image will be preprocessed and resized using OpenCV routines.
    /// @param useAdvancedPostprocessing - if true, an advanced algorithm for filtering/postprocessing will be used
    /// (with better processing of multiple crossing objects). Otherwise, classic algorithm will be used.
    /// @param boxIOUThreshold - threshold to treat separate output regions as one object for filtering
    /// during postprocessing (only one of them should stay). The default value is 0.5
    /// @param labels - array of labels for every class. If this array is empty or contains less elements
    /// than actual classes number, default "Label #N" will be shown for missing items.
    /// @param anchors - vector of anchors coordinates. Required for YOLOv4, for other versions it may be omitted.
    /// @param masks - vector of masks values. Required for YOLOv4, for other versions it may be omitted.
    /// @param layout - model input layout
    ModelYolo(const std::string& modelFileName,
              float confidenceThreshold,
              bool useAutoResize,
              bool useAdvancedPostprocessing = true,
              float boxIOUThreshold = 0.5,
              const std::vector<std::string>& labels = std::vector<std::string>(),
              const std::vector<float>& anchors = std::vector<float>(),
              const std::vector<int64_t>& masks = std::vector<int64_t>(),
              const std::string& layout = "");

    std::unique_ptr<ResultBase> postprocess(InferenceResult& infResult) override;

protected:
    void prepareInputsOutputs(std::shared_ptr<ov::Model>& model) override;

    // Decodes one YOLO output tensor into detections, using the resized and
    // original image dimensions to map boxes back to the source frame.
    void parseYOLOOutput(const std::string& output_name,
                         const ov::Tensor& tensor,
                         const unsigned long resized_im_h,
                         const unsigned long resized_im_w,
                         const unsigned long original_im_h,
                         const unsigned long original_im_w,
                         std::vector<DetectedObject>& objects);

    // Computes the flat index of one entry inside a YOLO output tensor.
    static int calculateEntryIndex(int entriesNum, int lcoords, size_t lclasses, int location, int entry);
    // IOU of two detections, used for duplicate suppression.
    static double intersectionOverUnion(const DetectedObject& o1, const DetectedObject& o2);

    std::map<std::string, Region> regions;   // region parameters keyed by output name
    double boxIOUThreshold;                  // IOU threshold for duplicate filtering
    bool useAdvancedPostprocessing;          // see constructor docs
    bool isObjConf = 1;                      // presumably: outputs carry an objectness score — confirm in .cpp
    YoloVersion yoloVersion;
    const std::vector<float> presetAnchors;  // user-supplied anchors (may be empty)
    const std::vector<int64_t> presetMasks;  // user-supplied masks (may be empty)
    ov::Layout yoloRegionLayout = "NCHW";    // layout assumed for region output tensors
};

View File

@@ -0,0 +1,50 @@
/*
// Copyright (C) 2022-2024 Intel Corporation
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
*/
#pragma once
#include <string>
#include <vector>
#include <openvino/openvino.hpp>
#include "models/detection_model.h"
// Wrapper for the ONNX YOLOv3 model, which exposes separate boxes, scores and
// selected-indices outputs.
class ModelYoloV3ONNX: public DetectionModel {
public:
    /// Constructor.
    /// @param modelFileName name of model to load
    /// @param confidenceThreshold - threshold to eliminate low-confidence detections.
    /// Any detected object with confidence lower than this threshold will be ignored.
    /// @param labels - array of labels for every class. If this array is empty or contains less elements
    /// than actual classes number, default "Label #N" will be shown for missing items.
    /// @param layout - model input layout
    ModelYoloV3ONNX(const std::string& modelFileName,
                    float confidenceThreshold,
                    const std::vector<std::string>& labels = std::vector<std::string>(),
                    const std::string& layout = "");

    std::unique_ptr<ResultBase> postprocess(InferenceResult& infResult) override;
    std::shared_ptr<InternalModelData> preprocess(const InputData& inputData, ov::InferRequest& request) override;

protected:
    void prepareInputsOutputs(std::shared_ptr<ov::Model>& model) override;

    std::string boxesOutputName;    // output tensor with box coordinates
    std::string scoresOutputName;   // output tensor with per-class scores
    std::string indicesOutputName;  // output tensor with selected detection indices

    // COCO class count assumed by this model.
    static const int numberOfClasses = 80;
};

View File

@@ -0,0 +1,54 @@
/*
// Copyright (C) 2022-2024 Intel Corporation
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
*/
#pragma once
#include <memory>
#include <string>
#include <vector>
#include <openvino/openvino.hpp>
#include "models/detection_model.h"
// YOLOX object-detector wrapper: anchor-free decoding over per-stride grids.
class ModelYoloX: public DetectionModel {
public:
    /// Constructor.
    /// @param modelFileName name of model to load
    /// @param confidenceThreshold - threshold to eliminate low-confidence detections.
    /// Any detected object with confidence lower than this threshold will be ignored.
    /// @param boxIOUThreshold - threshold to treat separate output regions as one object for filtering
    /// during postprocessing (only one of them should stay). The default value is 0.5
    /// @param labels - array of labels for every class. If this array is empty or contains less elements
    /// than actual classes number, default "Label #N" will be shown for missing items.
    /// @param layout - model input layout
    ModelYoloX(const std::string& modelFileName,
               float confidenceThreshold,
               float boxIOUThreshold = 0.5,
               const std::vector<std::string>& labels = std::vector<std::string>(),
               const std::string& layout = "");

    std::unique_ptr<ResultBase> postprocess(InferenceResult& infResult) override;
    std::shared_ptr<InternalModelData> preprocess(const InputData& inputData, ov::InferRequest& request) override;

protected:
    void prepareInputsOutputs(std::shared_ptr<ov::Model>& model) override;
    // Precomputes the grid coordinates and stride of every output cell.
    void setStridesGrids();

    float boxIOUThreshold;                          // IOU threshold for duplicate filtering
    std::vector<std::pair<size_t, size_t>> grids;   // (x, y) grid cell per prediction
    std::vector<size_t> expandedStrides;            // stride per prediction
    // COCO class count assumed by this model.
    static const size_t numberOfClasses = 80;
};

View File

@@ -0,0 +1,89 @@
/*
// Copyright (C) 2021-2024 Intel Corporation
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
*/
#pragma once
#include <memory>
#include <string>
#include <vector>
#include <opencv2/core.hpp>
#include <utils/image_utils.h>
#include "models/image_model.h"
namespace ov {
class InferRequest;
class Model;
class Shape;
} // namespace ov
struct HumanPose;
struct InferenceResult;
struct InputData;
struct InternalModelData;
struct ResultBase;
// Human-pose-estimation wrapper for associative-embedding models: groups
// per-joint heatmap peaks into poses using embedding tags.
class HpeAssociativeEmbedding : public ImageModel {
public:
    /// Constructor
    /// @param modelFileName name of model to load
    /// @param aspectRatio - the ratio of input width to its height.
    /// @param targetSize - the length of a short image side used for model reshaping.
    /// @param confidenceThreshold - threshold to eliminate low-confidence poses.
    /// Any pose with confidence lower than this threshold will be ignored.
    /// @param layout - model input layout
    /// @param delta - offset applied during decoding — confirm exact meaning in the .cpp
    /// @param resizeMode - how the input image is resized to the network input
    HpeAssociativeEmbedding(const std::string& modelFileName,
                            double aspectRatio,
                            int targetSize,
                            float confidenceThreshold,
                            const std::string& layout = "",
                            float delta = 0.0,
                            RESIZE_MODE resizeMode = RESIZE_KEEP_ASPECT);

    std::unique_ptr<ResultBase> postprocess(InferenceResult& infResult) override;
    std::shared_ptr<InternalModelData> preprocess(const InputData& inputData, ov::InferRequest& request) override;

protected:
    void prepareInputsOutputs(std::shared_ptr<ov::Model>& model) override;

    cv::Size inputLayerSize;        // network input resolution
    double aspectRatio;             // input width / height ratio
    int targetSize;                 // short-side length used for reshaping
    float confidenceThreshold;      // poses below this confidence are dropped
    float delta;                    // decode offset (see constructor)

    std::string embeddingsTensorName;   // output with associative-embedding tags
    std::string heatmapsTensorName;     // output with joint heatmaps
    std::string nmsHeatmapsTensorName;  // output with NMS-suppressed heatmaps

    static const int numJoints = 17;    // joints per pose
    static const int stride = 32;       // network output stride
    static const int maxNumPeople = 30; // cap on decoded poses
    static const cv::Vec3f meanPixel;
    static const float detectionThreshold;
    static const float tagThreshold;

    // Reshapes the model to match aspectRatio/targetSize.
    void changeInputSize(std::shared_ptr<ov::Model>& model);
    // Finds the actual output name matching tensorName among outputsNames.
    std::string findTensorByName(const std::string& tensorName, const std::vector<std::string>& outputsNames);
    // Splits a raw output buffer into one cv::Mat per channel.
    std::vector<cv::Mat> split(float* data, const ov::Shape& shape);
    // Groups heatmap peaks into poses using the embedding maps.
    std::vector<HumanPose> extractPoses(std::vector<cv::Mat>& heatMaps,
                                        const std::vector<cv::Mat>& aembdsMaps,
                                        const std::vector<cv::Mat>& nmsHeatMaps) const;
};

View File

@@ -0,0 +1,78 @@
/*
// Copyright (C) 2020-2024 Intel Corporation
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
*/
#pragma once
#include <stddef.h>
#include <memory>
#include <string>
#include <vector>
#include <opencv2/core.hpp>
#include "models/image_model.h"
namespace ov {
class InferRequest;
class Model;
} // namespace ov
struct HumanPose;
struct InferenceResult;
struct InputData;
struct InternalModelData;
struct ResultBase;
// Human-pose-estimation wrapper for OpenPose-style models: decodes joint
// heatmaps and part-affinity fields (PAFs) into multi-person poses.
class HPEOpenPose : public ImageModel {
public:
    /// Constructor
    /// @param modelFileName name of model to load
    /// @param aspectRatio - the ratio of input width to its height.
    /// @param targetSize - the height used for model reshaping.
    /// @param confidenceThreshold - threshold to eliminate low-confidence keypoints.
    /// @param layout - model input layout
    HPEOpenPose(const std::string& modelFileName,
                double aspectRatio,
                int targetSize,
                float confidenceThreshold,
                const std::string& layout = "");

    std::unique_ptr<ResultBase> postprocess(InferenceResult& infResult) override;
    std::shared_ptr<InternalModelData> preprocess(const InputData& inputData, ov::InferRequest& request) override;

    static const size_t keypointsNumber = 18;  // keypoints per pose

protected:
    void prepareInputsOutputs(std::shared_ptr<ov::Model>& model) override;

    static const int minJointsNumber = 3;  // minimum joints for a valid pose
    static const int stride = 8;           // network output stride
    static const int upsampleRatio = 4;    // feature-map upsampling factor
    static const cv::Vec3f meanPixel;
    static const float minPeaksDistance;
    static const float midPointsScoreThreshold;
    static const float foundMidPointsRatioThreshold;
    static const float minSubsetScore;

    cv::Size inputLayerSize;    // network input resolution
    double aspectRatio;         // input width / height ratio
    int targetSize;             // target height used for reshaping
    float confidenceThreshold;  // keypoints below this confidence are dropped

    // Assembles poses from heatmaps and part-affinity fields.
    std::vector<HumanPose> extractPoses(const std::vector<cv::Mat>& heatMaps, const std::vector<cv::Mat>& pafs) const;
    // Upsamples the feature maps by upsampleRatio.
    void resizeFeatureMaps(std::vector<cv::Mat>& featureMaps) const;
    // Overrides the aspect ratio used for reshaping.
    void UpdateAspectRatio(double aRatio) { aspectRatio = aRatio; }
    // Reshapes the model to match aspectRatio/targetSize.
    void changeInputSize(std::shared_ptr<ov::Model>& model);
};

View File

@@ -0,0 +1,58 @@
/*
// Copyright (C) 2021-2024 Intel Corporation
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
*/
#pragma once
#include <stddef.h>
#include <memory>
#include <string>
#include "models/model_base.h"
#include "utils/image_utils.h"
namespace ov {
class InferRequest;
} // namespace ov
struct InputData;
struct InternalModelData;
// Base class for models that consume a single image: adds resize handling and
// the remembered network input dimensions on top of ModelBase.
class ImageModel : public ModelBase {
public:
    /// Constructor
    /// @param modelFileName name of model to load
    /// @param useAutoResize - if true, image is resized by openvino
    /// @param layout - model input layout
    ImageModel();
    ImageModel(const std::string& modelFileName, bool useAutoResize, const std::string& layout = "");

    std::shared_ptr<InternalModelData> preprocess(const InputData& inputData, ov::InferRequest& request) override;

    // Records the dimensions of the image the network will receive.
    void updateImageSize(const cv::Size& inputImgSize) override {
        netInputHeight = inputImgSize.height;
        netInputWidth = inputImgSize.width;
    }

    /// Two-phase initialization; same effect as the parameterized constructor.
    /// (The "Initalise" spelling is kept: callers such as ModelSSD use it.)
    void Initalise(const std::string& _modelFileName, bool _useAutoResize, const std::string& _layout = "") {
        useAutoResize = _useAutoResize;
        // Bug fix: the member `modelFileName` (still unset at this point) used to
        // be forwarded to the base instead of the `_modelFileName` argument, so
        // the model file name was never actually stored.
        ModelBase::Initilise(_modelFileName, _layout);
    }

protected:
    bool useAutoResize;         // true: let OpenVINO preprocessing resize the input
    size_t netInputHeight = 0;  // network input height, set via updateImageSize()
    size_t netInputWidth = 0;   // network input width, set via updateImageSize()
    cv::InterpolationFlags interpolationMode = cv::INTER_LINEAR;
    RESIZE_MODE resizeMode = RESIZE_FILL;
};

View File

@@ -0,0 +1,41 @@
/*
// Copyright (C) 2020-2024 Intel Corporation
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
*/
#pragma once
#include <opencv2/opencv.hpp>
// Polymorphic base for every input kind handed to a model's preprocess().
// asRef() performs a checked downcast to the concrete input type and throws
// std::bad_cast when the dynamic type does not match.
struct InputData {
    virtual ~InputData() = default;

    // Downcast to the concrete input type (mutable access).
    template <class T>
    T& asRef() {
        return dynamic_cast<T&>(*this);
    }

    // Downcast to the concrete input type (read-only access).
    template <class T>
    const T& asRef() const {
        return dynamic_cast<const T&>(*this);
    }
};
// Input carrying a single OpenCV image.
struct ImageInputData : public InputData {
    cv::Mat inputImage;

    ImageInputData() = default;
    // Shares the cv::Mat header with `img` (cv::Mat copies are shallow).
    ImageInputData(const cv::Mat& img) : inputImage(img) {}
};

View File

@@ -0,0 +1,48 @@
/*
// Copyright (C) 2020-2024 Intel Corporation
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
*/
#pragma once
// Polymorphic base for preprocessing metadata carried from preprocess() to
// postprocess(). asRef() performs a checked downcast to the concrete type and
// throws std::bad_cast when the dynamic type does not match.
struct InternalModelData {
    virtual ~InternalModelData() = default;

    // Downcast to the concrete metadata type (mutable access).
    template <class T>
    T& asRef() {
        return dynamic_cast<T&>(*this);
    }

    // Downcast to the concrete metadata type (read-only access).
    template <class T>
    const T& asRef() const {
        return dynamic_cast<const T&>(*this);
    }
};
// Metadata recording the dimensions of the source image, so postprocessing can
// map results back to the original frame.
struct InternalImageModelData : public InternalModelData {
    InternalImageModelData(int width, int height)
        : inputImgWidth(width),
          inputImgHeight(height) {}

    int inputImgWidth;   // source image width, pixels
    int inputImgHeight;  // source image height, pixels
};
// Image-size metadata extended with the scale factors applied during
// preprocessing (presumably used to undo resizing in postprocessing).
struct InternalScaleData : public InternalImageModelData {
    InternalScaleData(int width, int height, float scaleX, float scaleY)
        : InternalImageModelData(width, height),
          scaleX(scaleX),
          scaleY(scaleY) {}

    float scaleX;  // horizontal resize factor
    float scaleY;  // vertical resize factor
};

View File

@@ -0,0 +1,55 @@
/*
// Copyright (C) 2021-2024 Intel Corporation
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
*/
#pragma once
#include <memory>
#include <string>
#include <opencv2/core/types.hpp>
#include "models/image_model.h"
namespace ov {
class InferRequest;
class Model;
} // namespace ov
struct InferenceResult;
struct InputData;
struct InternalModelData;
struct ResultBase;
// Wrapper for a JPEG-artifact restoration model.
class JPEGRestorationModel : public ImageModel {
public:
    /// Constructor
    /// @param modelFileName name of model to load
    /// @param inputImgSize size of image to set model input shape
    /// @param jpegCompression flag allows to perform compression before the inference
    /// @param layout - model input layout
    JPEGRestorationModel(const std::string& modelFileName,
                         const cv::Size& inputImgSize,
                         bool jpegCompression,
                         const std::string& layout = "");

    std::shared_ptr<InternalModelData> preprocess(const InputData& inputData, ov::InferRequest& request) override;
    std::unique_ptr<ResultBase> postprocess(InferenceResult& infResult) override;

protected:
    void prepareInputsOutputs(std::shared_ptr<ov::Model>& model) override;
    // Reshapes the model input to the requested image size.
    void changeInputSize(std::shared_ptr<ov::Model>& model);

    static const size_t stride = 8;  // network stride the input size is aligned to — confirm in .cpp
    bool jpegCompression = false;    // if true, re-compress the input before inference
};

View File

@@ -0,0 +1,87 @@
/*
// Copyright (C) 2020-2024 Intel Corporation
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
*/
#pragma once
#include <map>
#include <memory>
#include <string>
#include <vector>
#include <openvino/openvino.hpp>
#include <utils/args_helper.hpp>
#include <utils/config_factory.h>
#include <utils/ocv_common.hpp>
struct InferenceResult;
struct InputData;
struct InternalModelData;
struct ResultBase;
class ModelBase {
public:
ModelBase() {};
ModelBase(const std::string& modelFileName, const std::string& layout = "")
: modelFileName(modelFileName),
inputsLayouts(parseLayoutString(layout)) {}
virtual ~ModelBase() {}
virtual std::shared_ptr<InternalModelData> preprocess(const InputData& inputData, ov::InferRequest& request) = 0;
// Virtual method to be overridden in derived classes
virtual void updateImageSize(const cv::Size& inputImgSize) {
// Optionally leave empty or add any base logic here
}
virtual ov::CompiledModel compileModel(const ModelConfig& config, ov::Core& core);
virtual void onLoadCompleted(const std::vector<ov::InferRequest>& requests) {}
virtual std::unique_ptr<ResultBase> postprocess(InferenceResult& infResult) = 0;
void Initilise(const std::string& _modelFileName, const std::string& _layout = "") {
modelFileName = _modelFileName;
inputsLayouts = parseLayoutString(_layout);
}
const std::vector<std::string>& getOutputsNames() const {
return outputsNames;
}
const std::vector<std::string>& getInputsNames() const {
return inputsNames;
}
std::string getModelFileName() {
return modelFileName;
}
void setInputsPreprocessing(bool reverseInputChannels,
const std::string& meanValues,
const std::string& scaleValues) {
this->inputTransform = InputTransform(reverseInputChannels, meanValues, scaleValues);
}
protected:
virtual void prepareInputsOutputs(std::shared_ptr<ov::Model>& model) = 0;
virtual void setBatch(std::shared_ptr<ov::Model>& model);
std::shared_ptr<ov::Model> prepareModel(ov::Core& core);
InputTransform inputTransform = InputTransform();
std::vector<std::string> inputsNames;
std::vector<std::string> outputsNames;
ov::CompiledModel compiledModel;
std::string modelFileName;
ModelConfig config = {};
std::map<std::string, ov::Layout> inputsLayouts;
ov::Layout getInputLayout(const ov::Output<ov::Node>& input);
};

View File

@@ -0,0 +1,62 @@
/*
// Copyright (C) 2020-2024 Intel Corporation
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
*/
#pragma once
#include <stddef.h>
#include <vector>
#include <opencv2/core.hpp>
struct HumanPose;
/// A single keypoint candidate found in a heatmap.
struct Peak {
    Peak(const int id = -1, const cv::Point2f& pos = cv::Point2f(), const float score = 0.0f);

    int id;           // peak index (-1 if unset)
    cv::Point2f pos;  // peak location
    float score;      // confidence value at the peak
};
/// Partial pose assembled during grouping: indices into per-joint peak lists.
struct HumanPoseByPeaksIndices {
    explicit HumanPoseByPeaksIndices(const int keypointsNumber);

    std::vector<int> peaksIndices;  // one peak index per keypoint; presumably -1 means "not assigned" -- confirm in .cpp
    int nJoints;                    // number of joints assigned so far
    float score;                    // accumulated pose score
};
/// Scored candidate connection between two joint peaks (a limb hypothesis).
struct TwoJointsConnection {
    TwoJointsConnection(const int firstJointIdx, const int secondJointIdx, const float score);

    int firstJointIdx;   // index of the first joint's peak
    int secondJointIdx;  // index of the second joint's peak
    float score;         // connection score
};
/// Finds keypoint peaks in the heatmap selected by heatMapId, presumably
/// appending them to allPeaks -- confirm in .cpp.
/// @param minPeaksDistance minimal distance allowed between two accepted peaks
/// @param confidenceThreshold heatmap value below which candidates are rejected
void findPeaks(const std::vector<cv::Mat>& heatMaps,
               const float minPeaksDistance,
               std::vector<std::vector<Peak>>& allPeaks,
               int heatMapId,
               float confidenceThreshold);

/// Groups per-joint peaks into whole-body poses using part affinity fields (pafs).
/// @return the assembled poses after filtering by minJointsNumber/minSubsetScore
std::vector<HumanPose> groupPeaksToPoses(const std::vector<std::vector<Peak>>& allPeaks,
                                         const std::vector<cv::Mat>& pafs,
                                         const size_t keypointsNumber,
                                         const float midPointsScoreThreshold,
                                         const float foundMidPointsRatioThreshold,
                                         const int minJointsNumber,
                                         const float minSubsetScore);

View File

@@ -0,0 +1,122 @@
/*
// Copyright (C) 2020-2024 Intel Corporation
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
*/
#pragma once
#include <map>
#include <memory>
#include <string>
#include <vector>
#include <opencv2/core.hpp>
#include <openvino/openvino.hpp>
#include "internal_model_data.h"
struct MetaData;
/// Base class for pipeline results; carries the frame id and the metadata
/// that was associated with the frame at submission time.
struct ResultBase {
    ResultBase(int64_t frameId = -1, const std::shared_ptr<MetaData>& metaData = nullptr)
        : frameId(frameId),
          metaData(metaData) {}
    virtual ~ResultBase() {}

    int64_t frameId;                     // sequential id of the source frame (-1 = no frame)
    std::shared_ptr<MetaData> metaData;  // optional metadata passed through the pipeline

    /// @returns true if the result does not correspond to any submitted frame
    bool IsEmpty() const {
        return frameId < 0;
    }

    /// Downcasts to a concrete result type; throws std::bad_cast on mismatch.
    template <class T>
    T& asRef() {
        return dynamic_cast<T&>(*this);
    }
    template <class T>
    const T& asRef() const {
        return dynamic_cast<const T&>(*this);
    }
};
struct InferenceResult : public ResultBase {
std::shared_ptr<InternalModelData> internalModelData;
std::map<std::string, ov::Tensor> outputsData;
/// Returns the first output tensor
/// This function is a useful addition to direct access to outputs list as many models have only one output
/// @returns first output tensor
ov::Tensor getFirstOutputTensor() {
if (outputsData.empty()) {
throw std::out_of_range("Outputs map is empty.");
}
return outputsData.begin()->second;
}
/// Returns true if object contains no valid data
/// @returns true if object contains no valid data
bool IsEmpty() {
return outputsData.empty();
}
};
/// Result of a classification model: the top predicted labels for a frame.
struct ClassificationResult : public ResultBase {
    ClassificationResult(int64_t frameId = -1, const std::shared_ptr<MetaData>& metaData = nullptr)
        : ResultBase(frameId, metaData) {}

    /// One predicted class: numeric id, readable label and confidence score.
    struct Classification {
        unsigned int id;
        std::string label;
        float score;

        Classification(unsigned int id, const std::string& label, float score) : id(id), label(label), score(score) {}
    };

    std::vector<Classification> topLabels;  // predictions, presumably ordered by score -- confirm in .cpp
};
/// A detected object: bounding box (inherited from cv::Rect2f) plus class info.
struct DetectedObject : public cv::Rect2f {
    size_t labelID;     // numeric class id
    std::string label;  // human-readable class name
    float confidence;   // detection confidence
};
/// Result of an object detection model: all objects found in a frame.
struct DetectionResult : public ResultBase {
    DetectionResult(int64_t frameId = -1, const std::shared_ptr<MetaData>& metaData = nullptr)
        : ResultBase(frameId, metaData) {}

    std::vector<DetectedObject> objects;
};
/// Detection result extended with facial landmark points (RetinaFace models).
struct RetinaFaceDetectionResult : public DetectionResult {
    RetinaFaceDetectionResult(int64_t frameId = -1, const std::shared_ptr<MetaData>& metaData = nullptr)
        : DetectionResult(frameId, metaData) {}

    std::vector<cv::Point2f> landmarks;
};
/// Result of an image-to-image model (restoration, super-resolution, style transfer).
struct ImageResult : public ResultBase {
    ImageResult(int64_t frameId = -1, const std::shared_ptr<MetaData>& metaData = nullptr)
        : ResultBase(frameId, metaData) {}

    cv::Mat resultImage;
};
/// One estimated human pose: keypoint coordinates and an overall pose score.
struct HumanPose {
    std::vector<cv::Point2f> keypoints;
    float score;
};
/// Result of a human pose estimation model: all poses found in a frame.
struct HumanPoseResult : public ResultBase {
    HumanPoseResult(int64_t frameId = -1, const std::shared_ptr<MetaData>& metaData = nullptr)
        : ResultBase(frameId, metaData) {}

    std::vector<HumanPose> poses;
};

View File

@@ -0,0 +1,50 @@
/*
// Copyright (C) 2020-2024 Intel Corporation
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
*/
#pragma once
#include <memory>
#include <string>
#include <vector>
#include "models/image_model.h"
namespace ov {
class Model;
} // namespace ov
struct InferenceResult;
struct ResultBase;
/// Model wrapper for semantic segmentation networks.
class SegmentationModel : public ImageModel {
public:
    /// Constructor
    /// @param modelFileName name of model to load
    /// @param useAutoResize - if true, image will be resized by openvino.
    /// Otherwise, image will be preprocessed and resized using OpenCV routines.
    /// @param layout - model input layout
    SegmentationModel(const std::string& modelFileName, bool useAutoResize, const std::string& layout = "");

    /// Loads class labels from the given file.
    static std::vector<std::string> loadLabels(const std::string& labelFilename);

    std::unique_ptr<ResultBase> postprocess(InferenceResult& infResult) override;

protected:
    void prepareInputsOutputs(std::shared_ptr<ov::Model>& model) override;

    // Output tensor dimensions; presumably filled while preparing outputs -- confirm in .cpp.
    int outHeight = 0;
    int outWidth = 0;
    int outChannels = 0;
};

View File

@@ -0,0 +1,43 @@
/*
// Copyright (C) 2021-2024 Intel Corporation
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
*/
#pragma once
#include <memory>
#include <string>
#include "models/image_model.h"
namespace ov {
class InferRequest;
class Model;
} // namespace ov
struct InferenceResult;
struct InputData;
struct InternalModelData;
struct ResultBase;
/// Model wrapper for style transfer networks (image in, stylized image out).
class StyleTransferModel : public ImageModel {
public:
    /// Constructor
    /// @param modelFileName name of model to load
    /// @param layout - model input layout
    StyleTransferModel(const std::string& modelFileName, const std::string& layout = "");

    std::unique_ptr<ResultBase> postprocess(InferenceResult& infResult) override;

protected:
    void prepareInputsOutputs(std::shared_ptr<ov::Model>& model) override;
};

View File

@@ -0,0 +1,59 @@
/*
// Copyright (C) 2021-2024 Intel Corporation
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
*/
#pragma once
#include <memory>
#include <string>
#include <opencv2/core/types.hpp>
#include "models/image_model.h"
namespace ov {
class InferRequest;
class Model;
} // namespace ov
struct InferenceResult;
struct InputData;
struct InternalModelData;
struct ResultBase;
/// Model wrapper for single-image super-resolution networks.
class SuperResolutionModel : public ImageModel {
public:
    /// Constructor
    /// @param modelFileName name of model to load
    /// @param inputImgSize size of the input image
    /// @param layout - model input layout
    SuperResolutionModel(const std::string& modelFileName,
                         const cv::Size& inputImgSize,
                         const std::string& layout = "");

    std::shared_ptr<InternalModelData> preprocess(const InputData& inputData, ov::InferRequest& request) override;
    std::unique_ptr<ResultBase> postprocess(InferenceResult& infResult) override;

protected:
    /// Reshapes the model input; coeff is presumably the upscale factor -- confirm in .cpp.
    void changeInputSize(std::shared_ptr<ov::Model>& model, int coeff);
    void prepareInputsOutputs(std::shared_ptr<ov::Model>& model) override;
};
/// Super-resolution variant with its own channel handling; overrides setBatch,
/// so channels are presumably processed through the batch dimension -- confirm in .cpp.
class SuperResolutionChannelJoint : public SuperResolutionModel {
public:
    using SuperResolutionModel::SuperResolutionModel;

    std::shared_ptr<InternalModelData> preprocess(const InputData& inputData, ov::InferRequest& request) override;
    std::unique_ptr<ResultBase> postprocess(InferenceResult& infResult) override;
    void prepareInputsOutputs(std::shared_ptr<ov::Model>& model) override;
    void setBatch(std::shared_ptr<ov::Model>& model) override;
};

View File

@@ -0,0 +1,28 @@
// Copyright (C) 2019-2024 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#pragma once
#include <deque>
#include <memory>
#include <vector>
/// Samples CPU load and keeps a bounded history of the measurements.
class CpuMonitor {
public:
    CpuMonitor();
    ~CpuMonitor();

    void setHistorySize(std::size_t size);
    std::size_t getHistorySize() const;
    /// Takes one measurement and stores it.
    void collectData();
    /// @returns the stored history; one vector of load values per sample
    std::deque<std::vector<double>> getLastHistory() const;
    /// @returns mean load values over the collected samples
    std::vector<double> getMeanCpuLoad() const;

private:
    unsigned samplesNumber;   // number of samples collected so far
    unsigned historySize;     // NOTE(review): narrower than the std::size_t used by setter/getter -- confirm intended
    std::vector<double> cpuLoadSum;
    std::deque<std::vector<double>> cpuLoadHistory;

    // Platform-specific measurement backend (pimpl).
    class PerformanceCounter;
    std::unique_ptr<PerformanceCounter> performanceCounter;
};

View File

@@ -0,0 +1,34 @@
// Copyright (C) 2019-2024 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#pragma once
#include <deque>
#include <memory>
/// Samples memory and swap usage and keeps a bounded history of the measurements.
class MemoryMonitor {
public:
    MemoryMonitor();
    ~MemoryMonitor();

    void setHistorySize(std::size_t size);
    std::size_t getHistorySize() const;
    /// Takes one measurement and stores it.
    void collectData();
    /// @returns the stored history of (memory, swap) usage pairs
    std::deque<std::pair<double, double>> getLastHistory() const;
    double getMeanMem() const; // in GiB
    double getMeanSwap() const;
    double getMaxMem() const;
    double getMaxSwap() const;
    double getMemTotal() const;
    double getMaxMemTotal() const; // a system may have hotpluggable memory

private:
    unsigned samplesNumber;  // number of samples collected so far
    std::size_t historySize;
    double memSum, swapSum;  // running sums used for the mean values
    double maxMem, maxSwap;
    double memTotal;
    double maxMemTotal;
    std::deque<std::pair<double, double>> memSwapUsageHistory;

    // Platform-specific measurement backend (pimpl).
    class PerformanceCounter;
    std::unique_ptr<PerformanceCounter> performanceCounter;
};

View File

@@ -0,0 +1,44 @@
// Copyright (C) 2019-2024 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#pragma once
#include <chrono>
#include <map>
#include <ostream>
#include <set>
#include <opencv2/imgproc.hpp>
#include "cpu_monitor.h"
#include "memory_monitor.h"
// Monitor graphs that can be displayed.
enum class MonitorType{CpuAverage, DistributionCpu, Memory};

/// Draws CPU/memory monitoring graphs on top of a frame and reports mean values.
/// NOTE(review): uses std::string/std::vector/std::ostringstream but does not
/// include <string>, <vector> or <sstream> directly -- confirm they arrive transitively.
class Presenter {
public:
    explicit Presenter(std::set<MonitorType> enabledMonitors = {},
                       int yPos = 20,
                       cv::Size graphSize = {150, 60},
                       std::size_t historySize = 20);
    /// @param keys string of monitor key characters enabling monitors (see handleKey)
    explicit Presenter(const std::string& keys,
                       int yPos = 20,
                       cv::Size graphSize = {150, 60},
                       std::size_t historySize = 20);

    /// Toggles the given monitor on/off.
    void addRemoveMonitor(MonitorType monitor);
    void handleKey(int key); // handles C, D, M, H keys
    /// Renders the enabled monitor graphs onto the frame.
    void drawGraphs(cv::Mat& frame);
    /// @returns textual summaries of the mean values of the enabled monitors
    std::vector<std::string> reportMeans() const;

    const int yPos;            // vertical offset of the graphs
    const cv::Size graphSize;  // size of each rendered graph
    const int graphPadding;

private:
    std::chrono::steady_clock::time_point prevTimeStamp;  // time of the previous data collection
    std::size_t historySize;
    CpuMonitor cpuMonitor;
    bool distributionCpuEnabled;  // presumably tracks the DistributionCpu mode separately from cpuMonitor -- confirm in .cpp
    MemoryMonitor memoryMonitor;
    std::ostringstream strStream;
};

View File

@@ -0,0 +1,123 @@
/*
// Copyright (C) 2020-2024 Intel Corporation
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
*/
#pragma once
#include <stdint.h>
#include <condition_variable>
#include <exception>
#include <memory>
#include <mutex>
#include <unordered_map>
#include <openvino/openvino.hpp>
#include <models/results.h>
#include <utils/performance_metrics.hpp>
#include "pipelines/requests_pool.h"
#include "models/model_base.h"
class ModelBase;
struct InputData;
struct MetaData;
struct ModelConfig;
/// This is base class for asynchronous pipeline
/// Derived classes should add functions for data submission and output processing
class AsyncPipeline {
public:
    /// Loads model and performs required initialization
    /// @param modelInstance pointer to model object. Object it points to should not be destroyed manually after passing
    /// pointer to this function.
    /// @param config - fine tuning configuration for model
    /// @param core - reference to ov::Core instance to use.
    /// If it is omitted, new instance of ov::Core will be created inside.
    AsyncPipeline(std::unique_ptr<ModelBase>&& modelInstance, const ModelConfig& config, ov::Core& core);
    virtual ~AsyncPipeline();

    /// Waits until either output data becomes available or pipeline allows to submit more input data.
    /// @param shouldKeepOrder if true, function will treat results as ready only if next sequential result (frame) is
    /// ready (so results can be extracted in the same order as they were submitted). Otherwise, function will return if
    /// any result is ready.
    void waitForData(bool shouldKeepOrder = true);

    /// @returns true if there's available infer requests in the pool
    /// and next frame can be submitted for processing, false otherwise.
    bool isReadyToProcess() {
        return requestsPool->isIdleRequestAvailable();
    }

    /// Waits for all currently submitted requests to be completed.
    ///
    void waitForTotalCompletion() {
        if (requestsPool)
            requestsPool->waitForTotalCompletion();
    }

    /// Forwards the new input image size to the model (see ModelBase::updateImageSize).
    void updateModelImageSize(const cv::Size& inputImgSize) {
        model->updateImageSize(inputImgSize);
    }

    /// Submits data to the model for inference
    /// @param inputData - input data to be submitted
    /// @param metaData - shared pointer to metadata container.
    /// Might be null. This pointer will be passed through pipeline and put to the final result structure.
    /// @returns -1 if image cannot be scheduled for processing (there's no free InferRequest available).
    /// Otherwise returns unique sequential frame ID for this particular request. Same frame ID will be written in the
    /// result structure.
    virtual int64_t submitData(const InputData& inputData, const std::shared_ptr<MetaData>& metaData);

    /// Gets available data from the queue
    /// @param shouldKeepOrder if true, function will treat results as ready only if next sequential result (frame) is
    /// ready (so results can be extracted in the same order as they were submitted). Otherwise, function will return if
    /// any result is ready.
    virtual std::unique_ptr<ResultBase> getResult(bool shouldKeepOrder = true);

    /// NOTE(review): method name misspells "Metrics"; kept for API compatibility with existing callers.
    PerformanceMetrics getInferenceMetircs() {
        return inferenceMetrics;
    }
    PerformanceMetrics getPreprocessMetrics() {
        return preprocessMetrics;
    }
    PerformanceMetrics getPostprocessMetrics() {
        return postprocessMetrics;
    }

protected:
    /// Returns processed result, if available
    /// @param shouldKeepOrder if true, function will return processed data sequentially,
    /// keeping original frames order (as they were submitted). Otherwise, function will return processed data in random
    /// order.
    /// @returns InferenceResult with processed information or empty InferenceResult (with negative frameID) if there's
    /// no any results yet.
    virtual InferenceResult getInferenceResult(bool shouldKeepOrder);

    std::unique_ptr<RequestsPool> requestsPool;  // pool of infer requests used for submissions
    std::unordered_map<int64_t, InferenceResult> completedInferenceResults;  // completed results keyed by frame id
    ov::CompiledModel compiledModel;
    std::mutex mtx;                   // presumably guards shared pipeline state -- confirm in .cpp
    std::condition_variable condVar;  // used by waitForData to block until progress is possible
    int64_t inputFrameId = 0;         // id to assign to the next submitted frame
    int64_t outputFrameId = 0;        // id of the next frame to return when keeping order
    std::exception_ptr callbackException = nullptr;  // exception captured asynchronously, if any
    std::unique_ptr<ModelBase> model;
    PerformanceMetrics inferenceMetrics;
    PerformanceMetrics preprocessMetrics;
    PerformanceMetrics postprocessMetrics;
};

View File

@@ -0,0 +1,51 @@
/*
// Copyright (C) 2018-2024 Intel Corporation
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
*/
#pragma once
#include <utils/ocv_common.hpp>
/// Base class for per-frame metadata carried through the pipeline.
struct MetaData {
    virtual ~MetaData() {}

    /// Downcasts to a concrete metadata type; throws std::bad_cast on mismatch.
    template <class T>
    T& asRef() {
        return dynamic_cast<T&>(*this);
    }
    template <class T>
    const T& asRef() const {
        return dynamic_cast<const T&>(*this);
    }
};
/// Metadata holding the original image and the moment it was captured.
struct ImageMetaData : public MetaData {
    cv::Mat img;
    std::chrono::steady_clock::time_point timeStamp;

    ImageMetaData() {}
    ImageMetaData(cv::Mat img, std::chrono::steady_clock::time_point timeStamp) : img(img), timeStamp(timeStamp) {}
};
/// Image metadata extended with the ground-truth class id (for accuracy checks).
struct ClassificationImageMetaData : public ImageMetaData {
    unsigned int groundTruthId;

    ClassificationImageMetaData(cv::Mat img,
                                std::chrono::steady_clock::time_point timeStamp,
                                unsigned int groundTruthId)
        : ImageMetaData(img, timeStamp),
          groundTruthId(groundTruthId) {}
};

View File

@@ -0,0 +1,65 @@
/*
// Copyright (C) 2020-2024 Intel Corporation
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
*/
#pragma once
#include <stddef.h>
#include <mutex>
#include <utility>
#include <vector>
#include <openvino/openvino.hpp>
/// This is class storing requests pool for asynchronous pipeline
///
class RequestsPool {
public:
    RequestsPool(ov::CompiledModel& compiledModel, unsigned int size);
    ~RequestsPool();

    /// Returns idle request from the pool. Returned request is automatically marked as In Use (this status will be
    /// reset after request processing completion) This function is thread safe as long as request is used only until
    /// setRequestIdle call
    /// @returns pointer to request with idle state or nullptr if all requests are in use.
    ov::InferRequest getIdleRequest();

    /// Sets particular request to Idle state
    /// This function is thread safe as long as request provided is not used after call to this function
    /// @param request - request to be returned to idle state
    void setRequestIdle(const ov::InferRequest& request);

    /// Returns number of requests in use. This function is thread safe.
    /// @returns number of requests in use
    size_t getInUseRequestsCount();

    /// Checks whether at least one idle request exists in the pool. This function is thread safe.
    /// @returns true if an idle request is available, false otherwise
    bool isIdleRequestAvailable();

    /// Waits for completion of every non-idle requests in pool.
    /// getIdleRequest should not be called together with this function or after it to avoid race condition or invalid
    /// state
    void waitForTotalCompletion();

    /// Returns list of all infer requests in the pool.
    /// @returns list of all infer requests in the pool.
    std::vector<ov::InferRequest> getInferRequestsList();

private:
    std::vector<std::pair<ov::InferRequest, bool>> requests;  // request plus its "in use" flag
    size_t numRequestsInUse;
    std::mutex mtx;  // guards requests/numRequestsInUse across threads (per the thread-safety notes above)
};

View File

@@ -0,0 +1,300 @@
// Copyright (C) 2018-2024 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#pragma once
#include <memory>
#include <set>
#include <string>
#include <tuple>
#include <unordered_map>
#include <utility>
#include <vector>
#include "utils/kuhn_munkres.hpp"
#include "cnn.hpp"
/// Single detection enriched with tracking information.
struct TrackedObject {
    cv::Rect rect;                       ///< Detection bounding box.
    float confidence;                    ///< Detection confidence (-1 if N/A).
    int object_id;                       ///< Id of the track this object belongs to (-1 if N/A).
    int label;                           // either id of a label, or UNKNOWN_LABEL_IDX
    static const int UNKNOWN_LABEL_IDX;  // the value (-1) for unknown label
    size_t frame_idx;                    ///< Frame index where object was detected (-1 if N/A).
                                         ///< NOTE(review): size_t initialized from -1 wraps to SIZE_MAX.

    TrackedObject(const cv::Rect& rect = cv::Rect(), float conf = -1.0f,
                  int label = -1, int object_id = -1) :
        rect(rect), confidence(conf), object_id(object_id), label(label), frame_idx(-1) {}
};

using TrackedObjects = std::vector<TrackedObject>;
///
/// \brief The Params struct stores parameters of Tracker.
/// Default values are presumably assigned by the constructor in the .cpp -- confirm there.
///
struct TrackerParams {
    size_t min_track_duration;  ///< Min track duration in frames

    size_t forget_delay;  ///< Forget about track if the last bounding box in
                          /// track was detected more than specified number of
                          /// frames ago.

    float affinity_thr;  ///< Affinity threshold which is used to determine if
                         /// tracklet and detection should be combined.

    float shape_affinity_w;  ///< Shape affinity weight.

    float motion_affinity_w;  ///< Motion affinity weight.

    float min_det_conf;  ///< Min confidence of detection.

    cv::Vec2f bbox_aspect_ratios_range;  ///< Bounding box aspect ratios range.

    cv::Vec2f bbox_heights_range;  ///< Bounding box heights range.

    bool drop_forgotten_tracks;  ///< Drop forgotten tracks. If it's enabled it
                                 /// disables an ability to get detection log.

    int max_num_objects_in_track;  ///< The number of objects in track is
                                   /// restricted by this parameter. If it is negative or zero, the max number of
                                   /// objects in track is not restricted.

    int averaging_window_size_for_rects;  ///< The number of objects in track for averaging rects of predictions.

    int averaging_window_size_for_labels;  ///< The number of objects in track for averaging labels of predictions.

    std::string objects_type;  ///< The type of boxes which will be grabbed from
                               /// detector. Boxes with other types are ignored.

    ///
    /// Default constructor.
    ///
    TrackerParams();
};
///
/// \brief The Track struct describes tracks.
///
struct Track {
    ///
    /// \brief Track constructor.
    /// \param objs Detected objects sequence. Must not be empty.
    ///
    explicit Track(const TrackedObjects& objs) : objects(objs), lost(0), length(1) {
        CV_Assert(!objs.empty());
        first_object = objs[0];
    }

    ///
    /// \brief empty returns if track does not contain objects.
    /// \return true if track does not contain objects.
    ///
    bool empty() const { return objects.empty(); }

    ///
    /// \brief size returns number of detected objects in a track.
    /// \return number of detected objects in a track.
    ///
    size_t size() const { return objects.size(); }

    ///
    /// \brief operator [] return const reference to detected object with
    /// specified index. No bounds checking is performed.
    /// \param i Index of object.
    /// \return const reference to detected object with specified index.
    ///
    const TrackedObject& operator[](size_t i) const { return objects[i]; }

    ///
    /// \brief operator [] return non-const reference to detected object with
    /// specified index. No bounds checking is performed.
    /// \param i Index of object.
    /// \return non-const reference to detected object with specified index.
    ///
    TrackedObject& operator[](size_t i) { return objects[i]; }

    ///
    /// \brief back returns const reference to last object in track.
    /// Asserts that the track is not empty.
    /// \return const reference to last object in track.
    ///
    const TrackedObject& back() const {
        CV_Assert(!empty());
        return objects.back();
    }

    ///
    /// \brief back returns non-const reference to last object in track.
    /// Asserts that the track is not empty.
    /// \return non-const reference to last object in track.
    ///
    TrackedObject& back() {
        CV_Assert(!empty());
        return objects.back();
    }

    TrackedObjects objects;      ///< Detected objects;
    size_t lost;                 ///< How many frames ago track has been lost.
    TrackedObject first_object;  ///< First object in track.
    size_t length;               ///< Length of a track including number of objects that were
                                 /// removed from track in order to avoid memory usage growth.
};
///
/// \brief Simple Hungarian algorithm-based tracker.
///
class Tracker {
public:
    ///
    /// \brief Constructor that creates an instance of Tracker with
    /// parameters.
    /// \param[in] params Tracker parameters.
    ///
    explicit Tracker(const TrackerParams& params = TrackerParams()) :
        m_params(params), m_tracks_counter(0), m_frame_size() {}

    ///
    /// \brief Process given frame.
    /// \param[in] frame Colored image (CV_8UC3).
    /// \param[in] detections Detected objects on the frame.
    /// \param[in] frame_idx Index of the processed frame.
    ///
    void Process(const cv::Mat& frame, const TrackedObjects& detections, int frame_idx);

    ///
    /// \brief Pipeline parameters getter.
    /// \return Parameters of pipeline.
    ///
    const TrackerParams& params() const;

    ///
    /// \brief Pipeline parameters setter.
    /// \param[in] params Parameters of pipeline.
    ///
    void set_params(const TrackerParams& params);

    ///
    /// \brief Reset the pipeline.
    ///
    void Reset();

    ///
    /// \brief Returns recently detected objects.
    /// \return recently detected objects.
    ///
    const TrackedObjects& detections() const;

    ///
    /// \brief Get active tracks to draw
    /// \return Active tracks.
    ///
    std::unordered_map<size_t, std::vector<cv::Point>> GetActiveTracks() const;

    ///
    /// \brief Get tracked detections.
    /// \return Tracked detections.
    ///
    TrackedObjects TrackedDetections() const;

    ///
    /// \brief Get tracked detections with labels.
    /// \return Tracked detections.
    ///
    TrackedObjects TrackedDetectionsWithLabels() const;

    ///
    /// \brief IsTrackForgotten returns true if track is forgotten.
    /// \param id Track ID.
    /// \return true if track is forgotten.
    ///
    bool IsTrackForgotten(size_t id) const;

    ///
    /// \brief tracks Returns all tracks including forgotten (lost too many frames
    /// ago).
    /// \return Set of tracks {id, track}.
    ///
    const std::unordered_map<size_t, Track>& tracks() const;

    ///
    /// \brief tracks Returns all tracks including forgotten (lost too many frames
    /// ago).
    /// \return Vector of tracks
    ///
    std::vector<Track> vector_tracks() const;

    ///
    /// \brief IsTrackValid Checks whether track is valid (duration > threshold).
    /// \param id Index of checked track.
    /// \return True if track duration exceeds some predefined value.
    ///
    bool IsTrackValid(size_t id) const;

    ///
    /// \brief DropForgottenTracks Removes tracks from memory that were lost too
    /// many frames ago.
    ///
    void DropForgottenTracks();

private:
    const std::set<size_t>& active_track_ids() const { return m_active_track_ids; }

    // Affinity scores between a tracked box and a detection (see TrackerParams weights).
    float ShapeAffinity(const cv::Rect& trk, const cv::Rect& det);
    float MotionAffinity(const cv::Rect& trk, const cv::Rect& det);

    // Matches active tracks to detections (Kuhn-Munkres assignment).
    void SolveAssignmentProblem(
        const std::set<size_t>& track_ids, const TrackedObjects& detections,
        std::set<size_t>* unmatched_tracks,
        std::set<size_t>* unmatched_detections,
        std::set<std::tuple<size_t, size_t, float>>* matches);

    void FilterDetectionsAndStore(const TrackedObjects& detected_objects);

    void ComputeDissimilarityMatrix(const std::set<size_t>& active_track_ids,
                                    const TrackedObjects& detections,
                                    cv::Mat* dissimilarity_matrix);

    std::vector<std::pair<size_t, size_t>> GetTrackToDetectionIds(
        const std::set<std::tuple<size_t, size_t, float>>& matches);

    float Distance(const TrackedObject& obj1, const TrackedObject& obj2);

    // Track creation / extension helpers.
    void AddNewTrack(const TrackedObject& detection);
    void AddNewTracks(const TrackedObjects& detections);
    void AddNewTracks(const TrackedObjects& detections, const std::set<size_t>& ids);
    void AppendToTrack(size_t track_id, const TrackedObject& detection);

    // Track lifetime maintenance helpers.
    bool EraseTrackIfBBoxIsOutOfFrame(size_t track_id);
    bool EraseTrackIfItWasLostTooManyFramesAgo(size_t track_id);
    // NOTE(review): name misspells "Update"; renaming would require changing the out-of-view .cpp definition.
    bool UptateLostTrackAndEraseIfItsNeeded(size_t track_id);
    void UpdateLostTracks(const std::set<size_t>& track_ids);

    std::unordered_map<size_t, std::vector<cv::Point>> GetActiveTracks();

    // Parameters of the pipeline.
    TrackerParams m_params;

    // Indexes of active tracks.
    std::set<size_t> m_active_track_ids;

    // All tracks.
    std::unordered_map<size_t, Track> m_tracks;

    // Recent detections.
    TrackedObjects m_detections;

    // Number of all current tracks.
    size_t m_tracks_counter;

    cv::Size m_frame_size;
};
/// Presumably returns the most frequent label among the last window_size objects of the track -- confirm in .cpp.
int LabelWithMaxFrequencyInTrack(const Track& track, int window_size);
/// Presumably reassigns each track's best label and filters out tracks with unknown labels -- confirm in .cpp.
std::vector<Track> UpdateTrackLabelsToBestAndFilterOutUnknowns(const std::vector<Track>& tracks);

View File

@@ -0,0 +1,46 @@
// Copyright (C) 2018-2024 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
/**
* @brief a header file with common samples functionality
* @file args_helper.hpp
*/
#pragma once
#include <map>
#include <set>
#include <string>
#include <vector>
#include <opencv2/core/types.hpp>
#include <openvino/openvino.hpp>
/**
 * @brief This function checks input args and existence of specified files in a given folder
 * @param[out] files vector appended with verified input files
 * @param arg path to a file to be checked for existence
 */
void readInputFilesArguments(std::vector<std::string>& files, const std::string& arg);

/**
 * @brief This function finds -i/--i key in input args
 * It's necessary to process multiple values for single key
 * @param[out] files vector appended with verified input files
 */
void parseInputFilesArguments(std::vector<std::string>& files);

/// Splits s by delim into tokens.
std::vector<std::string> split(const std::string& s, char delim);
/// Splits s by delim and stores tokens parsed as float into out.
void split(const std::string& s, char delim, std::vector<float> &out);

/// Joins the given strings with delim.
std::string merge(std::initializer_list<std::string> list, const char *delim);
std::string merge(const std::vector<std::string> &list, const char *delim);

/// Parses a device string into individual device names.
std::vector<std::string> parseDevices(const std::string& device_string);
/// Parses per-device values from values_string for the given set of devices.
std::map<std::string, int32_t> parseValuePerDevice(const std::set<std::string>& devices,
                                                   const std::string& values_string);

/// Converts a size string into cv::Size.
cv::Size stringToSize(const std::string& str);
/// Parses a layout string into a per-input layout map.
std::map<std::string, ov::Layout> parseLayoutString(const std::string& layout_string);

View File

@@ -0,0 +1,155 @@
// Copyright (C) 2018-2024 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#pragma once
#include <algorithm>
#include <queue>
#include <set>
#include <string>
#include <vector>
#include <opencv2/core.hpp>
#include <opencv2/imgproc.hpp>
#include <monitors/presenter.h>
#include <utils/ocv_common.hpp>
#include "utils/performance_metrics.hpp"
/// Outcome of a single classification compared against its ground-truth label.
enum class PredictionResult { Correct, Incorrect, Unknown };

/// Renders classification results as a grid of square image cells below a bar that
/// shows the presenter graphs, FPS metrics and, optionally, accuracy/test messages.
/// Cells are reused in round-robin order; the most recently written cell keeps a
/// colored border until the next update overwrites it.
class ClassificationGridMat {
public:
    cv::Mat outImg;

    /// @param presenter - supplies the height reserved on top for monitoring graphs.
    /// @param maxDisp - maximum overall display resolution.
    /// @param aspectRatio - desired width:height proportion of the grid.
    /// @param targetFPS - target display rate; the grid holds roughly this many cells,
    ///                    i.e. about one second worth of frames at the target rate.
    explicit ClassificationGridMat(Presenter& presenter,
                                   const cv::Size maxDisp = cv::Size{1920, 1080},
                                   const cv::Size aspectRatio = cv::Size{16, 9},
                                   double targetFPS = 60)
        : currSourceId{0} {
        // Solve width * height ~= targetFPS with width/height ~= aspectRatio.
        cv::Size size(static_cast<int>(std::round(sqrt(1. * targetFPS * aspectRatio.width / aspectRatio.height))),
                      static_cast<int>(std::round(sqrt(1. * targetFPS * aspectRatio.height / aspectRatio.width))));
        if (size.width == 0 || size.height == 0) {
            size = {1, 1};  // set minimum possible grid size
        }
        // Square cells, limited by the tighter of the two display dimensions.
        int minCellSize = std::min(maxDisp.width / size.width, maxDisp.height / size.height);
        cellSize = cv::Size(minCellSize, minCellSize);
        // Precompute cell origins (row-major), offset below the graphs area.
        for (int i = 0; i < size.height; i++) {
            for (int j = 0; j < size.width; j++) {
                points.emplace_back(cellSize.width * j, presenter.graphSize.height + cellSize.height * i);
            }
        }
        outImg.create((cellSize.height * size.height) + presenter.graphSize.height,
                      cellSize.width * size.width,
                      CV_8UC3);
        outImg.setTo(0);
        // Measuring an empty string still yields the font height, used for line placement.
        textSize = cv::getTextSize("", fontType, fontScale, thickness, &baseline);
        accuracyMessageSize = cv::getTextSize("Accuracy (top 0): 0.000", fontType, fontScale, thickness, &baseline);
        testMessageSize = cv::getTextSize(ClassificationGridMat::testMessage, fontType, fontScale, thickness, &baseline);
    }

    /// Redraws the whole top bar: clears it, then draws the presenter graphs, FPS
    /// metrics and, when requested, the accuracy (right-aligned) and FPS-test notice.
    /// NOTE(review): nTop is unsigned but formatted with %d; %u would be the exact specifier.
    void textUpdate(PerformanceMetrics& metrics,
                    PerformanceMetrics::TimePoint lastRequestStartTime,
                    double accuracy,
                    unsigned int nTop,
                    bool isFpsTest,
                    bool showAccuracy,
                    Presenter& presenter) {
        rectangle(outImg, {0, 0}, {outImg.cols, presenter.graphSize.height}, cv::Scalar(0, 0, 0), cv::FILLED);
        presenter.drawGraphs(outImg);
        metrics.update(lastRequestStartTime,
                       outImg,
                       cv::Point(textPadding, textSize.height + textPadding),
                       fontType,
                       fontScale,
                       cv::Scalar(255, 100, 100),
                       thickness);
        if (showAccuracy) {
            cv::putText(outImg,
                        cv::format("Accuracy (top %d): %.3f", nTop, accuracy),
                        cv::Point(outImg.cols - accuracyMessageSize.width - textPadding, textSize.height + textPadding),
                        fontType,
                        fontScale,
                        cv::Scalar(255, 255, 255),
                        thickness);
        }
        if (isFpsTest) {
            cv::putText(
                outImg,
                ClassificationGridMat::testMessage,
                cv::Point(outImg.cols - testMessageSize.width - textPadding, (textSize.height + textPadding) * 2),
                fontType,
                fontScale,
                cv::Scalar(50, 50, 255),
                thickness);
        }
    }

    /// Writes `mat` into the next cell with `label` drawn over it and a border around it.
    /// The previously written cell is blitted again first so its border disappears.
    void updateMat(const cv::Mat& mat, const std::string& label, PredictionResult predictionResul) {
        if (!prevImg.empty()) {
            size_t prevSourceId = currSourceId - 1;
            // When currSourceId == 0 the unsigned subtraction wraps and min() selects
            // the last cell, i.e. the previous cell in round-robin order.
            prevSourceId = std::min(prevSourceId, points.size() - 1);
            prevImg.copyTo(outImg(cv::Rect(points[prevSourceId], cellSize)));
        }
        // Label color encodes the verdict (BGR, consistent with the color names below).
        cv::Scalar textColor;
        switch (predictionResul) {
        case PredictionResult::Correct:
            textColor = cv::Scalar(75, 255, 75);  // green
            break;
        case PredictionResult::Incorrect:
            textColor = cv::Scalar(50, 50, 255);  // red
            break;
        case PredictionResult::Unknown:
            textColor = cv::Scalar(200, 10, 10);  // blue
            break;
        default:
            throw std::runtime_error("Undefined type of prediction result");
        }
        int labelThickness = cellSize.width / 20;
        // Scale the label font so the text spans the cell width minus the margins.
        cv::Size labelTextSize = cv::getTextSize(label, fontType, 1, 2, &baseline);
        double labelFontScale = static_cast<double>(cellSize.width - 2 * labelThickness) / labelTextSize.width;
        cv::resize(mat, prevImg, cellSize);
        putHighlightedText(prevImg,
                           label,
                           cv::Point(labelThickness, cellSize.height - labelThickness - labelTextSize.height),
                           fontType,
                           labelFontScale,
                           textColor,
                           2);
        cv::Mat cell = outImg(cv::Rect(points[currSourceId], cellSize));
        prevImg.copyTo(cell);
        cv::rectangle(cell, {0, 0}, {cell.cols, cell.rows}, {255, 50, 50}, labelThickness);  // draw a border
        if (currSourceId == points.size() - 1) {
            currSourceId = 0;
        } else {
            currSourceId++;
        }
    }

private:
    cv::Mat prevImg;      // last cell content without the border; used to erase the border
    cv::Size cellSize;    // square cell resolution
    size_t currSourceId;  // index of the cell to be written next (round-robin)
    std::vector<cv::Point> points;  // top-left corner of every cell
    static const int fontType = cv::FONT_HERSHEY_PLAIN;
    static constexpr double fontScale = 1.5;
    static const int thickness = 2;
    static const int textPadding = 10;
    static constexpr const char testMessage[] = "Testing, please wait...";
    int baseline;
    cv::Size textSize;             // line height probe (width is 0 for the empty string)
    cv::Size accuracyMessageSize;  // used to right-align the accuracy message
    cv::Size testMessageSize;      // used to right-align the test message
};
// Out-of-line definition: required for odr-used static constexpr members before C++17.
constexpr const char ClassificationGridMat::testMessage[];

View File

@@ -0,0 +1,193 @@
// Copyright (C) 2018-2024 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
/**
* @brief a header file with common samples functionality
* @file common.hpp
*/
#pragma once
#include <iostream>
#include <string>
#include <utility>
#include <vector>
#include <openvino/openvino.hpp>
#include "utils/slog.hpp"
#include "utils/args_helper.hpp"
#ifndef UNUSED
#ifdef _WIN32
#define UNUSED
#else
#define UNUSED __attribute__((unused))
#endif
#endif
/// Number of elements in a built-in array, evaluated at compile time.
template <typename T, std::size_t N>
constexpr std::size_t arraySize(const T(&)[N]) noexcept {
    return N;
}
/// Terminal exception handler: logs the in-flight exception (if any) and exits
/// with status 1. Intended to be called from a catch-all context; when no
/// exception is active the call is a programming error and the process aborts.
static inline void catcher() noexcept {
    const std::exception_ptr active = std::current_exception();
    if (!active) {
        // Nothing to report — catcher() was called outside of exception handling.
        std::abort();
    }
    try {
        std::rethrow_exception(active);
    } catch (const std::exception& error) {
        slog::err << error.what() << slog::endl;
    } catch (...) {
        slog::err << "Non-exception object thrown" << slog::endl;
    }
    std::exit(1);
}
/// Clamps `value` into the closed interval [low, high].
/// Requires low <= high; only operator< and operator> are used on T.
template <typename T>
T clamp(T value, T low, T high) {
    if (value < low) {
        return low;
    }
    if (value > high) {
        return high;
    }
    return value;
}
/// Logs OpenVINO version information.
/// NOTE(review): major/minor/patch come from compile-time header macros while
/// buildNumber comes from the runtime `version` object; the two can disagree
/// if the headers and the loaded runtime differ.
inline slog::LogStream& operator<<(slog::LogStream& os, const ov::Version& version) {
    return os << "OpenVINO" << slog::endl
              << "\tversion: " << OPENVINO_VERSION_MAJOR << "." << OPENVINO_VERSION_MINOR << "." << OPENVINO_VERSION_PATCH << slog::endl
              << "\tbuild: " << version.buildNumber;
}
/**
 * @class Color
 * @brief Immutable holder for the red, green and blue channels of a color.
 */
class Color {
private:
    unsigned char _r;
    unsigned char _g;
    unsigned char _b;

public:
    /**
     * @brief Constructs a color from its channel values.
     * @param r value for the red channel
     * @param g value for the green channel
     * @param b value for the blue channel
     */
    Color(unsigned char r, unsigned char g, unsigned char b) : _r(r), _g(g), _b(b) {}

    /// @return the red channel value
    unsigned char red() const {
        return _r;
    }
    /// @return the green channel value
    unsigned char green() const {
        return _g;
    }
    /// @return the blue channel value
    unsigned char blue() const {
        return _b;
    }
};
// Known colors for training classes from the Cityscapes dataset.
// NOTE(review): several entries look byte-reversed relative to the published
// Cityscapes RGB palette (e.g. {232, 35, 244} vs sidewalk RGB(244, 35, 232)),
// i.e. BGR values are passed into the Color(r, g, b) constructor — presumably
// because OpenCV images are BGR; verify against the rendering code.
static UNUSED const Color CITYSCAPES_COLORS[] = {
    { 128, 64,  128 },
    { 232, 35,  244 },
    { 70,  70,  70 },
    { 156, 102, 102 },
    { 153, 153, 190 },
    { 153, 153, 153 },
    { 30,  170, 250 },
    { 0,   220, 220 },
    { 35,  142, 107 },
    { 152, 251, 152 },
    { 180, 130, 70 },
    { 60,  20,  220 },
    { 0,   0,   255 },
    { 142, 0,   0 },
    { 70,  0,   0 },
    { 100, 60,  0 },
    { 90,  0,   0 },
    { 230, 0,   0 },
    { 32,  11,  119 },
    { 0,   74,  111 },
    { 81,  0,   81 }
};
/// Prints the devices visible to OpenVINO to stdout as "Available devices: CPU GPU ...".
inline void showAvailableDevices() {
    ov::Core core;
    const std::vector<std::string> deviceList = core.get_available_devices();
    std::cout << "Available devices:";
    for (const std::string& deviceName : deviceList) {
        std::cout << ' ' << deviceName;
    }
    std::cout << std::endl;
}
/// Strips the extension (final "." and everything after it) from a path.
/// @param filepath - file path, optionally containing directories.
/// @return the path without its extension; returned unchanged when the file name
///         has no extension.
/// Fix: the original used a bare rfind('.') and therefore truncated at a dot
/// inside a *directory* name (e.g. "a.b/model" became "a.b"); dots before the
/// last path separator are now ignored.
inline std::string fileNameNoExt(const std::string& filepath) {
    const auto sepPos = filepath.find_last_of("/\\");
    const auto dotPos = filepath.rfind('.');
    if (dotPos == std::string::npos || (sepPos != std::string::npos && dotPos < sepPos)) {
        return filepath;
    }
    return filepath.substr(0, dotPos);
}
/// Logs where the model was loaded and, per device, the stream/thread settings the
/// runtime actually selected for the compiled model.
inline void logCompiledModelInfo(
    const ov::CompiledModel& compiledModel,
    const std::string& modelName,
    const std::string& deviceName,
    const std::string& modelType = "") {
    slog::info << "The " << modelType << (modelType.empty() ? "" : " ") << "model " << modelName << " is loaded to " << deviceName << slog::endl;
    // Deduplicate the devices parsed out of a possibly compound device string.
    std::set<std::string> devices;
    for (const std::string& device : parseDevices(deviceName)) {
        devices.insert(device);
    }
    if (devices.find("AUTO") == devices.end()) {  // do not print info for AUTO device
        for (const auto& device : devices) {
            try {
                slog::info << "\tDevice: " << device << slog::endl;
                int32_t nstreams = compiledModel.get_property(ov::streams::num);
                slog::info << "\t\tNumber of streams: " << nstreams << slog::endl;
                if (device == "CPU") {
                    // 0 means the runtime chose the thread count automatically.
                    int32_t nthreads = compiledModel.get_property(ov::inference_num_threads);
                    slog::info << "\t\tNumber of threads: " << (nthreads == 0 ? "AUTO" : std::to_string(nthreads)) << slog::endl;
                }
            }
            // Plugins that do not expose these properties are skipped silently.
            catch (const ov::Exception&) {}
        }
    }
}
/// Logs the model's friendly name and, for every input and output port, its name,
/// element type, partial shape and layout.
inline void logBasicModelInfo(const std::shared_ptr<ov::Model>& model) {
    slog::info << "Model name: " << model->get_friendly_name() << slog::endl;
    // Inputs and outputs are reported identically; share the formatting code.
    auto logPorts = [](const char* title, const ov::OutputVector& ports) {
        slog::info << title << slog::endl;
        for (const ov::Output<ov::Node>& port : ports) {
            slog::info << "\t\t" << port.get_any_name() << ", " << port.get_element_type() << ", "
                       << port.get_partial_shape() << ", " << ov::layout::get_layout(port).to_string() << slog::endl;
        }
    };
    logPorts("\tInputs: ", model->inputs());
    logPorts("\tOutputs: ", model->outputs());
}
/// Maps each image name to its ground-truth class index using the given ground-truth file.
/// NOTE(review): the exact file format is defined by the implementation — confirm in the .cpp.
std::vector<unsigned> loadClassIndices(const std::string &groundtruth_filepath,
                                       const std::vector<std::string> &imageNames);

View File

@@ -0,0 +1,50 @@
/*
// Copyright (C) 2020-2024 Intel Corporation
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
*/
#pragma once
#include <stdint.h>
#include <map>
#include <set>
#include <string>
#include <openvino/openvino.hpp>
/// User-selected execution settings for a single model.
struct ModelConfig {
    std::string deviceName;          // target device string (possibly compound, e.g. MULTI/HETERO)
    std::string cpuExtensionsPath;   // path to a CPU extensions library (legacy option)
    std::string clKernelsConfigPath; // path to a GPU custom-kernels config (legacy option)
    unsigned int maxAsyncRequests;   // upper bound on simultaneously running infer requests
    ov::AnyMap compiledModelConfig;  // properties passed when compiling the model

    /// @return the individual devices referenced by deviceName
    std::set<std::string> getDevices();
    /// @return compiledModelConfig converted to the legacy string-to-string form
    std::map<std::string, std::string> getLegacyConfig() const;

protected:
    // Presumably a cache filled by getDevices() — confirm in the .cpp.
    std::set<std::string> devices;
};
/// Builds ModelConfig instances from the demos' command-line flag values.
class ConfigFactory {
public:
    /// Config honoring the user's device/nireq/nstreams/nthreads flags.
    static ModelConfig getUserConfig(const std::string& flags_d,
                                     uint32_t flags_nireq,
                                     const std::string& flags_nstreams,
                                     uint32_t flags_nthreads);
    /// Config tuned for minimal latency (exact properties are set in the .cpp).
    static ModelConfig getMinLatencyConfig(const std::string& flags_d, uint32_t flags_nireq);

protected:
    /// Settings shared by both public factory functions.
    static ModelConfig getCommonConfig(const std::string& flags_d, uint32_t flags_nireq);
};

View File

@@ -0,0 +1,21 @@
// Copyright (C) 2020-2024 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#pragma once
#include <gflags/gflags.h>
// Stamps out the gflags definitions shared by the demo apps. The *_message
// constants below are referenced at the macro expansion site, so the macros must
// be expanded in a translation unit that includes this header.
#define DEFINE_INPUT_FLAGS \
DEFINE_string(i, "", input_message); \
DEFINE_bool(loop, false, loop_message);

#define DEFINE_OUTPUT_FLAGS \
DEFINE_string(o, "", output_message); \
DEFINE_uint32(limit, 1000, limit_message);

// Help texts for the flags defined above.
static const char input_message[] = "Required. An input to process. The input must be a single image, a folder of "
                                    "images, video file or camera id.";
static const char loop_message[] = "Optional. Enable reading the input in a loop.";
static const char output_message[] = "Optional. Name of the output file(s) to save. Frames of odd width or height can be truncated. See https://github.com/opencv/opencv/pull/24086";
static const char limit_message[] = "Optional. Number of frames to store in output. If 0 is set, all frames are stored.";

View File

@@ -0,0 +1,127 @@
// Copyright (C) 2018-2024 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#pragma once
#include <algorithm>
#include <set>
#include <string>
#include <vector>
#include <opencv2/core/core.hpp>
/// Arranges frames from several sources into a single grid image for display.
/// Each source owns one fixed cell; updateMat-style writes track which sources
/// still have stale content via unupdatedSourceIDs.
class GridMat {
public:
    cv::Mat outimg;

    /**
     * @brief Builds a grid with one cell per source.
     * @param sizes - native resolutions of the sources; the largest defines the cell size.
     * @param maxDisp - upper bound for the overall grid resolution.
     * @throws std::invalid_argument when every source resolution is zero.
     */
    explicit GridMat(const std::vector<cv::Size>& sizes, const cv::Size maxDisp = cv::Size{1920, 1080}) {
        size_t maxWidth = 0;
        size_t maxHeight = 0;
        for (size_t i = 0; i < sizes.size(); i++) {
            maxWidth = std::max(maxWidth, static_cast<size_t>(sizes[i].width));
            maxHeight = std::max(maxHeight, static_cast<size_t>(sizes[i].height));
        }
        if (0 == maxWidth || 0 == maxHeight) {
            throw std::invalid_argument("Input resolution must not be zero.");
        }
        // Near-square grid with enough cells for all sources.
        size_t nGridCols = static_cast<size_t>(ceil(sqrt(static_cast<float>(sizes.size()))));
        size_t nGridRows = (sizes.size() - 1) / nGridCols + 1;
        // Scale the largest source down (never up) so the whole grid fits into maxDisp.
        size_t gridMaxWidth = static_cast<size_t>(maxDisp.width / nGridCols);
        size_t gridMaxHeight = static_cast<size_t>(maxDisp.height / nGridRows);
        float scaleWidth = static_cast<float>(gridMaxWidth) / maxWidth;
        float scaleHeight = static_cast<float>(gridMaxHeight) / maxHeight;
        float scaleFactor = std::min(1.f, std::min(scaleWidth, scaleHeight));
        cellSize.width = static_cast<int>(maxWidth * scaleFactor);
        cellSize.height = static_cast<int>(maxHeight * scaleFactor);
        // Precompute the top-left corner of every cell (row-major order).
        for (size_t i = 0; i < sizes.size(); i++) {
            cv::Point p;
            p.x = cellSize.width * (i % nGridCols);
            p.y = cellSize.height * (i / nGridCols);
            points.push_back(p);
        }
        outimg.create(cellSize.height * nGridRows, cellSize.width * nGridCols, CV_8UC3);
        outimg.setTo(0);
        clear();
    }

    cv::Size getCellSize() {
        return cellSize;
    }

    /**
     * @brief Copies one frame per cell into the grid and marks every cell updated.
     * @throws std::logic_error when there are more frames than cells.
     */
    void fill(std::vector<cv::Mat>& frames) {
        if (frames.size() > points.size()) {
            throw std::logic_error("Cannot display " + std::to_string(frames.size()) + " channels in a grid with " + std::to_string(points.size()) + " cells");
        }
        for (size_t i = 0; i < frames.size(); i++) {
            cv::Mat cell = outimg(cv::Rect(points[i].x, points[i].y, cellSize.width, cellSize.height));
            blitToCell(frames[i], cell);
        }
        unupdatedSourceIDs.clear();
    }

    /// Copies a single frame into the cell of the given source and marks it updated.
    void update(const cv::Mat& frame, const size_t sourceID) {
        cv::Mat cell = outimg(cv::Rect(points[sourceID], cellSize));
        blitToCell(frame, cell);
        // Erase by key: a no-op when the ID was already updated this cycle.
        // The original erase(find(sourceID)) was undefined behavior (erase(end()))
        // whenever the same source was updated twice between clear() calls.
        unupdatedSourceIDs.erase(sourceID);
    }

    /// @return true when every source has been updated since the last clear()/fill().
    bool isFilled() const noexcept {
        return unupdatedSourceIDs.empty();
    }

    /// Marks every cell as not yet updated in the current cycle.
    void clear() {
        size_t counter = 0;
        std::generate_n(std::inserter(unupdatedSourceIDs, unupdatedSourceIDs.end()), points.size(), [&counter]{return counter++;});
    }

    std::set<size_t> getUnupdatedSourceIDs() const noexcept {
        return unupdatedSourceIDs;
    }

    cv::Mat getMat() const noexcept {
        return outimg;
    }

private:
    // Writes a frame into a cell ROI: direct copy when sizes match, top-left paste
    // when the frame is smaller, resize otherwise. `cell` shares data with outimg.
    void blitToCell(const cv::Mat& frame, cv::Mat& cell) {
        if ((cellSize.width == frame.cols) && (cellSize.height == frame.rows)) {
            frame.copyTo(cell);
        } else if ((cellSize.width > frame.cols) && (cellSize.height > frame.rows)) {
            frame.copyTo(cell(cv::Rect(0, 0, frame.cols, frame.rows)));
        } else {
            cv::resize(frame, cell, cellSize);
        }
    }

    cv::Size cellSize;                  // resolution of a single cell
    std::set<size_t> unupdatedSourceIDs;  // sources not yet updated in this cycle
    std::vector<cv::Point> points;      // top-left corner of every cell
};
//void fillROIColor(cv::Mat& displayImage, cv::Rect roi, cv::Scalar color, double opacity) {
// if (opacity > 0) {
// roi = roi & cv::Rect(0, 0, displayImage.cols, displayImage.rows);
// cv::Mat textROI = displayImage(roi);
// cv::addWeighted(color, opacity, textROI, 1.0 - opacity , 0.0, textROI);
// }
//}
//
//void putTextOnImage(cv::Mat& displayImage, std::string str, cv::Point p,
// cv::HersheyFonts font, double fontScale, cv::Scalar color,
// int thickness = 1, cv::Scalar bgcolor = cv::Scalar(),
// double opacity = 0) {
// int baseline = 0;
// cv::Size textSize = cv::getTextSize(str, font, 0.5, 1, &baseline);
// fillROIColor(displayImage, cv::Rect(cv::Point(p.x, p.y + baseline),
// cv::Point(p.x + textSize.width, p.y - textSize.height)),
// bgcolor, opacity);
// cv::putText(displayImage, str, p, font, fontScale, color, thickness);
//}

View File

@@ -0,0 +1,29 @@
/*
// Copyright (C) 2021-2024 Intel Corporation
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
*/
#pragma once
#include <opencv2/opencv.hpp>
/// How resizeImageExt maps the source image onto the width x height canvas.
enum RESIZE_MODE {
    RESIZE_FILL,                  // stretch to the exact target size (aspect ratio not preserved)
    RESIZE_KEEP_ASPECT,           // preserve aspect ratio; exact crop/pad behavior is defined in the .cpp
    RESIZE_KEEP_ASPECT_LETTERBOX  // preserve aspect ratio with border padding (letterbox); see the .cpp
};
/// Resizes `mat` to width x height according to `resizeMode`.
/// NOTE(review): `roi` is presumably filled with the region holding actual image
/// content and `BorderConstant` with the padding color — confirm in the implementation.
cv::Mat resizeImageExt(const cv::Mat& mat, int width, int height, RESIZE_MODE resizeMode = RESIZE_FILL,
                       cv::InterpolationFlags interpolationMode = cv::INTER_LINEAR, cv::Rect* roi = nullptr,
                       cv::Scalar BorderConstant = cv::Scalar(0, 0, 0));

View File

@@ -0,0 +1,52 @@
// Copyright (C) 2020-2024 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#pragma once
#include <stddef.h>
#include <limits>
#include <memory>
#include <string>
#include <opencv2/core.hpp>
#include "utils/performance_metrics.hpp"
/// Frame-ownership policy for ImagesCapture::read(): with `efficient` the returned
/// Mat may share a backend-owned buffer, `safe` forces a copy (see the note above
/// openImagesCapture below).
enum class read_type { efficient, safe };

/// Abstract source of frames (camera, video file or image sequence).
class ImagesCapture {
public:
    /// Whether the source restarts from the beginning when it runs out of frames.
    const bool loop;

    // NOTE(review): single-argument constructor is implicit; `explicit` would be safer.
    ImagesCapture(bool loop) : loop{loop} {}

    virtual double fps() const = 0;

    /// @return the next frame; presumably an empty Mat signals end of stream —
    /// confirm in the concrete implementations.
    virtual cv::Mat read() = 0;

    /// @return a short name of the concrete capture type.
    virtual std::string getType() const = 0;

    /// Metrics accumulated by implementations while reading/decoding frames.
    const PerformanceMetrics& getMetrics() {
        return readerMetrics;
    }

    virtual ~ImagesCapture() = default;

protected:
    PerformanceMetrics readerMetrics;
};
// An advanced version of
// try {
// return cv::VideoCapture(std::stoi(input));
// } catch (const std::invalid_argument&) {
// return cv::VideoCapture(input);
// } catch (const std::out_of_range&) {
// return cv::VideoCapture(input);
// }
// Some VideoCapture backends continue owning the video buffer under cv::Mat. safe_copy forses to return a copy from
// read()
// https://github.com/opencv/opencv/blob/46e1560678dba83d25d309d8fbce01c40f21b7be/modules/gapi/include/opencv2/gapi/streaming/cap.hpp#L72-L76
/// Creates an ImagesCapture for `input` — an image, folder of images, video file or
/// numeric camera id (see the cv::VideoCapture note above).
/// @param loop - restart from the beginning at end of input.
/// @param type - `efficient` may return buffer views; `safe` always returns copies.
/// @param initialImageId - index of the first frame to return.
/// @param readLengthLimit - maximum number of frames to read.  // General option
/// @param cameraResolution - requested resolution when the input is a camera.
std::unique_ptr<ImagesCapture> openImagesCapture(
    const std::string& input,
    bool loop,
    read_type type = read_type::efficient,
    size_t initialImageId = 0,
    size_t readLengthLimit = std::numeric_limits<size_t>::max(),
    cv::Size cameraResolution = {1280, 720});

View File

@@ -0,0 +1,149 @@
// Copyright (C) 2018-2024 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#pragma once
#include <list>
#include <memory>
#include <set>
#include <thread>
#include <vector>
#include <queue>
#include <opencv2/opencv.hpp>
class InputChannel;

/// A frame source shared by several InputChannels. Callers must serialize access
/// to read() via lock()/unlock() (see InputChannel::read for the protocol).
class IInputSource {
public:
    /// Reads the next frame for `caller`; implementations may distribute the same
    /// frame to the other subscribed channels via InputChannel::push().
    virtual bool read(cv::Mat& mat, const std::shared_ptr<InputChannel>& caller) = 0;
    virtual void addSubscriber(const std::weak_ptr<InputChannel>& inputChannel) = 0;
    virtual cv::Size getSize() = 0;
    virtual void lock() {
        sourceLock.lock();
    }
    virtual void unlock() {
        sourceLock.unlock();
    }
    virtual ~IInputSource() = default;

private:
    std::mutex sourceLock;  // guards read() across all channels of this source
};
/// Per-consumer view of a shared IInputSource. A frame read by one channel is
/// pushed into the queues of the other subscribed channels, so every channel
/// observes every frame exactly once.
class InputChannel: public std::enable_shared_from_this<InputChannel> {  // note: public inheritance
public:
    InputChannel(const InputChannel&) = delete;
    InputChannel& operator=(const InputChannel&) = delete;

    /// Creates a channel and subscribes it to `source`. The constructor is private
    /// because shared_from_this() requires the object to be owned by a shared_ptr.
    static std::shared_ptr<InputChannel> create(const std::shared_ptr<IInputSource>& source) {
        auto tmp = std::shared_ptr<InputChannel>(new InputChannel(source));
        source->addSubscriber(tmp);
        return tmp;
    }

    /// Returns the next frame, from the local queue or straight from the source.
    /// Lock order: the queue mutex is released before taking the source lock and
    /// re-acquired afterwards; the queue is then re-checked because another channel
    /// may have pushed a frame in the meantime (double-checked pattern).
    bool read(cv::Mat& mat) {
        readQueueMutex.lock();
        if (readQueue.empty()) {
            readQueueMutex.unlock();
            source->lock();
            readQueueMutex.lock();
            if (readQueue.empty()) {
                // Still empty: pull a fresh frame while holding the source lock.
                bool res = source->read(mat, shared_from_this());
                readQueueMutex.unlock();
                source->unlock();
                return res;
            } else {
                // Another channel refilled the queue while we waited for the source.
                source->unlock();
            }
        }
        // Clone: the queued Mat may share its buffer with other channels.
        mat = readQueue.front().clone();
        readQueue.pop();
        readQueueMutex.unlock();
        return true;
    }

    /// Appends a frame that was read on behalf of this channel by another one.
    void push(const cv::Mat& mat) {
        readQueueMutex.lock();
        readQueue.push(mat);
        readQueueMutex.unlock();
    }

    cv::Size getSize() {
        return source->getSize();
    }

private:
    explicit InputChannel(const std::shared_ptr<IInputSource>& source): source{source} {}

    std::shared_ptr<IInputSource> source;
    // std::list backing keeps Mat headers stable while queued.
    std::queue<cv::Mat, std::list<cv::Mat>> readQueue;
    std::mutex readQueueMutex;
};
/// IInputSource backed by a cv::VideoCapture (video file or camera).
class VideoCaptureSource: public IInputSource {
public:
    VideoCaptureSource(const cv::VideoCapture& videoCapture, bool loop): videoCapture{videoCapture}, loop{loop},
        imSize{static_cast<int>(videoCapture.get(cv::CAP_PROP_FRAME_WIDTH)), static_cast<int>(videoCapture.get(cv::CAP_PROP_FRAME_HEIGHT))} {}

    /// Reads the next frame; at end of stream either rewinds to frame 0 (loop mode)
    /// or returns false. When more than one channel is subscribed, a clone of the
    /// frame is pushed to every channel except the caller.
    bool read(cv::Mat& mat, const std::shared_ptr<InputChannel>& caller) override {
        if (!videoCapture.read(mat)) {
            if (loop) {
                videoCapture.set(cv::CAP_PROP_POS_FRAMES, 0);
                videoCapture.read(mat);
            } else {
                return false;
            }
        }
        if (1 != subscribedInputChannels.size()) {
            // One clone shared by all queues; InputChannel::read clones again on pop.
            cv::Mat shared = mat.clone();
            for (const std::weak_ptr<InputChannel>& weakInputChannel : subscribedInputChannels) {
                try {
                    // Promoting an expired weak_ptr throws bad_weak_ptr; such channels are skipped.
                    std::shared_ptr<InputChannel> sharedInputChannel = std::shared_ptr<InputChannel>(weakInputChannel);
                    if (caller != sharedInputChannel) {
                        sharedInputChannel->push(shared);
                    }
                } catch (const std::bad_weak_ptr&) {}
            }
        }
        return true;
    }

    void addSubscriber(const std::weak_ptr<InputChannel>& inputChannel) override {
        subscribedInputChannels.push_back(inputChannel);
    }

    cv::Size getSize() override {
        return imSize;
    }

private:
    std::vector<std::weak_ptr<InputChannel>> subscribedInputChannels;
    cv::VideoCapture videoCapture;
    bool loop;        // rewind at end of stream instead of reporting failure
    cv::Size imSize;  // frame size queried from the capture at construction time
};
/// IInputSource serving a single still image.
class ImageSource: public IInputSource {
public:
    ImageSource(const cv::Mat& im, bool loop): im{im.clone()}, loop{loop} {}  // clone to avoid image changing

    /// Loop mode: always returns the image. Otherwise each subscribed channel gets
    /// the image exactly once — the caller is removed from the subscriber set on its
    /// first read, and subsequent reads return false.
    bool read(cv::Mat& mat, const std::shared_ptr<InputChannel>& caller) override {
        if (!loop) {
            auto subscribedInputChannelsIt = subscribedInputChannels.find(caller);
            if (subscribedInputChannels.end() == subscribedInputChannelsIt) {
                return false;
            } else {
                subscribedInputChannels.erase(subscribedInputChannelsIt);
                mat = im;
                return true;
            }
        } else {
            mat = im;
            return true;
        }
    }

    void addSubscriber(const std::weak_ptr<InputChannel>& inputChannel) override {
        if (false == subscribedInputChannels.insert(inputChannel).second)
            throw std::invalid_argument("The insertion did not take place");
    }

    cv::Size getSize() override {
        return im.size();
    }

private:
    // owner_less lets weak_ptrs act as set keys without promoting them.
    std::set<std::weak_ptr<InputChannel>, std::owner_less<std::weak_ptr<InputChannel>>> subscribedInputChannels;
    cv::Mat im;
    bool loop;
};

View File

@@ -0,0 +1,57 @@
// Copyright (C) 2018-2024 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#pragma once
#include "opencv2/core.hpp"
#include <memory>
#include <vector>
///
/// \brief The KuhnMunkres class
///
/// Solves the assignment problem (minimum-cost bipartite matching).
///
class KuhnMunkres {
public:
    ///
    /// \brief Initializes the class for assignment problem solving.
    /// \param[in] greedy If a faster greedy matching algorithm should be used.
    explicit KuhnMunkres(bool greedy = false);

    ///
    /// \brief Solves the assignment problem for given dissimilarity matrix.
    /// It returns a vector that where each element is a column index for
    /// corresponding row (e.g. result[0] stores optimal column index for very
    /// first row in the dissimilarity matrix).
    /// \param dissimilarity_matrix CV_32F dissimilarity matrix.
    /// \return Optimal column index for each row. -1 means that there is no
    /// column for row.
    ///
    std::vector<size_t> Solve(const cv::Mat &dissimilarity_matrix);

private:
    // Cell markers used by the Hungarian-algorithm bookkeeping.
    static constexpr int kStar = 1;
    static constexpr int kPrime = 2;

    cv::Mat dm_;       // presumably the working copy of the dissimilarity matrix — see the .cpp
    cv::Mat marked_;   // per-cell marks (kStar / kPrime)
    std::vector<cv::Point> points_;
    std::vector<int> is_row_visited_;  // per-row visited/cover flags
    std::vector<int> is_col_visited_;  // per-column visited/cover flags
    int n_;            // problem dimension
    bool greedy_;      // use the faster greedy approximation instead of the exact algorithm

    void TrySimpleCase();
    bool CheckIfOptimumIsFound();
    cv::Point FindUncoveredMinValPos();
    void UpdateDissimilarityMatrix(float val);
    int FindInRow(int row, int what);
    int FindInCol(int col, int what);
    void Run();
};

View File

@@ -0,0 +1,81 @@
/*
// Copyright (C) 2021-2024 Intel Corporation
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
*/
#pragma once
#include "opencv2/core.hpp"
#include <numeric>
#include <vector>
/// Axis-aligned box given by its left/top/right/bottom edges.
/// Edges are inclusive pixel coordinates, hence the +1 in the size computations.
struct Anchor {
    float left;
    float top;
    float right;
    float bottom;

    /// Width of the box, counting both boundary pixels.
    float getWidth() const {
        return right - left + 1.0f;
    }
    /// Height of the box, counting both boundary pixels.
    float getHeight() const {
        return bottom - top + 1.0f;
    }
    /// X coordinate of the box center.
    float getXCenter() const {
        return left + (getWidth() - 1.0f) / 2.0f;
    }
    /// Y coordinate of the box center.
    float getYCenter() const {
        return top + (getHeight() - 1.0f) / 2.0f;
    }
};
/// Greedy non-maximum suppression.
/// Sorts candidates by descending score, then repeatedly keeps the best remaining
/// box and suppresses every later box whose IoU with it reaches `thresh`.
/// Boxes with negative scores are ignored entirely.
/// @return indices of the kept boxes, in decreasing-score order.
template <typename BoxType>
std::vector<int> nms(const std::vector<BoxType>& boxes, const std::vector<float>& scores,
                     const float thresh, bool includeBoundaries=false) {
    // Precompute areas; includeBoundaries adds one pixel to each dimension.
    std::vector<float> areas(boxes.size());
    for (size_t i = 0; i < boxes.size(); ++i) {
        const BoxType& b = boxes[i];
        areas[i] = (b.right - b.left + includeBoundaries) * (b.bottom - b.top + includeBoundaries);
    }
    std::vector<int> order(scores.size());
    std::iota(order.begin(), order.end(), 0);
    std::sort(order.begin(), order.end(), [&scores](int lhs, int rhs) { return scores[lhs] > scores[rhs]; });
    // Candidates with negative scores sort to the back; cut them off.
    size_t validCount = 0;
    while (validCount < order.size() && scores[order[validCount]] >= 0) {
        ++validCount;
    }
    std::vector<int> keep;
    bool anySurvivor = true;  // false once a pass suppressed every later candidate
    for (size_t i = 0; anySurvivor && i < validCount; ++i) {
        const int current = order[i];
        if (current < 0) {
            continue;  // already suppressed by an earlier kept box
        }
        keep.push_back(current);
        anySurvivor = false;
        for (size_t j = i + 1; j < validCount; ++j) {
            const int other = order[j];
            if (other < 0) {
                continue;
            }
            anySurvivor = true;
            const auto overlappingWidth = std::fminf(boxes[current].right, boxes[other].right) - std::fmaxf(boxes[current].left, boxes[other].left);
            const auto overlappingHeight = std::fminf(boxes[current].bottom, boxes[other].bottom) - std::fmaxf(boxes[current].top, boxes[other].top);
            const auto intersection = overlappingWidth > 0 && overlappingHeight > 0 ? overlappingWidth * overlappingHeight : 0;
            const auto overlap = intersection / (areas[current] + areas[other] - intersection);
            if (overlap >= thresh) {
                order[j] = -1;  // mark as suppressed
            }
        }
    }
    return keep;
}

View File

@@ -0,0 +1,347 @@
// Copyright (C) 2018-2024 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
/**
* @brief a header file with common samples functionality using OpenCV
* @file ocv_common.hpp
*/
#pragma once
#include <opencv2/opencv.hpp>
#include <openvino/openvino.hpp>
#include "utils/common.hpp"
#include "utils/shared_tensor_allocator.hpp"
/**
 * @brief Reads one channel value of one pixel from a cv::Mat, converted to T.
 * @param mat - source image; must be CV_8UC1, CV_8UC3, CV_32FC1 or CV_32FC3.
 * @param h - row index, @param w - column index, @param c - channel index.
 * @throws std::runtime_error for any other mat type.
 */
template <typename T>
const T getMatValue(const cv::Mat& mat, size_t h, size_t w, size_t c) {
    const int row = static_cast<int>(h);
    const int col = static_cast<int>(w);
    switch (mat.type()) {
    case CV_8UC1:
        return static_cast<T>(mat.at<uchar>(row, col));
    case CV_8UC3:
        return static_cast<T>(mat.at<cv::Vec3b>(row, col)[c]);
    case CV_32FC1:
        return static_cast<T>(mat.at<float>(row, col));
    case CV_32FC3:
        return static_cast<T>(mat.at<cv::Vec3f>(row, col)[c]);
    }
    throw std::runtime_error("cv::Mat type is not recognized");
}
/**
 * @brief Resize and copy image data from cv::Mat object to a given Tensor object.
 * @param mat - given cv::Mat object with an image data.
 * @param tensor - Tensor object which to be filled by an image data.
 * @param batchIndex - batch index of an image inside of the blob.
 * @note The tensor shape is interpreted as NCHW (planar); pixel values are
 *       converted element-wise to the tensor's type (f32 or u8).
 */
static UNUSED void matToTensor(const cv::Mat& mat, ov::Tensor& tensor, int batchIndex = 0) {
    ov::Shape tensorShape = tensor.get_shape();
    // Indices are taken from a fixed NCHW layout, regardless of the model's real layout.
    static const ov::Layout layout("NCHW");
    const size_t width = tensorShape[ov::layout::width_idx(layout)];
    const size_t height = tensorShape[ov::layout::height_idx(layout)];
    const size_t channels = tensorShape[ov::layout::channels_idx(layout)];
    if (static_cast<size_t>(mat.channels()) != channels) {
        throw std::runtime_error("The number of channels for model input and image must match");
    }
    if (channels != 1 && channels != 3) {
        throw std::runtime_error("Unsupported number of channels");
    }
    // Element offset of this image within the batched tensor.
    int batchOffset = batchIndex * width * height * channels;
    cv::Mat resizedMat;
    if (static_cast<int>(width) != mat.size().width || static_cast<int>(height) != mat.size().height) {
        cv::resize(mat, resizedMat, cv::Size(width, height));
    } else {
        resizedMat = mat;
    }
    if (tensor.get_element_type() == ov::element::f32) {
        float_t* tensorData = tensor.data<float_t>();
        // Interleaved HWC source -> planar CHW destination, with type conversion.
        for (size_t c = 0; c < channels; c++)
            for (size_t h = 0; h < height; h++)
                for (size_t w = 0; w < width; w++)
                    tensorData[batchOffset + c * width * height + h * width + w] =
                        getMatValue<float_t>(resizedMat, h, w, c);
    } else {
        uint8_t* tensorData = tensor.data<uint8_t>();
        if (resizedMat.depth() == CV_32F) {
            throw std::runtime_error("Conversion of cv::Mat from float_t to uint8_t is forbidden");
        }
        for (size_t c = 0; c < channels; c++)
            for (size_t h = 0; h < height; h++)
                for (size_t w = 0; w < width; w++)
                    tensorData[batchOffset + c * width * height + h * width + w] =
                        getMatValue<uint8_t>(resizedMat, h, w, c);
    }
}
/**
 * @brief Wraps a cv::Mat as a batch-1 NHWC ov::Tensor without copying pixel data.
 * The SharedMatAllocator stores a cv::Mat header inside the tensor, keeping the
 * underlying buffer alive for the tensor's lifetime.
 * @throws std::runtime_error for unsupported depths or non-dense (row-padded) mats.
 */
static UNUSED ov::Tensor wrapMat2Tensor(const cv::Mat& mat) {
    auto matType = mat.type() & CV_MAT_DEPTH_MASK;
    if (matType != CV_8U && matType != CV_32F) {
        throw std::runtime_error("Unsupported mat type for wrapping");
    }
    bool isMatFloat = matType == CV_32F;
    const size_t channels = mat.channels();
    const size_t height = mat.rows;
    const size_t width = mat.cols;
    // Row and element strides in bytes; the data must be fully contiguous because
    // the tensor assumes dense NHWC memory.
    const size_t strideH = mat.step.buf[0];
    const size_t strideW = mat.step.buf[1];
    const bool isDense = !isMatFloat ? (strideW == channels && strideH == channels * width) :
        (strideW == channels * sizeof(float) && strideH == channels * width * sizeof(float));
    if (!isDense) {
        throw std::runtime_error("Doesn't support conversion from not dense cv::Mat");
    }
    auto precision = isMatFloat ? ov::element::f32 : ov::element::u8;
    return ov::Tensor(precision, ov::Shape{ 1, height, width, channels }, SharedMatAllocator{mat});
}
/// Resizes `mat` directly into the memory of a preallocated tensor.
/// The tensor must be u8, 4D NHWC, batch 1, 3 channels (enforced by the asserts);
/// the temporary cv::Mat header is a zero-copy view over the tensor's data.
static inline void resize2tensor(const cv::Mat& mat, ov::Tensor& tensor) {
    static const ov::Layout layout{"NHWC"};
    const ov::Shape& shape = tensor.get_shape();
    cv::Size size{int(shape[ov::layout::width_idx(layout)]), int(shape[ov::layout::height_idx(layout)])};
    assert(tensor.get_element_type() == ov::element::u8);
    assert(shape.size() == 4);
    assert(shape[ov::layout::batch_idx(layout)] == 1);
    assert(shape[ov::layout::channels_idx(layout)] == 3);
    cv::resize(mat, cv::Mat{size, CV_8UC3, tensor.data<uint8_t>()}, size);
}
/// Predicate over a shape: true when dimension i0 relates (via the captured
/// comparator) to BOTH dimensions i1 and i2, comparing maximum dimension lengths
/// (so dynamic dimensions are handled by their upper bound).
struct IntervalCondition {
    using DimType = size_t;
    using IndexType = size_t;
    using ConditionChecker = std::function<bool(IndexType, const ov::PartialShape&)>;

    /// Captures i1, i2 and the comparator; i0 is supplied at call time.
    template<class Cond>
    constexpr IntervalCondition(IndexType i1, IndexType i2, Cond c) :
        impl([=](IndexType i0, const ov::PartialShape& shape) {
            return c(shape[i0].get_max_length(), shape[i1].get_max_length()) && c(shape[i0].get_max_length(), shape[i2].get_max_length());})
    {}

    bool operator() (IndexType i0, const ov::PartialShape& shape) const { return impl(i0, shape); }

private:
    ConditionChecker impl;
};
/// Factory shorthand: makeCond<std::less_equal>(i1, i2) builds an IntervalCondition
/// applying Cond<size_t> between the queried dimension and dimensions i1 and i2.
template <template<class> class Cond, class ...Args>
IntervalCondition makeCond(Args&&...args) {
    return IntervalCondition(std::forward<Args>(args)..., Cond<IntervalCondition::DimType>{});
}
// (index of the hypothetical channel axis, condition to verify, resulting layout string)
using LayoutCondition = std::tuple<size_t/*dim index*/, IntervalCondition, std::string>;

/// Tries to tell NCHW from NHWC for a 4D shape.
static inline std::tuple<bool, ov::Layout> makeGuesLayoutFrom4DShape(const ov::PartialShape& shape) {
    // at the moment we make an assumption about NCHW & NHWC only:
    // if the hypothetical C value is less than or equal to the hypothetical H and W,
    // our assumption is correct and we pick the corresponding layout
    static const std::array<LayoutCondition, 2> hypothesisMatrix {{
        {1, makeCond<std::less_equal>(2, 3), "NCHW"},
        {3, makeCond<std::less_equal>(1, 2), "NHWC"}
    }};
    for (const auto &h : hypothesisMatrix) {
        auto channel_index = std::get<0>(h);
        const auto &cond = std::get<1>(h);
        if (cond(channel_index, shape)) {
            return std::make_tuple(true, ov::Layout{std::get<2>(h)});
        }
    }
    // Neither hypothesis held — caller must fall back or fail.
    return {false, ov::Layout{}};
}
// Guesses a layout string for the given partial shape.
// Supports rank 2 ("NC"), rank 3 ("NHW"/"HWN") and rank 4 (NCHW/NHWC);
// throws std::runtime_error when no reliable guess can be made.
static inline ov::Layout getLayoutFromShape(const ov::PartialShape& shape) {
    switch (shape.size()) {
    case 2:
        return "NC";
    case 3:
        // A dimension equal to 1 is taken as the batch axis.
        if (shape[0] == 1) {
            return "NHW";
        }
        if (shape[2] == 1) {
            return "HWN";
        }
        throw std::runtime_error("Can't guess layout for " + shape.to_string());
    case 4: {
        // A small dimension (1..4) is assumed to be the channels axis.
        if (ov::Interval{1, 4}.contains(shape[1].get_interval())) {
            return "NCHW";
        }
        if (ov::Interval{1, 4}.contains(shape[3].get_interval())) {
            return "NHWC";
        }
        // Two equal adjacent dims pin down where the spatial axes live.
        if (shape[1] == shape[2]) {
            return "NHWC";
        }
        if (shape[2] == shape[3]) {
            return "NCHW";
        }
        // Last resort: compare the hypothetical C axis against H and W.
        bool guessed = false;
        ov::Layout layout;
        std::tie(guessed, layout) = makeGuesLayoutFrom4DShape(shape);
        if (guessed) {
            return layout;
        }
        break;
    }
    default:
        break;
    }
    throw std::runtime_error("Usupported " + std::to_string(shape.size()) + "D shape");
}
/**
 * @brief Puts a text message on the frame, outlining it in white so it stays
 * readable regardless of the background.
 * @param frame - frame to put the text on.
 * @param message - text of the message.
 * @param position - bottom-left corner of the text string in the image.
 * @param fontFace - font type.
 * @param fontScale - font scale factor that is multiplied by the font-specific base size.
 * @param color - text color.
 * @param thickness - thickness of the lines used to draw a text.
 */
inline void putHighlightedText(const cv::Mat& frame,
                               const std::string& message,
                               cv::Point position,
                               int fontFace,
                               double fontScale,
                               cv::Scalar color,
                               int thickness) {
    // Slightly thicker white pass first, then the colored text on top of it.
    const cv::Scalar outlineColor{255, 255, 255};
    cv::putText(frame, message, position, fontFace, fontScale, outlineColor, thickness + 1);
    cv::putText(frame, message, position, fontFace, fontScale, color, thickness);
}
// TODO: replace with Size::empty() after OpenCV3 is dropped
// A size is empty unless both dimensions are strictly positive.
static inline bool isSizeEmpty(const cv::Size& size) {
    return !(size.width > 0 && size.height > 0);
}
// TODO: replace with Rect::empty() after OpenCV3 is dropped
// A rect is empty unless both extents are strictly positive.
static inline bool isRectEmpty(const cv::Rect& rect) {
    return !(rect.width > 0 && rect.height > 0);
}
// Scales frames (and coordinates drawn on them) so the output fits into a target
// resolution while preserving the input aspect ratio. Default-constructed
// instances are pass-through no-ops.
class OutputTransform {
public:
    OutputTransform() : doResize(false), scaleFactor(1) {}

    OutputTransform(cv::Size inputSize, cv::Size outputResolution) :
        doResize(true), scaleFactor(1), inputSize(inputSize), outputResolution(outputResolution) {}

    // Computes (and caches) the fitted resolution plus the uniform scale factor.
    cv::Size computeResolution() {
        float inputWidth = static_cast<float>(inputSize.width);
        float inputHeight = static_cast<float>(inputSize.height);
        // Uniform scale: the tighter axis wins, so the image never overflows the target.
        scaleFactor = MIN(outputResolution.height / inputHeight, outputResolution.width / inputWidth);
        newResolution = cv::Size{static_cast<int>(inputWidth * scaleFactor), static_cast<int>(inputHeight * scaleFactor)};
        return newResolution;
    }

    // Resizes `image` in place to the computed resolution; recomputes the scale
    // if the frame size changed since the previous call.
    void resize(cv::Mat& image) {
        if (!doResize) { return; }
        cv::Size currSize = image.size();
        if (currSize != inputSize) {
            inputSize = currSize;
            computeResolution();
        }
        if (scaleFactor == 1) { return; }
        cv::resize(image, image, newResolution);
    }

    // Maps a point-like object (anything with x/y members) into output coordinates.
    template<typename T>
    void scaleCoord(T& coord) {
        if (!doResize || scaleFactor == 1) { return; }
        coord.x = std::floor(coord.x * scaleFactor);
        coord.y = std::floor(coord.y * scaleFactor);
    }

    // Maps a rect-like object (x/y/width/height members) into output coordinates.
    template<typename T>
    void scaleRect(T& rect) {
        if (!doResize || scaleFactor == 1) { return; }
        scaleCoord(rect);
        rect.width = std::floor(rect.width * scaleFactor);
        rect.height = std::floor(rect.height * scaleFactor);
    }

    bool doResize;  // whether any transform is applied at all

private:
    float scaleFactor;          // uniform input->output scale; 1 means pass-through
    cv::Size inputSize;         // last seen input frame size
    cv::Size outputResolution;  // requested bounding resolution
    cv::Size newResolution;     // cached result of computeResolution()
};
// Applies optional per-channel normalization to input frames: BGR->RGB swap,
// mean subtraction and std scaling. With no options set it is a pass-through
// and keeps the input in u8.
class InputTransform {
public:
    InputTransform() : reverseInputChannels(false), isTrivial(true) {}

    // meanValues/scaleValues are space-separated float triples (one per channel);
    // empty strings mean "no mean subtraction" / "no scaling" respectively.
    InputTransform(bool reverseInputChannels, const std::string& meanValues, const std::string& scaleValues) :
        reverseInputChannels(reverseInputChannels),
        isTrivial(!reverseInputChannels && meanValues.empty() && scaleValues.empty()),
        means(meanValues.empty() ? cv::Scalar(0.0, 0.0, 0.0) : string2Vec(meanValues)),
        stdScales(scaleValues.empty() ? cv::Scalar(1.0, 1.0, 1.0) : string2Vec(scaleValues)) {
    }

    // Parses a space-separated triple of floats into a cv::Scalar.
    // Throws std::runtime_error on malformed input or a wrong value count.
    cv::Scalar string2Vec(const std::string& string) {
        const auto& strValues = split(string, ' ');
        std::vector<float> values;
        try {
            for (auto& str : strValues)
                values.push_back(std::stof(str));
        }
        catch (const std::invalid_argument&) {
            throw std::runtime_error("Invalid parameter --mean_values or --scale_values is provided.");
        }
        if (values.size() != 3) {
            throw std::runtime_error("InputTransform expects 3 values per channel, but get \"" + string + "\".");
        }
        return cv::Scalar(values[0], values[1], values[2]);
    }

    // Requests u8 input when no transform is needed, f32 otherwise.
    void setPrecision(ov::preprocess::PrePostProcessor& ppp, const std::string& tensorName) {
        const auto precision = isTrivial ? ov::element::u8 : ov::element::f32;
        ppp.input(tensorName).tensor().
            set_element_type(precision);
    }

    // Returns the normalized frame (or the input itself on the trivial fast path).
    cv::Mat operator()(const cv::Mat& inputs) {
        if (isTrivial) { return inputs; }
        cv::Mat result;
        inputs.convertTo(result, CV_32F);
        if (reverseInputChannels) {
            cv::cvtColor(result, result, cv::COLOR_BGR2RGB);
        }
        // TODO: merge the two following lines after OpenCV3 is droppped
        result -= means;
        result /= cv::Mat{stdScales};
        return result;
    }

private:
    bool reverseInputChannels;  // swap BGR->RGB before normalization
    bool isTrivial;             // no-op fast-path flag
    cv::Scalar means;           // per-channel means to subtract
    cv::Scalar stdScales;       // per-channel scales to divide by
};
// VideoWriter wrapper that opens the output file lazily on the first written
// frame and stops writing after `lim` frames (0 means unlimited).
class LazyVideoWriter {
    cv::VideoWriter writer;
    // Count of written frames; starts at 1 because the frame written right
    // after open() below does not increment it.
    unsigned nwritten;

public:
    const std::string filenames;  // output path; empty disables writing entirely
    const double fps;             // frame rate passed to the writer on open
    const unsigned lim;           // max number of frames to write, 0 = no limit

    LazyVideoWriter(const std::string& filenames, double fps, unsigned lim) :
        nwritten{1}, filenames{filenames}, fps{fps}, lim{lim} {}

    // Writes a frame, opening the writer on first use (MJPG fourcc, sized to
    // the first frame). Throws std::runtime_error when the file can't be opened.
    void write(const cv::Mat& im) {
        if (writer.isOpened() && (nwritten < lim || 0 == lim)) {
            writer.write(im);
            ++nwritten;
            return;
        }
        if (!writer.isOpened() && !filenames.empty()) {
            if (!writer.open(filenames, cv::VideoWriter::fourcc('M', 'J', 'P', 'G'), fps, im.size())) {
                throw std::runtime_error("Can't open video writer");
            }
            writer.write(im);
        }
    }
};

View File

@@ -0,0 +1,92 @@
// Copyright (C) 2020-2024 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
/**
* @brief a header file for performance metrics calculation class
* @file performance_metrics.hpp
*/
#pragma once
#include <chrono>
#include <iomanip>
#include <iostream>
#include <sstream>
#include "utils/ocv_common.hpp"
// Collects latency/FPS statistics over a sliding time window and over the whole
// run, and can render them onto frames. Method bodies live in the matching .cpp.
class PerformanceMetrics {
public:
    using Clock = std::chrono::steady_clock;
    using TimePoint = std::chrono::time_point<Clock>;
    using Duration = Clock::duration;
    using Ms = std::chrono::duration<double, std::ratio<1, 1000>>;
    using Sec = std::chrono::duration<double, std::ratio<1, 1>>;

    // Aggregated measurements: latency (ms) and throughput (frames per second).
    struct Metrics {
        double latency;
        double fps;
    };

    // Selects which metrics update()/paintMetrics() should render.
    enum MetricTypes {
        ALL,
        FPS,
        LATENCY
    };

    PerformanceMetrics(Duration timeWindow = std::chrono::seconds(1));

    // Registers a finished frame and paints the current metrics onto it.
    void update(TimePoint lastRequestStartTime,
                const cv::Mat& frame,
                cv::Point position = {15, 30},
                int fontFace = cv::FONT_HERSHEY_COMPLEX,
                double fontScale = 0.75,
                cv::Scalar color = {200, 10, 10},
                int thickness = 2, MetricTypes metricType = ALL);
    // Registers a finished frame without painting anything.
    void update(TimePoint lastRequestStartTime);

    /// Paints metrics over provided mat
    /// @param frame frame to paint over
    /// @param position left top corner of text block
    /// @param fontScale font scale
    /// @param color font color
    /// @param thickness font thickness
    void paintMetrics(const cv::Mat& frame,
                      cv::Point position = { 15, 30 },
                      int fontFace = cv::FONT_HERSHEY_COMPLEX,
                      double fontScale = 0.75,
                      cv::Scalar color = { 200, 10, 10 },
                      int thickness = 2, MetricTypes metricType = ALL) const;

    // Metrics over the most recent completed window / over the entire run.
    Metrics getLast() const;
    Metrics getTotal() const;
    // Prints the total metrics to the log.
    void logTotal() const;

private:
    // Accumulated latency/period/frame-count for one window (or the whole run).
    struct Statistic {
        Duration latency;
        Duration period;
        int frameCount;

        Statistic() {
            latency = Duration::zero();
            period = Duration::zero();
            frameCount = 0;
        }

        void combine(const Statistic& other) {
            latency += other.latency;
            period += other.period;
            frameCount += other.frameCount;
        }
    };

    Duration timeWindowSize;           // length of the sliding window
    Statistic lastMovingStatistic;     // last fully completed window
    Statistic currentMovingStatistic;  // window currently being filled
    Statistic totalStatistic;          // whole-run accumulation
    TimePoint lastUpdateTime;
    bool firstFrameProcessed;
};

// Prints a per-stage latency breakdown to the log (all values in ms).
void logLatencyPerStage(double readLat, double preprocLat, double inferLat, double postprocLat, double renderLat);

View File

@@ -0,0 +1,26 @@
/*
// Copyright (C) 2021-2024 Intel Corporation
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
*/
#pragma once
#include <opencv2/core.hpp>
// Allocator that lets an ov::Tensor share a cv::Mat's pixel buffer without copying.
// The Mat is held by value, so its ref-counted data stays alive as long as the tensor does.
struct SharedMatAllocator {
    const cv::Mat mat;
    // Hands out the Mat's own buffer when the requested size fits; no real allocation.
    void* allocate(size_t bytes, size_t) {return bytes <= mat.rows * mat.step[0] ? mat.data : nullptr;}
    // The memory is owned by the Mat, so deallocation is a no-op.
    void deallocate(void*, size_t, size_t) noexcept {}
    // Each instance wraps a distinct Mat, so allocators equal only themselves.
    bool is_equal(const SharedMatAllocator& other) const noexcept {return this == &other;}
};

View File

@@ -0,0 +1,99 @@
// Copyright (C) 2018-2024 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
/**
* @brief a header file with logging facility for common samples
* @file log.hpp
*/
#pragma once
#include <iostream>
#include <string>
namespace slog {
/**
 * @class LogStreamEndLine
 * @brief Tag type that terminates a log line (use as slog::endl)
 */
class LogStreamEndLine { };

static constexpr LogStreamEndLine endl;

/**
 * @class LogStreamBoolAlpha
 * @brief Tag type that switches the stream to textual bool output (use as slog::boolalpha)
 */
class LogStreamBoolAlpha { };

static constexpr LogStreamBoolAlpha boolalpha;

/**
 * @class LogStream
 * @brief Prefixed logging stream: every emitted line starts with "[ PREFIX ] "
 */
class LogStream {
    std::string tag;
    std::ostream* out;
    bool atLineStart;

public:
    /**
     * @brief Creates a LogStream that writes to the given std::ostream
     * @param prefix The tag printed at the start of every line
     * @param log_stream Target stream; must outlive this object
     */
    LogStream(const std::string &prefix, std::ostream& log_stream)
        : tag(prefix), out(&log_stream), atLineStart(true) {}

    /**
     * @brief Serializes a value, emitting the prefix when a fresh line begins
     * @param arg Object for serialization in the logger message
     */
    template<class T>
    LogStream &operator<<(const T &arg) {
        if (atLineStart) {
            *out << "[ " << tag << " ] ";
            atLineStart = false;
        }
        *out << arg;
        return *this;
    }

    // slog::endl - finishes the current line (flushes like std::endl)
    LogStream& operator<< (const LogStreamEndLine &/*arg*/) {
        atLineStart = true;
        *out << std::endl;
        return *this;
    }

    // slog::boolalpha - bools are printed as "true"/"false" from now on
    LogStream& operator<< (const LogStreamBoolAlpha &/*arg*/) {
        *out << std::boolalpha;
        return *this;
    }

    // std::vector / std::list support - one element per log line
    template<template<class, class> class Container, class T>
    LogStream& operator<< (const Container<T, std::allocator<T>>& container) {
        for (const auto& el : container) {
            *this << el << slog::endl;
        }
        return *this;
    }
};

static LogStream info("INFO", std::cout);
static LogStream debug("DEBUG", std::cout);
static LogStream warn("WARNING", std::cout);
static LogStream err("ERROR", std::cerr);
} // namespace slog

View File

@@ -0,0 +1,160 @@
// Copyright (C) 2018-2024 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#pragma once
#include <algorithm>
#include <atomic>
#include <condition_variable>
#include <memory>
#include <mutex>
#include <utility>
#include <set>
#include <string>
#include <thread>
#include <vector>
#include <opencv2/core/core.hpp>
#include "utils/performance_metrics.hpp"
// VideoFrame can represent not a single image but the whole grid
class VideoFrame {
public:
    typedef std::shared_ptr<VideoFrame> Ptr;

    // `frame` may be empty at construction time and filled in later.
    VideoFrame(unsigned sourceID, int64_t frameId, const cv::Mat& frame = cv::Mat()) :
        sourceID{sourceID}, frameId{frameId}, frame{frame} {}

    virtual ~VideoFrame() = default;  // A user has to define how it is reconstructed

    const unsigned sourceID;  // id of the producing video source
    const int64_t frameId;    // frame index within the source
    cv::Mat frame;
    PerformanceMetrics::TimePoint timestamp;  // when the frame entered the pipeline
};
class Worker;

// Unit of work scheduled on a Worker. Subclasses report readiness via isReady()
// and perform the actual work in process().
class Task {
public:
    explicit Task(VideoFrame::Ptr sharedVideoFrame, float priority = 0):
        sharedVideoFrame{sharedVideoFrame}, priority{priority} {}

    // True when the task can be processed right now; polled by Worker threads.
    virtual bool isReady() = 0;
    // Executes the task; Worker calls this with its queue lock released.
    virtual void process() = 0;

    virtual ~Task() = default;

    std::string name;
    VideoFrame::Ptr sharedVideoFrame;  // it is possible that two tasks try to draw on the same cvMat
    const float priority;  // higher value -> ordered earlier by HigherPriority
};
// Strict weak ordering for the Worker's task set: higher priority first,
// then lower frame id, then the shared_ptr address as a final tie-breaker
// (so distinct tasks never compare equivalent).
struct HigherPriority {
    bool operator()(const std::shared_ptr<Task>& lhs, const std::shared_ptr<Task>& rhs) const {
        if (lhs->priority != rhs->priority) {
            return lhs->priority > rhs->priority;
        }
        const int64_t lhsFrame = lhs->sharedVideoFrame->frameId;
        const int64_t rhsFrame = rhs->sharedVideoFrame->frameId;
        if (lhsFrame != rhsFrame) {
            return lhsFrame < rhsFrame;
        }
        return lhs < rhs;
    }
};
// Thread pool that polls a priority-ordered set of Tasks and runs the first
// ready one. The first exception thrown by any task stops the pool and is
// rethrown from join().
class Worker {
public:
    explicit Worker(unsigned threadNum):
        threadPool(threadNum), running{false} {}

    ~Worker() {
        // NOTE(review): threads are only signalled here, not joined; callers
        // are expected to call join() before destruction - confirm at call sites.
        stop();
    }

    // Starts all pool threads; must be called before pushing tasks.
    void runThreads() {
        running = true;
        for (std::thread& t : threadPool) {
            t = std::thread(&Worker::threadFunc, this);
        }
    }

    // Adds a task and wakes one worker thread.
    void push(std::shared_ptr<Task> task) {
        tasksMutex.lock();
        tasks.insert(task);
        tasksMutex.unlock();
        tasksCondVar.notify_one();
    }

    // Worker loop: sleep while the set is empty, then pick the first ready task
    // (the set is ordered by HigherPriority) and run it with the lock released.
    // NOTE(review): when tasks exist but none is ready yet, this loop busy-polls
    // rather than waiting - presumably intentional since readiness changes are
    // not signalled through tasksCondVar.
    void threadFunc() {
        while (running) {
            std::unique_lock<std::mutex> lk(tasksMutex);
            while (running && tasks.empty()) {
                tasksCondVar.wait(lk);
            }
            try {
                auto it = std::find_if(tasks.begin(), tasks.end(), [](const std::shared_ptr<Task>& task){return task->isReady();});
                if (tasks.end() != it) {
                    const std::shared_ptr<Task> task = std::move(*it);
                    tasks.erase(it);
                    lk.unlock();
                    task->process();
                }
            } catch (...) {
                // Remember only the first exception and shut the pool down.
                std::lock_guard<std::mutex> lock{exceptionMutex};
                if (nullptr == currentException) {
                    currentException = std::current_exception();
                    stop();
                }
            }
        }
    }

    // Asks all threads to finish; does not wait for them (see join()).
    void stop() {
        running = false;
        tasksCondVar.notify_all();
    }

    // Waits for all threads, then rethrows the first captured task exception, if any.
    void join() {
        for (auto& t : threadPool) {
            t.join();
        }
        if (nullptr != currentException) {
            std::rethrow_exception(currentException);
        }
    }

private:
    std::condition_variable tasksCondVar;
    std::set<std::shared_ptr<Task>, HigherPriority> tasks;  // pending tasks, highest priority first
    std::mutex tasksMutex;                                  // guards `tasks`
    std::vector<std::thread> threadPool;
    std::atomic<bool> running;
    std::exception_ptr currentException;  // first exception thrown by a task
    std::mutex exceptionMutex;            // guards currentException
};
// Thin mutex-guarded wrapper around an arbitrary container C. The locked*
// helpers hold the lock for the duration of one operation; direct access to
// `container` requires manually holding `mutex`.
template <class C> class ConcurrentContainer {
public:
    C container;
    mutable std::mutex mutex;

    // True when the container holds no elements.
    bool lockedEmpty() const noexcept {
        std::lock_guard<std::mutex> guard{mutex};
        return container.empty();
    }

    // Current number of elements.
    typename C::size_type lockedSize() const noexcept {
        std::lock_guard<std::mutex> guard{mutex};
        return container.size();
    }

    // Appends a copy of `value` at the back.
    void lockedPushBack(const typename C::value_type& value) {
        std::lock_guard<std::mutex> guard{mutex};
        container.push_back(value);
    }

    // Pops the last element into `value`; returns false when empty.
    bool lockedTryPop(typename C::value_type& value) {
        std::lock_guard<std::mutex> guard{mutex};
        if (container.empty()) {
            return false;
        }
        value = container.back();
        container.pop_back();
        return true;
    }

    // Returns a snapshot copy of the underlying container.
    operator C() const {
        std::lock_guard<std::mutex> guard{mutex};
        return container;
    }
};

View File

@@ -0,0 +1,66 @@
/*
// Copyright (C) 2021-2024 Intel Corporation
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
*/
#include <string>
#include <utility>
#include <opencv2/core.hpp>
struct ImageResult;

// Interactive window for the image processing demo: shows the original image,
// the processing result, or a sweep-line comparison between them, plus a help
// overlay. Method bodies live in the matching .cpp.
class Visualizer {
private:
    // names of window and trackbar
    std::string winName = "Image Processing Demo (press A for help)";
    std::string trackbarName = "Orig/Diff | Res";

    // images info
    cv::Size resolution = cv::Size(1000, 600);  // current display resolution
    bool isResolutionSet = false;
    cv::Mat inputImg = cv::Mat(resolution, CV_32FC3, 0.);    // last input image
    cv::Mat resultImg = cv::Mat(resolution, CV_32FC3, 0.);   // last processing result
    cv::Mat displayImg = cv::Mat(resolution, CV_32FC3, 0.);  // what is actually shown

    // trackbar info
    std::string mode = "result";  // current view mode, switched via keyboard
    bool isTrackbarShown = false;
    int slider = 1;  // sweep-line position controlled by the trackbar

    // help message
    bool isHelpShown = false;
    std::string helpMessage[4] = {"Use R to display the result",
                                  "Use O to display the orig with result",
                                  "Use V to display the diff with result",
                                  "Esc or Q to quit"};

    void addTrackbar();
    void disableTrackbar();
    void setResolution(cv::Size& newResolution);
    // Blends textual marks onto `image` with the given alpha.
    void markImage(cv::Mat& image, const std::pair<std::string, std::string>& marks, float alpha);
    void drawSweepLine(cv::Mat& image);
    // Rebuilds displayImg from inputImg/resultImg according to `mode`.
    void changeDisplayImg();

public:
    Visualizer(const std::string& type = "");

    cv::Size getSize();

    // change display image for new input and result images
    cv::Mat renderResultData(ImageResult result, cv::Size& newResolution);
    // show display image or specified value
    void show(cv::Mat img = cv::Mat());
    // Reacts to key presses listed in helpMessage (R/O/V, A, Esc/Q).
    void handleKey(int key);
};

View File

@@ -0,0 +1,418 @@
// Copyright (C) 2018-2024 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#include <algorithm>
#include <limits>
#include <numeric>
#include <utility>
#include <vector>
#include <opencv2/imgproc/imgproc.hpp>
#include "openvino/openvino.hpp"
#include "action_detector.hpp"
// Number of floats per record in the SSD location output (one bbox).
#define SSD_LOCATION_RECORD_SIZE 4
// Number of floats per record in the SSD priorbox output (one prior bbox).
#define SSD_PRIORBOX_RECORD_SIZE 4
// The detector outputs two confidences per candidate; index 1 is the positive
// class (presumably "person" - confirm with the model documentation).
#define NUM_DETECTION_CLASSES 2
#define POSITIVE_DETECTION_IDX 1
// Comparator for (score, payload) pairs: orders by score, highest first.
template <typename T>
bool SortScorePairDescend(const std::pair<float, T>& pair1,
                          const std::pair<float, T>& pair2) {
    return pair2.first < pair1.first;
}
// Loads, configures and compiles the person/action detection model, then
// precomputes the per-head anchor bookkeeping used when parsing raw SSD outputs.
ActionDetection::ActionDetection(const ActionDetectorConfig& config) :
    BaseCnnDetection(config.is_async), m_config(config) {
    m_detectorName = "action detector";
    ov::Layout desiredLayout = {"NHWC"};

    slog::info << "Reading model: " << m_config.m_path_to_model << slog::endl;
    std::shared_ptr<ov::Model> model = m_config.m_core.read_model(m_config.m_path_to_model);
    logBasicModelInfo(model);

    ov::OutputVector inputInfo = model->inputs();
    if (inputInfo.size() != 1) {
        throw std::runtime_error("Action Detection network should have only one input");
    }

    m_input_name = model->input().get_any_name();
    // Fall back to NCHW when the model does not carry an explicit input layout.
    m_modelLayout = ov::layout::get_layout(model->input());
    if (m_modelLayout.empty())
        m_modelLayout = {"NCHW"};
    m_network_input_size.height = model->input().get_shape()[ov::layout::height_idx(m_modelLayout)];
    m_network_input_size.width = model->input().get_shape()[ov::layout::width_idx(m_modelLayout)];

    // The "new" model flavor is recognized by the presence of its location output name.
    m_new_model = false;
    ov::OutputVector outputs = model->outputs();
    auto cmp = [&](const ov::Output<ov::Node>& output) { return output.get_any_name() == m_config.new_loc_blob_name; };
    auto it = std::find_if(outputs.begin(), outputs.end(), cmp);
    if (it != outputs.end())
        m_new_model = true;

    // Accept u8 NHWC frames; OpenVINO converts to the model's layout and f32.
    ov::preprocess::PrePostProcessor ppp(model);
    ppp.input().tensor()
        .set_element_type(ov::element::u8)
        .set_layout(desiredLayout);
    ppp.input().preprocess()
        .convert_layout(m_modelLayout)
        .convert_element_type(ov::element::f32);
    ppp.input().model().set_layout(m_modelLayout);
    model = ppp.build();
    slog::info << "PrePostProcessor configuration:" << slog::endl;
    slog::info << ppp << slog::endl;

    ov::set_batch(model, m_config.m_max_batch_size);
    m_model = m_config.m_core.compile_model(model, m_config.m_deviceName);
    logCompiledModelInfo(m_model, m_config.m_path_to_model, m_config.m_deviceName, m_config.m_model_type);

    // Build flat-index ranges and anchor maps for every SSD head so detection
    // candidates can later be mapped back to (head, anchor, cell).
    const auto& head_anchors = m_new_model ? m_config.new_anchors : m_config.old_anchors;
    const int num_heads = head_anchors.size();

    m_head_ranges.resize(num_heads + 1);
    m_glob_anchor_map.resize(num_heads);
    m_head_step_sizes.resize(num_heads);

    m_head_ranges[0] = 0;
    int head_shift = 0;
    for (int head_id = 0; head_id < num_heads; ++head_id) {
        m_glob_anchor_map[head_id].resize(head_anchors[head_id]);
        int anchor_height, anchor_width;
        for (int anchor_id = 0; anchor_id < head_anchors[head_id]; ++anchor_id) {
            // Output blob names differ between the old and the new model flavors.
            const auto glob_anchor_name = m_new_model ?
                m_config.new_action_conf_blob_name_prefix + std::to_string(head_id + 1) +
                m_config.new_action_conf_blob_name_suffix + std::to_string(anchor_id + 1) :
                m_config.old_action_conf_blob_name_prefix + std::to_string(anchor_id + 1);
            m_glob_anchor_names.push_back(glob_anchor_name);

            // Output shapes are indexed differently for the two model flavors.
            const auto anchor_dims = m_model.output(glob_anchor_name).get_shape();
            anchor_height = m_new_model ? anchor_dims[ov::layout::height_idx(m_modelLayout)] : anchor_dims[1];
            anchor_width = m_new_model ? anchor_dims[ov::layout::width_idx(m_modelLayout)] : anchor_dims[2];
            std::size_t num_action_classes = m_new_model ? anchor_dims[ov::layout::channels_idx(m_modelLayout)] : anchor_dims[3];
            if (num_action_classes != m_config.num_action_classes) {
                throw std::logic_error("The number of specified actions and the number of actions predicted by "
                                       "the Person/Action Detection Retail model must match");
            }

            const int anchor_size = anchor_height * anchor_width;
            head_shift += anchor_size;
            m_head_step_sizes[head_id] = m_new_model ? anchor_size : 1;
            m_glob_anchor_map[head_id][anchor_id] = m_num_glob_anchors++;
        }
        m_head_ranges[head_id + 1] = head_shift;
        m_head_blob_sizes.emplace_back(anchor_width, anchor_height);
    }
    m_num_candidates = head_shift;

    // With exactly two action classes, class 0 is treated as background (see GetDetections).
    m_binary_task = m_config.num_action_classes == 2;
}
// Forwards the queued frame to the base async machinery; no-op when nothing is queued.
void ActionDetection::submitRequest() {
    if (m_enqueued_frames == 0) {
        return;
    }
    m_enqueued_frames = 0;
    BaseCnnDetection::submitRequest();
}
// Queues a single frame for inference: lazily creates the infer request,
// remembers the frame dimensions and resizes the image into the input tensor.
void ActionDetection::enqueue(const cv::Mat& frame) {
    if (!m_request) {
        m_request = std::make_shared<ov::InferRequest>(m_model.create_infer_request());
    }
    m_width = static_cast<float>(frame.cols);
    m_height = static_cast<float>(frame.rows);
    ov::Tensor inputTensor = m_request->get_tensor(m_input_name);
    resize2tensor(frame, inputTensor);
    m_enqueued_frames = 1;
}
// Wraps the raw inference output tensors into cv::Mat headers (no copies) and
// forwards them to GetDetections for decoding into DetectedActions.
DetectedActions ActionDetection::fetchResults() {
    const auto loc_blob_name = m_new_model ? m_config.new_loc_blob_name : m_config.old_loc_blob_name;
    const auto det_conf_blob_name = m_new_model ? m_config.new_det_conf_blob_name : m_config.old_det_conf_blob_name;

    // Bounding-box regression output.
    ov::Shape loc_out_shape = m_model.output(loc_blob_name).get_shape();
    const cv::Mat loc_out(loc_out_shape[0],
                          loc_out_shape[1],
                          CV_32F,
                          m_request->get_tensor(loc_blob_name).data<float>());

    // Detection confidence output.
    ov::Shape conf_out_shape = m_model.output(det_conf_blob_name).get_shape();
    const cv::Mat main_conf_out(conf_out_shape[0],
                                conf_out_shape[1],
                                CV_32F,
                                m_request->get_tensor(det_conf_blob_name).data<float>());

    // Per-anchor action confidence outputs.
    std::vector<cv::Mat> add_conf_out;
    for (int glob_anchor_id = 0; glob_anchor_id < m_num_glob_anchors; ++glob_anchor_id) {
        const auto& blob_name = m_glob_anchor_names[glob_anchor_id];
        ov::Shape shape = m_request->get_tensor(blob_name).get_shape();
        add_conf_out.emplace_back(shape[ov::layout::height_idx(m_modelLayout)],
                                  shape[ov::layout::width_idx(m_modelLayout)],
                                  CV_32F,
                                  m_request->get_tensor(blob_name).data());
    }

    // Parse detections
    DetectedActions result;
    if (m_new_model) {
        // The new model flavor has no priorbox output: priors are generated on the fly.
        const cv::Mat priorbox_out;
        result = GetDetections(loc_out, main_conf_out, priorbox_out, add_conf_out,
                               cv::Size(static_cast<int>(m_width), static_cast<int>(m_height)));
    } else {
        ov::Shape old_priorbox_shape = m_model.output(m_config.old_priorbox_blob_name).get_shape();
        const cv::Mat priorbox_out((int)old_priorbox_shape[0], (int)old_priorbox_shape[1],
                                   CV_32F,
                                   m_request->get_tensor(m_config.old_priorbox_blob_name).data());
        result = GetDetections(loc_out, main_conf_out, priorbox_out, add_conf_out,
                               cv::Size(static_cast<int>(m_width), static_cast<int>(m_height)));
    }
    return result;
}
// Reads four floats as a normalized bbox. When `inverse` is set the stored
// order is (y, x, y, x) instead of (x, y, x, y).
inline ActionDetection::NormalizedBBox
ActionDetection::ParseBBoxRecord(const float* data, bool inverse) const {
    NormalizedBBox bbox;
    if (inverse) {
        bbox.xmin = data[1];
        bbox.ymin = data[0];
        bbox.xmax = data[3];
        bbox.ymax = data[2];
    } else {
        bbox.xmin = data[0];
        bbox.ymin = data[1];
        bbox.xmax = data[2];
        bbox.ymax = data[3];
    }
    return bbox;
}
// Builds the normalized prior box for one cell of a detection head.
// `pos` is the flat cell index inside the head's H x W grid, `step` is the head
// stride in input pixels and `anchor` is the anchor size in input pixels.
inline ActionDetection::NormalizedBBox
ActionDetection::GeneratePriorBox(int pos, int step, const cv::Size2f& anchor,
                                  const cv::Size& blob_size) const {
    // Flat index -> (row, col) of the grid cell.
    const int row = pos / blob_size.width;
    const int col = pos % blob_size.width;

    // Anchor center in input-image pixels (middle of the cell).
    const float center_x = (col + 0.5f) * static_cast<float>(step);
    const float center_y = (row + 0.5f) * static_cast<float>(step);

    // Corners, normalized by the network input size.
    NormalizedBBox box;
    box.xmin = (center_x - 0.5f * anchor.width) / static_cast<float>(m_network_input_size.width);
    box.ymin = (center_y - 0.5f * anchor.height) / static_cast<float>(m_network_input_size.height);
    box.xmax = (center_x + 0.5f * anchor.width) / static_cast<float>(m_network_input_size.width);
    box.ymax = (center_y + 0.5f * anchor.height) / static_cast<float>(m_network_input_size.height);
    return box;
}
// Decodes an SSD-encoded bbox against its prior box and per-coordinate variances,
// then maps the normalized result into pixel coordinates of the given frame.
cv::Rect ActionDetection::ConvertToRect(
    const NormalizedBBox& prior_bbox, const NormalizedBBox& variances,
    const NormalizedBBox& encoded_bbox, const cv::Size& frame_size) const {
    // Prior box in center-size form.
    const float pw = prior_bbox.xmax - prior_bbox.xmin;
    const float ph = prior_bbox.ymax - prior_bbox.ymin;
    const float pcx = 0.5f * (prior_bbox.xmin + prior_bbox.xmax);
    const float pcy = 0.5f * (prior_bbox.ymin + prior_bbox.ymax);

    // SSD decoding: center offsets scale with the prior size, extents are exponentiated.
    const float dcx = variances.xmin * encoded_bbox.xmin * pw + pcx;
    const float dcy = variances.ymin * encoded_bbox.ymin * ph + pcy;
    const float dw = static_cast<float>(exp(static_cast<float>(variances.xmax * encoded_bbox.xmax))) * pw;
    const float dh = static_cast<float>(exp(static_cast<float>(variances.ymax * encoded_bbox.ymax))) * ph;

    // Corners of the decoded box, still normalized to [0, 1].
    const float x0 = dcx - 0.5f * dw;
    const float y0 = dcy - 0.5f * dh;
    const float x1 = dcx + 0.5f * dw;
    const float y1 = dcy + 0.5f * dh;

    // Scale into pixel coordinates of the frame.
    return cv::Rect(static_cast<int>(x0 * frame_size.width),
                    static_cast<int>(y0 * frame_size.height),
                    static_cast<int>((x1 - x0) * frame_size.width),
                    static_cast<int>((y1 - y0) * frame_size.height));
}
// Decodes raw SSD outputs into DetectedActions:
//  - filters candidates by detection confidence,
//  - picks the best action class per candidate via a softmax over scaled logits,
//  - decodes each bbox against its prior box,
//  - merges overlapping detections with Soft-NMS.
DetectedActions ActionDetection::GetDetections(const cv::Mat& loc, const cv::Mat& main_conf,
                                               const cv::Mat& priorboxes, const std::vector<cv::Mat>& add_conf,
                                               const cv::Size& frame_size) const {
    // Prepare input data buffers
    const float* loc_data = reinterpret_cast<float*>(loc.data);
    const float* det_conf_data = reinterpret_cast<float*>(main_conf.data);
    // The new model flavor has no priorbox output: priors are generated on the fly.
    const float* prior_data = m_new_model ? NULL : reinterpret_cast<float*>(priorboxes.data);

    const int total_num_anchors = add_conf.size();
    std::vector<float*> action_conf_data(total_num_anchors);
    for (int i = 0; i < total_num_anchors; ++i) {
        action_conf_data[i] = reinterpret_cast<float*>(add_conf[i].data);
    }

    // Variable to store all detection candidates
    DetectedActions valid_detections;

    // Iterate over all candidate bboxes
    for (int p = 0; p < m_num_candidates; ++p) {
        // Parse detection confidence from the SSD Detection output
        const float detection_conf =
            det_conf_data[p * NUM_DETECTION_CLASSES + POSITIVE_DETECTION_IDX];

        // Skip low-confidence detections
        if (detection_conf < m_config.detection_confidence_threshold) {
            continue;
        }

        // Estimate the action head ID (m_head_ranges holds cumulative candidate counts)
        int head_id = 0;
        while (p >= m_head_ranges[head_id + 1]) {
            ++head_id;
        }
        const int head_p = p - m_head_ranges[head_id];

        // Estimate the action anchor ID
        const int head_num_anchors =
            m_new_model ? m_config.new_anchors[head_id] : m_config.old_anchors[head_id];
        const int anchor_id = head_p % head_num_anchors;

        // Estimate the action label
        const int glob_anchor_id = m_glob_anchor_map[head_id][anchor_id];
        const float* anchor_conf_data = action_conf_data[glob_anchor_id];
        // Index arithmetic differs between the two model flavors (see ctor's step sizes).
        const int action_conf_idx_shift = m_new_model ?
            head_p / head_num_anchors :
            head_p / head_num_anchors * m_config.num_action_classes;
        const int action_conf_step = m_head_step_sizes[head_id];
        const float scale = m_new_model ? m_config.new_action_scale : m_config.old_action_scale;

        // Softmax over the scaled action logits; in the binary case class 0 is
        // excluded from the argmax but still contributes to the normalizer.
        int action_label = -1;
        float action_max_exp_value = 0.f;
        float action_sum_exp_values = 0.f;
        for (size_t c = 0; c < m_config.num_action_classes; ++c) {
            float action_exp_value =
                std::exp(scale * anchor_conf_data[action_conf_idx_shift + c * action_conf_step]);
            action_sum_exp_values += action_exp_value;
            if (action_exp_value > action_max_exp_value && ((c > 0 && m_binary_task) || !m_binary_task)) {
                action_max_exp_value = action_exp_value;
                action_label = c;
            }
        }
        if (std::fabs(action_sum_exp_values) < std::numeric_limits<float>::epsilon()) {
            throw std::logic_error("action_sum_exp_values can't be equal to 0");
        }

        // Estimate the action confidence
        float action_conf = action_max_exp_value / action_sum_exp_values;

        // Skip low-confidence actions
        if (action_label < 0 || action_conf < m_config.action_confidence_threshold) {
            action_label = m_config.default_action_id;
            action_conf = 0.f;
        }

        // Parse bbox from the SSD Detection output
        const auto priorbox = m_new_model ?
            GeneratePriorBox(head_p / head_num_anchors,
                             m_config.new_det_heads[head_id].step,
                             m_config.new_det_heads[head_id].anchors[anchor_id],
                             m_head_blob_sizes[head_id]) :
            ParseBBoxRecord(prior_data + p * SSD_PRIORBOX_RECORD_SIZE, false);
        // Variances are either fixed in the config (new) or stored after all priors (old).
        const auto variance =
            ParseBBoxRecord(m_new_model ?
                            m_config.variances :
                            prior_data + (m_num_candidates + p) * SSD_PRIORBOX_RECORD_SIZE, false);
        const auto encoded_bbox =
            ParseBBoxRecord(loc_data + p * SSD_LOCATION_RECORD_SIZE, m_new_model);

        const auto det_rect = ConvertToRect(priorbox, variance, encoded_bbox, frame_size);

        // Store detected action
        valid_detections.emplace_back(det_rect, action_label, detection_conf, action_conf);
    }

    // Merge most overlapped detections
    std::vector<int> out_det_indices;
    SoftNonMaxSuppression(valid_detections, m_config.nms_sigma, m_config.keep_top_k,
                          m_config.detection_confidence_threshold, &out_det_indices);

    DetectedActions detections;
    for (size_t i = 0; i < out_det_indices.size(); ++i) {
        detections.emplace_back(valid_detections[out_det_indices[i]]);
    }
    return detections;
}
// Soft-NMS: instead of discarding overlapping boxes outright, their scores decay
// by exp(-IoU^2 / sigma); boxes whose score falls below min_det_conf drop out.
// Only the top_k highest-scoring candidates are considered. Selected indices
// (into `detections`) are appended to *out_indices in decreasing-score order.
void ActionDetection::SoftNonMaxSuppression(const DetectedActions& detections,
                                            const float sigma, size_t top_k, const float min_det_conf,
                                            std::vector<int>* out_indices) const {
    // Store input bbox scores
    std::vector<float> scores(detections.size());
    for (size_t i = 0; i < detections.size(); ++i) {
        scores[i] = detections[i].detection_conf;
    }

    top_k = std::min(top_k, scores.size());

    // Select top-k score indices
    std::vector<size_t> score_idx(scores.size());
    std::iota(score_idx.begin(), score_idx.end(), 0);
    std::nth_element(score_idx.begin(), score_idx.begin() + top_k, score_idx.end(),
                     [&scores](size_t i1, size_t i2) {return scores[i1] > scores[i2];});

    // Extract top-k score values
    std::vector<float> top_scores(top_k);
    for (size_t i = 0; i < top_scores.size(); ++i) {
        top_scores[i] = scores[score_idx[i]];
    }

    // Carry out Soft Non-Maximum Suppression algorithm
    out_indices->clear();
    for (size_t step = 0; step < top_scores.size(); ++step) {
        auto best_score_itr = std::max_element(top_scores.begin(), top_scores.end());
        if (*best_score_itr < min_det_conf) {
            break;
        }

        // Add current bbox to output list
        const size_t local_anchor_idx = std::distance(top_scores.begin(), best_score_itr);
        const int anchor_idx = score_idx[local_anchor_idx];
        out_indices->emplace_back(anchor_idx);
        // Zero the winner's score so it is never selected again.
        *best_score_itr = 0.f;

        // Update top_scores of the rest bboxes
        for (size_t local_reference_idx = 0; local_reference_idx < top_scores.size(); ++local_reference_idx) {
            // Skip updating step for the low-confidence bbox
            if (top_scores[local_reference_idx] < min_det_conf) {
                continue;
            }

            // Calculate the Intersection over Union metric between two bboxes
            const size_t reference_idx = score_idx[local_reference_idx];
            const auto& rect1 = detections[anchor_idx].rect;
            const auto& rect2 = detections[reference_idx].rect;
            const auto intersection = rect1 & rect2;
            float overlap = 0.f;
            if (intersection.width > 0 && intersection.height > 0) {
                const int intersection_area = intersection.area();
                overlap = static_cast<float>(intersection_area) / static_cast<float>(rect1.area() + rect2.area() - intersection_area);
            }

            // Scale bbox score using the exponential rule
            top_scores[local_reference_idx] *= std::exp(-overlap * overlap / sigma);
        }
    }
}

View File

@@ -0,0 +1,72 @@
// Copyright (C) 2018-2024 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#include <algorithm>
#include <vector>
#include <limits>
#include <opencv2/imgproc.hpp>
#include "face_reid.hpp"
// Reference face-chip size (in pixels) the canonical landmark coordinates
// below were originally defined for.
static const float h = 112.;
static const float w = 96.;
// reference landmarks points in the unit square [0,1]x[0,1]
// Five (x, y) pairs; presumably eyes, nose tip and mouth corners in the
// order the landmark model emits them -- TODO confirm against that model.
static const float ref_landmarks_normalized[] = {
    30.2946f / w, 51.6963f / h, 65.5318f / w, 51.5014f / h, 48.0252f / w,
    71.7366f / h, 33.5493f / w, 92.3655f / h, 62.7299f / w, 92.2041f / h};
// Estimates a 2x3 affine (similarity) transform that maps the landmark set
// `src` onto `dst`, Procrustes-style: both point sets are centered, scaled
// to unit standard deviation, and the optimal rotation is recovered from the
// SVD of src^T * dst.
//
// NOTE: both *src and *dst are modified in place (mean-subtracted and
// rescaled) -- callers must not rely on their previous contents.
cv::Mat GetTransform(cv::Mat* src, cv::Mat* dst) {
    // Center both point sets on their column-wise means.
    cv::Mat col_mean_src;
    reduce(*src, col_mean_src, 0, cv::REDUCE_AVG);
    for (int i = 0; i < src->rows; i++) {
        src->row(i) -= col_mean_src;
    }
    cv::Mat col_mean_dst;
    reduce(*dst, col_mean_dst, 0, cv::REDUCE_AVG);
    for (int i = 0; i < dst->rows; i++) {
        dst->row(i) -= col_mean_dst;
    }
    // Normalize each set by its overall standard deviation, clamped away
    // from zero so the divisions stay well-defined.
    cv::Scalar mean, dev_src, dev_dst;
    cv::meanStdDev(*src, mean, dev_src);
    dev_src(0) =
        std::max(static_cast<double>(std::numeric_limits<float>::epsilon()), dev_src(0));
    *src /= dev_src(0);
    cv::meanStdDev(*dst, mean, dev_dst);
    dev_dst(0) =
        std::max(static_cast<double>(std::numeric_limits<float>::epsilon()), dev_dst(0));
    *dst /= dev_dst(0);
    // Optimal rotation between the normalized sets via SVD.
    // (The local `w` intentionally shadows the file-level width constant.)
    cv::Mat w, u, vt;
    cv::SVD::compute((*src).t() * (*dst), w, u, vt);
    cv::Mat r = (u * vt).t();
    // Left 2x2 block: rotation + relative scale; third column: translation.
    cv::Mat m(2, 3, CV_32F);
    m.colRange(0, 2) = r * (dev_dst(0) / dev_src(0));
    m.col(2) = (col_mean_dst.t() - m.colRange(0, 2) * col_mean_src.t());
    return m;
}
// Warps each face crop so that its detected landmarks line up with the
// canonical positions in `ref_landmarks_normalized`.
// Both the images and the landmark matrices are modified in place; the
// landmarks are expected in normalized [0,1] coordinates on input.
void AlignFaces(std::vector<cv::Mat>* face_images, std::vector<cv::Mat>* landmarks_vec) {
    if (landmarks_vec->empty()) {
        return;
    }
    CV_Assert(face_images->size() == landmarks_vec->size());
    cv::Mat reference = cv::Mat(5, 2, CV_32F);
    for (size_t face_idx = 0; face_idx < face_images->size(); face_idx++) {
        cv::Mat& image = face_images->at(face_idx);
        cv::Mat& landmarks = landmarks_vec->at(face_idx);
        // Scale both the canonical and the detected landmarks to pixel
        // coordinates of this particular crop. GetTransform() destroys its
        // inputs, so `reference` is refilled on every iteration.
        for (int pt = 0; pt < reference.rows; pt++) {
            reference.at<float>(pt, 0) = ref_landmarks_normalized[2 * pt] * image.cols;
            reference.at<float>(pt, 1) = ref_landmarks_normalized[2 * pt + 1] * image.rows;
            landmarks.at<float>(pt, 0) *= image.cols;
            landmarks.at<float>(pt, 1) *= image.rows;
        }
        cv::Mat transform = GetTransform(&reference, &landmarks);
        cv::warpAffine(image, image, transform, image.size(), cv::WARP_INVERSE_MAP);
    }
}

View File

@@ -0,0 +1,111 @@
// Copyright (C) 2018-2024 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#include <algorithm>
#include <map>
#include <stdexcept>
#include <string>
#include <vector>
#include <opencv2/core/core.hpp>
#include <opencv2/imgproc/imgproc.hpp>
#include "openvino/openvino.hpp"
#include "cnn.hpp"
CnnDLSDKBase::CnnDLSDKBase(const Config& config) : m_config(config) {}
// Reads the model, configures u8/NHWC input pre-processing, reserves room
// for the configured maximum batch size and creates the inference request.
//
// @throws std::runtime_error if the model does not have exactly one input.
void CnnDLSDKBase::Load() {
    std::shared_ptr<ov::Model> model = m_config.m_core.read_model(m_config.m_path_to_model);
    ov::OutputVector inputs = model->inputs();
    if (inputs.size() != 1) {
        // Returning silently here (as the previous code did) leaves the
        // object half-initialized and crashes later in InferBatch();
        // report the unsupported topology immediately instead.
        throw std::runtime_error("Network should have only one input");
    }
    // Frames are fed as u8 NHWC tensors; fall back to NCHW when the model
    // carries no layout information of its own.
    m_desired_layout = {"NHWC"};
    ov::Layout model_layout = ov::layout::get_layout(model->input());
    if (model_layout.empty()) {
        model_layout = {"NCHW"};
    }
    // Remember every output tensor name so InferBatch() can fetch them all.
    ov::OutputVector outputs = model->outputs();
    for (auto& item : outputs) {
        const std::string name = item.get_any_name();
        m_output_tensors_names.push_back(name);
    }
    // Convert incoming u8/NHWC data to the model's own layout and f32.
    ov::preprocess::PrePostProcessor ppp(model);
    ppp.input().tensor()
        .set_element_type(ov::element::u8)
        .set_layout(m_desired_layout);
    ppp.input().preprocess()
        .convert_layout(model_layout)
        .convert_element_type(ov::element::f32);
    ppp.input().model().set_layout(model_layout);
    model = ppp.build();
    // Allow any batch size between 1 and the configured maximum; the input
    // tensor is pre-allocated at the maximum and sliced per request.
    m_modelShape = model->input().get_shape();
    m_modelShape[ov::layout::batch_idx(m_desired_layout)] = m_config.m_max_batch_size;
    ov::set_batch(model, {1, m_config.m_max_batch_size});
    m_compiled_model = m_config.m_core.compile_model(model, m_config.m_deviceName, { ov::hint::num_requests(1) });
    m_infer_request = m_compiled_model.create_infer_request();
    m_in_tensor = m_infer_request.get_input_tensor();
    m_in_tensor.set_shape(m_modelShape);
}
// Runs inference on up to maxBatchSize() images in one request.
// Each frame is resized into its own batch slice of the pre-allocated input
// tensor; the request then uses an ROI covering only the filled slices.
//
// @param frames        Input images; count must not exceed the max batch size.
// @param fetch_results Callback receiving a name->tensor map of all model
//                      outputs plus the number of valid batch entries.
void CnnDLSDKBase::InferBatch(const std::vector<cv::Mat>& frames,const std::function<void(const std::map<std::string, ov::Tensor>&, size_t)>& fetch_results)
{
    size_t num_imgs = frames.size();
    // The coordinate order below assumes the NHWC layout set up in Load().
    size_t h = m_modelShape[ov::layout::height_idx(m_desired_layout)];
    size_t w = m_modelShape[ov::layout::width_idx(m_desired_layout)];
    size_t c = m_modelShape[ov::layout::channels_idx(m_desired_layout)];
    for (size_t i = 0; i < num_imgs; i++) {
        // ROI tensor referencing the i-th batch slice (no copy).
        auto slice = ov::Tensor{ m_in_tensor, {i, 0, 0, 0}, {i + 1, h, w, c} };
        resize2tensor(frames[i], slice);
    }
    // Submit only the filled part of the batch.
    m_infer_request.set_input_tensor(ov::Tensor{ m_in_tensor, { 0, 0, 0, 0 }, {num_imgs, h, w, c} });
    m_infer_request.infer();
    std::map<std::string, ov::Tensor> output_tensors;
    for (const auto& output_tensor_name : m_output_tensors_names) {
        output_tensors[output_tensor_name] = m_infer_request.get_tensor(output_tensor_name);
    }
    fetch_results(output_tensors, num_imgs);
}
// Loads and compiles the model immediately, so the object is ready for
// Compute() calls right after construction.
VectorCNN::VectorCNN(const Config& config) : CnnDLSDKBase(config) {
    Load();
}
// Convenience overload: runs the batched Compute() on a single image and
// hands back its only descriptor through `vector`.
void VectorCNN::Compute(const cv::Mat& frame, cv::Mat* vector, cv::Size outp_shape) {
    std::vector<cv::Mat> descriptors;
    Compute(std::vector<cv::Mat>{frame}, &descriptors, outp_shape);
    *vector = descriptors.front();
}
// Computes descriptors for a batch of images.
// Every model output is wrapped as a float cv::Mat over the raw tensor data;
// each batch row (out_tensor.size[1] elements per image) is then copied into
// its own cv::Mat, optionally reshaped to `outp_shape` first.
void VectorCNN::Compute(const std::vector<cv::Mat>& images, std::vector<cv::Mat>* vectors,
                        cv::Size outp_shape) {
    if (images.empty()) {
        return;
    }
    vectors->clear();
    auto results_fetcher = [vectors, outp_shape](const std::map<std::string, ov::Tensor>& outputs, size_t batch_size) {
        for (auto& output : outputs) {
            ov::Tensor tensor = output.second;
            ov::Shape shape = tensor.get_shape();
            // Mat header over the output buffer -- no copy yet.
            std::vector<int> tensor_sizes(shape.size(), 0);
            for (size_t i = 0; i < tensor_sizes.size(); ++i) {
                tensor_sizes[i] = shape[i];
            }
            cv::Mat out_tensor(tensor_sizes, CV_32F, tensor.data<float>());
            for (size_t b = 0; b < batch_size; b++) {
                // View over the b-th row of the (batch, length) output.
                cv::Mat tensor_wrapper(out_tensor.size[1], 1, CV_32F,
                    reinterpret_cast<void*>((out_tensor.ptr<float>(0) + b * out_tensor.size[1])));
                vectors->emplace_back();
                if (!isSizeEmpty(outp_shape))
                    tensor_wrapper = tensor_wrapper.reshape(1, outp_shape.height);
                // copyTo detaches the result from the request-owned buffer.
                tensor_wrapper.copyTo(vectors->back());
            }
        }
    };
    InferBatch(images, results_fetcher);
}
// Maximum number of images one Compute()/InferBatch() call can process.
int VectorCNN::maxBatchSize() const {
    return m_config.m_max_batch_size;
}

View File

@@ -0,0 +1,176 @@
// Copyright (C) 2018-2024 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#include <algorithm>
#include <string>
#include <map>
#include <opencv2/core/core.hpp>
#include <opencv2/imgproc/imgproc.hpp>
#include "openvino/openvino.hpp"
#include "detector.hpp"
#define SSD_EMPTY_DETECTIONS_INDICATOR -1.0
using namespace detection;
namespace {
// Clips `rect` against the image bounds given by `size`: the top-left corner
// is clamped to [0, size-1], the bottom-right to [0, size], so the returned
// rectangle always lies inside the image (possibly with zero area).
cv::Rect TruncateToValidRect(const cv::Rect& rect, const cv::Size& size) {
    const auto clamp_val = [](int v, int lo, int hi) { return std::max(lo, std::min(hi, v)); };
    cv::Point top_left = rect.tl();
    cv::Point bottom_right = rect.br();
    top_left.x = clamp_val(top_left.x, 0, size.width - 1);
    top_left.y = clamp_val(top_left.y, 0, size.height - 1);
    bottom_right.x = clamp_val(bottom_right.x, 0, size.width);
    bottom_right.y = clamp_val(bottom_right.y, 0, size.height);
    const int width = std::max(0, bottom_right.x - top_left.x);
    const int height = std::max(0, bottom_right.y - top_left.y);
    return cv::Rect(top_left.x, top_left.y, width, height);
}
// Scales `r` about its own center by (coeff_x, coeff_y), rounding the result
// outwards: floor for the top-left corner, ceil for the bottom-right.
cv::Rect IncreaseRect(const cv::Rect& r, float coeff_x, float coeff_y) {
    const cv::Point2f top_left = r.tl();
    const cv::Point2f bottom_right = r.br();
    const cv::Point2f center = (top_left * 0.5f) + (bottom_right * 0.5f);
    const cv::Point2f half_size = center - top_left;
    const cv::Point2f scaled_half{half_size.x * coeff_x, half_size.y * coeff_y};
    const cv::Point2f scaled_tl = center - scaled_half;
    const cv::Point2f scaled_br = center + scaled_half;
    const cv::Point tl_int{static_cast<int>(std::floor(scaled_tl.x)), static_cast<int>(std::floor(scaled_tl.y))};
    const cv::Point br_int{static_cast<int>(std::ceil(scaled_br.x)), static_cast<int>(std::ceil(scaled_br.y))};
    return cv::Rect(tl_int, br_int);
}
} // namespace
// Reads, validates and compiles the SSD face-detection model.
// The model must have exactly one input and one 4-D output whose last
// dimension is the 7-element SSD detection record; the input is reshaped to
// the configured spatial size and fed as u8/NHWC via the preprocessor.
//
// @throws std::runtime_error on any unsupported topology.
FaceDetection::FaceDetection(const DetectorConfig& config) :
    BaseCnnDetection(config.is_async), m_config(config) {
    m_detectorName = "Face Detection";
    slog::info << "Reading model: " << m_config.m_path_to_model << slog::endl;
    std::shared_ptr<ov::Model> model = m_config.m_core.read_model(m_config.m_path_to_model);
    logBasicModelInfo(model);
    ov::Layout desiredLayout = {"NHWC"};
    ov::OutputVector inputs_info = model->inputs();
    if (inputs_info.size() != 1) {
        throw std::runtime_error("Face Detection network should have only one input");
    }
    ov::OutputVector outputs_info = model->outputs();
    if (outputs_info.size() != 1) {
        throw std::runtime_error("Face Detection network should have only one output");
    }
    ov::Output<ov::Node> input = model->input();
    m_input_name = input.get_any_name();
    // Fall back to NCHW when the model carries no layout information.
    ov::Layout modelLayout = ov::layout::get_layout(input);
    if (modelLayout.empty())
        modelLayout = {"NCHW"};
    ov::Shape shape = input.get_shape();
    shape[ov::layout::height_idx(modelLayout)] = m_config.input_h;
    shape[ov::layout::width_idx(modelLayout)] = m_config.input_w;
    ov::Output<ov::Node> output = model->output();
    m_output_name = output.get_any_name();
    ov::Shape outputDims = output.get_shape();
    // Validate the rank BEFORE indexing outputDims[2]/[3]; the previous code
    // read those elements first, which is out-of-bounds for lower-rank outputs.
    if (outputDims.size() != 4) {
        throw std::runtime_error("Face Detection network output should have 4 dimensions, but had " +
            std::to_string(outputDims.size()));
    }
    m_max_detections_count = outputDims[2];
    m_object_size = outputDims[3];
    if (m_object_size != 7) {
        throw std::runtime_error("Face Detection network output layer should have 7 as a last dimension");
    }
    // Reshape the input to the configured detector resolution.
    std::map<std::string, ov::PartialShape> input_shapes;
    input_shapes[input.get_any_name()] = shape;
    model->reshape(input_shapes);
    // Accept u8 NHWC frames; convert to the model's layout and f32.
    ov::preprocess::PrePostProcessor ppp(model);
    ppp.input().tensor()
        .set_element_type(ov::element::u8)
        .set_layout(desiredLayout);
    ppp.input().preprocess()
        .convert_layout(modelLayout)
        .convert_element_type(ov::element::f32);
    ppp.input().model().set_layout(modelLayout);
    model = ppp.build();
    slog::info << "PrePostProcessor configuration:" << slog::endl;
    slog::info << ppp << slog::endl;
    m_model = m_config.m_core.compile_model(model, m_config.m_deviceName);
    logCompiledModelInfo(m_model, m_config.m_path_to_model, m_config.m_deviceName, m_detectorName);
}
// Submits the previously enqueued frame for inference.
// No-op when nothing has been enqueued since the last submit.
void FaceDetection::submitRequest() {
    if (m_enqueued_frames == 0) {
        return;
    }
    m_enqueued_frames = 0;
    BaseCnnDetection::submitRequest();
}
// Prepares `frame` for inference: lazily creates the infer request on first
// use, remembers the frame size (used to scale detections back in
// fetchResults) and resizes the frame into the input tensor.
void FaceDetection::enqueue(const cv::Mat& frame) {
    if (!m_request) {
        m_request = std::make_shared<ov::InferRequest>(m_model.create_infer_request());
    }
    m_width = static_cast<float>(frame.cols);
    m_height = static_cast<float>(frame.rows);
    ov::Tensor input_tensor = m_request->get_tensor(m_input_name);
    resize2tensor(frame, input_tensor);
    m_enqueued_frames = 1;
}
// Decodes the raw SSD output into detected faces.
// Each detection is a 7-float record: index 0 is the image id, index 2 the
// confidence, indices 3-6 the normalized box corners (index 1 is unused
// here). An image id of -1 (SSD_EMPTY_DETECTIONS_INDICATOR) terminates the
// list of valid detections.
DetectedObjects FaceDetection::fetchResults() {
    DetectedObjects results;
    const float* data = m_request->get_tensor(m_output_name).data<float>();
    for (int det_id = 0; det_id < m_max_detections_count; ++det_id) {
        const int start_pos = det_id * m_object_size;
        const float batchID = data[start_pos];
        if (batchID == SSD_EMPTY_DETECTIONS_INDICATOR) {
            break;
        }
        // Clamp confidence and coordinates to [0, 1], then scale coordinates
        // to the source frame size remembered by enqueue().
        const float score = std::min(std::max(0.0f, data[start_pos + 2]), 1.0f);
        const float x0 = std::min(std::max(0.0f, data[start_pos + 3]), 1.0f) * m_width;
        const float y0 = std::min(std::max(0.0f, data[start_pos + 4]), 1.0f) * m_height;
        const float x1 = std::min(std::max(0.0f, data[start_pos + 5]), 1.0f) * m_width;
        const float y1 = std::min(std::max(0.0f, data[start_pos + 6]), 1.0f) * m_height;
        DetectedObject object;
        object.confidence = score;
        object.rect = cv::Rect(cv::Point(static_cast<int>(round(static_cast<double>(x0))),
                                         static_cast<int>(round(static_cast<double>(y0)))),
                               cv::Point(static_cast<int>(round(static_cast<double>(x1))),
                                         static_cast<int>(round(static_cast<double>(y1)))));
        // Grow the box by the configured scale factors, then clip to frame.
        object.rect = TruncateToValidRect(IncreaseRect(object.rect,
                                                       m_config.increase_scale_x,
                                                       m_config.increase_scale_y),
                                          cv::Size(static_cast<int>(m_width), static_cast<int>(m_height)));
        // Keep only confident, non-empty boxes.
        if (object.confidence > m_config.confidence_threshold && object.rect.area() > 0) {
            results.emplace_back(object);
        }
    }
    return results;
}

View File

@@ -0,0 +1,166 @@
// Copyright (C) 2018-2024 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#include <iomanip>
#include <string>
#include <map>
#include <set>
#include <vector>
#include <fstream>
#include "logger.hpp"
namespace {
// Label reported whenever an id/action index is unknown (negative).
const char unknown_label[] = "Unknown";

// Returns labels[idx], or "Unknown" for a negative index.
// Throws std::out_of_range when idx is non-negative but outside `labels`.
std::string GetUnknownOrLabel(const std::vector<std::string>& labels, int idx) {
    if (idx < 0) {
        return unknown_label;
    }
    return labels.at(idx);
}

// Formats "<basename>@<zero-padded 6-digit frame index>", e.g. "v.mp4@000042".
std::string FrameIdxToString(const std::string& path, int frame_idx) {
    std::ostringstream formatted;
    formatted << std::setw(6) << std::setfill('0') << frame_idx;
    const std::string basename = path.substr(path.rfind("/") + 1);
    return basename + "@" + formatted.str();
}
} // anonymous namespace
// @param stream            Destination for the human-readable event log.
// @param enabled           When false, the per-frame logging calls are no-ops.
// @param act_stat_log_file Path of the CSV-style per-frame action statistics
//                          file (opened unconditionally).
// @param act_det_log_file  Optional path of the OpenCV FileStorage detection
//                          log; when non-empty, a "data" sequence is opened
//                          here and closed in the destructor.
DetectionsLogger::DetectionsLogger(slog::LogStream& stream, bool enabled,
                                   const std::string& act_stat_log_file,
                                   const std::string& act_det_log_file) :
    m_log_stream(stream) {
    m_write_logs = enabled;
    m_act_stat_log_stream.open(act_stat_log_file, std::fstream::out);
    if (!act_det_log_file.empty()) {
        m_act_det_log_stream.open(act_det_log_file, cv::FileStorage::WRITE);
        m_act_det_log_stream << "data" << "[";
    }
}
// Writes the header line of a new frame record to the main log stream.
void DetectionsLogger::CreateNextFrameRecord(
    const std::string& path, const int frame_idx, const size_t width, const size_t height) {
    if (!m_write_logs) {
        return;
    }
    m_log_stream << "Frame_name: " << path << "@" << frame_idx << " width: "
                 << width << " height: " << height << slog::endl;
}
// Logs a face detection for the current frame; the action part is only
// emitted when an action label is available.
void DetectionsLogger::AddFaceToFrame(const cv::Rect& rect, const std::string& id, const std::string& action) {
    if (!m_write_logs) {
        return;
    }
    m_log_stream << "Object type: face. Box: " << rect << " id: " << id;
    if (!action.empty()) {
        m_log_stream << " action: " << action;
    }
    m_log_stream << slog::endl;
}
// Logs a person detection for the current frame; the id part is only
// emitted when a recognized identity is available.
void DetectionsLogger::AddPersonToFrame(const cv::Rect& rect, const std::string& action, const std::string& id) {
    if (!m_write_logs) {
        return;
    }
    m_log_stream << "Object type: person. Box: " << rect << " action: " << action;
    if (!id.empty()) {
        m_log_stream << " id: " << id;
    }
    m_log_stream << slog::endl;
}
// Appends one detection record to the FileStorage action-detection log
// (no-op when that log was not opened in the constructor).
// The key sequence defines the on-disk schema -- do not reorder it.
void DetectionsLogger::AddDetectionToFrame(const TrackedObject& object, const int frame_idx) {
    if (m_act_det_log_stream.isOpened()) {
        m_act_det_log_stream << "{" << "frame_id" << frame_idx
                             << "det_conf" << object.confidence
                             << "label" << object.label
                             << "rect" << object.rect << "}";
    }
}
// Terminates the current frame record with an empty log line.
void DetectionsLogger::FinalizeFrameRecord() {
    if (!m_write_logs) {
        return;
    }
    m_log_stream << slog::endl;
}
// Replays all tracked faces frame by frame, writing two outputs at once:
//  - the human-readable per-frame log (CreateNextFrameRecord/AddFaceToFrame),
//  - the action-statistics CSV: one header row of person labels, then one
//    row per frame with the action currently attributed to each person.
void DetectionsLogger::DumpDetections(const std::string& video_path,
                                      const cv::Size frame_size,
                                      const size_t num_frames,
                                      const std::vector<Track>& face_tracks,
                                      const std::map<int, int>& track_id_to_label_faces,
                                      const std::vector<std::string>& action_idx_to_label,
                                      const std::vector<std::string>& person_id_to_label,
                                      const std::vector<std::map<int, int>>& frame_face_obj_id_to_action_maps) {
    // Index every tracked face object by the frame it appears in.
    std::map<int, std::vector<const TrackedObject*>> frame_idx_to_face_track_objs;
    for (const auto& tr : face_tracks) {
        for (const auto& obj : tr.objects) {
            frame_idx_to_face_track_objs[obj.frame_idx].emplace_back(&obj);
        }
    }
    std::map<std::string, std::string> face_label_to_action;
    for (const auto& label : person_id_to_label) {
        face_label_to_action[label] = unknown_label;
    }
    // CSV header: frame_idx,<person label>,<person label>,...
    m_act_stat_log_stream << "frame_idx";
    for (const auto& label : person_id_to_label) {
        m_act_stat_log_stream << "," << label;
    }
    m_act_stat_log_stream << std::endl;
    for (size_t i = 0; i < num_frames; i++) {
        CreateNextFrameRecord(video_path, i, frame_size.width, frame_size.height);
        const auto& frame_face_obj_id_to_action = frame_face_obj_id_to_action_maps.at(i);
        // Reset all persons to "Unknown" before filling in this frame.
        for (auto& kv : face_label_to_action) {
            kv.second = unknown_label;
        }
        for (const auto& p_obj : frame_idx_to_face_track_objs[i]) {
            const auto& obj = *p_obj;
            std::string action_label = unknown_label;
            if (frame_face_obj_id_to_action.count(obj.object_id) > 0) {
                action_label = GetUnknownOrLabel(action_idx_to_label, frame_face_obj_id_to_action.at(obj.object_id));
            }
            std::string face_label = GetUnknownOrLabel(person_id_to_label, track_id_to_label_faces.at(obj.object_id));
            face_label_to_action[face_label] = action_label;
            AddFaceToFrame(obj.rect, face_label, action_label);
        }
        // CSV row for this frame, in the same column order as the header.
        m_act_stat_log_stream << FrameIdxToString(video_path, i);
        for (const auto& label : person_id_to_label) {
            m_act_stat_log_stream << "," << face_label_to_action[label];
        }
        m_act_stat_log_stream << std::endl;
        FinalizeFrameRecord();
    }
}
// Prints, for every tracked object matched to a recognized face, its
// sequence of action events (action label plus begin/end frame ids) to the
// main log stream. Objects without a face-track label entry are skipped.
void DetectionsLogger::DumpTracks(const std::map<int, RangeEventsTrack>& obj_id_to_events,
                                  const std::vector<std::string>& action_idx_to_label,
                                  const std::map<int, int>& track_id_to_label_faces,
                                  const std::vector<std::string>& person_id_to_label) {
    for (const auto& tup : obj_id_to_events) {
        const int obj_id = tup.first;
        if (track_id_to_label_faces.count(obj_id) > 0) {
            const auto& events = tup.second;
            std::string face_label = GetUnknownOrLabel(person_id_to_label, track_id_to_label_faces.at(obj_id));
            m_log_stream << "Person: " << face_label << slog::endl;
            for (const auto& event : events) {
                std::string action_label = GetUnknownOrLabel(action_idx_to_label, event.action);
                m_log_stream << " - " << action_label
                             << ": from " << event.begin_frame_id
                             << " to " << event.end_frame_id
                             << " frames" << slog::endl;
            }
        }
    }
}
// Closes the "[" sequence opened in the constructor so the FileStorage
// action-detection log is well-formed on disk.
DetectionsLogger::~DetectionsLogger() {
    if (m_act_det_log_stream.isOpened()) {
        m_act_det_log_stream << "]";
    }
}

View File

@@ -0,0 +1,186 @@
// Copyright (C) 2018-2024 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#include <fstream>
#include <iostream>
#include <vector>
#include <string>
#include <limits>
#include <opencv2/opencv.hpp>
#include "face_reid.hpp"
#include "tracker/tracker.hpp"
namespace {
// Cosine distance between two descriptors: 1 - cos(angle between them).
// A small epsilon in the denominator keeps the division well-defined for
// (near-)zero vectors.
float ComputeReidDistance(const cv::Mat& descr1, const cv::Mat& descr2) {
    const float dot_product = static_cast<float>(descr1.dot(descr2));
    const float sq_norm1 = static_cast<float>(descr1.dot(descr1));
    const float sq_norm2 = static_cast<float>(descr2.dot(descr2));
    const float denom = sqrt(sq_norm1 * sq_norm2) + 1e-6f;
    return 1.0f - dot_product / denom;
}
// Returns true when `name` can be opened for reading.
bool file_exists(const std::string& name) {
    return std::ifstream(name.c_str()).good();
}
// Platform path separator: '\\' on Windows, '/' elsewhere.
inline char separator() {
#ifdef _WIN32
    return '\\';
#else
    return '/';
#endif
}

// Returns the directory part of `path` (everything before the last
// separator), or "." plus a separator when `path` contains none.
std::string folder_name(const std::string& path) {
    const size_t last_sep = path.find_last_of(separator());
    if (last_sep == std::string::npos) {
        return std::string(".") + separator();
    }
    return path.substr(0, last_sep);
}
} // namespace
// Label and id reported for faces that match no gallery identity.
const char EmbeddingsGallery::unknown_label[] = "Unknown";
const int EmbeddingsGallery::unknown_id = TrackedObject::UNKNOWN_LABEL_IDX;
// Builds the face gallery from a FileStorage list of labeled images.
// Each image is (optionally) cropped to its detected face, size-checked,
// aligned by landmarks and run through the re-id network; successful
// registrations are stored as identities with consecutive ids.
//
// @param ids_list           Path to the FileStorage list; empty disables the gallery.
// @param threshold          Re-id distance threshold used later for matching.
// @param min_size_fr        Minimum face crop size accepted for registration.
// @param crop_gallery       When true, run the face detector on gallery images.
// @param detector_config    Config for the gallery face detector.
// @param landmarks_det      Landmark regression network.
// @param image_reid         Face re-identification network.
// @param use_greedy_matcher Use greedy instead of optimal matching later on.
EmbeddingsGallery::EmbeddingsGallery(const std::string& ids_list,
                                     double threshold, int min_size_fr,
                                     bool crop_gallery, const detection::DetectorConfig& detector_config,
                                     VectorCNN& landmarks_det,
                                     VectorCNN& image_reid,
                                     bool use_greedy_matcher) :
    reid_threshold(threshold), use_greedy_matcher(use_greedy_matcher) {
    if (ids_list.empty()) {
        return;
    }
    detection::FaceDetection detector(detector_config);
    cv::FileStorage fs(ids_list, cv::FileStorage::Mode::READ);
    cv::FileNode fn = fs.root();
    int id = 0;
    for (cv::FileNodeIterator fit = fn.begin(); fit != fn.end(); ++fit) {
        cv::FileNode item = *fit;
        std::string label = item.name();
        std::vector<cv::Mat> embeddings;
        // Please, note that the case when there are more than one image in gallery
        // for a person might not work properly with the current implementation
        // of the demo.
        // Remove this assert by your own risk.
        CV_Assert(item.size() == 1);
        for (size_t i = 0; i < item.size(); i++) {
            std::string path;
            // Entries may be absolute paths or paths relative to the list file.
            if (file_exists(item[i].string())) {
                path = item[i].string();
            } else {
                path = folder_name(ids_list) + separator() + item[i].string();
            }
            cv::Mat image = cv::imread(path);
            CV_Assert(!image.empty());
            cv::Mat emb;
            RegistrationStatus status = RegisterIdentity(label, image, min_size_fr, crop_gallery, detector, landmarks_det, image_reid, emb);
            if (status == RegistrationStatus::SUCCESS) {
                embeddings.push_back(emb);
                idx_to_id.push_back(id);
                identities.emplace_back(embeddings, label, id);
                ++id;
            }
        }
    }
}
// Matches query embeddings against the gallery.
// Builds a (queries x gallery-embeddings) cosine-distance matrix, solves the
// assignment with Kuhn-Munkres, and returns one identity id per query --
// unknown_id when nothing was assigned or the matched distance exceeds
// reid_threshold.
std::vector<int> EmbeddingsGallery::GetIDsByEmbeddings(const std::vector<cv::Mat>& embeddings) const {
    if (embeddings.empty() || idx_to_id.empty())
        return std::vector<int>(embeddings.size(), unknown_id);
    cv::Mat distances(static_cast<int>(embeddings.size()), static_cast<int>(idx_to_id.size()), CV_32F);
    for (int i = 0; i < distances.rows; i++) {
        // Column index k runs over all embeddings of all identities, flattened.
        int k = 0;
        for (size_t j = 0; j < identities.size(); j++) {
            for (const auto& reference_emb : identities[j].embeddings) {
                distances.at<float>(i, k) = ComputeReidDistance(embeddings[i], reference_emb);
                k++;
            }
        }
    }
    KuhnMunkres matcher(use_greedy_matcher);
    auto matched_idx = matcher.Solve(distances);
    std::vector<int> output_ids;
    for (auto col_idx : matched_idx) {
        if (int(col_idx) == -1) {
            output_ids.push_back(unknown_id);
            continue;
        }
        // output_ids.size() equals the current row index here, because
        // exactly one id is appended per processed row.
        if (distances.at<float>(output_ids.size(), col_idx) > reid_threshold)
            output_ids.push_back(unknown_id);
        else
            output_ids.push_back(idx_to_id[col_idx]);
    }
    return output_ids;
}
// Returns the gallery label for `id`, or "Unknown" when the id is outside
// the registered range (e.g. unknown_id).
std::string EmbeddingsGallery::GetLabelByID(int id) const {
    const bool in_range = id >= 0 && id < static_cast<int>(identities.size());
    return in_range ? identities[id].label : unknown_label;
}
// Number of identities registered in the gallery.
size_t EmbeddingsGallery::size() const {
    return identities.size();
}
// Returns the labels of all identities, indexed by identity id.
std::vector<std::string> EmbeddingsGallery::GetIDToLabelMap() const {
    std::vector<std::string> labels(identities.size());
    for (size_t i = 0; i < identities.size(); ++i) {
        labels[i] = identities[i].label;
    }
    return labels;
}
// Returns true when an identity with the given label is registered.
bool EmbeddingsGallery::LabelExists(const std::string& label) const {
    // Capture the label by reference: the previous by-value capture copied
    // the std::string on every call. any_of also states the intent directly.
    return std::any_of(identities.begin(), identities.end(),
                       [&label](const GalleryObject& o) { return o.label == label; });
}
// Computes the re-id embedding for a single gallery image.
// Optionally crops to the single detected face, rejects crops where BOTH
// dimensions are below min_size_fr, aligns the face by its five landmarks
// and runs the re-identification network.
//
// @param[out] embedding Receives the descriptor on SUCCESS.
// @return SUCCESS, FAILURE_NOT_DETECTED (cropping found no face) or
//         FAILURE_LOW_QUALITY (crop too small).
RegistrationStatus EmbeddingsGallery::RegisterIdentity(const std::string& identity_label,
                                                       const cv::Mat& image,
                                                       int min_size_fr, bool crop_gallery,
                                                       detection::FaceDetection& detector,
                                                       VectorCNN& landmarks_det,
                                                       VectorCNN& image_reid,
                                                       cv::Mat& embedding) {
    cv::Mat target = image;
    if (crop_gallery) {
        detector.enqueue(image);
        detector.submitRequest();
        detector.wait();
        detection::DetectedObjects faces = detector.fetchResults();
        if (faces.size() == 0) {
            return RegistrationStatus::FAILURE_NOT_DETECTED;
        }
        // Gallery images are expected to contain exactly one face.
        CV_Assert(faces.size() == 1);
        cv::Mat face_roi = image(faces[0].rect);
        target = face_roi;
    }
    if ((target.rows < min_size_fr) && (target.cols < min_size_fr)) {
        return RegistrationStatus::FAILURE_LOW_QUALITY;
    }
    cv::Mat landmarks;
    // Five landmark points, each (x, y) -> a 5x2 output matrix.
    landmarks_det.Compute(target, &landmarks, cv::Size(2, 5));
    std::vector<cv::Mat> images = { target };
    std::vector<cv::Mat> landmarks_vec = { landmarks };
    AlignFaces(&images, &landmarks_vec);
    image_reid.Compute(images[0], &embedding);
    return RegistrationStatus::SUCCESS;
}

View File

@@ -0,0 +1,201 @@
/*
// Copyright (C) 2021-2024 Intel Corporation
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
*/
#include "models/associative_embedding_decoder.h"
#include <algorithm>
#include <iterator>
#include <limits>
#include <numeric>
#include <vector>
#include <utils/kuhn_munkres.hpp>
// Finds up to `maxNumPeople` candidate keypoints ("peaks") for one joint:
// the highest-scoring positions of the joint's NMS-filtered heat map that
// exceed `detectionThreshold`, each annotated with its embedding tag.
//
// @param nmsHeatMaps        Per-joint NMS-filtered heat maps.
// @param aembdsMaps         Per-joint associative-embedding maps.
// @param allPeaks           Output: peaks appended to allPeaks[jointId].
// @param jointId            Index of the joint to process.
// @param maxNumPeople       Upper bound on the number of peaks to extract.
// @param detectionThreshold Minimum heat value for a peak to be kept.
void findPeaks(const std::vector<cv::Mat>& nmsHeatMaps,
               const std::vector<cv::Mat>& aembdsMaps,
               std::vector<std::vector<Peak>>& allPeaks,
               size_t jointId,
               size_t maxNumPeople,
               float detectionThreshold) {
    const cv::Mat& nmsHeatMap = nmsHeatMaps[jointId];
    const float* heatMapData = nmsHeatMap.ptr<float>();
    cv::Size outputSize = nmsHeatMap.size();
    std::vector<int> indices(outputSize.area());
    std::iota(std::begin(indices), std::end(indices), 0);
    // Clamp to the map size: partial_sort with a middle iterator past end()
    // (and the indices[personId] reads below) would be undefined behavior
    // for degenerate maps smaller than maxNumPeople.
    const size_t numCandidates = std::min(maxNumPeople, indices.size());
    std::partial_sort(std::begin(indices),
                      std::begin(indices) + numCandidates,
                      std::end(indices),
                      [heatMapData](int l, int r) {
                          return heatMapData[l] > heatMapData[r];
                      });
    allPeaks[jointId].reserve(numCandidates);  // hoisted out of the loop below
    for (size_t personId = 0; personId < numCandidates; personId++) {
        int index = indices[personId];
        // NOTE(review): `x` = index / width is the row and `y` = index % width
        // the column, so at<float>(x, y) reads (row, col) consistently -- but
        // the Peak keypoint then stores (row, col) as (x, y). Downstream code
        // appears to compensate; confirm before renaming or "fixing".
        int x = index / outputSize.width;
        int y = index % outputSize.width;
        float tag = aembdsMaps[jointId].at<float>(x, y);
        float score = heatMapData[index];
        if (score > detectionThreshold) {
            allPeaks[jointId].emplace_back(Peak{cv::Point2f(static_cast<float>(x), static_cast<float>(y)), score, tag});
        }
    }
}
// Groups per-joint peaks into person poses using associative-embedding tags.
// Joints are visited in a fixed order; each joint's peaks are assigned to
// existing poses by solving an assignment problem whose cost combines tag
// distance with (for tags within tagThreshold) the spatial distance to the
// pose center. Peaks that match no existing pose start new poses.
std::vector<Pose> matchByTag(std::vector<std::vector<Peak>>& allPeaks,
                             size_t maxNumPeople,
                             size_t numJoints,
                             float tagThreshold) {
    size_t jointOrder[]{0, 1, 2, 3, 4, 5, 6, 11, 12, 7, 8, 9, 10, 13, 14, 15, 16};
    std::vector<Pose> allPoses;
    for (size_t jointId : jointOrder) {
        std::vector<Peak>& jointPeaks = allPeaks[jointId];
        std::vector<float> tags;
        for (auto& peak : jointPeaks) {
            tags.push_back(peak.tag);
        }
        // First joint that has peaks: each peak seeds its own pose.
        if (allPoses.empty()) {
            for (size_t personId = 0; personId < jointPeaks.size(); personId++) {
                Peak peak = jointPeaks[personId];
                Pose pose = Pose(numJoints);
                pose.add(jointId, peak);
                allPoses.push_back(pose);
            }
            continue;
        }
        if (jointPeaks.empty() || (allPoses.size() == maxNumPeople)) {
            continue;
        }
        std::vector<float> posesTags;
        std::vector<cv::Point2f> posesCenters;
        for (auto& pose : allPoses) {
            posesTags.push_back(pose.getPoseTag());
            posesCenters.push_back(pose.getPoseCenter());
        }
        // Rows: this joint's peaks ("added"); columns: existing poses ("grouped").
        size_t numAdded = tags.size();
        size_t numGrouped = posesTags.size();
        cv::Mat tagsDiff(numAdded, numGrouped, CV_32F);
        cv::Mat matchingCost(numAdded, numGrouped, CV_32F);
        std::vector<float> dists(numAdded);
        for (size_t j = 0; j < numGrouped; j++) {
            float minDist = std::numeric_limits<float>::max();
            // Compute euclidean distance (in spatial space) between the pose center and all joints.
            const cv::Point2f center = posesCenters.at(j);
            for (size_t i = 0; i < numAdded; i++) {
                cv::Point2f v = jointPeaks.at(i).keypoint - center;
                float dist = std::sqrt(v.x * v.x + v.y * v.y);
                dists[i] = dist;
                minDist = std::min(dist, minDist);
            }
            // Compute semantic distance (in embedding space) between the pose tag and all joints
            // and corresponding matching costs.
            auto poseTag = posesTags[j];
            for (size_t i = 0; i < numAdded; i++) {
                float diff = static_cast<float>(cv::norm(tags[i] - poseTag));
                tagsDiff.at<float>(i, j) = diff;
                if (diff < tagThreshold) {
                    // Among tag-compatible peaks, prefer spatially closer ones.
                    diff *= dists[i] / (minDist + 1e-10f);
                }
                matchingCost.at<float>(i, j) = std::round(diff) * 100 - jointPeaks[i].score;
            }
        }
        // Pad the cost matrix with a large constant so surplus peaks can be
        // left unassigned by the solver.
        if (numAdded > numGrouped) {
            cv::copyMakeBorder(matchingCost,
                               matchingCost,
                               0,
                               0,
                               0,
                               numAdded - numGrouped,
                               cv::BORDER_CONSTANT,
                               10000000);
        }
        // Get pairs
        auto res = KuhnMunkres().Solve(matchingCost);
        for (size_t row = 0; row < res.size(); row++) {
            size_t col = res[row];
            if (row < numAdded && col < numGrouped && tagsDiff.at<float>(row, col) < tagThreshold) {
                allPoses[col].add(jointId, jointPeaks[row]);
            } else {
                // Peak fits no existing pose: start a new one.
                Pose pose = Pose(numJoints);
                pose.add(jointId, jointPeaks[row]);
                allPoses.push_back(pose);
            }
        }
    }
    return allPoses;
}
namespace {
// Returns a quarter-pixel refinement for the peak at (x, y): along each axis
// the keypoint is nudged toward the neighbor with the larger heat value.
// Positions at or within one pixel of the left/top edge, or on the
// right/bottom edge, get no adjustment.
cv::Point2f adjustLocation(const int x, const int y, const cv::Mat& heatMap) {
    const int width = heatMap.cols;
    const int height = heatMap.rows;
    const bool interior = (x > 1) && (x < width - 1) && (y > 1) && (y < height - 1);
    if (!interior) {
        return cv::Point2f(0.f, 0.f);
    }
    const float diffX = heatMap.at<float>(y, x + 1) - heatMap.at<float>(y, x - 1);
    const float diffY = heatMap.at<float>(y + 1, x) - heatMap.at<float>(y - 1, x);
    return cv::Point2f(diffX > 0 ? 0.25f : -0.25f, diffY > 0 ? 0.25f : -0.25f);
}
} // namespace
// Post-processes the joints of one pose:
//  - "Adjust": found keypoints (score > 0) are shifted a quarter pixel
//    toward the higher-heat neighbor, plus an optional constant `delta`;
//  - "Refine": missing keypoints are recovered at the position whose
//    embedding value is closest to the pose tag among high-heat locations,
//    then adjusted the same way.
void adjustAndRefine(std::vector<Pose>& allPoses,
                     const std::vector<cv::Mat>& heatMaps,
                     const std::vector<cv::Mat>& aembdsMaps,
                     int poseId,
                     const float delta) {
    Pose& pose = allPoses[poseId];
    float poseTag = pose.getPoseTag();
    for (size_t jointId = 0; jointId < pose.size(); jointId++) {
        Peak& peak = pose.getPeak(jointId);
        const cv::Mat& heatMap = heatMaps[jointId];
        const cv::Mat& aembds = aembdsMaps[jointId];
        if (peak.score > 0) {
            // Adjust
            int x = static_cast<int>(peak.keypoint.x);
            int y = static_cast<int>(peak.keypoint.y);
            peak.keypoint += adjustLocation(x, y, heatMap);
            if (delta) {
                peak.keypoint.x += delta;
                peak.keypoint.y += delta;
            }
        } else {
            // Refine
            // Get position with the closest tag value to the pose tag
            cv::Mat diff = cv::abs(aembds - poseTag);
            // Quantize the tag difference to whole units (round-trip through
            // CV_32S) so the heat value dominates among near-equal tags.
            diff.convertTo(diff, CV_32S, 1.0, 0.0);
            diff.convertTo(diff, CV_32F);
            diff -= heatMap;
            double min;
            cv::Point2i minLoc;
            cv::minMaxLoc(diff, &min, 0, &minLoc);
            int x = minLoc.x;
            int y = minLoc.y;
            float val = heatMap.at<float>(y, x);
            // Accept the recovered location only if the heat map supports it.
            if (val > 0) {
                peak.keypoint.x = static_cast<float>(x);
                peak.keypoint.y = static_cast<float>(y);
                peak.keypoint += adjustLocation(x, y, heatMap);
                // Peak score is assigned directly, so it does not affect the pose score.
                peak.score = val;
            }
        }
    }
}

View File

@@ -0,0 +1,196 @@
/*
// Copyright (C) 2020-2024 Intel Corporation
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
*/
#include "models/classification_model.h"
#include <algorithm>
#include <fstream>
#include <iterator>
#include <map>
#include <stdexcept>
#include <string>
#include <utility>
#include <vector>
#include <openvino/op/softmax.hpp>
#include <openvino/op/topk.hpp>
#include <openvino/openvino.hpp>
#include <utils/slog.hpp>
#include "models/results.h"
// @param modelFileName Path to the model file.
// @param nTop          Number of top-scored classes to report per image.
// @param useAutoResize Delegate input resizing to the OpenVINO preprocessor.
// @param labels        Class index -> human-readable label mapping.
// @param layout        Optional input layout string passed to ImageModel.
ClassificationModel::ClassificationModel(const std::string& modelFileName,
                                         size_t nTop,
                                         bool useAutoResize,
                                         const std::vector<std::string>& labels,
                                         const std::string& layout)
    : ImageModel(modelFileName, useAutoResize, layout),
      nTop(nTop),
      labels(labels) {}
// Converts the two model outputs (outputsNames[0] = class indices,
// outputsNames[1] = scores, parallel arrays) into a ClassificationResult.
//
// @throws std::runtime_error when an index falls outside the label list.
std::unique_ptr<ResultBase> ClassificationModel::postprocess(InferenceResult& infResult) {
    const ov::Tensor& indicesTensor = infResult.outputsData.find(outputsNames[0])->second;
    const int* indicesPtr = indicesTensor.data<int>();
    const ov::Tensor& scoresTensor = infResult.outputsData.find(outputsNames[1])->second;
    const float* scoresPtr = scoresTensor.data<float>();
    // Ownership passes to retVal immediately; `result` remains a typed alias.
    ClassificationResult* result = new ClassificationResult(infResult.frameId, infResult.metaData);
    auto retVal = std::unique_ptr<ResultBase>(result);
    result->topLabels.reserve(scoresTensor.get_size());
    for (size_t i = 0; i < scoresTensor.get_size(); ++i) {
        int ind = indicesPtr[i];
        if (ind < 0 || ind >= static_cast<int>(labels.size())) {
            throw std::runtime_error("Invalid index for the class label is found during postprocessing");
        }
        result->topLabels.emplace_back(ind, labels[ind], scoresPtr[i]);
    }
    return retVal;
}
// Reads class labels from `labelFilename`.
// Each line is expected to look like "<id> <label>[,<synonym>...]": the text
// between the first space and the first comma (or line end) becomes the label.
//
// @throws std::runtime_error when the file cannot be opened or a line
//         contains no space.
// @throws std::logic_error when the file yields no labels.
std::vector<std::string> ClassificationModel::loadLabels(const std::string& labelFilename) {
    std::ifstream inputFile(labelFilename);
    if (!inputFile.is_open())
        throw std::runtime_error("Can't open the labels file: " + labelFilename);
    std::vector<std::string> labels;
    std::string line;
    while (std::getline(inputFile, line)) {
        const size_t labelBeginIdx = line.find(' ');
        if (labelBeginIdx == std::string::npos) {
            throw std::runtime_error("The labels file has incorrect format.");
        }
        const size_t labelEndIdx = line.find(',');  // npos when the class has only one label
        labels.push_back(line.substr(labelBeginIdx + 1, labelEndIdx - (labelBeginIdx + 1)));
    }
    if (labels.empty())
        throw std::logic_error("File is empty: " + labelFilename);
    return labels;
}
// Configures model pre/post-processing and appends Softmax + TopK nodes so
// the compiled model directly emits the nTop best (index, score) pairs as
// outputs named "indices" and "scores".
// @param model  model to modify in place (rebuilt twice via PrePostProcessor)
// @throws std::logic_error on unsupported topology or label-count mismatch
void ClassificationModel::prepareInputsOutputs(std::shared_ptr<ov::Model>& model) {
    // --------------------------- Configure input & output -------------------------------------------------
    // --------------------------- Prepare input ------------------------------------------------------
    if (model->inputs().size() != 1) {
        throw std::logic_error("Classification model wrapper supports topologies with only 1 input");
    }
    const auto& input = model->input();
    inputsNames.push_back(input.get_any_name());
    const ov::Shape& inputShape = input.get_shape();
    const ov::Layout& inputLayout = getInputLayout(input);
    if (inputShape.size() != 4 || inputShape[ov::layout::channels_idx(inputLayout)] != 3) {
        throw std::logic_error("3-channel 4-dimensional model's input is expected");
    }
    const auto width = inputShape[ov::layout::width_idx(inputLayout)];
    const auto height = inputShape[ov::layout::height_idx(inputLayout)];
    if (height != width) {
        throw std::logic_error("Model input has incorrect image shape. Must be NxN square."
                               " Got " +
                               std::to_string(height) + "x" + std::to_string(width) + ".");
    }
    ov::preprocess::PrePostProcessor ppp(model);
    inputTransform.setPrecision(ppp, model->input().get_any_name());
    ppp.input().tensor().set_layout({ "NHWC" });
    if (useAutoResize) {
        // Defer resizing to the runtime: accept any spatial size at inference.
        ppp.input().tensor().set_spatial_dynamic_shape();
        ppp.input()
            .preprocess()
            .convert_element_type(ov::element::f32)
            .resize(ov::preprocess::ResizeAlgorithm::RESIZE_LINEAR);
    }
    ppp.input().model().set_layout(inputLayout);
    // --------------------------- Prepare output -----------------------------------------------------
    if (model->outputs().size() != 1) {
        throw std::logic_error("Classification model wrapper supports topologies with only 1 output");
    }
    const ov::Shape& outputShape = model->output().get_shape();
    if (outputShape.size() != 2 && outputShape.size() != 4) {
        throw std::logic_error("Classification model wrapper supports topologies only with"
                               " 2-dimensional or 4-dimensional output");
    }
    // A 4-D output must be effectively 2-D, i.e. NCHW with H == W == 1.
    const ov::Layout outputLayout("NCHW");
    if (outputShape.size() == 4 && (outputShape[ov::layout::height_idx(outputLayout)] != 1 ||
                                    outputShape[ov::layout::width_idx(outputLayout)] != 1)) {
        throw std::logic_error("Classification model wrapper supports topologies only"
                               " with 4-dimensional output which has last two dimensions of size 1");
    }
    size_t classesNum = outputShape[ov::layout::channels_idx(outputLayout)];
    if (nTop > classesNum) {
        throw std::logic_error("The model provides " + std::to_string(classesNum) + " classes, but " +
                               std::to_string(nTop) + " labels are requested to be predicted");
    }
    if (classesNum == labels.size() + 1) {
        // Model predicts one extra class: prepend a placeholder label for it.
        labels.insert(labels.begin(), "other");
        slog::warn << "Inserted 'other' label as first." << slog::endl;
    } else if (classesNum != labels.size()) {
        // outputShape[1] equals classesNum for both 2-D and NCHW 4-D outputs.
        throw std::logic_error("Model's number of classes and parsed labels must match (" +
                               std::to_string(outputShape[1]) + " and " + std::to_string(labels.size()) + ')');
    }
    ppp.output().tensor().set_element_type(ov::element::f32);
    model = ppp.build();
    // --------------------------- Adding softmax and topK output ---------------------------
    // Reuse an existing Softmax if the topology already contains one;
    // otherwise attach a new one to the node feeding the original output.
    auto nodes = model->get_ops();
    auto softmaxNodeIt = std::find_if(std::begin(nodes), std::end(nodes), [](const std::shared_ptr<ov::Node>& op) {
        return std::string(op->get_type_name()) == "Softmax";
    });
    std::shared_ptr<ov::Node> softmaxNode;
    if (softmaxNodeIt == nodes.end()) {
        auto logitsNode = model->get_output_op(0)->input(0).get_source_output().get_node();
        softmaxNode = std::make_shared<ov::op::v1::Softmax>(logitsNode->output(0), 1);
    } else {
        softmaxNode = *softmaxNodeIt;
    }
    // Scalar k for TopK along the class axis (axis 1), sorted by value.
    const auto k = std::make_shared<ov::op::v0::Constant>(ov::element::i32, ov::Shape{}, std::vector<size_t>{nTop});
    std::shared_ptr<ov::Node> topkNode = std::make_shared<ov::op::v3::TopK>(softmaxNode,
                                                                            k,
                                                                            1,
                                                                            ov::op::v3::TopK::Mode::MAX,
                                                                            ov::op::v3::TopK::SortType::SORT_VALUES);
    // NOTE(review): TopK output(0) carries the values and output(1) the
    // indices, so these two locals wrap the opposite of what their names
    // suggest. The {scores, indices} order of the ResultVector compensates:
    // model->outputs()[0] ends up being the TopK indices output and
    // outputs()[1] the TopK values output, consistent with the tensor names
    // assigned below. Confusing but correct — verify before renaming.
    auto indices = std::make_shared<ov::op::v0::Result>(topkNode->output(0));
    auto scores = std::make_shared<ov::op::v0::Result>(topkNode->output(1));
    ov::ResultVector res({scores, indices});
    model = std::make_shared<ov::Model>(res, model->get_parameters(), "classification");
    // manually set output tensors name for created topK node
    model->outputs()[0].set_names({"indices"});
    outputsNames.push_back("indices");
    model->outputs()[1].set_names({"scores"});
    outputsNames.push_back("scores");
    // set output precisions
    ppp = ov::preprocess::PrePostProcessor(model);
    ppp.output("indices").tensor().set_element_type(ov::element::i32);
    ppp.output("scores").tensor().set_element_type(ov::element::f32);
    model = ppp.build();
}

View File

@@ -0,0 +1,54 @@
/*
// Copyright (C) 2020-2024 Intel Corporation
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
*/
#include "models/detection_model.h"
#include <fstream>
#include <stdexcept>
#include <string>
#include <vector>
#include "models/image_model.h"
// Default constructor: members keep whatever defaults the class declaration
// provides (declaration not visible here — see models/detection_model.h).
DetectionModel::DetectionModel() {
}
// Constructs a detection model wrapper.
// @param modelFileName        path to the model file
// @param confidenceThreshold  detections below this confidence are discarded
// @param useAutoResize        let OpenVINO preprocessing resize inputs
// @param labels               class label strings (may be empty)
// @param layout               input layout override string (may be empty)
DetectionModel::DetectionModel(const std::string& modelFileName,
                               float confidenceThreshold,
                               bool useAutoResize,
                               const std::vector<std::string>& labels,
                               const std::string& layout)
    : ImageModel(modelFileName, useAutoResize, layout),
      confidenceThreshold(confidenceThreshold),
      labels(labels) {}
// Loads one label per line from labelFilename.
// An empty filename yields an empty list; an unreadable file throws
// std::runtime_error; an empty file throws std::logic_error.
std::vector<std::string> DetectionModel::loadLabels(const std::string& labelFilename) {
    std::vector<std::string> result;
    if (labelFilename.empty()) {
        return result;
    }
    std::ifstream file(labelFilename);
    if (!file.is_open())
        throw std::runtime_error("Can't open the labels file: " + labelFilename);
    for (std::string line; std::getline(file, line);) {
        result.push_back(line);
    }
    if (result.empty())
        throw std::logic_error("File is empty: " + labelFilename);
    return result;
}

View File

@@ -0,0 +1,302 @@
/*
// Copyright (C) 2020-2024 Intel Corporation
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
*/
#include "models/detection_model_centernet.h"
#include <stddef.h>
#include <algorithm>
#include <cmath>
#include <map>
#include <stdexcept>
#include <utility>
#include <opencv2/core.hpp>
#include <opencv2/imgproc.hpp>
#include <openvino/openvino.hpp>
#include <utils/common.hpp>
#include <utils/image_utils.h>
#include <utils/ocv_common.hpp>
#include "models/input_data.h"
#include "models/internal_model_data.h"
#include "models/results.h"
// Constructs a CenterNet detection wrapper.
// Auto-resize is hard-coded to false because preprocess() performs its own
// letterbox resize whose geometry postprocess() inverts via an affine
// transform.
ModelCenterNet::ModelCenterNet(const std::string& modelFileName,
                               float confidenceThreshold,
                               const std::vector<std::string>& labels,
                               const std::string& layout)
    : DetectionModel(modelFileName, confidenceThreshold, false, labels, layout) {}
// Configures CenterNet input/output preprocessing: single 3-channel image
// input, three f32 NCHW outputs.
// @param model  model to modify in place
// @throws std::logic_error on unsupported topology
void ModelCenterNet::prepareInputsOutputs(std::shared_ptr<ov::Model>& model) {
    // --------------------------- Configure input & output -------------------------------------------------
    // --------------------------- Prepare input ------------------------------------------------------
    if (model->inputs().size() != 1) {
        throw std::logic_error("CenterNet model wrapper expects models that have only 1 input");
    }
    const ov::Shape& inputShape = model->input().get_shape();
    const ov::Layout& inputLayout = getInputLayout(model->input());
    if (inputShape[ov::layout::channels_idx(inputLayout)] != 3) {
        throw std::logic_error("Expected 3-channel input");
    }
    ov::preprocess::PrePostProcessor ppp(model);
    inputTransform.setPrecision(ppp, model->input().get_any_name());
    ppp.input().tensor().set_layout("NHWC");
    ppp.input().model().set_layout(inputLayout);
    // --------------------------- Reading image input parameters -------------------------------------------
    inputsNames.push_back(model->input().get_any_name());
    netInputWidth = inputShape[ov::layout::width_idx(inputLayout)];
    netInputHeight = inputShape[ov::layout::height_idx(inputLayout)];
    // --------------------------- Prepare output -----------------------------------------------------
    if (model->outputs().size() != 3) {
        throw std::logic_error("CenterNet model wrapper expects models that have 3 outputs");
    }
    const ov::Layout outLayout{"NCHW"};
    for (const auto& output : model->outputs()) {
        auto outTensorName = output.get_any_name();
        outputsNames.push_back(outTensorName);
        ppp.output(outTensorName).tensor().set_element_type(ov::element::f32).set_layout(outLayout);
    }
    // postprocess() indexes outputsNames[0..2] as heatmap / center-offset
    // regression / box size, which relies on the alphabetical order of the
    // model's output names — TODO confirm for the target model.
    std::sort(outputsNames.begin(), outputsNames.end());
    model = ppp.build();
}
// Rotates srcPoint counter-clockwise by rotRadius (radians) about the origin.
cv::Point2f getDir(const cv::Point2f& srcPoint, float rotRadius) {
    const float s = sinf(rotRadius);
    const float c = cosf(rotRadius);
    return {srcPoint.x * c - srcPoint.y * s, srcPoint.x * s + srcPoint.y * c};
}
// Returns the point obtained by rotating (a - b) 90 degrees about b; used to
// complete the third vertex of the triangle defining an affine transform.
cv::Point2f get3rdPoint(const cv::Point2f& a, const cv::Point2f& b) {
    const cv::Point2f d = a - b;
    return {b.x - d.y, b.y + d.x};
}
// Builds the 2x3 affine transform mapping a square crop of side srcW centered
// at (centerX, centerY), rotated by rot degrees, onto an outputWidth x
// outputHeight image. With inv == true the inverse mapping is returned.
cv::Mat getAffineTransform(float centerX,
                           float centerY,
                           int srcW,
                           float rot,
                           size_t outputWidth,
                           size_t outputHeight,
                           bool inv = false) {
    const float rotRad = static_cast<float>(CV_PI) * rot / 180.0f;
    // Three corresponding point pairs define the transform: the center, a
    // point half a side "up" (rotated in the source), and their 90-degree
    // completion.
    const cv::Point2f srcDir = getDir({0.0f, -0.5f * srcW}, rotRad);
    const cv::Point2f dstDir(0.0f, -0.5f * outputWidth);
    std::vector<cv::Point2f> src(3);
    std::vector<cv::Point2f> dst(3);
    src[0] = {centerX, centerY};
    src[1] = src[0] + srcDir;
    src[2] = get3rdPoint(src[0], src[1]);
    dst[0] = {outputWidth * 0.5f, outputHeight * 0.5f};
    dst[1] = dst[0] + dstDir;
    dst[2] = get3rdPoint(dst[0], dst[1]);
    return inv ? cv::getAffineTransform(dst, src) : cv::getAffineTransform(src, dst);
}
// Letterbox-resizes the input image to the network size and feeds it to the
// request. The original image dimensions are returned so postprocess() can
// map detections back to source coordinates.
// @param inputData  must hold an ImageInputData
// @param request    inference request receiving the input tensor
std::shared_ptr<InternalModelData> ModelCenterNet::preprocess(const InputData& inputData, ov::InferRequest& request) {
    auto& img = inputData.asRef<ImageInputData>().inputImage;
    // Keep aspect ratio; padding is undone later via the affine transform.
    const auto& resizedImg = resizeImageExt(img, netInputWidth, netInputHeight, RESIZE_KEEP_ASPECT_LETTERBOX);
    request.set_input_tensor(wrapMat2Tensor(inputTransform(resizedImg)));
    return std::make_shared<InternalImageModelData>(img.cols, img.rows);
}
// In-place sigmoid over the raw heatmap, then threshold filtering plus a
// kernel x kernel max-pool style non-maximum suppression: a cell is kept only
// if no in-window neighbour in the same channel has a larger score.
// @param scoresPtr  heatmap data, modified in place (logits -> probabilities)
// @param shape      NCHW heatmap shape
// @param threshold  minimum score to keep a peak
// @param kernel     suppression window side (default 3)
// @return (flat CHW index, score) pairs of the kept peaks
std::vector<std::pair<size_t, float>> nms(float* scoresPtr, const ov::Shape& shape, float threshold, int kernel = 3) {
    std::vector<std::pair<size_t, float>> scores;
    scores.reserve(ModelCenterNet::INIT_VECTOR_SIZE);
    const size_t chSize = shape[2] * shape[3];
    // Sigmoid: convert logits to probabilities (all results are in (0, 1)).
    for (size_t i = 0; i < shape[1] * shape[2] * shape[3]; ++i) {
        scoresPtr[i] = expf(scoresPtr[i]) / (1 + expf(scoresPtr[i]));
    }
    // NOTE(review): the row stride below is shape[2]; the rest of the file
    // (calcBoxes) uses shape[3] as the row stride. Identical only for square
    // heatmaps — confirm for non-square models.
    for (size_t ch = 0; ch < shape[1]; ++ch) {
        for (size_t w = 0; w < shape[2]; ++w) {
            for (size_t h = 0; h < shape[3]; ++h) {
                const float max = scoresPtr[chSize * ch + shape[2] * w + h];
                // --------------------- filter on threshold--------------------------------------
                if (max < threshold) {
                    continue;
                }
                // --------------------- store index and score------------------------------------
                scores.push_back({chSize * ch + shape[2] * w + h, max});
                bool next = true;
                // ---------------------- maxpool2d -----------------------------------------------
                for (int i = -kernel / 2; i < kernel / 2 + 1 && next; ++i) {
                    for (int j = -kernel / 2; j < kernel / 2 + 1; ++j) {
                        // Signed neighbour coordinates: the previous unsigned
                        // `w + i >= 0` test was always true (size_t + int) and
                        // relied on wrap-around being rejected by the upper
                        // bound check.
                        const int64_t nw = static_cast<int64_t>(w) + i;
                        const int64_t nh = static_cast<int64_t>(h) + j;
                        if (nw >= 0 && nw < static_cast<int64_t>(shape[2]) && nh >= 0 &&
                            nh < static_cast<int64_t>(shape[3])) {
                            if (scoresPtr[chSize * ch + shape[2] * static_cast<size_t>(nw) +
                                          static_cast<size_t>(nh)] > max) {
                                scores.pop_back();
                                next = false;
                                break;
                            }
                        } else {
                            // Out-of-range neighbour: after the sigmoid all
                            // scores are positive, so this never rejects.
                            if (max < 0) {
                                scores.pop_back();
                                next = false;
                                break;
                            }
                        }
                    }
                }
            }
        }
    }
    return scores;
}
// Runs sigmoid + NMS over the heatmap and returns the surviving
// (flat index, score) pairs.
static std::vector<std::pair<size_t, float>> filterScores(const ov::Tensor& scoresTensor, float threshold) {
    auto shape = scoresTensor.get_shape();
    // nms() mutates the tensor data in place (sigmoid), hence the non-const pointer.
    float* scoresPtr = scoresTensor.data<float>();
    return nms(scoresPtr, shape, threshold);
}
// Gathers the (dx, dy) center-offset regression for each kept peak.
// s.first % chSize is the spatial position inside one channel plane;
// regression channel 0 holds dx and channel 1 holds dy.
// @param chSize  elements per channel plane (H * W)
std::vector<std::pair<float, float>> filterReg(const ov::Tensor& regressionTensor,
                                               const std::vector<std::pair<size_t, float>>& scores,
                                               size_t chSize) {
    const float* regPtr = regressionTensor.data<float>();
    std::vector<std::pair<float, float>> reg;
    reg.reserve(scores.size());  // exact size is known up front
    for (const auto& s : scores) {
        reg.push_back({regPtr[s.first % chSize], regPtr[chSize + s.first % chSize]});
    }
    return reg;
}
// Gathers the predicted (width, height) for each kept peak.
// Channel 0 of the WH tensor holds widths, channel 1 heights.
// @param chSize  elements per channel plane (H * W)
std::vector<std::pair<float, float>> filterWH(const ov::Tensor& whTensor,
                                              const std::vector<std::pair<size_t, float>>& scores,
                                              size_t chSize) {
    const float* whPtr = whTensor.data<float>();
    std::vector<std::pair<float, float>> wh;
    wh.reserve(scores.size());  // exact size is known up front
    for (const auto& s : scores) {
        wh.push_back({whPtr[s.first % chSize], whPtr[chSize + s.first % chSize]});
    }
    return wh;
}
// Converts center positions, offsets, and sizes into corner-form boxes in
// heatmap coordinates.
std::vector<ModelCenterNet::BBox> calcBoxes(const std::vector<std::pair<size_t, float>>& scores,
                                            const std::vector<std::pair<float, float>>& reg,
                                            const std::vector<std::pair<float, float>>& wh,
                                            const ov::Shape& shape) {
    std::vector<ModelCenterNet::BBox> boxes(scores.size());
    for (size_t i = 0; i < boxes.size(); ++i) {
        // Spatial position inside one channel plane.
        const size_t planeIdx = scores[i].first % (shape[2] * shape[3]);
        const float cx = planeIdx % shape[3] + reg[i].first;
        const float cy = planeIdx / shape[3] + reg[i].second;
        const float halfW = wh[i].first / 2.0f;
        const float halfH = wh[i].second / 2.0f;
        boxes[i].left = cx - halfW;
        boxes[i].top = cy - halfH;
        boxes[i].right = cx + halfW;
        boxes[i].bottom = cy + halfH;
    }
    return boxes;
}
// Maps boxes from heatmap coordinates back to original-image coordinates
// using the inverse of the preprocessing letterbox/affine geometry.
// @param shape    heatmap shape (NCHW); shape[2] x shape[3] is the grid size
// @param scale    letterbox square side — max(imgWidth, imgHeight) at the call site
// @param centerX  source image center x
// @param centerY  source image center y
void transform(std::vector<ModelCenterNet::BBox>& boxes,
               const ov::Shape& shape,
               int scale,
               float centerX,
               float centerY) {
    // cv::getAffineTransform produces a CV_64F 2x3 matrix; assigning it to
    // cv::Mat1f converts the elements to float.
    cv::Mat1f trans = getAffineTransform(centerX, centerY, scale, 0, shape[2], shape[3], true);
    for (auto& b : boxes) {
        // Apply the 2x3 affine matrix to both box corners.
        ModelCenterNet::BBox newbb;
        newbb.left = trans.at<float>(0, 0) * b.left + trans.at<float>(0, 1) * b.top + trans.at<float>(0, 2);
        newbb.top = trans.at<float>(1, 0) * b.left + trans.at<float>(1, 1) * b.top + trans.at<float>(1, 2);
        newbb.right = trans.at<float>(0, 0) * b.right + trans.at<float>(0, 1) * b.bottom + trans.at<float>(0, 2);
        newbb.bottom = trans.at<float>(1, 0) * b.right + trans.at<float>(1, 1) * b.bottom + trans.at<float>(1, 2);
        b = newbb;
    }
}
// Decodes CenterNet outputs into final detections: threshold + NMS on the
// heatmap, box assembly from offset/size heads, inverse letterbox transform,
// clamping to the source image.
std::unique_ptr<ResultBase> ModelCenterNet::postprocess(InferenceResult& infResult) {
    // --------------------------- Filter data and get valid indices ---------------------------------
    // outputsNames was sorted in prepareInputsOutputs(); the assumed roles are
    // [0] heatmap, [1] center-offset regression, [2] box size.
    const auto& heatmapTensor = infResult.outputsData[outputsNames[0]];
    const auto& heatmapTensorShape = heatmapTensor.get_shape();
    const auto chSize = heatmapTensorShape[2] * heatmapTensorShape[3];  // one channel plane
    const auto scores = filterScores(heatmapTensor, confidenceThreshold);
    const auto& regressionTensor = infResult.outputsData[outputsNames[1]];
    const auto reg = filterReg(regressionTensor, scores, chSize);
    const auto& whTensor = infResult.outputsData[outputsNames[2]];
    const auto wh = filterWH(whTensor, scores, chSize);
    // --------------------------- Calculate bounding boxes & apply inverse affine transform ----------
    auto boxes = calcBoxes(scores, reg, wh, heatmapTensorShape);
    const auto imgWidth = infResult.internalModelData->asRef<InternalImageModelData>().inputImgWidth;
    const auto imgHeight = infResult.internalModelData->asRef<InternalImageModelData>().inputImgHeight;
    // The letterbox square used during preprocessing.
    const auto scale = std::max(imgWidth, imgHeight);
    const float centerX = imgWidth / 2.0f;
    const float centerY = imgHeight / 2.0f;
    transform(boxes, heatmapTensorShape, scale, centerX, centerY);
    // --------------------------- Create detection result objects ------------------------------------
    DetectionResult* result = new DetectionResult(infResult.frameId, infResult.metaData);
    result->objects.reserve(scores.size());
    for (size_t i = 0; i < scores.size(); ++i) {
        DetectedObject desc;
        desc.confidence = scores[i].second;
        desc.labelID = scores[i].first / chSize;  // heatmap channel index == class id
        desc.label = getLabelName(desc.labelID);
        // Clamp coordinates to the original image bounds.
        desc.x = clamp(boxes[i].left, 0.f, static_cast<float>(imgWidth));
        desc.y = clamp(boxes[i].top, 0.f, static_cast<float>(imgHeight));
        desc.width = clamp(boxes[i].getWidth(), 0.f, static_cast<float>(imgWidth));
        desc.height = clamp(boxes[i].getHeight(), 0.f, static_cast<float>(imgHeight));
        result->objects.push_back(desc);
    }
    return std::unique_ptr<ResultBase>(result);
}

View File

@@ -0,0 +1,261 @@
/*
// Copyright (C) 2020-2024 Intel Corporation
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
*/
#include "models/detection_model_faceboxes.h"
#include <algorithm>
#include <cmath>
#include <map>
#include <stdexcept>
#include <openvino/openvino.hpp>
#include <utils/common.hpp>
#include <utils/nms.hpp>
#include <utils/ocv_common.hpp>
#include "models/internal_model_data.h"
#include "models/results.h"
// Constructs a FaceBoxes wrapper — a single-class ("Face") detector.
// @param boxIOUThreshold  IOU above which overlapping boxes are suppressed by NMS
// variance/steps/minSizes are the SSD-style prior parameters consumed by
// priorBoxes()/filterBoxes(); presumably they mirror the FaceBoxes training
// configuration — confirm against the model before changing.
ModelFaceBoxes::ModelFaceBoxes(const std::string& modelFileName,
                               float confidenceThreshold,
                               bool useAutoResize,
                               float boxIOUThreshold,
                               const std::string& layout)
    : DetectionModel(modelFileName, confidenceThreshold, useAutoResize, {"Face"}, layout),
      maxProposalsCount(0),  // set in prepareInputsOutputs() from the output shape
      boxIOUThreshold(boxIOUThreshold),
      variance({0.1f, 0.2f}),
      steps({32, 64, 128}),
      minSizes({{32, 64, 128}, {256}, {512}}) {}
// Configures FaceBoxes input/output preprocessing and precomputes the prior
// anchors for every feature-map level.
// @param model  model to modify in place
// @throws std::logic_error on unsupported topology
void ModelFaceBoxes::prepareInputsOutputs(std::shared_ptr<ov::Model>& model) {
    // --------------------------- Configure input & output -------------------------------------------------
    // --------------------------- Prepare input ------------------------------------------------------
    if (model->inputs().size() != 1) {
        throw std::logic_error("FaceBoxes model wrapper expects models that have only 1 input");
    }
    const ov::Shape& inputShape = model->input().get_shape();
    const ov::Layout& inputLayout = getInputLayout(model->input());
    if (inputShape[ov::layout::channels_idx(inputLayout)] != 3) {
        throw std::logic_error("Expected 3-channel input");
    }
    ov::preprocess::PrePostProcessor ppp(model);
    inputTransform.setPrecision(ppp, model->input().get_any_name());
    ppp.input().tensor().set_layout({"NHWC"});
    if (useAutoResize) {
        // Defer resizing to the runtime: accept any spatial size at inference.
        ppp.input().tensor().set_spatial_dynamic_shape();
        ppp.input()
            .preprocess()
            .convert_element_type(ov::element::f32)
            .resize(ov::preprocess::ResizeAlgorithm::RESIZE_LINEAR);
    }
    ppp.input().model().set_layout(inputLayout);
    // --------------------------- Reading image input parameters -------------------------------------------
    inputsNames.push_back(model->input().get_any_name());
    netInputWidth = inputShape[ov::layout::width_idx(inputLayout)];
    netInputHeight = inputShape[ov::layout::height_idx(inputLayout)];
    // --------------------------- Prepare output -----------------------------------------------------
    if (model->outputs().size() != 2) {
        throw std::logic_error("FaceBoxes model wrapper expects models that have 2 outputs");
    }
    const ov::Layout outputLayout{"CHW"};
    // Proposal row count of the first output; doubles as the total anchor count.
    maxProposalsCount = model->outputs().front().get_shape()[ov::layout::height_idx(outputLayout)];
    for (const auto& output : model->outputs()) {
        const auto outTensorName = output.get_any_name();
        outputsNames.push_back(outTensorName);
        ppp.output(outTensorName).tensor().set_element_type(ov::element::f32).set_layout(outputLayout);
    }
    // postprocess() assumes sorted order gives [0] boxes, [1] scores — relies
    // on the alphabetical order of the model's output names; TODO confirm.
    std::sort(outputsNames.begin(), outputsNames.end());
    model = ppp.build();
    // --------------------------- Calculating anchors ----------------------------------------------------
    // One (rows, cols) feature-map grid per stride level.
    std::vector<std::pair<size_t, size_t>> featureMaps;
    for (auto s : steps) {
        featureMaps.push_back({netInputHeight / s, netInputWidth / s});
    }
    priorBoxes(featureMaps);
}
void calculateAnchors(std::vector<Anchor>& anchors,
const std::vector<float>& vx,
const std::vector<float>& vy,
const int minSize,
const int step) {
float skx = static_cast<float>(minSize);
float sky = static_cast<float>(minSize);
std::vector<float> dense_cx, dense_cy;
for (auto x : vx) {
dense_cx.push_back(x * step);
}
for (auto y : vy) {
dense_cy.push_back(y * step);
}
for (auto cy : dense_cy) {
for (auto cx : dense_cx) {
anchors.push_back(
{cx - 0.5f * skx, cy - 0.5f * sky, cx + 0.5f * skx, cy + 0.5f * sky}); // left top right bottom
}
}
}
// Level-0 anchor generation: smaller minSizes get denser sub-cell offsets
// (quarter steps for 32, half steps for 64, cell center otherwise).
void calculateAnchorsZeroLevel(std::vector<Anchor>& anchors,
                               const int fx,
                               const int fy,
                               const std::vector<int>& minSizes,
                               const int step) {
    for (auto s : minSizes) {
        std::vector<float> vx;
        std::vector<float> vy;
        switch (s) {
            case 32:
                vx = {static_cast<float>(fx), fx + 0.25f, fx + 0.5f, fx + 0.75f};
                vy = {static_cast<float>(fy), fy + 0.25f, fy + 0.5f, fy + 0.75f};
                break;
            case 64:
                vx = {static_cast<float>(fx), fx + 0.5f};
                vy = {static_cast<float>(fy), fy + 0.5f};
                break;
            default:
                vx = {fx + 0.5f};
                vy = {fy + 0.5f};
                break;
        }
        calculateAnchors(anchors, vx, vy, s, step);
    }
}
// Precomputes the full prior-anchor set across every feature-map cell.
// Level 0 uses the denser multi-offset pattern; deeper levels place a single
// centered anchor per cell.
// @param featureMaps  (rows, cols) grid per stride level, same order as steps/minSizes
void ModelFaceBoxes::priorBoxes(const std::vector<std::pair<size_t, size_t>>& featureMaps) {
    anchors.reserve(maxProposalsCount);
    for (size_t k = 0; k < featureMaps.size(); ++k) {
        // (The original declared an unused local `std::vector<float> a;` here — removed.)
        for (size_t i = 0; i < featureMaps[k].first; ++i) {
            for (size_t j = 0; j < featureMaps[k].second; ++j) {
                if (k == 0) {
                    calculateAnchorsZeroLevel(anchors, j, i, minSizes[k], steps[k]);
                } else {
                    calculateAnchors(anchors, {j + 0.5f}, {i + 0.5f}, minSizes[k][0], steps[k]);
                }
            }
        }
    }
}
// Scans the interleaved (background, face) confidence pairs and keeps the
// face score plus the proposal index whenever it clears the threshold.
std::pair<std::vector<size_t>, std::vector<float>> filterScores(const ov::Tensor& scoresTensor,
                                                                const float confidenceThreshold) {
    auto shape = scoresTensor.get_shape();
    const float* scoresPtr = ov::Tensor{scoresTensor}.data<const float>();
    std::vector<size_t> indices;
    indices.reserve(ModelFaceBoxes::INIT_VECTOR_SIZE);
    std::vector<float> scores;
    scores.reserve(ModelFaceBoxes::INIT_VECTOR_SIZE);
    // Face confidences live at the odd offsets; i / 2 is the proposal index.
    for (size_t i = 1; i < shape[1] * shape[2]; i += 2) {
        if (scoresPtr[i] > confidenceThreshold) {
            indices.push_back(i / 2);
            scores.push_back(scoresPtr[i]);
        }
    }
    return {indices, scores};
}
// Decodes SSD-style box deltas against the prior anchors for the kept indices.
// @param boxesTensor   CHW tensor: one row of (dx, dy, dw, dh) per proposal
// @param anchors       precomputed priors, indexed by proposal index
// @param validIndices  proposal indices that passed the score filter
// @param variance      SSD decode variances {center, size}
std::vector<Anchor> filterBoxes(const ov::Tensor& boxesTensor,
                                const std::vector<Anchor>& anchors,
                                const std::vector<size_t>& validIndices,
                                const std::vector<float>& variance) {
    auto shape = boxesTensor.get_shape();
    const float* boxesPtr = ov::Tensor{boxesTensor}.data<const float>();
    std::vector<Anchor> boxes;
    boxes.reserve(validIndices.size());  // exact count is known up front
    for (auto i : validIndices) {
        const auto objStart = shape[2] * i;
        const auto dx = boxesPtr[objStart];
        const auto dy = boxesPtr[objStart + 1];
        const auto dw = boxesPtr[objStart + 2];
        const auto dh = boxesPtr[objStart + 3];
        // Standard SSD decoding: variance-scaled offsets shift the anchor
        // center; variance-scaled log sizes expand its width/height.
        const auto predCtrX = dx * variance[0] * anchors[i].getWidth() + anchors[i].getXCenter();
        const auto predCtrY = dy * variance[0] * anchors[i].getHeight() + anchors[i].getYCenter();
        const auto predW = exp(dw * variance[1]) * anchors[i].getWidth();
        const auto predH = exp(dh * variance[1]) * anchors[i].getHeight();
        boxes.push_back({static_cast<float>(predCtrX - 0.5f * predW),
                         static_cast<float>(predCtrY - 0.5f * predH),
                         static_cast<float>(predCtrX + 0.5f * predW),
                         static_cast<float>(predCtrY + 0.5f * predH)});
    }
    return boxes;
}
// Decodes raw FaceBoxes outputs into final detections: threshold scores,
// decode boxes against the priors, run NMS, then rescale to the source image.
std::unique_ptr<ResultBase> ModelFaceBoxes::postprocess(InferenceResult& infResult) {
    // Filter scores and get valid indices for bounding boxes
    // outputsNames is sorted: assumed roles are [0] boxes, [1] scores.
    const auto scoresTensor = infResult.outputsData[outputsNames[1]];
    const auto scores = filterScores(scoresTensor, confidenceThreshold);
    // Filter bounding boxes on indices
    auto boxesTensor = infResult.outputsData[outputsNames[0]];
    std::vector<Anchor> boxes = filterBoxes(boxesTensor, anchors, scores.first, variance);
    // Apply Non-maximum Suppression
    const std::vector<int> keep = nms(boxes, scores.second, boxIOUThreshold);
    // Create detection result objects
    DetectionResult* result = new DetectionResult(infResult.frameId, infResult.metaData);
    const auto imgWidth = infResult.internalModelData->asRef<InternalImageModelData>().inputImgWidth;
    const auto imgHeight = infResult.internalModelData->asRef<InternalImageModelData>().inputImgHeight;
    // Network-to-image scale factors: boxes are in network input pixels.
    const float scaleX = static_cast<float>(netInputWidth) / imgWidth;
    const float scaleY = static_cast<float>(netInputHeight) / imgHeight;
    result->objects.reserve(keep.size());
    for (auto i : keep) {
        DetectedObject desc;
        desc.confidence = scores.second[i];
        // Divide by the scale to map back to source coordinates, clamped.
        desc.x = clamp(boxes[i].left / scaleX, 0.f, static_cast<float>(imgWidth));
        desc.y = clamp(boxes[i].top / scaleY, 0.f, static_cast<float>(imgHeight));
        desc.width = clamp(boxes[i].getWidth() / scaleX, 0.f, static_cast<float>(imgWidth));
        desc.height = clamp(boxes[i].getHeight() / scaleY, 0.f, static_cast<float>(imgHeight));
        desc.labelID = 0;  // single-class detector
        desc.label = labels[0];
        result->objects.push_back(desc);
    }
    return std::unique_ptr<ResultBase>(result);
}

View File

@@ -0,0 +1,394 @@
/*
// Copyright (C) 2020-2024 Intel Corporation
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
*/
#include "models/detection_model_retinaface.h"
#include <stddef.h>
#include <algorithm>
#include <cmath>
#include <stdexcept>
#include <opencv2/core.hpp>
#include <openvino/openvino.hpp>
#include <utils/common.hpp>
#include <utils/nms.hpp>
#include "models/internal_model_data.h"
#include "models/results.h"
// Constructs a RetinaFace wrapper. Whether landmark/mask heads exist is
// discovered later in prepareInputsOutputs() from the model's output names.
// @param boxIOUThreshold  IOU above which overlapping boxes are suppressed by NMS
ModelRetinaFace::ModelRetinaFace(const std::string& modelFileName,
                                 float confidenceThreshold,
                                 bool useAutoResize,
                                 float boxIOUThreshold,
                                 const std::string& layout)
    : DetectionModel(modelFileName, confidenceThreshold, useAutoResize, {"Face"}, layout), // Default label is "Face"
      shouldDetectMasks(false),
      shouldDetectLandmarks(false),
      boxIOUThreshold(boxIOUThreshold),
      maskThreshold(0.8f),
      landmarkStd(1.0f),  // lowered to 0.2 in prepareInputsOutputs() if a mask head is found
      anchorCfg({{32, {32, 16}, 16, {1}}, {16, {8, 4}, 16, {1}}, {8, {2, 1}, 16, {1}}}) {
    // Precompute per-stride base anchors; the dense grid is laid out in
    // prepareInputsOutputs() once the feature-map sizes are known.
    generateAnchorsFpn();
}
// Configures RetinaFace pre/post-processing, classifies each model output by
// a substring of its tensor name, and precomputes the dense anchor grid for
// every FPN level.
// @param model  model to modify in place
// @throws std::logic_error on unsupported topology
void ModelRetinaFace::prepareInputsOutputs(std::shared_ptr<ov::Model>& model) {
    // --------------------------- Configure input & output -------------------------------------------------
    // --------------------------- Prepare input ------------------------------------------------------
    if (model->inputs().size() != 1) {
        throw std::logic_error("RetinaFace model wrapper expects models that have only 1 input");
    }
    const ov::Shape& inputShape = model->input().get_shape();
    const ov::Layout& inputLayout = getInputLayout(model->input());
    if (inputShape[ov::layout::channels_idx(inputLayout)] != 3) {
        throw std::logic_error("Expected 3-channel input");
    }
    ov::preprocess::PrePostProcessor ppp(model);
    ppp.input().tensor().set_element_type(ov::element::u8).set_layout({"NHWC"});
    if (useAutoResize) {
        // Defer resizing to the runtime: accept any spatial size at inference.
        ppp.input().tensor().set_spatial_dynamic_shape();
        ppp.input()
            .preprocess()
            .convert_element_type(ov::element::f32)
            .resize(ov::preprocess::ResizeAlgorithm::RESIZE_LINEAR);
    }
    ppp.input().model().set_layout(inputLayout);
    // --------------------------- Reading image input parameters -------------------------------------------
    inputsNames.push_back(model->input().get_any_name());
    netInputWidth = inputShape[ov::layout::width_idx(inputLayout)];
    netInputHeight = inputShape[ov::layout::height_idx(inputLayout)];
    // --------------------------- Prepare output -----------------------------------------------------
    // 6 = boxes+scores per level, 9 adds landmarks, 12 adds mask scores.
    const ov::OutputVector& outputs = model->outputs();
    if (outputs.size() != 6 && outputs.size() != 9 && outputs.size() != 12) {
        throw std::logic_error("RetinaFace model wrapper expects models that have 6, 9 or 12 outputs");
    }
    const ov::Layout outputLayout{"NCHW"};
    // Feature-map heights per output type, kept sorted together with the names.
    std::vector<size_t> outputsSizes[OUT_MAX];
    for (const auto& output : model->outputs()) {
        auto outTensorName = output.get_any_name();
        outputsNames.push_back(outTensorName);
        ppp.output(outTensorName).tensor().set_element_type(ov::element::f32).set_layout(outputLayout);
        // Classify each output head by a substring of its tensor name.
        OutputType type = OUT_MAX;
        if (outTensorName.find("box") != std::string::npos) {
            type = OUT_BOXES;
        } else if (outTensorName.find("cls") != std::string::npos) {
            type = OUT_SCORES;
        } else if (outTensorName.find("landmark") != std::string::npos) {
            type = OUT_LANDMARKS;
            shouldDetectLandmarks = true;
        } else if (outTensorName.find("type") != std::string::npos) {
            // A "type" head means the model also classifies mask wearing.
            type = OUT_MASKSCORES;
            labels.clear();
            labels.push_back("No Mask");
            labels.push_back("Mask");
            shouldDetectMasks = true;
            landmarkStd = 0.2f;
        } else {
            continue;  // unrecognized outputs are ignored
        }
        // Insertion sort ascending by feature-map height, so index idx pairs
        // with anchorCfg[idx] (largest stride = smallest map first).
        size_t num = output.get_shape()[ov::layout::height_idx(outputLayout)];
        size_t i = 0;
        for (; i < outputsSizes[type].size(); ++i) {
            if (num < outputsSizes[type][i]) {
                break;
            }
        }
        separateOutputsNames[type].insert(separateOutputsNames[type].begin() + i, outTensorName);
        outputsSizes[type].insert(outputsSizes[type].begin() + i, num);
    }
    model = ppp.build();
    // Lay out the dense anchor grid per FPN level by shifting the base
    // anchors across the feature map in stride-sized steps.
    for (size_t idx = 0; idx < outputsSizes[OUT_BOXES].size(); ++idx) {
        // NOTE(review): width and height are both read from the same stored
        // value, i.e. square feature maps are assumed — TODO confirm.
        size_t width = outputsSizes[OUT_BOXES][idx];
        size_t height = outputsSizes[OUT_BOXES][idx];
        auto s = anchorCfg[idx].stride;
        auto anchorNum = anchorsFpn[s].size();
        anchors.push_back(std::vector<Anchor>(height * width * anchorNum));
        for (size_t iw = 0; iw < width; ++iw) {
            size_t sw = iw * s;  // x shift in input pixels
            for (size_t ih = 0; ih < height; ++ih) {
                size_t sh = ih * s;  // y shift in input pixels
                for (size_t k = 0; k < anchorNum; ++k) {
                    Anchor& anc = anchors[idx][(ih * width + iw) * anchorNum + k];
                    anc.left = anchorsFpn[s][k].left + sw;
                    anc.top = anchorsFpn[s][k].top + sh;
                    anc.right = anchorsFpn[s][k].right + sw;
                    anc.bottom = anchorsFpn[s][k].bottom + sh;
                }
            }
        }
    }
}
std::vector<Anchor> ratioEnum(const Anchor& anchor, const std::vector<int>& ratios) {
std::vector<Anchor> retVal;
const auto w = anchor.getWidth();
const auto h = anchor.getHeight();
const auto xCtr = anchor.getXCenter();
const auto yCtr = anchor.getYCenter();
for (const auto ratio : ratios) {
const auto size = w * h;
const auto sizeRatio = static_cast<float>(size) / ratio;
const auto ws = sqrt(sizeRatio);
const auto hs = ws * ratio;
retVal.push_back({static_cast<float>(xCtr - 0.5f * (ws - 1.0f)),
static_cast<float>(yCtr - 0.5f * (hs - 1.0f)),
static_cast<float>(xCtr + 0.5f * (ws - 1.0f)),
static_cast<float>(yCtr + 0.5f * (hs - 1.0f))});
}
return retVal;
}
std::vector<Anchor> scaleEnum(const Anchor& anchor, const std::vector<int>& scales) {
std::vector<Anchor> retVal;
const auto w = anchor.getWidth();
const auto h = anchor.getHeight();
const auto xCtr = anchor.getXCenter();
const auto yCtr = anchor.getYCenter();
for (auto scale : scales) {
const auto ws = w * scale;
const auto hs = h * scale;
retVal.push_back({static_cast<float>(xCtr - 0.5f * (ws - 1.0f)),
static_cast<float>(yCtr - 0.5f * (hs - 1.0f)),
static_cast<float>(xCtr + 0.5f * (ws - 1.0f)),
static_cast<float>(yCtr + 0.5f * (hs - 1.0f))});
}
return retVal;
}
std::vector<Anchor> generateAnchors(const int baseSize,
const std::vector<int>& ratios,
const std::vector<int>& scales) {
Anchor baseAnchor{0.0f, 0.0f, baseSize - 1.0f, baseSize - 1.0f};
auto ratioAnchors = ratioEnum(baseAnchor, ratios);
std::vector<Anchor> retVal;
for (const auto& ra : ratioAnchors) {
auto addon = scaleEnum(ra, scales);
retVal.insert(retVal.end(), addon.begin(), addon.end());
}
return retVal;
}
// Fills anchorsFpn with the base anchors of every stride, keyed by stride.
void ModelRetinaFace::generateAnchorsFpn() {
    // Sort a copy by decreasing stride so the config order is deterministic.
    auto cfg = anchorCfg;
    std::sort(cfg.begin(), cfg.end(), [](const AnchorCfgLine& a, const AnchorCfgLine& b) {
        return a.stride > b.stride;
    });
    for (const auto& line : cfg) {
        anchorsFpn.emplace(line.stride, generateAnchors(line.baseSize, line.ratios, line.scales));
    }
}
// Collects flat proposal indices whose score reaches the threshold.
// Only channels x >= anchorNum are scanned; the produced index interleaves
// the spatial cell with the anchor offset: (cell * restAnchors + anchorIdx).
std::vector<size_t> thresholding(const ov::Tensor& scoresTensor, const int anchorNum, const float confidenceThreshold) {
    std::vector<size_t> result;
    result.reserve(ModelRetinaFace::INIT_VECTOR_SIZE);
    auto shape = scoresTensor.get_shape();
    const size_t restAnchors = shape[1] - anchorNum;
    const float* scoresPtr = scoresTensor.data<float>();
    for (size_t x = anchorNum; x < shape[1]; ++x) {
        for (size_t y = 0; y < shape[2]; ++y) {
            for (size_t z = 0; z < shape[3]; ++z) {
                const size_t idx = (x * shape[2] + y) * shape[3] + z;
                if (scoresPtr[idx] >= confidenceThreshold) {
                    result.push_back((y * shape[3] + z) * restAnchors + (x - anchorNum));
                }
            }
        }
    }
    return result;
}
// Appends to `scores` the confidence values corresponding to the previously
// thresholded flat indices (see thresholding() for the index encoding).
void filterScores(std::vector<float>& scores,
                  const std::vector<size_t>& indices,
                  const ov::Tensor& scoresTensor,
                  const int anchorNum) {
    const auto& shape = scoresTensor.get_shape();
    const float* data = scoresTensor.data<float>();
    const auto planeSize = shape[2] * shape[3];
    // Confidence planes begin after the first anchorNum planes
    const auto base = planeSize * anchorNum;
    for (const auto idx : indices) {
        scores.push_back(data[base + (idx % anchorNum) * planeSize + idx / anchorNum]);
    }
}
// Decodes box regression deltas for the selected indices and appends the
// resulting absolute boxes (decoded relative to their anchors) to `boxes`.
void filterBoxes(std::vector<Anchor>& boxes,
                 const std::vector<size_t>& indices,
                 const ov::Tensor& boxesTensor,
                 int anchorNum,
                 const std::vector<Anchor>& anchors) {
    const auto& shape = boxesTensor.get_shape();
    const float* rawDeltas = boxesTensor.data<float>();
    const auto boxPredLen = shape[1] / anchorNum;
    const auto planeSize = shape[2] * shape[3];
    for (const auto idx : indices) {
        // Locate the four consecutive delta planes (dx, dy, dw, dh) for this cell/anchor
        const auto base = planeSize * boxPredLen * (idx % anchorNum) + (idx / anchorNum);
        const auto dx = rawDeltas[base];
        const auto dy = rawDeltas[base + planeSize];
        const auto dw = rawDeltas[base + planeSize * 2];
        const auto dh = rawDeltas[base + planeSize * 3];
        // Center/size decoding: offsets scale with anchor size, log-space extents
        const auto ctrX = dx * anchors[idx].getWidth() + anchors[idx].getXCenter();
        const auto ctrY = dy * anchors[idx].getHeight() + anchors[idx].getYCenter();
        const auto decodedW = exp(dw) * anchors[idx].getWidth();
        const auto decodedH = exp(dh) * anchors[idx].getHeight();
        boxes.push_back({static_cast<float>(ctrX - 0.5f * (decodedW - 1.0f)),
                         static_cast<float>(ctrY - 0.5f * (decodedH - 1.0f)),
                         static_cast<float>(ctrX + 0.5f * (decodedW - 1.0f)),
                         static_cast<float>(ctrY + 0.5f * (decodedH - 1.0f))});
    }
}
// Decodes landmark regression deltas for the selected indices and appends the
// resulting absolute points to `landmarks` (LANDMARKS_NUM points per index,
// decoded relative to the matching anchor and scaled by `landmarkStd`).
void filterLandmarks(std::vector<cv::Point2f>& landmarks,
                     const std::vector<size_t>& indices,
                     const ov::Tensor& landmarksTensor,
                     int anchorNum,
                     const std::vector<Anchor>& anchors,
                     const float landmarkStd) {
    const auto& shape = landmarksTensor.get_shape();
    const float* landmarksPtr = landmarksTensor.data<float>();
    const auto landmarkPredLen = shape[1] / anchorNum;
    const auto blockWidth = shape[2] * shape[3];
    for (auto i : indices) {
        // The base offset depends only on the anchor index, not the landmark
        // number, so it is hoisted out of the inner loop (it used to be
        // recomputed for every landmark).
        const auto offset = (i % anchorNum) * landmarkPredLen * blockWidth + i / anchorNum;
        for (int j = 0; j < ModelRetinaFace::LANDMARKS_NUM; ++j) {
            // Landmark j occupies two consecutive planes: x delta then y delta
            auto deltaX = landmarksPtr[offset + j * 2 * blockWidth] * landmarkStd;
            auto deltaY = landmarksPtr[offset + (j * 2 + 1) * blockWidth] * landmarkStd;
            landmarks.push_back({deltaX * anchors[i].getWidth() + anchors[i].getXCenter(),
                                 deltaY * anchors[i].getHeight() + anchors[i].getYCenter()});
        }
    }
}
// Appends to `masks` the mask-confidence values for the selected indices.
void filterMasksScores(std::vector<float>& masks,
                       const std::vector<size_t>& indices,
                       const ov::Tensor& maskScoresTensor,
                       const int anchorNum) {
    const auto& shape = maskScoresTensor.get_shape();
    const float* maskData = maskScoresTensor.data<float>();
    const auto planeSize = shape[2] * shape[3];
    // Mask scores live after the first 2 * anchorNum channel planes
    const auto base = planeSize * anchorNum * 2;
    for (const auto idx : indices) {
        masks.push_back(maskData[base + (idx % anchorNum) * planeSize + idx / anchorNum]);
    }
}
// Converts raw per-FPN-level network outputs into final detections:
// threshold scores -> decode boxes/landmarks/mask scores -> NMS -> scale
// coordinates back to the source image size.
std::unique_ptr<ResultBase> ModelRetinaFace::postprocess(InferenceResult& infResult) {
    // Flat accumulators; entries from all FPN levels are appended in the same
    // order by each filter pass, so index i refers to the same detection in
    // scores, boxes, landmarks (x LANDMARKS_NUM) and masks.
    std::vector<float> scores;
    scores.reserve(INIT_VECTOR_SIZE);
    std::vector<Anchor> boxes;
    boxes.reserve(INIT_VECTOR_SIZE);
    std::vector<cv::Point2f> landmarks;
    std::vector<float> masks;
    if (shouldDetectLandmarks) {
        landmarks.reserve(INIT_VECTOR_SIZE);
    }
    if (shouldDetectMasks) {
        masks.reserve(INIT_VECTOR_SIZE);
    }
    // --------------------------- Gather & Filter output from all levels
    // ----------------------------------------------------------
    for (size_t idx = 0; idx < anchorCfg.size(); ++idx) {
        const auto boxRaw = infResult.outputsData[separateOutputsNames[OUT_BOXES][idx]];
        const auto scoresRaw = infResult.outputsData[separateOutputsNames[OUT_SCORES][idx]];
        auto s = anchorCfg[idx].stride;
        auto anchorNum = anchorsFpn[s].size();
        // Indices of cells passing the confidence threshold; reused by every
        // filter below so all accumulators stay aligned
        auto validIndices = thresholding(scoresRaw, anchorNum, confidenceThreshold);
        filterScores(scores, validIndices, scoresRaw, anchorNum);
        filterBoxes(boxes, validIndices, boxRaw, anchorNum, anchors[idx]);
        if (shouldDetectLandmarks) {
            const auto landmarksRaw = infResult.outputsData[separateOutputsNames[OUT_LANDMARKS][idx]];
            filterLandmarks(landmarks, validIndices, landmarksRaw, anchorNum, anchors[idx], landmarkStd);
        }
        if (shouldDetectMasks) {
            const auto masksRaw = infResult.outputsData[separateOutputsNames[OUT_MASKSCORES][idx]];
            filterMasksScores(masks, validIndices, masksRaw, anchorNum);
        }
    }
    // --------------------------- Apply Non-maximum Suppression
    // ---------------------------------------------------------- !shouldDetectLandmarks determines nms behavior, if
    // true - boundaries are included in areas calculation
    const auto keep = nms(boxes, scores, boxIOUThreshold, !shouldDetectLandmarks);
    // --------------------------- Create detection result objects
    // --------------------------------------------------------
    RetinaFaceDetectionResult* result = new RetinaFaceDetectionResult(infResult.frameId, infResult.metaData);
    const auto imgWidth = infResult.internalModelData->asRef<InternalImageModelData>().inputImgWidth;
    const auto imgHeight = infResult.internalModelData->asRef<InternalImageModelData>().inputImgHeight;
    // Network-space -> source-image-space scale factors
    const auto scaleX = static_cast<float>(netInputWidth) / imgWidth;
    const auto scaleY = static_cast<float>(netInputHeight) / imgHeight;
    result->objects.reserve(keep.size());
    result->landmarks.reserve(keep.size() * ModelRetinaFace::LANDMARKS_NUM);
    for (auto i : keep) {
        DetectedObject desc;
        desc.confidence = scores[i];
        //--- Scaling coordinates
        boxes[i].left /= scaleX;
        boxes[i].top /= scaleY;
        boxes[i].right /= scaleX;
        boxes[i].bottom /= scaleY;
        desc.x = clamp(boxes[i].left, 0.f, static_cast<float>(imgWidth));
        desc.y = clamp(boxes[i].top, 0.f, static_cast<float>(imgHeight));
        desc.width = clamp(boxes[i].getWidth(), 0.f, static_cast<float>(imgWidth));
        desc.height = clamp(boxes[i].getHeight(), 0.f, static_cast<float>(imgHeight));
        //--- Default label 0 - Face. If detecting masks then labels would be 0 - No Mask, 1 - Mask
        desc.labelID = shouldDetectMasks ? (masks[i] > maskThreshold) : 0;
        desc.label = labels[desc.labelID];
        result->objects.push_back(desc);
        //--- Scaling landmarks coordinates
        for (size_t l = 0; l < ModelRetinaFace::LANDMARKS_NUM && shouldDetectLandmarks; ++l) {
            landmarks[i * ModelRetinaFace::LANDMARKS_NUM + l].x =
                clamp(landmarks[i * ModelRetinaFace::LANDMARKS_NUM + l].x / scaleX, 0.f, static_cast<float>(imgWidth));
            landmarks[i * ModelRetinaFace::LANDMARKS_NUM + l].y =
                clamp(landmarks[i * ModelRetinaFace::LANDMARKS_NUM + l].y / scaleY, 0.f, static_cast<float>(imgHeight));
            result->landmarks.push_back(landmarks[i * ModelRetinaFace::LANDMARKS_NUM + l]);
        }
    }
    return std::unique_ptr<ResultBase>(result);
}

View File

@@ -0,0 +1,277 @@
/*
// Copyright (C) 2021-2024 Intel Corporation
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
*/
#include "models/detection_model_retinaface_pt.h"
#include <stdint.h>
#include <algorithm>
#include <cmath>
#include <map>
#include <stdexcept>
#include <string>
#include <vector>
#include <openvino/openvino.hpp>
#include <utils/common.hpp>
#include <utils/nms.hpp>
#include <utils/ocv_common.hpp>
#include "models/internal_model_data.h"
#include "models/results.h"
// Constructs the wrapper for a PyTorch-converted RetinaFace model.
// landmarksNum starts at 0 and is set by prepareInputsOutputs() when a
// landmark output is found in the model.
ModelRetinaFacePT::ModelRetinaFacePT(const std::string& modelFileName,
                                     float confidenceThreshold,
                                     bool useAutoResize,
                                     float boxIOUThreshold,
                                     const std::string& layout)
    : DetectionModel(modelFileName, confidenceThreshold, useAutoResize, {"Face"}, layout),  // Default label is "Face"
      landmarksNum(0),
      boxIOUThreshold(boxIOUThreshold) {}
// Configures preprocessing for the single image input and discovers the
// bbox / cls / (optional) landmark outputs by name substring.
// Side effects: fills inputsNames/outputsNames, netInputWidth/Height,
// landmarksNum, and precomputes `priors`; rebuilds `model` via PPP.
void ModelRetinaFacePT::prepareInputsOutputs(std::shared_ptr<ov::Model>& model) {
    // --------------------------- Configure input & output -------------------------------------------------
    // --------------------------- Prepare input ------------------------------------------------------
    if (model->inputs().size() != 1) {
        throw std::logic_error("RetinaFacePT model wrapper expects models that have only 1 input");
    }
    const ov::Shape& inputShape = model->input().get_partial_shape().get_max_shape();
    const ov::Layout& inputLayout = getInputLayout(model->input());
    if (inputShape[ov::layout::channels_idx(inputLayout)] != 3) {
        throw std::logic_error("Expected 3-channel input");
    }
    ov::preprocess::PrePostProcessor ppp(model);
    inputTransform.setPrecision(ppp, model->input().get_any_name());
    ppp.input().tensor().set_layout({"NHWC"});
    if (useAutoResize) {
        // Let the plugin resize arbitrary input sizes to the network size
        ppp.input().tensor().set_spatial_dynamic_shape();
        ppp.input()
            .preprocess()
            .convert_element_type(ov::element::f32)
            .resize(ov::preprocess::ResizeAlgorithm::RESIZE_LINEAR);
    }
    ppp.input().model().set_layout(inputLayout);
    // --------------------------- Reading image input parameters -------------------------------------------
    inputsNames.push_back(model->input().get_any_name());
    netInputWidth = inputShape[ov::layout::width_idx(inputLayout)];
    netInputHeight = inputShape[ov::layout::height_idx(inputLayout)];
    // --------------------------- Prepare output -----------------------------------------------------
    if (model->outputs().size() != 3) {
        throw std::logic_error("RetinaFace model wrapper expects models that have 3 outputs");
    }
    landmarksNum = 0;
    outputsNames.resize(2);
    // NOTE(review): the push_back below appends every output name after the two
    // reserved slots, leaving duplicate trailing entries; only the indexed
    // outputsNames[OUT_*] slots are read in this file — confirm nothing
    // iterates the whole vector before simplifying.
    const ov::Layout chw("CHW");
    const ov::Layout nchw("NCHW");
    for (auto& output : model->outputs()) {
        auto outTensorName = output.get_any_name();
        outputsNames.push_back(outTensorName);
        ppp.output(outTensorName)
            .tensor()
            .set_element_type(ov::element::f32)
            .set_layout(output.get_partial_shape().size() == 4 ? nchw : chw);
        if (outTensorName.find("bbox") != std::string::npos) {
            outputsNames[OUT_BOXES] = outTensorName;
        } else if (outTensorName.find("cls") != std::string::npos) {
            outputsNames[OUT_SCORES] = outTensorName;
        } else if (outTensorName.find("landmark") != std::string::npos) {
            // Landmarks might be optional, if it is present, resize names array to fit landmarks output name to the
            // last item of array Considering that other outputs names are already filled in or will be filled later
            outputsNames.resize(std::max(outputsNames.size(), (size_t)OUT_LANDMARKS + 1));
            outputsNames[OUT_LANDMARKS] = outTensorName;
            landmarksNum =
                output.get_partial_shape()[ov::layout::width_idx(chw)].get_length() / 2;  // Each landmark consist of 2 variables (x and y)
        } else {
            continue;
        }
    }
    if (outputsNames[OUT_BOXES] == "" || outputsNames[OUT_SCORES] == "") {
        throw std::logic_error("Bbox or cls layers are not found");
    }
    model = ppp.build();
    priors = generatePriorData();
}
// Returns the row indices whose confidence (the second value in each row,
// index 1) passes the threshold.
std::vector<size_t> ModelRetinaFacePT::filterByScore(const ov::Tensor& scoresTensor, const float confidenceThreshold) {
    std::vector<size_t> result;
    const auto& shape = scoresTensor.get_shape();
    const float* scoresPtr = scoresTensor.data<float>();
    for (size_t row = 0; row < shape[1]; ++row) {
        if (scoresPtr[row * shape[2] + 1] >= confidenceThreshold) {
            result.push_back(row);
        }
    }
    return result;
}
// Extracts the confidence value (row index 1) for each previously filtered row.
std::vector<float> ModelRetinaFacePT::getFilteredScores(const ov::Tensor& scoresTensor,
                                                        const std::vector<size_t>& indicies) {
    const auto& shape = scoresTensor.get_shape();
    const float* data = scoresTensor.data<float>();
    std::vector<float> result;
    result.reserve(indicies.size());
    for (const size_t idx : indicies) {
        result.push_back(data[idx * shape[2] + 1]);
    }
    return result;
}
// Decodes landmark deltas for the filtered rows relative to their prior boxes,
// clamps to [0, 1] and scales to the source image size.
std::vector<cv::Point2f> ModelRetinaFacePT::getFilteredLandmarks(const ov::Tensor& landmarksTensor,
                                                                 const std::vector<size_t>& indicies,
                                                                 int imgWidth,
                                                                 int imgHeight) {
    const auto& shape = landmarksTensor.get_shape();
    const float* deltas = landmarksTensor.data<float>();
    std::vector<cv::Point2f> points(landmarksNum * indicies.size());
    for (size_t n = 0; n < indicies.size(); n++) {
        const size_t priorIdx = indicies[n];
        const auto& prior = priors[priorIdx];
        for (size_t lm = 0; lm < landmarksNum; lm++) {
            // Landmark lm is stored as two consecutive values: x delta, y delta
            const float dx = deltas[priorIdx * shape[2] + lm * 2];
            const float dy = deltas[priorIdx * shape[2] + lm * 2 + 1];
            points[n * landmarksNum + lm].x = clamp(prior.cX + dx * variance[0] * prior.width, 0.f, 1.f) * imgWidth;
            points[n * landmarksNum + lm].y = clamp(prior.cY + dy * variance[0] * prior.height, 0.f, 1.f) * imgHeight;
        }
    }
    return points;
}
std::vector<ModelRetinaFacePT::Box> ModelRetinaFacePT::generatePriorData() {
const float globalMinSizes[][2] = {{16, 32}, {64, 128}, {256, 512}};
const float steps[] = {8., 16., 32.};
std::vector<ModelRetinaFacePT::Box> anchors;
for (size_t stepNum = 0; stepNum < arraySize(steps); stepNum++) {
const int featureW = static_cast<int>(std::round(netInputWidth / steps[stepNum]));
const int featureH = static_cast<int>(std::round(netInputHeight / steps[stepNum]));
const auto& minSizes = globalMinSizes[stepNum];
for (int i = 0; i < featureH; i++) {
for (int j = 0; j < featureW; j++) {
for (auto minSize : minSizes) {
const float sKX = minSize / netInputWidth;
const float sKY = minSize / netInputHeight;
const float denseCY = (i + 0.5f) * steps[stepNum] / netInputHeight;
const float denseCX = (j + 0.5f) * steps[stepNum] / netInputWidth;
anchors.push_back(ModelRetinaFacePT::Box{denseCX, denseCY, sKX, sKY});
}
}
}
}
return anchors;
}
// Decodes the raw box regressions for the filtered rows into absolute,
// image-space corner boxes using the matching prior boxes.
std::vector<Anchor> ModelRetinaFacePT::getFilteredProposals(const ov::Tensor& boxesTensor,
                                                            const std::vector<size_t>& indicies,
                                                            int imgWidth,
                                                            int imgHeight) {
    const auto& shape = boxesTensor.get_shape();
    const float* boxesPtr = boxesTensor.data<float>();
    if (shape[1] != priors.size()) {
        throw std::logic_error("rawBoxes size is not equal to priors size");
    }
    std::vector<Anchor> rects;
    rects.reserve(indicies.size());
    for (const size_t idx : indicies) {
        const auto rawBox = reinterpret_cast<const Box*>(boxesPtr + idx * shape[2]);
        const auto& prior = priors[idx];
        // Center-size decoding: offsets scaled by variance[0], log-space extents by variance[1]
        const float centerX = prior.cX + rawBox->cX * variance[0] * prior.width;
        const float centerY = prior.cY + rawBox->cY * variance[0] * prior.height;
        const float boxW = prior.width * std::exp(rawBox->width * variance[1]);
        const float boxH = prior.height * std::exp(rawBox->height * variance[1]);
        rects.push_back(Anchor{clamp(centerX - boxW / 2, 0.f, 1.f) * imgWidth,
                               clamp(centerY - boxH / 2, 0.f, 1.f) * imgHeight,
                               clamp(centerX + boxW / 2, 0.f, 1.f) * imgWidth,
                               clamp(centerY + boxH / 2, 0.f, 1.f) * imgHeight});
    }
    return rects;
}
// Converts raw network outputs into detections: score filtering ->
// landmark/box decoding -> NMS -> result assembly. Coordinates are already
// scaled to the source image inside the decode helpers.
std::unique_ptr<ResultBase> ModelRetinaFacePT::postprocess(InferenceResult& infResult) {
    // (raw_output, scale_x, scale_y, face_prob_threshold, image_size):
    const auto boxesTensor = infResult.outputsData[outputsNames[OUT_BOXES]];
    const auto scoresTensor = infResult.outputsData[outputsNames[OUT_SCORES]];
    // Indices of rows above the confidence threshold; used to keep
    // scores/landmarks/proposals aligned index-for-index
    const auto& validIndicies = filterByScore(scoresTensor, confidenceThreshold);
    const auto& scores = getFilteredScores(scoresTensor, validIndicies);
    const auto& internalData = infResult.internalModelData->asRef<InternalImageModelData>();
    // Landmarks are decoded only when the model exposes a landmark output
    const auto& landmarks = landmarksNum ? getFilteredLandmarks(infResult.outputsData[outputsNames[OUT_LANDMARKS]],
                                                                validIndicies,
                                                                internalData.inputImgWidth,
                                                                internalData.inputImgHeight)
                                         : std::vector<cv::Point2f>();
    const auto& proposals =
        getFilteredProposals(boxesTensor, validIndicies, internalData.inputImgWidth, internalData.inputImgHeight);
    // As in ModelRetinaFace::postprocess, the last flag makes NMS include
    // boundaries in area calculation when no landmarks are predicted
    const auto& keptIndicies = nms(proposals, scores, boxIOUThreshold, !landmarksNum);
    // --------------------------- Create detection result objects
    // --------------------------------------------------------
    RetinaFaceDetectionResult* result = new RetinaFaceDetectionResult(infResult.frameId, infResult.metaData);
    result->objects.reserve(keptIndicies.size());
    result->landmarks.reserve(keptIndicies.size() * landmarksNum);
    for (auto i : keptIndicies) {
        DetectedObject desc;
        desc.confidence = scores[i];
        //--- Scaling coordinates
        desc.x = proposals[i].left;
        desc.y = proposals[i].top;
        desc.width = proposals[i].getWidth();
        desc.height = proposals[i].getHeight();
        // Single-class model: label 0 is always "Face"
        desc.labelID = 0;
        desc.label = labels[desc.labelID];
        result->objects.push_back(desc);
        //--- Filtering landmarks coordinates
        for (uint32_t l = 0; l < landmarksNum; ++l) {
            result->landmarks.emplace_back(landmarks[i * landmarksNum + l].x, landmarks[i * landmarksNum + l].y);
        }
    }
    return std::unique_ptr<ResultBase>(result);
}

View File

@@ -0,0 +1,286 @@
/*
// Copyright (C) 2020-2024 Intel Corporation
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
*/
#include "models/detection_model_ssd.h"
#include <algorithm>
#include <map>
#include <stdexcept>
#include <string>
#include <unordered_set>
#include <vector>
#include <openvino/openvino.hpp>
#include <utils/common.hpp>
#include <utils/ocv_common.hpp>
#include "models/internal_model_data.h"
#include "models/results.h"
struct InputData;
// Default constructor: relies on base-class and in-class member defaults.
ModelSSD::ModelSSD() = default;
// Constructs an SSD detection model wrapper; all parameters are forwarded
// unchanged to the DetectionModel base.
ModelSSD::ModelSSD(const std::string& modelFileName,
                   float confidenceThreshold,
                   bool useAutoResize,
                   const std::vector<std::string>& labels,
                   const std::string& layout)
    : DetectionModel(modelFileName, confidenceThreshold, useAutoResize, labels, layout) {}
std::shared_ptr<InternalModelData> ModelSSD::preprocess(const InputData& inputData, ov::InferRequest& request) {
if (inputsNames.size() > 1) {
auto imageInfoTensor = request.get_tensor(inputsNames[1]);
auto info = imageInfoTensor.data<float>();
info[0] = static_cast<float>(netInputHeight);
info[1] = static_cast<float>(netInputWidth);
info[2] = 1;
request.set_tensor(inputsNames[1], imageInfoTensor);
}
return DetectionModel::preprocess(inputData, request);
}
// Dispatches to the parser matching the model's output layout.
std::unique_ptr<ResultBase> ModelSSD::postprocess(InferenceResult& infResult) {
    if (outputsNames.size() > 1) {
        return postprocessMultipleOutputs(infResult);
    }
    return postprocessSingleOutput(infResult);
}
// Parses the single detection output (validated in prepareSingleOutput() to be
// 4-D with 7 values per row): [image_id, label, confidence, xmin, ymin, xmax,
// ymax], coordinates normalized to [0, 1].
std::unique_ptr<ResultBase> ModelSSD::postprocessSingleOutput(InferenceResult& infResult) {
    const ov::Tensor& detectionsTensor = infResult.getFirstOutputTensor();
    size_t detectionsNum = detectionsTensor.get_shape()[detectionsNumId];
    // A temporary ov::Tensor copy is made to obtain a const data pointer;
    // presumably the copy shares the underlying buffer — standard ov::Tensor behavior
    const float* detections = ov::Tensor{detectionsTensor}.data<const float>();
    DetectionResult* result = new DetectionResult(infResult.frameId, infResult.metaData);
    auto retVal = std::unique_ptr<ResultBase>(result);
    const auto& internalData = infResult.internalModelData->asRef<InternalImageModelData>();
    for (size_t i = 0; i < detectionsNum; i++) {
        float image_id = detections[i * objectSize + 0];
        // A negative image_id marks the end of valid detections
        if (image_id < 0) {
            break;
        }
        float confidence = detections[i * objectSize + 2];
        /** Filtering out objects with confidence < confidence_threshold probability **/
        if (confidence > confidenceThreshold) {
            DetectedObject desc;
            desc.confidence = confidence;
            desc.labelID = static_cast<int>(detections[i * objectSize + 1]);
            desc.label = getLabelName(desc.labelID);
            // Scale normalized corners to image size; width/height are derived
            // from the clamped opposite corners
            desc.x = clamp(detections[i * objectSize + 3] * internalData.inputImgWidth,
                           0.f,
                           static_cast<float>(internalData.inputImgWidth));
            desc.y = clamp(detections[i * objectSize + 4] * internalData.inputImgHeight,
                           0.f,
                           static_cast<float>(internalData.inputImgHeight));
            desc.width = clamp(detections[i * objectSize + 5] * internalData.inputImgWidth,
                               0.f,
                               static_cast<float>(internalData.inputImgWidth)) -
                         desc.x;
            desc.height = clamp(detections[i * objectSize + 6] * internalData.inputImgHeight,
                                0.f,
                                static_cast<float>(internalData.inputImgHeight)) -
                          desc.y;
            result->objects.push_back(desc);
        }
    }
    return retVal;
}
// Parses models exposing separate "boxes"/"labels" (and optionally "scores")
// outputs; outputsNames is sorted alphabetically in prepareMultipleOutputs(),
// so [0]=boxes, [1]=labels, [2]=scores.
std::unique_ptr<ResultBase> ModelSSD::postprocessMultipleOutputs(InferenceResult& infResult) {
    const float* boxes = infResult.outputsData[outputsNames[0]].data<const float>();
    size_t detectionsNum = infResult.outputsData[outputsNames[0]].get_shape()[detectionsNumId];
    const float* labels = infResult.outputsData[outputsNames[1]].data<const float>();
    // Without a separate scores output, confidence is the 5th value of each box row
    const float* scores = outputsNames.size() > 2 ? infResult.outputsData[outputsNames[2]].data<const float>() : nullptr;
    DetectionResult* result = new DetectionResult(infResult.frameId, infResult.metaData);
    auto retVal = std::unique_ptr<ResultBase>(result);
    const auto& internalData = infResult.internalModelData->asRef<InternalImageModelData>();
    // In models with scores are stored in separate output, coordinates are normalized to [0,1]
    // In other multiple-outputs models coordinates are normalized to [0,netInputWidth] and [0,netInputHeight]
    float widthScale = static_cast<float>(internalData.inputImgWidth) / (scores ? 1 : netInputWidth);
    float heightScale = static_cast<float>(internalData.inputImgHeight) / (scores ? 1 : netInputHeight);
    for (size_t i = 0; i < detectionsNum; i++) {
        float confidence = scores ? scores[i] : boxes[i * objectSize + 4];
        /** Filtering out objects with confidence < confidence_threshold probability **/
        if (confidence > confidenceThreshold) {
            DetectedObject desc;
            desc.confidence = confidence;
            desc.labelID = static_cast<int>(labels[i]);
            desc.label = getLabelName(desc.labelID);
            // Box rows are [xmin, ymin, xmax, ymax]; width/height derived from
            // the clamped opposite corners
            desc.x = clamp(boxes[i * objectSize] * widthScale, 0.f, static_cast<float>(internalData.inputImgWidth));
            desc.y =
                clamp(boxes[i * objectSize + 1] * heightScale, 0.f, static_cast<float>(internalData.inputImgHeight));
            desc.width =
                clamp(boxes[i * objectSize + 2] * widthScale, 0.f, static_cast<float>(internalData.inputImgWidth)) -
                desc.x;
            desc.height =
                clamp(boxes[i * objectSize + 3] * heightScale, 0.f, static_cast<float>(internalData.inputImgHeight)) -
                desc.y;
            result->objects.push_back(desc);
        }
    }
    return retVal;
}
// Configures preprocessing for the image input (4-D) and the optional image
// info input (2-D), then delegates output preparation based on output count.
// Side effects: fills inputsNames, netInputWidth/Height; rebuilds `model`.
void ModelSSD::prepareInputsOutputs(std::shared_ptr<ov::Model>& model) {
    // --------------------------- Configure input & output -------------------------------------------------
    // --------------------------- Prepare input ------------------------------------------------------
    ov::preprocess::PrePostProcessor ppp(model);
    for (const auto& input : model->inputs()) {
        auto inputTensorName = input.get_any_name();
        const ov::Shape& shape = input.get_shape();
        ov::Layout inputLayout = getInputLayout(input);
        if (shape.size() == 4) {  // 1st input contains images
            // The image input is always stored at inputsNames[0]
            if (inputsNames.empty()) {
                inputsNames.push_back(inputTensorName);
            } else {
                inputsNames[0] = inputTensorName;
            }
            inputTransform.setPrecision(ppp, inputTensorName);
            ppp.input(inputTensorName).tensor().set_layout({"NHWC"});
            if (useAutoResize) {
                // Let the plugin resize arbitrary input sizes to the network size
                ppp.input(inputTensorName).tensor().set_spatial_dynamic_shape();
                ppp.input(inputTensorName)
                    .preprocess()
                    .convert_element_type(ov::element::f32)
                    .resize(ov::preprocess::ResizeAlgorithm::RESIZE_LINEAR);
            }
            ppp.input(inputTensorName).model().set_layout(inputLayout);
            netInputWidth = shape[ov::layout::width_idx(inputLayout)];
            netInputHeight = shape[ov::layout::height_idx(inputLayout)];
        } else if (shape.size() == 2) {  // 2nd input contains image info
            // Filled with [H, W, 1] by ModelSSD::preprocess at inference time
            inputsNames.resize(2);
            inputsNames[1] = inputTensorName;
            ppp.input(inputTensorName).tensor().set_element_type(ov::element::f32);
        } else {
            throw std::logic_error("Unsupported " + std::to_string(input.get_shape().size()) +
                                   "D "
                                   "input layer '" +
                                   input.get_any_name() +
                                   "'. "
                                   "Only 2D and 4D input layers are supported");
        }
    }
    model = ppp.build();
    // --------------------------- Prepare output -----------------------------------------------------
    if (model->outputs().size() == 1) {
        prepareSingleOutput(model);
    } else {
        prepareMultipleOutputs(model);
    }
}
// Validates and configures the single SSD detection output: must be 4-D with
// 7 values per detection row (see postprocessSingleOutput for the row format).
// Side effects: fills outputsNames, detectionsNumId, objectSize; rebuilds `model`.
void ModelSSD::prepareSingleOutput(std::shared_ptr<ov::Model>& model) {
    const auto& output = model->output();
    outputsNames.push_back(output.get_any_name());
    const ov::Shape& shape = output.get_shape();
    // Hold the layout by value — the original bound a const reference to a
    // temporary, which works (lifetime extension) but obscures ownership
    const ov::Layout layout("NCHW");
    if (shape.size() != 4) {
        throw std::logic_error("SSD single output must have 4 dimensions, but had " + std::to_string(shape.size()));
    }
    detectionsNumId = ov::layout::height_idx(layout);
    objectSize = shape[ov::layout::width_idx(layout)];
    if (objectSize != 7) {
        throw std::logic_error("SSD single output must have 7 as a last dimension, but had " +
                               std::to_string(objectSize));
    }
    ov::preprocess::PrePostProcessor ppp(model);
    ppp.output().tensor().set_element_type(ov::element::f32).set_layout(layout);
    model = ppp.build();
}
// Discovers the "boxes"/"labels"/"scores" outputs by tensor-name substring,
// validates the boxes shape, and configures output precision/layout.
// Side effects: fills outputsNames (sorted), detectionsNumId, objectSize;
// rebuilds `model`.
void ModelSSD::prepareMultipleOutputs(std::shared_ptr<ov::Model>& model) {
    const ov::OutputVector& outputs = model->outputs();
    for (auto& output : outputs) {
        const auto& tensorNames = output.get_names();
        // Take at most one name per output; break as soon as a known substring matches
        for (const auto& name : tensorNames) {
            if (name.find("boxes") != std::string::npos) {
                outputsNames.push_back(name);
                break;
            } else if (name.find("labels") != std::string::npos) {
                outputsNames.push_back(name);
                break;
            } else if (name.find("scores") != std::string::npos) {
                outputsNames.push_back(name);
                break;
            }
        }
    }
    if (outputsNames.size() != 2 && outputsNames.size() != 3) {
        throw std::logic_error("SSD model wrapper must have 2 or 3 outputs, but had " +
                               std::to_string(outputsNames.size()));
    }
    // Alphabetical sort yields a stable order: "boxes" < "labels" < "scores",
    // which postprocessMultipleOutputs relies on for indexing
    std::sort(outputsNames.begin(), outputsNames.end());
    ov::preprocess::PrePostProcessor ppp(model);
    const auto& boxesShape = model->output(outputsNames[0]).get_partial_shape().get_max_shape();
    ov::Layout boxesLayout;
    if (boxesShape.size() == 2) {
        // [n][5] rows: coordinates plus an inline confidence value
        boxesLayout = "NC";
        detectionsNumId = ov::layout::batch_idx(boxesLayout);
        objectSize = boxesShape[ov::layout::channels_idx(boxesLayout)];
        if (objectSize != 5) {
            throw std::logic_error("Incorrect 'boxes' output shape, [n][5] shape is required");
        }
    } else if (boxesShape.size() == 3) {
        // [b][n][4] rows: coordinates only; confidences come from the scores output
        boxesLayout = "CHW";
        detectionsNumId = ov::layout::height_idx(boxesLayout);
        objectSize = boxesShape[ov::layout::width_idx(boxesLayout)];
        if (objectSize != 4) {
            throw std::logic_error("Incorrect 'boxes' output shape, [b][n][4] shape is required");
        }
    } else {
        throw std::logic_error("Incorrect number of 'boxes' output dimensions, expected 2 or 3, but had " +
                               std::to_string(boxesShape.size()));
    }
    ppp.output(outputsNames[0]).tensor().set_layout(boxesLayout);
    for (const auto& outName : outputsNames) {
        ppp.output(outName).tensor().set_element_type(ov::element::f32);
    }
    model = ppp.build();
}

View File

@@ -0,0 +1,482 @@
/*
// Copyright (C) 2020-2024 Intel Corporation
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
*/
#include "models/detection_model_yolo.h"
#include <algorithm>
#include <cmath>
#include <cstdint>
#include <stdexcept>
#include <string>
#include <utility>
#include <vector>
#include <openvino/openvino.hpp>
#include <utils/common.hpp>
#include <utils/slog.hpp>
#include "models/internal_model_data.h"
#include "models/results.h"
// Fallback anchor tables used when the caller supplies no anchors, indexed by
// the YoloVersion enum. Values are consumed in pairs — presumably
// (width, height) of each anchor box; confirm against the model documentation.
std::vector<float> defaultAnchors[] = {
    // YOLOv1v2
    {0.57273f, 0.677385f, 1.87446f, 2.06253f, 3.33843f, 5.47434f, 7.88282f, 3.52778f, 9.77052f, 9.16828f},
    // YOLOv3
    {10.0f,
     13.0f,
     16.0f,
     30.0f,
     33.0f,
     23.0f,
     30.0f,
     61.0f,
     62.0f,
     45.0f,
     59.0f,
     119.0f,
     116.0f,
     90.0f,
     156.0f,
     198.0f,
     373.0f,
     326.0f},
    // YOLOv4
    {12.0f,
     16.0f,
     19.0f,
     36.0f,
     40.0f,
     28.0f,
     36.0f,
     75.0f,
     76.0f,
     55.0f,
     72.0f,
     146.0f,
     142.0f,
     110.0f,
     192.0f,
     243.0f,
     459.0f,
     401.0f},
    // YOLOv4_Tiny
    {10.0f, 14.0f, 23.0f, 27.0f, 37.0f, 58.0f, 81.0f, 82.0f, 135.0f, 169.0f, 344.0f, 319.0f},
    // YOLOF
    {16.0f, 16.0f, 32.0f, 32.0f, 64.0f, 64.0f, 128.0f, 128.0f, 256.0f, 256.0f, 512.0f, 512.0f}};
// Fallback mask tables (also indexed by YoloVersion): each entry selects which
// anchor pairs a given output uses; empty for versions whose masks come from
// the RegionYolo ops embedded in the model.
const std::vector<int64_t> defaultMasks[] = {
    // YOLOv1v2
    {},
    // YOLOv3
    {},
    // YOLOv4
    {0, 1, 2, 3, 4, 5, 6, 7, 8},
    // YOLOv4_Tiny
    {1, 2, 3, 3, 4, 5},
    // YOLOF
    {0, 1, 2, 3, 4, 5}};
// Logistic (sigmoid) activation: maps any real input into the open interval (0, 1).
static inline float sigmoid(float x) {
    return 1.f / (1.f + std::exp(-x));
}
// Identity activation: returns its argument unchanged.
static inline float linear(float x) {
    return x;
}
// Constructs a YOLO detection model wrapper. yoloVersion starts as YOLO_V3 and
// is refined in prepareInputsOutputs(); presetAnchors/presetMasks, when
// non-empty, override the built-in defaults there.
ModelYolo::ModelYolo(const std::string& modelFileName,
                     float confidenceThreshold,
                     bool useAutoResize,
                     bool useAdvancedPostprocessing,
                     float boxIOUThreshold,
                     const std::vector<std::string>& labels,
                     const std::vector<float>& anchors,
                     const std::vector<int64_t>& masks,
                     const std::string& layout)
    : DetectionModel(modelFileName, confidenceThreshold, useAutoResize, labels, layout),
      boxIOUThreshold(boxIOUThreshold),
      useAdvancedPostprocessing(useAdvancedPostprocessing),
      yoloVersion(YOLO_V3),
      presetAnchors(anchors),
      presetMasks(masks) {}
// Configures preprocessing for the single image input, detects the YOLO flavor
// (from embedded RegionYolo ops or, failing that, from the output count) and
// builds one Region descriptor per output.
// Side effects: fills inputsNames/outputsNames, netInputWidth/Height,
// yoloVersion, isObjConf, yoloRegionLayout and `regions`; rebuilds `model`.
void ModelYolo::prepareInputsOutputs(std::shared_ptr<ov::Model>& model) {
    // --------------------------- Configure input & output -------------------------------------------------
    // --------------------------- Prepare input ------------------------------------------------------
    if (model->inputs().size() != 1) {
        throw std::logic_error("YOLO model wrapper accepts models that have only 1 input");
    }
    const auto& input = model->input();
    const ov::Shape& inputShape = model->input().get_shape();
    ov::Layout inputLayout = getInputLayout(input);
    if (inputShape[ov::layout::channels_idx(inputLayout)] != 3) {
        throw std::logic_error("Expected 3-channel input");
    }
    ov::preprocess::PrePostProcessor ppp(model);
    inputTransform.setPrecision(ppp, input.get_any_name());
    ppp.input().tensor().set_layout({"NHWC"});
    if (useAutoResize) {
        // Let the plugin resize arbitrary input sizes to the network size
        ppp.input().tensor().set_spatial_dynamic_shape();
        ppp.input()
            .preprocess()
            .convert_element_type(ov::element::f32)
            .resize(ov::preprocess::ResizeAlgorithm::RESIZE_LINEAR);
    }
    ppp.input().model().set_layout(inputLayout);
    //--- Reading image input parameters
    inputsNames.push_back(model->input().get_any_name());
    netInputWidth = inputShape[ov::layout::width_idx(inputLayout)];
    netInputHeight = inputShape[ov::layout::height_idx(inputLayout)];
    // --------------------------- Prepare output -----------------------------------------------------
    const ov::OutputVector& outputs = model->outputs();
    std::map<std::string, ov::Shape> outShapes;
    for (auto& out : outputs) {
        ppp.output(out.get_any_name()).tensor().set_element_type(ov::element::f32);
        if (out.get_shape().size() == 4) {
            // Non-square H/W under an NCHW reading but square under NHWC means
            // the tensor is actually laid out as NHWC
            if (out.get_shape()[ov::layout::height_idx("NCHW")] != out.get_shape()[ov::layout::width_idx("NCHW")] &&
                out.get_shape()[ov::layout::height_idx("NHWC")] == out.get_shape()[ov::layout::width_idx("NHWC")]) {
                ppp.output(out.get_any_name()).model().set_layout("NHWC");
                // outShapes are saved before ppp.build() thus set yoloRegionLayout as it is in model before ppp.build()
                yoloRegionLayout = "NHWC";
            }
            // yolo-v1-tiny-tf out shape is [1, 21125] thus set layout only for 4 dim tensors
            ppp.output(out.get_any_name()).tensor().set_layout("NCHW");
        }
        outputsNames.push_back(out.get_any_name());
        outShapes[out.get_any_name()] = out.get_shape();
    }
    model = ppp.build();
    yoloVersion = YOLO_V3;
    bool isRegionFound = false;
    // Models converted with embedded RegionYolo ops carry their own region
    // parameters; build Region descriptors directly from those ops
    for (const auto& op : model->get_ordered_ops()) {
        if (std::string("RegionYolo") == op->get_type_name()) {
            auto regionYolo = std::dynamic_pointer_cast<ov::op::v0::RegionYolo>(op);
            if (regionYolo) {
                if (!regionYolo->get_mask().size()) {
                    yoloVersion = YOLO_V1V2;
                }
                const auto& opName = op->get_friendly_name();
                for (const auto& out : outputs) {
                    if (out.get_node()->get_friendly_name() == opName ||
                        out.get_node()->get_input_node_ptr(0)->get_friendly_name() == opName) {
                        isRegionFound = true;
                        regions.emplace(out.get_any_name(), Region(regionYolo));
                    }
                }
            }
        }
    }
    if (!isRegionFound) {
        // No RegionYolo ops: infer the YOLO flavor from the number of outputs
        switch (outputsNames.size()) {
            case 1:
                yoloVersion = YOLOF;
                break;
            case 2:
                yoloVersion = YOLO_V4_TINY;
                break;
            case 3:
                yoloVersion = YOLO_V4;
                break;
        }
        int num = yoloVersion == YOLOF ? 6 : 3;
        isObjConf = yoloVersion == YOLOF ? 0 : 1;
        int i = 0;
        auto chosenMasks = presetMasks.size() ? presetMasks : defaultMasks[yoloVersion];
        if (chosenMasks.size() != num * outputs.size()) {
            // Report the size of the masks array that was actually chosen — the
            // previous message printed presetMasks.size(), which is 0 whenever
            // the defaults were in use
            throw std::runtime_error(std::string("Invalid size of masks array, got ") +
                                     std::to_string(chosenMasks.size()) + ", should be " +
                                     std::to_string(num * outputs.size()));
        }
        // Order outputs by descending feature-map height so each mask group of
        // `num` entries lines up with the matching stride level
        std::sort(outputsNames.begin(),
                  outputsNames.end(),
                  [&outShapes, this](const std::string& x, const std::string& y) {
                      return outShapes[x][ov::layout::height_idx(yoloRegionLayout)] >
                             outShapes[y][ov::layout::height_idx(yoloRegionLayout)];
                  });
        for (const auto& name : outputsNames) {
            const auto& shape = outShapes[name];
            if (shape[ov::layout::channels_idx(yoloRegionLayout)] % num != 0) {
                throw std::logic_error(std::string("Output tensor ") + name + " has wrong channel dimension");
            }
            regions.emplace(
                name,
                Region(shape[ov::layout::channels_idx(yoloRegionLayout)] / num - 4 - (isObjConf ? 1 : 0),
                       4,
                       presetAnchors.size() ? presetAnchors : defaultAnchors[yoloVersion],
                       std::vector<int64_t>(chosenMasks.begin() + i * num, chosenMasks.begin() + (i + 1) * num),
                       shape[ov::layout::width_idx(yoloRegionLayout)],
                       shape[ov::layout::height_idx(yoloRegionLayout)]));
            i++;
        }
    } else {
        // Currently externally set anchors and masks are supported only for YoloV4
        if (presetAnchors.size() || presetMasks.size()) {
            slog::warn << "Preset anchors and mask can be set for YoloV4 model only. "
                          "This model is not YoloV4, so these options will be ignored."
                       << slog::endl;
        }
    }
}
std::unique_ptr<ResultBase> ModelYolo::postprocess(InferenceResult& infResult) {
    // Collects detections from every YOLO output tensor, then removes overlapping boxes.
    // Fix: use make_unique instead of a raw owning `new` — an exception thrown anywhere
    // before the old `return std::unique_ptr<ResultBase>(result)` would have leaked.
    auto result = std::make_unique<DetectionResult>(infResult.frameId, infResult.metaData);
    std::vector<DetectedObject> objects;

    // Parse each output region into candidate boxes, already scaled to original-image coordinates
    const auto& internalData = infResult.internalModelData->asRef<InternalImageModelData>();
    for (auto& output : infResult.outputsData) {
        this->parseYOLOOutput(output.first,
                              output.second,
                              netInputHeight,
                              netInputWidth,
                              internalData.inputImgHeight,
                              internalData.inputImgWidth,
                              objects);
    }

    if (useAdvancedPostprocessing) {
        // Advanced postprocessing: keep a box only if no same-class box with strictly higher
        // confidence overlaps it by at least boxIOUThreshold. A box never suppresses itself,
        // since the strict '<' comparison is false when obj1 and obj2 are the same object.
        for (const auto& obj1 : objects) {
            bool isGoodResult = true;
            for (const auto& obj2 : objects) {
                if (obj1.labelID == obj2.labelID && obj1.confidence < obj2.confidence &&
                    intersectionOverUnion(obj1, obj2) >= boxIOUThreshold) {
                    isGoodResult = false;
                    break;
                }
            }
            if (isGoodResult) {
                result->objects.push_back(obj1);
            }
        }
    } else {
        // Classic greedy NMS: sort by confidence descending, then zero the confidence of any
        // lower-ranked box overlapping an already-accepted one (0 acts as a "suppressed" mark).
        std::sort(objects.begin(), objects.end(), [](const DetectedObject& x, const DetectedObject& y) {
            return x.confidence > y.confidence;
        });
        for (size_t i = 0; i < objects.size(); ++i) {
            if (objects[i].confidence == 0)
                continue;  // already suppressed by a stronger box
            for (size_t j = i + 1; j < objects.size(); ++j)
                if (intersectionOverUnion(objects[i], objects[j]) >= boxIOUThreshold)
                    objects[j].confidence = 0;
            result->objects.push_back(objects[i]);
        }
    }

    // Moves unique_ptr<DetectionResult> into the unique_ptr<ResultBase> return type.
    return std::move(result);
}
// Decodes one YOLO region output tensor into DetectedObject boxes, appended to 'objects'.
// 'resized_im_h/w' is the network input size, 'original_im_h/w' the source image size;
// boxes are produced directly in original-image coordinates.
void ModelYolo::parseYOLOOutput(const std::string& output_name,
                                const ov::Tensor& tensor,
                                const unsigned long resized_im_h,
                                const unsigned long resized_im_w,
                                const unsigned long original_im_h,
                                const unsigned long original_im_w,
                                std::vector<DetectedObject>& objects) {
    // --------------------------- Extracting layer parameters -------------------------------------
    auto it = regions.find(output_name);
    if (it == regions.end()) {
        throw std::runtime_error(std::string("Can't find output layer with name ") + output_name);
    }
    auto& region = it->second;

    int sideW = 0;
    int sideH = 0;
    unsigned long scaleH;
    unsigned long scaleW;
    switch (yoloVersion) {
    case YOLO_V1V2:
        // V1/V2: grid size and the coordinate scale both come from the RegionYolo op itself.
        sideH = region.outputHeight;
        sideW = region.outputWidth;
        scaleW = region.outputWidth;
        scaleH = region.outputHeight;
        break;
    case YOLO_V3:
    case YOLO_V4:
    case YOLO_V4_TINY:
    case YOLOF:
        // V3+: grid size is read from the tensor shape (assumed NCHW here), and box sizes
        // are scaled relative to the network input resolution.
        sideH = static_cast<int>(tensor.get_shape()[ov::layout::height_idx("NCHW")]);
        sideW = static_cast<int>(tensor.get_shape()[ov::layout::width_idx("NCHW")]);
        scaleW = resized_im_w;
        scaleH = resized_im_h;
        break;
    }

    auto entriesNum = sideW * sideH;
    const float* outData = tensor.data<float>();
    // V4-family/YOLOF emit raw logits, so confidences/offsets need a sigmoid; older models
    // already emit final values (identity).
    auto postprocessRawData =
        (yoloVersion == YOLO_V4 || yoloVersion == YOLO_V4_TINY || yoloVersion == YOLOF) ? sigmoid : linear;

    // --------------------------- Parsing YOLO Region output -------------------------------------
    // Iterate over every grid cell (i) and every anchor (n) within that cell.
    for (int i = 0; i < entriesNum; ++i) {
        int row = i / sideW;
        int col = i % sideW;
        for (int n = 0; n < region.num; ++n) {
            //--- Getting region data: objectness lives at channel offset region.coords,
            // box coordinates start at channel offset 0 (see calculateEntryIndex).
            int obj_index = calculateEntryIndex(entriesNum,
                                                region.coords,
                                                region.classes + isObjConf,
                                                n * entriesNum + i,
                                                region.coords);
            int box_index =
                calculateEntryIndex(entriesNum, region.coords, region.classes + isObjConf, n * entriesNum + i, 0);
            // Models without an objectness channel (isObjConf == 0) use scale = 1.
            float scale = isObjConf ? postprocessRawData(outData[obj_index]) : 1;

            //--- Preliminary check for confidence threshold conformance
            if (scale >= confidenceThreshold) {
                //--- Calculating scaled region's coordinates
                float x, y;
                if (yoloVersion == YOLOF) {
                    // YOLOF: offsets are anchor-relative, normalized by the network input size.
                    x = (static_cast<float>(col) / sideW +
                         outData[box_index + 0 * entriesNum] * region.anchors[2 * n] / scaleW) *
                        original_im_w;
                    y = (static_cast<float>(row) / sideH +
                         outData[box_index + 1 * entriesNum] * region.anchors[2 * n + 1] / scaleH) *
                        original_im_h;
                } else {
                    // Standard YOLO: (cell + offset) / gridSize gives normalized center coords.
                    x = static_cast<float>((col + postprocessRawData(outData[box_index + 0 * entriesNum])) / sideW *
                                           original_im_w);
                    y = static_cast<float>((row + postprocessRawData(outData[box_index + 1 * entriesNum])) / sideH *
                                           original_im_h);
                }
                // Box sizes: exp(raw) * anchor, rescaled from network input to original image.
                float height = static_cast<float>(std::exp(outData[box_index + 3 * entriesNum]) *
                                                  region.anchors[2 * n + 1] * original_im_h / scaleH);
                float width = static_cast<float>(std::exp(outData[box_index + 2 * entriesNum]) * region.anchors[2 * n] *
                                                 original_im_w / scaleW);

                DetectedObject obj;
                // Convert center-based (x, y) to a clamped top-left corner + size.
                obj.x = clamp(x - width / 2, 0.f, static_cast<float>(original_im_w));
                obj.y = clamp(y - height / 2, 0.f, static_cast<float>(original_im_h));
                obj.width = clamp(width, 0.f, static_cast<float>(original_im_w - obj.x));
                obj.height = clamp(height, 0.f, static_cast<float>(original_im_h - obj.y));

                // One DetectedObject per class whose combined score passes the threshold —
                // a single anchor can therefore emit several detections.
                for (size_t j = 0; j < region.classes; ++j) {
                    int class_index = calculateEntryIndex(entriesNum,
                                                          region.coords,
                                                          region.classes + isObjConf,
                                                          n * entriesNum + i,
                                                          region.coords + isObjConf + j);
                    float prob = scale * postprocessRawData(outData[class_index]);

                    //--- Checking confidence threshold conformance and adding region to the list
                    if (prob >= confidenceThreshold) {
                        obj.confidence = prob;
                        obj.labelID = j;
                        obj.label = getLabelName(obj.labelID);
                        objects.push_back(obj);
                    }
                }
            }
        }
    }
}
// Computes the flat offset of one value inside a YOLO region tensor whose channel layout is
// [anchor][coords + classes(+objectness)][cell]. 'location' packs anchor and cell indices as
// anchor * totalCells + cell; 'entry' selects the channel within that anchor's slice.
int ModelYolo::calculateEntryIndex(int totalCells, int lcoords, size_t lclasses, int location, int entry) {
    const int anchorIdx = location / totalCells;
    const int cellIdx = location - anchorIdx * totalCells;
    return (anchorIdx * (lcoords + lclasses) + entry) * totalCells + cellIdx;
}
// Computes IoU (intersection area / union area) of two axis-aligned boxes.
// Returns 0 for disjoint boxes and for the degenerate case of two zero-area boxes
// (fixes the original 0/0 division, which produced NaN).
double ModelYolo::intersectionOverUnion(const DetectedObject& o1, const DetectedObject& o2) {
    double overlappingWidth = fmin(o1.x + o1.width, o2.x + o2.width) - fmax(o1.x, o2.x);
    double overlappingHeight = fmin(o1.y + o1.height, o2.y + o2.height) - fmax(o1.y, o2.y);
    // Negative extent on either axis means the boxes do not overlap at all.
    double intersectionArea =
        (overlappingWidth < 0 || overlappingHeight < 0) ? 0 : overlappingHeight * overlappingWidth;
    double unionArea = o1.width * o1.height + o2.width * o2.height - intersectionArea;
    if (unionArea == 0) {
        // Both boxes have zero area: treat as no overlap instead of returning NaN.
        return 0;
    }
    return intersectionArea / unionArea;
}
// Builds region decoding parameters from a RegionYolo op found in the model graph.
ModelYolo::Region::Region(const std::shared_ptr<ov::op::v0::RegionYolo>& regionYolo) {
    coords = regionYolo->get_num_coords();
    classes = regionYolo->get_num_classes();
    auto mask = regionYolo->get_mask();
    num = mask.size();

    // Grid size comes from the op's input feature map; indices 2/3 assume an NCHW-ordered
    // shape — TODO confirm for models with non-default layouts.
    auto shape = regionYolo->get_input_shape(0);
    outputWidth = shape[3];
    outputHeight = shape[2];

    if (num) {
        // Parsing YoloV3 parameters: the mask selects which (w, h) anchor pairs of the
        // op's full anchor list belong to this output layer.
        anchors.resize(num * 2);

        for (int i = 0; i < num; ++i) {
            anchors[i * 2] = regionYolo->get_anchors()[mask[i] * 2];
            anchors[i * 2 + 1] = regionYolo->get_anchors()[mask[i] * 2 + 1];
        }
    } else {
        // Parsing YoloV2 parameters: no mask, the op carries all anchors directly.
        num = regionYolo->get_num_regions();
        anchors = regionYolo->get_anchors();
        if (anchors.empty()) {
            // Op holds no anchors at all: fall back to the built-in V1/V2 defaults.
            anchors = defaultAnchors[YOLO_V1V2];
            num = 5;
        }
    }
}
// Builds region decoding parameters from explicitly supplied anchors and masks
// (used when no RegionYolo op is present in the graph).
ModelYolo::Region::Region(size_t classes,
                          int coords,
                          const std::vector<float>& anchors,
                          const std::vector<int64_t>& masks,
                          size_t outputWidth,
                          size_t outputHeight)
    : classes(classes),
      coords(coords),
      outputWidth(outputWidth),
      outputHeight(outputHeight) {
    num = masks.size();
    if (anchors.size() == 0 || anchors.size() % 2 != 0) {
        throw std::runtime_error("Explicitly initialized region should have non-empty even-sized regions vector");
    }
    if (num == 0) {
        // No masks supplied: every (w, h) anchor pair belongs to this region.
        this->anchors = anchors;
        num = anchors.size() / 2;
        return;
    }
    // Masks select which (w, h) anchor pairs this output layer uses.
    this->anchors.resize(num * 2);
    for (int idx = 0; idx < num; ++idx) {
        this->anchors[idx * 2] = anchors[masks[idx] * 2];
        this->anchors[idx * 2 + 1] = anchors[masks[idx] * 2 + 1];
    }
}

View File

@@ -0,0 +1,180 @@
/*
// Copyright (C) 2022-2024 Intel Corporation
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
*/
#include "models/detection_model_yolov3_onnx.h"
#include <algorithm>
#include <cmath>
#include <cstdint>
#include <stdexcept>
#include <string>
#include <utility>
#include <vector>
#include <openvino/openvino.hpp>
#include <utils/common.hpp>
#include <utils/slog.hpp>
#include "models/input_data.h"
#include "models/internal_model_data.h"
#include "models/results.h"
#include "utils/image_utils.h"
// Constructs the YoloV3-ONNX wrapper: letterbox resizing preserves aspect ratio to match
// the model's separate image-info input; cubic interpolation is used for the resize.
ModelYoloV3ONNX::ModelYoloV3ONNX(const std::string& modelFileName,
                                 float confidenceThreshold,
                                 const std::vector<std::string>& labels,
                                 const std::string& layout)
    : DetectionModel(modelFileName, confidenceThreshold, false, labels, layout) {
    resizeMode = RESIZE_KEEP_ASPECT_LETTERBOX;
    interpolationMode = cv::INTER_CUBIC;
}
// Configures the model's 2 inputs (image + image-info) and 3 outputs (boxes, scores,
// selected indices), identifying each by its shape rather than by name.
void ModelYoloV3ONNX::prepareInputsOutputs(std::shared_ptr<ov::Model>& model) {
    // --------------------------- Configure input & output -------------------------------------------------
    // --------------------------- Prepare inputs ------------------------------------------------------
    const ov::OutputVector& inputs = model->inputs();
    if (inputs.size() != 2) {
        throw std::logic_error("YoloV3ONNX model wrapper expects models that have 2 inputs");
    }

    ov::preprocess::PrePostProcessor ppp(model);
    // FIX: the original called reserve(), which does NOT change the vector's size, and then
    // wrote through operator[] — undefined behavior on an empty vector. resize() creates the
    // two slots that are assigned positionally below (index 0 = image, index 1 = image info).
    inputsNames.resize(inputs.size());
    for (auto& input : inputs) {
        const ov::Shape& currentShape = input.get_shape();
        std::string currentName = input.get_any_name();
        const ov::Layout& currentLayout = getInputLayout(input);

        if (currentShape.size() == 4) {
            // 4D input: the image itself; fed as u8 NHWC.
            if (currentShape[ov::layout::channels_idx(currentLayout)] != 3) {
                throw std::logic_error("Expected 4D image input with 3 channels");
            }
            inputsNames[0] = currentName;
            netInputWidth = currentShape[ov::layout::width_idx(currentLayout)];
            netInputHeight = currentShape[ov::layout::height_idx(currentLayout)];
            ppp.input(currentName).tensor().set_element_type(ov::element::u8).set_layout({"NHWC"});
        } else if (currentShape.size() == 2) {
            // 2D input: the (rows, cols) image-info tensor filled in preprocess().
            if (currentShape[ov::layout::channels_idx(currentLayout)] != 2) {
                throw std::logic_error("Expected 2D image info input with 2 channels");
            }
            inputsNames[1] = currentName;
            ppp.input(currentName).tensor().set_element_type(ov::element::i32);
        }
        ppp.input(currentName).model().set_layout(currentLayout);
    }

    // --------------------------- Prepare outputs -----------------------------------------------------
    const ov::OutputVector& outputs = model->outputs();
    if (outputs.size() != 3) {
        throw std::logic_error("YoloV3ONNX model wrapper expects models that have 3 outputs");
    }

    for (auto& output : outputs) {
        const ov::Shape& currentShape = output.get_partial_shape().get_max_shape();
        std::string currentName = output.get_any_name();
        // Outputs are distinguished by shape: [..., 3] = selected index triples,
        // [:, :, 4] = boxes, [:, numberOfClasses, :] = per-class scores.
        if (currentShape.back() == 3) {
            indicesOutputName = currentName;
            ppp.output(currentName).tensor().set_element_type(ov::element::i32);
        } else if (currentShape[2] == 4) {
            boxesOutputName = currentName;
            ppp.output(currentName).tensor().set_element_type(ov::element::f32);
        } else if (currentShape[1] == numberOfClasses) {
            scoresOutputName = currentName;
            ppp.output(currentName).tensor().set_element_type(ov::element::f32);
        } else {
            throw std::logic_error("Expected shapes [:,:,4], [:,"
                + std::to_string(numberOfClasses) + ",:] and [:,3] for outputs");
        }
        outputsNames.push_back(currentName);
    }
    model = ppp.build();
}
std::shared_ptr<InternalModelData> ModelYoloV3ONNX::preprocess(const InputData& inputData,
ov::InferRequest& request) {
const auto& origImg = inputData.asRef<ImageInputData>().inputImage;
ov::Tensor info{ov::element::i32, ov::Shape({1, 2})};
int32_t* data = info.data<int32_t>();
data[0] = origImg.rows;
data[1] = origImg.cols;
request.set_tensor(inputsNames[1], info);
return ImageModel::preprocess(inputData, request);
}
namespace {
// Reads one (class, box) score from the scores tensor, whose innermost dimension
// (shape[2]) is the box count.
float getScore(const ov::Tensor& scoresTensor, size_t classInd, size_t boxInd) {
    const size_t boxesCount = scoresTensor.get_shape()[2];
    return scoresTensor.data<float>()[classInd * boxesCount + boxInd];
}
}  // namespace
// Converts the three model outputs (boxes, per-class scores, selected index triples)
// into a DetectionResult with boxes clamped to the original image bounds.
std::unique_ptr<ResultBase> ModelYoloV3ONNX::postprocess(InferenceResult& infResult) {
    // Get info about input image
    const auto imgWidth = infResult.internalModelData->asRef<InternalImageModelData>().inputImgWidth;
    const auto imgHeight = infResult.internalModelData->asRef<InternalImageModelData>().inputImgHeight;

    // Get outputs tensors
    const ov::Tensor& boxes = infResult.outputsData[boxesOutputName];
    const float* boxesPtr = boxes.data<float>();
    const ov::Tensor& scores = infResult.outputsData[scoresOutputName];
    const ov::Tensor& indices = infResult.outputsData[indicesOutputName];
    const int* indicesData = indices.data<int>();
    const auto indicesShape = indices.get_shape();
    const auto boxShape = boxes.get_shape();

    // Generate detection results
    DetectionResult* result = new DetectionResult(infResult.frameId, infResult.metaData);
    // The indices tensor is either [1, N, 3] or [N, 3]; each triple is (batch, class, box).
    size_t numberOfBoxes = indicesShape.size() == 3 ? indicesShape[1] : indicesShape[0];
    size_t indicesStride = indicesShape.size() == 3 ? indicesShape[2] : indicesShape[1];

    for (size_t i = 0; i < numberOfBoxes; ++i) {
        int batchInd = indicesData[i * indicesStride];
        int classInd = indicesData[i * indicesStride + 1];
        int boxInd = indicesData[i * indicesStride + 2];

        // A batch index of -1 marks the end of the valid entries in a padded index list.
        if (batchInd == -1) {
            break;
        }

        float score = getScore(scores, classInd, boxInd);
        if (score > confidenceThreshold) {
            DetectedObject obj;
            // Box coordinates are stored as (y1, x1, y2, x2) — note the swapped reads below.
            size_t startPos = boxShape[2] * boxInd;
            auto x = boxesPtr[startPos + 1];
            auto y = boxesPtr[startPos];
            auto width = boxesPtr[startPos + 3] - x;
            auto height = boxesPtr[startPos + 2] - y;

            // Create new detected box
            obj.x = clamp(x, 0.f, static_cast<float>(imgWidth));
            obj.y = clamp(y, 0.f, static_cast<float>(imgHeight));
            obj.height = clamp(height, 0.f, static_cast<float>(imgHeight));
            obj.width = clamp(width, 0.f, static_cast<float>(imgWidth));
            obj.confidence = score;
            obj.labelID = classInd;
            obj.label = getLabelName(classInd);
            result->objects.push_back(obj);
        }
    }
    return std::unique_ptr<ResultBase>(result);
}

View File

@@ -0,0 +1,194 @@
/*
// Copyright (C) 2022-2024 Intel Corporation
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
*/
#include "models/detection_model_yolox.h"
#include <algorithm>
#include <cmath>
#include <cstdint>
#include <stdexcept>
#include <string>
#include <utility>
#include <vector>
#include <openvino/openvino.hpp>
#include <utils/common.hpp>
#include <utils/slog.hpp>
#include "models/input_data.h"
#include "models/internal_model_data.h"
#include "models/results.h"
#include "utils/image_utils.h"
#include "utils/nms.hpp"
// Constructs the YOLOX wrapper. Aspect-ratio-preserving resize is required because
// postprocess() rescales boxes by the single uniform scale factor saved in preprocess().
ModelYoloX::ModelYoloX(const std::string& modelFileName,
                       float confidenceThreshold,
                       float boxIOUThreshold,
                       const std::vector<std::string>& labels,
                       const std::string& layout)
    : DetectionModel(modelFileName, confidenceThreshold, false, labels, layout),
      boxIOUThreshold(boxIOUThreshold) {
    resizeMode = RESIZE_KEEP_ASPECT;
}
// Validates the single image input / single prediction output, configures u8-NHWC input
// preprocessing, and precomputes the stride/grid tables used by postprocess().
void ModelYoloX::prepareInputsOutputs(std::shared_ptr<ov::Model>& model) {
    // --------------------------- Configure input & output -------------------------------------------------
    // --------------------------- Prepare input ------------------------------------------------------
    const ov::OutputVector& inputs = model->inputs();
    if (inputs.size() != 1) {
        throw std::logic_error("YOLOX model wrapper accepts models that have only 1 input");
    }

    //--- Check image input: must be 4D with 3 color channels.
    // FIX: the original used '&&', which only threw when BOTH conditions failed — a 4D input
    // with the wrong channel count was accepted, and a non-4D shape was still indexed with
    // channels_idx (potentially out of range). '||' short-circuits before the indexing.
    const auto& input = model->input();
    const ov::Shape& inputShape = model->input().get_shape();
    ov::Layout inputLayout = getInputLayout(input);
    if (inputShape.size() != 4 || inputShape[ov::layout::channels_idx(inputLayout)] != 3) {
        throw std::logic_error("Expected 4D image input with 3 channels");
    }
    ov::preprocess::PrePostProcessor ppp(model);
    ppp.input().tensor().set_element_type(ov::element::u8).set_layout({"NHWC"});
    ppp.input().model().set_layout(inputLayout);

    //--- Reading image input parameters
    inputsNames.push_back(input.get_any_name());
    netInputWidth = inputShape[ov::layout::width_idx(inputLayout)];
    netInputHeight = inputShape[ov::layout::height_idx(inputLayout)];
    setStridesGrids();

    // --------------------------- Prepare output -----------------------------------------------------
    if (model->outputs().size() != 1) {
        throw std::logic_error("YoloX model wrapper expects models that have only 1 output");
    }
    const auto& output = model->output();
    outputsNames.push_back(output.get_any_name());
    const ov::Shape& shape = output.get_shape();
    if (shape.size() != 3) {
        throw std::logic_error("YOLOX single output must have 3 dimensions, but had " + std::to_string(shape.size()));
    }
    ppp.output().tensor().set_element_type(ov::element::f32);
    model = ppp.build();
}
void ModelYoloX::setStridesGrids() {
std::vector<size_t> strides = {8, 16, 32};
std::vector<size_t> hsizes(3);
std::vector<size_t> wsizes(3);
for (size_t i = 0; i < strides.size(); ++i) {
hsizes[i] = netInputHeight / strides[i];
wsizes[i] = netInputWidth / strides[i];
}
for (size_t size_index = 0; size_index < hsizes.size(); ++size_index) {
for (size_t h_index = 0; h_index < hsizes[size_index]; ++h_index) {
for (size_t w_index = 0; w_index < wsizes[size_index]; ++w_index) {
grids.emplace_back(w_index, h_index);
expandedStrides.push_back(strides[size_index]);
}
}
}
}
std::shared_ptr<InternalModelData> ModelYoloX::preprocess(const InputData& inputData,
ov::InferRequest& request) {
const auto& origImg = inputData.asRef<ImageInputData>().inputImage;
float scale = std::min(static_cast<float>(netInputWidth) / origImg.cols,
static_cast<float>(netInputHeight) / origImg.rows);
cv::Mat resizedImage = resizeImageExt(origImg, netInputWidth, netInputHeight, resizeMode,
interpolationMode, nullptr, cv::Scalar(114, 114, 114));
request.set_input_tensor(wrapMat2Tensor(resizedImage));
return std::make_shared<InternalScaleData>(origImg.cols, origImg.rows, scale, scale);
}
// Decodes the single YOLOX prediction tensor into detections: converts raw cell-relative
// values to absolute coordinates (mutating the output tensor in place), filters by
// objectness * class score, then applies NMS.
std::unique_ptr<ResultBase> ModelYoloX::postprocess(InferenceResult& infResult) {
    // Get metadata about input image shape and scale
    const auto& scale = infResult.internalModelData->asRef<InternalScaleData>();

    // Get output tensor
    const ov::Tensor& output = infResult.outputsData[outputsNames[0]];
    const auto& outputShape = output.get_shape();
    float* outputPtr = output.data<float>();

    // Generate detection results
    DetectionResult* result = new DetectionResult(infResult.frameId, infResult.metaData);

    // Update coordinates according to strides: box row i corresponds to grids[i] /
    // expandedStrides[i] built in setStridesGrids(). NOTE: the tensor is modified in place.
    for (size_t box_index = 0; box_index < expandedStrides.size(); ++box_index) {
        size_t startPos = outputShape[2] * box_index;
        // Center offsets become absolute pixel coordinates in network-input space.
        outputPtr[startPos] = (outputPtr[startPos] + grids[box_index].first) * expandedStrides[box_index];
        outputPtr[startPos + 1] = (outputPtr[startPos + 1] + grids[box_index].second) * expandedStrides[box_index];
        // Log-encoded sizes become absolute widths/heights.
        outputPtr[startPos + 2] = std::exp(outputPtr[startPos + 2]) * expandedStrides[box_index];
        outputPtr[startPos + 3] = std::exp(outputPtr[startPos + 3]) * expandedStrides[box_index];
    }

    // Filter predictions
    std::vector<Anchor> validBoxes;
    std::vector<float> scores;
    std::vector<size_t> classes;
    for (size_t box_index = 0; box_index < expandedStrides.size(); ++box_index) {
        // Per-box layout: [cx, cy, w, h, objectness, classScore0, classScore1, ...].
        size_t startPos = outputShape[2] * box_index;
        float score = outputPtr[startPos + 4];
        // Early rejection on objectness alone (final score can only be smaller).
        if (score < confidenceThreshold)
            continue;
        float maxClassScore = -1;
        size_t mainClass = 0;
        for (size_t class_index = 0; class_index < numberOfClasses; ++class_index) {
            if (outputPtr[startPos + 5 + class_index] > maxClassScore) {
                maxClassScore = outputPtr[startPos + 5 + class_index];
                mainClass = class_index;
            }
        }

        // Filter by score: final confidence = objectness * best class score.
        score *= maxClassScore;
        if (score < confidenceThreshold)
            continue;

        // Add successful boxes: convert center+size to corners, then rescale to the
        // original image using the uniform scale saved in preprocess().
        scores.push_back(score);
        classes.push_back(mainClass);
        Anchor trueBox = {outputPtr[startPos + 0] - outputPtr[startPos + 2] / 2, outputPtr[startPos + 1] - outputPtr[startPos + 3] / 2,
                          outputPtr[startPos + 0] + outputPtr[startPos + 2] / 2, outputPtr[startPos + 1] + outputPtr[startPos + 3] / 2};
        validBoxes.push_back(Anchor({trueBox.left / scale.scaleX, trueBox.top / scale.scaleY,
                                     trueBox.right / scale.scaleX, trueBox.bottom / scale.scaleY}));
    }

    // NMS for valid boxes
    std::vector<int> keep = nms(validBoxes, scores, boxIOUThreshold, true);
    for (auto& index: keep) {
        // Create new detected box, clamped to the original image bounds.
        DetectedObject obj;
        obj.x = clamp(validBoxes[index].left, 0.f, static_cast<float>(scale.inputImgWidth));
        obj.y = clamp(validBoxes[index].top, 0.f, static_cast<float>(scale.inputImgHeight));
        obj.height = clamp(validBoxes[index].bottom - validBoxes[index].top, 0.f, static_cast<float>(scale.inputImgHeight));
        obj.width = clamp(validBoxes[index].right - validBoxes[index].left, 0.f, static_cast<float>(scale.inputImgWidth));
        obj.confidence = scores[index];
        obj.labelID = classes[index];
        obj.label = getLabelName(classes[index]);
        result->objects.push_back(obj);
    }

    return std::unique_ptr<ResultBase>(result);
}

View File

@@ -0,0 +1,264 @@
/*
// Copyright (C) 2021-2024 Intel Corporation
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
*/
#include "models/hpe_model_associative_embedding.h"
#include <stddef.h>
#include <algorithm>
#include <cmath>
#include <map>
#include <stdexcept>
#include <string>
#include <unordered_set>
#include <utility>
#include <vector>
#include <openvino/openvino.hpp>
#include <utils/image_utils.h>
#include <utils/ocv_common.hpp>
#include <utils/slog.hpp>
#include "models/associative_embedding_decoder.h"
#include "models/input_data.h"
#include "models/internal_model_data.h"
#include "models/results.h"
// Per-channel mean pixel value — not referenced in this translation unit's visible code;
// presumably consumed by shared preprocessing. TODO confirm where it is applied.
const cv::Vec3f HpeAssociativeEmbedding::meanPixel = cv::Vec3f::all(128);
// Peak detection threshold passed to findPeaks() in extractPoses().
const float HpeAssociativeEmbedding::detectionThreshold = 0.1f;
// Grouping threshold passed to matchByTag() in extractPoses().
const float HpeAssociativeEmbedding::tagThreshold = 1.0f;
// Constructs the associative-embedding HPE wrapper. The resize mode is caller-selected;
// cubic interpolation is always used for resizing.
HpeAssociativeEmbedding::HpeAssociativeEmbedding(const std::string& modelFileName,
                                                 double aspectRatio,
                                                 int targetSize,
                                                 float confidenceThreshold,
                                                 const std::string& layout,
                                                 float delta,
                                                 RESIZE_MODE resizeMode)
    : ImageModel(modelFileName, false, layout),
      aspectRatio(aspectRatio),
      targetSize(targetSize),
      confidenceThreshold(confidenceThreshold),
      delta(delta) {
    interpolationMode = cv::INTER_CUBIC;
    this->resizeMode = resizeMode;
}
// Validates the single image input and the 2–3 output tensors, configures u8-NHWC input
// preprocessing, resolves the embeddings/heatmaps/nms_heatmaps output names by prefix,
// and reshapes the model input to the requested target size.
void HpeAssociativeEmbedding::prepareInputsOutputs(std::shared_ptr<ov::Model>& model) {
    // --------------------------- Configure input & output -------------------------------------------------
    // --------------------------- Prepare input Tensors ------------------------------------------------------
    if (model->inputs().size() != 1) {
        throw std::logic_error("HPE AE model wrapper supports topologies with only 1 input.");
    }
    inputsNames.push_back(model->input().get_any_name());
    const ov::Shape& inputShape = model->input().get_shape();
    const ov::Layout& inputLayout = getInputLayout(model->input());

    if (inputShape.size() != 4 || inputShape[ov::layout::batch_idx(inputLayout)] != 1 ||
        inputShape[ov::layout::channels_idx(inputLayout)] != 3) {
        throw std::logic_error("3-channel 4-dimensional model's input is expected");
    }

    ov::preprocess::PrePostProcessor ppp(model);
    ppp.input().tensor().set_element_type(ov::element::u8).set_layout({"NHWC"});
    ppp.input().model().set_layout(inputLayout);

    // --------------------------- Prepare output Tensors -----------------------------------------------------
    const ov::OutputVector& outputs = model->outputs();
    if (outputs.size() != 2 && outputs.size() != 3) {
        throw std::logic_error("HPE AE model model wrapper supports topologies only with 2 or 3 outputs");
    }

    for (const auto& output : model->outputs()) {
        const auto& outTensorName = output.get_any_name();
        ppp.output(outTensorName).tensor().set_element_type(ov::element::f32);
        // A single output port can expose several tensor names; record all of them so the
        // prefix search in findTensorByName() can match any alias.
        for (const auto& name : output.get_names()) {
            outputsNames.push_back(name);
        }

        const ov::Shape& outputShape = output.get_shape();
        if (outputShape.size() != 4 && outputShape.size() != 5) {
            throw std::logic_error("output tensors are expected to be 4-dimensional or 5-dimensional");
        }
        // "NC..." covers both 4D and 5D shapes; 17 channels = 17 keypoints (COCO layout).
        if (outputShape[ov::layout::batch_idx("NC...")] != 1 || outputShape[ov::layout::channels_idx("NC...")] != 17) {
            throw std::logic_error("output tensors are expected to have 1 batch size and 17 channels");
        }
    }
    model = ppp.build();

    // Resolve output roles by name prefix; nms_heatmaps is optional — fall back to the raw
    // heatmaps output when the model does not provide a pre-NMS-ed variant.
    embeddingsTensorName = findTensorByName("embeddings", outputsNames);
    heatmapsTensorName = findTensorByName("heatmaps", outputsNames);
    try {
        nmsHeatmapsTensorName = findTensorByName("nms_heatmaps", outputsNames);
    } catch (const std::runtime_error&) { nmsHeatmapsTensorName = heatmapsTensorName; }

    changeInputSize(model);
}
// Derives the network input size from targetSize and the requested aspect ratio, rounds each
// dimension up to a multiple of the network stride, and reshapes the model accordingly.
// When targetSize is 0 it defaults to the model's smaller spatial dimension.
void HpeAssociativeEmbedding::changeInputSize(std::shared_ptr<ov::Model>& model) {
    ov::Shape inputShape = model->input().get_shape();
    const ov::Layout& layout = ov::layout::get_layout(model->input());
    const auto batchId = ov::layout::batch_idx(layout);
    const auto heightId = ov::layout::height_idx(layout);
    const auto widthId = ov::layout::width_idx(layout);

    if (!targetSize) {
        targetSize = static_cast<int>(std::min(inputShape[heightId], inputShape[widthId]));
    }
    const auto roundUpToStride = [this](int value) {
        return static_cast<int>((value + stride - 1) / stride) * stride;
    };
    int rawHeight = aspectRatio >= 1.0 ? targetSize : static_cast<int>(std::round(targetSize / aspectRatio));
    int rawWidth = aspectRatio >= 1.0 ? static_cast<int>(std::round(targetSize * aspectRatio)) : targetSize;
    const int height = roundUpToStride(rawHeight);
    const int width = roundUpToStride(rawWidth);

    inputShape[batchId] = 1;
    inputShape[heightId] = height;
    inputShape[widthId] = width;
    inputLayerSize = cv::Size(width, height);
    model->reshape(inputShape);
}
std::shared_ptr<InternalModelData> HpeAssociativeEmbedding::preprocess(const InputData& inputData,
ov::InferRequest& request) {
auto& image = inputData.asRef<ImageInputData>().inputImage;
cv::Rect roi;
auto paddedImage = resizeImageExt(image, inputLayerSize.width, inputLayerSize.height, resizeMode, interpolationMode, &roi);
if (inputLayerSize.height - stride >= roi.height || inputLayerSize.width - stride >= roi.width) {
slog::warn << "\tChosen model aspect ratio doesn't match image aspect ratio" << slog::endl;
}
request.set_input_tensor(wrapMat2Tensor(paddedImage));
return std::make_shared<InternalScaleData>(paddedImage.cols,
paddedImage.rows,
image.size().width / static_cast<float>(roi.width),
image.size().height / static_cast<float>(roi.height));
}
// Decodes heatmaps + embeddings into poses and rescales keypoints from heatmap space back
// to the original image, accounting for the resize mode used in preprocess().
std::unique_ptr<ResultBase> HpeAssociativeEmbedding::postprocess(InferenceResult& infResult) {
    HumanPoseResult* result = new HumanPoseResult(infResult.frameId, infResult.metaData);

    // Wrap each output tensor's channel planes as cv::Mat views (no copies; tensors stay alive
    // inside infResult for the duration of this call).
    const auto& aembds = infResult.outputsData[embeddingsTensorName];
    const ov::Shape& aembdsShape = aembds.get_shape();
    float* const aembdsMapped = aembds.data<float>();
    std::vector<cv::Mat> aembdsMaps = split(aembdsMapped, aembdsShape);

    const auto& heats = infResult.outputsData[heatmapsTensorName];
    const ov::Shape& heatMapsShape = heats.get_shape();
    float* const heatMapsMapped = heats.data<float>();
    std::vector<cv::Mat> heatMaps = split(heatMapsMapped, heatMapsShape);

    // If the model has no dedicated nms_heatmaps output, the raw heatmaps double as the
    // peak-search input (see prepareInputsOutputs()).
    std::vector<cv::Mat> nmsHeatMaps = heatMaps;
    if (nmsHeatmapsTensorName != heatmapsTensorName) {
        const auto& nmsHeats = infResult.outputsData[nmsHeatmapsTensorName];
        const ov::Shape& nmsHeatMapsShape = nmsHeats.get_shape();
        float* const nmsHeatMapsMapped = nmsHeats.data<float>();
        nmsHeatMaps = split(nmsHeatMapsMapped, nmsHeatMapsShape);
    }
    std::vector<HumanPose> poses = extractPoses(heatMaps, aembdsMaps, nmsHeatMaps);

    // Rescale poses to the original image
    const auto& scale = infResult.internalModelData->asRef<InternalScaleData>();
    // Ratio between network input width and heatmap width (heatmaps are stride-downscaled).
    const float outputScale = inputLayerSize.width / static_cast<float>(heatMapsShape[3]);
    float shiftX = 0.0, shiftY = 0.0;
    float scaleX = 1.0, scaleY = 1.0;
    if (resizeMode == RESIZE_KEEP_ASPECT_LETTERBOX) {
        // Letterbox: one uniform scale plus a centering shift along the padded axis
        // (which axis depends on whether the image is wider or taller than square).
        scaleX = scaleY = std::min(scale.scaleX, scale.scaleY);
        if (aspectRatio >= 1.0)
            shiftX = static_cast<float>((targetSize * scaleX * aspectRatio - scale.inputImgWidth * scaleX) / 2);
        else
            shiftY = static_cast<float>((targetSize * scaleY / aspectRatio - scale.inputImgHeight * scaleY) / 2);
        scaleX = scaleY *= outputScale;
    } else {
        scaleX = scale.scaleX * outputScale;
        scaleY = scale.scaleY * outputScale;
    }

    for (auto& pose : poses) {
        for (auto& keypoint : pose.keypoints) {
            // (-1, -1) marks a keypoint that was not found; leave it untouched.
            if (keypoint != cv::Point2f(-1, -1)) {
                keypoint.x = keypoint.x * scaleX + shiftX;
                keypoint.y = keypoint.y * scaleY + shiftY;
            }
        }
        result->poses.push_back(pose);
    }

    return std::unique_ptr<ResultBase>(result);
}
// Returns the unique output name that starts with 'tensorName'; throws when no output or
// more than one output matches the prefix.
std::string HpeAssociativeEmbedding::findTensorByName(const std::string& tensorName,
                                                      const std::vector<std::string>& outputsNames) {
    std::vector<std::string> matches;
    for (const auto& candidate : outputsNames) {
        // Prefix test: compare the leading tensorName.size() characters of the candidate.
        if (candidate.compare(0, tensorName.size(), tensorName) == 0) {
            matches.push_back(candidate);
        }
    }
    if (matches.empty()) {
        throw std::runtime_error("Suitable tensor for " + tensorName + " output is not found");
    }
    if (matches.size() > 1) {
        throw std::runtime_error("More than 1 tensor matched to " + tensorName + " output");
    }
    return matches[0];
}
// Wraps each of the shape[1] channel planes (shape[2] x shape[3] floats) as a cv::Mat
// header over the tensor memory. No data is copied, so 'data' must outlive the result.
std::vector<cv::Mat> HpeAssociativeEmbedding::split(float* data, const ov::Shape& shape) {
    const size_t channels = shape[1];
    const size_t planeSize = shape[2] * shape[3];
    std::vector<cv::Mat> planes;
    planes.reserve(channels);
    for (size_t c = 0; c < channels; c++) {
        planes.emplace_back(shape[2], shape[3], CV_32FC1, data + c * planeSize);
    }
    return planes;
}
// Runs the associative-embedding decoder: finds per-joint heatmap peaks, groups them into
// poses by embedding tag, filters low-scoring poses, and refines keypoint locations.
std::vector<HumanPose> HpeAssociativeEmbedding::extractPoses(std::vector<cv::Mat>& heatMaps,
                                                             const std::vector<cv::Mat>& aembdsMaps,
                                                             const std::vector<cv::Mat>& nmsHeatMaps) const {
    std::vector<std::vector<Peak>> allPeaks(numJoints);
    for (int i = 0; i < numJoints; i++) {
        findPeaks(nmsHeatMaps, aembdsMaps, allPeaks, i, maxNumPeople, detectionThreshold);
    }
    std::vector<Pose> allPoses = matchByTag(allPeaks, maxNumPeople, numJoints, tagThreshold);
    // swap for all poses: peaks come out in (row, col) order; downstream code expects (x, y).
    for (auto& pose : allPoses) {
        for (size_t j = 0; j < numJoints; j++) {
            Peak& peak = pose.getPeak(j);
            std::swap(peak.keypoint.x, peak.keypoint.y);
        }
    }
    std::vector<HumanPose> poses;
    for (size_t i = 0; i < allPoses.size(); i++) {
        Pose& pose = allPoses[i];
        // Filtering poses with low mean scores
        if (pose.getMeanScore() <= confidenceThreshold) {
            continue;
        }
        // adjustAndRefine works on magnitudes; note this mutates heatMaps in place
        // (re-applied per kept pose — idempotent after the first pass).
        for (size_t j = 0; j < heatMaps.size(); j++) {
            heatMaps[j] = cv::abs(heatMaps[j]);
        }
        adjustAndRefine(allPoses, heatMaps, aembdsMaps, i, delta);
        std::vector<cv::Point2f> keypoints;
        for (size_t j = 0; j < numJoints; j++) {
            Peak& peak = pose.getPeak(j);
            keypoints.push_back(peak.keypoint);
        }
        poses.push_back({keypoints, pose.getMeanScore()});
    }
    return poses;
}

View File

@@ -0,0 +1,256 @@
/*
// Copyright (C) 2020-2024 Intel Corporation
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
*/
#include "models/hpe_model_openpose.h"
#include <algorithm>
#include <cmath>
#include <map>
#include <stdexcept>
#include <string>
#include <utility>
#include <vector>
#include <opencv2/imgproc.hpp>
#include <openvino/openvino.hpp>
#include <utils/image_utils.h>
#include <utils/ocv_common.hpp>
#include <utils/slog.hpp>
#include "models/input_data.h"
#include "models/internal_model_data.h"
#include "models/openpose_decoder.h"
#include "models/results.h"
// Mean pixel value (all channels = 128); used by input normalization — exact
// consumer is outside this file, confirm against HPEOpenPose header/usage.
const cv::Vec3f HPEOpenPose::meanPixel = cv::Vec3f::all(128);
// Minimum distance (in heat-map cells) between two peaks of the same keypoint
// type; closer peaks are suppressed in findPeaks.
const float HPEOpenPose::minPeaksDistance = 3.0f;
// Minimum PAF score for a sampled limb midpoint to count as supporting evidence
// (see groupPeaksToPoses).
const float HPEOpenPose::midPointsScoreThreshold = 0.05f;
// Fraction of sampled midpoints that must pass the score check for a limb
// candidate to be kept.
const float HPEOpenPose::foundMidPointsRatioThreshold = 0.8f;
// Minimum average per-joint score for a pose candidate to be reported.
const float HPEOpenPose::minSubsetScore = 0.2f;
// Constructs the OpenPose human-pose-estimation model wrapper.
// @param modelFileName        path to the model file
// @param aspectRatio          desired input width/height ratio used when reshaping
//                             the network input (see changeInputSize)
// @param targetSize           desired network input height; 0 keeps the model's
//                             original input height
// @param confidenceThreshold  minimum heat-map peak confidence for keypoints
// @param layout               optional explicit input layout string
// Auto-resize is disabled: input is letterboxed (RESIZE_KEEP_ASPECT) with
// bicubic interpolation in preprocess().
HPEOpenPose::HPEOpenPose(const std::string& modelFileName,
                         double aspectRatio,
                         int targetSize,
                         float confidenceThreshold,
                         const std::string& layout)
    : ImageModel(modelFileName, false, layout),
      aspectRatio(aspectRatio),
      targetSize(targetSize),
      confidenceThreshold(confidenceThreshold) {
    resizeMode = RESIZE_KEEP_ASPECT;
    interpolationMode = cv::INTER_CUBIC;
}
// Configures model I/O: validates the single 3-channel 4D input, sets u8/NHWC
// tensor preprocessing, forces both outputs to f32/NCHW, orders outputsNames as
// {heatmaps, PAFs}, validates both output shapes against keypointsNumber and
// finally reshapes the input to the requested target size.
void HPEOpenPose::prepareInputsOutputs(std::shared_ptr<ov::Model>& model) {
    // --------------------------- Configure input & output -------------------------------------------------
    // --------------------------- Prepare input ------------------------------------------------------
    if (model->inputs().size() != 1) {
        throw std::logic_error("HPE OpenPose model wrapper supports topologies with only 1 input");
    }
    inputsNames.push_back(model->input().get_any_name());
    const ov::Shape& inputShape = model->input().get_shape();
    const ov::Layout& inputLayout = getInputLayout(model->input());
    if (inputShape.size() != 4 || inputShape[ov::layout::batch_idx(inputLayout)] != 1 ||
        inputShape[ov::layout::channels_idx(inputLayout)] != 3)
        throw std::logic_error("3-channel 4-dimensional model's input is expected");
    // Runtime tensor is u8 NHWC (matches wrapMat2Tensor); model layout as detected.
    ov::preprocess::PrePostProcessor ppp(model);
    ppp.input().tensor().set_element_type(ov::element::u8).set_layout({"NHWC"});
    ppp.input().model().set_layout(inputLayout);
    // --------------------------- Prepare output -----------------------------------------------------
    const ov::OutputVector& outputs = model->outputs();
    if (outputs.size() != 2) {
        throw std::runtime_error("HPE OpenPose supports topologies with only 2 outputs");
    }
    const ov::Layout outputLayout("NCHW");
    for (const auto& output : model->outputs()) {
        const auto& outTensorName = output.get_any_name();
        ppp.output(outTensorName).tensor().set_element_type(ov::element::f32).set_layout(outputLayout);
        outputsNames.push_back(outTensorName);
    }
    model = ppp.build();
    const size_t batchId = ov::layout::batch_idx(outputLayout);
    const size_t channelsId = ov::layout::channels_idx(outputLayout);
    const size_t widthId = ov::layout::width_idx(outputLayout);
    const size_t heightId = ov::layout::height_idx(outputLayout);
    ov::Shape heatmapsOutputShape = model->outputs().front().get_shape();
    ov::Shape pafsOutputShape = model->outputs().back().get_shape();
    // Heatmaps have fewer channels (keypointsNumber + 1) than PAFs
    // (2 * (keypointsNumber + 1)); swap so outputsNames[0] is always heatmaps.
    if (heatmapsOutputShape[channelsId] > pafsOutputShape[channelsId]) {
        std::swap(heatmapsOutputShape, pafsOutputShape);
        std::swap(outputsNames[0], outputsNames[1]);
    }
    if (heatmapsOutputShape.size() != 4 || heatmapsOutputShape[batchId] != 1 ||
        heatmapsOutputShape[ov::layout::channels_idx(outputLayout)] != keypointsNumber + 1) {
        throw std::logic_error("1x" + std::to_string(keypointsNumber + 1) +
                               "xHFMxWFM dimension of model's heatmap is expected");
    }
    if (pafsOutputShape.size() != 4 || pafsOutputShape[batchId] != 1 ||
        pafsOutputShape[channelsId] != 2 * (keypointsNumber + 1)) {
        throw std::logic_error("1x" + std::to_string(2 * (keypointsNumber + 1)) +
                               "xHFMxWFM dimension of model's output is expected");
    }
    // Both outputs must share spatial dimensions so postprocess can wrap them
    // with the same H x W (see postprocess).
    if (pafsOutputShape[heightId] != heatmapsOutputShape[heightId] ||
        pafsOutputShape[widthId] != heatmapsOutputShape[widthId]) {
        throw std::logic_error("output and heatmap are expected to have matching last two dimensions");
    }
    changeInputSize(model);
}
// Reshapes the model input: height is targetSize (or the model's original
// height when targetSize == 0) and width is derived from aspectRatio, both
// rounded UP to the nearest multiple of `stride`. Also caches the resulting
// size in inputLayerSize for use by preprocess().
void HPEOpenPose::changeInputSize(std::shared_ptr<ov::Model>& model) {
    ov::Shape inputShape = model->input().get_shape();
    const ov::Layout& layout = ov::layout::get_layout(model->inputs().front());
    const auto batchId = ov::layout::batch_idx(layout);
    const auto heightId = ov::layout::height_idx(layout);
    const auto widthId = ov::layout::width_idx(layout);
    if (!targetSize) {
        targetSize = inputShape[heightId];
    }
    // Round up to a multiple of stride so feature maps align with the network.
    int height = static_cast<int>((targetSize + stride - 1) / stride) * stride;
    int inputWidth = static_cast<int>(std::round(targetSize * aspectRatio));
    int width = static_cast<int>((inputWidth + stride - 1) / stride) * stride;
    inputShape[batchId] = 1;
    inputShape[heightId] = height;
    inputShape[widthId] = width;
    inputLayerSize = cv::Size(width, height);
    model->reshape(inputShape);
}
// Letterbox-resizes the input image into the network input tensor and records
// the X/Y scale factors needed to map detections back to source coordinates.
// Throws when the image is wider (after scaling) than the model input; warns
// when the model/image aspect ratios differ by more than one stride.
std::shared_ptr<InternalModelData> HPEOpenPose::preprocess(const InputData& inputData, ov::InferRequest& request) {
    auto& image = inputData.asRef<ImageInputData>().inputImage;
    cv::Rect roi;  // region of paddedImage actually covered by the resized image
    auto paddedImage =
        resizeImageExt(image, inputLayerSize.width, inputLayerSize.height, resizeMode, interpolationMode, &roi);
    if (inputLayerSize.width < roi.width)
        throw std::runtime_error("The image aspect ratio doesn't fit current model shape");
    if (inputLayerSize.width - stride >= roi.width) {
        slog::warn << "\tChosen model aspect ratio doesn't match image aspect ratio" << slog::endl;
    }
    request.set_input_tensor(wrapMat2Tensor(paddedImage));
    // Scale factors: source pixels per padded-image pixel, used in postprocess.
    return std::make_shared<InternalScaleData>(paddedImage.cols,
                                               paddedImage.rows,
                                               image.cols / static_cast<float>(roi.width),
                                               image.rows / static_cast<float>(roi.height));
}
// Converts raw network outputs (heatmaps + PAFs) into a HumanPoseResult with
// keypoints mapped back to source-image coordinates.
// outputsNames[0] is guaranteed to be the heatmaps output and both outputs
// share spatial dimensions (enforced in prepareInputsOutputs).
std::unique_ptr<ResultBase> HPEOpenPose::postprocess(InferenceResult& infResult) {
    // Own the result from the start: the original held a raw pointer until the
    // final return, leaking it if any call below threw.
    auto result = std::make_unique<HumanPoseResult>(infResult.frameId, infResult.metaData);
    auto& heatMapsMapped = infResult.outputsData[outputsNames[0]];
    auto& outputMapped = infResult.outputsData[outputsNames[1]];
    const ov::Shape& outputShape = outputMapped.get_shape();
    const ov::Shape& heatMapShape = heatMapsMapped.get_shape();
    float* const predictions = outputMapped.data<float>();
    float* const heats = heatMapsMapped.data<float>();
    // Wrap each channel as a cv::Mat view over the tensor memory (no copy).
    std::vector<cv::Mat> heatMaps(keypointsNumber);
    for (size_t i = 0; i < heatMaps.size(); i++) {
        heatMaps[i] =
            cv::Mat(heatMapShape[2], heatMapShape[3], CV_32FC1, heats + i * heatMapShape[2] * heatMapShape[3]);
    }
    resizeFeatureMaps(heatMaps);
    std::vector<cv::Mat> pafs(outputShape[1]);
    for (size_t i = 0; i < pafs.size(); i++) {
        pafs[i] =
            cv::Mat(heatMapShape[2], heatMapShape[3], CV_32FC1, predictions + i * heatMapShape[2] * heatMapShape[3]);
    }
    resizeFeatureMaps(pafs);
    std::vector<HumanPose> poses = extractPoses(heatMaps, pafs);
    // Map keypoints from upsampled feature-map space back to source pixels.
    const auto& scale = infResult.internalModelData->asRef<InternalScaleData>();
    const float scaleX = stride / upsampleRatio * scale.scaleX;
    const float scaleY = stride / upsampleRatio * scale.scaleY;
    for (auto& pose : poses) {
        for (auto& keypoint : pose.keypoints) {
            if (keypoint != cv::Point2f(-1, -1)) {  // (-1, -1) marks an undetected keypoint
                keypoint.x *= scaleX;
                keypoint.y *= scaleY;
            }
        }
    }
    result->poses = std::move(poses);
    return result;
}
// Upsamples every feature map in place by upsampleRatio (both axes) using
// bicubic interpolation.
void HPEOpenPose::resizeFeatureMaps(std::vector<cv::Mat>& featureMaps) const {
    const double fx = upsampleRatio;
    const double fy = upsampleRatio;
    for (size_t i = 0; i < featureMaps.size(); ++i) {
        cv::Mat& map = featureMaps[i];
        cv::resize(map, map, cv::Size(), fx, fy, cv::INTER_CUBIC);
    }
}
// Parallel loop body for cv::parallel_for_: runs findPeaks over a range of
// heat-map channels so per-keypoint peak extraction happens concurrently.
// Holds references to the shared inputs/outputs; peaksFromHeatMap is written
// per-channel, so parallel invocations do not race.
class FindPeaksBody : public cv::ParallelLoopBody {
public:
    FindPeaksBody(const std::vector<cv::Mat>& heatMaps,
                  float minPeaksDistance,
                  std::vector<std::vector<Peak>>& peaksFromHeatMap,
                  float confidenceThreshold)
        : heatMaps(heatMaps),
          minPeaksDistance(minPeaksDistance),
          peaksFromHeatMap(peaksFromHeatMap),
          confidenceThreshold(confidenceThreshold) {}
    // Processes heat-map channels [range.start, range.end).
    void operator()(const cv::Range& range) const override {
        for (int i = range.start; i < range.end; i++) {
            findPeaks(heatMaps, minPeaksDistance, peaksFromHeatMap, i, confidenceThreshold);
        }
    }
private:
    const std::vector<cv::Mat>& heatMaps;
    float minPeaksDistance;
    std::vector<std::vector<Peak>>& peaksFromHeatMap;
    float confidenceThreshold;
};
// Extracts poses from heat maps and PAFs: finds per-keypoint peaks in parallel,
// re-bases peak ids to be globally unique across channels (so each Peak::id
// indexes the flattened candidate list built in groupPeaksToPoses), then groups
// peaks into poses using the Part Affinity Fields.
std::vector<HumanPose> HPEOpenPose::extractPoses(const std::vector<cv::Mat>& heatMaps,
                                                 const std::vector<cv::Mat>& pafs) const {
    std::vector<std::vector<Peak>> peaksFromHeatMap(heatMaps.size());
    FindPeaksBody findPeaksBody(heatMaps, minPeaksDistance, peaksFromHeatMap, confidenceThreshold);
    cv::parallel_for_(cv::Range(0, static_cast<int>(heatMaps.size())), findPeaksBody);
    // Per-channel peak ids start at 0; offset each channel by the running total
    // of peaks in all preceding channels.
    int peaksBefore = 0;
    for (size_t heatmapId = 1; heatmapId < heatMaps.size(); heatmapId++) {
        peaksBefore += static_cast<int>(peaksFromHeatMap[heatmapId - 1].size());
        for (auto& peak : peaksFromHeatMap[heatmapId]) {
            peak.id += peaksBefore;
        }
    }
    std::vector<HumanPose> poses = groupPeaksToPoses(peaksFromHeatMap,
                                                     pafs,
                                                     keypointsNumber,
                                                     midPointsScoreThreshold,
                                                     foundMidPointsRatioThreshold,
                                                     minJointsNumber,
                                                     minSubsetScore);
    return poses;
}

View File

@@ -0,0 +1,62 @@
/*
// Copyright (C) 2021-2024 Intel Corporation
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
*/
#include "models/image_model.h"
#include <stdexcept>
#include <vector>
#include <opencv2/core.hpp>
#include <openvino/openvino.hpp>
#include <utils/image_utils.h>
#include <utils/ocv_common.hpp>
#include "models/input_data.h"
#include "models/internal_model_data.h"
// Default constructor; explicitly defaulted (modernize-use-equals-default)
// instead of an empty user-provided body.
ImageModel::ImageModel() = default;
// Constructs an image-input model wrapper.
// @param modelFileName  path to the model file (forwarded to ModelBase)
// @param useAutoResize  when true, preprocess() skips host-side resizing and
//                       binds the image as-is (see ImageModel::preprocess)
// @param layout         optional explicit input layout string
ImageModel::ImageModel(const std::string& modelFileName, bool useAutoResize, const std::string& layout)
    : ModelBase(modelFileName, layout),
      useAutoResize(useAutoResize) {}
// Default image preprocessing: applies inputTransform, optionally resizes the
// image on the host to the input tensor's spatial size (when auto-resize is
// off), and binds the image memory to the request's first input tensor.
// Returns the original image dimensions for use during postprocessing.
std::shared_ptr<InternalModelData> ImageModel::preprocess(const InputData& inputData, ov::InferRequest& request) {
    const auto& origImg = inputData.asRef<ImageInputData>().inputImage;
    auto img = inputTransform(origImg);
    if (!useAutoResize) {
        // /* Resize and copy data from the image to the input tensor */
        const ov::Tensor& frameTensor = request.get_tensor(inputsNames[0]); // first input should be image
        const ov::Shape& tensorShape = frameTensor.get_shape();
        // NOTE(review): assumes the input tensor is NHWC (as configured by the
        // concrete models' prepareInputsOutputs) — confirm for other layouts.
        const ov::Layout layout("NHWC");
        const size_t width = tensorShape[ov::layout::width_idx(layout)];
        const size_t height = tensorShape[ov::layout::height_idx(layout)];
        const size_t channels = tensorShape[ov::layout::channels_idx(layout)];
        if (static_cast<size_t>(img.channels()) != channels) {
            throw std::runtime_error(std::string("The number of channels for model input: ") +
                                     std::to_string(channels) + " and image: " +
                                     std::to_string(img.channels()) + " - must match");
        }
        if (channels != 1 && channels != 3) {
            throw std::runtime_error("Unsupported number of channels");
        }
        img = resizeImageExt(img, width, height, resizeMode, interpolationMode);
    }
    request.set_tensor(inputsNames[0], wrapMat2Tensor(img));
    return std::make_shared<InternalImageModelData>(origImg.cols, origImg.rows);
}

View File

@@ -0,0 +1,167 @@
/*
// Copyright (C) 2021-2024 Intel Corporation
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
*/
#include <stddef.h>
#include <algorithm>
#include <memory>
#include <stdexcept>
#include <string>
#include <vector>
#include <opencv2/core.hpp>
#include <opencv2/imgcodecs.hpp>
#include <opencv2/imgproc.hpp>
#include <openvino/openvino.hpp>
#include <utils/ocv_common.hpp>
#include <utils/slog.hpp>
#include "models/image_model.h"
#include "models/input_data.h"
#include "models/internal_model_data.h"
#include "models/jpeg_restoration_model.h"
#include "models/results.h"
// Constructs the JPEG restoration model wrapper.
// @param modelFileName     path to the model file
// @param inputImgSize      expected input image size; rounded up to a multiple
//                          of `stride` in changeInputSize
// @param _jpegCompression  when true, preprocess() first re-encodes the image
//                          as quality-40 JPEG to simulate compression artifacts
// @param layout            optional explicit input layout string
JPEGRestorationModel::JPEGRestorationModel(const std::string& modelFileName,
                                           const cv::Size& inputImgSize,
                                           bool _jpegCompression,
                                           const std::string& layout)
    : ImageModel(modelFileName, false, layout) {
    netInputHeight = inputImgSize.height;
    netInputWidth = inputImgSize.width;
    jpegCompression = _jpegCompression;
}
// Configures model I/O: validates the single 3-channel 4D input and single
// 3-channel 4D output, sets u8/NHWC input preprocessing and f32 output, then
// reshapes the input to the stored image size (changeInputSize).
void JPEGRestorationModel::prepareInputsOutputs(std::shared_ptr<ov::Model>& model) {
    // --------------------------- Configure input & output -------------------------------------------------
    // --------------------------- Prepare input ------------------------------------------------------
    if (model->inputs().size() != 1) {
        throw std::logic_error("The JPEG Restoration model wrapper supports topologies with only 1 input");
    }
    inputsNames.push_back(model->input().get_any_name());
    const ov::Shape& inputShape = model->input().get_shape();
    const ov::Layout& inputLayout = getInputLayout(model->input());
    if (inputShape.size() != 4 || inputShape[ov::layout::batch_idx(inputLayout)] != 1 ||
        inputShape[ov::layout::channels_idx(inputLayout)] != 3) {
        throw std::logic_error("3-channel 4-dimensional model's input is expected");
    }
    // Runtime tensor is u8 NHWC (matches wrapMat2Tensor); model layout as detected.
    ov::preprocess::PrePostProcessor ppp(model);
    ppp.input().tensor().set_element_type(ov::element::u8).set_layout("NHWC");
    ppp.input().model().set_layout(inputLayout);
    // --------------------------- Prepare output -----------------------------------------------------
    const ov::OutputVector& outputs = model->outputs();
    if (outputs.size() != 1) {
        throw std::logic_error("The JPEG Restoration model wrapper supports topologies with only 1 output");
    }
    const ov::Shape& outputShape = model->output().get_shape();
    const ov::Layout outputLayout{"NCHW"};
    if (outputShape.size() != 4 || outputShape[ov::layout::batch_idx(outputLayout)] != 1 ||
        outputShape[ov::layout::channels_idx(outputLayout)] != 3) {
        throw std::logic_error("3-channel 4-dimensional model's output is expected");
    }
    outputsNames.push_back(model->output().get_any_name());
    ppp.output().tensor().set_element_type(ov::element::f32);
    model = ppp.build();
    changeInputSize(model);
}
// Reshapes the model input to the stored image size, with both dimensions
// rounded UP to a multiple of `stride`. Requires the original model input to
// already be stride-aligned.
void JPEGRestorationModel::changeInputSize(std::shared_ptr<ov::Model>& model) {
    ov::Shape inputShape = model->input().get_shape();
    const ov::Layout& layout = ov::layout::get_layout(model->input());
    const auto batchId = ov::layout::batch_idx(layout);
    const auto heightId = ov::layout::height_idx(layout);
    const auto widthId = ov::layout::width_idx(layout);
    if (inputShape[heightId] % stride || inputShape[widthId] % stride) {
        throw std::logic_error("The shape of the model input must be divisible by stride");
    }
    // Round the requested image size up to the nearest stride multiple.
    netInputHeight = static_cast<int>((netInputHeight + stride - 1) / stride) * stride;
    netInputWidth = static_cast<int>((netInputWidth + stride - 1) / stride) * stride;
    inputShape[batchId] = 1;
    inputShape[heightId] = netInputHeight;
    inputShape[widthId] = netInputWidth;
    model->reshape(inputShape);
}
// Prepares the input image: optionally simulates JPEG compression (quality 40),
// then either zero-pads the image to the network size (when it is within one
// stride of fitting) or warns and stretches it. Returns the (possibly
// re-encoded) image dimensions for postprocessing.
std::shared_ptr<InternalModelData> JPEGRestorationModel::preprocess(const InputData& inputData,
                                                                    ov::InferRequest& request) {
    cv::Mat image = inputData.asRef<ImageInputData>().inputImage;
    const size_t h = image.rows;
    const size_t w = image.cols;
    cv::Mat resizedImage;
    if (jpegCompression) {
        // Round-trip through a low-quality JPEG to introduce the artifacts the
        // model was trained to remove.
        std::vector<uchar> encimg;
        std::vector<int> params{cv::IMWRITE_JPEG_QUALITY, 40};
        cv::imencode(".jpg", image, encimg, params);
        // NOTE(review): flag 3 == IMREAD_COLOR | IMREAD_ANYDEPTH; presumably
        // cv::IMREAD_COLOR (1) was intended — confirm.
        image = cv::imdecode(cv::Mat(encimg), 3);
    }
    if (netInputHeight - stride < h && h <= netInputHeight && netInputWidth - stride < w && w <= netInputWidth) {
        // Image fits within one stride of the network size: pad bottom/right.
        int bottom = netInputHeight - h;
        int right = netInputWidth - w;
        cv::copyMakeBorder(image, resizedImage, 0, bottom, 0, right, cv::BORDER_CONSTANT, 0);
    } else {
        slog::warn << "\tChosen model aspect ratio doesn't match image aspect ratio" << slog::endl;
        cv::resize(image, resizedImage, cv::Size(netInputWidth, netInputHeight));
    }
    request.set_input_tensor(wrapMat2Tensor(resizedImage));
    return std::make_shared<InternalImageModelData>(image.cols, image.rows);
}
// Converts the f32 NCHW network output back into an 8-bit BGR image, cropping
// (when the input was padded) or resizing (when it was stretched) to restore
// the original image dimensions.
std::unique_ptr<ResultBase> JPEGRestorationModel::postprocess(InferenceResult& infResult) {
    // Own the result immediately: the original held a raw pointer until the
    // final return, leaking it if any OpenCV call below threw.
    auto result = std::make_unique<ImageResult>();
    *static_cast<ResultBase*>(result.get()) = static_cast<ResultBase&>(infResult);
    const auto& inputImgSize = infResult.internalModelData->asRef<InternalImageModelData>();
    const auto outputData = infResult.getFirstOutputTensor().data<float>();
    const ov::Shape& outputShape = infResult.getFirstOutputTensor().get_shape();
    const size_t outHeight = outputShape[2];
    const size_t outWidth = outputShape[3];
    const size_t numOfPixels = outWidth * outHeight;
    // Wrap the three contiguous channel planes (no copy) and merge to BGR.
    std::vector<cv::Mat> imgPlanes{cv::Mat(outHeight, outWidth, CV_32FC1, &(outputData[0])),
                                   cv::Mat(outHeight, outWidth, CV_32FC1, &(outputData[numOfPixels])),
                                   cv::Mat(outHeight, outWidth, CV_32FC1, &(outputData[numOfPixels * 2]))};
    cv::Mat resultImg;
    cv::merge(imgPlanes, resultImg);
    // Mirror the branch taken in preprocess(): crop away padding when the image
    // fit within one stride of the network size, otherwise resize back.
    if (netInputHeight - stride < static_cast<size_t>(inputImgSize.inputImgHeight) &&
        static_cast<size_t>(inputImgSize.inputImgHeight) <= netInputHeight &&
        netInputWidth - stride < static_cast<size_t>(inputImgSize.inputImgWidth) &&
        static_cast<size_t>(inputImgSize.inputImgWidth) <= netInputWidth) {
        result->resultImage = resultImg(cv::Rect(0, 0, inputImgSize.inputImgWidth, inputImgSize.inputImgHeight));
    } else {
        cv::resize(resultImg, result->resultImage, cv::Size(inputImgSize.inputImgWidth, inputImgSize.inputImgHeight));
    }
    // Output is in [0, 1]; scale to 8-bit.
    result->resultImage.convertTo(result->resultImage, CV_8UC3, 255);
    return result;
}

View File

@@ -0,0 +1,65 @@
/*
// Copyright (C) 2021-2024 Intel Corporation
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
*/
#include "models/model_base.h"
#include <utility>
#include <openvino/openvino.hpp>
#include <utils/common.hpp>
#include <utils/config_factory.h>
#include <utils/ocv_common.hpp>
#include <utils/slog.hpp>
// Reads the model from disk, logs its basic info, lets the derived class
// configure I/O (prepareInputsOutputs) and pins the batch size to 1.
// @param core OpenVINO runtime core used to read the model
// @return the prepared (possibly rebuilt/reshaped) model
std::shared_ptr<ov::Model> ModelBase::prepareModel(ov::Core& core) {
    // --------------------------- Read IR Generated by ModelOptimizer (.xml and .bin files) ------------
    /** Read model **/
    slog::info << "Reading model " << modelFileName << slog::endl;
    std::shared_ptr<ov::Model> model = core.read_model(modelFileName);
    logBasicModelInfo(model);
    // -------------------------- Reading all outputs names and customizing I/O tensors (in inherited classes)
    prepareInputsOutputs(model);
    setBatch(model);
    return model;
}
// Stores the configuration, prepares the model and compiles it for the
// configured device. Also caches the compiled model in the `compiledModel`
// member and logs compilation info.
// @param config device name and compile-time properties (shadows the member
//               of the same name, which it overwrites)
// @param core   OpenVINO runtime core
ov::CompiledModel ModelBase::compileModel(const ModelConfig& config, ov::Core& core) {
    this->config = config;
    auto model = prepareModel(core);
    compiledModel = core.compile_model(model, config.deviceName, config.compiledModelConfig);
    logCompiledModelInfo(compiledModel, modelFileName, config.deviceName);
    return compiledModel;
}
// Resolves the layout for a model input: prefers the layout stored in the
// model itself; otherwise falls back to user-supplied layouts (a single entry
// applies to all inputs) or auto-detection from the input's shape.
ov::Layout ModelBase::getInputLayout(const ov::Output<ov::Node>& input) {
    ov::Layout layout = ov::layout::get_layout(input);
    if (layout.empty()) {
        if (inputsLayouts.empty()) {
            // No user hint at all: guess from the partial shape and warn.
            layout = getLayoutFromShape(input.get_partial_shape());
            slog::warn << "Automatically detected layout '" << layout.to_string() << "' for input '"
                       << input.get_any_name() << "' will be used." << slog::endl;
        } else if (inputsLayouts.size() == 1) {
            layout = inputsLayouts.begin()->second;
        } else {
            // NOTE(review): operator[] default-inserts an empty layout (and
            // mutates the map) when this input's name is missing; consider
            // find()/.at() with an explicit error — confirm intended behavior.
            layout = inputsLayouts[input.get_any_name()];
        }
    }
    return layout;
}
// Pins the model's batch dimension to 1; overridable by derived classes that
// support batching.
void ModelBase::setBatch(std::shared_ptr<ov::Model>& model) {
    ov::set_batch(model, 1);
}

View File

@@ -0,0 +1,345 @@
/*
// Copyright (C) 2020-2024 Intel Corporation
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
*/
#include "models/openpose_decoder.h"
#include <algorithm>
#include <cmath>
#include <memory>
#include <utility>
#include <vector>
#include <utils/common.hpp>
#include "models/results.h"
// A single keypoint candidate: globally-unique id, heat-map position and score.
Peak::Peak(const int id, const cv::Point2f& pos, const float score) : id(id), pos(pos), score(score) {}
// A pose candidate expressed as indices into the flattened peak list;
// -1 marks an unassigned keypoint slot.
HumanPoseByPeaksIndices::HumanPoseByPeaksIndices(const int keypointsNumber)
    : peaksIndices(std::vector<int>(keypointsNumber, -1)),
      nJoints(0),
      score(0.0f) {}
// A scored candidate connection (limb) between two joint peaks.
TwoJointsConnection::TwoJointsConnection(const int firstJointIdx, const int secondJointIdx, const float score)
    : firstJointIdx(firstJointIdx),
      secondJointIdx(secondJointIdx),
      score(score) {}
// Non-maximum suppression on one heat-map channel: collects strict local
// maxima above confidenceThreshold (4-neighborhood, zero-padded at borders),
// suppresses peaks closer than minPeaksDistance to an earlier surviving peak,
// and appends the survivors with sequential ids to allPeaks[heatMapId].
void findPeaks(const std::vector<cv::Mat>& heatMaps,
               const float minPeaksDistance,
               std::vector<std::vector<Peak>>& allPeaks,
               int heatMapId,
               float confidenceThreshold) {
    std::vector<cv::Point> peaks;
    const cv::Mat& heatMap = heatMaps[heatMapId];
    const float* heatMapData = heatMap.ptr<float>();
    size_t heatMapStep = heatMap.step1();
    // Scan a 1-pixel-padded grid so maxima on the border are handled; values
    // outside the map or below the threshold count as 0.
    for (int y = -1; y < heatMap.rows + 1; y++) {
        for (int x = -1; x < heatMap.cols + 1; x++) {
            float val = 0;
            if (x >= 0 && y >= 0 && x < heatMap.cols && y < heatMap.rows) {
                val = heatMapData[y * heatMapStep + x];
                val = val >= confidenceThreshold ? val : 0;
            }
            // NOTE(review): the left/right and top/bottom variable names are
            // swapped relative to the offsets they sample (+1 vs -1); the max
            // test below is symmetric over the four neighbors, so results are
            // unaffected.
            float left_val = 0;
            if (y >= 0 && x < (heatMap.cols - 1) && y < heatMap.rows) {
                left_val = heatMapData[y * heatMapStep + x + 1];
                left_val = left_val >= confidenceThreshold ? left_val : 0;
            }
            float right_val = 0;
            if (x > 0 && y >= 0 && y < heatMap.rows) {
                right_val = heatMapData[y * heatMapStep + x - 1];
                right_val = right_val >= confidenceThreshold ? right_val : 0;
            }
            float top_val = 0;
            if (x >= 0 && x < heatMap.cols && y < (heatMap.rows - 1)) {
                top_val = heatMapData[(y + 1) * heatMapStep + x];
                top_val = top_val >= confidenceThreshold ? top_val : 0;
            }
            float bottom_val = 0;
            if (x >= 0 && y > 0 && x < heatMap.cols) {
                bottom_val = heatMapData[(y - 1) * heatMapStep + x];
                bottom_val = bottom_val >= confidenceThreshold ? bottom_val : 0;
            }
            if ((val > left_val) && (val > right_val) && (val > top_val) && (val > bottom_val)) {
                peaks.push_back(cv::Point(x, y));
            }
        }
    }
    // Sort by x so suppression scans candidates in a stable left-to-right order.
    std::sort(peaks.begin(), peaks.end(), [](const cv::Point& a, const cv::Point& b) {
        return a.x < b.x;
    });
    std::vector<bool> isActualPeak(peaks.size(), true);
    int peakCounter = 0;
    std::vector<Peak>& peaksWithScoreAndID = allPeaks[heatMapId];
    // Greedy distance suppression: each surviving peak removes later peaks
    // within minPeaksDistance of it.
    for (size_t i = 0; i < peaks.size(); i++) {
        if (isActualPeak[i]) {
            for (size_t j = i + 1; j < peaks.size(); j++) {
                if (sqrt((peaks[i].x - peaks[j].x) * (peaks[i].x - peaks[j].x) +
                         (peaks[i].y - peaks[j].y) * (peaks[i].y - peaks[j].y)) < minPeaksDistance) {
                    isActualPeak[j] = false;
                }
            }
            peaksWithScoreAndID.push_back(Peak(peakCounter++, peaks[i], heatMap.at<float>(peaks[i])));
        }
    }
}
// Assembles per-keypoint peaks into full human poses using Part Affinity
// Fields: for each limb, candidate joint pairs are scored by sampling the PAF
// along the segment between them, the best-scoring pairs are greedily matched,
// and the matched connections are merged into per-person keypoint subsets.
// @param allPeaks                  peaks per keypoint type; Peak::id indexes the
//                                  flattened `candidates` list built below
// @param pafs                      PAF channels (x/y pairs per limb)
// @param keypointsNumber           number of keypoint types (18 for OpenPose)
// @param midPointsScoreThreshold   minimum PAF score for a sampled midpoint
// @param foundMidPointsRatioThreshold fraction of midpoints that must pass
// @param minJointsNumber           minimum joints for a pose to be reported
// @param minSubsetScore            minimum average score for a reported pose
std::vector<HumanPose> groupPeaksToPoses(const std::vector<std::vector<Peak>>& allPeaks,
                                         const std::vector<cv::Mat>& pafs,
                                         const size_t keypointsNumber,
                                         const float midPointsScoreThreshold,
                                         const float foundMidPointsRatioThreshold,
                                         const int minJointsNumber,
                                         const float minSubsetScore) {
    // Keypoint index pairs (1-based) that form each of the 19 limbs.
    static const std::pair<int, int> limbIdsHeatmap[] = {{2, 3},
                                                         {2, 6},
                                                         {3, 4},
                                                         {4, 5},
                                                         {6, 7},
                                                         {7, 8},
                                                         {2, 9},
                                                         {9, 10},
                                                         {10, 11},
                                                         {2, 12},
                                                         {12, 13},
                                                         {13, 14},
                                                         {2, 1},
                                                         {1, 15},
                                                         {15, 17},
                                                         {1, 16},
                                                         {16, 18},
                                                         {3, 17},
                                                         {6, 18}};
    // PAF channel pairs (x-, y-component) per limb; converted to indices into
    // `pafs` by subtracting mapIdxOffset below.
    static const std::pair<int, int> limbIdsPaf[] = {{31, 32},
                                                     {39, 40},
                                                     {33, 34},
                                                     {35, 36},
                                                     {41, 42},
                                                     {43, 44},
                                                     {19, 20},
                                                     {21, 22},
                                                     {23, 24},
                                                     {25, 26},
                                                     {27, 28},
                                                     {29, 30},
                                                     {47, 48},
                                                     {49, 50},
                                                     {53, 54},
                                                     {51, 52},
                                                     {55, 56},
                                                     {37, 38},
                                                     {45, 46}};
    // Flatten all peaks into one list; Peak::id values index into this list.
    std::vector<Peak> candidates;
    for (const auto& peaks : allPeaks) {
        candidates.insert(candidates.end(), peaks.begin(), peaks.end());
    }
    std::vector<HumanPoseByPeaksIndices> subset(0, HumanPoseByPeaksIndices(keypointsNumber));
    for (size_t k = 0; k < arraySize(limbIdsPaf); k++) {
        std::vector<TwoJointsConnection> connections;
        const int mapIdxOffset = keypointsNumber + 1;
        // x- and y-component PAF maps for this limb.
        std::pair<cv::Mat, cv::Mat> scoreMid = {pafs[limbIdsPaf[k].first - mapIdxOffset],
                                                pafs[limbIdsPaf[k].second - mapIdxOffset]};
        const int idxJointA = limbIdsHeatmap[k].first - 1;
        const int idxJointB = limbIdsHeatmap[k].second - 1;
        const std::vector<Peak>& candA = allPeaks[idxJointA];
        const std::vector<Peak>& candB = allPeaks[idxJointB];
        const size_t nJointsA = candA.size();
        const size_t nJointsB = candB.size();
        if (nJointsA == 0 && nJointsB == 0) {
            continue;
        } else if (nJointsA == 0) {
            // Only one endpoint type present: create single-joint pose entries
            // for peaks not already assigned to some subset.
            for (size_t i = 0; i < nJointsB; i++) {
                int num = 0;
                for (size_t j = 0; j < subset.size(); j++) {
                    if (subset[j].peaksIndices[idxJointB] == candB[i].id) {
                        num++;
                        continue;
                    }
                }
                if (num == 0) {
                    HumanPoseByPeaksIndices personKeypoints(keypointsNumber);
                    personKeypoints.peaksIndices[idxJointB] = candB[i].id;
                    personKeypoints.nJoints = 1;
                    personKeypoints.score = candB[i].score;
                    subset.push_back(personKeypoints);
                }
            }
            continue;
        } else if (nJointsB == 0) {
            // Symmetric case: only joint A peaks exist for this limb.
            for (size_t i = 0; i < nJointsA; i++) {
                int num = 0;
                for (size_t j = 0; j < subset.size(); j++) {
                    if (subset[j].peaksIndices[idxJointA] == candA[i].id) {
                        num++;
                        continue;
                    }
                }
                if (num == 0) {
                    HumanPoseByPeaksIndices personKeypoints(keypointsNumber);
                    personKeypoints.peaksIndices[idxJointA] = candA[i].id;
                    personKeypoints.nJoints = 1;
                    personKeypoints.score = candA[i].score;
                    subset.push_back(personKeypoints);
                }
            }
            continue;
        }
        // Score every (A, B) pair by integrating the PAF along the segment.
        std::vector<TwoJointsConnection> tempJointConnections;
        for (size_t i = 0; i < nJointsA; i++) {
            for (size_t j = 0; j < nJointsB; j++) {
                cv::Point2f pt = candA[i].pos * 0.5 + candB[j].pos * 0.5;
                cv::Point mid = cv::Point(cvRound(pt.x), cvRound(pt.y));
                cv::Point2f vec = candB[j].pos - candA[i].pos;
                double norm_vec = cv::norm(vec);
                if (norm_vec == 0) {
                    continue;
                }
                vec /= norm_vec;
                // Quick check: PAF direction at the segment midpoint.
                float score = vec.x * scoreMid.first.at<float>(mid) + vec.y * scoreMid.second.at<float>(mid);
                int height_n = pafs[0].rows / 2;  // penalizes limbs longer than half the map height
                float suc_ratio = 0.0f;
                float mid_score = 0.0f;
                const int mid_num = 10;  // number of midpoints sampled along the limb
                const float scoreThreshold = -100.0f;
                if (score > scoreThreshold) {
                    float p_sum = 0;
                    int p_count = 0;
                    cv::Size2f step((candB[j].pos.x - candA[i].pos.x) / (mid_num - 1),
                                    (candB[j].pos.y - candA[i].pos.y) / (mid_num - 1));
                    for (int n = 0; n < mid_num; n++) {
                        cv::Point midPoint(cvRound(candA[i].pos.x + n * step.width),
                                           cvRound(candA[i].pos.y + n * step.height));
                        cv::Point2f pred(scoreMid.first.at<float>(midPoint), scoreMid.second.at<float>(midPoint));
                        // Projection of the PAF vector onto the limb direction.
                        score = vec.x * pred.x + vec.y * pred.y;
                        if (score > midPointsScoreThreshold) {
                            p_sum += score;
                            p_count++;
                        }
                    }
                    // NOTE(review): integer division — suc_ratio is 0 or 1, so
                    // the 0.8 ratio threshold effectively requires ALL mid_num
                    // samples to pass; confirm whether float division
                    // (p_count / float(mid_num)) was intended.
                    suc_ratio = static_cast<float>(p_count / mid_num);
                    float ratio = p_count > 0 ? p_sum / p_count : 0.0f;
                    mid_score = ratio + static_cast<float>(std::min(height_n / norm_vec - 1, 0.0));
                }
                if (mid_score > 0 && suc_ratio > foundMidPointsRatioThreshold) {
                    tempJointConnections.push_back(TwoJointsConnection(i, j, mid_score));
                }
            }
        }
        // Greedy matching: best-scored pairs first, each peak used at most once.
        if (!tempJointConnections.empty()) {
            std::sort(tempJointConnections.begin(),
                      tempJointConnections.end(),
                      [](const TwoJointsConnection& a, const TwoJointsConnection& b) {
                          return (a.score > b.score);
                      });
        }
        size_t num_limbs = std::min(nJointsA, nJointsB);
        size_t cnt = 0;
        std::vector<int> occurA(nJointsA, 0);
        std::vector<int> occurB(nJointsB, 0);
        for (size_t row = 0; row < tempJointConnections.size(); row++) {
            if (cnt == num_limbs) {
                break;
            }
            const int& indexA = tempJointConnections[row].firstJointIdx;
            const int& indexB = tempJointConnections[row].secondJointIdx;
            const float& score = tempJointConnections[row].score;
            if (occurA[indexA] == 0 && occurB[indexB] == 0) {
                // Store the peaks' global ids (indices into `candidates`).
                connections.push_back(TwoJointsConnection(candA[indexA].id, candB[indexB].id, score));
                cnt++;
                occurA[indexA] = 1;
                occurB[indexB] = 1;
            }
        }
        if (connections.empty()) {
            continue;
        }
        // Limbs 17/18 (shoulder-ear) only fill gaps in existing subsets.
        bool extraJointConnections = (k == 17 || k == 18);
        if (k == 0) {
            // First limb: each connection seeds a new pose subset.
            subset = std::vector<HumanPoseByPeaksIndices>(connections.size(), HumanPoseByPeaksIndices(keypointsNumber));
            for (size_t i = 0; i < connections.size(); i++) {
                const int& indexA = connections[i].firstJointIdx;
                const int& indexB = connections[i].secondJointIdx;
                subset[i].peaksIndices[idxJointA] = indexA;
                subset[i].peaksIndices[idxJointB] = indexB;
                subset[i].nJoints = 2;
                subset[i].score = candidates[indexA].score + candidates[indexB].score + connections[i].score;
            }
        } else if (extraJointConnections) {
            for (size_t i = 0; i < connections.size(); i++) {
                const int& indexA = connections[i].firstJointIdx;
                const int& indexB = connections[i].secondJointIdx;
                for (size_t j = 0; j < subset.size(); j++) {
                    if (subset[j].peaksIndices[idxJointA] == indexA && subset[j].peaksIndices[idxJointB] == -1) {
                        subset[j].peaksIndices[idxJointB] = indexB;
                    } else if (subset[j].peaksIndices[idxJointB] == indexB && subset[j].peaksIndices[idxJointA] == -1) {
                        subset[j].peaksIndices[idxJointA] = indexA;
                    }
                }
            }
            continue;
        } else {
            // Later limbs: extend an existing subset that already contains
            // joint A, or start a new two-joint subset.
            for (size_t i = 0; i < connections.size(); i++) {
                const int& indexA = connections[i].firstJointIdx;
                const int& indexB = connections[i].secondJointIdx;
                bool num = false;
                for (size_t j = 0; j < subset.size(); j++) {
                    if (subset[j].peaksIndices[idxJointA] == indexA) {
                        subset[j].peaksIndices[idxJointB] = indexB;
                        subset[j].nJoints++;
                        subset[j].score += candidates[indexB].score + connections[i].score;
                        num = true;
                    }
                }
                if (!num) {
                    HumanPoseByPeaksIndices hpWithScore(keypointsNumber);
                    hpWithScore.peaksIndices[idxJointA] = indexA;
                    hpWithScore.peaksIndices[idxJointB] = indexB;
                    hpWithScore.nJoints = 2;
                    hpWithScore.score = candidates[indexA].score + candidates[indexB].score + connections[i].score;
                    subset.push_back(hpWithScore);
                }
            }
        }
    }
    // Convert surviving subsets into HumanPose outputs; (-1, -1) marks missing
    // keypoints and +0.5 moves coordinates to the pixel center.
    std::vector<HumanPose> poses;
    for (const auto& subsetI : subset) {
        if (subsetI.nJoints < minJointsNumber || subsetI.score / subsetI.nJoints < minSubsetScore) {
            continue;
        }
        int position = -1;
        HumanPose pose{std::vector<cv::Point2f>(keypointsNumber, cv::Point2f(-1.0f, -1.0f)),
                       subsetI.score * std::max(0, subsetI.nJoints - 1)};
        for (const auto& peakIdx : subsetI.peaksIndices) {
            position++;
            if (peakIdx >= 0) {
                pose.keypoints[position] = candidates[peakIdx].pos;
                pose.keypoints[position].x += 0.5;
                pose.keypoints[position].y += 0.5;
            }
        }
        poses.push_back(pose);
    }
    return poses;
}

View File

@@ -0,0 +1,160 @@
/*
// Copyright (C) 2020-2024 Intel Corporation
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
*/
#include "models/segmentation_model.h"
#include <stddef.h>
#include <stdint.h>
#include <fstream>
#include <stdexcept>
#include <string>
#include <vector>
#include <opencv2/core.hpp>
#include <opencv2/imgproc.hpp>
#include <openvino/openvino.hpp>
#include "models/internal_model_data.h"
#include "models/results.h"
// Thin constructor: all configuration is delegated to the ImageModel base
// (model path, optional auto-resize preprocessing, user-forced input layout).
SegmentationModel::SegmentationModel(const std::string& modelFileName, bool useAutoResize, const std::string& layout)
    : ImageModel(modelFileName, useAutoResize, layout) {}
// Reads one label per line from labelFilename.
// An empty path is treated as "no labels" and yields an empty list.
// Throws std::runtime_error when the file cannot be opened and
// std::logic_error when the file opens but contains no lines.
std::vector<std::string> SegmentationModel::loadLabels(const std::string& labelFilename) {
    std::vector<std::string> labelsList;
    if (labelFilename.empty()) {
        return labelsList;
    }
    std::ifstream inputFile(labelFilename);
    if (!inputFile.is_open())
        throw std::runtime_error("Can't open the labels file: " + labelFilename);
    for (std::string label; std::getline(inputFile, label);) {
        labelsList.push_back(label);
    }
    if (labelsList.empty())
        throw std::logic_error("File is empty: " + labelFilename);
    return labelsList;
}
// Validates the model topology (exactly one 4D, 3-channel input and one
// output) and configures pre/post-processing: NHWC u8/f32 input tensor,
// optional runtime resize, and an f32 output whose layout is normalized to
// NCHW (or NHW for argmax-style outputs such as deeplabv3).
// Side effects: fills inputsNames/outputsNames and out{Height,Width,Channels};
// replaces `model` with the PrePostProcessor-built version.
void SegmentationModel::prepareInputsOutputs(std::shared_ptr<ov::Model>& model) {
    // --------------------------- Configure input & output ---------------------------------------------
    // --------------------------- Prepare input -----------------------------------------------------
    if (model->inputs().size() != 1) {
        throw std::logic_error("Segmentation model wrapper supports topologies with only 1 input");
    }
    const auto& input = model->input();
    inputsNames.push_back(input.get_any_name());

    const ov::Layout& inputLayout = getInputLayout(input);
    const ov::Shape& inputShape = input.get_shape();
    if (inputShape.size() != 4 || inputShape[ov::layout::channels_idx(inputLayout)] != 3) {
        throw std::logic_error("3-channel 4-dimensional model's input is expected");
    }

    ov::preprocess::PrePostProcessor ppp(model);
    inputTransform.setPrecision(ppp, model->input().get_any_name());
    ppp.input().tensor().set_layout("NHWC");

    if (useAutoResize) {
        // Let the plugin resize arbitrary input resolutions to the model's spatial size.
        ppp.input().tensor().set_spatial_dynamic_shape();
        ppp.input()
            .preprocess()
            .convert_element_type(ov::element::f32)
            .resize(ov::preprocess::ResizeAlgorithm::RESIZE_LINEAR);
    }

    ppp.input().model().set_layout(inputLayout);

    // --------------------------- Prepare output -----------------------------------------------------
    // Note: the original code performed this identical check twice; once is enough.
    if (model->outputs().size() != 1) {
        throw std::logic_error("Segmentation model wrapper supports topologies with only 1 output");
    }
    const auto& output = model->output();
    outputsNames.push_back(output.get_any_name());

    // get_max_shape() returns a temporary; hold it by value.
    const ov::Shape outputShape = output.get_partial_shape().get_max_shape();
    ov::Layout outputLayout = getLayoutFromShape(outputShape);
    outHeight = static_cast<int>(outputShape[ov::layout::height_idx(outputLayout)]);
    outWidth = static_cast<int>(outputShape[ov::layout::width_idx(outputLayout)]);

    ppp.output().model().set_layout(outputLayout);
    ppp.output().tensor().set_element_type(ov::element::f32);
    if (ov::layout::has_channels(outputLayout)) {
        // Per-class probability maps: postprocess() will argmax over channels.
        ppp.output().tensor().set_layout("NCHW");
        outChannels = static_cast<int>(outputShape[ov::layout::channels_idx(outputLayout)]);
    } else {
        // deeplabv3
        ppp.output().tensor().set_layout("NHW");
        outChannels = 1;
    }
    model = ppp.build();
}
// Converts the inference output into a CV_8UC1 image of per-pixel class
// indices, resized back to the original input resolution.
// Handles three output flavors: i32 indices, i64 indices (narrowed to i32,
// since OpenCV has no 64-bit integer Mat), and f32 per-class probability
// maps (argmax over channels).
std::unique_ptr<ResultBase> SegmentationModel::postprocess(InferenceResult& infResult) {
    ImageResult* result = new ImageResult(infResult.frameId, infResult.metaData);

    const auto& inputImgSize = infResult.internalModelData->asRef<InternalImageModelData>();
    const auto& outTensor = infResult.getFirstOutputTensor();

    result->resultImage = cv::Mat(outHeight, outWidth, CV_8UC1);

    if (outChannels == 1 && outTensor.get_element_type() == ov::element::i32) {
        // Output already holds class indices: just narrow to 8 bit.
        cv::Mat predictions(outHeight, outWidth, CV_32SC1, outTensor.data<int32_t>());
        predictions.convertTo(result->resultImage, CV_8UC1);
    } else if (outChannels == 1 && outTensor.get_element_type() == ov::element::i64) {
        // i64 indices: copy element-wise into an i32 Mat first.
        cv::Mat predictions(outHeight, outWidth, CV_32SC1);
        const auto data = outTensor.data<int64_t>();
        for (size_t i = 0; i < predictions.total(); ++i) {
            reinterpret_cast<int32_t*>(predictions.data)[i] = int32_t(data[i]);
        }
        predictions.convertTo(result->resultImage, CV_8UC1);
    } else if (outTensor.get_element_type() == ov::element::f32) {
        // f32 probability volume (CHW): pick the most probable class per pixel.
        const float* data = outTensor.data<float>();
        for (int rowId = 0; rowId < outHeight; ++rowId) {
            for (int colId = 0; colId < outWidth; ++colId) {
                int classId = 0;
                float maxProb = -1.0f;
                for (int chId = 0; chId < outChannels; ++chId) {
                    float prob = data[chId * outHeight * outWidth + rowId * outWidth + colId];
                    if (prob > maxProb) {
                        classId = chId;
                        maxProb = prob;
                    }
                }  // nChannels
                result->resultImage.at<uint8_t>(rowId, colId) = classId;
            }  // width
        }  // height
    }

    // Nearest-neighbor keeps class indices intact (no interpolation between labels).
    cv::resize(result->resultImage,
               result->resultImage,
               cv::Size(inputImgSize.inputImgWidth, inputImgSize.inputImgHeight),
               0,
               0,
               cv::INTER_NEAREST);
    return std::unique_ptr<ResultBase>(result);
}

View File

@@ -0,0 +1,107 @@
/*
// Copyright (C) 2021-2024 Intel Corporation
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
*/
#include "models/style_transfer_model.h"
#include <stddef.h>
#include <memory>
#include <stdexcept>
#include <string>
#include <vector>
#include <opencv2/core.hpp>
#include <opencv2/imgproc.hpp>
#include <openvino/openvino.hpp>
#include <utils/image_utils.h>
#include <utils/ocv_common.hpp>
#include "models/input_data.h"
#include "models/internal_model_data.h"
#include "models/results.h"
// Auto-resize is disabled: this wrapper resizes with cv::resize in postprocess.
StyleTransferModel::StyleTransferModel(const std::string& modelFileName, const std::string& layout)
    : ImageModel(modelFileName, false, layout) {}
// Validates a single 4D NCHW-like input with batch 1 and 3 channels, records
// the network input size, and configures u8 NHWC input tensors (converted to
// f32 inside the graph) and an f32 output.
// Side effects: fills inputsNames/outputsNames and netInput{Width,Height};
// replaces `model` with the PrePostProcessor-built version.
void StyleTransferModel::prepareInputsOutputs(std::shared_ptr<ov::Model>& model) {
    // --------------------------- Configure input & output ---------------------------------------------
    // --------------------------- Prepare input --------------------------------------------------
    if (model->inputs().size() != 1) {
        throw std::logic_error("Style transfer model wrapper supports topologies with only 1 input");
    }

    inputsNames.push_back(model->input().get_any_name());
    const ov::Shape& inputShape = model->input().get_shape();
    ov::Layout inputLayout = getInputLayout(model->input());

    if (inputShape.size() != 4 || inputShape[ov::layout::batch_idx(inputLayout)] != 1 ||
        inputShape[ov::layout::channels_idx(inputLayout)] != 3) {
        throw std::logic_error("3-channel 4-dimensional model's input is expected");
    }

    netInputWidth = inputShape[ov::layout::width_idx(inputLayout)];
    netInputHeight = inputShape[ov::layout::height_idx(inputLayout)];

    ov::preprocess::PrePostProcessor ppp(model);
    ppp.input().preprocess().convert_element_type(ov::element::f32);
    ppp.input().tensor().set_element_type(ov::element::u8).set_layout("NHWC");
    ppp.input().model().set_layout(inputLayout);

    // --------------------------- Prepare output -----------------------------------------------------
    const ov::OutputVector& outputs = model->outputs();
    if (outputs.size() != 1) {
        throw std::logic_error("Style transfer model wrapper supports topologies with only 1 output");
    }

    outputsNames.push_back(model->output().get_any_name());
    const ov::Shape& outputShape = model->output().get_shape();
    // Output layout is assumed to be NCHW, batch 1, 3 channels.
    ov::Layout outputLayout{"NCHW"};
    if (outputShape.size() != 4 || outputShape[ov::layout::batch_idx(outputLayout)] != 1 ||
        outputShape[ov::layout::channels_idx(outputLayout)] != 3) {
        throw std::logic_error("3-channel 4-dimensional model's output is expected");
    }

    ppp.output().tensor().set_element_type(ov::element::f32);
    model = ppp.build();
}
// Assembles the stylized image from the f32 NCHW output tensor. The three
// planes are merged in reverse order (c = 2, 1, 0), producing a BGR cv::Mat,
// which is resized back to the source resolution and converted to 8 bit.
std::unique_ptr<ResultBase> StyleTransferModel::postprocess(InferenceResult& infResult) {
    ImageResult* result = new ImageResult;
    *static_cast<ResultBase*>(result) = static_cast<ResultBase&>(infResult);

    const auto& inputImgSize = infResult.internalModelData->asRef<InternalImageModelData>();
    const auto outputData = infResult.getFirstOutputTensor().data<float>();
    const ov::Shape& outputShape = infResult.getFirstOutputTensor().get_shape();

    // Fixed NCHW indexing. Cast straight to size_t — the original narrowed
    // through int first, which truncates for (theoretical) dims > INT_MAX.
    const size_t outHeight = static_cast<size_t>(outputShape[2]);
    const size_t outWidth = static_cast<size_t>(outputShape[3]);
    const size_t numOfPixels = outWidth * outHeight;

    std::vector<cv::Mat> imgPlanes;
    imgPlanes = std::vector<cv::Mat>{cv::Mat(outHeight, outWidth, CV_32FC1, &(outputData[numOfPixels * 2])),
                                     cv::Mat(outHeight, outWidth, CV_32FC1, &(outputData[numOfPixels])),
                                     cv::Mat(outHeight, outWidth, CV_32FC1, &(outputData[0]))};
    cv::Mat resultImg;
    cv::merge(imgPlanes, resultImg);
    cv::resize(resultImg, result->resultImage, cv::Size(inputImgSize.inputImgWidth, inputImgSize.inputImgHeight));
    result->resultImage.convertTo(result->resultImage, CV_8UC3);
    return std::unique_ptr<ResultBase>(result);
}

View File

@@ -0,0 +1,310 @@
/*
// Copyright (C) 2021-2024 Intel Corporation
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
*/
#include "models/super_resolution_model.h"
#include <stddef.h>
#include <map>
#include <stdexcept>
#include <string>
#include <utility>
#include <vector>
#include <opencv2/imgproc.hpp>
#include <openvino/openvino.hpp>
#include <utils/image_utils.h>
#include <utils/ocv_common.hpp>
#include <utils/slog.hpp>
#include "models/input_data.h"
#include "models/internal_model_data.h"
#include "models/results.h"
// Stores the expected input image size; the model is later reshaped to it in
// changeInputSize(). Auto-resize is disabled (resizing is done manually).
SuperResolutionModel::SuperResolutionModel(const std::string& modelFileName,
                                           const cv::Size& inputImgSize,
                                           const std::string& layout)
    : ImageModel(modelFileName, false, layout) {
    netInputHeight = inputImgSize.height;
    netInputWidth = inputImgSize.width;
}
// Validates 1 or 2 4D inputs (1 or 3 channels). With 2 inputs, the smaller one
// is treated as the low-resolution (LR) input and the larger one as the
// bicubic-upsampled input; they are swapped if discovered in the other order.
// Configures NHWC input tensors and an f32 output, then reshapes the model to
// the user-requested input size scaled by the network's upscale factor.
// Side effects: fills inputsNames/outputsNames; replaces `model`.
void SuperResolutionModel::prepareInputsOutputs(std::shared_ptr<ov::Model>& model) {
    // --------------------------- Configure input & output ---------------------------------------------
    // --------------------------- Prepare input --------------------------------------------------
    const ov::OutputVector& inputs = model->inputs();
    if (inputs.size() != 1 && inputs.size() != 2) {
        throw std::logic_error("Super resolution model wrapper supports topologies with 1 or 2 inputs only");
    }

    std::string lrInputTensorName = inputs.begin()->get_any_name();
    inputsNames.push_back(lrInputTensorName);
    ov::Shape lrShape = inputs.begin()->get_shape();
    if (lrShape.size() != 4) {
        throw std::logic_error("Number of dimensions for an input must be 4");
    }

    // in case of 2 inputs they have the same layouts
    ov::Layout inputLayout = getInputLayout(model->inputs().front());
    auto channelsId = ov::layout::channels_idx(inputLayout);
    auto heightId = ov::layout::height_idx(inputLayout);
    auto widthId = ov::layout::width_idx(inputLayout);

    if (lrShape[channelsId] != 1 && lrShape[channelsId] != 3) {
        throw std::logic_error("Input layer is expected to have 1 or 3 channels");
    }

    // A model like single-image-super-resolution-???? may take bicubic interpolation of the input image as the
    // second input
    std::string bicInputTensorName;
    if (inputs.size() == 2) {
        bicInputTensorName = (++inputs.begin())->get_any_name();
        inputsNames.push_back(bicInputTensorName);
        ov::Shape bicShape = (++inputs.begin())->get_shape();
        if (bicShape.size() != 4) {
            throw std::logic_error("Number of dimensions for both inputs must be 4");
        }
        // Ensure lrShape/inputsNames[0] refer to the smaller (LR) input.
        if (lrShape[widthId] >= bicShape[widthId] && lrShape[heightId] >= bicShape[heightId]) {
            std::swap(bicShape, lrShape);
            inputsNames[0].swap(inputsNames[1]);
        } else if (!(lrShape[widthId] <= bicShape[widthId] && lrShape[heightId] <= bicShape[heightId])) {
            // Fixed message: the original concatenated "...a spatial" and "dimension..."
            // without a separating space.
            throw std::logic_error("Each spatial dimension of one input must surpass or be equal to a spatial "
                                   "dimension of another input");
        }
    }

    ov::preprocess::PrePostProcessor ppp(model);
    for (const auto& input : inputs) {
        inputTransform.setPrecision(ppp, input.get_any_name());
        ppp.input(input.get_any_name()).tensor().set_layout("NHWC");
        ppp.input(input.get_any_name()).model().set_layout(inputLayout);
    }

    // --------------------------- Prepare output -----------------------------------------------------
    const ov::OutputVector& outputs = model->outputs();
    if (outputs.size() != 1) {
        throw std::logic_error("Super resolution model wrapper supports topologies with only 1 output");
    }

    outputsNames.push_back(outputs.begin()->get_any_name());
    ppp.output().tensor().set_element_type(ov::element::f32);
    model = ppp.build();

    // Derive the upscale coefficient from output/input width ratio.
    const ov::Shape& outShape = model->output().get_shape();
    const ov::Layout outputLayout("NCHW");
    const auto outWidth = outShape[ov::layout::width_idx(outputLayout)];
    // NOTE(review): lrShape (an input shape) is indexed with the *output* NCHW
    // layout here — correct only while the input is NCHW too; verify for NHWC inputs.
    const auto inWidth = lrShape[ov::layout::width_idx(outputLayout)];
    changeInputSize(model, static_cast<int>(outWidth / inWidth));
}
// Reshapes the model so the LR input matches the user image size
// (netInputWidth x netInputHeight) and, when present, the bicubic input
// matches it scaled by `coeff` (the network's upscale factor). Batch is
// forced to 1 for both inputs.
void SuperResolutionModel::changeInputSize(std::shared_ptr<ov::Model>& model, int coeff) {
    std::map<std::string, ov::PartialShape> shapes;
    const ov::Layout& layout = ov::layout::get_layout(model->inputs().front());
    const auto batchId = ov::layout::batch_idx(layout);
    const auto heightId = ov::layout::height_idx(layout);
    const auto widthId = ov::layout::width_idx(layout);

    const ov::OutputVector& inputs = model->inputs();
    std::string lrInputTensorName = inputs.begin()->get_any_name();
    ov::Shape lrShape = inputs.begin()->get_shape();

    if (inputs.size() == 2) {
        std::string bicInputTensorName = (++inputs.begin())->get_any_name();
        ov::Shape bicShape = (++inputs.begin())->get_shape();
        // Same convention as prepareInputsOutputs(): the smaller input is the LR one.
        if (lrShape[heightId] >= bicShape[heightId] && lrShape[widthId] >= bicShape[widthId]) {
            std::swap(bicShape, lrShape);
            std::swap(bicInputTensorName, lrInputTensorName);
        }
        bicShape[batchId] = 1;
        bicShape[heightId] = coeff * netInputHeight;
        bicShape[widthId] = coeff * netInputWidth;
        shapes[bicInputTensorName] = ov::PartialShape(bicShape);
    }

    lrShape[batchId] = 1;
    lrShape[heightId] = netInputHeight;
    lrShape[widthId] = netInputWidth;
    shapes[lrInputTensorName] = ov::PartialShape(lrShape);

    model->reshape(shapes);
}
// Fills the request's input tensor(s) from the input image: converts to
// grayscale when the model expects 1 channel, resizes to the LR input size,
// and, for two-input models, also provides a bicubic-upscaled copy.
// Returns the pre-resize image dimensions for use in postprocessing.
std::shared_ptr<InternalModelData> SuperResolutionModel::preprocess(const InputData& inputData,
                                                                    ov::InferRequest& request) {
    auto imgData = inputData.asRef<ImageInputData>();
    auto img = inputTransform(imgData.inputImage);

    const ov::Tensor lrInputTensor = request.get_tensor(inputsNames[0]);
    const ov::Layout layout("NHWC");

    // Model with a single channel expects a grayscale image.
    if (img.channels() != static_cast<int>(lrInputTensor.get_shape()[ov::layout::channels_idx(layout)])) {
        cv::cvtColor(img, img, cv::COLOR_BGR2GRAY);
    }

    if (static_cast<size_t>(img.cols) != netInputWidth ||
        static_cast<size_t>(img.rows) != netInputHeight) {
        slog::warn << "\tChosen model aspect ratio doesn't match image aspect ratio" << slog::endl;
    }

    const size_t height = lrInputTensor.get_shape()[ov::layout::height_idx(layout)];
    const size_t width = lrInputTensor.get_shape()[ov::layout::width_idx(layout)];
    img = resizeImageExt(img, width, height);
    request.set_tensor(inputsNames[0], wrapMat2Tensor(img));

    if (inputsNames.size() == 2) {
        // Second input: the same image, bicubically upscaled to the model's HR size.
        const ov::Tensor bicInputTensor = request.get_tensor(inputsNames[1]);
        const int h = static_cast<int>(bicInputTensor.get_shape()[ov::layout::height_idx(layout)]);
        const int w = static_cast<int>(bicInputTensor.get_shape()[ov::layout::width_idx(layout)]);
        cv::Mat resized;
        cv::resize(img, resized, cv::Size(w, h), 0, 0, cv::INTER_CUBIC);
        request.set_tensor(inputsNames[1], wrapMat2Tensor(resized));
    }

    return std::make_shared<InternalImageModelData>(img.cols, img.rows);
}
// Builds the upscaled image from the f32 NCHW output. 3-channel outputs are
// merged plane-by-plane (c = 0, 1, 2 — tensor order is preserved); 1-channel
// outputs are binarized at 0.5 (text-image-super-resolution models). Planes
// are scaled from [0, 1] to 8-bit.
std::unique_ptr<ResultBase> SuperResolutionModel::postprocess(InferenceResult& infResult) {
    ImageResult* result = new ImageResult;
    *static_cast<ResultBase*>(result) = static_cast<ResultBase&>(infResult);

    const auto outputData = infResult.getFirstOutputTensor().data<float>();
    std::vector<cv::Mat> imgPlanes;
    const ov::Shape& outShape = infResult.getFirstOutputTensor().get_shape();
    // Fixed NCHW indexing. Cast straight to size_t — the original narrowed
    // through int first, which is a pointless double conversion.
    const size_t outChannels = static_cast<size_t>(outShape[1]);
    const size_t outHeight = static_cast<size_t>(outShape[2]);
    const size_t outWidth = static_cast<size_t>(outShape[3]);
    const size_t numOfPixels = outWidth * outHeight;

    if (outChannels == 3) {
        imgPlanes = std::vector<cv::Mat>{cv::Mat(outHeight, outWidth, CV_32FC1, &(outputData[0])),
                                         cv::Mat(outHeight, outWidth, CV_32FC1, &(outputData[numOfPixels])),
                                         cv::Mat(outHeight, outWidth, CV_32FC1, &(outputData[numOfPixels * 2]))};
    } else {
        imgPlanes = std::vector<cv::Mat>{cv::Mat(outHeight, outWidth, CV_32FC1, &(outputData[0]))};
        // Post-processing for text-image-super-resolution models
        cv::threshold(imgPlanes[0], imgPlanes[0], 0.5f, 1.0f, cv::THRESH_BINARY);
    }

    for (auto& img : imgPlanes) {
        img.convertTo(img, CV_8UC1, 255);
    }

    cv::Mat resultImg;
    cv::merge(imgPlanes, resultImg);
    result->resultImage = resultImg;
    return std::unique_ptr<ResultBase>(result);
}
// Re-joins the three per-channel results (batch dimension of 3, see
// setBatch()) into one BGR image: channel i lives at offset i * H * W of the
// f32 output tensor.
std::unique_ptr<ResultBase> SuperResolutionChannelJoint::postprocess(InferenceResult& infResult) {
    ImageResult* result = new ImageResult;
    *static_cast<ResultBase*>(result) = static_cast<ResultBase&>(infResult);

    const auto outputData = infResult.getFirstOutputTensor().data<float>();
    const ov::Shape& outShape = infResult.getFirstOutputTensor().get_shape();
    // Fixed NCHW indexing. Cast straight to size_t — the original narrowed
    // through int first.
    const size_t outHeight = static_cast<size_t>(outShape[2]);
    const size_t outWidth = static_cast<size_t>(outShape[3]);

    std::vector<cv::Mat> channels;
    for (int i = 0; i < 3; ++i) {
        channels.emplace_back(outHeight, outWidth, CV_32FC1, &(outputData[0]) + i * outHeight * outWidth);
    }

    cv::Mat resultImg(outHeight, outWidth, CV_32FC3);
    cv::merge(channels, resultImg);
    resultImg.convertTo(resultImg, CV_8UC3);
    result->resultImage = resultImg;
    return std::unique_ptr<ResultBase>(result);
}
// Splits the BGR input into three single-channel images and stacks them along
// the batch dimension ({3, 1, H, W} u8 tensor), so each color channel is
// super-resolved independently; postprocess() merges them back.
std::shared_ptr<InternalModelData> SuperResolutionChannelJoint::preprocess(const InputData& inputData,
                                                                           ov::InferRequest& request) {
    auto imgData = inputData.asRef<ImageInputData>();
    auto& img = imgData.inputImage;

    const ov::Tensor lrInputTensor = request.get_tensor(inputsNames[0]);
    const ov::Layout layout("NCHW");

    if (static_cast<size_t>(img.cols) != netInputWidth || static_cast<size_t>(img.rows) != netInputHeight) {
        slog::warn << "\tChosen model aspect ratio doesn't match image aspect ratio" << slog::endl;
    }

    const size_t height = lrInputTensor.get_shape()[ov::layout::height_idx(layout)];
    const size_t width = lrInputTensor.get_shape()[ov::layout::width_idx(layout)];

    std::vector<cv::Mat> channels;
    cv::split(img, channels);
    // One "batch" entry per color channel.
    ov::Tensor tensor(ov::element::u8, {3, 1, height, width});
    for (int i = 0; i < 3; ++i) {
        matToTensor(channels[i], tensor, i);
    }
    request.set_tensor(inputsNames[0], tensor);

    return std::make_shared<InternalImageModelData>(img.cols, img.rows);
}
// Validates a single 4D, 1-channel input and a single output, configures a u8
// NCHW input tensor and f32 output, reshapes to the requested input size, and
// sets batch to 3 so the three color channels can be processed as one batch.
void SuperResolutionChannelJoint::prepareInputsOutputs(std::shared_ptr<ov::Model>& model) {
    // --------------------------- Configure input & output ---------------------------------------------
    // --------------------------- Prepare input --------------------------------------------------
    const ov::OutputVector& inputs = model->inputs();
    if (inputs.size() != 1) {
        throw std::logic_error("Super resolution channel joint model wrapper supports topologies with 1 inputs only");
    }

    std::string lrInputTensorName = inputs.begin()->get_any_name();
    inputsNames.push_back(lrInputTensorName);
    ov::Shape lrShape = inputs.begin()->get_shape();
    if (lrShape.size() != 4) {
        throw std::logic_error("Number of dimensions for an input must be 4");
    }

    // Single grayscale input: the model runs once per color channel (batched).
    ov::Layout inputLayout = getInputLayout(model->inputs().front());
    auto channelsId = ov::layout::channels_idx(inputLayout);

    if (lrShape[channelsId] != 1) {
        throw std::logic_error("Input layer is expected to have 1 channel");
    }

    ov::preprocess::PrePostProcessor ppp(model);
    const auto& input = inputs.front();
    ppp.input(input.get_any_name()).tensor().set_element_type(ov::element::u8).set_layout("NCHW");
    ppp.input(input.get_any_name()).model().set_layout(inputLayout);

    // --------------------------- Prepare output -----------------------------------------------------
    const ov::OutputVector& outputs = model->outputs();
    if (outputs.size() != 1) {
        throw std::logic_error("Super resolution model wrapper supports topologies with only 1 output");
    }

    outputsNames.push_back(outputs.begin()->get_any_name());
    ppp.output().tensor().set_element_type(ov::element::f32);
    model = ppp.build();

    // Derive the upscale coefficient from output/input width ratio.
    const ov::Shape& outShape = model->output().get_shape();
    const ov::Layout outputLayout("NCHW");
    const auto outWidth = outShape[ov::layout::width_idx(outputLayout)];
    // NOTE(review): lrShape (an input shape) is indexed with the *output* NCHW
    // layout here — correct only while the input is NCHW too; verify for NHWC inputs.
    const auto inWidth = lrShape[ov::layout::width_idx(outputLayout)];
    changeInputSize(model, static_cast<int>(outWidth / inWidth));
    ov::set_batch(model, 3);
}
// Batch of 3: one batch entry per color channel (see preprocess()/postprocess()).
void SuperResolutionChannelJoint::setBatch(std::shared_ptr<ov::Model>& model) {
    ov::set_batch(model, 3);
}

View File

@@ -0,0 +1,206 @@
// Copyright (C) 2019-2024 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#include "monitors/cpu_monitor.h"
#include <algorithm>
#ifdef _WIN32
#include "query_wrapper.h"
#include <string>
#include <system_error>
#include <PdhMsg.h>
#include <Windows.h>
namespace {
// Number of logical processors, queried once at startup.
const std::size_t nCores = []() {
    SYSTEM_INFO sysinfo;
    GetSystemInfo(&sysinfo);
    return sysinfo.dwNumberOfProcessors;
}();
}
// Windows implementation: samples per-core CPU load via PDH
// "\Processor(i)\% Processor Time" rate counters.
class CpuMonitor::PerformanceCounter {
public:
    PerformanceCounter() : coreTimeCounters(nCores) {
        PDH_STATUS status;
        for (std::size_t i = 0; i < nCores; ++i) {
            std::wstring fullCounterPath{L"\\Processor(" + std::to_wstring(i) + L")\\% Processor Time"};
            status = PdhAddCounterW(query, fullCounterPath.c_str(), 0, &coreTimeCounters[i]);
            if (ERROR_SUCCESS != status) {
                throw std::system_error(status, std::system_category(), "PdhAddCounterW() failed");
            }
            status = PdhSetCounterScaleFactor(coreTimeCounters[i], -2); // scale counter to [0, 1]
            if (ERROR_SUCCESS != status) {
                throw std::system_error(status, std::system_category(), "PdhSetCounterScaleFactor() failed");
            }
        }
        // Prime the query: rate counters need a baseline sample before the
        // first getCpuLoad() call can produce a value.
        status = PdhCollectQueryData(query);
        if (ERROR_SUCCESS != status) {
            throw std::system_error(status, std::system_category(), "PdhCollectQueryData() failed");
        }
    }

    // Returns per-core load in [0, 1], or an empty vector on the transient
    // PDH_CALC_NEGATIVE_DENOMINATOR condition (the caller simply skips the sample).
    std::vector<double> getCpuLoad() {
        PDH_STATUS status;
        status = PdhCollectQueryData(query);
        if (ERROR_SUCCESS != status) {
            throw std::system_error(status, std::system_category(), "PdhCollectQueryData() failed");
        }

        PDH_FMT_COUNTERVALUE displayValue;
        std::vector<double> cpuLoad(coreTimeCounters.size());
        for (std::size_t i = 0; i < coreTimeCounters.size(); ++i) {
            status = PdhGetFormattedCounterValue(coreTimeCounters[i], PDH_FMT_DOUBLE, NULL,
                &displayValue);
            switch (status) {
                case ERROR_SUCCESS: break;
                // PdhGetFormattedCounterValue() can sometimes return PDH_CALC_NEGATIVE_DENOMINATOR for some reason
                case PDH_CALC_NEGATIVE_DENOMINATOR: return {};
                default:
                    throw std::system_error(status, std::system_category(), "PdhGetFormattedCounterValue() failed");
            }
            if (PDH_CSTATUS_VALID_DATA != displayValue.CStatus && PDH_CSTATUS_NEW_DATA != displayValue.CStatus) {
                throw std::runtime_error("Error in counter data");
            }
            cpuLoad[i] = displayValue.doubleValue;
        }
        return cpuLoad;
    }

private:
    QueryWrapper query;                          // RAII wrapper around the PDH query handle
    std::vector<PDH_HCOUNTER> coreTimeCounters;  // one counter per logical core
};
#elif __linux__
#include <chrono>
#include <regex>
#include <utility>
#include <fstream>
#include <unistd.h>
namespace {
// Jiffies per second and logical core count, queried once at startup.
const long clockTicks = sysconf(_SC_CLK_TCK);
const std::size_t nCores = sysconf(_SC_NPROCESSORS_CONF);
// Parses /proc/stat and returns, per core, the sum of idle and iowait
// jiffies. Throws if a core id beyond the startup core count appears.
std::vector<unsigned long> getIdleCpuStat() {
    std::vector<unsigned long> idleCpuStat(nCores);
    std::ifstream procStat("/proc/stat");
    std::regex coreJiffies("^cpu(\\d+)\\s+"
                           "(\\d+)\\s+"
                           "(\\d+)\\s+"
                           "(\\d+)\\s+"
                           "(\\d+)\\s+" // idle
                           "(\\d+)"); // iowait
    for (std::string line; std::getline(procStat, line);) {
        std::smatch match;
        if (!std::regex_search(line, match, coreJiffies)) {
            continue;
        }
        const unsigned long coreId = stoul(match[1]);
        if (nCores <= coreId) {
            throw std::runtime_error("The number of cores has changed");
        }
        // it doesn't handle overflow of sum and overflows of /proc/stat values
        idleCpuStat[coreId] = stoul(match[5]) + stoul(match[6]);
    }
    return idleCpuStat;
}
}
// Linux implementation: derives per-core load from the growth of idle+iowait
// jiffies in /proc/stat between consecutive calls.
class CpuMonitor::PerformanceCounter {
public:
    PerformanceCounter() : prevIdleCpuStat{getIdleCpuStat()}, prevTimePoint{std::chrono::steady_clock::now()} {}

    // Returns per-core load in [0, 1], or an empty vector when called less
    // than 100 ms after the previous successful sample.
    std::vector<double> getCpuLoad() {
        std::vector<unsigned long> idleCpuStat = getIdleCpuStat();
        auto timePoint = std::chrono::steady_clock::now();
        // don't update data too frequently which may result in negative values for cpuLoad.
        // It may happen when collectData() is called just after setHistorySize().
        if (timePoint - prevTimePoint > std::chrono::milliseconds{100}) {
            std::vector<double> cpuLoad(nCores);
            for (std::size_t i = 0; i < idleCpuStat.size(); ++i) {
                // load = 1 - (idle time) / (wall time) over the sampling interval
                double idleDiff = idleCpuStat[i] - prevIdleCpuStat[i];
                typedef std::chrono::duration<double, std::chrono::seconds::period> Sec;
                cpuLoad[i] = 1.0
                    - idleDiff / clockTicks / std::chrono::duration_cast<Sec>(timePoint - prevTimePoint).count();
            }
            prevIdleCpuStat = std::move(idleCpuStat);
            prevTimePoint = timePoint;
            return cpuLoad;
        }
        return {};
    }

private:
    std::vector<unsigned long> prevIdleCpuStat;          // idle+iowait jiffies at the previous sample
    std::chrono::steady_clock::time_point prevTimePoint; // time of the previous sample
};
#else
// not implemented
namespace {
const std::size_t nCores{0};
}
// Fallback for unsupported platforms: reports no CPU load.
class CpuMonitor::PerformanceCounter {
public:
    std::vector<double> getCpuLoad() {return {};};
};
#endif
// Monitoring stays disabled (no PerformanceCounter) until setHistorySize()
// is called with a non-zero size.
CpuMonitor::CpuMonitor() :
    samplesNumber{0},
    historySize{0},
    cpuLoadSum(nCores, 0) {}
// PerformanceCounter is incomplete in header and destructor can't be defined implicitly
CpuMonitor::~CpuMonitor() = default;
// Enables (size > 0) or disables (size == 0) sampling and trims the stored
// history to at most `size` most-recent samples.
void CpuMonitor::setHistorySize(std::size_t size) {
    if (0 == historySize && 0 != size) {
        performanceCounter.reset(new PerformanceCounter);
    } else if (0 != historySize && 0 == size) {
        performanceCounter.reset();
    }
    historySize = size;
    // Keep only the newest min(size, current) entries. std::min must be
    // qualified: the original unqualified `min` only resolves through the
    // Windows.h min macro and fails to compile elsewhere.
    std::ptrdiff_t newSize = static_cast<std::ptrdiff_t>(std::min(size, cpuLoadHistory.size()));
    cpuLoadHistory.erase(cpuLoadHistory.begin(), cpuLoadHistory.end() - newSize);
}
// Takes one load sample: accumulates per-core sums (for the mean) and appends
// the sample to the bounded history. An empty sample (throttled or transient
// PDH error — see PerformanceCounter::getCpuLoad()) is silently skipped.
void CpuMonitor::collectData() {
    std::vector<double> cpuLoad = performanceCounter->getCpuLoad();

    if (!cpuLoad.empty()) {
        for (std::size_t i = 0; i < cpuLoad.size(); ++i) {
            cpuLoadSum[i] += cpuLoad[i];
        }
        ++samplesNumber;

        cpuLoadHistory.push_back(std::move(cpuLoad));
        if (cpuLoadHistory.size() > historySize) {
            cpuLoadHistory.pop_front();
        }
    }
}
// Maximum number of samples kept in the history.
std::size_t CpuMonitor::getHistorySize() const {
    return historySize;
}

// Copy of the recorded per-core load samples, oldest first.
std::deque<std::vector<double>> CpuMonitor::getLastHistory() const {
    return cpuLoadHistory;
}

// Per-core mean load over all collected samples (0 when nothing was sampled).
std::vector<double> CpuMonitor::getMeanCpuLoad() const {
    std::vector<double> meanCpuLoad;
    meanCpuLoad.reserve(cpuLoadSum.size());
    for (double coreLoad : cpuLoadSum) {
        meanCpuLoad.push_back(samplesNumber ? coreLoad / samplesNumber : 0);
    }
    return meanCpuLoad;
}

View File

@@ -0,0 +1,213 @@
// Copyright (C) 2019-2024 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#include "monitors/memory_monitor.h"
// One memory-usage snapshot. Values are in GiB (see the 1024^3 divisions in
// the platform-specific getMemState() implementations below).
struct MemState {
    double memTotal, usedMem, usedSwap;
};
#ifdef _WIN32
#include "query_wrapper.h"
#include <algorithm>
#define PSAPI_VERSION 2
#include <system_error>
#include <Windows.h>
#include <PdhMsg.h>
#include <Psapi.h>
namespace {
// Total physical RAM in GiB, via GetPerformanceInfo().
double getMemTotal() {
    PERFORMANCE_INFORMATION performanceInformation;
    if (!GetPerformanceInfo(&performanceInformation, sizeof(performanceInformation))) {
        throw std::runtime_error("GetPerformanceInfo() failed");
    }
    return static_cast<double>(performanceInformation.PhysicalTotal * performanceInformation.PageSize)
        / (1024 * 1024 * 1024);
}
}
// Windows implementation: physical memory via GetPerformanceInfo() and swap
// (paging file) usage via the PDH "\Paging File(_Total)\% Usage" counter.
class MemoryMonitor::PerformanceCounter {
public:
    PerformanceCounter() {
        PDH_STATUS status = PdhAddCounterW(query, L"\\Paging File(_Total)\\% Usage", 0, &pagingFileUsageCounter);
        if (ERROR_SUCCESS != status) {
            throw std::system_error(status, std::system_category(), "PdhAddCounterW() failed");
        }
        status = PdhSetCounterScaleFactor(pagingFileUsageCounter, -2); // scale counter to [0, 1]
        if (ERROR_SUCCESS != status) {
            throw std::system_error(status, std::system_category(), "PdhSetCounterScaleFactor() failed");
        }
    }

    // Returns {total RAM, used RAM, used swap}, all in GiB.
    MemState getMemState() {
        PERFORMANCE_INFORMATION performanceInformation;
        if (!GetPerformanceInfo(&performanceInformation, sizeof(performanceInformation))) {
            throw std::runtime_error("GetPerformanceInfo() failed");
        }

        PDH_STATUS status;
        status = PdhCollectQueryData(query);
        if (ERROR_SUCCESS != status) {
            throw std::system_error(status, std::system_category(), "PdhCollectQueryData() failed");
        }

        PDH_FMT_COUNTERVALUE displayValue;
        status = PdhGetFormattedCounterValue(pagingFileUsageCounter, PDH_FMT_DOUBLE, NULL, &displayValue);
        if (ERROR_SUCCESS != status) {
            throw std::system_error(status, std::system_category(), "PdhGetFormattedCounterValue() failed");
        }
        if (PDH_CSTATUS_VALID_DATA != displayValue.CStatus && PDH_CSTATUS_NEW_DATA != displayValue.CStatus) {
            throw std::runtime_error("Error in counter data");
        }

        // Paging file capacity = commit limit minus physical RAM; used swap is
        // that capacity scaled by the PDH usage fraction.
        double pagingFilesSize = static_cast<double>(
            (performanceInformation.CommitLimit - performanceInformation.PhysicalTotal)
            * performanceInformation.PageSize) / (1024 * 1024 * 1024);
        return {static_cast<double>(performanceInformation.PhysicalTotal * performanceInformation.PageSize)
                / (1024 * 1024 * 1024),
            static_cast<double>(
                (performanceInformation.PhysicalTotal - performanceInformation.PhysicalAvailable)
                * performanceInformation.PageSize) / (1024 * 1024 * 1024),
            pagingFilesSize * displayValue.doubleValue};
    }

private:
    QueryWrapper query;                   // RAII wrapper around the PDH query handle
    PDH_HCOUNTER pagingFileUsageCounter;  // paging file usage in [0, 1]
};
#elif __linux__
#include <fstream>
#include <utility>
#include <vector>
#include <regex>
namespace {
// Parses /proc/meminfo and returns {{MemAvailable, SwapFree}, {MemTotal,
// SwapTotal}}, all converted from kB to GiB. Throws when MemTotal is missing
// (or zero), which indicates an unreadable or malformed /proc/meminfo.
std::pair<std::pair<double, double>, std::pair<double, double>> getAvailableMemSwapTotalMemSwap() {
    double memAvailable = 0, swapFree = 0, memTotal = 0, swapTotal = 0;
    const std::regex memRegex("^(.+):\\s+(\\d+) kB$");
    std::ifstream meminfo("/proc/meminfo");
    std::smatch match;
    for (std::string line; std::getline(meminfo, line);) {
        if (!std::regex_match(line, match, memRegex)) {
            continue;
        }
        const double valueGiB = stod(match[2]) / (1024 * 1024);
        if ("MemAvailable" == match[1]) {
            memAvailable = valueGiB;
        } else if ("SwapFree" == match[1]) {
            swapFree = valueGiB;
        } else if ("MemTotal" == match[1]) {
            memTotal = valueGiB;
        } else if ("SwapTotal" == match[1]) {
            swapTotal = valueGiB;
        }
    }
    if (0 == memTotal) {
        throw std::runtime_error("Can't get MemTotal");
    }
    return {{memAvailable, swapFree}, {memTotal, swapTotal}};
}
// Total physical RAM in GiB, parsed from /proc/meminfo.
double getMemTotal() {
    return getAvailableMemSwapTotalMemSwap().second.first;
}
}
// Linux implementation: converts the /proc/meminfo totals into a snapshot of
// {total RAM, used RAM, used swap} (all in GiB).
class MemoryMonitor::PerformanceCounter {
public:
    MemState getMemState() {
        const auto availableTotal = getAvailableMemSwapTotalMemSwap();
        const double memAvailable = availableTotal.first.first;
        const double swapFree = availableTotal.first.second;
        const double memTotal = availableTotal.second.first;
        const double swapTotal = availableTotal.second.second;
        // used = total - free/available, for both RAM and swap.
        return {memTotal, memTotal - memAvailable, swapTotal - swapFree};
    }
};
#else
// not implemented
namespace {
// Fallback for unsupported platforms: no memory statistics available.
double getMemTotal() {return 0.0;}
}

// Fallback counter: always reports zero usage.
class MemoryMonitor::PerformanceCounter {
public:
    MemState getMemState() {return {0.0, 0.0, 0.0};}
};
#endif
// Monitoring stays disabled (no PerformanceCounter, memTotal unset) until
// setHistorySize() is called with a non-zero size.
MemoryMonitor::MemoryMonitor() :
    samplesNumber{0},
    historySize{0},
    memSum{0.0},
    swapSum{0.0},
    maxMem{0.0},
    maxSwap{0.0},
    memTotal{0.0},
    maxMemTotal{0.0} {}
// PerformanceCounter is incomplete in header and destructor can't be defined implicitly
MemoryMonitor::~MemoryMonitor() = default;
// Enables (size > 0) or disables (size == 0) sampling and trims the stored
// history to at most `size` most-recent samples.
void MemoryMonitor::setHistorySize(std::size_t size) {
    if (0 == historySize && 0 != size) {
        performanceCounter.reset(new MemoryMonitor::PerformanceCounter);
        // memTotal is not initialized in constructor because for linux its initialization involves constructing
        // std::regex which is unimplemented and throws an exception for gcc 4.8.5 (default for CentOS 7.4).
        // Delaying initialization triggers the error only when the monitor is used
        // https://gcc.gnu.org/bugzilla/show_bug.cgi?id=53631
        memTotal = ::getMemTotal();
    } else if (0 != historySize && 0 == size) {
        performanceCounter.reset();
    }
    historySize = size;
    // Keep only the newest `size` entries. Written without min(): the original
    // unqualified `min` only resolved through the Windows.h min macro, and
    // <algorithm> is not included on the non-Windows paths of this file.
    while (memSwapUsageHistory.size() > size) {
        memSwapUsageHistory.pop_front();
    }
}
// Takes one measurement and folds it into the running sums, maxima and the
// bounded sample history.
void MemoryMonitor::collectData() {
    MemState memState = performanceCounter->getMemState();
    maxMemTotal = max(maxMemTotal, memState.memTotal);
    memSum += memState.usedMem;
    swapSum += memState.usedSwap;
    ++samplesNumber;
    maxMem = max(maxMem, memState.usedMem);
    maxSwap = max(maxSwap, memState.usedSwap);
    memSwapUsageHistory.emplace_back(memState.usedMem, memState.usedSwap);
    // Keep at most historySize samples.
    if (memSwapUsageHistory.size() > historySize) {
        memSwapUsageHistory.pop_front();
    }
}
// Number of samples kept for plotting (0 == monitor disabled).
std::size_t MemoryMonitor::getHistorySize() const {
    return historySize;
}
// Recent (usedMem, usedSwap) samples in GiB, oldest first.
std::deque<std::pair<double, double>> MemoryMonitor::getLastHistory() const {
    return memSwapUsageHistory;
}
// Mean memory usage in GiB over all collected samples (0 if none collected).
double MemoryMonitor::getMeanMem() const {
    return samplesNumber ? memSum / samplesNumber : 0;
}
// Mean swap usage in GiB over all collected samples (0 if none collected).
double MemoryMonitor::getMeanSwap() const {
    return samplesNumber ? swapSum / samplesNumber : 0;
}
// Peak observed memory usage in GiB.
double MemoryMonitor::getMaxMem() const {
    return maxMem;
}
// Peak observed swap usage in GiB.
double MemoryMonitor::getMaxSwap() const {
    return maxSwap;
}
// Total physical memory in GiB, captured when the monitor was enabled.
double MemoryMonitor::getMemTotal() const {
    return memTotal;
}
// Largest memTotal seen while collecting data.
double MemoryMonitor::getMaxMemTotal() const {
    return maxMemTotal;
}

View File

@@ -0,0 +1,330 @@
// Copyright (C) 2019-2024 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#include <cctype>
#include <chrono>
#include <iomanip>
#include <numeric>
#include <string>
#include <vector>
#include "monitors/presenter.h"
namespace {
// Maps a hot-key character to the monitor it toggles.
const std::map<int, MonitorType> keyToMonitorType{
    {'C', MonitorType::CpuAverage},
    {'D', MonitorType::DistributionCpu},
    {'M', MonitorType::Memory}};

// Converts a key string (e.g. "cdm") into the set of monitors to enable.
// A lone "h" means "start with everything hidden"; mixing 'h' with other
// keys is rejected because it is contradictory.
std::set<MonitorType> strKeysToMonitorSet(const std::string& keys) {
    std::set<MonitorType> enabledMonitors;
    if (keys == "h") {
        return enabledMonitors;
    }
    for (unsigned char key : keys) {
        if ('h' == key) {
            throw std::runtime_error("Unacceptable combination of monitor types-can't show and hide info at the same time");
        }
        const auto typeIt = keyToMonitorType.find(std::toupper(key));
        if (typeIt != keyToMonitorType.end()) {
            enabledMonitors.insert(typeIt->second);
        } else {
            throw std::runtime_error("Unknown monitor type");
        }
    }
    return enabledMonitors;
}
}
// Builds a presenter with the given monitors enabled; the graph strip is drawn at
// vertical offset yPos, each graph being graphSize with ~5%-width padding between.
Presenter::Presenter(std::set<MonitorType> enabledMonitors,
        int yPos,
        cv::Size graphSize,
        std::size_t historySize) :
    yPos{yPos},
    graphSize{graphSize},
    graphPadding{std::max(1, static_cast<int>(graphSize.width * 0.05))},
    historySize{historySize},
    distributionCpuEnabled{false},
    strStream{std::ios_base::app} {
    for (MonitorType monitor : enabledMonitors) {
        addRemoveMonitor(monitor);
    }
}
// Convenience overload: parses the key string (e.g. "cdm" or "h") into monitor types.
Presenter::Presenter(const std::string& keys, int yPos, cv::Size graphSize, std::size_t historySize) :
    Presenter{strKeysToMonitorSet(keys), yPos, graphSize, historySize} {}
// Toggles one monitor on/off. CpuAverage and Memory switch their monitor's history
// between 0 (off) and the number of samples that fit the graph width. DistributionCpu
// shares cpuMonitor and only needs the latest sample, so the two CPU modes interact:
// a history of exactly 1 means "cpuMonitor is alive only for DistributionCpu".
void Presenter::addRemoveMonitor(MonitorType monitor) {
    unsigned updatedHistorySize = 1;
    if (historySize > 1) {
        int sampleStep = std::max(1, static_cast<int>(graphSize.width / (historySize - 1)));
        // +1 to plot graphSize.width/sampleStep segments
        // add round up to and an extra element if don't reach graph edge
        updatedHistorySize = (graphSize.width + sampleStep - 1) / sampleStep + 1;
    }
    switch(monitor) {
        case MonitorType::CpuAverage: {
            // History > 1 means the average-CPU graph is currently shown: turn it off,
            // keeping one sample if DistributionCpu still relies on cpuMonitor.
            if (cpuMonitor.getHistorySize() > 1 && distributionCpuEnabled) {
                cpuMonitor.setHistorySize(1);
            } else if (cpuMonitor.getHistorySize() > 1 && !distributionCpuEnabled) {
                cpuMonitor.setHistorySize(0);
            } else { // cpuMonitor.getHistorySize() <= 1
                cpuMonitor.setHistorySize(updatedHistorySize);
            }
            break;
        }
        case MonitorType::DistributionCpu: {
            if (distributionCpuEnabled) {
                distributionCpuEnabled = false;
                if (1 == cpuMonitor.getHistorySize()) { // cpuMonitor was used only for DistributionCpu => disable it
                    cpuMonitor.setHistorySize(0);
                }
            } else {
                distributionCpuEnabled = true;
                // Keep the larger history if CpuAverage already enabled one.
                cpuMonitor.setHistorySize(std::max(std::size_t{1}, cpuMonitor.getHistorySize()));
            }
            break;
        }
        case MonitorType::Memory: {
            if (memoryMonitor.getHistorySize() > 1) {
                memoryMonitor.setHistorySize(0);
            } else {
                memoryMonitor.setHistorySize(updatedHistorySize);
            }
            break;
        }
    }
}
// Reacts to a pressed key: 'H' toggles all monitors at once, any other key known
// to keyToMonitorType toggles the corresponding single monitor (case-insensitive).
void Presenter::handleKey(int key) {
    const int upperKey = std::toupper(key);
    if ('H' != upperKey) {
        const auto monitorIt = keyToMonitorType.find(upperKey);
        if (monitorIt != keyToMonitorType.end()) {
            addRemoveMonitor(monitorIt->second);
        }
        return;
    }
    const bool everythingHidden = 0 == cpuMonitor.getHistorySize() && memoryMonitor.getHistorySize() <= 1;
    if (everythingHidden) {
        // Nothing shown yet: enable all monitors.
        addRemoveMonitor(MonitorType::CpuAverage);
        addRemoveMonitor(MonitorType::DistributionCpu);
        addRemoveMonitor(MonitorType::Memory);
    } else {
        // Something is shown: hide everything.
        cpuMonitor.setHistorySize(0);
        distributionCpuEnabled = false;
        memoryMonitor.setHistorySize(0);
    }
}
// Draws the enabled monitor graphs (CPU average, per-core distribution, memory)
// side by side onto `frame`, sampling the monitors at most once per second.
void Presenter::drawGraphs(cv::Mat& frame) {
    const std::chrono::steady_clock::time_point curTimeStamp = std::chrono::steady_clock::now();
    // Collect new samples no more often than once a second.
    if (curTimeStamp - prevTimeStamp >= std::chrono::milliseconds{1000}) {
        prevTimeStamp = curTimeStamp;
        if (0 != cpuMonitor.getHistorySize()) {
            cpuMonitor.collectData();
        }
        if (memoryMonitor.getHistorySize() > 1) {
            memoryMonitor.collectData();
        }
    }
    // Compute the total panel width and drop graphs from the right if the frame
    // is too narrow to fit them all.
    int numberOfEnabledMonitors = (cpuMonitor.getHistorySize() > 1) + distributionCpuEnabled
        + (memoryMonitor.getHistorySize() > 1);
    int panelWidth = graphSize.width * numberOfEnabledMonitors
        + std::max(0, numberOfEnabledMonitors - 1) * graphPadding;
    while (panelWidth > frame.cols) {
        panelWidth = std::max(0, panelWidth - graphSize.width - graphPadding);
        --numberOfEnabledMonitors; // can't draw all monitors
    }
    int graphPos = std::max(0, (frame.cols - 1 - panelWidth) / 2);
    // Top ~20% of each graph is reserved for the caption text.
    int textGraphSplittingLine = graphSize.height / 5;
    int graphRectHeight = graphSize.height - textGraphSplittingLine;
    int sampleStep = 1;
    unsigned possibleHistorySize = 1;
    if (historySize > 1) {
        sampleStep = std::max(1, static_cast<int>(graphSize.width / (historySize - 1)));
        possibleHistorySize = (graphSize.width + sampleStep - 1) / sampleStep + 1;
    }
    // --- Average CPU load graph: polyline of per-sample mean core load ---
    if (cpuMonitor.getHistorySize() > 1 && possibleHistorySize > 1 && --numberOfEnabledMonitors >= 0) {
        std::deque<std::vector<double>> lastHistory = cpuMonitor.getLastHistory();
        cv::Rect intersection = cv::Rect{cv::Point(graphPos, yPos), graphSize} & cv::Rect{0, 0, frame.cols, frame.rows};
        if (!intersection.area()) {
            return;
        }
        cv::Mat graph = frame(intersection);
        // Lighten the background under the graph.
        graph = graph / 2 + cv::Scalar{127, 127, 127};
        int lineXPos = graph.cols - 1;
        std::vector<cv::Point> averageLoad(lastHistory.size());
        // Build the polyline right-to-left so the newest sample sits at the right edge.
        for (int i = lastHistory.size() - 1; i >= 0; --i) {
            double mean = std::accumulate(lastHistory[i].begin(), lastHistory[i].end(), 0.0) / lastHistory[i].size();
            averageLoad[i] = {lineXPos, graphSize.height - static_cast<int>(mean * graphRectHeight)};
            lineXPos -= sampleStep;
        }
        cv::polylines(graph, averageLoad, false, {255, 0, 0}, 2);
        cv::rectangle(frame, cv::Rect{
                cv::Point{graphPos, yPos + textGraphSplittingLine},
                cv::Size{graphSize.width, graphSize.height - textGraphSplittingLine}
            }, {0, 0, 0});
        strStream.str("CPU");
        if (!lastHistory.empty()) {
            strStream << ": " << std::fixed << std::setprecision(1)
                << std::accumulate(lastHistory.back().begin(), lastHistory.back().end(), 0.0)
                    / lastHistory.back().size() * 100 << '%';
        }
        int baseline;
        // Center the caption horizontally in the reserved text strip.
        int textWidth = cv::getTextSize(strStream.str(),
            cv::FONT_HERSHEY_SIMPLEX,
            textGraphSplittingLine * 0.04,
            1,
            &baseline).width;
        cv::putText(graph,
            strStream.str(),
            cv::Point{(graphSize.width - textWidth) / 2, textGraphSplittingLine - 1},
            cv::FONT_HERSHEY_SIMPLEX,
            textGraphSplittingLine * 0.04,
            {70, 0, 0},
            1);
        graphPos += graphSize.width + graphPadding;
    }
    // --- Per-core CPU distribution: one bar per core plus a mean-load line ---
    if (distributionCpuEnabled && --numberOfEnabledMonitors >= 0) {
        std::deque<std::vector<double>> lastHistory = cpuMonitor.getLastHistory();
        cv::Rect intersection = cv::Rect{cv::Point(graphPos, yPos), graphSize} & cv::Rect{0, 0, frame.cols, frame.rows};
        if (!intersection.area()) {
            return;
        }
        cv::Mat graph = frame(intersection);
        graph = graph / 2 + cv::Scalar{127, 127, 127};
        if (!lastHistory.empty()) {
            int rectXPos = 0;
            int step = (graph.cols + lastHistory.back().size() - 1) / lastHistory.back().size(); // round up
            double sum = 0;
            for (double coreLoad : lastHistory.back()) {
                sum += coreLoad;
                int height = static_cast<int>(graphRectHeight * coreLoad);
                cv::Rect pillar{cv::Point{rectXPos, graph.rows - height}, cv::Size{step, height}};
                cv::rectangle(graph, pillar, {255, 0, 0}, cv::FILLED);
                cv::rectangle(graph, pillar, {0, 0, 0});
                rectXPos += step;
            }
            // Horizontal green line at the mean load across cores.
            sum /= lastHistory.back().size();
            int yLine = graph.rows - static_cast<int>(graphRectHeight * sum);
            cv::line(graph, cv::Point{0, yLine}, cv::Point{graph.cols, yLine}, {0, 255, 0}, 2);
        }
        cv::Rect border{cv::Point{graphPos, yPos + textGraphSplittingLine},
            cv::Size{graphSize.width, graphSize.height - textGraphSplittingLine}};
        cv::rectangle(frame, border, {0, 0, 0});
        strStream.str("Core load");
        if (!lastHistory.empty()) {
            strStream << ": " << std::fixed << std::setprecision(1)
                << std::accumulate(lastHistory.back().begin(), lastHistory.back().end(), 0.0)
                    / lastHistory.back().size() * 100 << '%';
        }
        int baseline;
        int textWidth = cv::getTextSize(strStream.str(),
            cv::FONT_HERSHEY_SIMPLEX,
            textGraphSplittingLine * 0.04,
            1,
            &baseline).width;
        cv::putText(graph,
            strStream.str(),
            cv::Point{(graphSize.width - textWidth) / 2, textGraphSplittingLine - 1},
            cv::FONT_HERSHEY_SIMPLEX,
            textGraphSplittingLine * 0.04,
            {0, 70, 0});
        graphPos += graphSize.width + graphPadding;
    }
    // --- Memory + swap graph: mem+swap usage line, red when swap kicks in ---
    if (memoryMonitor.getHistorySize() > 1 && possibleHistorySize > 1 && --numberOfEnabledMonitors >= 0) {
        std::deque<std::pair<double, double>> lastHistory = memoryMonitor.getLastHistory();
        cv::Rect intersection = cv::Rect{cv::Point(graphPos, yPos), graphSize} & cv::Rect{0, 0, frame.cols, frame.rows};
        if (!intersection.area()) {
            return;
        }
        cv::Mat graph = frame(intersection);
        graph = graph / 2 + cv::Scalar{127, 127, 127};
        int histxPos = graph.cols - 1;
        // Vertical scale: capped so moderate usage is still visible.
        double range = std::min(memoryMonitor.getMaxMemTotal() + memoryMonitor.getMaxSwap(),
            (memoryMonitor.getMaxMem() + memoryMonitor.getMaxSwap()) * 1.2);
        if (lastHistory.size() > 1) {
            // Draw segments newest-to-oldest, right-to-left.
            for (auto memUsageIt = lastHistory.rbegin(); memUsageIt != lastHistory.rend() - 1; ++memUsageIt) {
                constexpr double SWAP_THRESHOLD = 10.0 / 1024; // 10 MiB
                // Yellow while memory is below ~95% of total and swap is negligible, red otherwise.
                cv::Vec3b color =
                    (memoryMonitor.getMemTotal() * 0.95 > memUsageIt->first) || (memUsageIt->second < SWAP_THRESHOLD) ?
                        cv::Vec3b{0, 255, 255} :
                        cv::Vec3b{0, 0, 255};
                cv::Point right{histxPos,
                    graph.rows - static_cast<int>(graphRectHeight * (memUsageIt->first + memUsageIt->second) / range)};
                cv::Point left{histxPos - sampleStep,
                    graph.rows - static_cast<int>(
                        graphRectHeight * ((memUsageIt + 1)->first + (memUsageIt + 1)->second) / range)};
                cv::line(graph, right, left, color, 2);
                histxPos -= sampleStep;
            }
        }
        cv::Rect border{cv::Point{graphPos, yPos + textGraphSplittingLine},
            cv::Size{graphSize.width, graphSize.height - textGraphSplittingLine}};
        cv::rectangle(frame, {border}, {0, 0, 0});
        if (lastHistory.empty()) {
            strStream.str("Memory");
        } else {
            strStream.str("");
            strStream << std::fixed << std::setprecision(1) << lastHistory.back().first << " + "
                << lastHistory.back().second << " GiB";
        }
        int baseline;
        int textWidth = cv::getTextSize(strStream.str(),
            cv::FONT_HERSHEY_SIMPLEX,
            textGraphSplittingLine * 0.04,
            1,
            &baseline).width;
        cv::putText(graph,
            strStream.str(),
            cv::Point{(graphSize.width - textWidth) / 2, textGraphSplittingLine - 1},
            cv::FONT_HERSHEY_SIMPLEX,
            textGraphSplittingLine * 0.04,
            {0, 35, 35});
    }
}
// Builds a human-readable summary of the collected CPU/memory statistics,
// one line per enabled metric.
std::vector<std::string> Presenter::reportMeans() const {
    const bool cpuGraphEnabled = cpuMonitor.getHistorySize() > 1;
    const bool memGraphEnabled = memoryMonitor.getHistorySize() > 1;
    std::vector<std::string> collectedData;
    if (cpuGraphEnabled || distributionCpuEnabled || memGraphEnabled) {
        collectedData.push_back("Resources usage:");
    }
    if (cpuGraphEnabled) {
        std::ostringstream stream;
        stream << std::fixed << std::setprecision(1) << "\tMean core utilization: ";
        for (double coreMean : cpuMonitor.getMeanCpuLoad()) {
            stream << coreMean * 100 << "% ";
        }
        collectedData.push_back(stream.str());
    }
    if (distributionCpuEnabled) {
        std::vector<double> meanCpuLoad = cpuMonitor.getMeanCpuLoad();
        const double overallMean = std::accumulate(meanCpuLoad.begin(), meanCpuLoad.end(), 0.0) / meanCpuLoad.size();
        std::ostringstream stream;
        stream << std::fixed << std::setprecision(1) << "\tMean CPU utilization: " << overallMean * 100 << "%";
        collectedData.push_back(stream.str());
    }
    if (memGraphEnabled) {
        std::ostringstream stream;
        stream << std::fixed << std::setprecision(1) << "\tMemory mean usage: " << memoryMonitor.getMeanMem() << " GiB";
        collectedData.push_back(stream.str());
        stream.str("");
        stream << "\tMean swap usage: " << memoryMonitor.getMeanSwap() << " GiB";
        collectedData.push_back(stream.str());
    }
    return collectedData;
}

View File

@@ -0,0 +1,22 @@
// Copyright (C) 2019-2024 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#include "query_wrapper.h"
#include <Windows.h>
#include <system_error>
// Opens a PDH performance-data query; throws std::system_error on failure.
QueryWrapper::QueryWrapper() {
    PDH_STATUS status = PdhOpenQuery(NULL, NULL, &query);
    if (ERROR_SUCCESS != status) {
        throw std::system_error(status, std::system_category(), "PdhOpenQuery() failed");
    }
}
// Releases the query handle (RAII).
QueryWrapper::~QueryWrapper() {
    PdhCloseQuery(query);
}
// Implicit conversion so the wrapper can be passed straight to Pdh* APIs.
QueryWrapper::operator PDH_HQUERY() const {
    return query;
}

View File

@@ -0,0 +1,17 @@
// Copyright (C) 2019-2024 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#pragma once
#include <Pdh.h>
// RAII owner of a Windows PDH performance-data query handle.
// Non-copyable; converts implicitly to PDH_HQUERY for use with Pdh* functions.
class QueryWrapper {
public:
    QueryWrapper();
    ~QueryWrapper();
    QueryWrapper(const QueryWrapper&) = delete;
    QueryWrapper& operator=(const QueryWrapper&) = delete;
    operator PDH_HQUERY() const;
private:
    PDH_HQUERY query;  // handle owned for the wrapper's lifetime
};

View File

@@ -0,0 +1,163 @@
/*
// Copyright (C) 2020-2024 Intel Corporation
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
*/
#include "pipelines/async_pipeline.h"
#include <chrono>
#include <cstdint>
#include <map>
#include <memory>
#include <stdexcept>
#include <string>
#include <utility>
#include <vector>
#include <openvino/openvino.hpp>
#include <models/model_base.h>
#include <models/results.h>
#include <utils/config_factory.h>
#include <utils/performance_metrics.hpp>
#include <utils/slog.hpp>
struct InputData;
struct MetaData;
// Compiles the model and sizes the infer-request pool: either the configured
// maxAsyncRequests, or the device's optimal_number_of_infer_requests + 1.
AsyncPipeline::AsyncPipeline(std::unique_ptr<ModelBase>&& modelInstance, const ModelConfig& config, ov::Core& core)
    : model(std::move(modelInstance)) {
    compiledModel = model->compileModel(config, core);
    // --------------------------- Create infer requests ------------------------------------------------
    unsigned int nireq = config.maxAsyncRequests;
    if (nireq == 0) {
        try {
            // +1 to use it as a buffer of the pipeline
            nireq = compiledModel.get_property(ov::optimal_number_of_infer_requests) + 1;
        } catch (const ov::Exception& ex) {
            std::cerr << "Can't query optimal number of infer requests: " << ex.what() << std::endl;
        }
    }
    slog::info << "\tNumber of inference requests: " << nireq << slog::endl;
    requestsPool.reset(new RequestsPool(compiledModel, nireq));
    // --------------------------- Call onLoadCompleted to complete initialization of model -------------
    model->onLoadCompleted(requestsPool->getInferRequestsList());
}
// Blocks until every in-flight request has finished before tearing down.
AsyncPipeline::~AsyncPipeline() {
    waitForTotalCompletion();
}
// Blocks until either a callback reported an exception, an idle request became
// available, or a result is consumable. With shouldKeepOrder the wait is for the
// specific outputFrameId; otherwise any completed result unblocks.
void AsyncPipeline::waitForData(bool shouldKeepOrder) {
    std::unique_lock<std::mutex> lock(mtx);
    condVar.wait(lock, [&]() {
        return callbackException != nullptr || requestsPool->isIdleRequestAvailable() ||
               (shouldKeepOrder ? completedInferenceResults.find(outputFrameId) != completedInferenceResults.end()
                                : !completedInferenceResults.empty());
    });
    // NOTE(review): callbackException is checked but not rethrown here —
    // presumably the caller inspects it elsewhere; confirm this is intended.
    if (callbackException) {
        return;
    }
}
// Submits one input for asynchronous inference. Returns the frame id assigned to
// the submission, or -1 if no idle infer request is available (caller retries).
int64_t AsyncPipeline::submitData(const InputData& inputData, const std::shared_ptr<MetaData>& metaData) {
    auto frameID = inputFrameId;
    auto request = requestsPool->getIdleRequest();
    if (!request) {
        return -1;
    }
    auto startTime = std::chrono::steady_clock::now();
    auto internalModelData = model->preprocess(inputData, request);
    preprocessMetrics.update(startTime);
    // The completion callback stores the outputs under frameID and returns the
    // request to the pool; only the first exception is remembered.
    request.set_callback(
        [this, request, frameID, internalModelData, metaData, startTime](std::exception_ptr ex) mutable {
            {
                const std::lock_guard<std::mutex> lock(mtx);
                inferenceMetrics.update(startTime);
                try {
                    if (ex) {
                        std::rethrow_exception(ex);
                    }
                    InferenceResult result;
                    result.frameId = frameID;
                    result.metaData = std::move(metaData);
                    result.internalModelData = std::move(internalModelData);
                    // Copy out every declared output tensor.
                    for (const auto& outName : model->getOutputsNames()) {
                        auto tensor = request.get_tensor(outName);
                        result.outputsData.emplace(outName, tensor);
                    }
                    completedInferenceResults.emplace(frameID, result);
                    requestsPool->setRequestIdle(request);
                } catch (...) {
                    if (!callbackException) {
                        callbackException = std::current_exception();
                    }
                }
            }
            condVar.notify_one();
        });
    inputFrameId++;
    if (inputFrameId < 0)
        inputFrameId = 0;  // wrap the id back to 0 on signed overflow
    request.start_async();
    return frameID;
}
// Fetches the next available inference result and runs the model's postprocess on
// it. Returns an empty pointer when no result is ready.
std::unique_ptr<ResultBase> AsyncPipeline::getResult(bool shouldKeepOrder) {
    auto infResult = AsyncPipeline::getInferenceResult(shouldKeepOrder);
    if (infResult.IsEmpty()) {
        return std::unique_ptr<ResultBase>();
    }
    auto startTime = std::chrono::steady_clock::now();
    auto result = model->postprocess(infResult);
    postprocessMetrics.update(startTime);
    // Copy the ResultBase part (frame id, metadata, ...) into the derived result.
    *result = static_cast<ResultBase&>(infResult);
    return result;
}
// Pops one finished inference result: the one matching outputFrameId in ordered
// mode, or any available one otherwise. Returns an empty result if none is ready.
InferenceResult AsyncPipeline::getInferenceResult(bool shouldKeepOrder) {
    InferenceResult result;
    {
        const std::lock_guard<std::mutex> lock(mtx);
        auto readyIt = shouldKeepOrder ? completedInferenceResults.find(outputFrameId)
                                       : completedInferenceResults.begin();
        if (completedInferenceResults.end() != readyIt) {
            result = std::move(readyIt->second);
            completedInferenceResults.erase(readyIt);
        }
    }
    if (!result.IsEmpty()) {
        // Advance the expected frame id, wrapping to 0 on signed overflow.
        outputFrameId = result.frameId;
        outputFrameId++;
        if (outputFrameId < 0) {
            outputFrameId = 0;
        }
    }
    return result;
}

View File

@@ -0,0 +1,94 @@
/*
// Copyright (C) 2020-2024 Intel Corporation
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
*/
#include "pipelines/requests_pool.h"
#include <algorithm>
#include <exception>
#include <vector>
#include <openvino/openvino.hpp>
// Creates `size` infer requests, all initially idle (the bool flag paired with
// each request is its "busy" marker, guarded by mtx).
RequestsPool::RequestsPool(ov::CompiledModel& compiledModel, unsigned int size) : numRequestsInUse(0) {
    for (unsigned int infReqId = 0; infReqId < size; ++infReqId) {
        requests.emplace_back(compiledModel.create_infer_request(), false);
    }
}
RequestsPool::~RequestsPool() {
    // Setting empty callback to free resources allocated for previously assigned lambdas
    for (auto& pair : requests) {
        pair.first.set_callback([](std::exception_ptr) {});
    }
}
// Marks the first idle request busy and hands it out. Returns a default-constructed
// (empty) InferRequest when every request is currently in flight.
ov::InferRequest RequestsPool::getIdleRequest() {
    std::lock_guard<std::mutex> lock(mtx);
    for (auto& entry : requests) {
        if (!entry.second) {
            entry.second = true;
            numRequestsInUse++;
            return entry.first;
        }
    }
    return ov::InferRequest();
}
// Marks the given request idle again so getIdleRequest() can hand it out.
void RequestsPool::setRequestIdle(const ov::InferRequest& request) {
    std::lock_guard<std::mutex> lock(mtx);
    const auto it = std::find_if(this->requests.begin(),
                                 this->requests.end(),
                                 [&request](const std::pair<ov::InferRequest, bool>& x) {
                                     return x.first == request;
                                 });
    // Guard against a request that does not belong to this pool: the previous code
    // dereferenced end() in that case, which is undefined behavior.
    if (it == this->requests.end()) {
        return;
    }
    it->second = false;
    numRequestsInUse--;
}
// Number of requests currently running inference.
size_t RequestsPool::getInUseRequestsCount() {
    std::lock_guard<std::mutex> lock(mtx);
    return numRequestsInUse;
}
// True while at least one request is idle and can be handed out.
bool RequestsPool::isIdleRequestAvailable() {
    std::lock_guard<std::mutex> lock(mtx);
    return numRequestsInUse < requests.size();
}
// Blocks until every busy request has finished its current inference.
void RequestsPool::waitForTotalCompletion() {
    // Do not synchronize here to avoid deadlock (despite synchronization in other functions)
    // Request status will be changed to idle in callback,
    // upon completion of request we're waiting for. Synchronization is applied there
    // Iterate by reference: the previous by-value loop copied each ov::InferRequest,
    // paying an atomic refcount bump/drop per element for no benefit.
    for (auto& pair : requests) {
        if (pair.second) {
            pair.first.wait();
        }
    }
}
std::vector<ov::InferRequest> RequestsPool::getInferRequestsList() {
std::lock_guard<std::mutex> lock(mtx);
std::vector<ov::InferRequest> retVal;
retVal.reserve(requests.size());
for (auto& pair : requests) {
retVal.push_back(pair.first);
}
return retVal;
}

View File

@@ -0,0 +1,398 @@
// Copyright (C) 2018-2024 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#include <algorithm>
#include <limits>
#include <memory>
#include <vector>
#include <tuple>
#include <set>
#include <utility>
#include <unordered_map>
#include "logger.hpp"
#include "tracker/tracker.hpp"
// Sentinel label value meaning "no/unknown class" for a tracked object.
const int TrackedObject::UNKNOWN_LABEL_IDX = -1;
// Geometric center of a bounding box (coordinates truncated to int).
cv::Point Center(const cv::Rect& rect) {
    return cv::Point(static_cast<int>(rect.x + rect.width * 0.5),
        static_cast<int>(rect.y + rect.height * 0.5));
}
// Default tracker tuning. Durations/delays are frame counts, heights are pixels,
// the affinity weights are unitless exponents (see ShapeAffinity/MotionAffinity).
TrackerParams::TrackerParams() :
    min_track_duration(25),
    forget_delay(150),
    affinity_thr(0.85f),
    shape_affinity_w(0.5f),
    motion_affinity_w(0.2f),
    min_det_conf(0.0f),
    bbox_aspect_ratios_range(0.666f, 5.0f),
    bbox_heights_range(1, 1280),
    drop_forgotten_tracks(true),
    max_num_objects_in_track(300),
    averaging_window_size_for_rects(1),
    averaging_window_size_for_labels(1) {}
// True if x lies inside [v[0], v[1]], both ends inclusive.
bool IsInRange(float x, const cv::Vec2f& v) { return x >= v[0] && x <= v[1]; }
// True if x lies inside [a, b], both ends inclusive.
bool IsInRange(float x, float a, float b) { return x >= a && x <= b; }
// Replaces m_detections with the subset of `detections` passing the configured
// confidence, aspect-ratio and height filters.
void Tracker::FilterDetectionsAndStore(const TrackedObjects& detections) {
    m_detections.clear();
    for (const auto& candidate : detections) {
        if (candidate.confidence <= m_params.min_det_conf) {
            continue;
        }
        const float aspect_ratio = static_cast<float>(candidate.rect.height) / candidate.rect.width;
        if (!IsInRange(aspect_ratio, m_params.bbox_aspect_ratios_range)) {
            continue;
        }
        if (!IsInRange(static_cast<float>(candidate.rect.height), m_params.bbox_heights_range)) {
            continue;
        }
        m_detections.emplace_back(candidate);
    }
}
// Matches active tracks to detections via the Kuhn-Munkres (Hungarian) algorithm
// on a dissimilarity matrix. Outputs matched (track_id, det_idx, confidence)
// triples plus the track ids and detection indices left unmatched. Detections are
// all inserted as unmatched here; the caller (Process) erases the accepted ones.
void Tracker::SolveAssignmentProblem(
    const std::set<size_t>& track_ids, const TrackedObjects& detections,
    std::set<size_t>* unmatched_tracks, std::set<size_t>* unmatched_detections,
    std::set<std::tuple<size_t, size_t, float>>* matches) {
    CV_Assert(unmatched_tracks);
    CV_Assert(unmatched_detections);
    unmatched_tracks->clear();
    unmatched_detections->clear();
    CV_Assert(!track_ids.empty());
    CV_Assert(!detections.empty());
    CV_Assert(matches);
    matches->clear();
    cv::Mat dissimilarity;
    ComputeDissimilarityMatrix(track_ids, detections, &dissimilarity);
    auto res = KuhnMunkres().Solve(dissimilarity);
    for (size_t i = 0; i < detections.size(); i++) {
        unmatched_detections->insert(i);
    }
    size_t i = 0;
    for (size_t id : track_ids) {
        // res[i] is the detection column assigned to track row i; an out-of-range
        // value means the row received no assignment.
        if (res[i] < detections.size()) {
            // Confidence = 1 - dissimilarity.
            matches->emplace(id, res[i], 1 - dissimilarity.at<float>(i, res[i]));
        } else {
            unmatched_tracks->insert(id);
        }
        i++;
    }
}
// Returns true when the track is gone: either the id is unknown, or the center of
// its last bbox left the frame (in which case the track is force-forgotten).
bool Tracker::EraseTrackIfBBoxIsOutOfFrame(size_t track_id) {
    if (m_tracks.find(track_id) == m_tracks.end())
        return true;
    const cv::Point center = Center(m_tracks.at(track_id).back().rect);
    if (m_frame_size == cv::Size()) {
        return false;  // frame size unknown yet: cannot judge
    }
    const bool outside = center.x < 0 || center.y < 0 ||
        center.x > m_frame_size.width || center.y > m_frame_size.height;
    if (!outside) {
        return false;
    }
    // Push the lost counter past forget_delay so the track gets dropped, and deactivate it.
    m_tracks.at(track_id).lost = m_params.forget_delay + 1;
    m_active_track_ids.erase(track_id);
    return true;
}
// Returns true when the track is gone: either the id is unknown, or it has been
// lost for more than forget_delay frames (then it is deactivated).
bool Tracker::EraseTrackIfItWasLostTooManyFramesAgo(size_t track_id) {
    const auto trackIt = m_tracks.find(track_id);
    if (trackIt == m_tracks.end())
        return true;
    if (trackIt->second.lost <= m_params.forget_delay) {
        return false;
    }
    m_active_track_ids.erase(track_id);
    return true;
}
// Ages a lost track by one frame, then erases it if its bbox left the frame or it
// has been lost for too long. Returns true when the track was erased.
// ("Uptate" is a long-standing typo preserved for interface compatibility.)
bool Tracker::UptateLostTrackAndEraseIfItsNeeded(size_t track_id) {
    m_tracks.at(track_id).lost++;
    // Short-circuit: the second check runs only if the first did not erase the track.
    return EraseTrackIfBBoxIsOutOfFrame(track_id) || EraseTrackIfItWasLostTooManyFramesAgo(track_id);
}
void Tracker::UpdateLostTracks(const std::set<size_t>& track_ids) {
for (auto track_id : track_ids) {
UptateLostTrackAndEraseIfItsNeeded(track_id);
}
}
// Runs one tracking step on a frame: filters detections, matches them to active
// tracks by solving an assignment problem, appends matches, starts new tracks for
// unmatched detections, and ages unmatched tracks.
void Tracker::Process(const cv::Mat& frame, const TrackedObjects& detections, int frame_idx) {
    if (m_frame_size == cv::Size()) {
        m_frame_size = frame.size();  // remember the frame size on the first call
    } else {
        CV_Assert(m_frame_size == frame.size());
    }
    FilterDetectionsAndStore(detections);
    for (auto& obj : m_detections) {
        obj.frame_idx = frame_idx;
    }
    auto active_tracks = m_active_track_ids;
    if (!active_tracks.empty() && !m_detections.empty()) {
        std::set<size_t> unmatched_tracks, unmatched_detections;
        std::set<std::tuple<size_t, size_t, float>> matches;
        SolveAssignmentProblem(active_tracks, m_detections, &unmatched_tracks,
            &unmatched_detections, &matches);
        for (const auto& match : matches) {
            size_t track_id = std::get<0>(match);
            size_t det_id = std::get<1>(match);
            float conf = std::get<2>(match);
            // Accept the assignment only above the affinity threshold; otherwise
            // treat both the track and the detection as unmatched.
            if (conf > m_params.affinity_thr) {
                AppendToTrack(track_id, m_detections[det_id]);
                unmatched_detections.erase(det_id);
            } else {
                unmatched_tracks.insert(track_id);
            }
        }
        AddNewTracks(m_detections, unmatched_detections);
        UpdateLostTracks(unmatched_tracks);
        // Deactivate tracks whose bbox drifted outside the frame.
        for (size_t id : active_tracks) {
            EraseTrackIfBBoxIsOutOfFrame(id);
        }
    } else {
        // No matching possible: every detection starts a track, every track ages.
        AddNewTracks(m_detections);
        UpdateLostTracks(active_tracks);
    }
    if (m_params.drop_forgotten_tracks)
        DropForgottenTracks();
}
// Removes forgotten tracks. When the maximum active id exceeds kMaxTrackID, all
// surviving tracks are renumbered from 0 to keep ids small.
void Tracker::DropForgottenTracks() {
    std::unordered_map<size_t, Track> new_tracks;
    std::set<size_t> new_active_tracks;
    size_t max_id = 0;
    if (!m_active_track_ids.empty())
        max_id = *std::max_element(m_active_track_ids.begin(), m_active_track_ids.end());
    const size_t kMaxTrackID = 10000;
    bool reassign_id = max_id > kMaxTrackID;
    size_t counter = 0;
    for (const auto& pair : m_tracks) {
        if (!IsTrackForgotten(pair.first)) {
            new_tracks.emplace(reassign_id ? counter : pair.first, pair.second);
            // NOTE(review): every surviving track is re-added to the active set here,
            // even if it was not active before — confirm this is intended.
            new_active_tracks.emplace(reassign_id ? counter : pair.first);
            counter++;
        }
    }
    m_tracks.swap(new_tracks);
    m_active_track_ids.swap(new_active_tracks);
    // After renumbering, `counter` is the next free id; otherwise keep the old one.
    m_tracks_counter = reassign_id ? counter : m_tracks_counter;
}
// Similarity of two box shapes in (0, 1]: 1 for identical width/height, decaying
// exponentially with the normalized width and height differences.
float Tracker::ShapeAffinity(const cv::Rect& trk, const cv::Rect& det) {
    const float widthDist = static_cast<float>(std::fabs(trk.width - det.width)) / static_cast<float>(trk.width + det.width);
    const float heightDist = static_cast<float>(std::fabs(trk.height - det.height)) / static_cast<float>(trk.height + det.height);
    return exp(-m_params.shape_affinity_w * (widthDist + heightDist));
}
// Similarity of two box positions in (0, 1]: squared displacement normalized by
// the detection's size, mapped through a decaying exponential.
float Tracker::MotionAffinity(const cv::Rect& trk, const cv::Rect& det) {
    const float xShift = static_cast<float>(trk.x - det.x) * (trk.x - det.x) / (det.width * det.width);
    const float yShift = static_cast<float>(trk.y - det.y) * (trk.y - det.y) / (det.height * det.height);
    return exp(-m_params.motion_affinity_w * (xShift + yShift));
}
// Fills an |active_tracks| x |detections| CV_32F matrix with pairwise distances
// between each track's latest object and every detection.
void Tracker::ComputeDissimilarityMatrix(
    const std::set<size_t>& active_tracks, const TrackedObjects& detections, cv::Mat* dissimilarity_matrix) {
    dissimilarity_matrix->create(active_tracks.size(), detections.size(), CV_32F);
    size_t i = 0;
    for (auto id : active_tracks) {
        auto ptr = dissimilarity_matrix->ptr<float>(i);
        // The track's last object does not depend on j: look it up once per row and
        // take it by reference (the previous code copied it on every inner iteration).
        const auto& last_det = m_tracks.at(id).objects.back();
        for (size_t j = 0; j < detections.size(); j++) {
            ptr[j] = Distance(last_det, detections[j]);
        }
        i++;
    }
}
void Tracker::AddNewTracks(const TrackedObjects& detections) {
for (size_t i = 0; i < detections.size(); i++) {
AddNewTrack(detections[i]);
}
}
void Tracker::AddNewTracks(const TrackedObjects& detections, const std::set<size_t>& ids) {
for (size_t i : ids) {
CV_Assert(i < detections.size());
AddNewTrack(detections[i]);
}
}
void Tracker::AddNewTrack(const TrackedObject& detection) {
auto detection_with_id = detection;
detection_with_id.object_id = m_tracks_counter;
m_tracks.emplace(std::pair<size_t, Track>(m_tracks_counter, Track({detection_with_id})));
m_active_track_ids.insert(m_tracks_counter);
m_tracks_counter++;
}
// Appends a detection to an existing track, resetting its lost counter and
// trimming the stored history to max_num_objects_in_track objects.
void Tracker::AppendToTrack(size_t track_id, const TrackedObject& detection) {
    CV_Assert(!IsTrackForgotten(track_id));
    auto detection_with_id = detection;
    detection_with_id.object_id = track_id;
    auto& track = m_tracks.at(track_id);
    track.objects.emplace_back(detection_with_id);
    track.lost = 0;
    track.length++;
    if (m_params.max_num_objects_in_track > 0) {
        // Drop the oldest objects once the cap is exceeded.
        while (track.size() > static_cast<size_t>(m_params.max_num_objects_in_track)) {
            track.objects.erase(track.objects.begin());
        }
    }
}
// Dissimilarity in [0, 1]: 0 for a perfect shape+motion match, 1 for no match.
// Each affinity short-circuits: a near-zero value immediately yields 1.
float Tracker::Distance(const TrackedObject& obj1, const TrackedObject& obj2) {
    const float eps = 1e-6f;
    const float shapeScore = ShapeAffinity(obj1.rect, obj2.rect);
    if (shapeScore < eps) {
        return 1.0;
    }
    const float motionScore = MotionAffinity(obj1.rect, obj2.rect);
    if (motionScore < eps) {
        return 1.0;
    }
    return 1.0f - shapeScore * motionScore;
}
// A track is reportable once it is non-empty and has lasted at least
// min_track_duration frames.
bool Tracker::IsTrackValid(size_t id) const {
    const auto& track = m_tracks.at(id);
    const auto& objects = track.objects;
    if (objects.empty()) {
        return false;
    }
    // NOTE(review): the frame_idx difference is stored into a size_t; presumably
    // frame indices never decrease within a track so this cannot wrap — confirm.
    size_t duration_frames = objects.back().frame_idx - track.first_object.frame_idx;
    if (duration_frames < m_params.min_track_duration)
        return false;
    return true;
}
// A track is forgotten once it has been lost for more than forget_delay frames.
bool Tracker::IsTrackForgotten(size_t id) const {
    return m_tracks.at(id).lost > m_params.forget_delay;
}
// Clears all tracker state, returning the tracker to its freshly-constructed condition.
void Tracker::Reset() {
    m_active_track_ids.clear();
    m_tracks.clear();
    m_detections.clear();
    m_tracks_counter = 0;
    m_frame_size = cv::Size();
}
// Returns the most recent object of every valid, currently-visible track.
TrackedObjects Tracker::TrackedDetections() const {
    TrackedObjects detections;
    for (size_t idx : active_track_ids()) {
        // Take the track by const reference: the previous code copied the whole Track
        // (including its full object history) on every iteration. This also matches
        // TrackedDetectionsWithLabels(), which already uses a const reference.
        const auto& track = tracks().at(idx);
        if (IsTrackValid(idx) && !track.lost) {
            detections.emplace_back(track.objects.back());
        }
    }
    return detections;
}
// Like TrackedDetections(), but each reported object's bbox is averaged over the
// last averaging_window_size_for_rects objects and its label is the most frequent
// one over the last averaging_window_size_for_labels objects.
TrackedObjects Tracker::TrackedDetectionsWithLabels() const {
    TrackedObjects detections;
    for (size_t idx : active_track_ids()) {
        const auto& track = tracks().at(idx);
        if (IsTrackValid(idx) && !track.lost) {
            TrackedObject object = track.objects.back();
            int counter = 1;
            // First index of the averaging window (0 when the track is shorter than it).
            size_t start = static_cast<int>(track.objects.size()) >= m_params.averaging_window_size_for_rects ?
                track.objects.size() - m_params.averaging_window_size_for_rects : 0;
            // Accumulate the window's rects onto the latest one...
            for (size_t i = start; i < track.objects.size() - 1; i++) {
                object.rect.width += track.objects[i].rect.width;
                object.rect.height += track.objects[i].rect.height;
                object.rect.x += track.objects[i].rect.x;
                object.rect.y += track.objects[i].rect.y;
                counter++;
            }
            // ...then divide to obtain the integer mean bbox.
            object.rect.width /= counter;
            object.rect.height /= counter;
            object.rect.x /= counter;
            object.rect.y /= counter;
            object.label = LabelWithMaxFrequencyInTrack(track, m_params.averaging_window_size_for_labels);
            object.object_id = idx;
            detections.push_back(object);
        }
    }
    return detections;
}
// Majority vote over the last `window_size` objects of the track; returns
// UNKNOWN_LABEL_IDX when no object in the window carries a known label.
int LabelWithMaxFrequencyInTrack(const Track& track, int window_size) {
    std::unordered_map<int, int> frequencies;
    int best_count = 0;
    int best_label = TrackedObject::UNKNOWN_LABEL_IDX;
    const int total = static_cast<int>(track.objects.size());
    const int first = total >= window_size ? total - window_size : 0;
    for (size_t i = first; i < track.objects.size(); i++) {
        const int label = track.objects[i].label;
        if (label == TrackedObject::UNKNOWN_LABEL_IDX)
            continue;
        const int count = ++frequencies[label];
        if (count > best_count) {
            best_count = count;
            best_label = label;
        }
    }
    return best_label;
}
std::vector<Track> UpdateTrackLabelsToBestAndFilterOutUnknowns(const std::vector<Track>& tracks) {
std::vector<Track> new_tracks;
for (auto& track : tracks) {
int best_label = LabelWithMaxFrequencyInTrack(track, std::numeric_limits<int>::max());
if (best_label == TrackedObject::UNKNOWN_LABEL_IDX)
continue;
Track new_track = track;
for (auto& obj : new_track.objects) {
obj.label = best_label;
}
new_track.first_object.label = best_label;
new_tracks.emplace_back(std::move(new_track));
}
return new_tracks;
}
// Read-only access to the full id -> Track map, including lost tracks.
const std::unordered_map<size_t, Track>& Tracker::tracks() const {
    return m_tracks;
}
std::vector<Track> Tracker::vector_tracks() const {
std::set<size_t> keys;
for (auto& cur_pair : tracks()) {
keys.insert(cur_pair.first);
}
std::vector<Track> vec_tracks;
for (size_t k : keys) {
vec_tracks.push_back(tracks().at(k));
}
return vec_tracks;
}

View File

@@ -0,0 +1,187 @@
// Copyright (C) 2018-2024 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#include "utils/args_helper.hpp"
#include "utils/slog.hpp"
#ifdef _WIN32
#include "w_dirent.hpp"
#else
#include <dirent.h>
#endif
//#include <gflags/gflags.h>
#include <sys/stat.h>
#include <map>
#include <algorithm>
#include <iterator>
#include <cctype>
#include <sstream>
void readInputFilesArguments(std::vector<std::string>& files, const std::string& arg) {
    // Expands a single input argument into concrete inputs:
    //  - a directory is expanded to all of its entries (minus "." / ".."),
    //  - a regular file or an "rtsp:" URL is appended as-is,
    //  - anything else produces a warning and is skipped.
    struct stat sb;
    if (stat(arg.c_str(), &sb) != 0) {
        if (arg.compare(0, 5, "rtsp:") != 0) {
            slog::warn << "File " << arg << " cannot be opened!" << slog::endl;
            return;
        }
        // BUGFIX: an RTSP URL is not a filesystem object, so stat() fails and
        // leaves `sb` uninitialized; the original code fell through and read
        // the indeterminate `sb.st_mode`. Treat the URL as a single input.
        files.push_back(arg);
        return;
    }
    if (S_ISDIR(sb.st_mode)) {
        DIR* dp = opendir(arg.c_str());
        if (dp == nullptr) {
            slog::warn << "Directory " << arg << " cannot be opened!" << slog::endl;
            return;
        }
        struct dirent* ep;
        while (nullptr != (ep = readdir(dp))) {
            std::string fileName = ep->d_name;
            // Skip the current/parent pseudo-entries.
            if (fileName == "." || fileName == "..")
                continue;
            files.push_back(arg + "/" + ep->d_name);
        }
        closedir(dp);
    } else {
        files.push_back(arg);
    }
}
//void parseInputFilesArguments(std::vector<std::string>& files) {
// std::vector<std::string> args = gflags::GetArgvs();
// bool readArguments = false;
// for (size_t i = 0; i < args.size(); i++) {
// if (args.at(i) == "-i" || args.at(i) == "--i") {
// readArguments = true;
// continue;
// }
// if (!readArguments) {
// continue;
// }
// if (args.at(i).c_str()[0] == '-') {
// break;
// }
// readInputFilesArguments(files, args.at(i));
// }
//}
std::vector<std::string> split(const std::string& s, char delim) {
    // Splits `s` on `delim` with getline semantics: empty tokens between
    // consecutive delimiters are kept, but a trailing delimiter does not
    // produce a final empty token.
    std::vector<std::string> tokens;
    std::stringstream stream(s);
    for (std::string token; std::getline(stream, token, delim);) {
        tokens.push_back(token);
    }
    return tokens;
}
void split(const std::string& s, char delim, std::vector<float> &out) {
    // Parses delimiter-separated floats from `s` into `out`. Tokens that do
    // not parse are reported on stderr and skipped (best effort, no throw).
    std::stringstream stream(s);
    for (std::string token; std::getline(stream, token, delim);) {
        try {
            out.push_back(std::stof(token));
        } catch (...) {
            std::cerr<<"cannot split the string: \"" + s + "\" onto floats";
        }
    }
}
template <class It>
static std::string merge_impl(It begin, It end, const char* delim) {
    // Joins the elements of [begin, end) into one string with `delim`
    // between consecutive elements (and no trailing delimiter).
    std::string joined;
    for (It it = begin; it != end; ++it) {
        if (it != begin) {
            joined += delim;
        }
        joined += *it;
    }
    return joined;
}
// Joins the given strings with `delim` (no trailing delimiter).
std::string merge(std::initializer_list<std::string> list, const char* delim) {
    return merge_impl(list.begin(), list.end(), delim);
}
// Joins the vector's strings with `delim` (no trailing delimiter).
std::string merge(const std::vector<std::string> &list, const char *delim) {
    return merge_impl(list.begin(), list.end(), delim);
}
std::vector<std::string> parseDevices(const std::string& device_string) {
    // Splits a device specification into individual device names:
    // "HETERO:CPU,GPU" or "MULTI:CPU(4),GPU" -> {"CPU", "GPU"}; per-device
    // options in parentheses are stripped. Any other specification is
    // returned unchanged as a single-element list.
    const auto colon_pos = device_string.find(':');
    if (colon_pos == std::string::npos) {
        return {device_string};
    }
    const std::string prefix = device_string.substr(0, colon_pos);
    if (prefix != "HETERO" && prefix != "MULTI") {
        return {device_string};
    }
    std::vector<std::string> devices = split(device_string.substr(colon_pos + 1), ',');
    for (auto& device : devices) {
        device = device.substr(0, device.find('('));
    }
    return devices;
}
// Format: <device1>:<value1>,<device2>:<value2> or just <value>
std::map<std::string, int32_t> parseValuePerDevice(const std::set<std::string>& devices,
                                                   const std::string& values_string) {
    // Parses a per-device integer setting. A bare "<value>" applies to every
    // device in `devices`; "<device>:<value>" pairs apply only to devices
    // present in the set. Returns an empty map on a malformed string.
    auto values_string_upper = values_string;
    std::transform(values_string_upper.begin(),
                   values_string_upper.end(),
                   values_string_upper.begin(),
                   [](unsigned char c) { return std::toupper(c); });
    std::map<std::string, int32_t> result;
    for (auto& device_value_string : split(values_string_upper, ',')) {
        auto device_value_vec = split(device_value_string, ':');
        if (device_value_vec.size() == 2) {
            // Use the set's own O(log n) lookup instead of a linear std::find.
            if (devices.count(device_value_vec.at(0)) > 0) {
                result[device_value_vec.at(0)] = std::stoi(device_value_vec.at(1));
            }
        } else if (device_value_vec.size() == 1) {
            const int32_t value = std::stoi(device_value_vec.at(0));
            for (const auto& device : devices) {
                result[device] = value;
            }
        } else if (!device_value_vec.empty()) {
            std::cerr << "Unknown string format: " << values_string;
            return {};
        }
    }
    return result;
}
cv::Size stringToSize(const std::string& str) {
    // Converts a "WxH" string (e.g. "1280x720") to cv::Size; any other
    // token count yields an empty 0x0 size.
    const auto parts = split(str, 'x');
    if (parts.size() != 2) {
        return cv::Size(0, 0);
    }
    return cv::Size(std::stoi(parts[0]), std::stoi(parts[1]));
}
std::map<std::string, ov::Layout> parseLayoutString(const std::string& layout_string) {
    // Parse parameter string like "input0:NCHW,input1:NC" or "NCHW" (applied to all
    // inputs)
    std::map<std::string, ov::Layout> layouts;
    // A bare layout without an explicit name is keyed by the empty input name.
    std::string searchStr = (layout_string.find_last_of(':') == std::string::npos && !layout_string.empty() ?
                             ":" : "") + layout_string;
    auto colonPos = searchStr.find_last_of(':');
    // Walk the string from the end, peeling off one "name:layout" pair per step.
    while (colonPos != std::string::npos) {
        auto startPos = searchStr.find_last_of(',');
        auto inputName = searchStr.substr(startPos + 1, colonPos - startPos - 1);
        auto inputLayout = searchStr.substr(colonPos + 1);
        layouts[inputName] = ov::Layout(inputLayout);
        searchStr = searchStr.substr(0, startPos + 1);
        if (searchStr.empty() || searchStr.back() != ',') {
            break;
        }
        searchStr.pop_back();
        colonPos = searchStr.find_last_of(':');
    }
    // The original ended with `if (!searchStr.empty()) return layouts;` followed
    // by the same `return layouts;` — both branches were identical, so the
    // redundant check has been removed.
    return layouts;
}

View File

@@ -0,0 +1,65 @@
/*
// Copyright (C) 2023-2024 Intel Corporation
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
*/
#include <fstream>
#include <map>
#include "utils/common.hpp"
std::vector<unsigned> loadClassIndices(const std::string &groundtruth_filepath,
                                       const std::vector<std::string> &imageNames)
{
    // Returns one ground-truth class index per entry of `imageNames`.
    // An empty ground-truth path means "no labels": every image gets class 0.
    // Otherwise each line of the file must look like "[./]<image> <class>".
    std::vector<unsigned> classIndices;
    if (groundtruth_filepath.empty()) {
        classIndices.resize(imageNames.size(), 0);
        return classIndices;
    }
    std::ifstream gtFile(groundtruth_filepath);
    if (!gtFile.is_open()) {
        throw std::runtime_error("Can't open the ground truth file: " + groundtruth_filepath);
    }
    // std::map type for indexByName guarantees to sort out images by name.
    // The same logic is applied in openImagesCapture() for DirReader source type,
    // which produces data for sorted pictures.
    // To be coherent in detection of ground truth for pictures we have to
    // use the same sorting approach for a source and ground truth data
    // If you're going to copy paste this code, remember that pictures need to be sorted
    std::map<std::string, unsigned> indexByName;
    std::string line;
    while (std::getline(gtFile, line)) {
        const size_t spacePos = line.find(' ');
        if (spacePos == std::string::npos) {
            throw std::runtime_error("The ground truth file has incorrect format.");
        }
        const std::string imagePath = line.substr(0, spacePos);
        const size_t slashPos = imagePath.rfind('/');
        const unsigned classIndex = static_cast<unsigned>(std::stoul(line.substr(spacePos + 1)));
        // The only accepted path forms are "<name>" and "./<name>".
        const bool isDotSlashPrefixed = slashPos == 1 && imagePath[0] == '.';
        if (!isDotSlashPrefixed && slashPos != std::string::npos) {
            throw std::runtime_error("The ground truth file has incorrect format.");
        }
        // rfind() == npos makes slashPos + 1 wrap to 0, i.e. the whole name.
        indexByName.insert({imagePath.substr(slashPos + 1), classIndex});
    }
    for (const auto& name : imageNames) {
        const auto found = indexByName.find(name);
        if (found == indexByName.end()) {
            throw std::runtime_error("No class specified for image " + name);
        }
        classIndices.push_back(found->second);
    }
    return classIndices;
}

View File

@@ -0,0 +1,100 @@
/*
// Copyright (C) 2020-2024 Intel Corporation
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
*/
#include "utils/config_factory.h"
#include <set>
#include <string>
#include <utility>
#include <vector>
#include <openvino/runtime/intel_gpu/properties.hpp>
#include "utils/args_helper.hpp"
std::set<std::string> ModelConfig::getDevices() {
if (devices.empty()) {
for (const std::string& device : parseDevices(deviceName)) {
devices.insert(device);
}
}
return devices;
}
// Builds a throughput-oriented compiled-model configuration from command-line
// flags. Per-device stream counts come from `flags_nstreams`
// ("<device>:<n>,..." or a single number applied to every device).
ModelConfig ConfigFactory::getUserConfig(const std::string& flags_d,
                                         uint32_t flags_nireq,
                                         const std::string& flags_nstreams,
                                         uint32_t flags_nthreads) {
    auto config = getCommonConfig(flags_d, flags_nireq);
    std::map<std::string, int> deviceNstreams = parseValuePerDevice(config.getDevices(), flags_nstreams);
    for (const auto& device : config.getDevices()) {
        if (device == "CPU") { // CPU supports a few special performance-oriented keys
            // limit threading for CPU portion of inference
            if (flags_nthreads != 0)
                config.compiledModelConfig.emplace(ov::inference_num_threads.name(), flags_nthreads);
            //config.compiledModelConfig.emplace(ov::affinity.name(), ov::Affinity::NONE);
            //config.compiledModelConfig.emplace(ov::hint::enable_cpu_pinning); // Do not know how to use it
            // Use the user's stream count when given, otherwise let the
            // runtime decide (ov::streams::AUTO).
            ov::streams::Num nstreams =
                deviceNstreams.count(device) > 0 ? ov::streams::Num(deviceNstreams[device]) : ov::streams::AUTO;
            config.compiledModelConfig.emplace(ov::streams::num.name(), nstreams);
        } else if (device == "GPU") {
            ov::streams::Num nstreams =
                deviceNstreams.count(device) > 0 ? ov::streams::Num(deviceNstreams[device]) : ov::streams::AUTO;
            config.compiledModelConfig.emplace(ov::streams::num.name(), nstreams);
            if (flags_d.find("MULTI") != std::string::npos &&
                config.getDevices().find("CPU") != config.getDevices().end()) {
                // multi-device execution with the CPU + GPU performs best with GPU throttling hint,
                // which releases another CPU thread (that is otherwise used by the GPU driver for active polling)
                config.compiledModelConfig.emplace(ov::intel_gpu::hint::queue_throttle.name(),
                                                   ov::intel_gpu::hint::ThrottleLevel(1));
            }
        }
    }
    return config;
}
ModelConfig ConfigFactory::getMinLatencyConfig(const std::string& flags_d, uint32_t flags_nireq) {
    // Latency-oriented preset: a single inference stream per device.
    auto config = getCommonConfig(flags_d, flags_nireq);
    for (const auto& device : config.getDevices()) {
        // CPU and GPU take the same single-stream setting.
        if (device == "CPU" || device == "GPU") {
            config.compiledModelConfig.emplace(ov::streams::num.name(), 1);
        }
    }
    return config;
}
ModelConfig ConfigFactory::getCommonConfig(const std::string& flags_d, uint32_t flags_nireq) {
    // Shared part of every preset: the number of parallel infer requests
    // and, when given, the requested device name.
    ModelConfig config;
    config.maxAsyncRequests = flags_nireq;
    if (!flags_d.empty()) {
        config.deviceName = flags_d;
    }
    return config;
}
std::map<std::string, std::string> ModelConfig::getLegacyConfig() const {
    // Converts the ov::Any-valued configuration into the plain string map
    // expected by the legacy API.
    std::map<std::string, std::string> legacy;
    for (const auto& entry : compiledModelConfig) {
        legacy.emplace(entry.first, entry.second.as<std::string>());
    }
    return legacy;
}

View File

@@ -0,0 +1,55 @@
/*
// Copyright (C) 2021-2024 Intel Corporation
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
*/
#include "utils/image_utils.h"
cv::Mat resizeImageExt(const cv::Mat& mat, int width, int height, RESIZE_MODE resizeMode,
                       cv::InterpolationFlags interpolationMode, cv::Rect* roi, cv::Scalar BorderConstant) {
    // Resizes `mat` to width x height:
    //  - RESIZE_FILL stretches to the target size;
    //  - RESIZE_KEEP_ASPECT scales uniformly and pads right/bottom;
    //  - RESIZE_KEEP_ASPECT_LETTERBOX scales uniformly and centers with padding.
    // If `roi` is non-null it receives the rectangle covered by image data.
    if (width == mat.cols && height == mat.rows) {
        return mat;
    }
    cv::Mat dst;
    switch (resizeMode) {
    case RESIZE_FILL:
    {
        // BUGFIX: `interpolationMode` was previously passed as the `fx`
        // scale-factor argument of cv::resize, so the requested interpolation
        // was silently ignored (bilinear was always used). Pass it in the
        // interpolation slot with fx = fy = 0.
        cv::resize(mat, dst, cv::Size(width, height), 0, 0, interpolationMode);
        if (roi) {
            *roi = cv::Rect(0, 0, width, height);
        }
        break;
    }
    case RESIZE_KEEP_ASPECT:
    case RESIZE_KEEP_ASPECT_LETTERBOX:
    {
        double scale = std::min(static_cast<double>(width) / mat.cols, static_cast<double>(height) / mat.rows);
        cv::Mat resizedImage;
        cv::resize(mat, resizedImage, cv::Size(0, 0), scale, scale, interpolationMode);
        // Letterbox centers the image; plain keep-aspect anchors it top-left.
        int dx = resizeMode == RESIZE_KEEP_ASPECT ? 0 : (width - resizedImage.cols) / 2;
        int dy = resizeMode == RESIZE_KEEP_ASPECT ? 0 : (height - resizedImage.rows) / 2;
        cv::copyMakeBorder(resizedImage, dst, dy, height - resizedImage.rows - dy,
                           dx, width - resizedImage.cols - dx, cv::BORDER_CONSTANT, BorderConstant);
        if (roi) {
            *roi = cv::Rect(dx, dy, resizedImage.cols, resizedImage.rows);
        }
        break;
    }
    }
    return dst;
}

View File

@@ -0,0 +1,326 @@
// Copyright (C) 2020-2024 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#include "utils/images_capture.h"
#include <string.h>
#ifdef _WIN32
# include "w_dirent.hpp"
#else
# include <dirent.h> // for closedir, dirent, opendir, readdir, DIR
#endif
#include <algorithm>
#include <chrono>
#include <fstream>
#include <memory>
#include <stdexcept>
#include <string>
#include <vector>
#include <opencv2/imgcodecs.hpp>
#include <opencv2/videoio.hpp>
// Thrown when the input string does not designate this capture type at all,
// letting openImagesCapture() try the next reader in the chain.
class InvalidInput : public std::runtime_error {
public:
    explicit InvalidInput(const std::string& message) noexcept : std::runtime_error(message) {}
};
// Thrown when the input matches this capture type but cannot be opened.
class OpenError : public std::runtime_error {
public:
    explicit OpenError(const std::string& message) noexcept : std::runtime_error(message) {}
};
// Capture backed by a single still image. Without looping the image is
// delivered exactly once; with looping every read() returns a fresh copy.
class ImreadWrapper : public ImagesCapture {
    cv::Mat img;
    bool canRead;

public:
    ImreadWrapper(const std::string& input, bool loop) : ImagesCapture{loop}, canRead{true} {
        auto startTime = std::chrono::steady_clock::now();
        // Distinguish "file not found" (try the next reader) from
        // "file exists but is not a decodable image".
        std::ifstream file(input.c_str());
        if (!file.good())
            throw InvalidInput("Can't find the image by " + input);
        img = cv::imread(input);
        if (!img.data)
            throw OpenError("Can't open the image from " + input);
        readerMetrics.update(startTime);
    }

    double fps() const override {
        return 1.0;
    }

    std::string getType() const override {
        return "IMAGE";
    }

    cv::Mat read() override {
        if (loop)
            return img.clone();
        if (!canRead)
            return cv::Mat{};
        canRead = false;
        return img.clone();
    }
};
// Capture that iterates over the image files inside a directory in sorted
// name order; entries that fail to decode are silently skipped.
class DirReader : public ImagesCapture {
    std::vector<std::string> names;   // sorted directory entries
    size_t fileId;                    // index of the next entry to try
    size_t nextImgId;                 // images returned in the current pass
    const size_t initialImageId;      // readable images to skip at the start
    const size_t readLengthLimit;     // max images to return per pass
    const std::string input;

public:
    DirReader(const std::string& input, bool loop, size_t initialImageId, size_t readLengthLimit)
        : ImagesCapture{loop},
          fileId{0},
          nextImgId{0},
          initialImageId{initialImageId},
          readLengthLimit{readLengthLimit},
          input{input} {
        DIR* dir = opendir(input.c_str());
        if (!dir)
            throw InvalidInput("Can't find the dir by " + input);
        while (struct dirent* ent = readdir(dir))
            if (strcmp(ent->d_name, ".") && strcmp(ent->d_name, ".."))
                names.emplace_back(ent->d_name);
        closedir(dir);
        if (names.empty())
            throw OpenError("The dir " + input + " is empty");
        sort(names.begin(), names.end());
        // Advance fileId to the entry of the initialImageId-th readable
        // image so the first read() starts from there.
        size_t readImgs = 0;
        while (fileId < names.size()) {
            cv::Mat img = cv::imread(input + '/' + names[fileId]);
            if (img.data) {
                ++readImgs;
                if (readImgs - 1 >= initialImageId)
                    return;
            }
            ++fileId;
        }
        throw OpenError("Can't read the first image from " + input);
    }

    double fps() const override {
        return 1.0;
    }

    std::string getType() const override {
        return "DIR";
    }

    // Returns the next readable image; restarts from initialImageId when
    // looping. An empty Mat signals the end of the sequence.
    cv::Mat read() override {
        auto startTime = std::chrono::steady_clock::now();
        while (fileId < names.size() && nextImgId < readLengthLimit) {
            cv::Mat img = cv::imread(input + '/' + names[fileId]);
            ++fileId;
            if (img.data) {
                ++nextImgId;
                readerMetrics.update(startTime);
                return img;
            }
        }
        if (loop) {
            // Rescan from the beginning and skip to initialImageId again.
            fileId = 0;
            size_t readImgs = 0;
            while (fileId < names.size()) {
                cv::Mat img = cv::imread(input + '/' + names[fileId]);
                ++fileId;
                if (img.data) {
                    ++readImgs;
                    if (readImgs - 1 >= initialImageId) {
                        nextImgId = 1;
                        readerMetrics.update(startTime);
                        return img;
                    }
                }
            }
        }
        return cv::Mat{};
    }
};
// Capture backed by a video file; supports starting from a given frame,
// limiting the number of returned frames, and looping.
class VideoCapWrapper : public ImagesCapture {
    cv::VideoCapture cap;
    bool first_read;               // true until the first read attempt completes
    const read_type type;          // safe = deep-copy each frame before returning
    size_t nextImgId;              // frames returned since the last (re)start
    const double initialImageId;   // frame index to (re)start from
    size_t readLengthLimit;        // max frames to return per pass

public:
    VideoCapWrapper(const std::string& input, bool loop, read_type type, size_t initialImageId, size_t readLengthLimit)
        : ImagesCapture{loop},
          first_read{true},
          type{type},
          nextImgId{0},
          initialImageId{static_cast<double>(initialImageId)} {
        if (0 == readLengthLimit) {
            throw std::runtime_error("readLengthLimit must be positive");
        }
        if (cap.open(input)) {
            this->readLengthLimit = readLengthLimit;
            if (!cap.set(cv::CAP_PROP_POS_FRAMES, this->initialImageId))
                throw OpenError("Can't set the frame to begin with");
            return;
        }
        throw InvalidInput("Can't open the video from " + input);
    }

    double fps() const override {
        return cap.get(cv::CAP_PROP_FPS);
    }

    std::string getType() const override {
        return "VIDEO";
    }

    // Returns the next frame, rewinding to initialImageId when looping;
    // an empty Mat signals the end of the stream.
    cv::Mat read() override {
        auto startTime = std::chrono::steady_clock::now();
        if (nextImgId >= readLengthLimit) {
            // Frame budget exhausted: rewind when looping, otherwise stop.
            if (loop && cap.set(cv::CAP_PROP_POS_FRAMES, initialImageId)) {
                nextImgId = 1;
                cv::Mat img;
                cap.read(img);
                if (type == read_type::safe) {
                    img = img.clone();
                }
                readerMetrics.update(startTime);
                return img;
            }
            return cv::Mat{};
        }
        cv::Mat img;
        bool success = cap.read(img);
        if (!success && first_read) {
            throw std::runtime_error("The first image can't be read");
        }
        first_read = false;
        if (!success && loop && cap.set(cv::CAP_PROP_POS_FRAMES, initialImageId)) {
            // End of file while looping: rewind and retry once.
            nextImgId = 1;
            cap.read(img);
        } else {
            ++nextImgId;
        }
        if (type == read_type::safe) {
            img = img.clone();
        }
        readerMetrics.update(startTime);
        return img;
    }
};
// Capture backed by a live camera; `input` is the numeric device index.
class CameraCapWrapper : public ImagesCapture {
    cv::VideoCapture cap;
    const read_type type;     // safe = deep-copy each frame before returning
    size_t nextImgId;         // frames returned so far
    size_t readLengthLimit;   // max frames to return (unlimited when looping)

public:
    CameraCapWrapper(const std::string& input,
                     bool loop,
                     read_type type,
                     size_t readLengthLimit,
                     cv::Size cameraResolution)
        : ImagesCapture{loop},
          type{type},
          nextImgId{0} {
        if (0 == readLengthLimit) {
            throw std::runtime_error("readLengthLimit must be positive");
        }
        try {
            if (cap.open(std::stoi(input))) {
                // A live source cannot rewind: looping just means "never stop".
                this->readLengthLimit = loop ? std::numeric_limits<size_t>::max() : readLengthLimit;
                cap.set(cv::CAP_PROP_BUFFERSIZE, 1);
                cap.set(cv::CAP_PROP_FRAME_WIDTH, cameraResolution.width);
                cap.set(cv::CAP_PROP_FRAME_HEIGHT, cameraResolution.height);
                cap.set(cv::CAP_PROP_AUTOFOCUS, true);
                cap.set(cv::CAP_PROP_FOURCC, cv::VideoWriter::fourcc('M', 'J', 'P', 'G'));
                return;
            }
            throw OpenError("Can't open the camera from " + input);
        } catch (const std::invalid_argument&) {
            // std::stoi failed: the input is not a camera index at all.
            throw InvalidInput("Can't find the camera " + input);
        } catch (const std::out_of_range&) {
            throw InvalidInput("Can't find the camera " + input);
        }
    }

    double fps() const override {
        // Fall back to 30 FPS when the driver does not report a rate.
        return cap.get(cv::CAP_PROP_FPS) > 0 ? cap.get(cv::CAP_PROP_FPS) : 30;
    }

    std::string getType() const override {
        return "CAMERA";
    }

    cv::Mat read() override {
        auto startTime = std::chrono::steady_clock::now();
        if (nextImgId >= readLengthLimit) {
            return cv::Mat{};
        }
        cv::Mat img;
        if (!cap.read(img)) {
            throw std::runtime_error("The image can't be captured from the camera");
        }
        if (type == read_type::safe) {
            img = img.clone();
        }
        ++nextImgId;
        readerMetrics.update(startTime);
        return img;
    }
};
std::unique_ptr<ImagesCapture> openImagesCapture(const std::string& input,
                                                 bool loop,
                                                 read_type type,
                                                 size_t initialImageId,
                                                 size_t readLengthLimit,
                                                 cv::Size cameraResolution) {
    // Tries each capture backend in order (image, directory, video file,
    // camera) and returns the first one that opens the input. If none
    // matches, throws with the collected messages — open failures are
    // preferred over "not this kind of input" diagnostics.
    if (readLengthLimit == 0)
        throw std::runtime_error{"Read length limit must be positive"};
    std::vector<std::string> invalidInputs;
    std::vector<std::string> openErrors;
    try {
        return std::unique_ptr<ImagesCapture>(new ImreadWrapper{input, loop});
    } catch (const InvalidInput& e) {
        invalidInputs.push_back(e.what());
    } catch (const OpenError& e) {
        openErrors.push_back(e.what());
    }
    try {
        return std::unique_ptr<ImagesCapture>(new DirReader{input, loop, initialImageId, readLengthLimit});
    } catch (const InvalidInput& e) {
        invalidInputs.push_back(e.what());
    } catch (const OpenError& e) {
        openErrors.push_back(e.what());
    }
    try {
        return std::unique_ptr<ImagesCapture>(new VideoCapWrapper{input, loop, type, initialImageId, readLengthLimit});
    } catch (const InvalidInput& e) {
        invalidInputs.push_back(e.what());
    } catch (const OpenError& e) {
        openErrors.push_back(e.what());
    }
    try {
        return std::unique_ptr<ImagesCapture>(
            new CameraCapWrapper{input, loop, type, readLengthLimit, cameraResolution});
    } catch (const InvalidInput& e) {
        invalidInputs.push_back(e.what());
    } catch (const OpenError& e) {
        openErrors.push_back(e.what());
    }
    const auto& messages = openErrors.empty() ? invalidInputs : openErrors;
    std::string errorsInfo;
    for (const auto& message : messages) {
        errorsInfo += message + "\n";
    }
    throw std::runtime_error(errorsInfo);
}

View File

@@ -0,0 +1,169 @@
// Copyright (C) 2018-2024 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#include <algorithm>
#include <limits>
#include <vector>
#include <utils/kuhn_munkres.hpp>
// `greedy` restricts the solver to the fast greedy initialization step only.
KuhnMunkres::KuhnMunkres(bool greedy) : n_(), greedy_(greedy) {}
std::vector<size_t> KuhnMunkres::Solve(const cv::Mat& dissimilarity_matrix) {
    // Solves the assignment problem for the given CV_32F cost matrix.
    // Returns, for every row, the index of the assigned column
    // (or size_t(-1) when the row is left unassigned).
    CV_Assert(dissimilarity_matrix.type() == CV_32F);
    // Note: the original computed cv::minMaxLoc into a local `min_val` that
    // was never used — that dead work has been removed.
    // Work on a square n x n copy padded with zeros.
    n_ = std::max(dissimilarity_matrix.rows, dissimilarity_matrix.cols);
    dm_ = cv::Mat(n_, n_, CV_32F, cv::Scalar(0));
    marked_ = cv::Mat(n_, n_, CV_8S, cv::Scalar(0));
    points_ = std::vector<cv::Point>(n_ * 2);
    dissimilarity_matrix.copyTo(dm_(
        cv::Rect(0, 0, dissimilarity_matrix.cols, dissimilarity_matrix.rows)));
    is_row_visited_ = std::vector<int>(n_, 0);
    is_col_visited_ = std::vector<int>(n_, 0);
    Run();
    // Collect the starred (assigned) cells within the original bounds.
    std::vector<size_t> results(dissimilarity_matrix.rows, -1);
    for (int i = 0; i < dissimilarity_matrix.rows; i++) {
        const auto ptr = marked_.ptr<char>(i);
        for (int j = 0; j < dissimilarity_matrix.cols; j++) {
            if (ptr[j] == kStar) {
                results[i] = (size_t)j;
            }
        }
    }
    return results;
}
void KuhnMunkres::TrySimpleCase() {
auto is_row_visited = std::vector<int>(n_, 0);
auto is_col_visited = std::vector<int>(n_, 0);
for (int row = 0; row < n_; row++) {
auto ptr = dm_.ptr<float>(row);
auto marked_ptr = marked_.ptr<char>(row);
auto min_val = *std::min_element(ptr, ptr + n_);
for (int col = 0; col < n_; col++) {
ptr[col] -= min_val;
if (ptr[col] == 0 && !is_col_visited[col] && !is_row_visited[row]) {
marked_ptr[col] = kStar;
is_col_visited[col] = 1;
is_row_visited[row] = 1;
}
}
}
}
bool KuhnMunkres::CheckIfOptimumIsFound() {
int count = 0;
for (int i = 0; i < n_; i++) {
const auto marked_ptr = marked_.ptr<char>(i);
for (int j = 0; j < n_; j++) {
if (marked_ptr[j] == kStar) {
is_col_visited_[j] = 1;
count++;
}
}
}
return count >= n_;
}
cv::Point KuhnMunkres::FindUncoveredMinValPos() {
    // Locates the smallest cost whose row and column are both uncovered.
    // Returns (-1, -1) when every cell is covered.
    cv::Point best_pos(-1, -1);
    float best_val = std::numeric_limits<float>::max();
    for (int row = 0; row < n_; row++) {
        if (is_row_visited_[row])
            continue;
        const auto costs = dm_.ptr<float>(row);
        for (int col = 0; col < n_; col++) {
            if (!is_col_visited_[col] && costs[col] < best_val) {
                best_val = costs[col];
                best_pos = cv::Point(col, row);
            }
        }
    }
    return best_pos;
}
void KuhnMunkres::UpdateDissimilarityMatrix(float val) {
    // Classic Hungarian update: add `val` to covered rows and subtract it
    // from uncovered columns, creating at least one new uncovered zero.
    for (int row = 0; row < n_; row++) {
        auto costs = dm_.ptr<float>(row);
        const bool row_covered = is_row_visited_[row] != 0;
        for (int col = 0; col < n_; col++) {
            if (row_covered)
                costs[col] += val;
            if (!is_col_visited_[col])
                costs[col] -= val;
        }
    }
}
int KuhnMunkres::FindInRow(int row, int what) {
    // First column of `row` whose mark equals `what`, or -1 when absent.
    for (int col = 0; col < n_; col++) {
        if (marked_.at<char>(row, col) == what)
            return col;
    }
    return -1;
}
int KuhnMunkres::FindInCol(int col, int what) {
    // First row of `col` whose mark equals `what`, or -1 when absent.
    for (int row = 0; row < n_; row++) {
        if (marked_.at<char>(row, col) == what)
            return row;
    }
    return -1;
}
// Executes the Hungarian algorithm: greedy initialization first, then
// (unless greedy_ mode) cover/augment iterations until every row of the
// padded matrix has a starred zero.
void KuhnMunkres::Run() {
    TrySimpleCase();
    if (greedy_)
        return;
    while (!CheckIfOptimumIsFound()) {
        while (true) {
            auto point = FindUncoveredMinValPos();
            auto min_val = dm_.at<float>(point.y, point.x);
            if (min_val > 0) {
                // No uncovered zero yet: shift the matrix to create one.
                UpdateDissimilarityMatrix(min_val);
            } else {
                // Prime the uncovered zero.
                marked_.at<char>(point.y, point.x) = kPrime;
                int col = FindInRow(point.y, kStar);
                if (col >= 0) {
                    // The row already holds a star: cover the row, uncover
                    // the star's column and keep searching.
                    is_row_visited_[point.y] = 1;
                    is_col_visited_[col] = 0;
                } else {
                    // No star in this row: build the alternating path of
                    // primes and stars starting at this prime...
                    int count = 0;
                    points_[count] = point;
                    while (true) {
                        int row = FindInCol(points_[count].x, kStar);
                        if (row >= 0) {
                            count++;
                            points_[count] = cv::Point(points_[count - 1].x, row);
                            int col = FindInRow(points_[count].y, kPrime);
                            count++;
                            points_[count] = cv::Point(col, points_[count - 1].y);
                        } else {
                            break;
                        }
                    }
                    // ...then flip it: primes become stars and stars are
                    // cleared, growing the assignment by one.
                    for (int i = 0; i < count + 1; i++) {
                        auto& mark = marked_.at<char>(points_[i].y, points_[i].x);
                        mark = mark == kStar ? 0 : kStar;
                    }
                    // Reset covers and erase leftover primes for next round.
                    is_row_visited_ = std::vector<int>(n_, 0);
                    is_col_visited_ = std::vector<int>(n_, 0);
                    marked_.setTo(0, marked_ == kPrime);
                    break;
                }
            }
        }
    }
}

View File

@@ -0,0 +1,114 @@
// Copyright (C) 2020-2024 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#include <limits>
#include "utils/performance_metrics.hpp"
#include "utils/slog.hpp"
// timeWindow defines the length of the timespan over which the 'current fps' value is calculated
PerformanceMetrics::PerformanceMetrics(Duration timeWindow)
    : timeWindowSize(timeWindow)
    , firstFrameProcessed(false)
{}
// Records one finished request and immediately draws the current
// latency/FPS values onto `frame` at `position`.
void PerformanceMetrics::update(TimePoint lastRequestStartTime,
                                const cv::Mat& frame,
                                cv::Point position,
                                int fontFace,
                                double fontScale,
                                cv::Scalar color,
                                int thickness,
                                MetricTypes metricType) {
    update(lastRequestStartTime);
    paintMetrics(frame, position, fontFace, fontScale, color, thickness, metricType);
}
// Accounts one finished request: accumulates its latency into the current
// moving window and rolls the window over once timeWindowSize has elapsed.
void PerformanceMetrics::update(TimePoint lastRequestStartTime) {
    TimePoint currentTime = Clock::now();
    if (!firstFrameProcessed) {
        // The very first sample defines the start of the first window.
        lastUpdateTime = lastRequestStartTime;
        firstFrameProcessed = true;
    }
    currentMovingStatistic.latency += currentTime - lastRequestStartTime;
    currentMovingStatistic.period = currentTime - lastUpdateTime;
    currentMovingStatistic.frameCount++;
    if (currentTime - lastUpdateTime > timeWindowSize) {
        // Window complete: publish it and start accumulating a new one.
        lastMovingStatistic = currentMovingStatistic;
        totalStatistic.combine(lastMovingStatistic);
        currentMovingStatistic = Statistic();
        lastUpdateTime = currentTime;
    }
}
// Draw performance stats over frame
void PerformanceMetrics::paintMetrics(const cv::Mat& frame, cv::Point position, int fontFace,
                                      double fontScale, cv::Scalar color, int thickness, MetricTypes metricType) const {
    Metrics metrics = getLast();
    std::ostringstream out;
    // A NaN metric means the last window had no data; skip that line.
    if (!std::isnan(metrics.latency) &&
        (metricType == PerformanceMetrics::MetricTypes::LATENCY || metricType == PerformanceMetrics::MetricTypes::ALL)) {
        out << "Latency: " << std::fixed << std::setprecision(1) << metrics.latency << " ms";
        putHighlightedText(frame, out.str(), position, fontFace, fontScale, color, thickness);
    }
    if (!std::isnan(metrics.fps) &&
        (metricType == PerformanceMetrics::MetricTypes::FPS || metricType == PerformanceMetrics::MetricTypes::ALL)) {
        out.str("");
        out << "FPS: " << std::fixed << std::setprecision(1) << metrics.fps;
        // Place the FPS line below the latency line when both are drawn.
        int offset = metricType == PerformanceMetrics::MetricTypes::ALL ? 30 : 0;
        putHighlightedText(frame, out.str(), {position.x, position.y + offset}, fontFace, fontScale, color, thickness);
    }
}
PerformanceMetrics::Metrics PerformanceMetrics::getLast() const {
    // Metrics over the last completed time window; a metric is NaN when its
    // window holds no frames (or spans zero time).
    Metrics metrics;
    if (lastMovingStatistic.frameCount != 0) {
        metrics.latency = std::chrono::duration_cast<Ms>(lastMovingStatistic.latency).count()
                          / lastMovingStatistic.frameCount;
    } else {
        metrics.latency = std::numeric_limits<double>::signaling_NaN();
    }
    if (lastMovingStatistic.period != Duration::zero()) {
        metrics.fps = lastMovingStatistic.frameCount
                      / std::chrono::duration_cast<Sec>(lastMovingStatistic.period).count();
    } else {
        metrics.fps = std::numeric_limits<double>::signaling_NaN();
    }
    return metrics;
}
PerformanceMetrics::Metrics PerformanceMetrics::getTotal() const {
    // Aggregate metrics over every frame seen so far, including the window
    // currently being accumulated; NaN when nothing was processed yet.
    Metrics metrics;
    const int frameCount = totalStatistic.frameCount + currentMovingStatistic.frameCount;
    if (frameCount == 0) {
        metrics.latency = std::numeric_limits<double>::signaling_NaN();
        metrics.fps = std::numeric_limits<double>::signaling_NaN();
        return metrics;
    }
    const auto latencySum = totalStatistic.latency + currentMovingStatistic.latency;
    const auto periodSum = totalStatistic.period + currentMovingStatistic.period;
    metrics.latency = std::chrono::duration_cast<Ms>(latencySum).count() / frameCount;
    metrics.fps = frameCount / std::chrono::duration_cast<Sec>(periodSum).count();
    return metrics;
}
// Prints the aggregate latency and FPS collected so far via slog.
void PerformanceMetrics::logTotal() const {
    Metrics metrics = getTotal();
    slog::info << "\tLatency: " << std::fixed << std::setprecision(1) << metrics.latency << " ms" << slog::endl;
    slog::info << "\tFPS: " << metrics.fps << slog::endl;
}
// Prints the average per-frame latency of each pipeline stage via slog.
void logLatencyPerStage(double readLat, double preprocLat, double inferLat, double postprocLat, double renderLat) {
    slog::info << "\tDecoding:\t" << std::fixed << std::setprecision(1) <<
        readLat << " ms" << slog::endl;
    slog::info << "\tPreprocessing:\t" << preprocLat << " ms" << slog::endl;
    slog::info << "\tInference:\t" << inferLat << " ms" << slog::endl;
    slog::info << "\tPostprocessing:\t" << postprocLat << " ms" << slog::endl;
    slog::info << "\tRendering:\t" << renderLat << " ms" << slog::endl;
}

View File

@@ -0,0 +1,180 @@
/*
// Copyright (C) 2021-2024 Intel Corporation
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
*/
#include "utils/visualizer.hpp"
#include <ctype.h>
#include <stddef.h>
#include <memory>
#include <stdexcept>
#include <string>
#include <opencv2/highgui.hpp>
#include <opencv2/imgproc.hpp>
#include <models/results.h>
#include <pipelines/metadata.h>
#include <utils/ocv_common.hpp>
// Picks the window title matching the demo type:
// "sr" = super resolution, "jr" = JPEG restoration.
Visualizer::Visualizer(const std::string& type) {
    if (type == "sr")
        winName = "Image Processing Demo - Super Resolution (press A for help)";
    else if (type == "jr")
        winName = "Image Processing Demo - JPEG Restoration (press A for help)";
}
// Current output resolution of the visualizer window.
cv::Size Visualizer::getSize() {
    return resolution;
}
// Reacts to keyboard input: 'a' toggles the help overlay, 'o'/'v'/'r'
// switch between original-vs-result, difference, and result-only modes.
void Visualizer::handleKey(int key) {
    key = std::tolower(key);
    if (key == 'a') {
        isHelpShown = !isHelpShown;
    }
    // Window was closed by the user: fall back to the plain result view.
    if (cv::getWindowProperty(winName, 0) < 0) {
        mode = "result";
        disableTrackbar();
    }
    if (key == 'o') {
        mode = "orig";
        addTrackbar();
    }
    if (key == 'v') {
        mode = "diff";
        addTrackbar();
    }
    if (key == 'r') {
        mode = "result";
        cv::destroyWindow(winName);
        disableTrackbar();
    }
}
// Prepares the displayable image for one inference result: rescales both
// the input and the produced image to the output resolution. Throws when
// the result carries no metadata or an empty input image.
cv::Mat Visualizer::renderResultData(ImageResult result, cv::Size& newResolution) {
    if (!result.metaData) {
        throw std::invalid_argument("Renderer: metadata is null");
    }
    // Input image is stored inside metadata, as we put it there during submission stage
    inputImg = result.metaData->asRef<ImageMetaData>().img;
    if (inputImg.empty()) {
        throw std::invalid_argument("Renderer: image provided in metadata is empty");
    }
    // The first result fixes the output resolution.
    if (!isResolutionSet) {
        setResolution(newResolution);
    }
    cv::resize(result.resultImage, result.resultImage, resolution);
    cv::resize(inputImg, inputImg, resolution);
    // Grayscale results are promoted to BGR so the two images can be mixed.
    if (inputImg.channels() != result.resultImage.channels()) {
        cv::cvtColor(result.resultImage, resultImg, cv::COLOR_GRAY2BGR);
    } else {
        resultImg = result.resultImage;
    }
    /* changeDisplayImg();
    displayImg=resultImg.clone();*/
    return resultImg;
}
// Display the given image in the demo window, overlaying the help text
// when it is enabled. An empty `img` means "redraw the cached view":
// displayImg is rebuilt for the current mode and shown instead.
void Visualizer::show(cv::Mat img) {
    if (img.empty()) {
        // No frame supplied: rebuild displayImg according to the current mode.
        changeDisplayImg();
        img = displayImg;
    }
    if (isHelpShown) {
        int pad = 10;
        int margin = 60;
        int baseline = 0;
        // Line height is measured with a different font/scale than the one used
        // for drawing below — presumably intentional spacing; verify if changed.
        int lineH = cv::getTextSize(helpMessage[0], cv::FONT_HERSHEY_COMPLEX_SMALL, 0.75, 1, &baseline).height + pad;
        // NOTE(review): assumes helpMessage holds at least 4 entries — confirm
        // against the member's declaration before changing this count.
        for (size_t i = 0; i < 4; ++i) {
            putHighlightedText(img,
                               helpMessage[i],
                               cv::Point(pad, margin + baseline + (i + 1) * lineH),
                               cv::FONT_HERSHEY_COMPLEX,
                               0.65,
                               cv::Scalar(255, 0, 0),
                               2);
        }
    }
    cv::imshow(winName, img);
}
// Rebuild displayImg for the current view mode:
//  - "orig":   left of the slider shows the original input, right the result
//  - "result": the processed image only
//  - "diff":   left of the slider shows a contrast-stretched |input - result|
void Visualizer::changeDisplayImg() {
    displayImg = resultImg.clone();
    if (mode == "orig") {
        inputImg(cv::Rect(0, 0, slider, inputImg.rows)).copyTo(displayImg(cv::Rect(0, 0, slider, resultImg.rows)));
        markImage(displayImg, {"O", "R"}, static_cast<float>(slider) / resolution.width);
        drawSweepLine(displayImg);
    } else if (mode == "result") {
        markImage(displayImg, {"R", ""}, 1);
    } else if (mode == "diff") {
        cv::Mat diffImg;
        cv::absdiff(inputImg, resultImg, diffImg);
        double min, max;
        cv::minMaxLoc(diffImg, &min, &max);
        // Stretch the difference to the full 8-bit range. Guard against a
        // uniform diff (max == min, e.g. identical images), which previously
        // divided by zero and produced a garbage scale.
        if (max > min) {
            double scale = 255.0 / (max - min);
            diffImg = (diffImg - min) * scale;
        }
        diffImg(cv::Rect(0, 0, slider, resultImg.rows)).copyTo(displayImg(cv::Rect(0, 0, slider, displayImg.rows)));
        markImage(displayImg, {"D", "R"}, static_cast<float>(slider) / resolution.width);
        drawSweepLine(displayImg);
    }
}
// Put the pair of mode letters onto the image: `marks.first` in the middle of
// the left (swept) region and `marks.second` in the middle of the right one,
// where `alpha` is the fraction of the width covered by the left region.
void Visualizer::markImage(cv::Mat& image, const std::pair<std::string, std::string>& marks, float alpha) {
    const int pad = 25;
    const float width = static_cast<float>(image.cols);
    const float leftCenter = width * alpha / 2.0f;
    const float rightCenter = width * (1 + alpha) / 2.0f;
    putHighlightedText(image, marks.first, cv::Point(static_cast<int>(leftCenter) - pad, 25),
                       cv::FONT_HERSHEY_COMPLEX, 1, cv::Scalar(0, 0, 255), 2);
    putHighlightedText(image, marks.second, cv::Point(static_cast<int>(rightCenter), 25),
                       cv::FONT_HERSHEY_COMPLEX, 1, cv::Scalar(0, 0, 255), 2);
}
// Draw the vertical green separator at the current slider position.
void Visualizer::drawSweepLine(cv::Mat& image) {
    const cv::Point top(slider, 0);
    const cv::Point bottom(slider, image.rows);
    cv::line(image, top, bottom, cv::Scalar(0, 255, 0), 2);
}
// Remember the output resolution; subsequent frames are resized to it.
void Visualizer::setResolution(cv::Size& newResolution) {
    isResolutionSet = true;
    resolution = newResolution;
}
// Create the sweep-position trackbar once; subsequent calls are no-ops.
void Visualizer::addTrackbar() {
    if (isTrackbarShown)
        return;
    cv::createTrackbar(trackbarName, winName, &slider, resolution.width);
    cv::setTrackbarMin(trackbarName, winName, 1);
    isTrackbarShown = true;
}
// Mark the trackbar as hidden so addTrackbar() can recreate it later.
// NOTE(review): the OpenCV trackbar itself is not destroyed here —
// presumably it disappears with the window (see handleKey's 'r' branch,
// which calls cv::destroyWindow first); confirm.
void Visualizer::disableTrackbar() {
    isTrackbarShown = false;
}

View File

@@ -0,0 +1,114 @@
// Copyright (C) 2018-2024 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#pragma once
#if defined(_WIN32)
#ifndef NOMINMAX
# define NOMINMAX
#endif
#include <WinSock2.h>
#include <Windows.h>
#include <stdlib.h>
#else
#include <unistd.h>
#include <cstdlib>
#include <string.h>
#endif
#include <string>
#include <sys/stat.h>
#if defined(WIN32)
// Copied from linux libc sys/stat.h:
#define S_ISREG(m) (((m) & S_IFMT) == S_IFREG)
#define S_ISDIR(m) (((m) & S_IFMT) == S_IFDIR)
#endif
/**
 * @brief Minimal POSIX-style directory entry for Windows.
 *
 * Owns d_name, a narrow (multibyte) copy of the wide file name.
 * Non-copyable: a shallow copy would double-free the malloc'ed buffer.
 */
struct dirent {
    char *d_name;

    explicit dirent(const wchar_t *wsFilePath) {
        size_t i;
        auto slen = wcslen(wsFilePath);
        // One wide character may expand to up to MB_CUR_MAX bytes, so size
        // the buffer accordingly (the previous `slen + 1` could truncate
        // non-ASCII file names).
        size_t bufLen = slen * MB_CUR_MAX + 1;
        d_name = static_cast<char*>(malloc(bufLen));
        wcstombs_s(&i, d_name, bufLen, wsFilePath, slen);
    }

    dirent(const dirent&) = delete;
    dirent& operator=(const dirent&) = delete;

    ~dirent() {
        free(d_name);
    }
};
/**
 * @brief Minimal POSIX-style DIR emulation on top of the Win32
 * FindFirstFile/FindNextFile API.
 *
 * FindFileData.dwReserved0 is used as a "more entries available" flag.
 * Non-copyable: a copy would double-close hFind and double-delete next.
 */
class DIR {
    WIN32_FIND_DATAA FindFileData;
    HANDLE hFind;
    dirent *next;  // last entry handed out; owned by this object

    // True when `src` ends with the suffix `with`.
    static inline bool endsWith(const std::string &src, const char *with) {
        int wl = static_cast<int>(strlen(with));
        int so = static_cast<int>(src.length()) - wl;
        if (so < 0) return false;
        return 0 == strncmp(with, &src[so], wl);
    }

public:
    explicit DIR(const char *dirPath) : next(nullptr) {
        std::string ws = dirPath;
        // FindFirstFileA expects a wildcard pattern, not a bare directory.
        if (endsWith(ws, "\\"))
            ws += "*";
        else
            ws += "\\*";
        hFind = FindFirstFileA(ws.c_str(), &FindFileData);
        FindFileData.dwReserved0 = hFind != INVALID_HANDLE_VALUE;
    }

    DIR(const DIR&) = delete;
    DIR& operator=(const DIR&) = delete;

    ~DIR() {
        // Was `if (!next) delete next;` — an inverted check that leaked the
        // last entry. `delete` on nullptr is safe, so no guard is needed.
        delete next;
        if (hFind != INVALID_HANDLE_VALUE) FindClose(hFind);
    }

    // True when the directory was opened successfully and has entries.
    bool isValid() const {
        return (hFind != INVALID_HANDLE_VALUE && FindFileData.dwReserved0);
    }

    // Returns the next entry, or nullptr when the listing is exhausted.
    // The returned pointer is owned by DIR and invalidated by the next call.
    dirent* nextEnt() {
        if (next != nullptr) delete next;
        next = nullptr;
        if (!FindFileData.dwReserved0) return nullptr;
        wchar_t wbuf[4096];
        size_t outSize;
        mbstowcs_s(&outSize, wbuf, 4094, FindFileData.cFileName, 4094);
        next = new dirent(wbuf);
        FindFileData.dwReserved0 = FindNextFileA(hFind, &FindFileData);
        return next;
    }
};
// POSIX-style opendir(): returns a heap-allocated DIR on success,
// nullptr when the directory cannot be opened.
static DIR *opendir(const char* dirPath) {
    DIR* dir = new DIR(dirPath);
    if (dir->isValid()) {
        return dir;
    }
    delete dir;
    return nullptr;
}
// POSIX-style readdir(): advance and return the next entry, or nullptr at
// the end of the listing. The returned pointer is owned by `dp` and is
// invalidated by the next readdir() call or by closedir().
static struct dirent *readdir(DIR *dp) {
    return dp->nextEnt();
}
// POSIX-style closedir(): releases the DIR and the last entry it handed out
// (the DIR destructor closes the Win32 find handle).
static void closedir(DIR *dp) {
    delete dp;
}