// Copyright (C) 2022 Intel Corporation // SPDX-License-Identifier: Apache-2.0 // #include #include #include #include // clang-format off #include "openvino/openvino.hpp" #include "samples/args_helper.hpp" #include "samples/common.hpp" #include "samples/latency_metrics.hpp" #include "samples/slog.hpp" // clang-format on using Ms = std::chrono::duration>; int main(int argc, char* argv[]) { try { slog::info << "OpenVINO:" << slog::endl; slog::info << ov::get_openvino_version(); std::string device_name = "CPU"; if (argc == 3) { device_name = argv[2]; } else if (argc != 2) { slog::info << "Usage : " << argv[0] << " (default: CPU)" << slog::endl; return EXIT_FAILURE; } // Optimize for throughput. Best throughput can be reached by // running multiple ov::InferRequest instances asyncronously ov::AnyMap tput{{ov::hint::performance_mode.name(), ov::hint::PerformanceMode::THROUGHPUT}}; // Create ov::Core and use it to compile a model. // Select the device by providing the name as the second parameter to CLI. // It is possible to set CUMULATIVE_THROUGHPUT as ov::hint::PerformanceMode for AUTO device ov::Core core; ov::CompiledModel compiled_model = core.compile_model(argv[1], device_name, tput); // Create optimal number of ov::InferRequest instances uint32_t nireq = compiled_model.get_property(ov::optimal_number_of_infer_requests); std::vector ireqs(nireq); std::generate(ireqs.begin(), ireqs.end(), [&] { return compiled_model.create_infer_request(); }); // Fill input data for ireqs for (ov::InferRequest& ireq : ireqs) { for (const ov::Output& model_input : compiled_model.inputs()) { fill_tensor_random(ireq.get_tensor(model_input)); } } // Warm up for (ov::InferRequest& ireq : ireqs) { ireq.start_async(); } for (ov::InferRequest& ireq : ireqs) { ireq.wait(); } // Benchmark for seconds_to_run seconds and at least niter iterations std::chrono::seconds seconds_to_run{10}; size_t niter = 10; std::vector latencies; std::mutex mutex; std::condition_variable cv; std::exception_ptr callback_exception; struct TimedIreq { ov::InferRequest& ireq; // ref std::chrono::steady_clock::time_point start; bool has_start_time; }; std::deque finished_ireqs; for (ov::InferRequest& ireq : ireqs) { finished_ireqs.push_back({ireq, std::chrono::steady_clock::time_point{}, false}); } auto start = std::chrono::steady_clock::now(); auto time_point_to_finish = start + seconds_to_run; // Once there’s a finished ireq wake up main thread. // Compute and save latency for that ireq and prepare for next inference by setting up callback. // Callback pushes that ireq again to finished ireqs when infrence is completed. // Start asynchronous infer with updated callback for (;;) { std::unique_lock lock(mutex); while (!callback_exception && finished_ireqs.empty()) { cv.wait(lock); } if (callback_exception) { std::rethrow_exception(callback_exception); } if (!finished_ireqs.empty()) { auto time_point = std::chrono::steady_clock::now(); if (time_point > time_point_to_finish && latencies.size() > niter) { break; } TimedIreq timedIreq = finished_ireqs.front(); finished_ireqs.pop_front(); if (lock.owns_lock()) { lock.unlock(); } ov::InferRequest& ireq = timedIreq.ireq; if (timedIreq.has_start_time) { latencies.push_back(std::chrono::duration_cast(time_point - timedIreq.start).count()); } ireq.set_callback( [&ireq, time_point, &mutex, &finished_ireqs, &callback_exception, &cv](std::exception_ptr ex) { // Keep callback small. This improves performance for fast (tens of thousands FPS) models { std::unique_lock lock(mutex); try { if (ex) { std::rethrow_exception(ex); } finished_ireqs.push_back({ireq, time_point, true}); } catch (const std::exception&) { if (!callback_exception) { callback_exception = std::current_exception(); } } } cv.notify_one(); }); ireq.start_async(); } } auto end = std::chrono::steady_clock::now(); double duration = std::chrono::duration_cast(end - start).count(); // Report results slog::info << "Count: " << latencies.size() << " iterations" << slog::endl << "Duration: " << duration << " ms" << slog::endl << "Latency:" << slog::endl; size_t percent = 50; LatencyMetrics{latencies, "", percent}.write_to_slog(); slog::info << "Throughput: " << double_to_string(1000 * latencies.size() / duration) << " FPS" << slog::endl; } catch (const std::exception& ex) { slog::err << ex.what() << slog::endl; return EXIT_FAILURE; } return EXIT_SUCCESS; }