# Copyright (C) 2018-2025 Intel Corporation
# SPDX-License-Identifier: Apache-2.0

import os
import sys
import platform
from datetime import datetime

from openvino import Dimension, properties

from openvino.tools.benchmark.benchmark import Benchmark
from openvino.tools.benchmark.parameters import parse_args
from openvino.tools.benchmark.utils.constants import MULTI_DEVICE_NAME, \
    CPU_DEVICE_NAME, GPU_DEVICE_NAME, \
    BLOB_EXTENSION, AUTO_DEVICE_NAME
from openvino.tools.benchmark.utils.inputs_filling import get_input_data
from openvino.tools.benchmark.utils.logging import logger
from openvino.tools.benchmark.utils.utils import next_step, get_number_iterations, pre_post_processing, \
    process_help_inference_string, print_perf_counters, print_perf_counters_sort, dump_exec_graph, get_duration_in_milliseconds, \
    get_command_line_arguments, parse_value_per_device, parse_devices, get_inputs_info, \
    print_inputs_and_outputs_info, get_network_batch_size, load_config, dump_config, get_latency_groups, \
    check_for_static, can_measure_as_static, parse_value_for_virtual_device, is_virtual_device, is_virtual_device_found
from openvino.tools.benchmark.utils.statistics_report import StatisticsReport, JsonStatisticsReport, CsvStatisticsReport, \
    averageCntReport, detailedCntReport


def get_peak_memory_usage():
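    """Return the peak memory usage (VmPeak, in KB) of the current process on Linux; return None on other platforms."""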
    if platform.system() == "Linux":
        with open("/proc/self/status", "r") as f:
            for line in f:
                if line.startswith("VmPeak:"):
                    return int(line.split()[1])  # The value is reported in KB
        raise RuntimeError("VmPeak attribute not found. Unable to determine peak memory usage.")

    # No Windows support due to the lack of the 'psutil' module in the CI infrastructure
    # No macOS support because there is no /proc/self/status file
    return None


def log_memory_usage(logger, start_mem_usage, end_mem_usage, action_name):
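    """Log the peak memory usage measured before and after an action (e.g. model compilation) and their difference."""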
    if start_mem_usage is None or end_mem_usage is None:
        return

    capitalized_action_name = action_name.capitalize()
    action_name = "compilation" if action_name == "compile" else action_name
    logger.info(f"Start of {action_name} memory usage: Peak {start_mem_usage} KB")
    logger.info(f"End of {action_name} memory usage: Peak {end_mem_usage} KB")
    logger.info(f"{capitalized_action_name} model RAM used: {end_mem_usage - start_mem_usage} KB")


def parse_and_check_command_line():
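    """Parse benchmark_app arguments, validate option combinations and return (args, is_network_compiled)."""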
|
||
def arg_not_empty(arg_value,empty_value):
|
||
return not arg_value is None and not arg_value == empty_value
|
||
|
||
parser = parse_args()
|
||
args = parser.parse_args()
|
||
|
||
if args.latency_percentile < 1 or args.latency_percentile > 100:
|
||
parser.print_help()
|
||
raise RuntimeError("The percentile value is incorrect. The applicable values range is [1, 100].")
|
||
|
||
if not args.perf_hint == "none" and (arg_not_empty(args.number_streams, "") or arg_not_empty(args.number_threads, 0) or arg_not_empty(args.infer_threads_pinning, "")):
|
||
raise Exception("-nstreams, -nthreads and -pin options are fine tune options. To use them you " \
|
||
"should explicitely set -hint option to none. This is not OpenVINO limitation " \
|
||
"(those options can be used in OpenVINO together), but a benchmark_app UI rule.")
|
||
|
||
if args.report_type == "average_counters" and MULTI_DEVICE_NAME in args.target_device:
|
||
raise Exception("only detailed_counters report type is supported for MULTI device")
|
||
|
||
_, ext = os.path.splitext(args.path_to_model)
|
||
is_network_compiled = True if ext == BLOB_EXTENSION else False
|
||
is_precisiton_set = not (args.input_precision == "" and args.output_precision == "" and args.input_output_precision == "")
|
||
|
||
if is_network_compiled and is_precisiton_set:
|
||
raise Exception("Cannot set precision for a compiled model. " \
|
||
"Please re-compile your model with required precision.")
|
||
|
||
if args.api_type == "":
|
||
args.api_type = "sync" if args.perf_hint == "latency" else "async"
|
||
|
||
if args.api_type == "sync":
|
||
if args.time == 0 and (args.number_infer_requests > args.number_iterations):
|
||
raise Exception("Number of infer requests should be less than or equal to number of iterations in sync mode.")
|
||
|
||
return args, is_network_compiled
|
||
|
||
def main():
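    """benchmark_app entry point: configure the target device, compile or import the model, run the measurement loop and report results."""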
    statistics = None
    try:
        # ------------------------------ 1. Parsing and validating input arguments ------------------------------
        next_step()
        logger.info("Parsing input parameters")
        args, is_network_compiled = parse_and_check_command_line()

        command_line_arguments = get_command_line_arguments(sys.argv)
        if args.report_type:
            _statistics_class = JsonStatisticsReport if args.json_stats else CsvStatisticsReport
            statistics = _statistics_class(StatisticsReport.Config(args.report_type, args.report_folder))
            statistics.add_parameters(StatisticsReport.Category.COMMAND_LINE_PARAMETERS, command_line_arguments)

        def is_flag_set_in_command_line(flag):
            return any(x.strip('-') == flag for x, y in command_line_arguments)

        device_name = args.target_device

        devices = parse_devices(device_name)
        device_number_streams = parse_value_per_device(devices, args.number_streams, "nstreams")
        device_infer_precision = parse_value_per_device(devices, args.infer_precision, "infer_precision")

        config = {}
        if args.load_config:
            load_config(args.load_config, config)

        if is_network_compiled:
            logger.info("Model is compiled")

        # ------------------------------ 2. Loading OpenVINO Runtime -------------------------------------------
        next_step(step_id=2)

        benchmark = Benchmark(args.target_device, args.number_infer_requests,
                              args.number_iterations, args.time, args.api_type,
                              args.inference_only, args.maximum_inference_rate)

        if args.extensions:
            benchmark.add_extension(path_to_extensions=args.extensions)

        ## GPU (clDNN) Extensions
        if GPU_DEVICE_NAME in device_name and args.path_to_cldnn_config:
            if GPU_DEVICE_NAME not in config.keys():
                config[GPU_DEVICE_NAME] = {}
            config[GPU_DEVICE_NAME]['CONFIG_FILE'] = args.path_to_cldnn_config

        if GPU_DEVICE_NAME in config.keys() and 'CONFIG_FILE' in config[GPU_DEVICE_NAME].keys():
            cldnn_config = config[GPU_DEVICE_NAME]['CONFIG_FILE']
            benchmark.add_extension(path_to_cldnn_config=cldnn_config)

        benchmark.print_version_info()

        # --------------------- 3. Setting device configuration --------------------------------------------------------
        next_step()

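        ## Resolve the PERFORMANCE_HINT property for a device from the -hint option (or from the API type when no hint was given).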
        def set_performance_hint(device):
            perf_hint = properties.hint.PerformanceMode.THROUGHPUT
            supported_properties = benchmark.core.get_property(device, properties.supported_properties())
            if properties.hint.performance_mode() in supported_properties:
                if is_flag_set_in_command_line('hint'):
                    if args.perf_hint == "throughput" or args.perf_hint == "tput":
                        perf_hint = properties.hint.PerformanceMode.THROUGHPUT
                    elif args.perf_hint == "latency":
                        perf_hint = properties.hint.PerformanceMode.LATENCY
                    elif args.perf_hint == "cumulative_throughput" or args.perf_hint == "ctput":
                        perf_hint = properties.hint.PerformanceMode.CUMULATIVE_THROUGHPUT
                    elif args.perf_hint == 'none':
                        # Do not set PerformanceMode; the plugin will apply its internal default.
                        return
                    else:
                        raise RuntimeError("Incorrect performance hint. Please set the -hint option to "
                                           "'throughput' (tput), 'latency', 'cumulative_throughput' (ctput) or 'none'.")
                else:
                    perf_hint = properties.hint.PerformanceMode.LATENCY if benchmark.api_type == "sync" else properties.hint.PerformanceMode.THROUGHPUT
                    logger.warning("Performance hint was not explicitly specified in command line. " +
                                   f"Device({device}) performance hint will be set to {perf_hint}.")
                config[device][properties.hint.performance_mode()] = perf_hint
            else:
                logger.warning(f"Device {device} does not support the performance hint property (-hint).")

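        ## Strip ".N" indices and "(...)" suffixes so that, for example, "GPU.1" maps to the device type "GPU".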
        def get_device_type_from_name(name):
            new_name = str(name)
            new_name = new_name.split(".", 1)[0]
            new_name = new_name.split("(", 1)[0]
            return new_name

        ## Set default values from dumped config
        default_devices = set()
        for device in devices:
            device_type = get_device_type_from_name(device)
            if device_type in config and device not in config:
                config[device] = config[device_type].copy()
                default_devices.add(device_type)

        for def_device in default_devices:
            config.pop(def_device)

        perf_counts = False
        # check if a virtual device is used
        hw_devices_list = devices.copy()
        # Remove the hardware devices if AUTO/MULTI/HETERO appears in the devices list.
        is_virtual = is_virtual_device_found(devices)
        if is_virtual:
            devices.clear()
            # Parse out the current virtual device as the target device.
            virtual_device = device_name.partition(":")[0]
            hw_devices_list.remove(virtual_device)
            devices.append(virtual_device)
            parse_value_for_virtual_device(virtual_device, device_number_streams)
            parse_value_for_virtual_device(virtual_device, device_infer_precision)

        for device in devices:
            supported_properties = benchmark.core.get_property(device, properties.supported_properties())
            if device not in config.keys():
                config[device] = {}

            ## high-level performance modes
            set_performance_hint(device)

            if is_flag_set_in_command_line('nireq'):
                config[device][properties.hint.num_requests()] = str(args.number_infer_requests)

            ## Set performance counter
            if is_flag_set_in_command_line('pc'):
                ## set to user-defined value
                config[device][properties.enable_profiling()] = args.perf_counts
            elif properties.enable_profiling() in config[device].keys() and config[device][properties.enable_profiling()] == True:
                logger.warning(f"Performance counters for {device} device are turned on. " +
                               "To print results use the -pc option.")
            elif args.report_type in [averageCntReport, detailedCntReport]:
                logger.warning(f"Turning on performance counters for {device} device " +
                               f"since report type is {args.report_type}.")
                config[device][properties.enable_profiling()] = True
            elif args.exec_graph_path is not None:
                logger.warning(f"Turning on performance counters for {device} device " +
                               "due to execution graph dumping.")
                config[device][properties.enable_profiling()] = True
            elif is_flag_set_in_command_line('pcsort'):
                ## set to default value
                logger.warning(f"Turning on performance counters for {device} device " +
                               f"since pcsort value is {args.perf_counts_sort}.")
                config[device][properties.enable_profiling()] = bool(args.perf_counts_sort)
            else:
                ## set to default value
                config[device][properties.enable_profiling()] = args.perf_counts
            perf_counts = True if config[device][properties.enable_profiling()] == True else perf_counts

            ## insert or append a property into the hw device properties list
            def update_configs(hw_device, property_name, property_value):
                (key, value) = properties.device.properties({hw_device: {property_name: property_value}})
                # add property into hw device properties list.
                if key not in config[device].keys():
                    config[device][key] = value
                else:
                    current_config = config[device][key].get()
                    if hw_device not in current_config.keys():
                        current_config.update(value.get())
                    else:
                        current_device_config = current_config[hw_device]
                        for prop in value.get().items():
                            current_device_config.update(prop[1])
                        current_config[hw_device].update(current_device_config)
                    config[device][key].set(current_config)

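            ## Propagate a value of the form "{CPU:4,GPU:2}" into the virtual device's per-hardware (secondary) properties.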
            def update_device_config_for_virtual_device(value, config, key):
                # check if the element contains the hardware device property
                if len(value.split(':')) == 1:
                    config[device][key] = device_infer_precision[device]
                else:
                    # set device nstreams properties in the AUTO/MULTI plugin
                    value_vec = value[value.find('{') + 1:value.rfind('}')].split(',')
                    device_properties = {value_vec[i].split(':')[0]: value_vec[i].split(':')[1] for i in range(0, len(value_vec))}
                    for hw_device in device_properties.keys():
                        update_configs(hw_device, key, device_properties[hw_device])

            ## infer precision
            def set_infer_precision():
                key = properties.hint.inference_precision()
                if device in device_infer_precision.keys():
                    ## set to user-defined value
                    if key in supported_properties:
                        config[device][key] = device_infer_precision[device]
                    elif is_virtual_device(device):
                        update_device_config_for_virtual_device(device_infer_precision[device], config, key)
                    else:
                        raise Exception(f"Device {device} doesn't support config key INFERENCE_PRECISION_HINT!"
                                        " Please specify -infer_precision for correct devices in format"
                                        " <dev1>:<infer_precision1>,<dev2>:<infer_precision2> or via configuration file.")
                return

            ## the rest are individual per-device settings (overriding the values the device deduces from the performance hint)
            def set_throughput_streams():
                key = get_device_type_from_name(device) + "_THROUGHPUT_STREAMS"
                if device in device_number_streams.keys():
                    ## set to user-defined value
                    if key in supported_properties:
                        config[device][key] = device_number_streams[device]
                    elif properties.streams.num() in supported_properties:
                        key = properties.streams.num()
                        config[device][key] = device_number_streams[device]
                    elif is_virtual_device(device):
                        key = properties.streams.num()
                        update_device_config_for_virtual_device(device_number_streams[device], config, key)
                    else:
                        raise Exception(f"Device {device} doesn't support config key '{key}'! " +
                                        "Please specify -nstreams for correct devices in format <dev1>:<nstreams1>,<dev2>:<nstreams2>")
                elif key not in config[device].keys() and args.api_type == "async" \
                    and 'PERFORMANCE_HINT' in config[device].keys() and config[device]['PERFORMANCE_HINT'] == '':
                    ## set the _AUTO value for the #streams
                    logger.warning(f"-nstreams default value is determined automatically for {device} device. " +
                                   "Although the automatic selection usually provides reasonable performance, "
                                   "it may still be non-optimal in some cases; see the README for more information.")
                    if key in supported_properties:
                        config[device][key] = get_device_type_from_name(device) + "_THROUGHPUT_AUTO"
                    elif properties.streams.num() in supported_properties:
                        key = properties.streams.num()
                        config[device][key] = "-1"  # Set AUTO mode for the number of streams
                    elif is_virtual_device(device):
                        # Set nstreams to the default value AUTO if no nstreams was specified on the command line.
                        for hw_device in hw_devices_list:
                            hw_supported_properties = benchmark.core.get_property(hw_device, properties.supported_properties())
                            key = get_device_type_from_name(hw_device) + "_THROUGHPUT_STREAMS"
                            value = get_device_type_from_name(hw_device) + "_THROUGHPUT_AUTO"
                            if key not in hw_supported_properties:
                                key = properties.streams.num()
                                value = properties.streams.Num.AUTO
                            if key in hw_supported_properties:
                                update_configs(hw_device, key, value)
                if key in config[device].keys():
                    device_number_streams[device] = config[device][key]
                return

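            ## Apply -nthreads/-pin either directly to the device (or to AUTO) or, for virtual devices, as a secondary CPU property.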
            def set_nthreads_pin(property_name, property_value):
                if property_name in supported_properties or device_name == AUTO_DEVICE_NAME:
                    # create the nthreads/pin primary property for the HW device, or for AUTO if -d is AUTO directly.
                    config[device][property_name] = property_value
                elif is_virtual:
                    # Create a secondary -nthreads/-pin property only for CPU if the CPU device appears in the
                    # devices list specified by -d.
                    if CPU_DEVICE_NAME in hw_devices_list:
                        update_configs(CPU_DEVICE_NAME, property_name, property_value)
                return

            if args.number_threads and is_flag_set_in_command_line("nthreads"):
                # limit threading for the CPU portion of inference
                set_nthreads_pin(properties.inference_num_threads(), str(args.number_threads))

            if is_flag_set_in_command_line('pin'):
                ## set for CPU to user-defined value
                set_nthreads_pin(properties.hint.enable_cpu_pinning(), args.infer_threads_pinning)

            set_throughput_streams()
            set_infer_precision()

            if is_virtual_device(device):
                if device in device_number_streams.keys():
                    del device_number_streams[device]

        device_config = {}
        # In case multiple devices are found, prefer the one given in the CLI argument
        if benchmark.device.find(device_name) == 0 and device_name in config.keys():
            device_config = config[device_name]
        else:
            for device in config:
                if benchmark.device.find(device) == 0:
                    device_config = config[device]
        if args.cache_dir:
            benchmark.set_cache_dir(args.cache_dir)

        ## If the batch size is set, disable auto batching
        if args.batch_size:
            logger.warning("Batch size is set. Auto batching will be disabled")
            device_config["ALLOW_AUTO_BATCHING"] = False

        topology_name = ""
        load_from_file_enabled = is_flag_set_in_command_line('load_from_file') or is_flag_set_in_command_line('lfile')
        if load_from_file_enabled and not is_network_compiled:
            if args.mean_values or args.scale_values:
                raise RuntimeError("--mean_values and --scale_values aren't supported with --load_from_file. "
                                   "The values can be set via model_optimizer while generating the xml.")
            next_step()
            print("Skipping the step for loading model from file")
            next_step()
            print("Skipping the step for loading model from file")
            next_step()
            print("Skipping the step for loading model from file")

            # --------------------- 7. Loading the model to the device -------------------------------------------------
            next_step()

            start_mem_usage = get_peak_memory_usage()
            start_time = datetime.utcnow()

            compiled_model = benchmark.core.compile_model(args.path_to_model, benchmark.device, device_config)

            duration_ms = f"{(datetime.utcnow() - start_time).total_seconds() * 1000:.2f}"
            end_mem_usage = get_peak_memory_usage()
            logger.info(f"Compile model took {duration_ms} ms")
            log_memory_usage(logger, start_mem_usage, end_mem_usage, "compile")
            if statistics:
                statistics.add_parameters(StatisticsReport.Category.EXECUTION_RESULTS,
                                          [
                                              ('compile model time (ms)', duration_ms)
                                          ])
            app_inputs_info, _ = get_inputs_info(args.shape, args.data_shape, args.layout, args.batch_size, args.scale_values, args.mean_values, compiled_model.inputs)
            batch_size = get_network_batch_size(app_inputs_info)
        elif not is_network_compiled:
            # --------------------- 4. Read the Intermediate Representation of the network -----------------------------
            next_step()

            logger.info("Loading model files")

            start_time = datetime.utcnow()
            model = benchmark.read_model(args.path_to_model)
            topology_name = model.get_name()
            duration_ms = f"{(datetime.utcnow() - start_time).total_seconds() * 1000:.2f}"
            logger.info(f"Read model took {duration_ms} ms")
            logger.info("Original model I/O parameters:")
            print_inputs_and_outputs_info(model)

            if statistics:
                statistics.add_parameters(StatisticsReport.Category.EXECUTION_RESULTS,
                                          [
                                              ('read model time (ms)', duration_ms)
                                          ])

            # --------------------- 5. Resizing network to match image sizes and given batch ---------------------------
            next_step()

            for port in model.inputs:
                if not port.get_names():
                    port.set_names({port.node.get_friendly_name()})

            app_inputs_info, reshape = get_inputs_info(args.shape, args.data_shape, args.layout, args.batch_size, args.scale_values, args.mean_values, model.inputs)

            # use batch size according to provided layout and shapes
            batch_size = get_network_batch_size(app_inputs_info)
            logger.info(f'Model batch size: {batch_size}')

            if reshape:
                start_time = datetime.utcnow()
                shapes = {info.name: info.partial_shape for info in app_inputs_info}
                logger.info(
                    'Reshaping model: {}'.format(', '.join("'{}': {}".format(k, str(v)) for k, v in shapes.items())))
                model.reshape(shapes)
                duration_ms = f"{(datetime.utcnow() - start_time).total_seconds() * 1000:.2f}"
                logger.info(f"Reshape model took {duration_ms} ms")
                if statistics:
                    statistics.add_parameters(StatisticsReport.Category.EXECUTION_RESULTS,
                                              [
                                                  ('reshape model time (ms)', duration_ms)
                                              ])

            # --------------------- 6. Configuring inputs and outputs of the model --------------------------------------------------
            next_step()

            pre_post_processing(model, app_inputs_info, args.input_precision, args.output_precision, args.input_output_precision)
            print_inputs_and_outputs_info(model)

            # --------------------- 7. Loading the model to the device -------------------------------------------------
            next_step()
            start_mem_usage = get_peak_memory_usage()
            start_time = datetime.utcnow()

            compiled_model = benchmark.core.compile_model(model, benchmark.device, device_config)

            duration_ms = f"{(datetime.utcnow() - start_time).total_seconds() * 1000:.2f}"
            end_mem_usage = get_peak_memory_usage()
            logger.info(f"Compile model took {duration_ms} ms")
            log_memory_usage(logger, start_mem_usage, end_mem_usage, "compile")
            if statistics:
                statistics.add_parameters(StatisticsReport.Category.EXECUTION_RESULTS,
                                          [
                                              ('compile model time (ms)', duration_ms)
                                          ])
        else:
            if args.mean_values or args.scale_values:
                raise RuntimeError("--mean_values and --scale_values aren't supported for a compiled model. "
                                   "The values can be set via model_optimizer while generating the xml.")
            next_step()
            print("Skipping the step for compiled model")
            next_step()
            print("Skipping the step for compiled model")
            next_step()
            print("Skipping the step for compiled model")

            # --------------------- 7. Loading the model to the device -------------------------------------------------
            next_step()

            start_mem_usage = get_peak_memory_usage()
            start_time = datetime.utcnow()

            compiled_model = benchmark.core.import_model(args.path_to_model, benchmark.device, device_config)

            duration_ms = f"{(datetime.utcnow() - start_time).total_seconds() * 1000:.2f}"
            end_mem_usage = get_peak_memory_usage()
            logger.info(f"Import model took {duration_ms} ms")
            log_memory_usage(logger, start_mem_usage, end_mem_usage, "import")
            if statistics:
                statistics.add_parameters(StatisticsReport.Category.EXECUTION_RESULTS,
                                          [
                                              ('import model time (ms)', duration_ms)
                                          ])
            app_inputs_info, _ = get_inputs_info(args.shape, args.data_shape, args.layout, args.batch_size, args.scale_values, args.mean_values, compiled_model.inputs)
            batch_size = get_network_batch_size(app_inputs_info)

        # --------------------- 8. Querying optimal runtime parameters --------------------------------------------------
        next_step()

        ## actual device-deduced settings
        keys = compiled_model.get_property(properties.supported_properties())
        logger.info("Model:")
        for k in keys:
            skip_keys = (properties.supported_properties())
            if k not in skip_keys:
                value = compiled_model.get_property(k)
                if k == properties.device.properties():
                    for device_key in value.keys():
                        logger.info(f'  {device_key}:')
                        for k2, value2 in value.get(device_key).items():
                            if k2 not in skip_keys:
                                logger.info(f'    {k2}: {value2}')
                else:
                    logger.info(f'  {k}: {value}')

        # Update number of streams
        for device in device_number_streams.keys():
            try:
                key = get_device_type_from_name(device) + '_THROUGHPUT_STREAMS'
                device_number_streams[device] = compiled_model.get_property(key)
            except:
                key = 'NUM_STREAMS'
                device_number_streams[device] = compiled_model.get_property(key)

        # ------------------------------------ 9. Creating infer requests and preparing input data ----------------------
        next_step()

        # Create infer requests
        requests = benchmark.create_infer_requests(compiled_model)

        # Prepare input data
        paths_to_input = list()
        if args.paths_to_input:
            for path in args.paths_to_input:
                if ":" in next(iter(path), ""):
                    paths_to_input.extend(path)
                else:
                    paths_to_input.append(os.path.abspath(*path))

        data_queue = get_input_data(paths_to_input, app_inputs_info)

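        # Determine whether all inputs are static, i.e. whether input tensors can be filled once outside the measurement loop.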
        static_mode = check_for_static(app_inputs_info)
        allow_inference_only_or_sync = can_measure_as_static(app_inputs_info)
        if not allow_inference_only_or_sync and benchmark.api_type == 'sync':
            raise Exception("Benchmarking of the model with dynamic shapes is available for async API only. "
                            "Please use -api async -hint latency -nireq 1 to emulate sync behavior.")

        if benchmark.inference_only is None:
            if static_mode:
                benchmark.inference_only = True
            else:
                benchmark.inference_only = False
        elif benchmark.inference_only and not allow_inference_only_or_sync:
            raise Exception("Benchmarking a dynamic model is only available with input filling inside the measurement loop!")

        # update the batch size in case of a dynamic network with a single data_shape
        if allow_inference_only_or_sync and batch_size.is_dynamic:
            batch_size = Dimension(data_queue.batch_sizes[data_queue.current_group_id])

        benchmark.latency_groups = get_latency_groups(app_inputs_info)

        if len(benchmark.latency_groups) > 1:
            logger.info(f"Defined {len(benchmark.latency_groups)} tensor groups:")
            for group in benchmark.latency_groups:
                logger.info(f"\t{str(group)}")

        # Iteration limit
        benchmark.niter = get_number_iterations(benchmark.niter, benchmark.nireq, max(len(info.shapes) for info in app_inputs_info), benchmark.api_type)

        # Set input tensors before first inference
        for request in requests:
            data_tensors = data_queue.get_next_input()
            for port, data_tensor in data_tensors.items():
                input_tensor = request.get_input_tensor(port)
                if not static_mode:
                    input_tensor.shape = data_tensor.shape
                if not len(input_tensor.shape):
                    # scalar tensor: copy the data through the flat view
                    input_tensor.data.flat[:] = data_tensor.data
                else:
                    input_tensor.data[:] = data_tensor.data

        if statistics:
            statistics.add_parameters(StatisticsReport.Category.RUNTIME_CONFIG,
                                      [
                                          ('topology', topology_name),
                                          ('target device', device_name),
                                          ('API', args.api_type),
                                          ('inference_only', benchmark.inference_only),
                                          ('precision', "UNSPECIFIED"),
                                          ('batch size', str(batch_size)),
                                          ('number of iterations', str(benchmark.niter)),
                                          ('number of parallel infer requests', str(benchmark.nireq)),
                                          ('duration (ms)', str(get_duration_in_milliseconds(benchmark.duration_seconds))),
                                      ])

            for nstreams in device_number_streams.items():
                statistics.add_parameters(StatisticsReport.Category.RUNTIME_CONFIG,
                                          [
                                              (f"number of {nstreams[0]} streams", str(nstreams[1])),
                                          ])

        # ------------------------------------ 10. Measuring performance -----------------------------------------------

        output_string = process_help_inference_string(benchmark, device_number_streams)

        next_step(additional_info=output_string)

        if benchmark.inference_only:
            logger.info("Benchmarking in inference-only mode (input filling is not included in the measurement loop).")
        else:
            logger.info("Benchmarking in full mode (input filling is included in the measurement loop).")
        if not args.no_warmup:
            duration_ms = f"{benchmark.first_infer(requests):.2f}"
            logger.info(f"First inference took {duration_ms} ms")
            if statistics:
                statistics.add_parameters(StatisticsReport.Category.EXECUTION_RESULTS,
                                          [
                                              ('first inference time (ms)', duration_ms)
                                          ])
        else:
            logger.info("Skipping warmup inference due to the -no_warmup flag")


        pcseq = args.pcseq
        if static_mode or len(benchmark.latency_groups) == 1:
            pcseq = False

        fps, median_latency_ms, avg_latency_ms, min_latency_ms, max_latency_ms, total_duration_sec, iteration = benchmark.main_loop(requests, data_queue, batch_size, args.latency_percentile, pcseq)

        # ------------------------------------ 11. Dumping statistics report -------------------------------------------
        next_step()

        if args.dump_config:
            dump_config(args.dump_config, config)
            logger.info(f"OpenVINO configuration settings were dumped to {args.dump_config}")

        if args.exec_graph_path:
            dump_exec_graph(compiled_model, args.exec_graph_path)

        if perf_counts:
            perfs_count_list = []
            for request in requests:
                perfs_count_list.append(request.profiling_info)

            if args.perf_counts_sort:
                total_sorted_list = print_perf_counters_sort(perfs_count_list, sort_flag=args.perf_counts_sort)
                if statistics:
                    statistics.dump_performance_counters_sorted(total_sorted_list)

            elif args.perf_counts:
                print_perf_counters(perfs_count_list)

            if statistics:
                statistics.dump_performance_counters(perfs_count_list)

        if statistics:
            statistics.add_parameters(StatisticsReport.Category.EXECUTION_RESULTS,
                                      [
                                          ('total execution time (ms)', f'{get_duration_in_milliseconds(total_duration_sec):.2f}'),
                                          ('total number of iterations', str(iteration)),
                                      ])
            if MULTI_DEVICE_NAME not in device_name:
                latency_prefix = None
                if args.latency_percentile == 50:
                    latency_prefix = 'latency (ms)'
                else:
                    latency_prefix = 'latency (' + str(args.latency_percentile) + ' percentile) (ms)'
                if latency_prefix:
                    statistics.add_parameters(StatisticsReport.Category.EXECUTION_RESULTS,
                                              [
                                                  (latency_prefix, f'{median_latency_ms:.2f}'),
                                              ])
                statistics.add_parameters(StatisticsReport.Category.EXECUTION_RESULTS,
                                          [
                                              ("avg latency", f'{avg_latency_ms:.2f}'),
                                          ])
                statistics.add_parameters(StatisticsReport.Category.EXECUTION_RESULTS,
                                          [
                                              ("min latency", f'{min_latency_ms:.2f}'),
                                          ])
                statistics.add_parameters(StatisticsReport.Category.EXECUTION_RESULTS,
                                          [
                                              ("max latency", f'{max_latency_ms:.2f}'),
                                          ])
                if pcseq:
                    for group in benchmark.latency_groups:
                        statistics.add_parameters(StatisticsReport.Category.EXECUTION_RESULTS,
                                                  [
                                                      ("group", str(group)),
                                                  ])
                        statistics.add_parameters(StatisticsReport.Category.EXECUTION_RESULTS,
                                                  [
                                                      ("avg latency", f'{group.avg:.2f}'),
                                                  ])
                        statistics.add_parameters(StatisticsReport.Category.EXECUTION_RESULTS,
                                                  [
                                                      ("min latency", f'{group.min:.2f}'),
                                                  ])
                        statistics.add_parameters(StatisticsReport.Category.EXECUTION_RESULTS,
                                                  [
                                                      ("max latency", f'{group.max:.2f}'),
                                                  ])
            statistics.add_parameters(StatisticsReport.Category.EXECUTION_RESULTS,
                                      [
                                          ('throughput', f'{fps:.2f}'),
                                      ])
            statistics.dump()

        try:
            exeDevice = compiled_model.get_property("EXECUTION_DEVICES")
            logger.info(f'Execution Devices:{exeDevice}')
        except:
            pass
        logger.info(f'Count: {iteration} iterations')
        logger.info(f'Duration: {get_duration_in_milliseconds(total_duration_sec):.2f} ms')
        if MULTI_DEVICE_NAME not in device_name:
            logger.info('Latency:')
            if args.latency_percentile == 50:
                logger.info(f'   Median: {median_latency_ms:.2f} ms')
            else:
                logger.info(f'   {args.latency_percentile} percentile: {median_latency_ms:.2f} ms')
            logger.info(f'   Average: {avg_latency_ms:.2f} ms')
            logger.info(f'   Min: {min_latency_ms:.2f} ms')
            logger.info(f'   Max: {max_latency_ms:.2f} ms')

        if pcseq:
            logger.info("Latency for each data shape group:")
            for idx, group in enumerate(benchmark.latency_groups):
                logger.info(f"{idx+1}.{str(group)}")
                if args.latency_percentile == 50:
                    logger.info(f'   Median: {group.median:.2f} ms')
                else:
                    logger.info(f'   {args.latency_percentile} percentile: {group.median:.2f} ms')
                logger.info(f'   Average: {group.avg:.2f} ms')
                logger.info(f'   Min: {group.min:.2f} ms')
                logger.info(f'   Max: {group.max:.2f} ms')

        logger.info(f'Throughput: {fps:.2f} FPS')

        del compiled_model

        next_step.step_id = 0
    except Exception as e:
        logger.exception(e)

        if statistics:
            statistics.add_parameters(
                StatisticsReport.Category.EXECUTION_RESULTS,
                [('error', str(e))]
            )
            statistics.dump()
        sys.exit(1)