# Copyright (C) 2018-2025 Intel Corporation
# SPDX-License-Identifier: Apache-2.0

import os
import sys
import platform
import time
from datetime import datetime  # not used below any more; kept so the module namespace is unchanged

from openvino import Dimension, properties
from openvino.tools.benchmark.benchmark import Benchmark
from openvino.tools.benchmark.parameters import parse_args
from openvino.tools.benchmark.utils.constants import MULTI_DEVICE_NAME, \
    CPU_DEVICE_NAME, GPU_DEVICE_NAME, \
    BLOB_EXTENSION, AUTO_DEVICE_NAME
from openvino.tools.benchmark.utils.inputs_filling import get_input_data
from openvino.tools.benchmark.utils.logging import logger
from openvino.tools.benchmark.utils.utils import next_step, get_number_iterations, pre_post_processing, \
    process_help_inference_string, print_perf_counters, print_perf_counters_sort, dump_exec_graph, get_duration_in_milliseconds, \
    get_command_line_arguments, parse_value_per_device, parse_devices, get_inputs_info, \
    print_inputs_and_outputs_info, get_network_batch_size, load_config, dump_config, get_latency_groups, \
    check_for_static, can_measure_as_static, parse_value_for_virtual_device, is_virtual_device, is_virtual_device_found
from openvino.tools.benchmark.utils.statistics_report import StatisticsReport, JsonStatisticsReport, CsvStatisticsReport, \
    averageCntReport, detailedCntReport


def _elapsed_ms(start):
    """Return the time elapsed since ``start`` in milliseconds as a string with two decimals.

    ``start`` must be a value previously obtained from ``time.perf_counter()``.
    A monotonic clock is used so that the measured duration is not affected by
    wall-clock adjustments.
    """
    return f"{(time.perf_counter() - start) * 1000:.2f}"


def get_peak_memory_usage():
    """Return the peak virtual memory size (``VmPeak``) of this process in KB.

    Returns:
        The integer value read from ``/proc/self/status`` on Linux, or ``None``
        on any other platform.

    Raises:
        RuntimeError: on Linux, when no ``VmPeak:`` line is present.
    """
    if platform.system() == "Linux":
        with open("/proc/self/status", "r", encoding="utf-8") as f:
            for line in f:
                if line.startswith("VmPeak:"):
                    return int(line.split()[1])  # The value in KB
        raise RuntimeError("VmPeak attribute not found. Unable to determine peak memory usage.")

    # No Windows support due to the lack of the 'psutil' module in the CI infrastructure
    # No Macos support due to no /proc/self/status file
    return None


def log_memory_usage(logger, start_mem_usage, end_mem_usage, action_name):
    """Log the peak memory before/after an action and the difference between them.

    Nothing is logged when either measurement is ``None`` (i.e. the platform is
    not supported by ``get_peak_memory_usage``).

    Args:
        logger: logger object providing ``info``.
        start_mem_usage: peak memory in KB before the action, or ``None``.
        end_mem_usage: peak memory in KB after the action, or ``None``.
        action_name: short action name, e.g. ``"compile"`` or ``"import"``.
    """
    if start_mem_usage is None or end_mem_usage is None:
        return

    capitalized_action_name = action_name.capitalize()
    # "compile" reads better as a noun in the start/end messages
    action_name = "compilation" if action_name == "compile" else action_name
    logger.info(f"Start of {action_name} memory usage: Peak {start_mem_usage} KB")
    logger.info(f"End of {action_name} memory usage: Peak {end_mem_usage} KB")
    logger.info(f"{capitalized_action_name} model ram used {end_mem_usage - start_mem_usage} KB")


def parse_and_check_command_line():
    """Parse the command line and validate option combinations.

    Returns:
        A tuple ``(args, is_network_compiled)`` where ``is_network_compiled`` is
        ``True`` when the model path has the ``BLOB_EXTENSION`` extension.

    Raises:
        RuntimeError: if the latency percentile is outside ``[1, 100]``.
        Exception: for the other unsupported option combinations checked below.
    """
    def arg_not_empty(arg_value, empty_value):
        return arg_value is not None and arg_value != empty_value

    parser = parse_args()
    args = parser.parse_args()

    if args.latency_percentile < 1 or args.latency_percentile > 100:
        parser.print_help()
        raise RuntimeError("The percentile value is incorrect. The applicable values range is [1, 100].")

    fine_tune_option_used = (arg_not_empty(args.number_streams, "")
                             or arg_not_empty(args.number_threads, 0)
                             or arg_not_empty(args.infer_threads_pinning, ""))
    if args.perf_hint != "none" and fine_tune_option_used:
        raise Exception("-nstreams, -nthreads and -pin options are fine tune options. To use them you "
                        "should explicitly set -hint option to none. This is not OpenVINO limitation "
                        "(those options can be used in OpenVINO together), but a benchmark_app UI rule.")

    if args.report_type == "average_counters" and MULTI_DEVICE_NAME in args.target_device:
        raise Exception("only detailed_counters report type is supported for MULTI device")

    _, ext = os.path.splitext(args.path_to_model)
    is_network_compiled = ext == BLOB_EXTENSION
    is_precision_set = not (args.input_precision == ""
                            and args.output_precision == ""
                            and args.input_output_precision == "")

    if is_network_compiled and is_precision_set:
        raise Exception("Cannot set precision for a compiled model. "
                        "Please re-compile your model with required precision.")

    # API type defaults to sync for the latency hint and async otherwise
    if args.api_type == "":
        args.api_type = "sync" if args.perf_hint == "latency" else "async"

    if args.api_type == "sync":
        if args.time == 0 and (args.number_infer_requests > args.number_iterations):
            raise Exception("Number of infer requests should be less than or equal to number of iterations in sync mode.")

    return args, is_network_compiled


def main():
    """Run the benchmark flow end to end.

    The steps are: parse arguments, create the ``Benchmark`` object, build the
    per-device configuration, read/compile (or import) the model, prepare the
    input data, run the measurement loop and print/dump the results.

    Any ``Exception`` raised on the way is logged, recorded in the statistics
    report when one was requested, and the process exits with status 1.
    """
    statistics = None
    try:
        # ------------------------------ 1. Parsing and validating input arguments ------------------------------
        next_step()
        logger.info("Parsing input parameters")
        args, is_network_compiled = parse_and_check_command_line()

        command_line_arguments = get_command_line_arguments(sys.argv)
        if args.report_type:
            _statistics_class = JsonStatisticsReport if args.json_stats else CsvStatisticsReport
            statistics = _statistics_class(StatisticsReport.Config(args.report_type, args.report_folder))
            statistics.add_parameters(StatisticsReport.Category.COMMAND_LINE_PARAMETERS, command_line_arguments)

        def is_flag_set_in_command_line(flag):
            return any(x.strip('-') == flag for x, _ in command_line_arguments)

        device_name = args.target_device

        devices = parse_devices(device_name)
        device_number_streams = parse_value_per_device(devices, args.number_streams, "nstreams")
        device_infer_precision = parse_value_per_device(devices, args.infer_precision, "infer_precision")

        config = {}
        if args.load_config:
            load_config(args.load_config, config)

        if is_network_compiled:
            logger.info("Model is compiled")

        # ------------------------------ 2. Loading OpenVINO Runtime -------------------------------------------
        next_step(step_id=2)

        benchmark = Benchmark(args.target_device, args.number_infer_requests,
                              args.number_iterations, args.time, args.api_type,
                              args.inference_only, args.maximum_inference_rate)

        if args.extensions:
            benchmark.add_extension(path_to_extensions=args.extensions)

        ## GPU (clDNN) Extensions
        if GPU_DEVICE_NAME in device_name and args.path_to_cldnn_config:
            if GPU_DEVICE_NAME not in config.keys():
                config[GPU_DEVICE_NAME] = {}
            config[GPU_DEVICE_NAME]['CONFIG_FILE'] = args.path_to_cldnn_config

        if GPU_DEVICE_NAME in config.keys() and 'CONFIG_FILE' in config[GPU_DEVICE_NAME].keys():
            cldnn_config = config[GPU_DEVICE_NAME]['CONFIG_FILE']
            benchmark.add_extension(path_to_cldnn_config=cldnn_config)

        benchmark.print_version_info()

        # --------------------- 3. Setting device configuration --------------------------------------------------------
        next_step()

        def set_performance_hint(device):
            perf_hint = properties.hint.PerformanceMode.THROUGHPUT
            supported_properties = benchmark.core.get_property(device, properties.supported_properties())
            if properties.hint.performance_mode() in supported_properties:
                if is_flag_set_in_command_line('hint'):
                    if args.perf_hint == "throughput" or args.perf_hint == "tput":
                        perf_hint = properties.hint.PerformanceMode.THROUGHPUT
                    elif args.perf_hint == "latency":
                        perf_hint = properties.hint.PerformanceMode.LATENCY
                    elif args.perf_hint == "cumulative_throughput" or args.perf_hint == "ctput":
                        perf_hint = properties.hint.PerformanceMode.CUMULATIVE_THROUGHPUT
                    elif args.perf_hint == 'none':
                        # Not set PerformanceMode, and plugin will apply its internal default PerformanceMode
                        return
                    else:
                        raise RuntimeError("Incorrect performance hint. Please set -hint option to "
                                           "'throughput'(tput), 'latency', 'cumulative_throughput'(ctput) value or 'none'.")
                else:
                    perf_hint = properties.hint.PerformanceMode.LATENCY if benchmark.api_type == "sync" else properties.hint.PerformanceMode.THROUGHPUT
                    logger.warning(f"Performance hint was not explicitly specified in command line. " +
                                   f"Device({device}) performance hint will be set to {perf_hint}.")
                config[device][properties.hint.performance_mode()] = perf_hint
            else:
                logger.warning(f"Device {device} does not support performance hint property(-hint).")

        def get_device_type_from_name(name):
            # strip everything after the first "." and the first "(" of the device name
            new_name = str(name)
            new_name = new_name.split(".", 1)[0]
            new_name = new_name.split("(", 1)[0]
            return new_name

        ## Set default values from dumped config
        default_devices = set()
        for device in devices:
            device_type = get_device_type_from_name(device)
            if device_type in config and device not in config:
                config[device] = config[device_type].copy()
                default_devices.add(device_type)

        for def_device in default_devices:
            config.pop(def_device)

        perf_counts = False
        # check if using the virtual device
        hw_devices_list = devices.copy()
        # Remove the hardware devices if AUTO/MULTI/HETERO appears in the devices list.
        is_virtual = is_virtual_device_found(devices)
        if is_virtual:
            devices.clear()
            # Parse out the current virtual device as the target device.
            virtual_device = device_name.partition(":")[0]
            hw_devices_list.remove(virtual_device)
            devices.append(virtual_device)
            parse_value_for_virtual_device(virtual_device, device_number_streams)
            parse_value_for_virtual_device(virtual_device, device_infer_precision)

        for device in devices:
            supported_properties = benchmark.core.get_property(device, properties.supported_properties())
            if device not in config.keys():
                config[device] = {}
            ## high-level performance modes
            set_performance_hint(device)

            if is_flag_set_in_command_line('nireq'):
                config[device][properties.hint.num_requests()] = str(args.number_infer_requests)

            ## Set performance counter
            if is_flag_set_in_command_line('pc'):
                ## set to user defined value
                config[device][properties.enable_profiling()] = bool(args.perf_counts)
            elif properties.enable_profiling() in config[device].keys() and config[device][properties.enable_profiling()] == True:
                logger.warning(f"Performance counters for {device} device is turned on. " +
                               "To print results use -pc option.")
            elif args.report_type in [ averageCntReport, detailedCntReport ]:
                logger.warning(f"Turn on performance counters for {device} device " +
                               f"since report type is {args.report_type}.")
                config[device][properties.enable_profiling()] = True
            elif args.exec_graph_path is not None:
                logger.warning(f"Turn on performance counters for {device} device " +
                               "due to execution graph dumping.")
                config[device][properties.enable_profiling()] = True
            elif is_flag_set_in_command_line('pcsort'):
                ## set to default value
                logger.warning(f"Turn on performance counters for {device} device " +
                               f"since pcsort value is {args.perf_counts_sort}.")
                config[device][properties.enable_profiling()] = bool(args.perf_counts_sort)
            else:
                ## set to default value
                config[device][properties.enable_profiling()] = args.perf_counts
            # equality (not identity/truthiness) is kept on purpose: the value may come from a loaded config
            if config[device][properties.enable_profiling()] == True:
                perf_counts = True

            ## insert or append property into hw device properties list
            def update_configs(hw_device, property_name, property_value):
                (key, value) = properties.device.properties({hw_device: {property_name: property_value}})
                # add property into hw device properties list.
                if key not in config[device].keys():
                    config[device][key] = value
                else:
                    current_config = config[device][key].get()
                    if hw_device not in current_config.keys():
                        current_config.update(value.get())
                    else:
                        current_device_config = current_config[hw_device]
                        for prop in value.get().items():
                            current_device_config.update(prop[1])
                        current_config[hw_device].update(current_device_config)
                    config[device][key].set(current_config)

            def update_device_config_for_virtual_device(value, config, key):
                # check if the element contains the hardware device property
                if len(value.split(':')) == 1:
                    # Plain value for the virtual device itself. Use the value that was
                    # passed in: this helper serves both -infer_precision and -nstreams,
                    # so reading device_infer_precision here would be wrong for -nstreams.
                    config[device][key] = value
                else:
                    # set device nstreams properties in the AUTO/MULTI plugin
                    value_vec = value[value.find('{') + 1:value.rfind('}')].split(',')
                    device_properties = {item.split(':')[0]: item.split(':')[1] for item in value_vec}
                    for hw_device, hw_value in device_properties.items():
                        update_configs(hw_device, key, hw_value)

            ## infer precision
            def set_infer_precision():
                key = properties.hint.inference_precision()
                if device in device_infer_precision.keys():
                    ## set to user defined value
                    if key in supported_properties:
                        config[device][key] = device_infer_precision[device]
                    elif is_virtual_device(device):
                        update_device_config_for_virtual_device(device_infer_precision[device], config, key)
                    else:
                        raise Exception(f"Device {device} doesn't support config key INFERENCE_PRECISION_HINT!"
                                        " Please specify -infer_precision for correct devices in format"
                                        " <dev1>:<infer_precision1>,<dev2>:<infer_precision2> or via configuration file.")
                return

            ## the rest are individual per-device settings (overriding the values the device will deduce from perf hint)
            def set_throughput_streams():
                key = get_device_type_from_name(device) + "_THROUGHPUT_STREAMS"
                if device in device_number_streams.keys():
                    ## set to user defined value
                    if key in supported_properties:
                        config[device][key] = device_number_streams[device]
                    elif properties.streams.num() in supported_properties:
                        key = properties.streams.num()
                        config[device][key] = device_number_streams[device]
                    elif is_virtual_device(device):
                        key = properties.streams.num()
                        update_device_config_for_virtual_device(device_number_streams[device], config, key)
                    else:
                        raise Exception(f"Device {device} doesn't support config key '{key}'! " +
                                        "Please specify -nstreams for correct devices in format <dev1>:<nstreams1>,<dev2>:<nstreams2>")
                elif key not in config[device].keys() and args.api_type == "async" \
                        and 'PERFORMANCE_HINT' in config[device].keys() and config[device]['PERFORMANCE_HINT'] == '':
                    ## set the _AUTO value for the #streams
                    logger.warning(f"-nstreams default value is determined automatically for {device} device. " +
                                   "Although the automatic selection usually provides a reasonable performance, "
                                   "but it still may be non-optimal for some cases, for more information look at README.")
                    if key in supported_properties:
                        config[device][key] = get_device_type_from_name(device) + "_THROUGHPUT_AUTO"
                    elif properties.streams.num() in supported_properties:
                        # property *name* (streams.num()), not a streams.Num value object
                        key = properties.streams.num()
                        config[device][key] = "-1"  # Set AUTO mode for streams number
                    elif is_virtual_device(device):
                        # Set nstreams to default value auto if no nstreams specified from cmd line.
                        for hw_device in hw_devices_list:
                            hw_supported_properties = benchmark.core.get_property(hw_device, properties.supported_properties())
                            key = get_device_type_from_name(hw_device) + "_THROUGHPUT_STREAMS"
                            value = get_device_type_from_name(hw_device) + "_THROUGHPUT_AUTO"
                            if key not in hw_supported_properties:
                                key = properties.streams.num()
                                value = properties.streams.Num.AUTO
                            if key in hw_supported_properties:
                                update_configs(hw_device, key, value)
                if key in config[device].keys():
                    device_number_streams[device] = config[device][key]
                return

            def set_nthreads_pin(property_name, property_value):
                if property_name in supported_properties or device_name == AUTO_DEVICE_NAME:
                    # create nthreads/pin primary property for HW device or AUTO if -d is AUTO directly.
                    config[device][property_name] = property_value
                elif is_virtual:
                    # Create secondary property of -nthreads/-pin only for CPU if CPU device appears in the devices
                    # list specified by -d.
                    if CPU_DEVICE_NAME in hw_devices_list:
                        update_configs(CPU_DEVICE_NAME, property_name, property_value)
                return

            if args.number_threads and is_flag_set_in_command_line("nthreads"):
                # limit threading for CPU portion of inference
                set_nthreads_pin(properties.inference_num_threads(), str(args.number_threads))

            if is_flag_set_in_command_line('pin'):
                ## set for CPU to user defined value
                set_nthreads_pin(properties.hint.enable_cpu_pinning(), args.infer_threads_pinning)

            set_throughput_streams()
            set_infer_precision()

            if is_virtual_device(device):
                if device in device_number_streams.keys():
                    del device_number_streams[device]

        device_config = {}
        # In case of multiple devices found prefer the one given in CLI argument
        if benchmark.device.find(device_name) == 0 and device_name in config.keys():
            device_config = config[device_name]
        else:
            for device in config:
                if benchmark.device.find(device) == 0:
                    device_config = config[device]
        if args.cache_dir:
            benchmark.set_cache_dir(args.cache_dir)

        ## If set batch size, disable the auto batching
        if args.batch_size:
            logger.warning("Batch size is set. Auto batching will be disabled")
            device_config["ALLOW_AUTO_BATCHING"] = False

        topology_name = ""
        load_from_file_enabled = is_flag_set_in_command_line('load_from_file') or is_flag_set_in_command_line('lfile')
        if load_from_file_enabled and not is_network_compiled:
            if args.mean_values or args.scale_values:
                raise RuntimeError("--mean_values and --scale_values aren't supported with --load_from_file. "
                                   "The values can be set via model_optimizer while generating xml")
            # steps 4-6 are skipped in this mode
            for _ in range(3):
                next_step()
                print("Skipping the step for loading model from file")

            # --------------------- 7. Loading the model to the device -------------------------------------------------
            next_step()

            start_mem_usage = get_peak_memory_usage()
            start_time = time.perf_counter()
            compiled_model = benchmark.core.compile_model(args.path_to_model, benchmark.device, device_config)
            duration_ms = _elapsed_ms(start_time)
            end_mem_usage = get_peak_memory_usage()

            logger.info(f"Compile model took {duration_ms} ms")
            log_memory_usage(logger, start_mem_usage, end_mem_usage, "compile")
            if statistics:
                statistics.add_parameters(StatisticsReport.Category.EXECUTION_RESULTS,
                                          [
                                              ('compile model time (ms)', duration_ms)
                                          ])
            app_inputs_info, _ = get_inputs_info(args.shape, args.data_shape, args.layout, args.batch_size, args.scale_values, args.mean_values, compiled_model.inputs)
            batch_size = get_network_batch_size(app_inputs_info)
        elif not is_network_compiled:
            # --------------------- 4. Read the Intermediate Representation of the network -----------------------------
            next_step()

            logger.info("Loading model files")

            start_time = time.perf_counter()
            model = benchmark.read_model(args.path_to_model)
            topology_name = model.get_name()
            duration_ms = _elapsed_ms(start_time)
            logger.info(f"Read model took {duration_ms} ms")
            logger.info("Original model I/O parameters:")
            print_inputs_and_outputs_info(model)

            if statistics:
                statistics.add_parameters(StatisticsReport.Category.EXECUTION_RESULTS,
                                          [
                                              ('read model time (ms)', duration_ms)
                                          ])

            # --------------------- 5. Resizing network to match image sizes and given batch ---------------------------
            next_step()

            # give unnamed inputs a name derived from the node's friendly name
            for port in model.inputs:
                if not port.get_names():
                    port.set_names({port.node.get_friendly_name()})

            app_inputs_info, reshape = get_inputs_info(args.shape, args.data_shape, args.layout, args.batch_size, args.scale_values, args.mean_values, model.inputs)

            # use batch size according to provided layout and shapes
            batch_size = get_network_batch_size(app_inputs_info)
            logger.info(f'Model batch size: {batch_size}')

            if reshape:
                start_time = time.perf_counter()
                shapes = {info.name: info.partial_shape for info in app_inputs_info}
                logger.info(
                    'Reshaping model: {}'.format(', '.join("'{}': {}".format(k, str(v)) for k, v in shapes.items())))
                model.reshape(shapes)
                duration_ms = _elapsed_ms(start_time)
                logger.info(f"Reshape model took {duration_ms} ms")
                if statistics:
                    statistics.add_parameters(StatisticsReport.Category.EXECUTION_RESULTS,
                                              [
                                                  ('reshape model time (ms)', duration_ms)
                                              ])

            # --------------------- 6. Configuring inputs and outputs of the model --------------------------------------------------
            next_step()

            pre_post_processing(model, app_inputs_info, args.input_precision, args.output_precision, args.input_output_precision)
            print_inputs_and_outputs_info(model)

            # --------------------- 7. Loading the model to the device -------------------------------------------------
            next_step()

            start_mem_usage = get_peak_memory_usage()
            start_time = time.perf_counter()
            compiled_model = benchmark.core.compile_model(model, benchmark.device, device_config)
            duration_ms = _elapsed_ms(start_time)
            end_mem_usage = get_peak_memory_usage()

            logger.info(f"Compile model took {duration_ms} ms")
            log_memory_usage(logger, start_mem_usage, end_mem_usage, "compile")
            if statistics:
                statistics.add_parameters(StatisticsReport.Category.EXECUTION_RESULTS,
                                          [
                                              ('compile model time (ms)', duration_ms)
                                          ])
        else:
            if args.mean_values or args.scale_values:
                raise RuntimeError("--mean_values and --scale_values aren't supported for compiled model. "
                                   "The values can be set via model_optimizer while generating xml")
            # steps 4-6 are skipped for an already compiled model
            for _ in range(3):
                next_step()
                print("Skipping the step for compiled model")

            # --------------------- 7. Loading the model to the device -------------------------------------------------
            next_step()

            start_mem_usage = get_peak_memory_usage()
            start_time = time.perf_counter()
            compiled_model = benchmark.core.import_model(args.path_to_model, benchmark.device, device_config)
            duration_ms = _elapsed_ms(start_time)
            end_mem_usage = get_peak_memory_usage()

            logger.info(f"Import model took {duration_ms} ms")
            log_memory_usage(logger, start_mem_usage, end_mem_usage, "import")
            if statistics:
                statistics.add_parameters(StatisticsReport.Category.EXECUTION_RESULTS,
                                          [
                                              ('import model time (ms)', duration_ms)
                                          ])
            app_inputs_info, _ = get_inputs_info(args.shape, args.data_shape, args.layout, args.batch_size, args.scale_values, args.mean_values, compiled_model.inputs)
            batch_size = get_network_batch_size(app_inputs_info)

        # --------------------- 8. Querying optimal runtime parameters --------------------------------------------------
        next_step()

        ## actual device-deduced settings
        keys = compiled_model.get_property(properties.supported_properties())
        logger.info("Model:")
        # a real one-element tuple, so the test below is equality and not a substring match
        skip_keys = (properties.supported_properties(),)
        for k in keys:
            if k not in skip_keys:
                value = compiled_model.get_property(k)
                if k == properties.device.properties():
                    for device_key in value.keys():
                        logger.info(f' {device_key}:')
                        for k2, value2 in value.get(device_key).items():
                            if k2 not in skip_keys:
                                logger.info(f' {k2}: {value2}')
                else:
                    logger.info(f' {k}: {value}')

        # Update number of streams
        for device in device_number_streams.keys():
            try:
                key = get_device_type_from_name(device) + '_THROUGHPUT_STREAMS'
                device_number_streams[device] = compiled_model.get_property(key)
            except Exception:
                # device-specific key is not available on the compiled model; fall back to the generic one
                key = 'NUM_STREAMS'
                device_number_streams[device] = compiled_model.get_property(key)

        # ------------------------------------ 9. Creating infer requests and preparing input data ----------------------
        next_step()

        # Create infer requests
        requests = benchmark.create_infer_requests(compiled_model)

        # Prepare input data
        paths_to_input = list()
        if args.paths_to_input:
            for path in args.paths_to_input:
                # entries containing ":" are passed through as-is; the others are made absolute
                if ":" in next(iter(path), ""):
                    paths_to_input.extend(path)
                else:
                    paths_to_input.append(os.path.abspath(*path))

        data_queue = get_input_data(paths_to_input, app_inputs_info)

        static_mode = check_for_static(app_inputs_info)
        allow_inference_only_or_sync = can_measure_as_static(app_inputs_info)
        if not allow_inference_only_or_sync and benchmark.api_type == 'sync':
            raise Exception("Benchmarking of the model with dynamic shapes is available for async API only. "
                            "Please use -api async -hint latency -nireq 1 to emulate sync behavior.")

        if benchmark.inference_only is None:
            benchmark.inference_only = bool(static_mode)
        elif benchmark.inference_only and not allow_inference_only_or_sync:
            raise Exception("Benchmarking dynamic model available with input filling in measurement loop only!")

        # update batch size in case dynamic network with one data_shape
        if allow_inference_only_or_sync and batch_size.is_dynamic:
            batch_size = Dimension(data_queue.batch_sizes[data_queue.current_group_id])

        benchmark.latency_groups = get_latency_groups(app_inputs_info)

        if len(benchmark.latency_groups) > 1:
            logger.info(f"Defined {len(benchmark.latency_groups)} tensor groups:")
            for group in benchmark.latency_groups:
                logger.info(f"\t{str(group)}")

        # Iteration limit
        benchmark.niter = get_number_iterations(benchmark.niter, benchmark.nireq, max(len(info.shapes) for info in app_inputs_info), benchmark.api_type)

        # Set input tensors before first inference
        for request in requests:
            data_tensors = data_queue.get_next_input()
            for port, data_tensor in data_tensors.items():
                input_tensor = request.get_input_tensor(port)
                if not static_mode:
                    input_tensor.shape = data_tensor.shape
                if not len(input_tensor.shape):
                    input_tensor.data.flat[:] = data_tensor.data
                else:
                    input_tensor.data[:] = data_tensor.data

        if statistics:
            statistics.add_parameters(StatisticsReport.Category.RUNTIME_CONFIG,
                                      [
                                          ('topology', topology_name),
                                          ('target device', device_name),
                                          ('API', args.api_type),
                                          ('inference_only', benchmark.inference_only),
                                          ('precision', "UNSPECIFIED"),
                                          ('batch size', str(batch_size)),
                                          ('number of iterations', str(benchmark.niter)),
                                          ('number of parallel infer requests', str(benchmark.nireq)),
                                          ('duration (ms)', str(get_duration_in_milliseconds(benchmark.duration_seconds))),
                                      ])

            for nstreams_device, nstreams_value in device_number_streams.items():
                statistics.add_parameters(StatisticsReport.Category.RUNTIME_CONFIG,
                                          [
                                              (f"number of {nstreams_device} streams", str(nstreams_value)),
                                          ])

        # ------------------------------------ 10. Measuring performance -----------------------------------------------

        output_string = process_help_inference_string(benchmark, device_number_streams)

        next_step(additional_info=output_string)

        if benchmark.inference_only:
            logger.info("Benchmarking in inference only mode (inputs filling are not included in measurement loop).")
        else:
            logger.info("Benchmarking in full mode (inputs filling are included in measurement loop).")

        if not args.no_warmup:
            duration_ms = f"{benchmark.first_infer(requests):.2f}"
            logger.info(f"First inference took {duration_ms} ms")
            if statistics:
                statistics.add_parameters(StatisticsReport.Category.EXECUTION_RESULTS,
                                          [
                                              ('first inference time (ms)', duration_ms)
                                          ])
        else:
            logger.info("Skipping warmup inference due to -no_warmup flag")

        pcseq = args.pcseq
        if static_mode or len(benchmark.latency_groups) == 1:
            pcseq = False

        fps, median_latency_ms, avg_latency_ms, min_latency_ms, max_latency_ms, total_duration_sec, iteration = benchmark.main_loop(requests, data_queue, batch_size, args.latency_percentile, pcseq)

        # ------------------------------------ 11. Dumping statistics report -------------------------------------------
        next_step()

        if args.dump_config:
            dump_config(args.dump_config, config)
            logger.info(f"OpenVINO configuration settings were dumped to {args.dump_config}")

        if args.exec_graph_path:
            dump_exec_graph(compiled_model, args.exec_graph_path)

        if perf_counts:
            perfs_count_list = []
            for request in requests:
                perfs_count_list.append(request.profiling_info)

            if args.perf_counts_sort:
                total_sorted_list = print_perf_counters_sort(perfs_count_list, sort_flag=args.perf_counts_sort)
                if statistics:
                    statistics.dump_performance_counters_sorted(total_sorted_list)

            elif args.perf_counts:
                print_perf_counters(perfs_count_list)

            if statistics:
                # if not args.perf_counts_sort:
                statistics.dump_performance_counters(perfs_count_list)

        if statistics:
            statistics.add_parameters(StatisticsReport.Category.EXECUTION_RESULTS,
                                      [
                                          ('total execution time (ms)', f'{get_duration_in_milliseconds(total_duration_sec):.2f}'),
                                          ('total number of iterations', str(iteration)),
                                      ])
            if MULTI_DEVICE_NAME not in device_name:
                if args.latency_percentile == 50:
                    latency_prefix = 'latency (ms)'
                else:
                    latency_prefix = 'latency (' + str(args.latency_percentile) + ' percentile) (ms)'
                statistics.add_parameters(StatisticsReport.Category.EXECUTION_RESULTS,
                                          [
                                              (latency_prefix, f'{median_latency_ms:.2f}'),
                                          ])
                statistics.add_parameters(StatisticsReport.Category.EXECUTION_RESULTS,
                                          [
                                              ("avg latency", f'{avg_latency_ms:.2f}'),
                                          ])
                statistics.add_parameters(StatisticsReport.Category.EXECUTION_RESULTS,
                                          [
                                              ("min latency", f'{min_latency_ms:.2f}'),
                                          ])
                statistics.add_parameters(StatisticsReport.Category.EXECUTION_RESULTS,
                                          [
                                              ("max latency", f'{max_latency_ms:.2f}'),
                                          ])
                if pcseq:
                    for group in benchmark.latency_groups:
                        statistics.add_parameters(StatisticsReport.Category.EXECUTION_RESULTS,
                                                  [
                                                      ("group", str(group)),
                                                  ])
                        statistics.add_parameters(StatisticsReport.Category.EXECUTION_RESULTS,
                                                  [
                                                      ("avg latency", f'{group.avg:.2f}'),
                                                  ])
                        statistics.add_parameters(StatisticsReport.Category.EXECUTION_RESULTS,
                                                  [
                                                      ("min latency", f'{group.min:.2f}'),
                                                  ])
                        statistics.add_parameters(StatisticsReport.Category.EXECUTION_RESULTS,
                                                  [
                                                      ("max latency", f'{group.max:.2f}'),
                                                  ])
            statistics.add_parameters(StatisticsReport.Category.EXECUTION_RESULTS,
                                      [
                                          ('throughput', f'{fps:.2f}'),
                                      ])
            statistics.dump()

        try:
            exeDevice = compiled_model.get_property("EXECUTION_DEVICES")
            logger.info(f'Execution Devices:{exeDevice}')
        except Exception:
            # best effort only: the report is still printed when the property cannot be read
            logger.debug("EXECUTION_DEVICES property could not be read from the compiled model")

        logger.info(f'Count: {iteration} iterations')
        logger.info(f'Duration: {get_duration_in_milliseconds(total_duration_sec):.2f} ms')
        if MULTI_DEVICE_NAME not in device_name:
            logger.info('Latency:')
            if args.latency_percentile == 50:
                logger.info(f' Median: {median_latency_ms:.2f} ms')
            else:
                logger.info(f' {args.latency_percentile} percentile: {median_latency_ms:.2f} ms')
            logger.info(f' Average: {avg_latency_ms:.2f} ms')
            logger.info(f' Min: {min_latency_ms:.2f} ms')
            logger.info(f' Max: {max_latency_ms:.2f} ms')

            if pcseq:
                logger.info("Latency for each data shape group:")
                for idx, group in enumerate(benchmark.latency_groups):
                    logger.info(f"{idx+1}.{str(group)}")
                    if args.latency_percentile == 50:
                        logger.info(f' Median: {group.median:.2f} ms')
                    else:
                        logger.info(f' {args.latency_percentile} percentile: {group.median:.2f} ms')
                    logger.info(f' Average: {group.avg:.2f} ms')
                    logger.info(f' Min: {group.min:.2f} ms')
                    logger.info(f' Max: {group.max:.2f} ms')

        logger.info(f'Throughput: {fps:.2f} FPS')

        del compiled_model

        next_step.step_id = 0
    except Exception as e:
        logger.exception(e)

        if statistics:
            statistics.add_parameters(
                StatisticsReport.Category.EXECUTION_RESULTS,
                [('error', str(e))]
            )
            statistics.dump()
        sys.exit(1)