Files
ANSLibs/OpenVINO/python/openvino/tools/benchmark/utils/inputs_filling.py

482 lines
22 KiB
Python

# Copyright (C) 2018-2025 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
import os
import sys
import re
import numpy as np
from collections import defaultdict
from pathlib import Path
from importlib.util import find_spec
from openvino import Tensor, PartialShape, Type
from openvino.utils.types import get_dtype
from .constants import IMAGE_EXTENSIONS, NUMPY_EXTENSIONS, BINARY_EXTENSIONS
from .logging import logger
from .utils import AppInputInfo
if find_spec('cv2') is not None:
try:
import cv2
except ImportError as ex:
raise Exception("Failed to import opencv module. "
"Please try to uninstall opencv-python "
"and install opencv-python-headless instead.") from ex
class DataQueue:
    """Cyclic supplier of prepared inputs and of the matching batch sizes.

    ``input_data`` maps an input key to a sequence of prepared items; each
    key keeps its own cursor that wraps around independently.
    ``batch_sizes`` is cycled with one shared cursor (``current_group_id``).
    """

    def __init__(self, input_data: dict, batch_sizes: list):
        self.input_data = input_data
        # Number of prepared items per input, used as the wrap-around modulus.
        self.sizes_map = {key: len(items) for key, items in input_data.items()}
        # Per-input cursor, all starting at the first item.
        self.index_map = defaultdict.fromkeys(input_data.keys(), 0)
        self.batch_sizes = batch_sizes
        self.size = len(batch_sizes)
        self.current_group_id = 0

    def get_next_input(self):
        """Return ``{key: item}`` for the current cursors and advance them.

        Also advances ``current_group_id`` by one (modulo ``size``).
        """
        data = {}
        for key, items in self.input_data.items():
            cursor = self.index_map[key]
            data[key] = items[cursor]
            self.index_map[key] = (cursor + 1) % self.sizes_map[key]
        self.current_group_id = (self.current_group_id + 1) % self.size
        return data

    def get_next_batch_size(self):
        """Return the batch size of the group the shared cursor points at."""
        return self.batch_sizes[self.current_group_id]
def get_group_batch_sizes(app_input_info):
    """Return one batch size per shape group.

    The number of groups is the largest ``len(info.shapes)`` over all inputs;
    inputs with fewer shapes are cycled. Within a group, every input whose
    layout has an ``'N'`` dimension must report the same value at that
    dimension, otherwise an ``Exception`` is raised. A group in which no
    input contributes a (non-zero) batch gets batch size 1.
    """
    group_count = max(len(info.shapes) for info in app_input_info)
    batch_sizes = []
    for group in range(group_count):
        group_batch = 0
        for info in app_input_info:
            layout = info.layout
            if not layout.has_name('N'):
                continue
            batch_index = layout.get_index_by_name('N')
            # An index of -1 is treated the same as "no batch dimension".
            if batch_index == -1:
                continue
            candidate = info.shapes[group % len(info.shapes)][batch_index]
            if group_batch == 0:
                group_batch = candidate
            elif group_batch != candidate:
                raise Exception("Can't deterimine batch size: batch is different for different inputs!")
        batch_sizes.append(1 if group_batch == 0 else group_batch)
    return batch_sizes
def get_batch_sizes_per_input_map(app_input_info: list[AppInputInfo]):
    """Map every input name to the list of batch sizes used for its shapes.

    * layout without ``'N'``: ``1`` for each entry of ``info.shapes``;
    * dynamic input with ``'N'``: whatever ``info.getDimensionsByLayout('N')``
      returns;
    * otherwise: a single-element list holding
      ``len(info.getDimensionByLayout('N'))``.
    """
    batch_sizes_map = {}
    for info in app_input_info:
        if not info.layout.has_name('N'):
            sizes = [1] * len(info.shapes)
        elif info.is_dynamic:
            sizes = info.getDimensionsByLayout('N')
        else:
            # presumably len() of the returned dimension yields its length - verify in AppInputInfo
            sizes = [len(info.getDimensionByLayout('N'))]
        batch_sizes_map[info.name] = sizes
    return batch_sizes_map
def verify_objects_to_be_used(objects_to_be_used_map: dict[str, int], info: AppInputInfo, total_frames: int, input_type_name: str):
    """Check the number of provided objects for ``info`` against ``total_frames``.

    ``objects_to_be_used_map`` maps an input name to the *count* of objects
    (files) available for it; the entry for ``info.name`` is updated in place.

    * More objects than ``total_frames`` and not a multiple of it: the count
      is rounded down to the nearest multiple and a warning is logged.
    * Fewer objects than ``total_frames``: a warning is logged (the count is
      left unchanged).
    * Otherwise nothing happens.

    :param objects_to_be_used_map: input name -> number of usable objects.
    :param info: input description; only ``info.name`` is read.
    :param total_frames: number of frames required for this input.
    :param input_type_name: plural noun used in the log messages.
    """
    provided = objects_to_be_used_map[info.name]
    if provided > total_frames and provided % total_frames != 0:
        usable = provided - provided % total_frames
        objects_to_be_used_map[info.name] = usable
        logger.warning(f"Number of provided {input_type_name} for input '{info.name}' is not a multiple of the number of "
                       f"provided data shapes. Only {usable} {input_type_name} will be processed for this input.")
    elif provided < total_frames:
        logger.warning(f"Some {input_type_name} will be dublicated: {total_frames} is required, "
                       f"but only {provided} were provided.")
def get_input_data(paths_to_input, app_input_info):
    """Prepare tensors for every model input and return them as a ``DataQueue``.

    Input files are split per input into images, numpy files and binaries.
    A first pass validates each input: when shapes are known the file count
    is checked against the required number of frames; when no shapes are
    known only images or numpy files are accepted (they are used with their
    original shapes), anything else raises. A second pass builds the tensors,
    keyed by the position of the input in ``app_input_info``.

    Inputs without files are filled by ``get_image_info_tensors`` when they
    are flagged ``is_image_info`` and exactly one image-size entry exists,
    and with random values otherwise.
    """
    image_mapping, numpy_mapping, binary_mapping = get_input_file_mappings(paths_to_input, app_input_info)
    image_sizes = get_image_sizes(app_input_info)
    batch_sizes_map = get_batch_sizes_per_input_map(app_input_info)

    # Number of files that will actually be consumed per input; may be
    # reduced by verify_objects_to_be_used below.
    images_to_be_used_map = {key: len(paths) for key, paths in image_mapping.items()}
    numpys_to_be_used_map = {key: len(paths) for key, paths in numpy_mapping.items()}
    binaries_to_be_used_map = {key: len(paths) for key, paths in binary_mapping.items()}

    # Pass 1: validation and diagnostics.
    for info in app_input_info:
        if not info.shapes:
            if info.name in image_mapping:
                logger.info(f"Images given for input '{info.name}' will be processed with original shapes.")
            elif info.name in numpy_mapping:
                logger.info(f"Numpy arrays given for input '{info.name}' will be processed with original shapes.")
            else:
                raise Exception(f"Input {info.name} is dynamic. Provide data shapes!")
            continue

        total_frames = np.sum(batch_sizes_map[info.name])
        if info.name in image_mapping:
            verify_objects_to_be_used(images_to_be_used_map, info, total_frames, "images")
        elif info.name in numpy_mapping:
            verify_objects_to_be_used(numpys_to_be_used_map, info, total_frames, "numpy arrays")
        elif info.name in binary_mapping:
            verify_objects_to_be_used(binaries_to_be_used_map, info, total_frames, "binaries")
        elif not (info.is_image_info and len(image_sizes) == 1):
            logger.warning(f"No input files were given for input '{info.name}'!. This input will be filled with random values!")

    # Pass 2: tensor creation, keyed by input position (port).
    data = {}
    for port, info in enumerate(app_input_info):
        if info.name in image_mapping:
            selected = image_mapping[info.name][:images_to_be_used_map[info.name]]
            data[port] = get_image_tensors(selected, info, batch_sizes_map[info.name])
        elif info.name in numpy_mapping:
            selected = numpy_mapping[info.name][:numpys_to_be_used_map[info.name]]
            data[port] = get_numpy_tensors(selected, info, batch_sizes_map[info.name])
        elif info.name in binary_mapping:
            selected = binary_mapping[info.name][:binaries_to_be_used_map[info.name]]
            data[port] = get_binary_tensors(selected, info, batch_sizes_map[info.name])
        elif info.is_image_info and len(image_sizes) == 1:
            image_size = image_sizes[0]
            logger.info(f"Create input tensors for input '{info.name}' with image sizes: {image_size}")
            data[port] = get_image_info_tensors(image_size, info)
        else:
            logger.info(f"Fill input '{info.name}' with random values ")
            data[port] = fill_tensors_with_random(info)

    return DataQueue(data, get_group_batch_sizes(app_input_info))
def get_image_tensors(image_paths: list[str], info: AppInputInfo, batch_sizes: list[int]) -> list[Tensor]:
    """Load images with OpenCV and pack them into tensors for input ``info``.

    With known shapes (``info.shapes`` non-empty) one tensor is produced per
    iteration, holding ``batch_sizes[i % num_shapes]`` images; image files are
    consumed cyclically. Images are resized to the input's width/height when
    the layout has both ``'H'`` and ``'W'``.

    Without known shapes every image becomes its own tensor with its original
    size, and the resulting shape is appended to ``info.data_shapes``.

    :param image_paths: image file paths (must not be empty).
    :param info: description of the model input being filled.
    :param batch_sizes: batch size per shape of this input.
    :raises Exception: if OpenCV is not loaded, an image cannot be read, or an
        image does not fit the input shape / partial shape.
    """
    if 'cv2' not in sys.modules:
        message = ("Loading images requires the opencv-python or opencv-python-headless package. "
                   "Please install it before continuing or run benchmark without "
                   "the -i flag to fill vectors with random data.")
        logger.error(message)
        # Continuing would only fail later with a NameError on `cv2`.
        raise Exception(message)

    num_shapes = len(info.shapes)
    num_images = len(image_paths)
    processed_frames = 0
    widths = info.widths if info.is_dynamic else [info.width]
    heights = info.heights if info.is_dynamic else [info.height]
    process_with_original_shapes = num_shapes == 0
    dtype = get_dtype(info.element_type)
    tensors = []

    niter = max(num_shapes, num_images)
    for i in range(niter):
        shape = list(info.shapes[i % num_shapes]) if num_shapes else []
        # Uninitialised buffer for the whole batch; filled image by image below.
        images = np.ndarray(shape=shape, dtype=dtype)
        image_index = processed_frames
        current_batch_size = 1 if process_with_original_shapes else batch_sizes[i % num_shapes]
        for b in range(current_batch_size):
            image_index %= num_images
            image_filename = image_paths[image_index]
            logger.info(f'Prepare image {image_filename}')
            image = cv2.imread(image_filename)
            if image is None:
                # cv2.imread signals failure by returning None instead of raising.
                raise Exception(f"Failed to read image '{image_filename}'. "
                                f"Make sure -i parameter is valid.")
            if process_with_original_shapes:
                logger.info(f'Image will be processed with original shape - {image.shape[:-1]}')
            elif info.layout.has_name('H') and info.layout.has_name('W'):
                new_im_size = (widths[i % num_shapes], heights[i % num_shapes])
                # image.shape is (height, width, channels) while cv2.resize takes
                # (width, height): compare in the same order.
                current_im_size = (image.shape[1], image.shape[0])
                if current_im_size != new_im_size:
                    logger.warning(f"Image is resized from ({image.shape[:-1]}) to ({new_im_size})")
                    image = cv2.resize(image, new_im_size)

            model_channel = int(str(info.channels))
            image_channel = image.shape[-1]
            if model_channel == 1 and image_channel == 3:
                image = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)

            # Channel-first layouts need HWC -> CHW.
            if model_channel == image_channel and str(info.layout) in ['[N,C,H,W]', '[C,H,W]']:
                image = image.transpose((2, 0, 1))

            if process_with_original_shapes:
                if len(info.partial_shape) == 4:
                    image = np.expand_dims(image, 0)
                p_shape = PartialShape(image.shape)
                if info.partial_shape.compatible(p_shape):
                    info.data_shapes.append(p_shape.to_shape())
                else:
                    raise Exception(f"Data shape '{str(p_shape)}' provided for input '{info.name}' "
                                    f"is not compatible with partial shape '{str(info.partial_shape)}' for this input.")
                tensors.append(Tensor(image.astype(dtype)))
            else:
                try:
                    if 3 == images[b].ndim and 1 == images[b].shape[2] and 2 == image.ndim:
                        # The model last dim has length 1, which means it takes greyscale images.
                        # Extend input image dims to match it
                        images[b] = image[:, :, None]
                    else:
                        images[b] = image
                except ValueError as ex:
                    raise Exception(f"Image shape {image.shape} is not compatible with input shape {shape}! "
                                    f"Make sure -i parameter is valid.") from ex
            image_index += 1
        processed_frames += current_batch_size
        if not process_with_original_shapes:
            tensors.append(Tensor(images))
    return tensors
def get_numpy_tensors(numpy_paths: list[str], info: AppInputInfo, batch_sizes: list[int]) -> list[Tensor]:
    """Load ``.npy`` files and pack them into tensors for input ``info``.

    With known shapes every file must have exactly the input shape and dtype.
    If the layout has an ``'N'`` dimension, batch item ``b`` of the tensor is
    copied from batch item ``b`` of the ``b``-th file of the current group
    (files are consumed cyclically); without ``'N'`` the loaded array is used
    as is.

    Without known shapes each file becomes its own tensor; a leading axis is
    added when the array has one dimension less than ``info.partial_shape``
    and the resulting shape is appended to ``info.data_shapes``.

    :param numpy_paths: paths of ``.npy`` files (must not be empty).
    :param info: description of the model input being filled.
    :param batch_sizes: batch size per shape of this input.
    :raises Exception: on an unsupported extension, a shape or dtype
        mismatch, or a shape incompatible with the partial shape.
    """
    num_shapes = len(info.shapes)
    num_arrays = len(numpy_paths)
    processed_frames = 0
    process_with_original_shapes = num_shapes == 0
    dtype = get_dtype(info.element_type)
    tensors = []

    niter = max(num_shapes, num_arrays)
    for i in range(niter):
        shape = list(info.shapes[i % num_shapes]) if num_shapes else []
        numpy_arrays = np.ndarray(shape=shape, dtype=dtype)
        numpy_index = processed_frames
        current_batch_size = 1 if process_with_original_shapes else batch_sizes[i % num_shapes]
        for b in range(current_batch_size):
            numpy_index %= num_arrays
            numpy_filename: str = numpy_paths[numpy_index]
            extension = numpy_filename.lower().split('.')[-1]
            if extension != "npy":
                raise Exception(
                    f"Unsupported numpy file type: {extension}")

            numpy_arr: np.ndarray = np.load(numpy_filename)

            if list(numpy_arr.shape) != shape and not process_with_original_shapes:
                raise Exception(
                    f"Numpy array shape mismatch. File {numpy_filename} "
                    f"has shape: {numpy_arr.shape}, expected: {shape}")

            if numpy_arr.dtype != dtype:
                raise Exception(
                    f"Numpy array in file {numpy_filename} is of "
                    f"{numpy_arr.dtype} format, which does not match "
                    f"input type {dtype}.")

            if process_with_original_shapes:
                if len(info.partial_shape) - 1 == len(numpy_arr.shape):
                    numpy_arr = np.expand_dims(numpy_arr, 0)

                p_shape = PartialShape(numpy_arr.shape)
                if info.partial_shape.compatible(p_shape):
                    info.data_shapes.append(p_shape.to_shape())
                else:
                    raise Exception(f"Data shape '{str(p_shape)}' provided for input '{info.name}' "
                                    f"is not compatible with partial shape '{str(info.partial_shape)}' for this input.")
                tensors.append(Tensor(numpy_arr))
            else:
                try:
                    if info.layout.has_name("N"):
                        # Select item `b` along the batch axis, whatever its position.
                        # A tuple of slices is required here: a Python list of
                        # None/int values is not a valid per-axis index.
                        batch_item = [slice(None)] * numpy_arrays.ndim
                        batch_item[info.layout.get_index_by_name("N")] = b
                        batch_item = tuple(batch_item)
                        numpy_arrays[batch_item] = numpy_arr[batch_item]
                    else:
                        numpy_arrays = numpy_arr
                except ValueError as ex:
                    raise Exception(f"Numpy array shape {numpy_arr.shape} is not compatible with input shape {shape}! "
                                    f"Make sure -i parameter is valid.") from ex
            numpy_index += 1

        processed_frames += current_batch_size
        if not process_with_original_shapes:
            tensors.append(Tensor(numpy_arrays))

    return tensors
def get_binary_tensors(binary_paths: list[str], info: AppInputInfo, batch_sizes: list[int]) -> list[Tensor]:
    """Load raw ``.bin`` files and pack them into tensors for input ``info``.

    Every file must contain exactly as many bits as one full tensor of the
    current shape (``element_type.bitwidth * prod(shape)``). Element types
    narrower than 8 bits are read as raw bytes (``uint8``).

    If the layout has an ``'N'`` dimension, batch item ``b`` of the tensor is
    copied from batch item ``b`` of the ``b``-th file of the current group
    (files are consumed cyclically); without ``'N'`` the file content is used
    as is.

    :param binary_paths: paths of ``.bin`` files (must not be empty).
    :param info: description of the model input being filled.
    :param batch_sizes: batch size per shape of this input.
    :raises Exception: if no shapes are known for the input, on an
        unsupported extension, or on a file size mismatch.
    """
    num_shapes = len(info.shapes)
    num_binaries = len(binary_paths)
    if num_shapes == 0:
        # Raw bytes carry no shape information; fail clearly instead of with
        # a ZeroDivisionError in the modulo below.
        raise Exception(f"Input {info.name} is dynamic. Provide data shapes!")

    # np.uint8 (the type), not np.uint8() (a scalar instance), is the dtype.
    dtype = np.uint8 if info.element_type.bitwidth < 8 else get_dtype(info.element_type)
    niter = max(num_shapes, num_binaries)
    processed_frames = 0
    tensors = []
    for i in range(niter):
        shape_id = i % num_shapes
        shape = list(info.shapes[shape_id])
        binaries = np.ndarray(shape=shape, dtype=dtype)
        binary_index = processed_frames
        current_batch_size = batch_sizes[shape_id]
        for b in range(current_batch_size):
            binary_index %= num_binaries
            binary_filename: str = binary_paths[binary_index]
            extension = binary_filename.lower().split('.')[-1]
            if extension != "bin":
                raise Exception(
                    f"Unsupported binary file type: {extension}")

            binary_file_bit_size = os.path.getsize(binary_filename) * 8
            blob_bit_size = info.element_type.bitwidth * int(np.prod(shape))
            if blob_bit_size != binary_file_bit_size:
                raise Exception(
                    f"File {binary_filename} contains {binary_file_bit_size} bites but model expects {blob_bit_size}")
            from_file = np.fromfile(binary_filename, dtype)
            if info.layout.has_name("N"):
                # Select item `b` along the batch axis with a tuple of slices;
                # a Python list of None/int values is not a valid per-axis index.
                batch_item = [slice(None)] * binaries.ndim
                batch_item[info.layout.get_index_by_name("N")] = b
                batch_item = tuple(batch_item)
                if from_file.size == binaries.size:
                    # The file holds a full tensor: copy the matching batch
                    # item rather than broadcasting one flat element.
                    binaries[batch_item] = from_file.reshape(shape)[batch_item]
                else:
                    # NOTE(review): packed sub-byte data - element count differs
                    # from the byte buffer, so the previous per-element fill is
                    # kept as is; confirm the intended layout for this case.
                    binaries[batch_item] = from_file[b]
            else:
                binaries = from_file

            binary_index += 1
        processed_frames += current_batch_size
        tensors.append(Tensor(binaries, shape, info.element_type))
    return tensors
def get_image_sizes(app_input_info):
    """Collect the image sizes of all inputs flagged ``is_image``.

    A static input contributes one ``(width, height)`` tuple; any other image
    input contributes a list of ``(width, height)`` tuples built by pairing
    ``info.widths`` with ``info.heights``. Non-image inputs are skipped.
    """
    image_sizes = []
    for info in app_input_info:
        if not info.is_image:
            continue
        if info.is_static:
            image_sizes.append((info.width, info.height))
        else:
            image_sizes.append(list(zip(info.widths, info.heights)))
    return image_sizes
def get_image_info_tensors(image_sizes, layer):
    """Create one "image info" tensor per shape of ``layer``.

    ``layer.shapes`` is paired element-wise with ``image_sizes``. For each
    pair, positions 0 and 1 along the second axis are set to the paired
    ``image_size`` value and all remaining positions to 1, for every index of
    the first axis.

    NOTE(review): the paired value is written to both positions 0 and 1, and
    iterating ``image_sizes`` means a single ``(width, height)`` tuple is
    consumed element by element - confirm this matches the caller's intent.
    """
    im_infos = []
    for shape, image_size in zip(layer.shapes, image_sizes):
        im_info = np.ndarray(shape, dtype=get_dtype(layer.element_type))
        for b in range(shape[0]):
            row = im_info[b]
            for i in range(shape[1]):
                row[i] = image_size if i < 2 else 1
        im_infos.append(Tensor(im_info))
    return im_infos
def get_random_4bit_tensor(shape, element_type, rs):
    """Build a tensor of ``shape``/``element_type`` from random packed bits.

    One random bit is drawn for every bit of the tensor (the last dimension
    is multiplied by ``element_type.bitwidth``), the bits are packed into
    bytes with ``np.packbits`` and handed to ``Tensor`` together with the
    logical shape and element type.

    :param rs: random generator providing ``uniform(low, high, size)``.
    """
    pack_shape = list(shape)
    pack_shape[-1] *= element_type.bitwidth
    # A sample in [0, 15) becomes a set bit when it is >= 7.
    bits = (rs.uniform(0, 15, pack_shape) >= 7).astype(int).flatten()
    return Tensor(np.packbits(bits), shape, element_type)
def fill_tensors_with_random(layer):
    """Create one randomly filled tensor per shape of ``layer``.

    The generator is seeded with a fixed seed, so the produced values are
    reproducible. 4-bit element types are delegated to
    ``get_random_4bit_tensor``. Boolean inputs draw from {0, 1}; every other
    type draws from the ``uint8`` value range. An empty shape yields a scalar
    (0-d) tensor, or a one-element tensor for 4-bit types.

    :param layer: input description providing ``element_type`` and ``shapes``.
    :return: list of tensors, one per entry of ``layer.shapes``.
    """
    is_4bit = layer.element_type.bitwidth == 4
    dtype = np.uint8 if is_4bit else get_dtype(layer.element_type)
    rand_min, rand_max = (0, 1) if dtype == bool else (np.iinfo(np.uint8).min, np.iinfo(np.uint8).max)
    # np.random.uniform excludes high: add 1 to have it generated
    is_discrete = np.dtype(dtype).kind in ['i', 'u', 'b']
    if is_discrete:
        rand_max += 1
    rs = np.random.RandomState(np.random.MT19937(np.random.SeedSequence(0)))

    def draw(size=None):
        """Draw uniform samples and convert them to ``dtype``."""
        values = np.asarray(rs.uniform(rand_min, rand_max, size))
        if is_discrete:
            # Floor before casting: casting a float straight to bool maps every
            # non-zero sample to True. For integer types flooring equals the
            # truncation the cast would do on these non-negative samples.
            values = np.floor(values)
        return values.astype(dtype)

    input_tensors = []
    for shape in layer.shapes:
        if shape:
            if is_4bit:
                ov_tensor = get_random_4bit_tensor(shape, layer.element_type, rs)
            else:
                ov_tensor = Tensor(draw(list(shape)))
        else:
            if is_4bit:
                ov_tensor = get_random_4bit_tensor([1], layer.element_type, rs)
            else:
                ov_tensor = Tensor(np.ndarray([], dtype, draw()))
        input_tensors.append(ov_tensor)
    return input_tensors
def get_input_file_mappings(paths_to_inputs, app_input_info):
    """Parse every ``-i`` path and merge the results per input name.

    Each path is handed to ``parse_path``, which yields three mappings
    (images, numpy files, binaries). The mappings of all paths are merged per
    kind; inputs without files are dropped and each file list is sorted.

    :return: tuple ``(image_mapping, numpy_mapping, binary_mapping)`` of
        plain dicts ``{input_name: sorted list of file paths}``.
    """
    parsed = [parse_path(path, app_input_info) for path in paths_to_inputs]

    def merged_and_sorted(kind):
        """Merge mapping number ``kind`` of every parsed path."""
        merged = defaultdict(list)
        for mappings in parsed:
            for input_name, files in mappings[kind].items():
                merged[input_name] += files
        return {input_name: sorted(files) for input_name, files in merged.items() if files}

    return merged_and_sorted(0), \
        merged_and_sorted(1), \
        merged_and_sorted(2)
def parse_path(path, app_input_info):
    """
    Parse "input_1:file1/dir1,file2/dir2,input_2:file3/dir3 or file1/dir1,file2/dir2" into three dicts,
    each containing input_name (str) as key and list of strings of binary/numpy/image filepaths as values.

    With an explicit mapping, a name may be an input name or a node name
    (node names are translated to the input name); files given for the same
    input in several places are accumulated. Without a mapping, the files are
    distributed round-robin over all inputs that are not ``is_image_info``.
    Directories are expanded to the entries they contain; files with an
    unsupported extension are reported in a warning and ignored.

    :raises Exception: on unknown input names, when no path can be extracted
        from ``path``, when a path does not exist, or when files are given
        but there is no input they could be assigned to.
    """
    input_names = [info.name for info in app_input_info]
    input_node_names = [info.node_name for info in app_input_info]
    parsed_names = re.findall(r"((?=[^,])(?![a-zA-Z]:\\)[\w\.]+):", path)
    known_names = input_names + input_node_names
    wrong_names = [name for name in parsed_names if name not in known_names]
    if wrong_names:
        raise Exception(
            f"Wrong input mapping! Cannot find inputs: {wrong_names}. "
            f"Available inputs: {input_names}. "
            "Please check `-i` input data"
        )
    tensor_names = [parsed_name if parsed_name in input_names else input_names[input_node_names.index(parsed_name)]
                    for parsed_name in parsed_names]
    input_pathes = [chunk for chunk in re.split(r"(?=[^,])(?![a-zA-Z]:\\)[\w\.]+:", path) if chunk]
    input_path_mapping = defaultdict(list)
    # input mapping is used
    if tensor_names:
        # Accumulate instead of overwriting so that a name given twice keeps
        # the files from both occurrences.
        for input_, files in zip(tensor_names, input_pathes):
            input_path_mapping[input_] += files.strip(",").split(",")
    else:
        if not input_pathes:
            raise Exception(f"No input paths were found in '{path}'. "
                            "Please check `-i` input data")
        input_files = []
        for _input_path in input_pathes[0].strip(",").split(","):
            input_path = Path(_input_path)
            if input_path.exists():
                if input_path.is_dir():
                    input_files += [str(file_path) for file_path in input_path.iterdir()]
                elif input_path.is_file():
                    input_files.append(str(input_path))
            else:
                raise Exception(f"Path '{str(input_path)}' doesn't exist \n {str(input_path)}")
        num_files, num_inputs = len(input_files), len(app_input_info)
        if num_inputs > 1:
            logger.warning(f"Model has {num_inputs} inputs. It's recommended to use name mapping to specify parameters for each input.")
            if num_files > num_inputs and num_files % num_inputs != 0:
                input_files = input_files[:num_files - num_files % num_inputs]
                logger.warning(f"Number of provided input files '{num_files}' is not a multiple of the number of "
                               f"model inputs. Only {len(input_files)} files fill be used.")
                num_files = len(input_files)
        inputs_to_fill = [info.name for info in app_input_info if not info.is_image_info]
        if input_files and not inputs_to_fill:
            # Avoid a ZeroDivisionError in the round-robin below.
            raise Exception("Input files were provided, but the model has no inputs that can be filled from files. "
                            "Please check `-i` input data")
        for i, input_file in enumerate(input_files):
            input_path_mapping[inputs_to_fill[i % len(inputs_to_fill)]].append(input_file)

    images_mapping = defaultdict(list)
    numpy_mapping = defaultdict(list)
    binary_mapping = defaultdict(list)
    unsupported_files = []
    for input_name, _input_pathes in input_path_mapping.items():
        for _input_path in _input_pathes:
            input_path = Path(_input_path)
            if not input_path.exists():
                raise Exception(f"Path for input '{input_name}' doesn't exist \n {str(input_path)}")
            files = []
            if input_path.is_dir():
                files = input_path.iterdir()
            elif input_path.is_file():
                files = [input_path]
            for file in files:
                suffix = file.suffix.lower()
                if suffix in IMAGE_EXTENSIONS:
                    images_mapping[input_name].append(str(file))
                elif suffix in NUMPY_EXTENSIONS:
                    numpy_mapping[input_name].append(str(file))
                elif suffix in BINARY_EXTENSIONS:
                    binary_mapping[input_name].append(str(file))
                else:
                    unsupported_files.append(str(file))
    if unsupported_files:
        logger.warning(f"This files has unsupported extensions and will "
                       f"be ignored: {unsupported_files}.\n"
                       f"Supported extentions:\n"
                       f"Images: {IMAGE_EXTENSIONS}\n"
                       f"Binary: {BINARY_EXTENSIONS}\n"
                       f"Numpy: {NUMPY_EXTENSIONS}")
    return images_mapping, numpy_mapping, binary_mapping