Refactor project structure

This commit is contained in:
2026-03-28 19:56:39 +11:00
parent 1d267378b2
commit 8a2e721058
511 changed files with 59 additions and 48 deletions

View File

@@ -0,0 +1,677 @@
import sys

# Absolute path of the C++ source file this script patches in place.
CPP = r'C:\Projects\ANLS\ANSLIB\ANSODEngine\ANSTENSORRTSEG.cpp'

# Read raw bytes so the search/replace operations are byte-exact and the
# file's original line endings are preserved on rewrite.
with open(CPP, 'rb') as f:
    data = f.read()

# Detect the file's line-ending convention; every multi-line pattern below is
# normalized to it via L() before searching.
CRLF = b'\r\n' in data
print(f'Line ending: {"CRLF" if CRLF else "LF"}')
def L(s, crlf=None):
    """Normalize the line endings of a bytes pattern to match the target file.

    Patterns in this script are written with bare ``\\n``; when the target
    file uses CRLF they must be converted before searching, otherwise no
    pattern would ever match.

    Args:
        s: bytes pattern using ``\\n`` line endings.
        crlf: optional override of the line-ending mode; when None (the
            default, preserving the original call sites) the module-level
            ``CRLF`` flag detected from the file is used.

    Returns:
        ``s`` with every ``\\n`` replaced by ``\\r\\n`` when CRLF mode is
        active, otherwise ``s`` unchanged.
    """
    if crlf is None:
        crlf = CRLF  # fall back to the flag detected from the target file
    if crlf:
        return s.replace(b'\n', b'\r\n')
    return s
errors = []
def replace_once(data, old, new, label):
    """Replace exactly one occurrence of ``old`` with ``new``.

    The pattern must occur exactly once; otherwise the mismatch is recorded
    in the module-level ``errors`` list and ``data`` is returned unchanged so
    the remaining patches can still be attempted and reported.
    """
    count = data.count(old)
    if count == 1:
        print(f'OK [{label}]')
        return data.replace(old, new, 1)
    errors.append(f'ERROR [{label}]: expected 1, found {count}')
    print(errors[-1])
    return data
def replace_all(data, old, new, expected, label):
    """Replace every occurrence of ``old`` with ``new``, verifying the count.

    The number of occurrences must equal ``expected``; otherwise the mismatch
    is recorded in the module-level ``errors`` list and ``data`` is returned
    unchanged so later patches can still run.
    """
    count = data.count(old)
    if count == expected:
        print(f'OK [{label}] ({count} replacements)')
        return data.replace(old, new)
    errors.append(f'ERROR [{label}]: expected {expected}, found {count}')
    print(errors[-1])
    return data
orig_size = len(data)

# 1. Add #include <future> immediately after the cudaimgproc include
#    (needed by the std::async-based batch splitting added in step 9).
data = replace_once(data,
    L(b'#include <opencv2/cudaimgproc.hpp>\n'),
    L(b'#include <opencv2/cudaimgproc.hpp>\n#include <future>\n'),
    'add future')

# 2. All three buildLoadNetwork call sites gain a max-slots-per-GPU argument.
data = replace_all(data,
    b'buildLoadNetwork(_modelFilePath, SUB_VALS, DIV_VALS, NORMALIZE)',
    b'buildLoadNetwork(_modelFilePath, SUB_VALS, DIV_VALS, NORMALIZE, m_maxSlotsPerGpu)',
    3, 'buildLoadNetwork 3x')
# 3. RunInference(camera_id) - release mutex before DetectObjects
# Validity checks move into a short scoped-lock block; DetectObjects then runs
# with the mutex released so inference can overlap across caller threads.
# NOTE(review): the indentation inside these byte literals appears collapsed
# to single spaces (possibly by a paste); verify they match the real file --
# replace_once will report 0 matches and abort the write if they do not.
old_ri = L(
    b' std::vector<Object> TENSORRTSEG::RunInference(const cv::Mat& inputImgBGR,const std::string& camera_id)\n'
    b' {\n'
    b' std::lock_guard<std::recursive_mutex> lock(_mutex);\n'
    b'\n'
    b' if (!_modelLoadValid) {\n'
    b' _logger.LogError("TENSORRTSEG::RunInference",\n'
    b' "Cannot load TensorRT model", __FILE__, __LINE__);\n'
    b' return {};\n'
    b' }\n'
    b'\n'
    b' if (!_licenseValid) {\n'
    b' _logger.LogError("TENSORRTSEG::RunInference",\n'
    b' "Invalid license", __FILE__, __LINE__);\n'
    b' return {};\n'
    b' }\n'
    b'\n'
    b' if (!_isInitialized) {\n'
    b' _logger.LogError("TENSORRTSEG::RunInference",\n'
    b' "Model not initialized", __FILE__, __LINE__);\n'
    b' return {};\n'
    b' }\n'
    b'\n'
    b' if (inputImgBGR.empty() || inputImgBGR.cols < 10 || inputImgBGR.rows < 10) {\n'
    b' return {};\n'
    b' }\n'
    b'\n'
    b' try {\n'
    b' return DetectObjects(inputImgBGR, camera_id);\n'
    b' }\n'
    b' catch (const std::exception& e) {\n'
    b' _logger.LogFatal("TENSORRTSEG::RunInference", e.what(), __FILE__, __LINE__);\n'
    b' return {};\n'
    b' }\n'
    b' }\n')
new_ri = L(
    b' std::vector<Object> TENSORRTSEG::RunInference(const cv::Mat& inputImgBGR,const std::string& camera_id)\n'
    b' {\n'
    b' {\n'
    b' std::lock_guard<std::recursive_mutex> lock(_mutex);\n'
    b' if (!_modelLoadValid) {\n'
    b' _logger.LogError("TENSORRTSEG::RunInference",\n'
    b' "Cannot load TensorRT model", __FILE__, __LINE__);\n'
    b' return {};\n'
    b' }\n'
    b' if (!_licenseValid) {\n'
    b' _logger.LogError("TENSORRTSEG::RunInference",\n'
    b' "Invalid license", __FILE__, __LINE__);\n'
    b' return {};\n'
    b' }\n'
    b' if (!_isInitialized) {\n'
    b' _logger.LogError("TENSORRTSEG::RunInference",\n'
    b' "Model not initialized", __FILE__, __LINE__);\n'
    b' return {};\n'
    b' }\n'
    b' if (inputImgBGR.empty() || inputImgBGR.cols < 10 || inputImgBGR.rows < 10) {\n'
    b' return {};\n'
    b' }\n'
    b' }\n'
    b' try {\n'
    b' return DetectObjects(inputImgBGR, camera_id);\n'
    b' }\n'
    b' catch (const std::exception& e) {\n'
    b' _logger.LogFatal("TENSORRTSEG::RunInference", e.what(), __FILE__, __LINE__);\n'
    b' return {};\n'
    b' }\n'
    b' }\n')
data = replace_once(data, old_ri, new_ri, 'RunInference refactor')
# 4. DetectObjects - two-phase mutex
# Rewrites DetectObjects into three phases: preprocess under lock, inference
# with the mutex released, postprocess under lock again. Per-image state
# (dims/ratio) moves from members into a local ImageMetadata so concurrent
# calls cannot clobber each other.
old_det = L(
    b' std::vector<Object> TENSORRTSEG::DetectObjects(const cv::Mat& inputImage, const std::string& camera_id) {\n'
    b' std::lock_guard<std::recursive_mutex> lock(_mutex);\n'
    b' try {\n'
    b' // Preprocess the input image\n'
    b' const auto input = Preprocess(inputImage);\n'
    b' std::vector<std::vector<std::vector<float>>> featureVectors;\n'
    b' auto succ = m_trtEngine->runInference(input, featureVectors);\n'
    b' if (succ) {\n'
    b' // Check if our model does only object detection or also supports segmentation\n'
    b' std::vector<Object> ret;\n'
    b' const auto& numOutputs = m_trtEngine->getOutputDims().size();\n'
    b' std::vector<std::vector<float>> featureVector;\n'
    b' Engine<float>::transformOutput(featureVectors, featureVector);\n'
    b' ret = PostProcessSegmentation(featureVector, camera_id);\n'
    b' return ret;\n'
    b' }\n'
    b' else {\n'
    b' this->_logger.LogError("TENSORRTSEG::DetectObjects", "Error running inference", __FILE__, __LINE__);\n'
    b' std::vector<Object> ret;\n'
    b' ret.clear();\n'
    b' return ret;\n'
    b' }\n'
    b' }\n'
    b' catch (std::exception& e) {\n'
    b' this->_logger.LogFatal("TENSORRTSEG::DetectObjects", e.what(), __FILE__, __LINE__);\n'
    b' std::vector<Object> ret;\n'
    b' ret.clear();\n'
    b' return ret;\n'
    b' }\n'
    b' }\n')
new_det = L(
    b' std::vector<Object> TENSORRTSEG::DetectObjects(const cv::Mat& inputImage, const std::string& camera_id) {\n'
    b' // Phase 1: Preprocess under lock\n'
    b' ImageMetadata meta;\n'
    b' std::vector<std::vector<cv::cuda::GpuMat>> input;\n'
    b' {\n'
    b' std::lock_guard<std::recursive_mutex> lock(_mutex);\n'
    b' input = Preprocess(inputImage, meta);\n'
    b' }\n'
    b' if (input.empty()) return {};\n'
    b'\n'
    b' // Phase 2: Inference -- mutex released; pool dispatches to idle GPU slot\n'
    b' std::vector<std::vector<std::vector<float>>> featureVectors;\n'
    b' auto succ = m_trtEngine->runInference(input, featureVectors);\n'
    b' if (!succ) {\n'
    b' this->_logger.LogError("TENSORRTSEG::DetectObjects", "Error running inference", __FILE__, __LINE__);\n'
    b' return {};\n'
    b' }\n'
    b'\n'
    b' // Phase 3: Postprocess under lock\n'
    b' std::lock_guard<std::recursive_mutex> lock(_mutex);\n'
    b' std::vector<std::vector<float>> featureVector;\n'
    b' Engine<float>::transformOutput(featureVectors, featureVector);\n'
    b' return PostProcessSegmentation(featureVector, camera_id, meta);\n'
    b' }\n')
data = replace_once(data, old_det, new_det, 'DetectObjects refactor')
# 5. Preprocess - remove inner lock, add outMeta param
# The caller (DetectObjects, step 4) now takes the lock, so the inner
# lock_guard line is dropped along with the signature change.
old_pre_sig = L(
    b' std::vector<std::vector<cv::cuda::GpuMat>> TENSORRTSEG::Preprocess(const cv::Mat& inputImage) {\n'
    b' std::lock_guard<std::recursive_mutex> lock(_mutex);\n')
new_pre_sig = L(
    b' std::vector<std::vector<cv::cuda::GpuMat>> TENSORRTSEG::Preprocess(const cv::Mat& inputImage, ImageMetadata& outMeta) {\n')
data = replace_once(data, old_pre_sig, new_pre_sig, 'Preprocess sig + remove lock')

# 6. Replace member writes in Preprocess with writes to the outMeta struct.
old_pw = L(
    b' // Set image size parameters\n'
    b' m_imgHeight = imgRGB.rows;\n'
    b' m_imgWidth = imgRGB.cols;\n'
    b' if (m_imgHeight > 0 && m_imgWidth > 0) {\n'
    b' m_ratio = 1.f / std::min(inputDims[0].d[2] / static_cast<float>(imgRGB.cols),\n'
    b' inputDims[0].d[1] / static_cast<float>(imgRGB.rows));\n')
new_pw = L(
    b' // Set image size parameters\n'
    b' outMeta.imgHeight = imgRGB.rows;\n'
    b' outMeta.imgWidth = imgRGB.cols;\n'
    b' if (outMeta.imgHeight > 0 && outMeta.imgWidth > 0) {\n'
    b' outMeta.ratio = 1.f / std::min(inputDims[0].d[2] / static_cast<float>(imgRGB.cols),\n'
    b' inputDims[0].d[1] / static_cast<float>(imgRGB.rows));\n')
data = replace_once(data, old_pw, new_pw, 'Preprocess member writes')

# 7. Replace error log in Preprocess so it reads outMeta instead of members.
old_pe = L(b' this->_logger.LogFatal("TENSORRTCL::Preprocess",\n'
    b' "Image height or width is zero after processing (Width: " + std::to_string(m_imgWidth) +\n'
    b' ", Height: " + std::to_string(m_imgHeight) + ")",\n')
new_pe = L(b' this->_logger.LogFatal("TENSORRTCL::Preprocess",\n'
    b' "Image height or width is zero after processing (Width: " + std::to_string(outMeta.imgWidth) +\n'
    b' ", Height: " + std::to_string(outMeta.imgHeight) + ")",\n')
data = replace_once(data, old_pe, new_pe, 'Preprocess error log')
# 8. PostProcessSegmentation:
# 8a. Change signature (remove lock, add meta param)
old_pss_sig = L(
    b' std::vector<Object> TENSORRTSEG::PostProcessSegmentation(std::vector<std::vector<float>>& featureVectors, const std::string& camera_id) {\n'
    b' std::lock_guard<std::recursive_mutex> lock(_mutex);\n')
new_pss_sig = L(
    b' std::vector<Object> TENSORRTSEG::PostProcessSegmentation(std::vector<std::vector<float>>& featureVectors, const std::string& camera_id, const ImageMetadata& meta) {\n')
data = replace_once(data, old_pss_sig, new_pss_sig, 'PostProcessSegmentation sig + remove lock')

# 8b. Fix dead-code bug: flip the if to == (sizes match -> process) so else returns early.
# Also remove the dead inner-if block and its dead return.
# Strategy: replace the opening dead-code block with flipped condition.
old_deadopen = L(
    b' if (featureVectors[0].size() != static_cast<size_t>(numChannels) * numAnchors) {\n'
    b' std::vector<Object>result;\n'
    b' result.clear();\n'
    b' return result;\n'
    b'\n'
    b' if (featureVectors[1].size() != static_cast<size_t>(SEG_CHANNELS) * SEG_H * SEG_W) {\n'
    b' std::vector<Object>result;\n'
    b' result.clear();\n'
    b' return result;\n'
    b' }\n'
    b'\n')
new_deadopen = L(
    b' if (featureVectors[0].size() == static_cast<size_t>(numChannels) * numAnchors &&\n'
    b' featureVectors[1].size() == static_cast<size_t>(SEG_CHANNELS) * SEG_H * SEG_W) {\n'
    b'\n')
data = replace_once(data, old_deadopen, new_deadopen, 'PostProcessSegmentation dead-code flip')

# 8c. Replace m_ratio/m_imgWidth/m_imgHeight -> meta.* in PostProcessSegmentation
# After steps 6+7, only PostProcessSegmentation body still uses these members.
# NOTE(review): these are whole-file replace_all calls relying on the exact
# expected counts (4/7/7) to guard against touching anything else -- e.g.
# m_batchImgWidths does NOT contain the substring m_imgWidth, so the batch
# code is unaffected. A count mismatch aborts the write.
data = replace_all(data, b'm_ratio', b'meta.ratio', 4, 'meta.ratio (4x in PostProcessSeg)')
data = replace_all(data, b'm_imgWidth', b'meta.imgWidth', 7, 'meta.imgWidth (7x in PostProcessSeg)')
data = replace_all(data, b'm_imgHeight', b'meta.imgHeight', 7, 'meta.imgHeight (7x in PostProcessSeg)')
# 9. Replace DetectObjectsBatch + PreprocessBatch + PostProcessSegmentationBatch bodies
# One large contiguous replacement covering three consecutive functions:
#  - DetectObjectsBatch: return type widens to per-image vectors, oversize
#    batches auto-split into chunks dispatched via std::async, and the
#    preprocess/inference/postprocess phases follow the step-4 locking scheme.
#  - PreprocessBatch: per-image dims/ratios move from members into an
#    out-parameter BatchMetadata struct (removes shared mutable state).
#  - PostProcessSegmentationBatch: gains a const BatchMetadata& parameter and
#    reads per-image dims from it instead of the members.
# NOTE(review): the async lambda in the new DetectObjectsBatch captures
# metadata by reference while copying the chunk -- confirm metadata outlives
# the futures (it does here: .get() is called before it goes out of scope).
old_batch_all = L(
    b' std::vector<Object> TENSORRTSEG::DetectObjectsBatch(const std::vector<cv::Mat>& inputImages, const std::string& camera_id) {\n'
    b' std::lock_guard<std::recursive_mutex> lock(_mutex);\n'
    b' std::vector<Object> allObjects;\n'
    b'\n'
    b' try {\n'
    b' // Validate inputs\n'
    b' if (inputImages.empty()) {\n'
    b' _logger.LogFatal("TENSORRTSEG::DetectObjectsBatch", "Empty input images vector", __FILE__, __LINE__);\n'
    b' return allObjects;\n'
    b' }\n'
    b'\n'
    b' _logger.LogInfo("TENSORRTSEG::DetectObjectsBatch",\n'
    b' "Processing batch of " + std::to_string(inputImages.size()) + " images",\n'
    b' __FILE__, __LINE__);\n'
    b'\n'
    b' // Preprocess all images in batch\n'
    b' const auto inputs = PreprocessBatch(inputImages);\n'
    b'\n'
    b' if (inputs.empty() || inputs[0].empty()) {\n'
    b' _logger.LogFatal("TENSORRTSEG::DetectObjectsBatch", "Preprocessing failed", __FILE__, __LINE__);\n'
    b' return allObjects;\n'
    b' }\n'
    b'\n'
    b' // Run batch inference\n'
    b' std::vector<std::vector<std::vector<float>>> featureVectors;\n'
    b' auto succ = m_trtEngine->runInference(inputs, featureVectors);\n'
    b'\n'
    b' if (!succ) {\n'
    b' _logger.LogError("TENSORRTSEG::DetectObjectsBatch", "Error running inference", __FILE__, __LINE__);\n'
    b' return allObjects;\n'
    b' }\n'
    b'\n'
    b' // Process each image\'s results\n'
    b' for (size_t batchIdx = 0; batchIdx < featureVectors.size(); ++batchIdx) {\n'
    b' // Transform output for this batch element\n'
    b' std::vector<std::vector<float>> featureVector;\n'
    b'\n'
    b' // featureVectors[batchIdx] contains the outputs for this image\n'
    b' // For segmentation: [detection_output, segmentation_masks]\n'
    b' if (!featureVectors[batchIdx].empty()) {\n'
    b' featureVector = featureVectors[batchIdx];\n'
    b' }\n'
    b'\n'
    b' // Get detections for this image\n'
    b' std::vector<Object> imageObjects = PostProcessSegmentationBatch(featureVector, camera_id, batchIdx);\n'
    b'\n'
    b' // Combine all detections into single vector\n'
    b' allObjects.insert(allObjects.end(),\n'
    b' std::make_move_iterator(imageObjects.begin()),\n'
    b' std::make_move_iterator(imageObjects.end()));\n'
    b' }\n'
    b'\n'
    b' _logger.LogInfo("TENSORRTSEG::DetectObjectsBatch",\n'
    b' "Batch processing complete. Total detections: " + std::to_string(allObjects.size()),\n'
    b' __FILE__, __LINE__);\n'
    b'\n'
    b' return allObjects;\n'
    b' }\n'
    b' catch (std::exception& e) {\n'
    b' _logger.LogFatal("TENSORRTSEG::DetectObjectsBatch", e.what(), __FILE__, __LINE__);\n'
    b' return allObjects;\n'
    b' }\n'
    b' }\n'
    b' std::vector<std::vector<cv::cuda::GpuMat>> TENSORRTSEG::PreprocessBatch(const std::vector<cv::Mat>& inputImages) {\n'
    b' std::lock_guard<std::recursive_mutex> lock(_mutex);\n'
    b'\n'
    b' try {\n'
    b' if (!_licenseValid) {\n'
    b' _logger.LogFatal("TENSORRTSEG::PreprocessBatch", "Invalid license", __FILE__, __LINE__);\n'
    b' return {};\n'
    b' }\n'
    b'\n'
    b' const auto& inputDims = m_trtEngine->getInputDims();\n'
    b' const int inputH = inputDims[0].d[1];\n'
    b' const int inputW = inputDims[0].d[2];\n'
    b'\n'
    b' // Store original image dimensions for each image in batch\n'
    b' m_batchImgHeights.resize(inputImages.size());\n'
    b' m_batchImgWidths.resize(inputImages.size());\n'
    b' m_batchRatios.resize(inputImages.size());\n'
    b'\n'
    b' std::vector<cv::cuda::GpuMat> batchProcessed;\n'
    b' batchProcessed.reserve(inputImages.size());\n'
    b'\n'
    b' cv::cuda::Stream stream;\n'
    b'\n'
    b' // Process each image\n'
    b' for (size_t i = 0; i < inputImages.size(); ++i) {\n'
    b' const auto& inputImage = inputImages[i];\n'
    b'\n'
    b' if (inputImage.empty()) {\n'
    b' _logger.LogFatal("TENSORRTSEG::PreprocessBatch",\n'
    b' "Empty input image at index " + std::to_string(i), __FILE__, __LINE__);\n'
    b' return {};\n'
    b' }\n'
    b'\n'
    b' // Upload to GPU\n'
    b' cv::cuda::GpuMat img;\n'
    b'\n'
    b' // Convert grayscale to BGR if needed\n'
    b' if (inputImage.channels() == 1) {\n'
    b' cv::Mat img3Channel;\n'
    b' cv::cvtColor(inputImage, img3Channel, cv::COLOR_GRAY2BGR);\n'
    b' img.upload(img3Channel, stream);\n'
    b' }\n'
    b' else {\n'
    b' img.upload(inputImage, stream);\n'
    b' }\n'
    b'\n'
    b' // Convert to RGB\n'
    b' cv::cuda::GpuMat imgRGB;\n'
    b' cv::cuda::cvtColor(img, imgRGB, cv::COLOR_BGR2RGB, 0, stream);\n'
    b'\n'
    b' // Store original dimensions\n'
    b' m_batchImgHeights[i] = imgRGB.rows;\n'
    b' m_batchImgWidths[i] = imgRGB.cols;\n'
    b'\n'
    b' if (m_batchImgHeights[i] <= 0 || m_batchImgWidths[i] <= 0) {\n'
    b' _logger.LogFatal("TENSORRTSEG::PreprocessBatch",\n'
    b' "Image " + std::to_string(i) + " has invalid dimensions (Width: " +\n'
    b' std::to_string(m_batchImgWidths[i]) + ", Height: " +\n'
    b' std::to_string(m_batchImgHeights[i]) + ")",\n'
    b' __FILE__, __LINE__);\n'
    b' return {};\n'
    b' }\n'
    b'\n'
    b' // Calculate ratio for this image\n'
    b' m_batchRatios[i] = 1.f / std::min(inputW / static_cast<float>(imgRGB.cols),\n'
    b' inputH / static_cast<float>(imgRGB.rows));\n'
    b'\n'
    b' // Resize with padding\n'
    b' cv::cuda::GpuMat resized = imgRGB;\n'
    b' if (resized.rows != inputH || resized.cols != inputW) {\n'
    b' resized = Engine<float>::resizeKeepAspectRatioPadRightBottom(imgRGB, inputH, inputW);\n'
    b' }\n'
    b'\n'
    b' batchProcessed.push_back(std::move(resized));\n'
    b' }\n'
    b'\n'
    b' stream.waitForCompletion();\n'
    b'\n'
    b' // Return as required format\n'
    b' std::vector<std::vector<cv::cuda::GpuMat>> inputs;\n'
    b' inputs.push_back(std::move(batchProcessed));\n'
    b'\n'
    b' return inputs;\n'
    b' }\n'
    b' catch (const std::exception& e) {\n'
    b' _logger.LogFatal("TENSORRTSEG::PreprocessBatch", e.what(), __FILE__, __LINE__);\n'
    b' return {};\n'
    b' }\n'
    b' }\n'
    b' std::vector<Object> TENSORRTSEG::PostProcessSegmentationBatch(std::vector<std::vector<float>>& featureVectors,const std::string& camera_id,size_t batchIdx) {\n'
    b' std::lock_guard<std::recursive_mutex> lock(_mutex);\n'
    b'\n'
    b' try {\n'
    b' if (!_licenseValid) {\n'
    b' _logger.LogFatal("TENSORRTSEG::PostProcessSegmentationBatch", "Invalid license", __FILE__, __LINE__);\n'
    b' return {};\n'
    b' }\n'
    b'\n'
    b' const auto& outputDims = m_trtEngine->getOutputDims();\n'
    b'\n'
    b' int numChannels = outputDims[0].d[1];\n'
    b' int numAnchors = outputDims[0].d[2];\n'
    b'\n'
    b' const auto numClasses = numChannels - SEG_CHANNELS - 4;\n'
    b'\n'
    b' // Get batch-specific dimensions\n'
    b' float ratio = m_batchRatios[batchIdx];\n'
    b' float imgWidth = static_cast<float>(m_batchImgWidths[batchIdx]);\n'
    b' float imgHeight = static_cast<float>(m_batchImgHeights[batchIdx]);\n')
new_batch_all = L(
    b' std::vector<std::vector<Object>> TENSORRTSEG::DetectObjectsBatch(const std::vector<cv::Mat>& inputImages, const std::string& camera_id) {\n'
    b' {\n'
    b' std::lock_guard<std::recursive_mutex> lock(_mutex);\n'
    b' if (inputImages.empty()) {\n'
    b' _logger.LogFatal("TENSORRTSEG::DetectObjectsBatch", "Empty input images vector", __FILE__, __LINE__);\n'
    b' return {};\n'
    b' }\n'
    b' }\n'
    b'\n'
    b' // Auto-split if batch exceeds engine capacity\n'
    b' const int maxBatch = m_options.maxBatchSize > 0 ? m_options.maxBatchSize : 1;\n'
    b' if (static_cast<int>(inputImages.size()) > maxBatch) {\n'
    b' const size_t numImages = inputImages.size();\n'
    b' std::vector<std::vector<cv::Mat>> chunks;\n'
    b' for (size_t start = 0; start < numImages; start += static_cast<size_t>(maxBatch)) {\n'
    b' const size_t end = std::min(start + static_cast<size_t>(maxBatch), numImages);\n'
    b' chunks.emplace_back(inputImages.begin() + start, inputImages.begin() + end);\n'
    b' }\n'
    b' std::vector<std::future<std::vector<std::vector<Object>>>> futures;\n'
    b' futures.reserve(chunks.size());\n'
    b' for (size_t i = 0; i < chunks.size(); ++i) {\n'
    b' futures.push_back(std::async(std::launch::async,\n'
    b' [this, c = chunks[i], cid = camera_id]() {\n'
    b' return DetectObjectsBatch(c, cid);\n'
    b' }));\n'
    b' }\n'
    b' std::vector<std::vector<Object>> allResults;\n'
    b' allResults.reserve(numImages);\n'
    b' for (auto& fut : futures) {\n'
    b' auto chunkResults = fut.get();\n'
    b' for (auto& r : chunkResults) allResults.push_back(std::move(r));\n'
    b' }\n'
    b' return allResults;\n'
    b' }\n'
    b'\n'
    b' _logger.LogInfo("TENSORRTSEG::DetectObjectsBatch",\n'
    b' "Processing batch of " + std::to_string(inputImages.size()) + " images",\n'
    b' __FILE__, __LINE__);\n'
    b'\n'
    b' // Phase 1: Preprocess under brief lock\n'
    b' BatchMetadata metadata;\n'
    b' std::vector<std::vector<cv::cuda::GpuMat>> inputs;\n'
    b' {\n'
    b' std::lock_guard<std::recursive_mutex> lock(_mutex);\n'
    b' inputs = PreprocessBatch(inputImages, metadata);\n'
    b' }\n'
    b' if (inputs.empty() || inputs[0].empty()) {\n'
    b' _logger.LogFatal("TENSORRTSEG::DetectObjectsBatch", "Preprocessing failed", __FILE__, __LINE__);\n'
    b' return {};\n'
    b' }\n'
    b'\n'
    b' // Phase 2: Inference -- mutex released; pool dispatches to idle GPU slot\n'
    b' std::vector<std::vector<std::vector<float>>> featureVectors;\n'
    b' auto succ = m_trtEngine->runInference(inputs, featureVectors);\n'
    b' if (!succ) {\n'
    b' _logger.LogError("TENSORRTSEG::DetectObjectsBatch", "Error running inference", __FILE__, __LINE__);\n'
    b' return {};\n'
    b' }\n'
    b'\n'
    b' // Phase 3: Parallel postprocessing -- each image is independent\n'
    b' const size_t numBatch = featureVectors.size();\n'
    b' std::vector<std::vector<Object>> batchDetections(numBatch);\n'
    b' std::vector<std::future<std::vector<Object>>> postFutures;\n'
    b' postFutures.reserve(numBatch);\n'
    b' for (size_t batchIdx = 0; batchIdx < numBatch; ++batchIdx) {\n'
    b' const auto& batchOutput = featureVectors[batchIdx];\n'
    b' std::vector<std::vector<float>> fv =\n'
    b' batchOutput.empty() ? std::vector<std::vector<float>>{} : batchOutput;\n'
    b' postFutures.push_back(std::async(std::launch::async,\n'
    b' [this, fv = std::move(fv), cid = camera_id,\n'
    b' idx = batchIdx, &metadata]() mutable {\n'
    b' return PostProcessSegmentationBatch(fv, cid, idx, metadata);\n'
    b' }));\n'
    b' }\n'
    b' for (size_t i = 0; i < numBatch; ++i)\n'
    b' batchDetections[i] = postFutures[i].get();\n'
    b'\n'
    b' _logger.LogInfo("TENSORRTSEG::DetectObjectsBatch",\n'
    b' "Batch processing complete. Images: " + std::to_string(numBatch),\n'
    b' __FILE__, __LINE__);\n'
    b' return batchDetections;\n'
    b' }\n'
    b' std::vector<std::vector<cv::cuda::GpuMat>> TENSORRTSEG::PreprocessBatch(const std::vector<cv::Mat>& inputImages, BatchMetadata& outMetadata) {\n'
    b' try {\n'
    b' if (!_licenseValid) {\n'
    b' _logger.LogFatal("TENSORRTSEG::PreprocessBatch", "Invalid license", __FILE__, __LINE__);\n'
    b' return {};\n'
    b' }\n'
    b'\n'
    b' const auto& inputDims = m_trtEngine->getInputDims();\n'
    b' const int inputH = inputDims[0].d[1];\n'
    b' const int inputW = inputDims[0].d[2];\n'
    b'\n'
    b' // Store original image dimensions for each image in batch\n'
    b' outMetadata.imgHeights.resize(inputImages.size());\n'
    b' outMetadata.imgWidths.resize(inputImages.size());\n'
    b' outMetadata.ratios.resize(inputImages.size());\n'
    b'\n'
    b' std::vector<cv::cuda::GpuMat> batchProcessed;\n'
    b' batchProcessed.reserve(inputImages.size());\n'
    b'\n'
    b' cv::cuda::Stream stream;\n'
    b'\n'
    b' // Process each image\n'
    b' for (size_t i = 0; i < inputImages.size(); ++i) {\n'
    b' const auto& inputImage = inputImages[i];\n'
    b'\n'
    b' if (inputImage.empty()) {\n'
    b' _logger.LogFatal("TENSORRTSEG::PreprocessBatch",\n'
    b' "Empty input image at index " + std::to_string(i), __FILE__, __LINE__);\n'
    b' return {};\n'
    b' }\n'
    b'\n'
    b' // Upload to GPU\n'
    b' cv::cuda::GpuMat img;\n'
    b'\n'
    b' // Convert grayscale to BGR if needed\n'
    b' if (inputImage.channels() == 1) {\n'
    b' cv::Mat img3Channel;\n'
    b' cv::cvtColor(inputImage, img3Channel, cv::COLOR_GRAY2BGR);\n'
    b' img.upload(img3Channel, stream);\n'
    b' }\n'
    b' else {\n'
    b' img.upload(inputImage, stream);\n'
    b' }\n'
    b'\n'
    b' // Convert to RGB\n'
    b' cv::cuda::GpuMat imgRGB;\n'
    b' cv::cuda::cvtColor(img, imgRGB, cv::COLOR_BGR2RGB, 0, stream);\n'
    b'\n'
    b' // Store original dimensions\n'
    b' outMetadata.imgHeights[i] = imgRGB.rows;\n'
    b' outMetadata.imgWidths[i] = imgRGB.cols;\n'
    b'\n'
    b' if (outMetadata.imgHeights[i] <= 0 || outMetadata.imgWidths[i] <= 0) {\n'
    b' _logger.LogFatal("TENSORRTSEG::PreprocessBatch",\n'
    b' "Image " + std::to_string(i) + " has invalid dimensions (Width: " +\n'
    b' std::to_string(outMetadata.imgWidths[i]) + ", Height: " +\n'
    b' std::to_string(outMetadata.imgHeights[i]) + ")",\n'
    b' __FILE__, __LINE__);\n'
    b' return {};\n'
    b' }\n'
    b'\n'
    b' // Calculate ratio for this image\n'
    b' outMetadata.ratios[i] = 1.f / std::min(inputW / static_cast<float>(imgRGB.cols),\n'
    b' inputH / static_cast<float>(imgRGB.rows));\n'
    b'\n'
    b' // Resize with padding\n'
    b' cv::cuda::GpuMat resized = imgRGB;\n'
    b' if (resized.rows != inputH || resized.cols != inputW) {\n'
    b' resized = Engine<float>::resizeKeepAspectRatioPadRightBottom(imgRGB, inputH, inputW);\n'
    b' }\n'
    b'\n'
    b' batchProcessed.push_back(std::move(resized));\n'
    b' }\n'
    b'\n'
    b' stream.waitForCompletion();\n'
    b'\n'
    b' // Return as required format\n'
    b' std::vector<std::vector<cv::cuda::GpuMat>> inputs;\n'
    b' inputs.push_back(std::move(batchProcessed));\n'
    b'\n'
    b' return inputs;\n'
    b' }\n'
    b' catch (const std::exception& e) {\n'
    b' _logger.LogFatal("TENSORRTSEG::PreprocessBatch", e.what(), __FILE__, __LINE__);\n'
    b' return {};\n'
    b' }\n'
    b' }\n'
    b' std::vector<Object> TENSORRTSEG::PostProcessSegmentationBatch(std::vector<std::vector<float>>& featureVectors, const std::string& camera_id, size_t batchIdx, const BatchMetadata& metadata) {\n'
    b' try {\n'
    b' if (!_licenseValid) {\n'
    b' _logger.LogFatal("TENSORRTSEG::PostProcessSegmentationBatch", "Invalid license", __FILE__, __LINE__);\n'
    b' return {};\n'
    b' }\n'
    b'\n'
    b' const auto& outputDims = m_trtEngine->getOutputDims();\n'
    b'\n'
    b' int numChannels = outputDims[0].d[1];\n'
    b' int numAnchors = outputDims[0].d[2];\n'
    b'\n'
    b' const auto numClasses = numChannels - SEG_CHANNELS - 4;\n'
    b'\n'
    b' // Get batch-specific dimensions\n'
    b' float ratio = metadata.ratios[batchIdx];\n'
    b' float imgWidth = static_cast<float>(metadata.imgWidths[batchIdx]);\n'
    b' float imgHeight = static_cast<float>(metadata.imgHeights[batchIdx]);\n')
data = replace_once(data, old_batch_all, new_batch_all, 'DetectObjectsBatch+PreprocessBatch+PostProcessSegmentationBatch rewrite')
# 10. Add RunInferencesBatch before namespace close
# Anchors on the tail of PostProcessSegmentationBatch plus the closing
# namespace brace, and inserts the new RunInferencesBatch entry point (same
# short-lock validation pattern as step 3) before re-closing the namespace.
old_ns_close = L(
    b' return objs;\n'
    b' }\n'
    b' catch (std::exception& e) {\n'
    b' _logger.LogFatal("TENSORRTSEG::PostProcessSegmentationBatch", e.what(), __FILE__, __LINE__);\n'
    b' return {};\n'
    b' }\n'
    b' }\n'
    b' }\n')
new_ns_close = L(
    b' return objs;\n'
    b' }\n'
    b' catch (std::exception& e) {\n'
    b' _logger.LogFatal("TENSORRTSEG::PostProcessSegmentationBatch", e.what(), __FILE__, __LINE__);\n'
    b' return {};\n'
    b' }\n'
    b' }\n'
    b' std::vector<std::vector<Object>> TENSORRTSEG::RunInferencesBatch(\n'
    b' const std::vector<cv::Mat>& inputs, const std::string& camera_id)\n'
    b' {\n'
    b' {\n'
    b' std::lock_guard<std::recursive_mutex> lock(_mutex);\n'
    b' if (!_modelLoadValid) {\n'
    b' _logger.LogError("TENSORRTSEG::RunInferencesBatch",\n'
    b' "Model not loaded", __FILE__, __LINE__);\n'
    b' return {};\n'
    b' }\n'
    b' if (!_licenseValid) {\n'
    b' _logger.LogError("TENSORRTSEG::RunInferencesBatch",\n'
    b' "Invalid license", __FILE__, __LINE__);\n'
    b' return {};\n'
    b' }\n'
    b' if (!_isInitialized) {\n'
    b' _logger.LogError("TENSORRTSEG::RunInferencesBatch",\n'
    b' "Engine not initialized", __FILE__, __LINE__);\n'
    b' return {};\n'
    b' }\n'
    b' if (inputs.empty()) return {};\n'
    b' }\n'
    b' try {\n'
    b' return DetectObjectsBatch(inputs, camera_id);\n'
    b' }\n'
    b' catch (const std::exception& e) {\n'
    b' _logger.LogFatal("TENSORRTSEG::RunInferencesBatch", e.what(), __FILE__, __LINE__);\n'
    b' return {};\n'
    b' }\n'
    b' }\n'
    b'}\n')
data = replace_once(data, old_ns_close, new_ns_close, 'Add RunInferencesBatch + fix namespace close')
# Summary
# All-or-nothing commit: the patched bytes are written back only when every
# replacement matched its expected count; any mismatch leaves the file intact.
print(f'\nOriginal size: {orig_size} bytes')
print(f'New size: {len(data)} bytes')
if errors:
    print(f'\n{len(errors)} ERROR(S) -- file NOT written')
    sys.exit(1)
else:
    with open(CPP, 'wb') as f:
        f.write(data)
    print('\nOK -- file written')