"""One-shot patch script for ANSTENSORRTPOSE.cpp.

Applies a fixed sequence of anchored byte-level edits that refactor the
pose-inference engine for multi-slot GPU use:

  1. add an #include,
  2. pass m_maxSlotsPerGpu into the three buildLoadNetwork() calls,
  3. RunInference: release the mutex before dispatching to DetectObjects,
  4. DetectObjects: three-phase locking (lock for Preprocess, unlocked
     inference, lock for PostProcessPose),
  5. Preprocess: drop its internal lock and write image metadata into an
     out-parameter instead of member fields,
  6. PostProcessPose: take that metadata as a const parameter,
  7. rewrite the whole batch path (DetectObjectsBatch / PreprocessBatch /
     PostProcessPoseBatch) with auto-splitting via std::async,
  8. append a new RunInferencesBatch entry point before the namespace close.

Every edit is guarded by an exact-match count (or unique-slice) check; if any
check fails, the error is recorded and the target file is NOT written, so a
partially-applied patch cannot be produced.

NOTE(review): the embedded C++ byte literals in this copy look garbled by an
angle-bracket-stripping extraction -- e.g. `#include ` has no header name,
`std::vector`, `std::lock_guard`, `static_cast`, `std::min` and `std::async`
appear without their template argument lists, and the C++ leading indentation
inside the literals seems collapsed to single spaces. Presumably the original
script carried the full text (`#include <future>`, `std::lock_guard<std::mutex>`,
`static_cast<float>`, ...). Verify every literal against the real target file
before running; as written, the count checks would fail and the script would
(safely) refuse to write.
"""
import sys

# Absolute path of the C++ translation unit being patched.
CPP = r'C:\Projects\ANLS\ANSLIB\ANSODEngine\ANSTENSORRTPOSE.cpp'

# Read the whole file as raw bytes so line endings and encoding are untouched.
with open(CPP, 'rb') as f:
    data = f.read()

# Detect the file's line-ending convention once; all replacement text is
# authored with '\n' and converted via L() to match.
CRLF = b'\r\n' in data
print(f'Line ending: {"CRLF" if CRLF else "LF"}')

def L(s):
    # Normalize an LF-authored byte string to the file's detected line ending.
    if CRLF:
        return s.replace(b'\n', b'\r\n')
    return s

# Accumulates human-readable failure messages; non-empty => file not written.
errors = []

def replace_once(data, old, new, label):
    # Replace `old` with `new` exactly once; record an error (and leave `data`
    # unchanged) unless `old` occurs exactly one time.
    count = data.count(old)
    if count != 1:
        errors.append(f'ERROR [{label}]: expected 1, found {count}')
        print(errors[-1])
        return data
    print(f'OK [{label}]')
    return data.replace(old, new, 1)

def replace_all(data, old, new, expected, label):
    # Replace every occurrence of `old`, but only if the occurrence count
    # matches `expected`; otherwise record an error and leave `data` unchanged.
    count = data.count(old)
    if count != expected:
        errors.append(f'ERROR [{label}]: expected {expected}, found {count}')
        print(errors[-1])
        return data
    print(f'OK [{label}] ({count} replacements)')
    return data.replace(old, new)

def replace_slice(data, start_anchor, end_anchor, new_content, label):
    """Find start_anchor, then end_anchor after it, replace [start..end+len] with new_content."""
    s = data.find(start_anchor)
    if s == -1:
        errors.append(f'ERROR [{label}]: start_anchor not found: {start_anchor[:50]!r}')
        print(errors[-1])
        return data
    e = data.find(end_anchor, s + len(start_anchor))
    if e == -1:
        errors.append(f'ERROR [{label}]: end_anchor not found: {end_anchor[:50]!r}')
        print(errors[-1])
        return data
    e += len(end_anchor)
    old_chunk = data[s:e]
    # Guard against the extracted span appearing elsewhere in the file, which
    # would make the edit ambiguous.
    if data.count(old_chunk) != 1:
        errors.append(f'ERROR [{label}]: extracted chunk not unique (count={data.count(old_chunk)})')
        print(errors[-1])
        return data
    print(f'OK [{label}]')
    return data[:s] + new_content + data[e:]

orig_size = len(data)

# 1. Add #include
# NOTE(review): both literals are missing the header name after `#include ` in
# this copy -- presumably `<future>` is being added for std::async; confirm.
data = replace_once(
    data,
    L(b'#include \n'),
    L(b'#include \n#include \n'),
    'add future')

# 2. Three buildLoadNetwork calls -> add m_maxSlotsPerGpu
data = replace_all(
    data,
    b'buildLoadNetwork(_modelFilePath, SUB_VALS, DIV_VALS, NORMALIZE)',
    b'buildLoadNetwork(_modelFilePath, SUB_VALS, DIV_VALS, NORMALIZE, m_maxSlotsPerGpu)',
    3, 'buildLoadNetwork 3x')

# 3. RunInference(camera_id) - release mutex before DetectObjects
# Old body: single lock scope containing all validation then try/catch
# New body: brief lock scope for validation, then try { return DetectObjects; } catch { ... }
old_ri = L(
    b' std::vector ANSTENSORRTPOSE::RunInference(const cv::Mat& inputImgBGR,const std::string& camera_id)\n'
    b' {\n'
    b' std::lock_guard lock(_mutex);\n'
    b'\n'
    b' // Validation checks\n'
    b' if (!_modelLoadValid) {\n'
    b' _logger.LogError("ANSTENSORRTPOSE::RunInference",\n'
    b' "Cannot load the TensorRT model. Please check if it exists",\n'
    b' __FILE__, __LINE__);\n'
    b' return {};\n'
    b' }\n'
    b'\n'
    b' if (!_licenseValid) {\n'
    b' _logger.LogError("ANSTENSORRTPOSE::RunInference",\n'
    b' "Runtime license is not valid or expired. Please contact ANSCENTER",\n'
    b' __FILE__, __LINE__);\n'
    b' return {};\n'
    b' }\n'
    b'\n'
    b' if (!_isInitialized) {\n'
    b' _logger.LogError("ANSTENSORRTPOSE::RunInference",\n'
    b' "Model is not initialized",\n'
    b' __FILE__, __LINE__);\n'
    b' return {};\n'
    b' }\n'
    b'\n'
    b' if (inputImgBGR.empty() || inputImgBGR.cols < 10 || inputImgBGR.rows < 10) {\n'
    b' return {};\n'
    b' }\n'
    b'\n'
    b' try {\n'
    b' return DetectObjects(inputImgBGR, camera_id);\n'
    b' }\n'
    b' catch (const std::exception& e) {\n'
    b' _logger.LogFatal("ANSTENSORRTPOSE::RunInference", e.what(), __FILE__, __LINE__);\n'
    b' return {};\n'
    b' }\n'
    b' }\n')
new_ri = L(
    b' std::vector ANSTENSORRTPOSE::RunInference(const cv::Mat& inputImgBGR,const std::string& camera_id)\n'
    b' {\n'
    b' {\n'
    b' std::lock_guard lock(_mutex);\n'
    b'\n'
    b' // Validation checks\n'
    b' if (!_modelLoadValid) {\n'
    b' _logger.LogError("ANSTENSORRTPOSE::RunInference",\n'
    b' "Cannot load the TensorRT model. Please check if it exists",\n'
    b' __FILE__, __LINE__);\n'
    b' return {};\n'
    b' }\n'
    b'\n'
    b' if (!_licenseValid) {\n'
    b' _logger.LogError("ANSTENSORRTPOSE::RunInference",\n'
    b' "Runtime license is not valid or expired. Please contact ANSCENTER",\n'
    b' __FILE__, __LINE__);\n'
    b' return {};\n'
    b' }\n'
    b'\n'
    b' if (!_isInitialized) {\n'
    b' _logger.LogError("ANSTENSORRTPOSE::RunInference",\n'
    b' "Model is not initialized",\n'
    b' __FILE__, __LINE__);\n'
    b' return {};\n'
    b' }\n'
    b'\n'
    b' if (inputImgBGR.empty() || inputImgBGR.cols < 10 || inputImgBGR.rows < 10) {\n'
    b' return {};\n'
    b' }\n'
    b' }\n'
    b' try {\n'
    b' return DetectObjects(inputImgBGR, camera_id);\n'
    b' }\n'
    b' catch (const std::exception& e) {\n'
    b' _logger.LogFatal("ANSTENSORRTPOSE::RunInference", e.what(), __FILE__, __LINE__);\n'
    b' return {};\n'
    b' }\n'
    b' }\n')
data = replace_once(data, old_ri, new_ri, 'RunInference refactor')

# 4. DetectObjects - three-phase mutex
# Old body: single lock, try { Preprocess -> runInference -> if(succ) PostProcessPose }
# New body: Phase1 lock+Preprocess, Phase2 inference, Phase3 lock+PostProcessPose
old_det = L(
    b' // private\n'
    b' std::vector ANSTENSORRTPOSE::DetectObjects(const cv::Mat& inputImage, const std::string& camera_id) {\n'
    b' std::lock_guard lock(_mutex);\n'
    b' try {\n'
    b' // Preprocess the input image\n'
    b' const auto input = Preprocess(inputImage);\n'
    b' std::vector>> featureVectors;\n'
    b' auto succ = m_trtEngine->runInference(input, featureVectors);\n'
    b' if (succ) {\n'
    b' // Check if our model does only object detection or also supports segmentation\n'
    b' std::vector ret;\n'
    b' const auto& numOutputs = m_trtEngine->getOutputDims().size();\n'
    b' std::vector featureVector;\n'
    b' Engine::transformOutput(featureVectors, featureVector);\n'
    b' const auto& outputDims = m_trtEngine->getOutputDims();\n'
    b' int numChannels = outputDims[outputDims.size() - 1].d[1];\n'
    b' ret = PostProcessPose(featureVector, camera_id);\n'
    b' return ret;\n'
    b' }\n'
    b' else {\n'
    b' this->_logger.LogError("ANSTENSORRTPOSE::DetectObjects", "Error running inference", __FILE__, __LINE__);\n'
    b' std::vector ret;\n'
    b' ret.clear();\n'
    b' return ret;\n'
    b' }\n'
    b' }\n'
    b' catch (std::exception& e) {\n'
    b' this->_logger.LogFatal("ANSTENSORRTPOSE::DetectObjects", e.what(), __FILE__, __LINE__);\n'
    b' std::vector ret;\n'
    b' ret.clear();\n'
    b' return ret;\n'
    b' }\n'
    b' }\n')
new_det = L(
    b' // private\n'
    b' std::vector ANSTENSORRTPOSE::DetectObjects(const cv::Mat& inputImage, const std::string& camera_id) {\n'
    b' // Phase 1: Preprocess under lock\n'
    b' ImageMetadata meta;\n'
    b' std::vector> input;\n'
    b' {\n'
    b' std::lock_guard lock(_mutex);\n'
    b' input = Preprocess(inputImage, meta);\n'
    b' }\n'
    b' if (input.empty()) return {};\n'
    b'\n'
    b' // Phase 2: Inference -- mutex released; pool dispatches to idle GPU slot\n'
    b' std::vector>> featureVectors;\n'
    b' auto succ = m_trtEngine->runInference(input, featureVectors);\n'
    b' if (!succ) {\n'
    b' this->_logger.LogError("ANSTENSORRTPOSE::DetectObjects", "Error running inference", __FILE__, __LINE__);\n'
    b' return {};\n'
    b' }\n'
    b'\n'
    b' // Phase 3: Postprocess under lock\n'
    b' std::lock_guard lock(_mutex);\n'
    b' std::vector featureVector;\n'
    b' Engine::transformOutput(featureVectors, featureVector);\n'
    b' return PostProcessPose(featureVector, camera_id, meta);\n'
    b' }\n')
data = replace_once(data, old_det, new_det, 'DetectObjects refactor')

# 5a. Preprocess - remove inner lock, add outMeta param
old_pre_sig = L(
    b' std::vector> ANSTENSORRTPOSE::Preprocess(const cv::Mat& inputImage) {\n'
    b' std::lock_guard lock(_mutex);\n')
new_pre_sig = L(
    b' std::vector> ANSTENSORRTPOSE::Preprocess(const cv::Mat& inputImage, ImageMetadata& outMeta) {\n')
data = replace_once(data, old_pre_sig, new_pre_sig, 'Preprocess sig + remove lock')

# 5b. Preprocess member writes -> outMeta
# Actual file has blank line + "// Set image size parameters" comment before the member writes
old_pw = L(
    b' stream.waitForCompletion();\n'
    b'\n'
    b' // Set image size parameters\n'
    b' m_imgHeight = imgRGB.rows;\n'
    b' m_imgWidth = imgRGB.cols;\n'
    b' if (m_imgHeight > 0 && m_imgWidth > 0) {\n'
    b' m_ratio = 1.f / std::min(inputDims[0].d[2] / static_cast(imgRGB.cols),\n'
    b' inputDims[0].d[1] / static_cast(imgRGB.rows));\n')
new_pw = L(
    b' stream.waitForCompletion();\n'
    b'\n'
    b' // Set image size parameters\n'
    b' outMeta.imgHeight = imgRGB.rows;\n'
    b' outMeta.imgWidth = imgRGB.cols;\n'
    b' if (outMeta.imgHeight > 0 && outMeta.imgWidth > 0) {\n'
    b' outMeta.ratio = 1.f / std::min(inputDims[0].d[2] / static_cast(imgRGB.cols),\n'
    b' inputDims[0].d[1] / static_cast(imgRGB.rows));\n')
data = replace_once(data, old_pw, new_pw, 'Preprocess member writes')

# 5c. Preprocess error log
old_pe = L(
    b' this->_logger.LogFatal("TENSORRTCL::Preprocess",\n'
    b' "Image height or width is zero after processing (Width: " + std::to_string(m_imgWidth) +\n'
    b' ", Height: " + std::to_string(m_imgHeight) + ")",\n')
new_pe = L(
    b' this->_logger.LogFatal("TENSORRTCL::Preprocess",\n'
    b' "Image height or width is zero after processing (Width: " + std::to_string(outMeta.imgWidth) +\n'
    b' ", Height: " + std::to_string(outMeta.imgHeight) + ")",\n')
data = replace_once(data, old_pe, new_pe, 'Preprocess error log')

# 6a. PostProcessPose - remove lock, add const ImageMetadata& meta param
old_ppp_sig = L(
    b' std::vector ANSTENSORRTPOSE::PostProcessPose(std::vector& featureVector, const std::string& camera_id) {\n'
    b' std::lock_guard lock(_mutex);\n')
new_ppp_sig = L(
    b' std::vector ANSTENSORRTPOSE::PostProcessPose(std::vector& featureVector, const std::string& camera_id, const ImageMetadata& meta) {\n')
data = replace_once(data, old_ppp_sig, new_ppp_sig, 'PostProcessPose sig + remove lock')

# 6b/c/d. Replace m_ratio/m_imgWidth/m_imgHeight -> meta.* in PostProcessPose
# After steps 5b+5c fix Preprocess (removes 1/3/3), remaining counts: 6/4/4 (all in PostProcessPose)
# NOTE(review): these global replacements only target PostProcessPose because
# earlier steps already consumed every other occurrence -- if step 5b or 5c
# failed, the expected counts here will not match and the run aborts safely.
data = replace_all(data, b'm_ratio', b'meta.ratio', 6, 'meta.ratio (6x in PostProcessPose)')
data = replace_all(data, b'm_imgWidth', b'meta.imgWidth', 4, 'meta.imgWidth (4x in PostProcessPose)')
data = replace_all(data, b'm_imgHeight', b'meta.imgHeight', 4, 'meta.imgHeight (4x in PostProcessPose)')

# 7. Rewrite DetectObjectsBatch + PreprocessBatch + PostProcessPoseBatch beginning
# Use anchor-based slice replacement to handle the exact bytes without hardcoding the full block
new_batch_all = L(
    b' std::vector> ANSTENSORRTPOSE::DetectObjectsBatch(const std::vector& inputImages, const std::string& camera_id) {\n'
    b' {\n'
    b' std::lock_guard lock(_mutex);\n'
    b' if (inputImages.empty()) {\n'
    b' _logger.LogFatal("ANSTENSORRTPOSE::DetectObjectsBatch", "Empty input images vector", __FILE__, __LINE__);\n'
    b' return {};\n'
    b' }\n'
    b' }\n'
    b'\n'
    b' // Auto-split if batch exceeds engine capacity\n'
    b' const int maxBatch = m_options.maxBatchSize > 0 ? m_options.maxBatchSize : 1;\n'
    b' if (static_cast(inputImages.size()) > maxBatch) {\n'
    b' const size_t numImages = inputImages.size();\n'
    b' std::vector> chunks;\n'
    b' for (size_t start = 0; start < numImages; start += static_cast(maxBatch)) {\n'
    b' const size_t end = std::min(start + static_cast(maxBatch), numImages);\n'
    b' chunks.emplace_back(inputImages.begin() + start, inputImages.begin() + end);\n'
    b' }\n'
    b' std::vector>>> futures;\n'
    b' futures.reserve(chunks.size());\n'
    b' for (size_t i = 0; i < chunks.size(); ++i) {\n'
    b' futures.push_back(std::async(std::launch::async,\n'
    b' [this, c = chunks[i], cid = camera_id]() {\n'
    b' return DetectObjectsBatch(c, cid);\n'
    b' }));\n'
    b' }\n'
    b' std::vector> allResults;\n'
    b' allResults.reserve(numImages);\n'
    b' for (auto& fut : futures) {\n'
    b' auto chunkResults = fut.get();\n'
    b' for (auto& r : chunkResults) allResults.push_back(std::move(r));\n'
    b' }\n'
    b' return allResults;\n'
    b' }\n'
    b'\n'
    b' _logger.LogInfo("ANSTENSORRTPOSE::DetectObjectsBatch",\n'
    b' "Processing batch of " + std::to_string(inputImages.size()) + " images", __FILE__, __LINE__);\n'
    b'\n'
    b' // Phase 1: Preprocess under brief lock\n'
    b' BatchMetadata metadata;\n'
    b' std::vector> inputs;\n'
    b' {\n'
    b' std::lock_guard lock(_mutex);\n'
    b' inputs = PreprocessBatch(inputImages, metadata);\n'
    b' }\n'
    b' if (inputs.empty() || inputs[0].empty()) {\n'
    b' _logger.LogFatal("ANSTENSORRTPOSE::DetectObjectsBatch", "Preprocessing failed", __FILE__, __LINE__);\n'
    b' return {};\n'
    b' }\n'
    b'\n'
    b' // Phase 2: Inference -- mutex released; pool dispatches to idle GPU slot\n'
    b' std::vector>> featureVectors;\n'
    b' auto succ = m_trtEngine->runInference(inputs, featureVectors);\n'
    b' if (!succ) {\n'
    b' _logger.LogError("ANSTENSORRTPOSE::DetectObjectsBatch", "Error running inference", __FILE__, __LINE__);\n'
    b' return {};\n'
    b' }\n'
    b'\n'
    b' // Phase 3: Parallel postprocessing -- each image is independent\n'
    b' const size_t numBatch = featureVectors.size();\n'
    b' std::vector> batchDetections(numBatch);\n'
    b' std::vector>> postFutures;\n'
    b' postFutures.reserve(numBatch);\n'
    b' for (size_t batchIdx = 0; batchIdx < numBatch; ++batchIdx) {\n'
    b' const auto& batchOutput = featureVectors[batchIdx];\n'
    b' std::vector fv =\n'
    b' batchOutput.empty() ? std::vector{} : batchOutput[0];\n'
    b' postFutures.push_back(std::async(std::launch::async,\n'
    b' [this, fv = std::move(fv), cid = camera_id,\n'
    b' idx = batchIdx, &metadata]() mutable {\n'
    b' return PostProcessPoseBatch(fv, cid, idx, metadata);\n'
    b' }));\n'
    b' }\n'
    b' for (size_t i = 0; i < numBatch; ++i)\n'
    b' batchDetections[i] = postFutures[i].get();\n'
    b'\n'
    b' _logger.LogInfo("ANSTENSORRTPOSE::DetectObjectsBatch",\n'
    b' "Batch processing complete. Images: " + std::to_string(numBatch), __FILE__, __LINE__);\n'
    b' return batchDetections;\n'
    b' }\n'
    b' std::vector> ANSTENSORRTPOSE::PreprocessBatch(const std::vector& inputImages, BatchMetadata& outMetadata) {\n'
    b' try {\n'
    b' if (!_licenseValid) {\n'
    b' _logger.LogFatal("ANSTENSORRTPOSE::PreprocessBatch", "Invalid license", __FILE__, __LINE__);\n'
    b' return {};\n'
    b' }\n'
    b' const auto& inputDims = m_trtEngine->getInputDims();\n'
    b' const int inputH = inputDims[0].d[1];\n'
    b' const int inputW = inputDims[0].d[2];\n'
    b' outMetadata.imgHeights.resize(inputImages.size());\n'
    b' outMetadata.imgWidths.resize(inputImages.size());\n'
    b' outMetadata.ratios.resize(inputImages.size());\n'
    b' std::vector batchProcessed;\n'
    b' batchProcessed.reserve(inputImages.size());\n'
    b' cv::cuda::Stream stream;\n'
    b' for (size_t i = 0; i < inputImages.size(); ++i) {\n'
    b' const auto& inputImage = inputImages[i];\n'
    b' if (inputImage.empty()) {\n'
    b' _logger.LogFatal("ANSTENSORRTPOSE::PreprocessBatch",\n'
    b' "Empty input image at index " + std::to_string(i), __FILE__, __LINE__);\n'
    b' return {};\n'
    b' }\n'
    b' cv::cuda::GpuMat img;\n'
    b' if (inputImage.channels() == 1) {\n'
    b' cv::Mat img3Channel;\n'
    b' cv::cvtColor(inputImage, img3Channel, cv::COLOR_GRAY2BGR);\n'
    b' img.upload(img3Channel, stream);\n'
    b' }\n'
    b' else {\n'
    b' img.upload(inputImage, stream);\n'
    b' }\n'
    b' cv::cuda::GpuMat imgRGB;\n'
    b' cv::cuda::cvtColor(img, imgRGB, cv::COLOR_BGR2RGB, 0, stream);\n'
    b' outMetadata.imgHeights[i] = imgRGB.rows;\n'
    b' outMetadata.imgWidths[i] = imgRGB.cols;\n'
    b' if (outMetadata.imgHeights[i] <= 0 || outMetadata.imgWidths[i] <= 0) {\n'
    b' _logger.LogFatal("ANSTENSORRTPOSE::PreprocessBatch",\n'
    b' "Image " + std::to_string(i) + " has invalid dimensions (Width: " +\n'
    b' std::to_string(outMetadata.imgWidths[i]) + ", Height: " +\n'
    b' std::to_string(outMetadata.imgHeights[i]) + ")", __FILE__, __LINE__);\n'
    b' return {};\n'
    b' }\n'
    b' outMetadata.ratios[i] = 1.f / std::min(inputW / static_cast(imgRGB.cols),\n'
    b' inputH / static_cast(imgRGB.rows));\n'
    b' cv::cuda::GpuMat resized = imgRGB;\n'
    b' if (resized.rows != inputH || resized.cols != inputW) {\n'
    b' resized = Engine::resizeKeepAspectRatioPadRightBottom(imgRGB, inputH, inputW);\n'
    b' }\n'
    b' batchProcessed.push_back(std::move(resized));\n'
    b' }\n'
    b' stream.waitForCompletion();\n'
    b' std::vector> inputs;\n'
    b' inputs.push_back(std::move(batchProcessed));\n'
    b' return inputs;\n'
    b' }\n'
    b' catch (const std::exception& e) {\n'
    b' _logger.LogFatal("ANSTENSORRTPOSE::PreprocessBatch", e.what(), __FILE__, __LINE__);\n'
    b' return {};\n'
    b' }\n'
    b' }\n'
    b' std::vector ANSTENSORRTPOSE::PostProcessPoseBatch(std::vector& featureVector, const std::string& camera_id, size_t batchIdx, const BatchMetadata& metadata) {\n'
    b' try {\n'
    b' const auto& outputDims = m_trtEngine->getOutputDims();\n'
    b' auto numChannels = outputDims[0].d[1];\n'
    b' auto numAnchors = outputDims[0].d[2];\n'
    b' float ratio = metadata.ratios[batchIdx];\n'
    b' float imgWidth = static_cast(metadata.imgWidths[batchIdx]);\n'
    b' float imgHeight = static_cast(metadata.imgHeights[batchIdx]);\n')
data = replace_slice(
    data,
    start_anchor=L(b' std::vector ANSTENSORRTPOSE::DetectObjectsBatch('),
    end_anchor=L(b' float imgHeight = static_cast(m_batchImgHeights[batchIdx]);\n'),
    new_content=new_batch_all,
    label='DetectObjectsBatch+PreprocessBatch+PostProcessPoseBatch rewrite')

# 8. Add RunInferencesBatch before namespace close
old_ns_close = L(
    b' catch (std::exception& e) {\n'
    b' _logger.LogFatal("ANSTENSORRTPOSE::PostProcessPoseBatch", e.what(), __FILE__, __LINE__);\n'
    b' return {};\n'
    b' }\n'
    b' }\n'
    b'}\n')
new_ns_close = L(
    b' catch (std::exception& e) {\n'
    b' _logger.LogFatal("ANSTENSORRTPOSE::PostProcessPoseBatch", e.what(), __FILE__, __LINE__);\n'
    b' return {};\n'
    b' }\n'
    b' }\n'
    b' std::vector> ANSTENSORRTPOSE::RunInferencesBatch(\n'
    b' const std::vector& inputs, const std::string& camera_id)\n'
    b' {\n'
    b' {\n'
    b' std::lock_guard lock(_mutex);\n'
    b' if (!_modelLoadValid) {\n'
    b' _logger.LogError("ANSTENSORRTPOSE::RunInferencesBatch",\n'
    b' "Model not loaded", __FILE__, __LINE__);\n'
    b' return {};\n'
    b' }\n'
    b' if (!_licenseValid) {\n'
    b' _logger.LogError("ANSTENSORRTPOSE::RunInferencesBatch",\n'
    b' "Invalid license", __FILE__, __LINE__);\n'
    b' return {};\n'
    b' }\n'
    b' if (!_isInitialized) {\n'
    b' _logger.LogError("ANSTENSORRTPOSE::RunInferencesBatch",\n'
    b' "Engine not initialized", __FILE__, __LINE__);\n'
    b' return {};\n'
    b' }\n'
    b' if (inputs.empty()) return {};\n'
    b' }\n'
    b' try {\n'
    b' return DetectObjectsBatch(inputs, camera_id);\n'
    b' }\n'
    b' catch (const std::exception& e) {\n'
    b' _logger.LogFatal("ANSTENSORRTPOSE::RunInferencesBatch", e.what(), __FILE__, __LINE__);\n'
    b' return {};\n'
    b' }\n'
    b' }\n'
    b'}\n')
data = replace_once(data, old_ns_close, new_ns_close, 'Add RunInferencesBatch + namespace close')

# Summary -- the target file is only rewritten if every single edit succeeded.
print(f'\nOriginal size: {orig_size} bytes')
print(f'New size: {len(data)} bytes')
if errors:
    print(f'\n{len(errors)} ERROR(S) -- file NOT written')
    sys.exit(1)
else:
    with open(CPP, 'wb') as f:
        f.write(data)
    print('\nOK -- file written')