Use software decoder by default

This commit is contained in:
2026-04-04 20:19:54 +11:00
parent 3a21026790
commit e134ebdf15
24 changed files with 693 additions and 215 deletions

View File

@@ -607,6 +607,7 @@ bool Engine<T>::runInferenceFromPool(
// harmless — the second one finds a fresh slot immediately.
InferenceSlot* slot = nullptr;
bool kickedGrowth = false;
auto _poolAcquireStart = std::chrono::steady_clock::now();
{
std::unique_lock<std::mutex> lock(m_slotMutex);
@@ -630,6 +631,8 @@ bool Engine<T>::runInferenceFromPool(
}
if (!slot) {
ANS_DBG("TRT_Pool", "ALL SLOTS BUSY: %zu slots, active=%d — waiting for free slot",
n, m_activeCount.load());
// All slots busy. In elastic mode, proactively grow the
// pool in the background so the next request has a slot
// on a different GPU. We only kick once per wait cycle.
@@ -672,7 +675,17 @@ bool Engine<T>::runInferenceFromPool(
}
// -- 3. Still no slot => reject ---------------------------------------
{
double _acquireMs = std::chrono::duration<double, std::milli>(
std::chrono::steady_clock::now() - _poolAcquireStart).count();
if (_acquireMs > 100.0) {
ANS_DBG("TRT_Pool", "SLOW slot acquire: %.1fms slot=%p gpu=%d active=%d/%zu",
_acquireMs, (void*)slot, slot ? slot->deviceIndex : -1,
m_activeCount.load(), m_slots.size());
}
}
if (!slot) {
ANS_DBG("TRT_Pool", "ERROR: No slot available — all %zu slots busy, timeout", m_slots.size());
std::string errMsg = "[Engine] runInferenceFromPool FAIL: Capacity reached -- all "
+ std::to_string(m_activeCount.load()) + "/" + std::to_string(m_totalCapacity)
+ " slot(s) busy"
@@ -699,12 +712,23 @@ bool Engine<T>::runInferenceFromPool(
if (currentDev != slot->deviceIndex) {
cudaSetDevice(slot->deviceIndex);
}
ANS_DBG("TRT_Pool", "Slot dispatch: gpu=%d active=%d/%zu",
slot->deviceIndex, m_activeCount.load(), m_slots.size());
auto _slotStart = std::chrono::steady_clock::now();
result = slot->engine->runInference(inputs, featureVectors);
auto _slotEnd = std::chrono::steady_clock::now();
double _slotMs = std::chrono::duration<double, std::milli>(_slotEnd - _slotStart).count();
if (_slotMs > 500.0) {
ANS_DBG("TRT_Pool", "SLOW slot inference: %.1fms gpu=%d active=%d/%zu",
_slotMs, slot->deviceIndex, m_activeCount.load(), m_slots.size());
}
}
catch (const std::exception& ex) {
ANS_DBG("TRT_Pool", "ERROR: runInference threw: %s", ex.what());
std::cout << "Error [Pool]: runInference threw: " << ex.what() << std::endl;
}
catch (...) {
ANS_DBG("TRT_Pool", "ERROR: runInference threw unknown exception");
std::cout << "Error [Pool]: runInference threw unknown exception" << std::endl;
}