From b15f8141e467dcddbeb53f16c2edfa64080d390d Mon Sep 17 00:00:00 2001 From: Giulio Eulisse <10544+ktf@users.noreply.github.com> Date: Sat, 28 Mar 2026 14:57:49 +0100 Subject: [PATCH] DPL: exponential back-off for missing resources. --- Framework/Core/src/DataProcessingDevice.cxx | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/Framework/Core/src/DataProcessingDevice.cxx b/Framework/Core/src/DataProcessingDevice.cxx index 0fa70947bf18c..04ecdea12db6e 100644 --- a/Framework/Core/src/DataProcessingDevice.cxx +++ b/Framework/Core/src/DataProcessingDevice.cxx @@ -1377,6 +1377,7 @@ void DataProcessingDevice::Run() std::atomic numberOfUnscheduledSinceLastScheduled = 0; std::atomic numberOfUnscheduled = 0; std::atomic numberOfScheduled = 0; + std::atomic nextWarnAt = 1; }; static SchedulingStats schedulingStats; O2_SIGNPOST_ID_GENERATE(sid, scheduling); @@ -1387,6 +1388,7 @@ void DataProcessingDevice::Run() schedulingStats.lastScheduled = uv_now(state.loop); schedulingStats.numberOfScheduled++; schedulingStats.numberOfUnscheduledSinceLastScheduled = 0; + schedulingStats.nextWarnAt = 1; O2_SIGNPOST_EVENT_EMIT(scheduling, sid, "Run", "Enough resources to schedule computation on stream %d", streamRef.index); if (dplEnableMultithreding) [[unlikely]] { stream.task = &handle; @@ -1396,12 +1398,12 @@ void DataProcessingDevice::Run() run_completion(&handle, 0); } } else { - if (schedulingStats.numberOfUnscheduledSinceLastScheduled > 100 || - (uv_now(state.loop) - schedulingStats.lastScheduled) > 30000) { + if (schedulingStats.numberOfUnscheduledSinceLastScheduled >= schedulingStats.nextWarnAt) { O2_SIGNPOST_EVENT_EMIT_WARN(scheduling, sid, "Run", "Not enough resources to schedule computation. %zu skipped so far. Last scheduled at %zu. Data is not lost and it will be scheduled again.", schedulingStats.numberOfUnscheduledSinceLastScheduled.load(), schedulingStats.lastScheduled.load()); + schedulingStats.nextWarnAt = schedulingStats.nextWarnAt * 2; } else { O2_SIGNPOST_EVENT_EMIT(scheduling, sid, "Run", "Not enough resources to schedule computation. %zu skipped so far. Last scheduled at %zu. Data is not lost and it will be scheduled again.",