You signed in with another tab or window. Reload to refresh your session. You signed out in another tab or window. Reload to refresh your session. You switched accounts on another tab or window. Reload to refresh your session. Dismiss alert
feat: add error handler callbacks for batch span processor
Add optional error handler callbacks to BatchSpanProcessor to provide
programmatic access to background errors while maintaining internal
SDK logging for observability.
Changes:
- Add error_handler callback to BatchSpanProcessor for background errors
  (span drops, export failures during timer-based flushes)
- Return errors from shutdown_with_timeout when spans were dropped
- Maintain all internal otel_debug/otel_warn/otel_error logging
- Update BatchSpanProcessor builder with with_error_handler() method
- Change dropped_spans_count to Arc<AtomicUsize> for shared ownership
- Both log AND invoke error handler when errors occur
This gives users explicit control over error handling via opt-in callbacks
while preserving default observability through internal SDK logging. The
error handler is a supplement to logging, not a replacement.
Addresses error handling requirements from the OpenTelemetry specification,
which states that SDKs MAY expose callbacks for self-diagnostics AND that
errors should be logged when they cannot be returned to the caller.
message = "BatchSpanProcessor dropped a Span due to queue full. No further log will be emitted for further drops until Shutdown. During Shutdown time, a log will be emitted with exact count of total spans dropped.");
582
609
}
610
+
611
+
// Also invoke error handler if registered
612
+
if let Some(ref handler) = self.error_handler {
613
+
handler(OTelSdkError::InternalFailure(format!(
614
+
"Span dropped due to full queue (total dropped: {})",
// Given background thread is the only receiver, and it's
586
621
// disconnected, it indicates the thread is shutdown
587
-
otel_warn!(
588
-
name:"BatchSpanProcessor.OnEnd.AfterShutdown",
589
-
message = "Spans are being emitted even after Shutdown. This indicates incorrect lifecycle management of TracerProvider in application. Spans will not be exported."
590
-
);
622
+
let previous_count = self.dropped_spans_count.fetch_add(1,Ordering::Relaxed);
623
+
if previous_count == 0{
624
+
otel_warn!(
625
+
name:"BatchSpanProcessor.OnEnd.AfterShutdown",
626
+
message = "Spans are being emitted even after Shutdown. This indicates incorrect lifecycle management of TracerProvider in application. Spans will not be exported."
627
+
);
628
+
}
629
+
630
+
// Also invoke error handler if registered
631
+
if let Some(ref handler) = self.error_handler {
632
+
handler(OTelSdkError::InternalFailure(format!(
633
+
"Span dropped due to processor already shut down (total dropped: {}). This indicates incorrect lifecycle management of TracerProvider.",
634
+
previous_count + 1
635
+
)));
636
+
}
591
637
}
592
638
}
593
639
}
@@ -609,7 +655,7 @@ impl SpanProcessor for BatchSpanProcessor {
609
655
}
610
656
})?,
611
657
Err(std::sync::mpsc::TrySendError::Full(_)) => {
612
-
// If the control message could not be sent, emit a warning.
658
+
// If the control message could not be sent, emit a debug log.
message = "Control message to flush the worker thread could not be sent as the control channel is full. This can occur if user repeatedly calls force_flush/shutdown without finishing the previous call."
@@ -621,7 +667,7 @@ impl SpanProcessor for BatchSpanProcessor {
621
667
// disconnected, it indicates the thread is shutdown
let dropped_spans = self.dropped_spans_count.load(Ordering::Relaxed);
635
-
let max_queue_size = self.max_queue_size;
636
681
if dropped_spans > 0{
682
+
// Log warning for observability (always happens)
637
683
otel_warn!(
638
-
name:"BatchSpanProcessor.SpansDropped",
639
-
dropped_span_count = dropped_spans,
640
-
max_queue_size = max_queue_size,
641
-
message = "Spans were dropped due to a queue being full. The count represents the total count of spans dropped in the lifetime of this BatchSpanProcessor. Consider increasing the queue size and/or decrease delay between intervals."
684
+
name:"BatchSpanProcessor.Shutdown",
685
+
dropped_spans = dropped_spans,
686
+
max_queue_size = self.max_queue_size,
687
+
message = "Spans were dropped due to a full queue. The count represents the total count of span records dropped in the lifetime of the BatchSpanProcessor. Consider increasing the queue size and/or decrease delay between intervals."
642
688
);
689
+
690
+
// Also return error so user code can handle it programmatically
691
+
return Err(OTelSdkError::InternalFailure(format!(
692
+
"BatchSpanProcessor dropped {} spans during its lifetime due to full queue (max queue size: {}). Consider increasing queue size or decreasing delay between intervals.",
0 commit comments