From 23691f4e2f723c5937254408cb96b7aca005c1e0 Mon Sep 17 00:00:00 2001 From: Paul Fournillon Date: Wed, 25 Mar 2026 19:21:16 +0100 Subject: [PATCH 01/12] fix(endpoint): add parentSpanId and real duration to datadog.Endpoint --- ddprof-lib/src/main/cpp/event.h | 10 ++++++--- ddprof-lib/src/main/cpp/flightRecorder.cpp | 5 +++-- ddprof-lib/src/main/cpp/javaApi.cpp | 7 ++++--- ddprof-lib/src/main/cpp/jfrMetadata.cpp | 3 ++- .../com/datadoghq/profiler/JavaProfiler.java | 21 ++++++++++++++++--- 5 files changed, 34 insertions(+), 12 deletions(-) diff --git a/ddprof-lib/src/main/cpp/event.h b/ddprof-lib/src/main/cpp/event.h index e9363165f..bd3f9fabe 100644 --- a/ddprof-lib/src/main/cpp/event.h +++ b/ddprof-lib/src/main/cpp/event.h @@ -155,12 +155,16 @@ class WallClockEpochEvent { class TraceRootEvent { public: u64 _local_root_span_id; + u64 _parent_span_id; + u64 _start_ticks; u32 _label; u32 _operation; - TraceRootEvent(u64 local_root_span_id, u32 label, u32 operation) - : _local_root_span_id(local_root_span_id), _label(label), - _operation(operation){}; + TraceRootEvent(u64 local_root_span_id, u64 parent_span_id, u64 start_ticks, + u32 label, u32 operation) + : _local_root_span_id(local_root_span_id), + _parent_span_id(parent_span_id), _start_ticks(start_ticks), + _label(label), _operation(operation){}; }; typedef struct QueueTimeEvent { diff --git a/ddprof-lib/src/main/cpp/flightRecorder.cpp b/ddprof-lib/src/main/cpp/flightRecorder.cpp index 4adc5f727..054b6e40a 100644 --- a/ddprof-lib/src/main/cpp/flightRecorder.cpp +++ b/ddprof-lib/src/main/cpp/flightRecorder.cpp @@ -1531,13 +1531,14 @@ void Recording::recordTraceRoot(Buffer *buf, int tid, TraceRootEvent *event) { flushIfNeeded(buf); int start = buf->skip(1); buf->putVar64(T_ENDPOINT); - buf->putVar64(TSC::ticks()); - buf->put8(0); + buf->putVar64(event->_start_ticks); + buf->putVar64(TSC::ticks() - event->_start_ticks); buf->putVar32(tid); buf->put8(0); buf->putVar32(event->_label); buf->putVar32(event->_operation); buf->putVar64(event->_local_root_span_id); + buf->putVar64(event->_parent_span_id); writeEventSizePrefix(buf, start); flushIfNeeded(buf); } diff --git a/ddprof-lib/src/main/cpp/javaApi.cpp b/ddprof-lib/src/main/cpp/javaApi.cpp index a354274c6..f040e8a83 100644 --- a/ddprof-lib/src/main/cpp/javaApi.cpp +++ b/ddprof-lib/src/main/cpp/javaApi.cpp @@ -199,8 +199,8 @@ Java_com_datadoghq_profiler_JavaProfiler_filterThreadRemove0(JNIEnv *env, extern "C" DLLEXPORT jboolean JNICALL Java_com_datadoghq_profiler_JavaProfiler_recordTrace0( - JNIEnv *env, jclass unused, jlong rootSpanId, jstring endpoint, - jstring operation, jint sizeLimit) { + JNIEnv *env, jclass unused, jlong rootSpanId, jlong parentSpanId, + jlong startTicks, jstring endpoint, jstring operation, jint sizeLimit) { JniString endpoint_str(env, endpoint); u32 endpointLabel = Profiler::instance()->stringLabelMap()->bounded_lookup( endpoint_str.c_str(), endpoint_str.length(), sizeLimit); @@ -212,7 +212,8 @@ Java_com_datadoghq_profiler_JavaProfiler_recordTrace0( operationLabel = Profiler::instance()->contextValueMap()->bounded_lookup( operation_str.c_str(), operation_str.length(), 1 << 16); } - TraceRootEvent event(rootSpanId, endpointLabel, operationLabel); + TraceRootEvent event(rootSpanId, (u64)parentSpanId, (u64)startTicks, + endpointLabel, operationLabel); int tid = ProfiledThread::currentTid(); Profiler::instance()->recordTraceRoot(tid, &event); } diff --git a/ddprof-lib/src/main/cpp/jfrMetadata.cpp b/ddprof-lib/src/main/cpp/jfrMetadata.cpp index 6991a8a12..e4ed169e0 100644 --- a/ddprof-lib/src/main/cpp/jfrMetadata.cpp +++ b/ddprof-lib/src/main/cpp/jfrMetadata.cpp @@ -176,7 +176,8 @@ void JfrMetadata::initialize( << field("stackTrace", T_STACK_TRACE, "Stack Trace", F_CPOOL) << field("endpoint", T_STRING, "Endpoint", F_CPOOL) << field("operation", T_ATTRIBUTE_VALUE, "Operation", F_CPOOL) - << field("localRootSpanId", T_LONG, "Local Root Span ID")) + << field("localRootSpanId", T_LONG, "Local Root Span ID") + << field("parentSpanId", T_LONG, "Parent Span ID")) << (type("datadog.QueueTime", T_QUEUE_TIME, "Queue Time") << category("Datadog") diff --git a/ddprof-lib/src/main/java/com/datadoghq/profiler/JavaProfiler.java b/ddprof-lib/src/main/java/com/datadoghq/profiler/JavaProfiler.java index c49b4479e..5221ec634 100644 --- a/ddprof-lib/src/main/java/com/datadoghq/profiler/JavaProfiler.java +++ b/ddprof-lib/src/main/java/com/datadoghq/profiler/JavaProfiler.java @@ -161,11 +161,26 @@ public String execute(String command) throws IllegalArgumentException, IllegalSt return execute0(command); } + /** + * Records the completion of the trace root with causal DAG metadata. + * @param rootSpanId the local root span ID + * @param parentSpanId the parent span ID (0 if none) + * @param startTicks TSC tick captured at span start via {@link #getCurrentTicks()} + * @param endpoint the endpoint/resource name + * @param operation the operation name + * @param sizeLimit max number of distinct endpoints to track + */ + public boolean recordTraceRoot(long rootSpanId, long parentSpanId, long startTicks, + String endpoint, String operation, int sizeLimit) { + return recordTrace0(rootSpanId, parentSpanId, startTicks, endpoint, operation, sizeLimit); + } + /** * Records the completion of the trace root */ + @Deprecated public boolean recordTraceRoot(long rootSpanId, String endpoint, String operation, int sizeLimit) { - return recordTrace0(rootSpanId, endpoint, operation, sizeLimit); + return recordTrace0(rootSpanId, 0L, 0L, endpoint, operation, sizeLimit); } /** @@ -173,7 +188,7 @@ public boolean recordTraceRoot(long rootSpanId, String endpoint, String operatio */ @Deprecated public boolean recordTraceRoot(long rootSpanId, String endpoint, int sizeLimit) { - return recordTrace0(rootSpanId, endpoint, null, sizeLimit); + return recordTrace0(rootSpanId, 0L, 0L, endpoint, null, sizeLimit); } /** @@ -323,7 +338,7 @@ private static ThreadContext initializeThreadContext() { private static native int getTid0(); - private static native boolean recordTrace0(long rootSpanId, String endpoint, String operation, int sizeLimit); + private static native boolean recordTrace0(long rootSpanId, long parentSpanId, long startTicks, String endpoint, String operation, int sizeLimit); private static native int registerConstant0(String value); From 40a3000f6529132d50e9fe27e8bd052264894c57 Mon Sep 17 00:00:00 2001 From: Paul Fournillon Date: Wed, 25 Mar 2026 19:28:32 +0100 Subject: [PATCH 02/12] feat(jfr): add datadog.TaskBlock event for blocking intervals --- ddprof-lib/src/main/cpp/event.h | 9 ++++++ ddprof-lib/src/main/cpp/flightRecorder.cpp | 30 +++++++++++++++++++ ddprof-lib/src/main/cpp/flightRecorder.h | 2 ++ ddprof-lib/src/main/cpp/javaApi.cpp | 16 ++++++++++ ddprof-lib/src/main/cpp/jfrMetadata.cpp | 11 +++++++ ddprof-lib/src/main/cpp/jfrMetadata.h | 1 + ddprof-lib/src/main/cpp/profiler.cpp | 12 ++++++++ ddprof-lib/src/main/cpp/profiler.h | 1 + .../com/datadoghq/profiler/JavaProfiler.java | 15 ++++++++++ 9 files changed, 97 insertions(+) diff --git a/ddprof-lib/src/main/cpp/event.h b/ddprof-lib/src/main/cpp/event.h index bd3f9fabe..45279eaf7 100644 --- a/ddprof-lib/src/main/cpp/event.h +++ b/ddprof-lib/src/main/cpp/event.h @@ -167,6 +167,15 @@ class TraceRootEvent { _label(label), _operation(operation){}; }; +typedef struct TaskBlockEvent { + u64 _start_ticks; + u64 _end_ticks; + u64 _span_id; + u64 _root_span_id; + uintptr_t _blocker; + u64 _unblocking_span_id; +} TaskBlockEvent; + typedef struct QueueTimeEvent { u64 _start; u64 _end; diff --git a/ddprof-lib/src/main/cpp/flightRecorder.cpp b/ddprof-lib/src/main/cpp/flightRecorder.cpp index 054b6e40a..123299f49 100644 --- a/ddprof-lib/src/main/cpp/flightRecorder.cpp +++ b/ddprof-lib/src/main/cpp/flightRecorder.cpp @@ -1543,6 +1543,23 @@ void Recording::recordTraceRoot(Buffer *buf, int tid, TraceRootEvent *event) { flushIfNeeded(buf); } + +void Recording::recordTaskBlock(Buffer *buf, int tid, TaskBlockEvent *event) { + flushIfNeeded(buf); + int start = buf->skip(1); + buf->putVar64(T_TASK_BLOCK); + buf->putVar64(event->_start_ticks); + buf->putVar64(event->_end_ticks - event->_start_ticks); + buf->putVar32(tid); + buf->put8(0); + buf->putVar64(event->_span_id); + buf->putVar64(event->_root_span_id); + buf->putVar64((u64)event->_blocker); + buf->putVar64(event->_unblocking_span_id); + writeEventSizePrefix(buf, start); + flushIfNeeded(buf); +} + void Recording::recordQueueTime(Buffer *buf, int tid, QueueTimeEvent *event) { int start = buf->skip(1); buf->putVar64(T_QUEUE_TIME); @@ -1762,6 +1779,19 @@ void FlightRecorder::recordQueueTime(int lock_index, int tid, } } + +void FlightRecorder::recordTaskBlock(int lock_index, int tid, + TaskBlockEvent *event) { + OptionalSharedLockGuard locker(&_rec_lock); + if (locker.ownsLock()) { + Recording* rec = _rec; + if (rec != nullptr) { + Buffer *buf = rec->buffer(lock_index); + rec->recordTaskBlock(buf, tid, event); + } + } +} + void FlightRecorder::recordDatadogSetting(int lock_index, int length, const char *name, const char *value, const char *unit) { diff --git a/ddprof-lib/src/main/cpp/flightRecorder.h b/ddprof-lib/src/main/cpp/flightRecorder.h index c1ab88262..c2e255a1c 100644 --- a/ddprof-lib/src/main/cpp/flightRecorder.h +++ b/ddprof-lib/src/main/cpp/flightRecorder.h @@ -278,6 +278,7 @@ class Recording { void recordWallClockEpoch(Buffer *buf, WallClockEpochEvent *event); void recordTraceRoot(Buffer *buf, int tid, TraceRootEvent *event); void recordQueueTime(Buffer *buf, int tid, QueueTimeEvent *event); + void recordTaskBlock(Buffer *buf, int tid, TaskBlockEvent *event); void recordAllocation(RecordingBuffer *buf, int tid, u64 call_trace_id, AllocEvent *event); void recordHeapLiveObject(Buffer *buf, int tid, u64 call_trace_id, @@ -344,6 +345,7 @@ class FlightRecorder { void wallClockEpoch(int lock_index, WallClockEpochEvent *event); void recordTraceRoot(int lock_index, int tid, TraceRootEvent *event); void recordQueueTime(int lock_index, int tid, QueueTimeEvent *event); + void recordTaskBlock(int lock_index, int tid, TaskBlockEvent *event); bool active() const { return _rec != NULL; } diff --git a/ddprof-lib/src/main/cpp/javaApi.cpp b/ddprof-lib/src/main/cpp/javaApi.cpp index f040e8a83..95eb87925 100644 --- a/ddprof-lib/src/main/cpp/javaApi.cpp +++ b/ddprof-lib/src/main/cpp/javaApi.cpp @@ -220,6 +220,22 @@ Java_com_datadoghq_profiler_JavaProfiler_recordTrace0( return acceptValue; } + +extern "C" DLLEXPORT void JNICALL +Java_com_datadoghq_profiler_JavaProfiler_recordTaskBlock0( + JNIEnv *env, jclass unused, jlong startTicks, jlong endTicks, + jlong spanId, jlong rootSpanId, jlong blocker, jlong unblockingSpanId) { + TaskBlockEvent event; + event._start_ticks = (u64)startTicks; + event._end_ticks = (u64)endTicks; + event._span_id = (u64)spanId; + event._root_span_id = (u64)rootSpanId; + event._blocker = (uintptr_t)blocker; + event._unblocking_span_id = (u64)unblockingSpanId; + int tid = ProfiledThread::currentTid(); + Profiler::instance()->recordTaskBlock(tid, &event); +} + extern "C" DLLEXPORT jint JNICALL Java_com_datadoghq_profiler_JavaProfiler_registerConstant0(JNIEnv *env, jclass unused, diff --git a/ddprof-lib/src/main/cpp/jfrMetadata.cpp b/ddprof-lib/src/main/cpp/jfrMetadata.cpp index e4ed169e0..c0c785b38 100644 --- a/ddprof-lib/src/main/cpp/jfrMetadata.cpp +++ b/ddprof-lib/src/main/cpp/jfrMetadata.cpp @@ -179,6 +179,17 @@ void JfrMetadata::initialize( << field("localRootSpanId", T_LONG, "Local Root Span ID") << field("parentSpanId", T_LONG, "Parent Span ID")) + << (type("datadog.TaskBlock", T_TASK_BLOCK, "Task Block") + << category("Datadog") + << field("startTime", T_LONG, "Start Time", F_TIME_TICKS) + << field("duration", T_LONG, "Duration", F_DURATION_TICKS) + << field("eventThread", T_THREAD, "Event Thread", F_CPOOL) + << field("stackTrace", T_STACK_TRACE, "Stack Trace", F_CPOOL) + << field("spanId", T_LONG, "Span ID") + << field("localRootSpanId", T_LONG, "Local Root Span ID") + << field("blocker", T_LONG, "Blocker Object Hash", F_UNSIGNED) + << field("unblockingSpanId", T_LONG, "Unblocking Span ID")) + << (type("datadog.QueueTime", T_QUEUE_TIME, "Queue Time") << category("Datadog") << field("startTime", T_LONG, "Start Time", F_TIME_TICKS) diff --git a/ddprof-lib/src/main/cpp/jfrMetadata.h b/ddprof-lib/src/main/cpp/jfrMetadata.h index 77da96d3f..ecedade47 100644 --- a/ddprof-lib/src/main/cpp/jfrMetadata.h +++ b/ddprof-lib/src/main/cpp/jfrMetadata.h @@ -78,6 +78,7 @@ enum JfrType { T_DATADOG_CLASSREF_CACHE = 124, T_DATADOG_COUNTER = 125, T_UNWIND_FAILURE = 126, + T_TASK_BLOCK = 127, T_ANNOTATION = 200, T_LABEL = 201, T_CATEGORY = 202, diff --git a/ddprof-lib/src/main/cpp/profiler.cpp b/ddprof-lib/src/main/cpp/profiler.cpp index cbcfb7284..f258ecf37 100644 --- a/ddprof-lib/src/main/cpp/profiler.cpp +++ b/ddprof-lib/src/main/cpp/profiler.cpp @@ -940,6 +940,18 @@ void Profiler::recordQueueTime(int tid, QueueTimeEvent *event) { _locks[lock_index].unlock(); } + +void Profiler::recordTaskBlock(int tid, TaskBlockEvent *event) { + u32 lock_index = getLockIndex(tid); + if (!_locks[lock_index].tryLock() && + !_locks[lock_index = (lock_index + 1) % CONCURRENCY_LEVEL].tryLock() && + !_locks[lock_index = (lock_index + 2) % CONCURRENCY_LEVEL].tryLock()) { + return; + } + _jfr.recordTaskBlock(lock_index, tid, event); + _locks[lock_index].unlock(); +} + void Profiler::recordExternalSample(u64 weight, int tid, int num_frames, ASGCT_CallFrame *frames, bool truncated, jint event_type, Event *event) { diff --git a/ddprof-lib/src/main/cpp/profiler.h b/ddprof-lib/src/main/cpp/profiler.h index 285c64805..cbe876306 100644 --- a/ddprof-lib/src/main/cpp/profiler.h +++ b/ddprof-lib/src/main/cpp/profiler.h @@ -375,6 +375,7 @@ class alignas(alignof(SpinLock)) Profiler { void recordWallClockEpoch(int tid, WallClockEpochEvent *event); void recordTraceRoot(int tid, TraceRootEvent *event); void recordQueueTime(int tid, QueueTimeEvent *event); + void recordTaskBlock(int tid, TaskBlockEvent *event); void writeLog(LogLevel level, const char *message); void writeLog(LogLevel level, const char *message, size_t len); void writeDatadogProfilerSetting(int tid, int length, const char *name, diff --git a/ddprof-lib/src/main/java/com/datadoghq/profiler/JavaProfiler.java b/ddprof-lib/src/main/java/com/datadoghq/profiler/JavaProfiler.java index 5221ec634..0dc81982a 100644 --- a/ddprof-lib/src/main/java/com/datadoghq/profiler/JavaProfiler.java +++ b/ddprof-lib/src/main/java/com/datadoghq/profiler/JavaProfiler.java @@ -191,6 +191,19 @@ public boolean recordTraceRoot(long rootSpanId, String endpoint, int sizeLimit) return recordTrace0(rootSpanId, 0L, 0L, endpoint, null, sizeLimit); } + /** + * Records a blocking interval for the current span. + * @param startTicks TSC tick at block entry + * @param endTicks TSC tick at block exit + * @param spanId the span that was blocked + * @param rootSpanId the local root span ID + * @param blocker identity hash code of the blocking object + */ + public void recordTaskBlock(long startTicks, long endTicks, + long spanId, long rootSpanId, long blocker, long unblockingSpanId) { + recordTaskBlock0(startTicks, endTicks, spanId, rootSpanId, blocker, unblockingSpanId); + } + /** * Add the given thread to the set of profiled threads. * 'filter' option must be enabled to use this method. @@ -340,6 +353,8 @@ private static ThreadContext initializeThreadContext() { private static native boolean recordTrace0(long rootSpanId, long parentSpanId, long startTicks, String endpoint, String operation, int sizeLimit); + private static native void recordTaskBlock0(long startTicks, long endTicks, long spanId, long rootSpanId, long blocker, long unblockingSpanId); + private static native int registerConstant0(String value); private static native void dump0(String recordingFilePath); From 63b44a5fbd0dff3712489f7c8735cd7384994af6 Mon Sep 17 00:00:00 2001 From: Paul Fournillon Date: Wed, 25 Mar 2026 19:30:13 +0100 Subject: [PATCH 03/12] feat(jfr): add datadog.SpanNode event for per-span DAG nodes --- ddprof-lib/src/main/cpp/flightRecorder.cpp | 30 +++++++++++++++++-- ddprof-lib/src/main/cpp/flightRecorder.h | 4 +++ ddprof-lib/src/main/cpp/javaApi.cpp | 13 +++++++- ddprof-lib/src/main/cpp/jfrMetadata.cpp | 10 +++++++ ddprof-lib/src/main/cpp/jfrMetadata.h | 1 + ddprof-lib/src/main/cpp/profiler.cpp | 13 +++++++- ddprof-lib/src/main/cpp/profiler.h | 2 ++ .../com/datadoghq/profiler/JavaProfiler.java | 9 ++++++ 8 files changed, 78 insertions(+), 4 deletions(-) diff --git a/ddprof-lib/src/main/cpp/flightRecorder.cpp b/ddprof-lib/src/main/cpp/flightRecorder.cpp index 123299f49..c8b8e3893 100644 --- a/ddprof-lib/src/main/cpp/flightRecorder.cpp +++ b/ddprof-lib/src/main/cpp/flightRecorder.cpp @@ -1543,7 +1543,6 @@ void Recording::recordTraceRoot(Buffer *buf, int tid, TraceRootEvent *event) { flushIfNeeded(buf); } - void Recording::recordTaskBlock(Buffer *buf, int tid, TaskBlockEvent *event) { flushIfNeeded(buf); int start = buf->skip(1); @@ -1560,6 +1559,22 @@ void Recording::recordTaskBlock(Buffer *buf, int tid, TaskBlockEvent *event) { flushIfNeeded(buf); } +void Recording::recordSpanNode(Buffer *buf, int tid, u64 spanId, u64 parentSpanId, u64 rootSpanId, + u64 startNanos, u64 durationNanos, u32 encodedOperation, u32 encodedResource) { + flushIfNeeded(buf); + int start = buf->skip(1); + buf->putVar64(T_SPAN_NODE); + buf->putVar64(spanId); + buf->putVar64(parentSpanId); + buf->putVar64(rootSpanId); + buf->putVar64(startNanos); + buf->putVar64(durationNanos); + buf->putVar32(encodedOperation); + buf->putVar32(encodedResource); + writeEventSizePrefix(buf, start); + flushIfNeeded(buf); +} + void Recording::recordQueueTime(Buffer *buf, int tid, QueueTimeEvent *event) { int start = buf->skip(1); buf->putVar64(T_QUEUE_TIME); @@ -1779,7 +1794,6 @@ void FlightRecorder::recordQueueTime(int lock_index, int tid, } } - void FlightRecorder::recordTaskBlock(int lock_index, int tid, TaskBlockEvent *event) { OptionalSharedLockGuard locker(&_rec_lock); @@ -1792,6 +1806,18 @@ void FlightRecorder::recordTaskBlock(int lock_index, int tid, } } +void FlightRecorder::recordSpanNode(int lock_index, int tid, u64 spanId, u64 parentSpanId, u64 rootSpanId, + u64 startNanos, u64 durationNanos, u32 encodedOperation, u32 encodedResource) { + OptionalSharedLockGuard locker(&_rec_lock); + if (locker.ownsLock()) { + Recording* rec = _rec; + if (rec != nullptr) { + Buffer *buf = rec->buffer(lock_index); + rec->recordSpanNode(buf, tid, spanId, parentSpanId, rootSpanId, startNanos, durationNanos, encodedOperation, encodedResource); + } + } +} + void FlightRecorder::recordDatadogSetting(int lock_index, int length, const char *name, const char *value, const char *unit) { diff --git a/ddprof-lib/src/main/cpp/flightRecorder.h b/ddprof-lib/src/main/cpp/flightRecorder.h index c2e255a1c..3fa4fdf13 100644 --- a/ddprof-lib/src/main/cpp/flightRecorder.h +++ b/ddprof-lib/src/main/cpp/flightRecorder.h @@ -279,6 +279,8 @@ class Recording { void recordTraceRoot(Buffer *buf, int tid, TraceRootEvent *event); void recordQueueTime(Buffer *buf, int tid, QueueTimeEvent *event); void recordTaskBlock(Buffer *buf, int tid, TaskBlockEvent *event); + void recordSpanNode(Buffer *buf, int tid, u64 spanId, u64 parentSpanId, u64 rootSpanId, + u64 startNanos, u64 durationNanos, u32 encodedOperation, u32 encodedResource); void recordAllocation(RecordingBuffer *buf, int tid, u64 call_trace_id, AllocEvent *event); void recordHeapLiveObject(Buffer *buf, int tid, u64 call_trace_id, @@ -346,6 +348,8 @@ class FlightRecorder { void recordTraceRoot(int lock_index, int tid, TraceRootEvent *event); void recordQueueTime(int lock_index, int tid, QueueTimeEvent *event); void recordTaskBlock(int lock_index, int tid, TaskBlockEvent *event); + void recordSpanNode(int lock_index, int tid, u64 spanId, u64 parentSpanId, u64 rootSpanId, + u64 startNanos, u64 durationNanos, u32 encodedOperation, u32 encodedResource); bool active() const { return _rec != NULL; } diff --git a/ddprof-lib/src/main/cpp/javaApi.cpp b/ddprof-lib/src/main/cpp/javaApi.cpp index 95eb87925..baa6ea607 100644 --- a/ddprof-lib/src/main/cpp/javaApi.cpp +++ b/ddprof-lib/src/main/cpp/javaApi.cpp @@ -220,7 +220,6 @@ Java_com_datadoghq_profiler_JavaProfiler_recordTrace0( return acceptValue; } - extern "C" DLLEXPORT void JNICALL Java_com_datadoghq_profiler_JavaProfiler_recordTaskBlock0( JNIEnv *env, jclass unused, jlong startTicks, jlong endTicks, @@ -236,6 +235,18 @@ Java_com_datadoghq_profiler_JavaProfiler_recordTaskBlock0( Profiler::instance()->recordTaskBlock(tid, &event); } +extern "C" DLLEXPORT void JNICALL +Java_com_datadoghq_profiler_JavaProfiler_recordSpanNode0( + JNIEnv *env, jclass unused, + jlong spanId, jlong parentSpanId, jlong rootSpanId, + jlong startNanos, jlong durationNanos, + jint encodedOperation, jint encodedResource) { + int tid = ProfiledThread::currentTid(); + Profiler::instance()->recordSpanNode(tid, (u64)spanId, (u64)parentSpanId, (u64)rootSpanId, + (u64)startNanos, (u64)durationNanos, + (u32)encodedOperation, (u32)encodedResource); +} + extern "C" DLLEXPORT jint JNICALL Java_com_datadoghq_profiler_JavaProfiler_registerConstant0(JNIEnv *env, jclass unused, diff --git a/ddprof-lib/src/main/cpp/jfrMetadata.cpp b/ddprof-lib/src/main/cpp/jfrMetadata.cpp index c0c785b38..d7777c257 100644 --- a/ddprof-lib/src/main/cpp/jfrMetadata.cpp +++ b/ddprof-lib/src/main/cpp/jfrMetadata.cpp @@ -190,6 +190,16 @@ void JfrMetadata::initialize( << field("blocker", T_LONG, "Blocker Object Hash", F_UNSIGNED) << field("unblockingSpanId", T_LONG, "Unblocking Span ID")) + << (type("datadog.SpanNode", T_SPAN_NODE, "Span Node") + << category("Datadog") + << field("spanId", T_LONG, "Span ID") + << field("parentSpanId", T_LONG, "Parent Span ID") + << field("localRootSpanId", T_LONG, "Local Root Span ID") + << field("startNanos", T_LONG, "Start Time (epoch ns)") + << field("durationNanos", T_LONG, "Duration (ns)") + << field("encodedOperation", T_INT, "Encoded Operation Name") + << field("encodedResource", T_INT, "Encoded Resource Name")) + << (type("datadog.QueueTime", T_QUEUE_TIME, "Queue Time") << category("Datadog") << field("startTime", T_LONG, "Start Time", F_TIME_TICKS) diff --git a/ddprof-lib/src/main/cpp/jfrMetadata.h b/ddprof-lib/src/main/cpp/jfrMetadata.h index ecedade47..bc2997e2e 100644 --- a/ddprof-lib/src/main/cpp/jfrMetadata.h +++ b/ddprof-lib/src/main/cpp/jfrMetadata.h @@ -79,6 +79,7 @@ enum JfrType { T_DATADOG_COUNTER = 125, T_UNWIND_FAILURE = 126, T_TASK_BLOCK = 127, + T_SPAN_NODE = 128, T_ANNOTATION = 200, T_LABEL = 201, T_CATEGORY = 202, diff --git a/ddprof-lib/src/main/cpp/profiler.cpp b/ddprof-lib/src/main/cpp/profiler.cpp index f258ecf37..47ca7ca7d 100644 --- a/ddprof-lib/src/main/cpp/profiler.cpp +++ b/ddprof-lib/src/main/cpp/profiler.cpp @@ -940,7 +940,6 @@ void Profiler::recordQueueTime(int tid, QueueTimeEvent *event) { _locks[lock_index].unlock(); } - void Profiler::recordTaskBlock(int tid, TaskBlockEvent *event) { u32 lock_index = getLockIndex(tid); if (!_locks[lock_index].tryLock() && @@ -952,6 +951,18 @@ void Profiler::recordTaskBlock(int tid, TaskBlockEvent *event) { _locks[lock_index].unlock(); } +void Profiler::recordSpanNode(int tid, u64 spanId, u64 parentSpanId, u64 rootSpanId, + u64 startNanos, u64 durationNanos, u32 encodedOperation, u32 encodedResource) { + u32 lock_index = getLockIndex(tid); + if (!_locks[lock_index].tryLock() && + !_locks[lock_index = (lock_index + 1) % CONCURRENCY_LEVEL].tryLock() && + !_locks[lock_index = (lock_index + 2) % CONCURRENCY_LEVEL].tryLock()) { + return; + } + _jfr.recordSpanNode(lock_index, tid, spanId, parentSpanId, rootSpanId, startNanos, durationNanos, encodedOperation, encodedResource); + _locks[lock_index].unlock(); +} + void Profiler::recordExternalSample(u64 weight, int tid, int num_frames, ASGCT_CallFrame *frames, bool truncated, jint event_type, Event *event) { diff --git a/ddprof-lib/src/main/cpp/profiler.h b/ddprof-lib/src/main/cpp/profiler.h index cbe876306..c6f1936e9 100644 --- a/ddprof-lib/src/main/cpp/profiler.h +++ b/ddprof-lib/src/main/cpp/profiler.h @@ -376,6 +376,8 @@ class alignas(alignof(SpinLock)) Profiler { void recordTraceRoot(int tid, TraceRootEvent *event); void recordQueueTime(int tid, QueueTimeEvent *event); void recordTaskBlock(int tid, TaskBlockEvent *event); + void recordSpanNode(int tid, u64 spanId, u64 parentSpanId, u64 rootSpanId, + u64 startNanos, u64 durationNanos, u32 encodedOperation, u32 encodedResource); void writeLog(LogLevel level, const char *message); void writeLog(LogLevel level, const char *message, size_t len); void writeDatadogProfilerSetting(int tid, int length, const char *name, diff --git a/ddprof-lib/src/main/java/com/datadoghq/profiler/JavaProfiler.java b/ddprof-lib/src/main/java/com/datadoghq/profiler/JavaProfiler.java index 0dc81982a..87cb63a9f 100644 --- a/ddprof-lib/src/main/java/com/datadoghq/profiler/JavaProfiler.java +++ b/ddprof-lib/src/main/java/com/datadoghq/profiler/JavaProfiler.java @@ -204,6 +204,13 @@ public void recordTaskBlock(long startTicks, long endTicks, recordTaskBlock0(startTicks, endTicks, spanId, rootSpanId, blocker, unblockingSpanId); } + public void recordSpanNode(long spanId, long parentSpanId, long rootSpanId, + long startNanos, long durationNanos, + int encodedOperation, int encodedResource) { + recordSpanNode0(spanId, parentSpanId, rootSpanId, startNanos, durationNanos, + encodedOperation, encodedResource); + } + /** * Add the given thread to the set of profiled threads. * 'filter' option must be enabled to use this method. @@ -355,6 +362,8 @@ private static ThreadContext initializeThreadContext() { private static native void recordTaskBlock0(long startTicks, long endTicks, long spanId, long rootSpanId, long blocker, long unblockingSpanId); + private static native void recordSpanNode0(long spanId, long parentSpanId, long rootSpanId, long startNanos, long durationNanos, int encodedOperation, int encodedResource); + private static native int registerConstant0(String value); private static native void dump0(String recordingFilePath); From 8a18576dd54af59f874b766a257e7417086b0248 Mon Sep 17 00:00:00 2001 From: Paul Fournillon Date: Wed, 8 Apr 2026 16:07:26 +0200 Subject: [PATCH 04/12] feat(jfr): add standard startTime/duration/eventThread to datadog.SpanNode --- ddprof-lib/src/main/cpp/flightRecorder.cpp | 15 +++++++++++++-- ddprof-lib/src/main/cpp/jfrMetadata.cpp | 3 +++ 2 files changed, 16 insertions(+), 2 deletions(-) diff --git a/ddprof-lib/src/main/cpp/flightRecorder.cpp b/ddprof-lib/src/main/cpp/flightRecorder.cpp index c8b8e3893..08c8abf4e 100644 --- a/ddprof-lib/src/main/cpp/flightRecorder.cpp +++ b/ddprof-lib/src/main/cpp/flightRecorder.cpp @@ -1561,14 +1561,25 @@ void Recording::recordTaskBlock(Buffer *buf, int tid, TaskBlockEvent *event) { void Recording::recordSpanNode(Buffer *buf, int tid, u64 spanId, u64 parentSpanId, u64 rootSpanId, u64 startNanos, u64 durationNanos, u32 encodedOperation, u32 encodedResource) { + // Convert epoch nanoseconds to JFR ticks so that standard JFR tooling (JMC, Mission + // Control) can correlate SpanNode events with other events on the recording timeline. + // _start_time is in microseconds; multiply by 1000 to get the recording epoch in nanos. + u64 start_epoch_nanos = _start_time * 1000ULL; + u64 startTicks = _start_ticks + (u64)((double)(long long)(startNanos - start_epoch_nanos) + * TSC::frequency() / NANOTIME_FREQ); + u64 durationTicks = (u64)((double)durationNanos * TSC::frequency() / NANOTIME_FREQ); + flushIfNeeded(buf); int start = buf->skip(1); buf->putVar64(T_SPAN_NODE); + buf->putVar64(startTicks); // startTime (F_TIME_TICKS) + buf->putVar64(durationTicks); // duration (F_DURATION_TICKS) + buf->putVar32(tid); // eventThread (F_CPOOL) buf->putVar64(spanId); buf->putVar64(parentSpanId); buf->putVar64(rootSpanId); - buf->putVar64(startNanos); - buf->putVar64(durationNanos); + buf->putVar64(startNanos); // startNanos — epoch ns, used by backend extractor + buf->putVar64(durationNanos); // durationNanos — ns buf->putVar32(encodedOperation); buf->putVar32(encodedResource); writeEventSizePrefix(buf, start); diff --git a/ddprof-lib/src/main/cpp/jfrMetadata.cpp b/ddprof-lib/src/main/cpp/jfrMetadata.cpp index d7777c257..e832ddadf 100644 --- a/ddprof-lib/src/main/cpp/jfrMetadata.cpp +++ b/ddprof-lib/src/main/cpp/jfrMetadata.cpp @@ -192,6 +192,9 @@ void JfrMetadata::initialize( << (type("datadog.SpanNode", T_SPAN_NODE, "Span Node") << category("Datadog") + << field("startTime", T_LONG, "Start Time", F_TIME_TICKS) + << field("duration", T_LONG, "Duration", F_DURATION_TICKS) + << field("eventThread", T_THREAD, "Event Thread", F_CPOOL) << field("spanId", T_LONG, "Span ID") << field("parentSpanId", T_LONG, "Parent Span ID") << field("localRootSpanId", T_LONG, "Local Root Span ID") From 068770c58b4cb4d113c1250528cdc54d685e14fe Mon Sep 17 00:00:00 2001 From: Paul Fournillon Date: Wed, 8 Apr 2026 16:13:20 +0200 Subject: [PATCH 05/12] feat(jfr): add submittingSpanId to datadog.QueueTime for causal DAG edges --- ddprof-lib/src/main/cpp/event.h | 1 + ddprof-lib/src/main/cpp/flightRecorder.cpp | 1 + ddprof-lib/src/main/cpp/javaApi.cpp | 4 +++- ddprof-lib/src/main/cpp/jfrMetadata.cpp | 3 ++- .../src/main/java/com/datadoghq/profiler/JavaProfiler.java | 7 ++++--- .../java/com/datadoghq/profiler/queue/QueueTimeTest.java | 2 +- 6 files changed, 12 insertions(+), 6 deletions(-) diff --git a/ddprof-lib/src/main/cpp/event.h b/ddprof-lib/src/main/cpp/event.h index 45279eaf7..0f96d251b 100644 --- a/ddprof-lib/src/main/cpp/event.h +++ b/ddprof-lib/src/main/cpp/event.h @@ -184,6 +184,7 @@ typedef struct QueueTimeEvent { u32 _origin; u32 _queueType; u32 _queueLength; + u64 _submitting_span_id; } QueueTimeEvent; #endif // _EVENT_H diff --git a/ddprof-lib/src/main/cpp/flightRecorder.cpp b/ddprof-lib/src/main/cpp/flightRecorder.cpp index 08c8abf4e..62d8589a8 100644 --- a/ddprof-lib/src/main/cpp/flightRecorder.cpp +++ b/ddprof-lib/src/main/cpp/flightRecorder.cpp @@ -1598,6 +1598,7 @@ void Recording::recordQueueTime(Buffer *buf, int tid, QueueTimeEvent *event) { buf->putVar64(event->_queueType); buf->putVar64(event->_queueLength); writeContext(buf, Contexts::get()); + buf->putVar64(event->_submitting_span_id); writeEventSizePrefix(buf, start); flushIfNeeded(buf); } diff --git a/ddprof-lib/src/main/cpp/javaApi.cpp b/ddprof-lib/src/main/cpp/javaApi.cpp index baa6ea607..b47cd159a 100644 --- a/ddprof-lib/src/main/cpp/javaApi.cpp +++ b/ddprof-lib/src/main/cpp/javaApi.cpp @@ -319,7 +319,8 @@ static int dictionarizeClassName(JNIEnv* env, jstring className) { extern "C" DLLEXPORT void JNICALL Java_com_datadoghq_profiler_JavaProfiler_recordQueueEnd0( JNIEnv *env, jclass unused, jlong startTime, jlong endTime, jstring task, - jstring scheduler, jthread origin, jstring queueType, jint queueLength) { + jstring scheduler, jthread origin, jstring queueType, jint queueLength, + jlong submittingSpanId) { int tid = ProfiledThread::currentTid(); if (tid < 0) { return; @@ -349,6 +350,7 @@ Java_com_datadoghq_profiler_JavaProfiler_recordQueueEnd0( event._origin = origin_tid; event._queueType = queue_type_offset; event._queueLength = queueLength; + event._submitting_span_id = (u64)submittingSpanId; Profiler::instance()->recordQueueTime(tid, &event); } diff --git a/ddprof-lib/src/main/cpp/jfrMetadata.cpp b/ddprof-lib/src/main/cpp/jfrMetadata.cpp index e832ddadf..15d185a89 100644 --- a/ddprof-lib/src/main/cpp/jfrMetadata.cpp +++ b/ddprof-lib/src/main/cpp/jfrMetadata.cpp @@ -214,7 +214,8 @@ void JfrMetadata::initialize( << field("queueType", T_CLASS, "Queue Type", F_CPOOL) << field("queueLength", T_INT, "Queue Length on Entry") << field("spanId", T_LONG, "Span ID") - << field("localRootSpanId", T_LONG, "Local Root Span ID") || + << field("localRootSpanId", T_LONG, "Local Root Span ID") + << field("submittingSpanId", T_LONG, "Submitting Span ID") || contextAttributes) << (type("datadog.HeapUsage", T_HEAP_USAGE, "JVM Heap Usage") diff --git a/ddprof-lib/src/main/java/com/datadoghq/profiler/JavaProfiler.java b/ddprof-lib/src/main/java/com/datadoghq/profiler/JavaProfiler.java index 87cb63a9f..1a2fb55f8 100644 --- a/ddprof-lib/src/main/java/com/datadoghq/profiler/JavaProfiler.java +++ b/ddprof-lib/src/main/java/com/datadoghq/profiler/JavaProfiler.java @@ -314,8 +314,9 @@ public void recordQueueTime(long startTicks, Class scheduler, Class queueType, int queueLength, - Thread origin) { - recordQueueEnd0(startTicks, endTicks, task.getName(), scheduler.getName(), origin, queueType.getName(), queueLength); + Thread origin, + long submittingSpanId) { + recordQueueEnd0(startTicks, endTicks, task.getName(), scheduler.getName(), origin, queueType.getName(), queueLength, submittingSpanId); } /** @@ -374,7 +375,7 @@ private static ThreadContext initializeThreadContext() { private static native void recordSettingEvent0(String name, String value, String unit); - private static native void recordQueueEnd0(long startTicks, long endTicks, String task, String scheduler, Thread origin, String queueType, int queueLength); + private static native void recordQueueEnd0(long startTicks, long endTicks, String task, String scheduler, Thread origin, String queueType, int queueLength, long submittingSpanId); private static native long currentTicks0(); diff --git a/ddprof-test/src/test/java/com/datadoghq/profiler/queue/QueueTimeTest.java b/ddprof-test/src/test/java/com/datadoghq/profiler/queue/QueueTimeTest.java index f16912930..f35dbde3c 100644 --- a/ddprof-test/src/test/java/com/datadoghq/profiler/queue/QueueTimeTest.java +++ b/ddprof-test/src/test/java/com/datadoghq/profiler/queue/QueueTimeTest.java @@ -45,7 +45,7 @@ public void run() { profiler.setContext(1, 2); long now = profiler.getCurrentTicks(); if (profiler.isThresholdExceeded(9, start, now)) { - profiler.recordQueueTime(start, now, getClass(), QueueTimeTest.class, ArrayBlockingQueue.class, 10, origin); + profiler.recordQueueTime(start, now, getClass(), QueueTimeTest.class, ArrayBlockingQueue.class, 10, origin, 0L); } profiler.clearContext(); } From 7eda6902c95ddea1fe9c72ed9a2cb4b92d029c54 Mon Sep 17 00:00:00 2001 From: Paul Fournillon Date: Fri, 17 Apr 2026 13:27:51 +0200 Subject: [PATCH 06/12] fix(jfr): write QueueTime submittingSpanId before context attributes --- ddprof-lib/src/main/cpp/flightRecorder.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ddprof-lib/src/main/cpp/flightRecorder.cpp b/ddprof-lib/src/main/cpp/flightRecorder.cpp index 62d8589a8..96716c3e8 100644 --- a/ddprof-lib/src/main/cpp/flightRecorder.cpp +++ b/ddprof-lib/src/main/cpp/flightRecorder.cpp @@ -1597,8 +1597,8 @@ void Recording::recordQueueTime(Buffer *buf, int tid, QueueTimeEvent *event) { buf->putVar64(event->_scheduler); buf->putVar64(event->_queueType); buf->putVar64(event->_queueLength); - writeContext(buf, Contexts::get()); buf->putVar64(event->_submitting_span_id); + writeContext(buf, Contexts::get()); writeEventSizePrefix(buf, start); flushIfNeeded(buf); } From b228ebc83c5551b692ef8712edd348a5579221c4 Mon Sep 17 00:00:00 2001 From: Paul Fournillon Date: Sun, 19 Apr 2026 12:48:19 +0200 Subject: [PATCH 07/12] feat(jfr): emit TaskBlock events for synchronized contention via JVMTI monitor callbacks --- ddprof-lib/src/main/cpp/profiler.cpp | 30 ++++++++++++++++++++++++++++ ddprof-lib/src/main/cpp/profiler.h | 5 +++++ ddprof-lib/src/main/cpp/thread.h | 13 +++++++++++- ddprof-lib/src/main/cpp/vmEntry.cpp | 4 ++++ 4 files changed, 51 insertions(+), 1 deletion(-) diff --git a/ddprof-lib/src/main/cpp/profiler.cpp b/ddprof-lib/src/main/cpp/profiler.cpp index 47ca7ca7d..14634ebb2 100644 --- a/ddprof-lib/src/main/cpp/profiler.cpp +++ b/ddprof-lib/src/main/cpp/profiler.cpp @@ -940,6 +940,36 @@ void Profiler::recordQueueTime(int tid, QueueTimeEvent *event) { _locks[lock_index].unlock(); } +void JNICALL Profiler::MonitorContendedEnterCallback(jvmtiEnv *jvmti, JNIEnv *jni, + jthread thread, jobject object) { + ProfiledThread* thrd = ProfiledThread::current(); + if (thrd == nullptr) return; + Context& ctx = Contexts::get(); + if (ctx.spanId == 0) return; + thrd->_monitor_block.start_ticks = TSC::ticks(); + thrd->_monitor_block.span_id = ctx.spanId; + thrd->_monitor_block.root_span_id = ctx.rootSpanId; + thrd->_monitor_block.obj_addr = (uintptr_t)(void*)object; +} + +void JNICALL Profiler::MonitorContendedEnteredCallback(jvmtiEnv *jvmti, JNIEnv *jni, + jthread thread, jobject object) { + ProfiledThread* thrd = ProfiledThread::current(); + if (thrd == nullptr || thrd->_monitor_block.obj_addr == 0) return; + + TaskBlockEvent event; + event._start_ticks = thrd->_monitor_block.start_ticks; + event._end_ticks = TSC::ticks(); + event._span_id = thrd->_monitor_block.span_id; + event._root_span_id = thrd->_monitor_block.root_span_id; + event._blocker = thrd->_monitor_block.obj_addr; + event._unblocking_span_id = 0; + + thrd->_monitor_block.obj_addr = 0; + + instance()->recordTaskBlock(ProfiledThread::currentTid(), &event); +} + void Profiler::recordTaskBlock(int tid, TaskBlockEvent *event) { u32 lock_index = getLockIndex(tid); if (!_locks[lock_index].tryLock() && diff --git a/ddprof-lib/src/main/cpp/profiler.h b/ddprof-lib/src/main/cpp/profiler.h index c6f1936e9..53e513490 100644 --- a/ddprof-lib/src/main/cpp/profiler.h +++ b/ddprof-lib/src/main/cpp/profiler.h @@ -423,6 +423,11 @@ class alignas(alignof(SpinLock)) Profiler { instance()->onThreadEnd(jvmti, jni, thread); } + static void JNICALL MonitorContendedEnterCallback(jvmtiEnv *jvmti, JNIEnv *jni, + jthread thread, jobject object); + static void JNICALL MonitorContendedEnteredCallback(jvmtiEnv *jvmti, JNIEnv *jni, + jthread thread, jobject object); + // Keep backward compatibility with the upstream async-profiler inline CodeCache* findLibraryByAddress(const void *address) { #ifdef DEBUG diff --git a/ddprof-lib/src/main/cpp/thread.h b/ddprof-lib/src/main/cpp/thread.h index 4cb12d0ca..7614a6e7d 100644 --- a/ddprof-lib/src/main/cpp/thread.h +++ b/ddprof-lib/src/main/cpp/thread.h @@ -54,6 +54,16 @@ class ProfiledThread : public ThreadLocalData { static int popFreeSlot(); // Returns -1 if no free slots static void pushFreeSlot(int slot_index); + // In-flight state for monitor contention tracking (MonitorContendedEnter → + // MonitorContendedEntered). Keyed on obj_addr == 0 meaning "no contention in + // progress". Object addresses are never 0 in the JVM. + struct MonitorBlockState { + u64 start_ticks; + u64 span_id; + u64 root_span_id; + uintptr_t obj_addr; + }; + u64 _pc; u64 _sp; u64 _span_id; @@ -71,10 +81,11 @@ class ProfiledThread : public ThreadLocalData { bool _ctx_tls_initialized; bool _crash_protection_active; Context* _ctx_tls_ptr; + MonitorBlockState _monitor_block{}; ProfiledThread(int buffer_pos, int tid) : ThreadLocalData(), _pc(0), _sp(0), _span_id(0), _root_span_id(0), _crash_depth(0), _buffer_pos(buffer_pos), _tid(tid), _cpu_epoch(0), - _wall_epoch(0), _call_trace_id(0), _recording_epoch(0), _misc_flags(0), _filter_slot_id(-1), _ctx_tls_initialized(false), _crash_protection_active(false), _ctx_tls_ptr(nullptr) {}; + _wall_epoch(0), _call_trace_id(0), _recording_epoch(0), _misc_flags(0), _filter_slot_id(-1), _ctx_tls_initialized(false), _crash_protection_active(false), _ctx_tls_ptr(nullptr), _monitor_block{} {}; void releaseFromBuffer(); diff --git a/ddprof-lib/src/main/cpp/vmEntry.cpp b/ddprof-lib/src/main/cpp/vmEntry.cpp index 272c6e053..0c040200a 100644 --- a/ddprof-lib/src/main/cpp/vmEntry.cpp +++ b/ddprof-lib/src/main/cpp/vmEntry.cpp @@ -435,6 +435,8 @@ bool VM::initProfilerBridge(JavaVM *vm, bool attach) { callbacks.SampledObjectAlloc = ObjectSampler::SampledObjectAlloc; callbacks.GarbageCollectionFinish = LivenessTracker::GarbageCollectionFinish; callbacks.NativeMethodBind = VMStructs::NativeMethodBind; + callbacks.MonitorContendedEnter = Profiler::MonitorContendedEnterCallback; + callbacks.MonitorContendedEntered = Profiler::MonitorContendedEnteredCallback; _jvmti->SetEventCallbacks(&callbacks, sizeof(callbacks)); _jvmti->SetEventNotificationMode(JVMTI_ENABLE, JVMTI_EVENT_VM_DEATH, NULL); @@ -445,6 +447,8 @@ bool VM::initProfilerBridge(JavaVM *vm, bool attach) { JVMTI_EVENT_DYNAMIC_CODE_GENERATED, NULL); _jvmti->SetEventNotificationMode(JVMTI_ENABLE, JVMTI_EVENT_NATIVE_METHOD_BIND, NULL); + _jvmti->SetEventNotificationMode(JVMTI_ENABLE, JVMTI_EVENT_MONITOR_CONTENDED_ENTER, NULL); + _jvmti->SetEventNotificationMode(JVMTI_ENABLE, JVMTI_EVENT_MONITOR_CONTENDED_ENTERED, NULL); if (hotspot_version() == 0 || !CodeHeap::available()) { // Workaround for JDK-8173361: avoid CompiledMethodLoad events when possible From cf3bfd64c19c1098b3384889aff8afbf7ca2a964 Mon Sep 17 00:00:00 2001 From: Paul Fournillon Date: Sun, 19 Apr 2026 12:48:35 +0200 Subject: [PATCH 08/12] feat(jfr): resolve unblocking span for synchronized via direct ObjectMonitor::_owner read --- ddprof-lib/src/main/cpp/profiler.cpp | 11 +++++---- ddprof-lib/src/main/cpp/thread.cpp | 9 +++++++ ddprof-lib/src/main/cpp/thread.h | 24 ++++++++++--------- ddprof-lib/src/main/cpp/vmStructs.cpp | 34 +++++++++++++++++++++++++++ ddprof-lib/src/main/cpp/vmStructs.h | 8 +++++++ 5 files changed, 70 insertions(+), 16 deletions(-) diff --git a/ddprof-lib/src/main/cpp/profiler.cpp b/ddprof-lib/src/main/cpp/profiler.cpp index 14634ebb2..a3f58bcc6 100644 --- a/ddprof-lib/src/main/cpp/profiler.cpp +++ b/ddprof-lib/src/main/cpp/profiler.cpp @@ -946,10 +946,11 @@ void JNICALL Profiler::MonitorContendedEnterCallback(jvmtiEnv *jvmti, JNIEnv *jn if (thrd == nullptr) return; Context& ctx = Contexts::get(); if (ctx.spanId == 0) return; - thrd->_monitor_block.start_ticks = TSC::ticks(); - thrd->_monitor_block.span_id = ctx.spanId; - thrd->_monitor_block.root_span_id = ctx.rootSpanId; - thrd->_monitor_block.obj_addr = (uintptr_t)(void*)object; + thrd->_monitor_block.start_ticks = TSC::ticks(); + thrd->_monitor_block.span_id = ctx.spanId; + thrd->_monitor_block.root_span_id = ctx.rootSpanId; + thrd->_monitor_block.obj_addr = (uintptr_t)(void*)object; + thrd->_monitor_block.unblocking_span_id = VMThread::monitorOwnerSpanId((const void*)object); } void JNICALL Profiler::MonitorContendedEnteredCallback(jvmtiEnv *jvmti, JNIEnv *jni, @@ -963,7 +964,7 @@ void JNICALL Profiler::MonitorContendedEnteredCallback(jvmtiEnv *jvmti, JNIEnv * event._span_id = thrd->_monitor_block.span_id; event._root_span_id = thrd->_monitor_block.root_span_id; event._blocker = thrd->_monitor_block.obj_addr; - event._unblocking_span_id = 0; + event._unblocking_span_id = thrd->_monitor_block.unblocking_span_id; thrd->_monitor_block.obj_addr = 0; diff --git a/ddprof-lib/src/main/cpp/thread.cpp b/ddprof-lib/src/main/cpp/thread.cpp index 5457f3fab..b73a0dea4 100644 --- a/ddprof-lib/src/main/cpp/thread.cpp +++ b/ddprof-lib/src/main/cpp/thread.cpp @@ -133,6 +133,15 @@ ProfiledThread *ProfiledThread::currentSignalSafe() { return __atomic_load_n(&_tls_key_initialized, __ATOMIC_ACQUIRE) ? (ProfiledThread *)pthread_getspecific(_tls_key) : nullptr; } +ProfiledThread* ProfiledThread::findByTid(int tid) { + int size = __atomic_load_n(&_running_buffer_pos, __ATOMIC_ACQUIRE); + for (int i = 0; i < size; i++) { + ProfiledThread* t = _buffer[i]; + if (t != nullptr && t->_tid == tid) return t; + } + return nullptr; +} + int ProfiledThread::popFreeSlot() { int current_top; int new_top; diff --git a/ddprof-lib/src/main/cpp/thread.h b/ddprof-lib/src/main/cpp/thread.h index 7614a6e7d..d991d0558 100644 --- a/ddprof-lib/src/main/cpp/thread.h +++ b/ddprof-lib/src/main/cpp/thread.h @@ -54,16 +54,6 @@ class ProfiledThread : public ThreadLocalData { static int popFreeSlot(); // Returns -1 if no free slots static void pushFreeSlot(int slot_index); - // In-flight state for monitor contention tracking (MonitorContendedEnter → - // MonitorContendedEntered). Keyed on obj_addr == 0 meaning "no contention in - // progress". Object addresses are never 0 in the JVM. - struct MonitorBlockState { - u64 start_ticks; - u64 span_id; - u64 root_span_id; - uintptr_t obj_addr; - }; - u64 _pc; u64 _sp; u64 _span_id; @@ -81,7 +71,6 @@ class ProfiledThread : public ThreadLocalData { bool _ctx_tls_initialized; bool _crash_protection_active; Context* _ctx_tls_ptr; - MonitorBlockState _monitor_block{}; ProfiledThread(int buffer_pos, int tid) : ThreadLocalData(), _pc(0), _sp(0), _span_id(0), _root_span_id(0), _crash_depth(0), _buffer_pos(buffer_pos), _tid(tid), _cpu_epoch(0), @@ -90,7 +79,20 @@ class ProfiledThread : public ThreadLocalData { void releaseFromBuffer(); public: + // In-flight state for monitor contention tracking (MonitorContendedEnter → + // MonitorContendedEntered). Keyed on obj_addr == 0 meaning "no contention in + // progress". Object addresses are never 0 in the JVM. + struct MonitorBlockState { + u64 start_ticks; + u64 span_id; + u64 root_span_id; + uintptr_t obj_addr; + u64 unblocking_span_id; + }; + MonitorBlockState _monitor_block{}; + static ProfiledThread *forTid(int tid) { return new ProfiledThread(-1, tid); } + static ProfiledThread* findByTid(int tid); static ProfiledThread *inBuffer(int buffer_pos) { return new ProfiledThread(buffer_pos, 0); } diff --git a/ddprof-lib/src/main/cpp/vmStructs.cpp b/ddprof-lib/src/main/cpp/vmStructs.cpp index 00ed4f991..3d8700683 100644 --- a/ddprof-lib/src/main/cpp/vmStructs.cpp +++ b/ddprof-lib/src/main/cpp/vmStructs.cpp @@ -9,6 +9,8 @@ #include #include "vmStructs.h" #include "vmEntry.h" +#include "context.h" +#include "thread.h" #include "j9Ext.h" #include "jniHelper.h" #include "jvmHeap.h" @@ -675,6 +677,38 @@ int VMThread::osThreadId() { return -1; } +u64 VMThread::monitorOwnerSpanId(const void* object) { + if (_monitor_owner_offset == 0) return 0; + + // Read the mark word from the object header. + uintptr_t mark = (uintptr_t)SafeAccess::load((void**)object, nullptr); + + // At MonitorContendedEnter time the monitor must be inflated (MONITOR_BIT set + // in bits [1:0]). Guard defensively for any narrow races. + if ((mark & 0x3) != MONITOR_BIT) return 0; + + // Extract ObjectMonitor* (pointer stored in bits 63:2, 4-byte aligned). + const char* monitor = (const char*)(mark & ~(uintptr_t)0x3); + + // Read ObjectMonitor::_owner (JavaThread*). + const char* owner_thread = (const char*)SafeAccess::load( + (void**)(monitor + _monitor_owner_offset), nullptr); + if (owner_thread == nullptr) return 0; + + // JavaThread* -> OS thread ID via existing VMThread infrastructure. + int owner_tid = VMThread::cast(owner_thread)->osThreadId(); + if (owner_tid <= 0) return 0; + + // OS thread ID -> ProfiledThread* -> span ID. + ProfiledThread* owner = ProfiledThread::findByTid(owner_tid); + if (owner == nullptr || !owner->isContextTlsInitialized()) return 0; + + Context* ctx = owner->getContextTlsPtr(); + if (ctx == nullptr) return 0; + + return ctx->spanId; // volatile u64, safe to read cross-thread on x86/aarch64 +} + JNIEnv* VMThread::jni() { if (_env_offset < 0) { return VM::jni(); // fallback for non-HotSpot JVM diff --git a/ddprof-lib/src/main/cpp/vmStructs.h b/ddprof-lib/src/main/cpp/vmStructs.h index 9a51fdc08..6a47edd03 100644 --- a/ddprof-lib/src/main/cpp/vmStructs.h +++ b/ddprof-lib/src/main/cpp/vmStructs.h @@ -196,6 +196,9 @@ typedef void* address; field(_osthread_id_offset, offset, MATCH_SYMBOLS("_thread_id")) \ field_with_version(_osthread_state_offset, offset, 10, MAX_VERSION, MATCH_SYMBOLS("_state")) \ type_end() \ + type_begin(VMObjectMonitor, MATCH_SYMBOLS("ObjectMonitor")) \ + field(_monitor_owner_offset, offset, MATCH_SYMBOLS("_owner")) \ + type_end() \ type_begin(VMThreadShadow, MATCH_SYMBOLS("ThreadShadow")) \ field(_thread_exception_offset, offset, MATCH_SYMBOLS("_exception_file")) \ type_end() \ @@ -714,6 +717,11 @@ DECLARE(VMThread) int osThreadId(); + // Returns the span ID of the JavaThread currently owning the given monitor + // object, reading ObjectMonitor::_owner directly without a safepoint. + // Returns 0 if the owner cannot be determined. + static u64 monitorOwnerSpanId(const void* object); + JNIEnv* jni(); const void** vtable() { From bb71389b515146e7d1d35912b5df5a384b81e1c2 Mon Sep 17 00:00:00 2001 From: Paul Fournillon Date: Mon, 20 Apr 2026 15:09:07 +0200 Subject: [PATCH 09/12] fix(jfr): use signed arithmetic in recordSpanNode to avoid UB for pre-chunk spans --- ddprof-lib/src/main/cpp/flightRecorder.cpp | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/ddprof-lib/src/main/cpp/flightRecorder.cpp b/ddprof-lib/src/main/cpp/flightRecorder.cpp index 96716c3e8..c4b877926 100644 --- a/ddprof-lib/src/main/cpp/flightRecorder.cpp +++ b/ddprof-lib/src/main/cpp/flightRecorder.cpp @@ -1565,8 +1565,14 @@ void Recording::recordSpanNode(Buffer *buf, int tid, u64 spanId, u64 parentSpanI // Control) can correlate SpanNode events with other events on the recording timeline. // _start_time is in microseconds; multiply by 1000 to get the recording epoch in nanos. u64 start_epoch_nanos = _start_time * 1000ULL; - u64 startTicks = _start_ticks + (u64)((double)(long long)(startNanos - start_epoch_nanos) - * TSC::frequency() / NANOTIME_FREQ); + // Use signed arithmetic: a span that started in a previous JFR chunk has + // startNanos < start_epoch_nanos. Unsigned subtraction would wrap around and + // produce a huge positive u64, making (u64)(negative_double) undefined behaviour. + // With signed delta the result is a negative tick offset, placing the event just + // before the chunk boundary, which is the correct behaviour for pre-chunk spans. + long long delta_nanos = (long long)startNanos - (long long)start_epoch_nanos; + long long delta_ticks = (long long)((double)delta_nanos * TSC::frequency() / NANOTIME_FREQ); + u64 startTicks = (u64)((long long)_start_ticks + delta_ticks); u64 durationTicks = (u64)((double)durationNanos * TSC::frequency() / NANOTIME_FREQ); flushIfNeeded(buf); From 2dbbfc244e41f8db11766c38e0f48a8c0b35905c Mon Sep 17 00:00:00 2001 From: Paul Fournillon Date: Tue, 21 Apr 2026 02:18:18 +0200 Subject: [PATCH 10/12] fix(jfr): correct QueueTime submittingSpanId field write order --- ddprof-lib/src/main/cpp/flightRecorder.cpp | 5 +- .../profiler/queue/QueueTimeTest.java | 48 +++++++++++++++++++ 2 files changed, 52 insertions(+), 1 deletion(-) diff --git a/ddprof-lib/src/main/cpp/flightRecorder.cpp b/ddprof-lib/src/main/cpp/flightRecorder.cpp index c4b877926..d6b73ec30 100644 --- a/ddprof-lib/src/main/cpp/flightRecorder.cpp +++ b/ddprof-lib/src/main/cpp/flightRecorder.cpp @@ -1603,8 +1603,11 @@ void Recording::recordQueueTime(Buffer *buf, int tid, QueueTimeEvent *event) { buf->putVar64(event->_scheduler); buf->putVar64(event->_queueType); buf->putVar64(event->_queueLength); - buf->putVar64(event->_submitting_span_id); + // Schema order: spanId (pos 9) and localRootSpanId (pos 10) come from the thread context, + // then submittingSpanId (pos 11). Writing submittingSpanId first was wrong — it landed at + // position 9 while the schema expects the consuming spanId there. writeContext(buf, Contexts::get()); + buf->putVar64(event->_submitting_span_id); writeEventSizePrefix(buf, start); flushIfNeeded(buf); } diff --git a/ddprof-test/src/test/java/com/datadoghq/profiler/queue/QueueTimeTest.java b/ddprof-test/src/test/java/com/datadoghq/profiler/queue/QueueTimeTest.java index f35dbde3c..f023eb4da 100644 --- a/ddprof-test/src/test/java/com/datadoghq/profiler/queue/QueueTimeTest.java +++ b/ddprof-test/src/test/java/com/datadoghq/profiler/queue/QueueTimeTest.java @@ -51,6 +51,54 @@ public void run() { } } + /** + * Regression test for the QueueTime field serialization order bug: submittingSpanId was + * written before writeContext(), placing it at field position 9 (where the schema expects + * the consuming spanId). This caused the backend to read completely wrong span IDs. + *

+ * Verifies that all three span-ID fields (spanId, localRootSpanId, submittingSpanId) round-trip + * correctly when they are all distinct and non-zero. + */ + @Test + public void testQueueTimeFieldOrder() throws Exception { + IAttribute submittingSpanIdAttr = attr("submittingSpanId", "", "", NUMBER); + + Thread origin = Thread.currentThread(); + origin.setName("origin-field-order"); + // Use distinct, non-zero values so any field-order swap is detectable: + // consuming spanId=7, rootSpanId=42 (set via setContext), submittingSpanId=99. + long start = profiler.getCurrentTicks(); + Runnable worker = () -> { + profiler.setContext(7, 42); + long now = profiler.getCurrentTicks(); + profiler.recordQueueTime(start, now, QueueTimeTest.class, QueueTimeTest.class, + ArrayBlockingQueue.class, 1, origin, 99L); + profiler.clearContext(); + }; + Thread thread = new Thread(worker, "destination-field-order"); + Thread.sleep(10); + thread.start(); + thread.join(); + stopProfiler(); + + IItemCollection events = verifyEvents("datadog.QueueTime"); + boolean found = false; + for (IItemIterable it : events) { + IMemberAccessor spanIdAccessor = SPAN_ID.getAccessor(it.getType()); + IMemberAccessor rootSpanIdAccessor = LOCAL_ROOT_SPAN_ID.getAccessor(it.getType()); + IMemberAccessor submittingAccessor = submittingSpanIdAttr.getAccessor(it.getType()); + for (IItem item : it) { + if (spanIdAccessor.getMember(item).longValue() == 7) { + found = true; + assertEquals(7, spanIdAccessor.getMember(item).longValue(), "spanId must be the consuming span (not the submitting span)"); + assertEquals(42, rootSpanIdAccessor.getMember(item).longValue(), "localRootSpanId must be the root"); + assertEquals(99, submittingAccessor.getMember(item).longValue(), "submittingSpanId must be the submitting span (not the root)"); + } + } + } + assertTrue(found, "Expected at least one QueueTime event with spanId=7"); + } + @Test public void testRecordQueueTime() throws Exception { Thread origin = Thread.currentThread(); From c615d1a75e52d72821a592912e520f146c493540 Mon Sep 17 00:00:00 2001 From: Paul Fournillon Date: Tue, 21 Apr 2026 03:30:06 +0200 Subject: [PATCH 11/12] fix(jfr): register emitting thread in CPOOL for causal DAG events --- ddprof-lib/src/main/cpp/flightRecorder.cpp | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/ddprof-lib/src/main/cpp/flightRecorder.cpp b/ddprof-lib/src/main/cpp/flightRecorder.cpp index d6b73ec30..f49baaa74 100644 --- a/ddprof-lib/src/main/cpp/flightRecorder.cpp +++ b/ddprof-lib/src/main/cpp/flightRecorder.cpp @@ -1799,6 +1799,7 @@ void FlightRecorder::recordTraceRoot(int lock_index, int tid, if (rec != nullptr) { Buffer *buf = rec->buffer(lock_index); rec->recordTraceRoot(buf, tid, event); + rec->addThread(lock_index, tid); } } } @@ -1811,6 +1812,7 @@ void FlightRecorder::recordQueueTime(int lock_index, int tid, if (rec != nullptr) { Buffer *buf = rec->buffer(lock_index); rec->recordQueueTime(buf, tid, event); + rec->addThread(lock_index, tid); } } } @@ -1823,6 +1825,7 @@ void FlightRecorder::recordTaskBlock(int lock_index, int tid, if (rec != nullptr) { Buffer *buf = rec->buffer(lock_index); rec->recordTaskBlock(buf, tid, event); + rec->addThread(lock_index, tid); } } } @@ -1835,6 +1838,11 @@ void FlightRecorder::recordSpanNode(int lock_index, int tid, u64 spanId, u64 par if (rec != nullptr) { Buffer *buf = rec->buffer(lock_index); rec->recordSpanNode(buf, tid, spanId, parentSpanId, rootSpanId, startNanos, durationNanos, encodedOperation, encodedResource); + // Register the emitting thread in the JFR thread CPOOL so that JMC can resolve + // the eventThread reference. Without this, threads that emit SpanNode events but + // have no CPU/wall profiling samples in the current chunk are absent from the + // CPOOL, causing IMCThread to be null in the backend and threadId=0 ("unknown span"). + rec->addThread(lock_index, tid); } } } From 3090e994f00732f2836e17ccca8a5d4b207de002 Mon Sep 17 00:00:00 2001 From: Paul Fournillon Date: Wed, 22 Apr 2026 10:16:58 +0200 Subject: [PATCH 12/12] fix(jfr): correct QueueTime submittingSpanId write position in binary --- ddprof-lib/src/main/cpp/flightRecorder.cpp | 27 ++++++++++++++++++---- 1 file changed, 22 insertions(+), 5 deletions(-) diff --git a/ddprof-lib/src/main/cpp/flightRecorder.cpp b/ddprof-lib/src/main/cpp/flightRecorder.cpp index f49baaa74..aa001778f 100644 --- a/ddprof-lib/src/main/cpp/flightRecorder.cpp +++ b/ddprof-lib/src/main/cpp/flightRecorder.cpp @@ -1603,11 +1603,28 @@ void Recording::recordQueueTime(Buffer *buf, int tid, QueueTimeEvent *event) { buf->putVar64(event->_scheduler); buf->putVar64(event->_queueType); buf->putVar64(event->_queueLength); - // Schema order: spanId (pos 9) and localRootSpanId (pos 10) come from the thread context, - // then submittingSpanId (pos 11). Writing submittingSpanId first was wrong — it landed at - // position 9 while the schema expects the consuming spanId there. - writeContext(buf, Contexts::get()); - buf->putVar64(event->_submitting_span_id); + // The schema declares fields in this order: + // spanId, localRootSpanId, submittingSpanId, contextAttr[0..n] + // writeContext() would emit spanId + localRootSpanId + contextAttrs[0..n] in one shot, + // leaving no room to insert submittingSpanId between localRootSpanId and contextAttrs. + // Inline the context fields manually so submittingSpanId lands at the correct position. + Context &ctx = Contexts::get(); + u64 spanId = 0, rootSpanId = 0; + u64 stored = ctx.checksum; + if (stored != 0) { + spanId = ctx.spanId; + rootSpanId = ctx.rootSpanId; + if (stored != Contexts::checksum(spanId, rootSpanId)) { + spanId = 0; + rootSpanId = 0; + } + } + buf->putVar64(spanId); // schema pos: spanId + buf->putVar64(rootSpanId); // schema pos: localRootSpanId + buf->putVar64(event->_submitting_span_id); // schema pos: submittingSpanId (CORRECT position) + for (size_t i = 0; i < Profiler::instance()->numContextAttributes(); i++) { + buf->putVar32(ctx.get_tag(i).value); // schema pos: contextAttr[i] + } writeEventSizePrefix(buf, start); flushIfNeeded(buf); }