Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
19 commits
Select commit Hold shift + click to select a range
f20f5e4
fix: restore ContextSetter snapshot/restore via readContextValue()
jbachorik Apr 22, 2026
30456f0
perf: zero-allocation fast path for readContextAttribute via per-slot…
jbachorik Apr 22, 2026
40edc1a
fix: cold-path readContextAttribute validity guard and cache population
jbachorik Apr 22, 2026
dd1b619
fix: cache absent-key sentinel in readContextAttribute
jbachorik Apr 23, 2026
8c01712
fix: register thread in testSnapshotRestore
jbachorik Apr 23, 2026
502daa1
address: clear sidecar on attrs_data overflow in setContextAttributeD…
jbachorik Apr 23, 2026
9eda525
address: apply muse review findings (docs, tests, comments)
jbachorik Apr 23, 2026
96ab30c
fix: use Java 8-compatible String fill; fix JUnit5 assertTrue arg order
jbachorik Apr 23, 2026
8efd902
fix: correct JUnit5 assertNull arg order in testPutClearsCustomSlots
jbachorik Apr 23, 2026
97739a7
fix: use session-unique tag values to avoid attrCache hits across ret…
jbachorik Apr 23, 2026
b8029cf
Potential fix for pull request finding
jbachorik Apr 23, 2026
3514d25
feat: bulk snapshot/restore of ThreadContext via combined buffer
jbachorik Apr 23, 2026
d83ef11
refactor: drop Java-side value cache; reject oversized values up front
jbachorik Apr 23, 2026
a756848
address: muse chorus findings on d83ef11c7
jbachorik Apr 23, 2026
bb8dc7b
refactor: collapse record/sidecar/combined ByteBuffers into one ctxBu…
jbachorik Apr 23, 2026
0dd0c63
fix: preserve pre-snapshot valid state in ThreadContext.snapshot/restore
jbachorik Apr 23, 2026
551a140
address: Copilot review comments on PR #496
jbachorik Apr 24, 2026
357267e
address: clarify readContextAttribute is test-only
jbachorik Apr 24, 2026
0aefdc5
remove test-only readback plumbing from public API
jbachorik Apr 24, 2026
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
40 changes: 21 additions & 19 deletions ddprof-lib/src/main/cpp/javaApi.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -530,7 +530,7 @@ Java_com_datadoghq_profiler_OTelContext_readProcessCtx0(JNIEnv *env, jclass unus
#endif
}

extern "C" DLLEXPORT jobjectArray JNICALL
extern "C" DLLEXPORT jobject JNICALL
Java_com_datadoghq_profiler_JavaProfiler_initializeContextTLS0(JNIEnv* env, jclass unused, jlongArray metadata) {
ProfiledThread* thrd = ProfiledThread::current();
assert(thrd != nullptr);
Expand All @@ -541,35 +541,37 @@ Java_com_datadoghq_profiler_JavaProfiler_initializeContextTLS0(JNIEnv* env, jcla

OtelThreadContextRecord* record = thrd->getOtelContextRecord();

// Contiguity of record + tag_encodings + LRS is enforced by alignas(8) on _otel_ctx_record
// plus sizeof(OtelThreadContextRecord) being a multiple of 8 (see thread.h).
// Compile-time alignment check always runs; runtime pointer-layout check is debug-only.
static_assert(DD_TAGS_CAPACITY * sizeof(u32) % alignof(u64) == 0,
"tag encodings array size must be aligned to u64 for contiguous sidecar layout");
#ifdef DEBUG
uint8_t* record_start = reinterpret_cast<uint8_t*>(record);
uint8_t* sidecar_start = reinterpret_cast<uint8_t*>(thrd->getOtelTagEncodingsPtr());
assert(sidecar_start == record_start + OTEL_MAX_RECORD_SIZE
&& "_otel_ctx_record and _otel_tag_encodings must be contiguous");
#endif

// Fill metadata[6]: [VALID_OFFSET, TRACE_ID_OFFSET, SPAN_ID_OFFSET,
// ATTRS_DATA_SIZE_OFFSET, ATTRS_DATA_OFFSET, LRS_SIDECAR_OFFSET]
// ATTRS_DATA_SIZE_OFFSET, ATTRS_DATA_OFFSET, LRS_OFFSET].
// All offsets are absolute within the unified buffer returned below.
if (metadata != nullptr && env->GetArrayLength(metadata) >= 6) {
jlong meta[6];
meta[0] = (jlong)offsetof(OtelThreadContextRecord, valid);
meta[1] = (jlong)offsetof(OtelThreadContextRecord, trace_id);
meta[2] = (jlong)offsetof(OtelThreadContextRecord, span_id);
meta[3] = (jlong)offsetof(OtelThreadContextRecord, attrs_data_size);
meta[4] = (jlong)offsetof(OtelThreadContextRecord, attrs_data);
meta[5] = (jlong)(DD_TAGS_CAPACITY * sizeof(u32)); // LRS sidecar offset in sidecar buffer
meta[5] = (jlong)(OTEL_MAX_RECORD_SIZE + DD_TAGS_CAPACITY * sizeof(u32));
env->SetLongArrayRegion(metadata, 0, 6, meta);
}

// Create 2 DirectByteBuffers: [record, sidecar]
jclass bbClass = env->FindClass("java/nio/ByteBuffer");
jobjectArray result = env->NewObjectArray(2, bbClass, nullptr);

// recordBuffer: 640 bytes over the OtelThreadContextRecord
jobject recordBuf = env->NewDirectByteBuffer((void*)record, (jlong)OTEL_MAX_RECORD_SIZE);
env->SetObjectArrayElement(result, 0, recordBuf);

// sidecarBuffer: covers _otel_tag_encodings[DD_TAGS_CAPACITY] + _otel_local_root_span_id (contiguous)
static_assert(DD_TAGS_CAPACITY * sizeof(u32) % alignof(u64) == 0,
"tag encodings array size must be aligned to u64 for contiguous sidecar layout");
size_t sidecarSize = DD_TAGS_CAPACITY * sizeof(u32) + sizeof(u64);
jobject sidecarBuf = env->NewDirectByteBuffer((void*)thrd->getOtelTagEncodingsPtr(), (jlong)sidecarSize);
env->SetObjectArrayElement(result, 1, sidecarBuf);

return result;
// Single contiguous view over [record | tag_encodings | LRS] — used for per-field
// access and for bulk snapshot/restore. All three regions are in one ProfiledThread
// memory block.
size_t totalSize = OTEL_MAX_RECORD_SIZE + DD_TAGS_CAPACITY * sizeof(u32) + sizeof(u64);
return env->NewDirectByteBuffer((void*)record, (jlong)totalSize);
}

extern "C" DLLEXPORT jint JNICALL
Expand Down
7 changes: 6 additions & 1 deletion ddprof-lib/src/main/cpp/thread.h
Original file line number Diff line number Diff line change
Expand Up @@ -73,10 +73,15 @@ class ProfiledThread : public ThreadLocalData {
UnwindFailures _unwind_failures;
bool _otel_ctx_initialized;
bool _crash_protection_active;
OtelThreadContextRecord _otel_ctx_record;
// alignas(8) + sizeof(OtelThreadContextRecord)==640 (multiple of 8) guarantee
// _otel_tag_encodings sits at +640 with no padding, so the three fields form one
// 688-byte contiguous region exposed as a combined DirectByteBuffer.
alignas(8) OtelThreadContextRecord _otel_ctx_record;
// These two fields MUST be contiguous and 8-byte aligned — together with
// _otel_ctx_record above, the JNI layer exposes them as one combined
// DirectByteBuffer, and VarHandle long views require 8-byte alignment.
// Read invariant: sidecar readers must gate on record->valid (see ContextApi::get).
// ThreadContext.restore() relies on this to perform a bulk memcpy under valid=0.
alignas(8) u32 _otel_tag_encodings[DD_TAGS_CAPACITY];
u64 _otel_local_root_span_id;

Expand Down
26 changes: 13 additions & 13 deletions ddprof-lib/src/main/java/com/datadoghq/profiler/JavaProfiler.java
Original file line number Diff line number Diff line change
Expand Up @@ -225,7 +225,6 @@ void copyTags(int[] snapshot) {
tlsContextStorage.get().copyCustoms(snapshot);
}

/**
/**
* Dumps the JFR recording at the provided path
* @param recording the path to the recording
Expand Down Expand Up @@ -305,11 +304,11 @@ public Map<String, Long> getDebugCounters() {

private static ThreadContext initializeThreadContext() {
long[] metadata = new long[6];
ByteBuffer[] buffers = initializeContextTLS0(metadata);
if (buffers == null) {
ByteBuffer buffer = initializeContextTLS0(metadata);
if (buffer == null) {
throw new IllegalStateException("Failed to initialize OTEL TLS — ProfiledThread not available");
}
return new ThreadContext(buffers[0], buffers[1], metadata);
return new ThreadContext(buffer, metadata);
}

private static native boolean init0();
Expand Down Expand Up @@ -342,19 +341,20 @@ private static ThreadContext initializeThreadContext() {
private static native String getStatus0();

/**
* Initializes context TLS for the current thread and returns 2 DirectByteBuffers.
* Sets otel_thread_ctx_v1 permanently to the thread's OtelThreadContextRecord.
* Initializes context TLS for the current thread and returns a single DirectByteBuffer
* spanning the OTEP record + tag-encoding sidecar + LRS (688 bytes, contiguous in
* ProfiledThread). Sets otel_thread_ctx_v1 permanently to the thread's
* OtelThreadContextRecord.
*
* @param metadata output array filled with:
* [0] VALID_OFFSET — offset of 'valid' field in the record
* [1] TRACE_ID_OFFSET — offset of 'trace_id' field in the record
* [2] SPAN_ID_OFFSET — offset of 'span_id' field in the record
* @param metadata output array filled with absolute offsets into the returned buffer:
* [0] VALID_OFFSET — offset of 'valid' field
* [1] TRACE_ID_OFFSET — offset of 'trace_id' field
* [2] SPAN_ID_OFFSET — offset of 'span_id' field
* [3] ATTRS_DATA_SIZE_OFFSET — offset of 'attrs_data_size' field
* [4] ATTRS_DATA_OFFSET — offset of 'attrs_data' field
* [5] LRS_SIDECAR_OFFSET — offset of local_root_span_id in sidecar buffer
* @return array of 2 ByteBuffers: [recordBuffer, sidecarBuffer]
* [5] LRS_OFFSET — offset of local_root_span_id
*/
private static native ByteBuffer[] initializeContextTLS0(long[] metadata);
private static native ByteBuffer initializeContextTLS0(long[] metadata);

public ThreadContext getThreadContext() {
return tlsContextStorage.get();
Expand Down
92 changes: 92 additions & 0 deletions ddprof-lib/src/main/java/com/datadoghq/profiler/ScopeStack.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,92 @@
/*
* Copyright 2026 Datadog, Inc
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*/
package com.datadoghq.profiler;

import java.util.Arrays;

/**
 * Stack of saved {@link ThreadContext} states, one entry per open nested scope.
 *
 * <p>{@link #enter} asks the context to snapshot itself into the next free slot and
 * {@link #exit} asks it to restore from the most recently written slot. Every slot is
 * {@code ThreadContext.SNAPSHOT_SIZE} bytes wide. Instances carry no synchronization and
 * are meant to be used by a single thread only.
 *
 * <p>Slot storage comes in two tiers so that shallow nesting never allocates:
 * <ul>
 * <li>Depths 0 .. {@value #FAST_DEPTH}-1 share one byte[] created with the instance.</li>
 * <li>Depths {@value #FAST_DEPTH} and above live in {@value #CHUNK_DEPTH}-slot chunks that
 * are created on first use and kept for later reuse.</li>
 * </ul>
 */
public final class ScopeStack {
    private static final int FAST_DEPTH = 6;
    private static final int CHUNK_DEPTH = 12;
    private static final int SLOT_SIZE = ThreadContext.SNAPSHOT_SIZE;
    // Number of chunk references reserved the first time the overflow tier is touched.
    private static final int INITIAL_CHUNK_SLOTS = 4;

    // Eagerly allocated storage for the first FAST_DEPTH slots.
    private final byte[] fast = new byte[FAST_DEPTH * SLOT_SIZE];
    // chunks[i] holds depths [FAST_DEPTH + i*CHUNK_DEPTH .. FAST_DEPTH + (i+1)*CHUNK_DEPTH).
    private byte[][] chunks;
    // Number of enter() calls not yet matched by exit().
    private int depth;

    /**
     * Snapshots {@code ctx} into the slot for the current depth, then increments the depth.
     *
     * @param ctx context to capture
     */
    public void enter(ThreadContext ctx) {
        final int level = depth;
        final byte[] target = storageAt(level);
        final int at = byteOffsetAt(level);
        ctx.snapshot(target, at);
        // Only advance once the snapshot call has returned normally.
        depth = level + 1;
    }

    /**
     * Restores {@code ctx} from the most recently written slot, then decrements the depth.
     *
     * @param ctx context to restore into
     * @throws IllegalStateException if there is no outstanding {@link #enter}
     */
    public void exit(ThreadContext ctx) {
        final int level = depth - 1;
        if (level < 0) {
            throw new IllegalStateException("ScopeStack underflow");
        }
        final byte[] source = storageAt(level);
        final int at = byteOffsetAt(level);
        ctx.restore(source, at);
        depth = level;
    }

    /** Current nesting depth (number of outstanding {@link #enter} calls). */
    public int depth() {
        return depth;
    }

    /** Returns the array that holds the slot for {@code level}. */
    private byte[] storageAt(int level) {
        if (level >= FAST_DEPTH) {
            // Overflow tier: ensureChunk hands back an already-created chunk when one
            // exists for this band, so revisiting a depth does not allocate again.
            return ensureChunk((level - FAST_DEPTH) / CHUNK_DEPTH);
        }
        return fast;
    }

    /** Returns the byte offset of the slot for {@code level} inside its backing array. */
    private static int byteOffsetAt(int level) {
        final int slotIndex;
        if (level < FAST_DEPTH) {
            slotIndex = level;
        } else {
            slotIndex = (level - FAST_DEPTH) % CHUNK_DEPTH;
        }
        return slotIndex * SLOT_SIZE;
    }

    /** Returns chunk {@code idx}, creating the chunk table and/or the chunk when missing. */
    private byte[] ensureChunk(int idx) {
        byte[][] table = chunks;
        if (table == null) {
            table = new byte[INITIAL_CHUNK_SLOTS][];
            chunks = table;
        } else if (table.length <= idx) {
            // Double the table until idx fits; existing chunk references are carried over.
            int grown = table.length;
            do {
                grown *= 2;
            } while (grown <= idx);
            table = Arrays.copyOf(table, grown);
            chunks = table;
        }
        byte[] chunk = table[idx];
        if (chunk != null) {
            return chunk;
        }
        chunk = new byte[CHUNK_DEPTH * SLOT_SIZE];
        table[idx] = chunk;
        return chunk;
    }
}
Loading
Loading