From f20f5e49eee973a19636db3e56d34a38c796d830 Mon Sep 17 00:00:00 2001 From: Jaroslav Bachorik Date: Wed, 22 Apr 2026 17:53:36 +0200 Subject: [PATCH 01/19] fix: restore ContextSetter snapshot/restore via readContextValue() --- .../com/datadoghq/profiler/ContextSetter.java | 18 ++++++++++ .../com/datadoghq/profiler/JavaProfiler.java | 5 ++- .../com/datadoghq/profiler/ThreadContext.java | 6 +++- .../profiler/context/TagContextTest.java | 34 +++++++++++++++++++ 4 files changed, 61 insertions(+), 2 deletions(-) diff --git a/ddprof-lib/src/main/java/com/datadoghq/profiler/ContextSetter.java b/ddprof-lib/src/main/java/com/datadoghq/profiler/ContextSetter.java index bff497b80..cf9cf9d21 100644 --- a/ddprof-lib/src/main/java/com/datadoghq/profiler/ContextSetter.java +++ b/ddprof-lib/src/main/java/com/datadoghq/profiler/ContextSetter.java @@ -50,6 +50,24 @@ public boolean setContextValue(int offset, String value) { return false; } + /** + * Returns the string value currently set at the given attribute offset on this thread, + * or {@code null} if no value is set or the offset is negative. + * + *

Reads from thread-local storage — caller and setter must be the same thread. + * Allocates a {@code String} per call; avoid in tight inner loops. + */ + public String readContextValue(int offset) { + if (offset >= 0) { + return profiler.readContextAttribute(offset); + } + return null; + } + + public String readContextValue(String attribute) { + return readContextValue(offsetOf(attribute)); + } + public boolean clearContextValue(String attribute) { return clearContextValue(offsetOf(attribute)); } diff --git a/ddprof-lib/src/main/java/com/datadoghq/profiler/JavaProfiler.java b/ddprof-lib/src/main/java/com/datadoghq/profiler/JavaProfiler.java index c690b9e47..3b9d57d1d 100644 --- a/ddprof-lib/src/main/java/com/datadoghq/profiler/JavaProfiler.java +++ b/ddprof-lib/src/main/java/com/datadoghq/profiler/JavaProfiler.java @@ -225,7 +225,10 @@ void copyTags(int[] snapshot) { tlsContextStorage.get().copyCustoms(snapshot); } - /** + String readContextAttribute(int offset) { + return tlsContextStorage.get().readContextAttribute(offset); + } + /** * Dumps the JFR recording at the provided path * @param recording the path to the recording diff --git a/ddprof-lib/src/main/java/com/datadoghq/profiler/ThreadContext.java b/ddprof-lib/src/main/java/com/datadoghq/profiler/ThreadContext.java index ed699d57c..12f7d15e5 100644 --- a/ddprof-lib/src/main/java/com/datadoghq/profiler/ThreadContext.java +++ b/ddprof-lib/src/main/java/com/datadoghq/profiler/ThreadContext.java @@ -412,7 +412,11 @@ private int compactOtepAttribute(int otepKeyIndex) { /** * Reads a custom attribute value from attrs_data by key index. * Scans the attrs_data entries and returns the UTF-8 string for the matching key. - * Intended for tests only. + * Used by {@code ContextSetter.readContextValue()} to support snapshot/restore of + * nested profiling scopes. + * + *

Allocates a {@code byte[]} and a {@code String} per call. Not safe for + * signal handlers. Fine at scope-open/close granularity; avoid in tight inner loops. * * @param keyIndex 0-based user key index (same as passed to setContextAttribute) * @return the attribute value string, or null if not set diff --git a/ddprof-test/src/test/java/com/datadoghq/profiler/context/TagContextTest.java b/ddprof-test/src/test/java/com/datadoghq/profiler/context/TagContextTest.java index 244865576..89434bece 100644 --- a/ddprof-test/src/test/java/com/datadoghq/profiler/context/TagContextTest.java +++ b/ddprof-test/src/test/java/com/datadoghq/profiler/context/TagContextTest.java @@ -138,6 +138,40 @@ public void test() throws InterruptedException { } } + @Test + public void testSnapshotRestore() throws Exception { + // J9 does not initialize ThreadContext for non-profiled threads; skip. + Assumptions.assumeTrue(!Platform.isJ9()); + ContextSetter contextSetter = new ContextSetter(profiler, Arrays.asList("tag1", "tag2")); + + // Initially both slots are empty + assertNull(contextSetter.readContextValue(0)); + assertNull(contextSetter.readContextValue("tag2")); + + // Set a value and read it back + assertTrue(contextSetter.setContextValue("tag1", "before")); + assertEquals("before", contextSetter.readContextValue(0)); + assertEquals("before", contextSetter.readContextValue("tag1")); + + // Snapshot the string, overwrite, then restore + String saved = contextSetter.readContextValue("tag1"); + assertTrue(contextSetter.setContextValue("tag1", "inside")); + assertEquals("inside", contextSetter.readContextValue("tag1")); + + // Restore via setContextValue + assertTrue(contextSetter.setContextValue("tag1", saved)); + assertEquals("before", contextSetter.readContextValue("tag1")); + + // put/clear/put cycle: verify offset stability across state transitions + assertTrue(contextSetter.clearContextValue("tag1")); + assertNull(contextSetter.readContextValue("tag1")); + assertTrue(contextSetter.setContextValue("tag1", "after")); + assertEquals("after", contextSetter.readContextValue("tag1")); + + // tag2 was never set; readContextValue by name returns null + assertNull(contextSetter.readContextValue("tag2")); + } + private void work(ContextSetter contextSetter, String contextAttribute, String contextValue) throws InterruptedException { assertTrue(contextSetter.setContextValue(contextAttribute, contextValue)); From 30456f08f9d423a2bc5e0bb662ebe235d9a01ed9 Mon Sep 17 00:00:00 2001 From: Jaroslav Bachorik Date: Wed, 22 Apr 2026 18:29:15 +0200 Subject: [PATCH 02/19] perf: zero-allocation fast path for readContextAttribute via per-slot cache --- .../com/datadoghq/profiler/ContextSetter.java | 1 - .../com/datadoghq/profiler/ThreadContext.java | 26 +++++++++++++++---- 2 files changed, 21 insertions(+), 6 deletions(-) diff --git a/ddprof-lib/src/main/java/com/datadoghq/profiler/ContextSetter.java b/ddprof-lib/src/main/java/com/datadoghq/profiler/ContextSetter.java index cf9cf9d21..38545c0f8 100644 --- a/ddprof-lib/src/main/java/com/datadoghq/profiler/ContextSetter.java +++ b/ddprof-lib/src/main/java/com/datadoghq/profiler/ContextSetter.java @@ -55,7 +55,6 @@ public boolean setContextValue(int offset, String value) { * or {@code null} if no value is set or the offset is negative. * *

Reads from thread-local storage — caller and setter must be the same thread. - * Allocates a {@code String} per call; avoid in tight inner loops. */ public String readContextValue(int offset) { if (offset >= 0) { diff --git a/ddprof-lib/src/main/java/com/datadoghq/profiler/ThreadContext.java b/ddprof-lib/src/main/java/com/datadoghq/profiler/ThreadContext.java index 12f7d15e5..19a3ec22e 100644 --- a/ddprof-lib/src/main/java/com/datadoghq/profiler/ThreadContext.java +++ b/ddprof-lib/src/main/java/com/datadoghq/profiler/ThreadContext.java @@ -59,6 +59,13 @@ public final class ThreadContext { private final int[] attrCacheEncodings = new int[CACHE_SIZE]; private final byte[][] attrCacheBytes = new byte[CACHE_SIZE][]; + // Per-slot read-back cache: indexedValueCache[keyIndex] = the String last + // successfully written to attrs_data at that slot. Kept in sync by every + // write (setContextAttributeDirect) and clear (clearContextAttribute, + // setContextDirect). Allows readContextAttribute to return without scanning + // attrs_data or allocating on the warm path. + private final String[] indexedValueCache = new String[MAX_CUSTOM_SLOTS]; + // OTEP record field offsets (from packed struct) private final int validOffset; private final int traceIdOffset; @@ -174,6 +181,7 @@ public void clearContextAttribute(int keyIndex) { sidecarBuffer.putInt(keyIndex * Integer.BYTES, 0); removeOtepAttribute(otepKeyIndex); attach(); + indexedValueCache[keyIndex] = null; } public void copyCustoms(int[] value) { @@ -246,6 +254,9 @@ private boolean setContextAttributeDirect(int keyIndex, String value) { sidecarBuffer.putInt(keyIndex * Integer.BYTES, encoding); boolean written = replaceOtepAttribute(otepKeyIndex, utf8); attach(); + // Keep the read-back cache in sync. Only cache on successful attrs_data write; + // on overflow (written=false) the slot was compacted out so null is correct. + indexedValueCache[keyIndex] = written ? value : null; return written; } @@ -272,6 +283,7 @@ private void setContextDirect(long localRootSpanId, long spanId, long trHi, long for (int i = 0; i < MAX_CUSTOM_SLOTS; i++) { // i * Integer.BYTES: byte offset into sidecar buffer for int slot i sidecarBuffer.putInt(i * Integer.BYTES, 0); + indexedValueCache[i] = null; } // Reset attrs_data_size to contain only the fixed LRS entry, discarding // any custom attribute entries written during the previous span. @@ -307,6 +319,7 @@ private void clearContextDirect() { writeLrsHex(0); for (int i = 0; i < MAX_CUSTOM_SLOTS; i++) { sidecarBuffer.putInt(i * Integer.BYTES, 0); + indexedValueCache[i] = null; } sidecarBuffer.putLong(lrsSidecarOffset, 0); } @@ -410,14 +423,12 @@ private int compactOtepAttribute(int otepKeyIndex) { } /** - * Reads a custom attribute value from attrs_data by key index. - * Scans the attrs_data entries and returns the UTF-8 string for the matching key. + * Reads a custom attribute value by key index. + * Returns the cached String from the last successful write on this thread (zero allocation). + * Falls back to scanning attrs_data when the cache is cold (e.g. after a session restart). * Used by {@code ContextSetter.readContextValue()} to support snapshot/restore of * nested profiling scopes. * - *

Allocates a {@code byte[]} and a {@code String} per call. Not safe for - * signal handlers. Fine at scope-open/close granularity; avoid in tight inner loops. - * * @param keyIndex 0-based user key index (same as passed to setContextAttribute) * @return the attribute value string, or null if not set */ @@ -425,6 +436,11 @@ public String readContextAttribute(int keyIndex) { if (keyIndex < 0 || keyIndex >= MAX_CUSTOM_SLOTS) { return null; } + String cached = indexedValueCache[keyIndex]; + if (cached != null) { + return cached; + } + // Cold path: scan attrs_data (only on first read after session restart). int otepKeyIndex = keyIndex + 1; int size = recordBuffer.getShort(attrsDataSizeOffset) & 0xFFFF; int pos = 0; From 40edc1a056288d2312e5c1e00dd8a95c1af1aa82 Mon Sep 17 00:00:00 2001 From: Jaroslav Bachorik Date: Wed, 22 Apr 2026 19:13:34 +0200 Subject: [PATCH 03/19] fix: cold-path readContextAttribute validity guard and cache population MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Return null when valid=0 (after clearContext, attrs_data_size is intentionally stale — scanning it would return cleared attributes) - Cache cold-path result in indexedValueCache to avoid repeated scans after resetThreadContext() Co-Authored-By: Claude Sonnet 4.6 --- .../main/java/com/datadoghq/profiler/ThreadContext.java | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/ddprof-lib/src/main/java/com/datadoghq/profiler/ThreadContext.java b/ddprof-lib/src/main/java/com/datadoghq/profiler/ThreadContext.java index 19a3ec22e..14d3eb882 100644 --- a/ddprof-lib/src/main/java/com/datadoghq/profiler/ThreadContext.java +++ b/ddprof-lib/src/main/java/com/datadoghq/profiler/ThreadContext.java @@ -441,6 +441,11 @@ public String readContextAttribute(int keyIndex) { return cached; } // Cold path: scan attrs_data (only on first read after session restart). + // Guard against stale attrs_data after clearContext(): valid=0 means the record + // was cleared and attrs_data_size was intentionally not reset (see clearContextDirect). + if (recordBuffer.get(validOffset) == 0) { + return null; + } int otepKeyIndex = keyIndex + 1; int size = recordBuffer.getShort(attrsDataSizeOffset) & 0xFFFF; int pos = 0; @@ -455,7 +460,9 @@ public String readContextAttribute(int keyIndex) { for (int i = 0; i < len; i++) { bytes[i] = recordBuffer.get(attrsDataOffset + pos + 2 + i); } - return new String(bytes, StandardCharsets.UTF_8); + String value = new String(bytes, StandardCharsets.UTF_8); + indexedValueCache[keyIndex] = value; + return value; } pos += 2 + len; } From dd1b619459ac89a71fd2a9b195c25dce573e12e7 Mon Sep 17 00:00:00 2001 From: Jaroslav Bachorik Date: Thu, 23 Apr 2026 09:27:28 +0200 Subject: [PATCH 04/19] fix: cache absent-key sentinel in readContextAttribute Co-Authored-By: muse --- .../com/datadoghq/profiler/ThreadContext.java | 16 +++++++++++----- 1 file changed, 11 insertions(+), 5 deletions(-) diff --git a/ddprof-lib/src/main/java/com/datadoghq/profiler/ThreadContext.java b/ddprof-lib/src/main/java/com/datadoghq/profiler/ThreadContext.java index 14d3eb882..ca39afd50 100644 --- a/ddprof-lib/src/main/java/com/datadoghq/profiler/ThreadContext.java +++ b/ddprof-lib/src/main/java/com/datadoghq/profiler/ThreadContext.java @@ -64,6 +64,8 @@ public final class ThreadContext { // write (setContextAttributeDirect) and clear (clearContextAttribute, // setContextDirect). Allows readContextAttribute to return without scanning // attrs_data or allocating on the warm path. + // null = not yet scanned; ABSENT = scanned/cleared, known absent; other = cached value. + private static final String ABSENT = new String(""); private final String[] indexedValueCache = new String[MAX_CUSTOM_SLOTS]; // OTEP record field offsets (from packed struct) @@ -181,7 +183,7 @@ public void clearContextAttribute(int keyIndex) { sidecarBuffer.putInt(keyIndex * Integer.BYTES, 0); removeOtepAttribute(otepKeyIndex); attach(); - indexedValueCache[keyIndex] = null; + indexedValueCache[keyIndex] = ABSENT; } public void copyCustoms(int[] value) { @@ -255,8 +257,8 @@ private boolean setContextAttributeDirect(int keyIndex, String value) { boolean written = replaceOtepAttribute(otepKeyIndex, utf8); attach(); // Keep the read-back cache in sync. Only cache on successful attrs_data write; - // on overflow (written=false) the slot was compacted out so null is correct. - indexedValueCache[keyIndex] = written ? value : null; + // on overflow (written=false) the slot was compacted out; ABSENT prevents a futile rescan. + indexedValueCache[keyIndex] = written ? value : ABSENT; return written; } @@ -283,7 +285,7 @@ private void setContextDirect(long localRootSpanId, long spanId, long trHi, long for (int i = 0; i < MAX_CUSTOM_SLOTS; i++) { // i * Integer.BYTES: byte offset into sidecar buffer for int slot i sidecarBuffer.putInt(i * Integer.BYTES, 0); - indexedValueCache[i] = null; + indexedValueCache[i] = ABSENT; } // Reset attrs_data_size to contain only the fixed LRS entry, discarding // any custom attribute entries written during the previous span. @@ -319,7 +321,7 @@ private void clearContextDirect() { writeLrsHex(0); for (int i = 0; i < MAX_CUSTOM_SLOTS; i++) { sidecarBuffer.putInt(i * Integer.BYTES, 0); - indexedValueCache[i] = null; + indexedValueCache[i] = ABSENT; } sidecarBuffer.putLong(lrsSidecarOffset, 0); } @@ -437,6 +439,9 @@ public String readContextAttribute(int keyIndex) { return null; } String cached = indexedValueCache[keyIndex]; + if (cached == ABSENT) { + return null; + } if (cached != null) { return cached; } @@ -466,6 +471,7 @@ public String readContextAttribute(int keyIndex) { } pos += 2 + len; } + indexedValueCache[keyIndex] = ABSENT; return null; } From 8c01712ca8ed54fa2606aa96bdd54afa5299c2e6 Mon Sep 17 00:00:00 2001 From: Jaroslav Bachorik Date: Thu, 23 Apr 2026 09:40:00 +0200 Subject: [PATCH 05/19] fix: register thread in testSnapshotRestore Co-Authored-By: muse --- .../test/java/com/datadoghq/profiler/context/TagContextTest.java | 1 + 1 file changed, 1 insertion(+) diff --git a/ddprof-test/src/test/java/com/datadoghq/profiler/context/TagContextTest.java b/ddprof-test/src/test/java/com/datadoghq/profiler/context/TagContextTest.java index 89434bece..69afb8896 100644 --- a/ddprof-test/src/test/java/com/datadoghq/profiler/context/TagContextTest.java +++ b/ddprof-test/src/test/java/com/datadoghq/profiler/context/TagContextTest.java @@ -142,6 +142,7 @@ public void test() throws InterruptedException { public void testSnapshotRestore() throws Exception { // J9 does not initialize ThreadContext for non-profiled threads; skip. Assumptions.assumeTrue(!Platform.isJ9()); + registerCurrentThreadForWallClockProfiling(); ContextSetter contextSetter = new ContextSetter(profiler, Arrays.asList("tag1", "tag2")); // Initially both slots are empty From 502daa172e530b40a20b8fdbb58f37ab17f468d8 Mon Sep 17 00:00:00 2001 From: Jaroslav Bachorik Date: Thu, 23 Apr 2026 10:06:46 +0200 Subject: [PATCH 06/19] address: clear sidecar on attrs_data overflow in setContextAttributeDirect Co-Authored-By: muse --- .../main/java/com/datadoghq/profiler/ThreadContext.java | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/ddprof-lib/src/main/java/com/datadoghq/profiler/ThreadContext.java b/ddprof-lib/src/main/java/com/datadoghq/profiler/ThreadContext.java index ca39afd50..b8590a4e2 100644 --- a/ddprof-lib/src/main/java/com/datadoghq/profiler/ThreadContext.java +++ b/ddprof-lib/src/main/java/com/datadoghq/profiler/ThreadContext.java @@ -255,9 +255,12 @@ private boolean setContextAttributeDirect(int keyIndex, String value) { detach(); sidecarBuffer.putInt(keyIndex * Integer.BYTES, encoding); boolean written = replaceOtepAttribute(otepKeyIndex, utf8); + if (!written) { + // attrs_data overflow: the old entry was compacted out and the new one + // couldn't fit. Zero the sidecar so both views agree there is no value. + sidecarBuffer.putInt(keyIndex * Integer.BYTES, 0); + } attach(); - // Keep the read-back cache in sync. Only cache on successful attrs_data write; - // on overflow (written=false) the slot was compacted out; ABSENT prevents a futile rescan. indexedValueCache[keyIndex] = written ? value : ABSENT; return written; } From 9eda525ae6f2902f3ed70990699a6fcfabff1d86 Mon Sep 17 00:00:00 2001 From: Jaroslav Bachorik Date: Thu, 23 Apr 2026 11:40:14 +0200 Subject: [PATCH 07/19] address: apply muse review findings (docs, tests, comments) - Add Javadoc to JavaProfiler.readContextAttribute and ContextSetter.readContextValue(String) - Document valid=0 invariant and ABSENT ClassLoader-singleton requirement in ThreadContext - Expand readContextAttribute Javadoc (warm/cold path, allocation contract) - Document orphan Dictionary encoding in setContextAttribute Javadoc - Add tests: cold-path scan, attrs_data overflow, put() cache clear, cross-slot isolation - Fix raw-offset assumption in testSnapshotRestore (use offsetOf instead of hardcoded 0) Co-Authored-By: Claude Sonnet 4.6 --- .../com/datadoghq/profiler/ContextSetter.java | 11 +++ .../com/datadoghq/profiler/JavaProfiler.java | 4 + .../com/datadoghq/profiler/ThreadContext.java | 38 ++++++++-- .../profiler/context/TagContextTest.java | 75 ++++++++++++++++++- 4 files changed, 120 insertions(+), 8 deletions(-) diff --git a/ddprof-lib/src/main/java/com/datadoghq/profiler/ContextSetter.java b/ddprof-lib/src/main/java/com/datadoghq/profiler/ContextSetter.java index 38545c0f8..b1c289fe3 100644 --- a/ddprof-lib/src/main/java/com/datadoghq/profiler/ContextSetter.java +++ b/ddprof-lib/src/main/java/com/datadoghq/profiler/ContextSetter.java @@ -63,6 +63,17 @@ public String readContextValue(int offset) { return null; } + /** + * Reads the current value of the named context attribute. + * Resolves the attribute name to its registered offset via {@link #offsetOf(String)}, + * then delegates to {@link #readContextValue(int)}. Returns {@code null} if the + * attribute was not set or the name is not registered. + * + *

Reads from thread-local storage — caller and setter must be the same thread. + * + * @param attribute the attribute name + * @return the current value, or null if absent or unregistered + */ public String readContextValue(String attribute) { return readContextValue(offsetOf(attribute)); } diff --git a/ddprof-lib/src/main/java/com/datadoghq/profiler/JavaProfiler.java b/ddprof-lib/src/main/java/com/datadoghq/profiler/JavaProfiler.java index 3b9d57d1d..ce462724d 100644 --- a/ddprof-lib/src/main/java/com/datadoghq/profiler/JavaProfiler.java +++ b/ddprof-lib/src/main/java/com/datadoghq/profiler/JavaProfiler.java @@ -225,6 +225,10 @@ void copyTags(int[] snapshot) { tlsContextStorage.get().copyCustoms(snapshot); } + /** Reads a custom attribute value by key index. Same-thread-only: delegates to the + * calling thread's {@link ThreadContext} via TLS. + * @param offset the attribute key index (from {@link ContextSetter#offsetOf}) + * @return the attribute value, or null if not set */ String readContextAttribute(int offset) { return tlsContextStorage.get().readContextAttribute(offset); } diff --git a/ddprof-lib/src/main/java/com/datadoghq/profiler/ThreadContext.java b/ddprof-lib/src/main/java/com/datadoghq/profiler/ThreadContext.java index b8590a4e2..2e80f3866 100644 --- a/ddprof-lib/src/main/java/com/datadoghq/profiler/ThreadContext.java +++ b/ddprof-lib/src/main/java/com/datadoghq/profiler/ThreadContext.java @@ -65,6 +65,11 @@ public final class ThreadContext { // setContextDirect). Allows readContextAttribute to return without scanning // attrs_data or allocating on the warm path. // null = not yet scanned; ABSENT = scanned/cleared, known absent; other = cached value. + // ABSENT uses new String("") (not "") so it has a unique identity not shared with any + // interned literal. The == check in readContextAttribute relies on reference identity + // within a single ClassLoader. ThreadContext must not be loaded by multiple ClassLoaders; + // doing so would produce distinct ABSENT instances, causing the identity check to always + // fall through to the cold scan path. private static final String ABSENT = new String(""); private final String[] indexedValueCache = new String[MAX_CUSTOM_SLOTS]; @@ -208,10 +213,21 @@ public void copyCustoms(int[] value) { * request IDs, and other per-request-unique strings will exhaust the * Dictionary and cause attributes to be silently dropped. * + *

Overflow and orphan encodings: When {@code attrs_data} overflows + * (buffer full), the old entry for {@code keyIndex} is compacted out and the + * new value cannot be written. Both the sidecar and {@code indexedValueCache} + * are cleared so {@link #readContextAttribute} returns {@code null} for this + * slot. However, if the value was not already cached in the per-thread encoding + * cache, {@code registerConstant0} may have already registered it in the native + * Dictionary before the overflow was detected. That Dictionary entry cannot be + * removed — the native Dictionary is write-only for the JVM lifetime. The orphan + * encoding is harmless: it will not appear in JFR events because the sidecar is + * zeroed. + * * @param keyIndex Index into the registered attribute key map (0-based) * @param value The string value for this attribute * @return true if the attribute was set successfully, false if the - * Dictionary is full or the keyIndex is out of range + * Dictionary is full, attrs_data overflows, or keyIndex is out of range */ public boolean setContextAttribute(int keyIndex, String value) { if (keyIndex < 0 || keyIndex >= MAX_CUSTOM_SLOTS || value == null) { @@ -429,9 +445,17 @@ private int compactOtepAttribute(int otepKeyIndex) { /** * Reads a custom attribute value by key index. - * Returns the cached String from the last successful write on this thread (zero allocation). - * Falls back to scanning attrs_data when the cache is cold (e.g. after a session restart). - * Used by {@code ContextSetter.readContextValue()} to support snapshot/restore of + * + *

Warm path: O(1), zero-allocation — returns the cached value from + * {@code indexedValueCache[keyIndex]} when the slot was populated by a prior write + * on this thread. The cache is kept in sync by every write ({@link #setContextAttributeDirect}) + * and clear ({@link #clearContextAttribute}, {@link #setContextDirect}). + * + *

Cold path: scans {@code attrs_data} on first read after profiler restart + * (when the Java cache is unpopulated but the native buffer has pre-existing data). + * Populates the cache slot on success to make subsequent reads warm. + * + *

Used by {@code ContextSetter.readContextValue()} to support snapshot/restore of * nested profiling scopes. * * @param keyIndex 0-based user key index (same as passed to setContextAttribute) @@ -449,8 +473,10 @@ public String readContextAttribute(int keyIndex) { return cached; } // Cold path: scan attrs_data (only on first read after session restart). - // Guard against stale attrs_data after clearContext(): valid=0 means the record - // was cleared and attrs_data_size was intentionally not reset (see clearContextDirect). + // valid=0 means the record has not been published yet by attach(), or was cleared + // by clearContextDirect() without resetting attrs_data_size. Either way there are + // no valid user attribute entries — only the LRS prefix may exist. Any future path + // that writes user attributes must set valid=1 via attach() first. if (recordBuffer.get(validOffset) == 0) { return null; } diff --git a/ddprof-test/src/test/java/com/datadoghq/profiler/context/TagContextTest.java b/ddprof-test/src/test/java/com/datadoghq/profiler/context/TagContextTest.java index 69afb8896..226911453 100644 --- a/ddprof-test/src/test/java/com/datadoghq/profiler/context/TagContextTest.java +++ b/ddprof-test/src/test/java/com/datadoghq/profiler/context/TagContextTest.java @@ -1,8 +1,11 @@ package com.datadoghq.profiler.context; +import java.lang.reflect.Field; import java.util.Arrays; +import java.util.ArrayList; import java.util.HashMap; import java.util.HashSet; +import java.util.List; import java.util.Map; import java.util.Set; import java.util.concurrent.atomic.AtomicLong; @@ -26,6 +29,7 @@ import com.datadoghq.profiler.AbstractProfilerTest; import com.datadoghq.profiler.ContextSetter; +import com.datadoghq.profiler.ThreadContext; import static com.datadoghq.profiler.MoreAssertions.DICTIONARY_PAGE_SIZE; import static com.datadoghq.profiler.MoreAssertions.assertBoundedBy; import com.datadoghq.profiler.Platform; @@ -146,12 +150,12 @@ public void testSnapshotRestore() throws Exception { ContextSetter contextSetter = new ContextSetter(profiler, Arrays.asList("tag1", "tag2")); // Initially both slots are empty - assertNull(contextSetter.readContextValue(0)); + assertNull(contextSetter.readContextValue(contextSetter.offsetOf("tag1"))); assertNull(contextSetter.readContextValue("tag2")); // Set a value and read it back assertTrue(contextSetter.setContextValue("tag1", "before")); - assertEquals("before", contextSetter.readContextValue(0)); + assertEquals("before", contextSetter.readContextValue(contextSetter.offsetOf("tag1"))); assertEquals("before", contextSetter.readContextValue("tag1")); // Snapshot the string, overwrite, then restore @@ -173,6 +177,73 @@ public void testSnapshotRestore() throws Exception { assertNull(contextSetter.readContextValue("tag2")); } + @Test + public void testColdPathScan() throws Exception { + // Simulate profiler restart: null the Java cache slot so the next read + // goes through the cold scan path and recovers the value from attrs_data. + Assumptions.assumeTrue(!Platform.isJ9()); + registerCurrentThreadForWallClockProfiling(); + ContextSetter contextSetter = new ContextSetter(profiler, Arrays.asList("tag1", "tag2")); + + assertTrue(contextSetter.setContextValue("tag1", "cold-value")); + assertEquals("cold-value", contextSetter.readContextValue("tag1")); // warm path + + Field cacheField = ThreadContext.class.getDeclaredField("indexedValueCache"); + cacheField.setAccessible(true); + String[] cache = (String[]) cacheField.get(profiler.getThreadContext()); + cache[0] = null; // null = cold, triggers attrs_data scan on next read + + assertEquals("cold-value", contextSetter.readContextValue("tag1")); // cold path + } + + @Test + public void testAttrsDataOverflow() throws Exception { + Assumptions.assumeTrue(!Platform.isJ9()); + registerCurrentThreadForWallClockProfiling(); + List attrs = new ArrayList<>(); + for (int i = 1; i <= 10; i++) { + attrs.add("tag" + i); + } + ContextSetter contextSetter = new ContextSetter(profiler, attrs); + String bigValue = "x".repeat(255); + int overflowIndex = -1; + for (int i = 1; i <= 10; i++) { + if (!contextSetter.setContextValue("tag" + i, bigValue)) { + overflowIndex = i; + break; + } + } + assertTrue("Expected at least one write to overflow attrs_data", overflowIndex >= 0); + assertNull("Overflowed slot must read null via cache", contextSetter.readContextValue("tag" + overflowIndex)); + } + + @Test + public void testPutClearsCustomSlots() throws Exception { + Assumptions.assumeTrue(!Platform.isJ9()); + registerCurrentThreadForWallClockProfiling(); + ContextSetter contextSetter = new ContextSetter(profiler, Arrays.asList("tag1", "tag2")); + + assertTrue(contextSetter.setContextValue("tag1", "before-put")); + assertEquals("before-put", contextSetter.readContextValue("tag1")); + + // setContext() triggers setContextDirect which sets indexedValueCache[i]=ABSENT for all slots + profiler.setContext(1L, 42L, 0L, 42L); + assertNull("tag1 must be null after setContext clears cache", contextSetter.readContextValue("tag1")); + } + + @Test + public void testCrossSlotIsolation() throws Exception { + Assumptions.assumeTrue(!Platform.isJ9()); + registerCurrentThreadForWallClockProfiling(); + ContextSetter contextSetter = new ContextSetter(profiler, Arrays.asList("tag1", "tag2")); + + assertTrue(contextSetter.setContextValue("tag1", "v1")); + assertTrue(contextSetter.setContextValue("tag2", "v2")); + assertTrue(contextSetter.clearContextValue("tag2")); + assertEquals("v1", contextSetter.readContextValue("tag1")); + assertNull(contextSetter.readContextValue("tag2")); + } + private void work(ContextSetter contextSetter, String contextAttribute, String contextValue) throws InterruptedException { assertTrue(contextSetter.setContextValue(contextAttribute, contextValue)); From 96ab30c5010ab0273f18a6a2274edf169800e155 Mon Sep 17 00:00:00 2001 From: Jaroslav Bachorik Date: Thu, 23 Apr 2026 13:06:38 +0200 Subject: [PATCH 08/19] fix: use Java 8-compatible String fill; fix JUnit5 assertTrue arg order --- .../com/datadoghq/profiler/context/TagContextTest.java | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/ddprof-test/src/test/java/com/datadoghq/profiler/context/TagContextTest.java b/ddprof-test/src/test/java/com/datadoghq/profiler/context/TagContextTest.java index 226911453..70099cdaa 100644 --- a/ddprof-test/src/test/java/com/datadoghq/profiler/context/TagContextTest.java +++ b/ddprof-test/src/test/java/com/datadoghq/profiler/context/TagContextTest.java @@ -205,7 +205,9 @@ public void testAttrsDataOverflow() throws Exception { attrs.add("tag" + i); } ContextSetter contextSetter = new ContextSetter(profiler, attrs); - String bigValue = "x".repeat(255); + char[] chars = new char[255]; + java.util.Arrays.fill(chars, 'x'); + String bigValue = new String(chars); int overflowIndex = -1; for (int i = 1; i <= 10; i++) { if (!contextSetter.setContextValue("tag" + i, bigValue)) { @@ -213,8 +215,8 @@ public void testAttrsDataOverflow() throws Exception { break; } } - assertTrue("Expected at least one write to overflow attrs_data", overflowIndex >= 0); - assertNull("Overflowed slot must read null via cache", contextSetter.readContextValue("tag" + overflowIndex)); + assertTrue(overflowIndex >= 0, "Expected at least one write to overflow attrs_data"); + assertNull(contextSetter.readContextValue("tag" + overflowIndex), "Overflowed slot must read null via cache"); } @Test From 8efd902830b1ee6e6c3af5e5a7b99974b1ff9416 Mon Sep 17 00:00:00 2001 From: Jaroslav Bachorik Date: Thu, 23 Apr 2026 13:23:21 +0200 Subject: [PATCH 09/19] fix: correct JUnit5 assertNull arg order in testPutClearsCustomSlots --- .../java/com/datadoghq/profiler/context/TagContextTest.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ddprof-test/src/test/java/com/datadoghq/profiler/context/TagContextTest.java b/ddprof-test/src/test/java/com/datadoghq/profiler/context/TagContextTest.java index 70099cdaa..6b353bbe7 100644 --- a/ddprof-test/src/test/java/com/datadoghq/profiler/context/TagContextTest.java +++ b/ddprof-test/src/test/java/com/datadoghq/profiler/context/TagContextTest.java @@ -230,7 +230,7 @@ public void testPutClearsCustomSlots() throws Exception { // setContext() triggers setContextDirect which sets indexedValueCache[i]=ABSENT for all slots profiler.setContext(1L, 42L, 0L, 42L); - assertNull("tag1 must be null after setContext clears cache", contextSetter.readContextValue("tag1")); + assertNull(contextSetter.readContextValue("tag1"), "tag1 must be null after setContext clears cache"); } @Test From 97739a75bd9eecb2074708896ddcbb01fe39a5fa Mon Sep 17 00:00:00 2001 From: Jaroslav Bachorik Date: Thu, 23 Apr 2026 15:06:02 +0200 Subject: [PATCH 10/19] fix: use session-unique tag values to avoid attrCache hits across retries On musl (no JVM fork) the per-thread attrCacheKeys persists across @RetryingTest retries. Once "0"-"9" are cached in the native Dictionary and the Java cache, subsequent retries hit the cache, skip registerConstant0(), and leave dictionary_context_keys=0. Co-Authored-By: Claude Sonnet 4.6 --- .../com/datadoghq/profiler/context/TagContextTest.java | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/ddprof-test/src/test/java/com/datadoghq/profiler/context/TagContextTest.java b/ddprof-test/src/test/java/com/datadoghq/profiler/context/TagContextTest.java index 6b353bbe7..e0bd80192 100644 --- a/ddprof-test/src/test/java/com/datadoghq/profiler/context/TagContextTest.java +++ b/ddprof-test/src/test/java/com/datadoghq/profiler/context/TagContextTest.java @@ -42,7 +42,12 @@ public void test() throws InterruptedException { registerCurrentThreadForWallClockProfiling(); ContextSetter contextSetter = new ContextSetter(profiler, Arrays.asList("tag1", "tag2", "tag1")); - String[] strings = IntStream.range(0, 10).mapToObj(String::valueOf).toArray(String[]::new); + // Use session-unique prefix so each @RetryingTest attempt registers fresh values in the + // native Dictionary. Without this, on musl (no JVM fork) the per-thread attrCacheKeys + // persists across retries: cache hits skip registerConstant0(), leaving + // dictionary_context_keys=0 on every retry after the first. + String pfx = Long.toHexString(System.nanoTime()) + "_"; + String[] strings = IntStream.range(0, 10).mapToObj(i -> pfx + i).toArray(String[]::new); for (int i = 0; i < strings.length * 10; i++) { work(contextSetter, "tag1", strings[i % strings.length]); } From b8029cf9280149abc5c12e3f86d364b1c7be7e5b Mon Sep 17 00:00:00 2001 From: Jaroslav Bachorik Date: Thu, 23 Apr 2026 15:44:11 +0200 Subject: [PATCH 11/19] Potential fix for pull request finding Co-authored-by: Copilot Autofix powered by AI <175728472+Copilot@users.noreply.github.com> --- .../main/java/com/datadoghq/profiler/ThreadContext.java | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/ddprof-lib/src/main/java/com/datadoghq/profiler/ThreadContext.java b/ddprof-lib/src/main/java/com/datadoghq/profiler/ThreadContext.java index 2e80f3866..ec8b0bace 100644 --- a/ddprof-lib/src/main/java/com/datadoghq/profiler/ThreadContext.java +++ b/ddprof-lib/src/main/java/com/datadoghq/profiler/ThreadContext.java @@ -65,11 +65,9 @@ public final class ThreadContext { // setContextDirect). Allows readContextAttribute to return without scanning // attrs_data or allocating on the warm path. // null = not yet scanned; ABSENT = scanned/cleared, known absent; other = cached value. - // ABSENT uses new String("") (not "") so it has a unique identity not shared with any - // interned literal. The == check in readContextAttribute relies on reference identity - // within a single ClassLoader. ThreadContext must not be loaded by multiple ClassLoaders; - // doing so would produce distinct ABSENT instances, causing the identity check to always - // fall through to the cold scan path. + // ABSENT uses new String("") (not "") so it has a unique identity distinct from the + // interned empty-string literal. The == check in readContextAttribute relies on that + // unique sentinel identity to distinguish "known absent" from actual cached values. private static final String ABSENT = new String(""); private final String[] indexedValueCache = new String[MAX_CUSTOM_SLOTS]; From 3514d25158ab93df8f0af84b61fff33d32d206d9 Mon Sep 17 00:00:00 2001 From: Jaroslav Bachorik Date: Thu, 23 Apr 2026 18:32:17 +0200 Subject: [PATCH 12/19] feat: bulk snapshot/restore of ThreadContext via combined buffer Adds a 688-byte DirectByteBuffer aliasing record+sidecar for one-memcpy save/restore of nested scope state. New ScopeStack provides tiered scratch (6-slot eager + lazy 12-slot chunks) for allocation-free per-thread use. Co-Authored-By: Claude Sonnet 4.6 --- ddprof-lib/src/main/cpp/javaApi.cpp | 18 +++- ddprof-lib/src/main/cpp/thread.h | 7 +- .../com/datadoghq/profiler/JavaProfiler.java | 7 +- .../com/datadoghq/profiler/ScopeStack.java | 92 +++++++++++++++++++ .../com/datadoghq/profiler/ThreadContext.java | 48 +++++++++- 5 files changed, 164 insertions(+), 8 deletions(-) create mode 100644 ddprof-lib/src/main/java/com/datadoghq/profiler/ScopeStack.java diff --git a/ddprof-lib/src/main/cpp/javaApi.cpp b/ddprof-lib/src/main/cpp/javaApi.cpp index b886681fe..485d20282 100644 --- a/ddprof-lib/src/main/cpp/javaApi.cpp +++ b/ddprof-lib/src/main/cpp/javaApi.cpp @@ -554,9 +554,9 @@ Java_com_datadoghq_profiler_JavaProfiler_initializeContextTLS0(JNIEnv* env, jcla env->SetLongArrayRegion(metadata, 0, 6, meta); } - // Create 2 DirectByteBuffers: [record, sidecar] + // Create 3 DirectByteBuffers: [record, sidecar, combined] jclass bbClass = env->FindClass("java/nio/ByteBuffer"); - jobjectArray result = env->NewObjectArray(2, bbClass, nullptr); + jobjectArray result = env->NewObjectArray(3, bbClass, nullptr); // recordBuffer: 640 bytes over the OtelThreadContextRecord jobject recordBuf = env->NewDirectByteBuffer((void*)record, (jlong)OTEL_MAX_RECORD_SIZE); @@ -569,6 +569,20 @@ Java_com_datadoghq_profiler_JavaProfiler_initializeContextTLS0(JNIEnv* env, jcla jobject sidecarBuf = env->NewDirectByteBuffer((void*)thrd->getOtelTagEncodingsPtr(), (jlong)sidecarSize); env->SetObjectArrayElement(result, 1, sidecarBuf); + // combinedBuffer: single contiguous view over [record | tag_encodings | LRS] for bulk + // snapshot/restore. Contiguity is enforced by alignas(8) on _otel_ctx_record plus + // sizeof(OtelThreadContextRecord) being a multiple of 8 (see thread.h). The debug-only + // assert below traps any future struct reordering in debug CI runs. +#ifdef DEBUG + uint8_t* record_start = reinterpret_cast(record); + uint8_t* sidecar_start = reinterpret_cast(thrd->getOtelTagEncodingsPtr()); + assert(sidecar_start == record_start + OTEL_MAX_RECORD_SIZE + && "_otel_ctx_record and _otel_tag_encodings must be contiguous"); +#endif + size_t combinedSize = OTEL_MAX_RECORD_SIZE + sidecarSize; + jobject combinedBuf = env->NewDirectByteBuffer((void*)record, (jlong)combinedSize); + env->SetObjectArrayElement(result, 2, combinedBuf); + return result; } diff --git a/ddprof-lib/src/main/cpp/thread.h b/ddprof-lib/src/main/cpp/thread.h index 852129bc0..b3c721bfb 100644 --- a/ddprof-lib/src/main/cpp/thread.h +++ b/ddprof-lib/src/main/cpp/thread.h @@ -73,10 +73,15 @@ class ProfiledThread : public ThreadLocalData { UnwindFailures _unwind_failures; bool _otel_ctx_initialized; bool _crash_protection_active; - OtelThreadContextRecord _otel_ctx_record; + // alignas(8) + sizeof(OtelThreadContextRecord)==640 (multiple of 8) guarantee + // _otel_tag_encodings sits at +640 with no padding, so the three fields form one + // 688-byte contiguous region exposed as a combined DirectByteBuffer. + alignas(8) OtelThreadContextRecord _otel_ctx_record; // These two fields MUST be contiguous and 8-byte aligned — the JNI layer // exposes them as a single DirectByteBuffer (sidecar), and VarHandle long // views require 8-byte alignment for the buffer base address. + // Read invariant: sidecar readers must gate on record->valid (see ContextApi::get). + // ThreadContext.restore() relies on this to perform a bulk memcpy under valid=0. alignas(8) u32 _otel_tag_encodings[DD_TAGS_CAPACITY]; u64 _otel_local_root_span_id; diff --git a/ddprof-lib/src/main/java/com/datadoghq/profiler/JavaProfiler.java b/ddprof-lib/src/main/java/com/datadoghq/profiler/JavaProfiler.java index ce462724d..425edc81e 100644 --- a/ddprof-lib/src/main/java/com/datadoghq/profiler/JavaProfiler.java +++ b/ddprof-lib/src/main/java/com/datadoghq/profiler/JavaProfiler.java @@ -316,7 +316,7 @@ private static ThreadContext initializeThreadContext() { if (buffers == null) { throw new IllegalStateException("Failed to initialize OTEL TLS — ProfiledThread not available"); } - return new ThreadContext(buffers[0], buffers[1], metadata); + return new ThreadContext(buffers[0], buffers[1], buffers[2], metadata); } private static native boolean init0(); @@ -349,7 +349,7 @@ private static ThreadContext initializeThreadContext() { private static native String getStatus0(); /** - * Initializes context TLS for the current thread and returns 2 DirectByteBuffers. + * Initializes context TLS for the current thread and returns 3 DirectByteBuffers. * Sets otel_thread_ctx_v1 permanently to the thread's OtelThreadContextRecord. * * @param metadata output array filled with: @@ -359,7 +359,8 @@ private static ThreadContext initializeThreadContext() { * [3] ATTRS_DATA_SIZE_OFFSET — offset of 'attrs_data_size' field * [4] ATTRS_DATA_OFFSET — offset of 'attrs_data' field * [5] LRS_SIDECAR_OFFSET — offset of local_root_span_id in sidecar buffer - * @return array of 2 ByteBuffers: [recordBuffer, sidecarBuffer] + * @return array of 3 ByteBuffers: [recordBuffer, sidecarBuffer, combinedBuffer] + * where combinedBuffer aliases record + sidecar as one 688-byte contiguous region */ private static native ByteBuffer[] initializeContextTLS0(long[] metadata); diff --git a/ddprof-lib/src/main/java/com/datadoghq/profiler/ScopeStack.java b/ddprof-lib/src/main/java/com/datadoghq/profiler/ScopeStack.java new file mode 100644 index 000000000..a4474731d --- /dev/null +++ b/ddprof-lib/src/main/java/com/datadoghq/profiler/ScopeStack.java @@ -0,0 +1,92 @@ +/* + * Copyright 2026 Datadog, Inc + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + */ +package com.datadoghq.profiler; + +import java.util.Arrays; + +/** + * Per-thread stack of {@link ThreadContext} snapshots for nested scopes. + * + *

Provides bulk save/restore of the full OTEP record + sidecar state via one memcpy per + * transition, avoiding per-attribute TLS resolution on scope enter/exit. Not thread-safe: + * a single stack instance must be accessed only from its owning thread. + * + *

Storage is tiered to keep shallow nesting allocation-free: + *

+ */ +public final class ScopeStack { + private static final int FAST_DEPTH = 6; + private static final int CHUNK_DEPTH = 12; + private static final int SLOT_SIZE = ThreadContext.SNAPSHOT_SIZE; + + private final byte[] fast = new byte[FAST_DEPTH * SLOT_SIZE]; + // chunks[i] covers depths [FAST_DEPTH + i*CHUNK_DEPTH .. FAST_DEPTH + (i+1)*CHUNK_DEPTH). + private byte[][] chunks; + private int depth; + + public void enter(ThreadContext ctx) { + int d = depth; + ctx.snapshot(bufferFor(d), offsetFor(d)); + depth = d + 1; + } + + public void exit(ThreadContext ctx) { + int d = depth - 1; + if (d < 0) { + throw new IllegalStateException("ScopeStack underflow"); + } + ctx.restore(bufferFor(d), offsetFor(d)); + depth = d; + } + + /** Current nesting depth (number of outstanding {@link #enter} calls). */ + public int depth() { + return depth; + } + + private byte[] bufferFor(int d) { + if (d < FAST_DEPTH) { + return fast; + } + // chunkFor is idempotent: if this depth was previously populated (via a matching enter), + // it returns the existing chunk without allocating. + return chunkFor((d - FAST_DEPTH) / CHUNK_DEPTH); + } + + private static int offsetFor(int d) { + int slot = d < FAST_DEPTH ? d : (d - FAST_DEPTH) % CHUNK_DEPTH; + return slot * SLOT_SIZE; + } + + private byte[] chunkFor(int idx) { + byte[][] cs = chunks; + if (cs == null) { + cs = new byte[4][]; + chunks = cs; + } else if (idx >= cs.length) { + int newLen = cs.length; + while (newLen <= idx) { + newLen <<= 1; + } + cs = Arrays.copyOf(cs, newLen); + chunks = cs; + } + byte[] c = cs[idx]; + if (c == null) { + c = new byte[CHUNK_DEPTH * SLOT_SIZE]; + cs[idx] = c; + } + return c; + } +} diff --git a/ddprof-lib/src/main/java/com/datadoghq/profiler/ThreadContext.java b/ddprof-lib/src/main/java/com/datadoghq/profiler/ThreadContext.java index ec8b0bace..35362cf6c 100644 --- a/ddprof-lib/src/main/java/com/datadoghq/profiler/ThreadContext.java +++ b/ddprof-lib/src/main/java/com/datadoghq/profiler/ThreadContext.java @@ -18,6 +18,7 @@ import java.nio.ByteBuffer; import java.nio.ByteOrder; import java.nio.charset.StandardCharsets; +import java.util.Arrays; /** * Thread-local context for trace/span identification. @@ -29,6 +30,12 @@ public final class ThreadContext { private static final int MAX_CUSTOM_SLOTS = 10; private static final int OTEL_MAX_RECORD_SIZE = 640; + private static final int SIDECAR_SIZE = MAX_CUSTOM_SLOTS * Integer.BYTES + Long.BYTES; // 48 + /** + * Total bytes covered by the combined snapshot buffer: OTEP record + tag encodings + LRS. + * Used by {@link #snapshot(byte[], int)} / {@link #restore(byte[], int)}. + */ + public static final int SNAPSHOT_SIZE = OTEL_MAX_RECORD_SIZE + SIDECAR_SIZE; // 688 private static final int LRS_OTEP_KEY_INDEX = 0; // LRS is always a fixed 16-hex-char value in attrs_data (zero-padded u64). // The entry header is 2 bytes (key_index + length), giving 18 bytes total. @@ -82,21 +89,26 @@ public final class ThreadContext { private final ByteBuffer recordBuffer; // 640 bytes, OtelThreadContextRecord private final ByteBuffer sidecarBuffer; // tag encodings + LRS + // Aliases record + sidecar as one 688-byte contiguous region for bulk snapshot/restore + // via byte[] scratch. Byte-granular only; position state is thread-confined to this field. + private final ByteBuffer combinedBuffer; /** - * Creates a ThreadContext from the two DirectByteBuffers returned by native initializeContextTLS0. + * Creates a ThreadContext from the three DirectByteBuffers returned by native initializeContextTLS0. * * @param recordBuffer 640-byte buffer over OtelThreadContextRecord * @param sidecarBuffer buffer over tag encodings + local root span id + * @param combinedBuffer 688-byte buffer aliasing record + sidecar contiguously (for bulk snapshot) * @param metadata array with [VALID_OFFSET, TRACE_ID_OFFSET, SPAN_ID_OFFSET, * ATTRS_DATA_SIZE_OFFSET, ATTRS_DATA_OFFSET, LRS_SIDECAR_OFFSET] */ - public ThreadContext(ByteBuffer recordBuffer, ByteBuffer sidecarBuffer, long[] metadata) { + public ThreadContext(ByteBuffer recordBuffer, ByteBuffer sidecarBuffer, ByteBuffer combinedBuffer, long[] metadata) { // Record buffer uses native order for uint16_t attrs_data_size (read by C as native uint16_t). // trace_id/span_id are uint8_t[] arrays requiring big-endian — handled via Long.reverseBytes() // in setContextDirect(). Only little-endian platforms are supported. this.recordBuffer = recordBuffer.order(ByteOrder.nativeOrder()); this.sidecarBuffer = sidecarBuffer.order(ByteOrder.nativeOrder()); + this.combinedBuffer = combinedBuffer; this.validOffset = (int) metadata[0]; this.traceIdOffset = (int) metadata[1]; this.spanIdOffset = (int) metadata[2]; @@ -196,6 +208,38 @@ public void copyCustoms(int[] value) { } } + /** + * Captures the full record + sidecar state into {@code scratch[offset .. offset+SNAPSHOT_SIZE)}. + * Pair with {@link #restore(byte[], int)} for nested-scope propagation. + * + *

The detach/attach pair is required so the captured {@code valid} byte is always 0. When + * {@link #restore} later memcpys the scratch back, the valid byte is overwritten mid-memcpy; + * if the scratch held {@code valid=1}, the live record would transiently expose itself as + * valid while still partially restored, racing any signal handler reading via ContextApi::get. + */ + public void snapshot(byte[] scratch, int offset) { + detach(); + combinedBuffer.position(0); + combinedBuffer.get(scratch, offset, SNAPSHOT_SIZE); + attach(); + } + + /** + * Restores a previously captured state. The detach/attach pair hides the memcpy from readers + * going through {@link #recordBuffer}'s valid flag ({@code ContextApi::get} in native code), + * which is the sole gate for sidecar reads (see {@code thread.h}). The scratch's own valid + * byte is always 0 (enforced in {@link #snapshot}), so the memcpy never transiently republishes. + * {@link #indexedValueCache} is invalidated so the next read re-scans the restored attrs_data; + * {@code null} denotes "not yet scanned", distinct from {@link #ABSENT}. + */ + public void restore(byte[] scratch, int offset) { + detach(); + combinedBuffer.position(0); + combinedBuffer.put(scratch, offset, SNAPSHOT_SIZE); + Arrays.fill(indexedValueCache, null); + attach(); + } + /** * Sets a custom attribute on the current thread's context by string value. * Uses a per-thread encoding cache to avoid JNI for repeated values From d83ef11c7c35bed956265ed55d8e7db1e643d490 Mon Sep 17 00:00:00 2001 From: Jaroslav Bachorik Date: Thu, 23 Apr 2026 19:20:47 +0200 Subject: [PATCH 13/19] refactor: drop Java-side value cache; reject oversized values up front MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Java readers of custom attrs are test-only — the profiler signal handler uses sidecar encodings and OTEL eBPF reads attrs_data directly. Drop the per-slot String cache and scan attrs_data on the test-only read path. Reject values whose UTF-8 encoding exceeds 255 bytes at setContextAttribute to keep the OTEP view consistent and avoid orphan Dictionary entries. Co-Authored-By: Claude Sonnet 4.6 --- .../com/datadoghq/profiler/ScopeStack.java | 4 +- .../com/datadoghq/profiler/ThreadContext.java | 99 ++++-------- .../profiler/context/ScopeStackTest.java | 142 ++++++++++++++++++ .../profiler/context/TagContextTest.java | 21 --- 4 files changed, 172 insertions(+), 94 deletions(-) create mode 100644 ddprof-test/src/test/java/com/datadoghq/profiler/context/ScopeStackTest.java diff --git a/ddprof-lib/src/main/java/com/datadoghq/profiler/ScopeStack.java b/ddprof-lib/src/main/java/com/datadoghq/profiler/ScopeStack.java index a4474731d..65d6cb332 100644 --- a/ddprof-lib/src/main/java/com/datadoghq/profiler/ScopeStack.java +++ b/ddprof-lib/src/main/java/com/datadoghq/profiler/ScopeStack.java @@ -15,8 +15,8 @@ * Per-thread stack of {@link ThreadContext} snapshots for nested scopes. * *

Provides bulk save/restore of the full OTEP record + sidecar state via one memcpy per - * transition, avoiding per-attribute TLS resolution on scope enter/exit. Not thread-safe: - * a single stack instance must be accessed only from its owning thread. + * transition. Not thread-safe: a single stack instance must be accessed only from its + * owning thread. * *

Storage is tiered to keep shallow nesting allocation-free: *