diff --git a/ddprof-lib/src/main/cpp/j9/j9WallClock.cpp b/ddprof-lib/src/main/cpp/j9/j9WallClock.cpp index a094cd663..c965542f8 100644 --- a/ddprof-lib/src/main/cpp/j9/j9WallClock.cpp +++ b/ddprof-lib/src/main/cpp/j9/j9WallClock.cpp @@ -19,6 +19,7 @@ #include "j9/j9Support.h" #include "profiler.h" #include "threadState.h" +#include <cxxabi.h> #include <stdlib.h> volatile bool J9WallClock::_enabled = false; @@ -57,68 +58,90 @@ void J9WallClock::stop() { } void J9WallClock::timerLoop() { - JNIEnv *jni = VM::attachThread("java-profiler Sampler"); - jvmtiEnv *jvmti = VM::jvmti(); - - int max_frames = _max_stack_depth + MAX_NATIVE_FRAMES + RESERVED_FRAMES; - ASGCT_CallFrame *frames = - (ASGCT_CallFrame *)malloc(max_frames * sizeof(ASGCT_CallFrame)); - - while (_running) { - if (!_enabled) { - OS::sleep(_interval); - continue; - } - - jni->PushLocalFrame(64); - - jvmtiStackInfoExtended *stack_infos; - jint thread_count; - if (J9Support::GetAllStackTracesExtended( - _max_stack_depth, (void **)&stack_infos, &thread_count) == 0) { - for (int i = 0; i < thread_count; i++) { - jvmtiStackInfoExtended *si = &stack_infos[i]; - if (si->frame_count <= 0) { - // no frames recorded - continue; - } - OSThreadState ts = (si->state & JVMTI_THREAD_STATE_RUNNABLE) - ? OSThreadState::RUNNABLE - : OSThreadState::SLEEPING; - if (!_sample_idle_threads && ts != OSThreadState::RUNNABLE) { - // in execution profiler mode the non-running threads are skipped - continue; - } - for (int j = 0; j < si->frame_count; j++) { - jvmtiFrameInfoExtended *fi = &si->frame_buffer[j]; - frames[j].method_id = fi->method; - frames[j].bci = FrameType::encode(sanitizeJ9FrameType(fi->type), fi->location); - } + // IBM J9 may cancel this thread via abi::__forced_unwind during JVM shutdown. 
+ // Catch it to ensure free(frames) and VM::detachThread() always run — including + // if the unwind fires during setup (VM::attachThread or malloc) before the + // sampling loop starts — then re-throw so the thread exits properly through + // the normal unwind path. jni and frames are declared before the try and + // initialized to null so the catch handler's cleanup is safe in either case: + // free(nullptr) is a no-op, and detachThread() is only called when attach + // succeeded. + // + // PushLocalFrame / PopLocalFrame balance: if the forced unwind fires between + // PushLocalFrame and PopLocalFrame, the outstanding JNI local frame is released + // by DetachCurrentThread (called via VM::detachThread()), which is specified to + // destroy the current stack frame's local references. The common cancellation + // point is OS::sleep() which runs after PopLocalFrame, so no imbalance occurs + // in the typical case. + JNIEnv *jni = nullptr; + ASGCT_CallFrame *frames = nullptr; + try { + jni = VM::attachThread("java-profiler Sampler"); + jvmtiEnv *jvmti = VM::jvmti(); + + int max_frames = _max_stack_depth + MAX_NATIVE_FRAMES + RESERVED_FRAMES; + frames = (ASGCT_CallFrame *)malloc(max_frames * sizeof(ASGCT_CallFrame)); + + while (_running) { + if (!_enabled) { + OS::sleep(_interval); + continue; + } - int tid = J9Support::GetOSThreadID(si->thread); - if (tid == -1) { - // clearly an invalid TID; skip the thread - continue; - } - ExecutionEvent event; - event._thread_state = ts; - if (ts == OSThreadState::RUNNABLE) { - Profiler::instance()->recordExternalSample( - _interval, tid, si->frame_count, frames, /*truncated=*/false, - BCI_CPU, &event); - } - if (_sample_idle_threads) { - Profiler::instance()->recordExternalSample( - _interval, tid, si->frame_count, frames, /*truncated=*/false, - BCI_WALL, &event); + jni->PushLocalFrame(64); + + jvmtiStackInfoExtended *stack_infos; + jint thread_count; + if (J9Support::GetAllStackTracesExtended( + _max_stack_depth, (void 
**)&stack_infos, &thread_count) == 0) { + for (int i = 0; i < thread_count; i++) { + jvmtiStackInfoExtended *si = &stack_infos[i]; + if (si->frame_count <= 0) { + // no frames recorded + continue; + } + OSThreadState ts = (si->state & JVMTI_THREAD_STATE_RUNNABLE) + ? OSThreadState::RUNNABLE + : OSThreadState::SLEEPING; + if (!_sample_idle_threads && ts != OSThreadState::RUNNABLE) { + // in execution profiler mode the non-running threads are skipped + continue; + } + for (int j = 0; j < si->frame_count; j++) { + jvmtiFrameInfoExtended *fi = &si->frame_buffer[j]; + frames[j].method_id = fi->method; + frames[j].bci = FrameType::encode(sanitizeJ9FrameType(fi->type), fi->location); + } + + int tid = J9Support::GetOSThreadID(si->thread); + if (tid == -1) { + // clearly an invalid TID; skip the thread + continue; + } + ExecutionEvent event; + event._thread_state = ts; + if (ts == OSThreadState::RUNNABLE) { + Profiler::instance()->recordExternalSample( + _interval, tid, si->frame_count, frames, /*truncated=*/false, + BCI_CPU, &event); + } + if (_sample_idle_threads) { + Profiler::instance()->recordExternalSample( + _interval, tid, si->frame_count, frames, /*truncated=*/false, + BCI_WALL, &event); + } } + jvmti->Deallocate((unsigned char *)stack_infos); } - jvmti->Deallocate((unsigned char *)stack_infos); - } - jni->PopLocalFrame(NULL); + jni->PopLocalFrame(NULL); - OS::sleep(_interval); + OS::sleep(_interval); + } + } catch (abi::__forced_unwind&) { + free(frames); + VM::detachThread(); // DetachCurrentThread releases any outstanding JNI local frames + throw; } free(frames); diff --git a/ddprof-lib/src/main/cpp/libraryPatcher_linux.cpp b/ddprof-lib/src/main/cpp/libraryPatcher_linux.cpp index d1bc8c23d..cfb6d1802 100644 --- a/ddprof-lib/src/main/cpp/libraryPatcher_linux.cpp +++ b/ddprof-lib/src/main/cpp/libraryPatcher_linux.cpp @@ -11,6 +11,7 @@ #include "guards.h" #include <cstring> +#include <cxxabi.h> #include <dlfcn.h> #include <pthread.h> #include <stdlib.h> @@ -64,21 +65,16 @@ static void delete_routine_info(RoutineInfo* 
thr) { delete thr; } -// Initialize the current thread's TLS with profiling signals blocked. -// Kept noinline for the same stack-protector reason as delete_routine_info. -__attribute__((noinline)) -static void init_thread_tls() { - SignalBlocker blocker; - ProfiledThread::initCurrentThread(); -} - -// Arm the CPU timer with profiling signals blocked and open the init window -// (PROF-13072). Kept noinline for the same stack-protector reason as +// Initialize the current thread's TLS, open the init window (PROF-13072), and +// register the thread with the profiler — all under a single SignalBlocker so +// profiling signals cannot fire in the gap between initCurrentThread() and +// startInitWindow(). Kept noinline for the same stack-protector reason as // delete_routine_info: SignalBlocker's sigset_t must not appear in // start_routine_wrapper_spec's own stack frame on musl/aarch64. __attribute__((noinline)) -static void start_window_and_register() { +static void init_tls_and_register() { SignalBlocker blocker; + ProfiledThread::initCurrentThread(); if (ProfiledThread *pt = ProfiledThread::currentSignalSafe()) { pt->startInitWindow(); } @@ -98,12 +94,25 @@ static void* start_routine_wrapper_spec(void* args) { func_start_routine routine = thr->routine(); void* params = thr->args(); delete_routine_info(thr); - init_thread_tls(); - start_window_and_register(); - void* result = routine(params); - Profiler::unregisterThread(ProfiledThread::currentTid()); + init_tls_and_register(); + // Capture tid from TLS while it is guaranteed non-null (set by init_tls_and_register above). + // Using a cached tid avoids the lazy-allocating ProfiledThread::current() path inside + // the catch block, which may call 'new' at an unsafe point during forced unwind. + int tid = ProfiledThread::currentTid(); + // IBM J9 (and glibc pthread_cancel) use abi::__forced_unwind for thread teardown. 
+ // Catch it explicitly so cleanup runs even during forced unwind, then re-throw + // to allow the thread to exit properly. A plain catch(...) without re-throw + // would swallow the forced unwind and prevent the thread from actually exiting. + try { + routine(params); + } catch (abi::__forced_unwind&) { + Profiler::unregisterThread(tid); + ProfiledThread::release(); + throw; + } + Profiler::unregisterThread(tid); + ProfiledThread::release(); - return result; + return nullptr; } static int pthread_create_hook_spec(pthread_t* thread, @@ -125,12 +134,6 @@ static int pthread_create_hook_spec(pthread_t* thread, #endif // __aarch64__ -static void thread_cleanup(void* arg) { - int tid = *static_cast<int*>(arg); - Profiler::unregisterThread(tid); - ProfiledThread::release(); -} - // Wrapper around the real start routine. // See comments for start_routine_wrapper_spec() for details __attribute__((visibility("hidden"))) @@ -160,13 +163,23 @@ static void* start_routine_wrapper(void* args) { ProfiledThread::currentSignalSafe()->startInitWindow(); Profiler::registerThread(tid); } - void* result = nullptr; - // Handle pthread_exit() bypass - the thread calls pthread_exit() - // instead of normal termination - pthread_cleanup_push(thread_cleanup, &tid); - result = routine(params); - pthread_cleanup_pop(1); - return result; + // IBM J9 (and glibc pthread_cancel) use abi::__forced_unwind for thread + // teardown. pthread_cleanup_push/pop creates a __pthread_cleanup_class + // with an implicitly-noexcept destructor; when J9's forced-unwind + // propagates through it, the C++ runtime calls std::terminate() → abort(). + // Replacing with an explicit catch ensures cleanup runs on forced unwind + // without triggering terminate, and the re-throw lets the thread exit cleanly. + // pthread_exit() is also covered: on glibc it raises its own __forced_unwind. 
+ try { + routine(params); + } catch (abi::__forced_unwind&) { + Profiler::unregisterThread(tid); + ProfiledThread::release(); + throw; + } + Profiler::unregisterThread(tid); + ProfiledThread::release(); + return nullptr; } static int pthread_create_hook(pthread_t* thread, diff --git a/ddprof-lib/src/test/cpp/forced_unwind_ut.cpp b/ddprof-lib/src/test/cpp/forced_unwind_ut.cpp new file mode 100644 index 000000000..2c69e34bb --- /dev/null +++ b/ddprof-lib/src/test/cpp/forced_unwind_ut.cpp @@ -0,0 +1,159 @@ +/* + * Copyright 2026, Datadog, Inc. + * SPDX-License-Identifier: Apache-2.0 + * + * Regression test for SCP-1154: IBM J9 JVM crash in start_routine_wrapper. + * + * Root cause: pthread_cleanup_push in C++ mode creates __pthread_cleanup_class + * with an implicitly-noexcept destructor. When IBM J9's thread teardown raises + * _Unwind_ForcedUnwind (via libgcc, sourced from libj9thr29.so), the C++ runtime + * calls std::terminate() -> abort() because the forced-unwind exception tries to + * exit a noexcept-bounded destructor. + * + * Fix: replace pthread_cleanup_push/pop with catch(abi::__forced_unwind&) + rethrow. + * + * These tests verify: + * 1. abi::__forced_unwind (raised by pthread_cancel / pthread_exit) is caught by + * the new pattern. + * 2. The cleanup block runs. + * 3. The rethrow allows the thread to exit as PTHREAD_CANCELED. + * 4. ProfiledThread::release() can be called safely from within the catch block. 
+ */ + +#include <gtest/gtest.h> + +#ifdef __linux__ + +#include "thread.h" + +#include <cxxabi.h> +#include <pthread.h> +#include <unistd.h> +#include <atomic> + +// --------------------------------------------------------------------------- +// Test 1: bare catch(abi::__forced_unwind&) + rethrow +// --------------------------------------------------------------------------- + +static std::atomic<bool> g_bare_cleanup_called{false}; + +static void* bare_forced_unwind_thread(void*) { + g_bare_cleanup_called.store(false, std::memory_order_relaxed); + try { + while (true) { + pthread_testcancel(); + usleep(100); + } + } catch (abi::__forced_unwind&) { + g_bare_cleanup_called.store(true, std::memory_order_relaxed); + throw; // must re-throw so thread exits as PTHREAD_CANCELED + } + return nullptr; +} + +// Regression: catch(abi::__forced_unwind&) + rethrow must intercept the forced +// unwind raised by pthread_cancel, run the cleanup block, and let the thread +// exit cleanly as PTHREAD_CANCELED — not via std::terminate(). +TEST(ForcedUnwindTest, BarePatternInterceptsAndRethrows) { + pthread_t t; + ASSERT_EQ(0, pthread_create(&t, nullptr, bare_forced_unwind_thread, nullptr)); + + usleep(5000); // let thread reach its testcancel loop + pthread_cancel(t); + + void* retval; + ASSERT_EQ(0, pthread_join(t, &retval)); + + EXPECT_TRUE(g_bare_cleanup_called.load()) + << "catch(abi::__forced_unwind&) must fire on pthread_cancel"; + EXPECT_EQ(PTHREAD_CANCELED, retval) + << "rethrow must let thread exit as PTHREAD_CANCELED"; +} + +// --------------------------------------------------------------------------- +// Test 2: pattern with ProfiledThread (mirrors start_routine_wrapper) +// --------------------------------------------------------------------------- + +static std::atomic<bool> g_pt_cleanup_called{false}; +static std::atomic<bool> g_pt_release_called{false}; + +static void* profiled_forced_unwind_thread(void*) { + g_pt_cleanup_called.store(false, std::memory_order_relaxed); + g_pt_release_called.store(false, std::memory_order_relaxed); + + // 
Mirrors what start_routine_wrapper does before calling routine(params). + ProfiledThread::initCurrentThread(); + int tid = ProfiledThread::currentTid(); + (void)tid; + + try { + while (true) { + pthread_testcancel(); + usleep(100); + } + } catch (abi::__forced_unwind&) { + g_pt_cleanup_called.store(true, std::memory_order_relaxed); + // Mirrors the catch block in the fixed start_routine_wrapper: + // unregisterThread is omitted here (requires an initialised Profiler); + // release() is the critical cleanup that must always run. + ProfiledThread::release(); + g_pt_release_called.store(true, std::memory_order_relaxed); + throw; + } + + ProfiledThread::release(); + return nullptr; +} + +// Regression: the start_routine_wrapper pattern with ProfiledThread lifecycle +// must survive pthread_cancel without terminate(). +TEST(ForcedUnwindTest, ProfiledThreadReleasedOnForcedUnwind) { + pthread_t t; + ASSERT_EQ(0, pthread_create(&t, nullptr, profiled_forced_unwind_thread, nullptr)); + + usleep(5000); + pthread_cancel(t); + + void* retval; + ASSERT_EQ(0, pthread_join(t, &retval)); + + EXPECT_TRUE(g_pt_cleanup_called.load()) + << "catch(abi::__forced_unwind&) must fire when thread is cancelled"; + EXPECT_TRUE(g_pt_release_called.load()) + << "ProfiledThread::release() must complete inside the catch block"; + EXPECT_EQ(PTHREAD_CANCELED, retval); +} + +// --------------------------------------------------------------------------- +// Test 3: pthread_exit() also raises abi::__forced_unwind on glibc +// --------------------------------------------------------------------------- + +static std::atomic<bool> g_exit_cleanup_called{false}; + +static void* pthread_exit_thread(void*) { + g_exit_cleanup_called.store(false, std::memory_order_relaxed); + try { + pthread_exit(reinterpret_cast<void*>(42)); + } catch (abi::__forced_unwind&) { + g_exit_cleanup_called.store(true, std::memory_order_relaxed); + throw; + } + return nullptr; +} + +// pthread_exit() also uses abi::__forced_unwind on glibc — the 
same catch +// block must handle it so that threads calling pthread_exit() inside a +// wrapped routine don't bypass cleanup. +TEST(ForcedUnwindTest, PthreadExitAlsoCaughtByForcedUnwindCatch) { + pthread_t t; + ASSERT_EQ(0, pthread_create(&t, nullptr, pthread_exit_thread, nullptr)); + + void* retval; + ASSERT_EQ(0, pthread_join(t, &retval)); + + EXPECT_TRUE(g_exit_cleanup_called.load()) + << "catch(abi::__forced_unwind&) must also catch pthread_exit()"; + EXPECT_EQ(reinterpret_cast<void*>(42), retval); +} + +#endif // __linux__